# Copyright 2005-2020 Gentoo Authors # Distributed under the terms of the GNU General Public License v2 # Author(s): Brian Harring (ferringb@gentoo.org) from portage.cache import cache_errors from portage.cache.cache_errors import InvalidRestriction from portage.cache.mappings import ProtectedDict import warnings import operator class database: # this is for metadata/cache transfer. # basically flags the cache needs be updated when transferred cache to cache. # leave this. complete_eclass_entries = True autocommits = False cleanse_keys = False serialize_eclasses = True validation_chf = "mtime" store_eclass_paths = True def __init__(self, location, label, auxdbkeys, readonly=False): """initialize the derived class; specifically, store label/keys""" self._known_keys = auxdbkeys self.location = location self.label = label self.readonly = readonly self.sync_rate = 0 self.updates = 0 def __getitem__(self, cpv): """set a cpv to values This shouldn't be overridden in derived classes since it handles the __eclasses__ conversion. that said, if the class handles it, they can override it.""" if self.updates > self.sync_rate: self.commit() self.updates = 0 d = self._getitem(cpv) try: chf_types = self.chf_types except AttributeError: chf_types = (self.validation_chf,) if self.serialize_eclasses and "_eclasses_" in d: for chf_type in chf_types: if f"_{chf_type}_" not in d: # Skip the reconstruct_eclasses call, since it's # a waste of time if it contains a different chf_type # than the current one. In the past, it was possible # for reconstruct_eclasses called with chf_type='md5' # to "successfully" return invalid data here, because # it was unable to distinguish between md5 data and # mtime data. continue try: d["_eclasses_"] = reconstruct_eclasses( cpv, d["_eclasses_"], chf_type, paths=self.store_eclass_paths ) except cache_errors.CacheCorruption: if chf_type is chf_types[-1]: raise else: break else: raise cache_errors.CacheCorruption( cpv, "entry does not contain a recognized chf_type" ) elif "_eclasses_" not in d: d["_eclasses_"] = {} # Never return INHERITED, since portdbapi.aux_get() will # generate it automatically from _eclasses_, and we want # to omit it in comparisons between cache entries like # those that egencache uses to avoid redundant writes. d.pop("INHERITED", None) mtime_required = not any(d.get(f"_{x}_") for x in chf_types if x != "mtime") mtime = d.get("_mtime_") if not mtime: if mtime_required: raise cache_errors.CacheCorruption(cpv, "_mtime_ field is missing") d.pop("_mtime_", None) else: try: mtime = int(mtime) except ValueError: raise cache_errors.CacheCorruption( cpv, f"_mtime_ conversion to int failed: {mtime}" ) d["_mtime_"] = mtime return d def _getitem(self, cpv): """get cpv's values. override this in derived classes""" raise NotImplementedError @staticmethod def _internal_eclasses(extern_ec_dict, chf_type, paths): """ When serialize_eclasses is False, we have to convert an external eclass dict containing hashed_path objects into an appropriate internal dict containing values of chf_type (and eclass dirs if store_eclass_paths is True). """ if not extern_ec_dict: return extern_ec_dict chf_getter = operator.attrgetter(chf_type) if paths: intern_ec_dict = { k: (v.eclass_dir, chf_getter(v)) for k, v in extern_ec_dict.items() } else: intern_ec_dict = {k: chf_getter(v) for k, v in extern_ec_dict.items()} return intern_ec_dict def __setitem__(self, cpv, values): """set a cpv to values This shouldn't be overridden in derived classes since it handles the readonly checks """ if self.readonly: raise cache_errors.ReadOnlyRestriction() d = None if self.cleanse_keys: d = ProtectedDict(values) for k, v in list(item for item in d.items() if item[0] != "_eclasses_"): if not v: del d[k] if "_eclasses_" in values: if d is None: d = ProtectedDict(values) if self.serialize_eclasses: d["_eclasses_"] = serialize_eclasses( d["_eclasses_"], self.validation_chf, paths=self.store_eclass_paths ) else: d["_eclasses_"] = self._internal_eclasses( d["_eclasses_"], self.validation_chf, self.store_eclass_paths ) elif d is None: d = values self._setitem(cpv, d) if not self.autocommits: self.updates += 1 if self.updates > self.sync_rate: self.commit() self.updates = 0 def _setitem(self, name, values): """__setitem__ calls this after readonly checks. override it in derived classes note _eclassees_ key *must* be handled""" raise NotImplementedError def __delitem__(self, cpv): """delete a key from the cache. This shouldn't be overridden in derived classes since it handles the readonly checks """ if self.readonly: raise cache_errors.ReadOnlyRestriction() if not self.autocommits: self.updates += 1 self._delitem(cpv) if self.updates > self.sync_rate: self.commit() self.updates = 0 def _delitem(self, cpv): """__delitem__ calls this after readonly checks. override it in derived classes""" raise NotImplementedError def has_key(self, cpv): return cpv in self def iterkeys(self): return iter(self) def iteritems(self): for x in self: yield (x, self[x]) def sync(self, rate=0): self.sync_rate = rate if rate == 0: self.commit() def commit(self): if not self.autocommits: raise NotImplementedError(self) def __del__(self): # This used to be handled by an atexit hook that called # close_portdbapi_caches() for all portdbapi instances, but that was # prone to memory leaks for API consumers that needed to create/destroy # many portdbapi instances. So, instead we rely on __del__. self.sync() def __contains__(self, cpv): """This method should always be overridden. It is provided only for backward compatibility with modules that override has_key instead. It will automatically raise a NotImplementedError if has_key has not been overridden.""" if self.has_key is database.has_key: # prevent a possible recursive loop raise NotImplementedError warnings.warn( "portage.cache.template.database.has_key() is " "deprecated, override __contains__ instead", DeprecationWarning, ) return self.has_key(cpv) def __iter__(self): """This method should always be overridden. It is provided only for backward compatibility with modules that override iterkeys instead. It will automatically raise a NotImplementedError if iterkeys has not been overridden.""" if self.iterkeys is database.iterkeys: # prevent a possible recursive loop raise NotImplementedError(self) return iter(self.keys()) def get(self, k, x=None): try: return self[k] except KeyError: return x def validate_entry(self, entry, ebuild_hash, eclass_db): try: chf_types = self.chf_types except AttributeError: chf_types = (self.validation_chf,) for chf_type in chf_types: if self._validate_entry(chf_type, entry, ebuild_hash, eclass_db): return True return False def _validate_entry(self, chf_type, entry, ebuild_hash, eclass_db): hash_key = f"_{chf_type}_" try: entry_hash = entry[hash_key] except KeyError: return False else: if entry_hash != getattr(ebuild_hash, chf_type): return False update = eclass_db.validate_and_rewrite_cache( entry["_eclasses_"], chf_type, self.store_eclass_paths ) if update is None: return False if update: entry["_eclasses_"] = update return True def get_matches(self, match_dict): """generic function for walking the entire cache db, matching restrictions to filter what cpv's are returned. Derived classes should override this if they can implement a faster method then pulling each cpv:values, and checking it. For example, RDBMS derived classes should push the matching logic down to the actual RDBM.""" import re restricts = {} for key, match in match_dict.items(): # XXX this sucks. try: if isinstance(match, str): restricts[key] = re.compile(match).match else: restricts[key] = re.compile(match[0], match[1]).match except re.error as e: raise InvalidRestriction(key, match, e) if key not in self.__known_keys: raise InvalidRestriction(key, match, "Key isn't valid") for cpv in self: cont = True vals = self[cpv] for key, match in restricts.items(): if not match(vals[key]): cont = False break if cont: yield cpv keys = __iter__ items = iteritems _keysorter = operator.itemgetter(0) def serialize_eclasses(eclass_dict, chf_type="mtime", paths=True): """takes a dict, returns a string representing said dict""" """The "new format", which causes older versions of