From 2ed1cb53cc4158af08c22d466b15b9a9a7767212 Mon Sep 17 00:00:00 2001 From: Brian Harring Date: Thu, 13 Oct 2011 23:27:22 -0700 Subject: cache: rewrite to support arbitrary validation method Specifically, the cache can use any portage supported checksum method, or use the standard mtime approach. In addition, support controlling whether or not paths are stored, and generally try to restore some of the centralization/encapsulation that was in place originally. (cherry picked from commit bc1aed614fb588f0ade5bcb5d1265a8db0f8d247) Change-Id: Ic38057e7dbb15063c64a93c99e66e113a7d4c70e --- bin/ebuild | 6 +-- bin/egencache | 14 +++++-- pym/_emerge/EbuildMetadataPhase.py | 13 +++--- pym/_emerge/MetadataRegen.py | 14 +++---- pym/_emerge/actions.py | 10 +++-- pym/portage/cache/metadata.py | 7 +++- pym/portage/cache/template.py | 63 +++++++++++++++++++++------- pym/portage/dbapi/porttree.py | 86 +++++++++++++++++++++----------------- pym/portage/eclass_cache.py | 66 ++++++++++++++++++++++------- pym/portage/repository/config.py | 18 ++++---- 10 files changed, 192 insertions(+), 105 deletions(-) diff --git a/bin/ebuild b/bin/ebuild index d4b8b71f6..334b36897 100755 --- a/bin/ebuild +++ b/bin/ebuild @@ -228,10 +228,8 @@ build_dir_phases = set(["setup", "unpack", "prepare", "configure", "compile", # sourced again even if $T/environment already exists. ebuild_changed = False if mytree == "porttree" and build_dir_phases.intersection(pargs): - metadata, st, emtime = \ - portage.portdb._pull_valid_cache(cpv, ebuild, ebuild_portdir) - if metadata is None: - ebuild_changed = True + ebuild_changed = \ + portage.portdb._pull_valid_cache(cpv, ebuild, ebuild_portdir)[0] is None tmpsettings = portage.config(clone=portage.settings) tmpsettings["PORTAGE_VERBOSE"] = "1" diff --git a/bin/egencache b/bin/egencache index 26660c1a9..8d16cd693 100755 --- a/bin/egencache +++ b/bin/egencache @@ -215,8 +215,11 @@ class GenCache(object): consumer=self._metadata_callback, max_jobs=max_jobs, max_load=max_load) self.returncode = os.EX_OK - self._trg_cache = metadata.database(portdb.porttrees[0], - "metadata/cache", portage.auxdbkeys[:]) + conf = portdb.repositories.get_repo_for_location(portdb.porttrees[0]) + self._trg_cache = conf.get_pregenerated_cache(portage.auxdbkeys[:], + force=True, readonly=False) + if self._trg_cache is None: + raise Exception("cache format %s isn't supported" % (conf.cache_format,)) if rsync: self._trg_cache.raise_stat_collision = True try: @@ -226,13 +229,16 @@ class GenCache(object): pass self._existing_nodes = set() - def _metadata_callback(self, cpv, ebuild_path, repo_path, metadata): + def _metadata_callback(self, cpv, repo_path, metadata, ebuild_hash): self._existing_nodes.add(cpv) self._cp_missing.discard(cpv_getkey(cpv)) if metadata is not None: if metadata.get('EAPI') == '0': del metadata['EAPI'] try: + chf = self._trg_cache.validation_chf + if chf != 'mtime': + metadata['_%s_' % chf] = getattr(ebuild_hash, chf) try: self._trg_cache[cpv] = metadata except StatCollision as sc: @@ -251,7 +257,7 @@ class GenCache(object): max_mtime += 1 max_mtime = long(max_mtime) try: - os.utime(ebuild_path, (max_mtime, max_mtime)) + os.utime(ebuild_hash.location, (max_mtime, max_mtime)) except OSError as e: self.returncode |= 1 writemsg_level( diff --git a/pym/_emerge/EbuildMetadataPhase.py b/pym/_emerge/EbuildMetadataPhase.py index e53298bae..aeff2f0e8 100644 --- a/pym/_emerge/EbuildMetadataPhase.py +++ b/pym/_emerge/EbuildMetadataPhase.py @@ -20,8 +20,8 @@ class EbuildMetadataPhase(SubProcess): used to extract metadata from the ebuild. """ - __slots__ = ("cpv", "ebuild_path", "fd_pipes", "metadata_callback", - "ebuild_mtime", "metadata", "portdb", "repo_path", "settings") + \ + __slots__ = ("cpv", "ebuild_hash", "fd_pipes", "metadata_callback", + "metadata", "portdb", "repo_path", "settings") + \ ("_raw_metadata",) _file_names = ("ebuild",) @@ -31,7 +31,7 @@ class EbuildMetadataPhase(SubProcess): def _start(self): settings = self.settings settings.setcpv(self.cpv) - ebuild_path = self.ebuild_path + ebuild_path = self.ebuild_hash.location eapi = None if eapi is None and \ @@ -44,8 +44,8 @@ class EbuildMetadataPhase(SubProcess): if eapi is not None: if not portage.eapi_is_supported(eapi): - self.metadata_callback(self.cpv, self.ebuild_path, - self.repo_path, {'EAPI' : eapi}, self.ebuild_mtime) + self.metadata_callback(self.cpv, ebuild_path, + self.repo_path, {'EAPI' : eapi}, self.ebuild_hash.mtime) self._set_returncode((self.pid, os.EX_OK << 8)) self.wait() return @@ -128,6 +128,5 @@ class EbuildMetadataPhase(SubProcess): else: metadata = zip(portage.auxdbkeys, metadata_lines) self.metadata = self.metadata_callback(self.cpv, - self.ebuild_path, self.repo_path, metadata, - self.ebuild_mtime) + self.repo_path, metadata, self.ebuild_hash) diff --git a/pym/_emerge/MetadataRegen.py b/pym/_emerge/MetadataRegen.py index 810317533..b3380562b 100644 --- a/pym/_emerge/MetadataRegen.py +++ b/pym/_emerge/MetadataRegen.py @@ -3,6 +3,7 @@ import portage from portage import os +from portage.eclass_cache import hashed_path from _emerge.EbuildMetadataPhase import EbuildMetadataPhase from _emerge.PollScheduler import PollScheduler @@ -68,16 +69,15 @@ class MetadataRegen(PollScheduler): ebuild_path, repo_path = portdb.findname2(cpv) if ebuild_path is None: raise AssertionError("ebuild not found for '%s'" % cpv) - metadata, st, emtime = portdb._pull_valid_cache( + metadata, ebuild_hash = portdb._pull_valid_cache( cpv, ebuild_path, repo_path) if metadata is not None: if consumer is not None: - consumer(cpv, ebuild_path, - repo_path, metadata) + consumer(cpv, repo_path, metadata, ebuild_hash) continue - yield EbuildMetadataPhase(cpv=cpv, ebuild_path=ebuild_path, - ebuild_mtime=emtime, + yield EbuildMetadataPhase(cpv=cpv, + ebuild_hash=ebuild_hash, metadata_callback=portdb._metadata_callback, portdb=portdb, repo_path=repo_path, settings=portdb.doebuild_settings) @@ -176,9 +176,9 @@ class MetadataRegen(PollScheduler): # On failure, still notify the consumer (in this case the metadata # argument is None). self._consumer(metadata_process.cpv, - metadata_process.ebuild_path, metadata_process.repo_path, - metadata_process.metadata) + metadata_process.metadata, + metadata_process.ebuild_hash) self._schedule() diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py index 844cf28ed..c449b5874 100644 --- a/pym/_emerge/actions.py +++ b/pym/_emerge/actions.py @@ -1737,8 +1737,9 @@ def action_metadata(settings, portdb, myopts, porttrees=None): if dest is not None: if not (dest['_mtime_'] == src['_mtime_'] and \ - tree_data.eclass_db.is_eclass_data_valid( - dest['_eclasses_']) and \ + tree_data.eclass_db.validate_and_rewrite_cache( + dest['_eclasses_'], tree_data.dest_db.validation_chf, + tree_data.dest_db.store_eclass_paths) and \ set(dest['_eclasses_']) == set(src['_eclasses_'])): dest = None else: @@ -1763,8 +1764,9 @@ def action_metadata(settings, portdb, myopts, porttrees=None): continue if eclasses is not None: - if not tree_data.eclass_db.is_eclass_data_valid( - src['_eclasses_']): + if not tree_data.eclass_db.validate_and_rewrite_cache( + src['_eclasses_'], tree_data.src_db.validation_chf, + tree_data.src_db.store_eclass_paths): continue inherited = eclasses else: diff --git a/pym/portage/cache/metadata.py b/pym/portage/cache/metadata.py index 4c735d7e3..07ec20ebc 100644 --- a/pym/portage/cache/metadata.py +++ b/pym/portage/cache/metadata.py @@ -6,6 +6,7 @@ import errno import re import stat import sys +from operator import attrgetter from portage import os from portage import _encodings from portage import _unicode_encode @@ -63,9 +64,11 @@ class database(flat_hash.database): if "INHERITED" in d: if self.ec is None: self.ec = portage.eclass_cache.cache(self.location[:-15]) + getter = attrgetter(self.validation_chf) try: - d["_eclasses_"] = self.ec.get_eclass_data( - d["INHERITED"].split()) + ec_data = self.ec.get_eclass_data(d["INHERITED"].split()) + d["_eclasses_"] = dict((k, (v.eclass_dir, getter(v))) + for k,v in ec_data.items()) except KeyError as e: # INHERITED contains a non-existent eclass. raise cache_errors.CacheCorruption(cpv, e) diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py index f84d8f4b9..a76a5f59f 100644 --- a/pym/portage/cache/template.py +++ b/pym/portage/cache/template.py @@ -7,6 +7,7 @@ from portage.cache.cache_errors import InvalidRestriction from portage.cache.mappings import ProtectedDict import sys import warnings +import operator if sys.hexversion >= 0x3000000: basestring = str @@ -21,6 +22,8 @@ class database(object): autocommits = False cleanse_keys = False serialize_eclasses = True + validation_chf = 'mtime' + store_eclass_paths = True def __init__(self, location, label, auxdbkeys, readonly=False): """ initialize the derived class; specifically, store label/keys""" @@ -40,7 +43,8 @@ class database(object): self.updates = 0 d=self._getitem(cpv) if self.serialize_eclasses and "_eclasses_" in d: - d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"]) + d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"], + self.validation_chf, paths=self.store_eclass_paths) elif "_eclasses_" not in d: d["_eclasses_"] = {} mtime = d.get('_mtime_') @@ -71,10 +75,12 @@ class database(object): if not v: del d[k] if self.serialize_eclasses and "_eclasses_" in values: - d["_eclasses_"] = serialize_eclasses(d["_eclasses_"]) + d["_eclasses_"] = serialize_eclasses(d["_eclasses_"], + self.validation_chf, paths=self.store_eclass_paths) elif self.serialize_eclasses and "_eclasses_" in values: d = ProtectedDict(values) - d["_eclasses_"] = serialize_eclasses(d["_eclasses_"]) + d["_eclasses_"] = serialize_eclasses(d["_eclasses_"], + self.validation_chf, paths=self.store_eclass_paths) else: d = values self._setitem(cpv, d) @@ -159,6 +165,18 @@ class database(object): except KeyError: return x + def validate_entry(self, entry, ebuild_hash, eclass_db): + hash_key = '_%s_' % self.validation_chf + if entry[hash_key] != getattr(ebuild_hash, self.validation_chf): + return False + update = eclass_db.validate_and_rewrite_cache(entry['_eclasses_'], self.validation_chf, + self.store_eclass_paths) + if update is None: + return False + if update: + entry['_eclasses_'] = update + return True + def get_matches(self, match_dict): """generic function for walking the entire cache db, matching restrictions to filter what cpv's are returned. Derived classes should override this if they @@ -195,7 +213,9 @@ class database(object): keys = __iter__ items = iteritems -def serialize_eclasses(eclass_dict): +_keysorter = operator.itemgetter(0) + +def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True): """takes a dict, returns a string representing said dict""" """The "new format", which causes older versions of = 0x3000000: long = int + +class hashed_path(object): + + def __init__(self, location): + self.location = location + + def __getattr__(self, attr): + if attr == 'mtime': + # use stat.ST_MTIME; accessing .st_mtime gets you a float + # depending on the python version, and long(float) introduces + # some rounding issues that aren't present for people using + # the straight c api. + # thus use the defacto python compatibility work around; + # access via index, which gurantees you get the raw long. + self.mtime = obj = os.stat(self.location)[stat.ST_MTIME] + return obj + if not attr.islower(): + # we don't care to allow .mD5 as an alias for .md5 + raise AttributeError(attr) + try: + val = checksum.perform_checksum(self.location, attr.upper())[0] + except KeyError: + raise AttributeError(attr) + setattr(self, attr, val) + return val + + class cache(object): """ Maintains the cache information about eclasses used in ebuild. """ def __init__(self, porttree_root, overlays=[]): - self.eclasses = {} # {"Name": ("location","_mtime_")} + self.eclasses = {} # {"Name": hashed_path} self._eclass_locations = {} # screw with the porttree ordering, w/out having bash inherit match it, and I'll hurt you. @@ -80,14 +109,16 @@ class cache(object): for y in eclass_filenames: if not y.endswith(".eclass"): continue + obj = hashed_path(os.path.join(x, y)) + obj.eclass_dir = x try: - mtime = os.stat(os.path.join(x, y))[stat.ST_MTIME] + mtime = obj.mtime except OSError: continue ys=y[:-eclass_len] if x == self._master_eclass_root: master_eclasses[ys] = mtime - self.eclasses[ys] = (x, mtime) + self.eclasses[ys] = obj self._eclass_locations[ys] = x continue @@ -98,22 +129,25 @@ class cache(object): # so prefer the master entry. continue - self.eclasses[ys] = (x, mtime) + self.eclasses[ys] = obj self._eclass_locations[ys] = x - def is_eclass_data_valid(self, ec_dict): + def validate_and_rewrite_cache(self, ec_dict, chf_type, stores_paths): if not isinstance(ec_dict, dict): - return False - for eclass, tup in ec_dict.items(): - cached_data = self.eclasses.get(eclass, None) - """ Only use the mtime for validation since the probability of a - collision is small and, depending on the cache implementation, the - path may not be specified (cache from rsync mirrors, for example). - """ - if cached_data is None or tup[1] != cached_data[1]: - return False - - return True + return None + our_getter = operator.attrgetter(chf_type) + cache_getter = lambda x:x + if stores_paths: + key_getter = operator.itemgetter(1) + d = {} + for eclass, ec_data in ec_dict.items(): + cached_data = self.eclasses.get(eclass) + if cached_data is None: + return None + if cache_getter(ec_data) != our_getter(cached_data): + return None + d[eclass] = cached_data + return d def get_eclass_data(self, inherits): ec_dict = {} diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py index 9a5473820..a67e7f138 100644 --- a/pym/portage/repository/config.py +++ b/pym/portage/repository/config.py @@ -128,14 +128,18 @@ class RepoConfig(object): self.manifest_hashes = None self.cache_format = None - def get_pregenerated_cache(self, auxdbkeys, readonly=True): - if self.cache_format is None: - return None - elif self.cache_format == 'pms': + def get_pregenerated_cache(self, auxdbkeys, readonly=True, force=False): + format = self.cache_format + if format is None: + if not force: + return None + format = 'pms' + if format == 'pms': from portage.cache.metadata import database - return database(self.location, 'metadata/cache', - auxdbkeys, readonly=readonly) - return None + else: + return None + return database(self.location, 'metadata/cache', + auxdbkeys, readonly=readonly) def load_manifest(self, *args, **kwds): kwds['thin'] = self.thin_manifest -- cgit v1.2.3-1-g7c22