diff options
-rwxr-xr-x | bin/ebuild | 6 | ||||
-rwxr-xr-x | bin/egencache | 14 | ||||
-rw-r--r-- | pym/_emerge/EbuildMetadataPhase.py | 13 | ||||
-rw-r--r-- | pym/_emerge/MetadataRegen.py | 14 | ||||
-rw-r--r-- | pym/_emerge/actions.py | 10 | ||||
-rw-r--r-- | pym/portage/cache/metadata.py | 7 | ||||
-rw-r--r-- | pym/portage/cache/template.py | 63 | ||||
-rw-r--r-- | pym/portage/dbapi/porttree.py | 86 | ||||
-rw-r--r-- | pym/portage/eclass_cache.py | 66 | ||||
-rw-r--r-- | pym/portage/repository/config.py | 18 |
10 files changed, 192 insertions, 105 deletions
diff --git a/bin/ebuild b/bin/ebuild index d4b8b71f6..334b36897 100755 --- a/bin/ebuild +++ b/bin/ebuild @@ -228,10 +228,8 @@ build_dir_phases = set(["setup", "unpack", "prepare", "configure", "compile", # sourced again even if $T/environment already exists. ebuild_changed = False if mytree == "porttree" and build_dir_phases.intersection(pargs): - metadata, st, emtime = \ - portage.portdb._pull_valid_cache(cpv, ebuild, ebuild_portdir) - if metadata is None: - ebuild_changed = True + ebuild_changed = \ + portage.portdb._pull_valid_cache(cpv, ebuild, ebuild_portdir)[0] is None tmpsettings = portage.config(clone=portage.settings) tmpsettings["PORTAGE_VERBOSE"] = "1" diff --git a/bin/egencache b/bin/egencache index 26660c1a9..8d16cd693 100755 --- a/bin/egencache +++ b/bin/egencache @@ -215,8 +215,11 @@ class GenCache(object): consumer=self._metadata_callback, max_jobs=max_jobs, max_load=max_load) self.returncode = os.EX_OK - self._trg_cache = metadata.database(portdb.porttrees[0], - "metadata/cache", portage.auxdbkeys[:]) + conf = portdb.repositories.get_repo_for_location(portdb.porttrees[0]) + self._trg_cache = conf.get_pregenerated_cache(portage.auxdbkeys[:], + force=True, readonly=False) + if self._trg_cache is None: + raise Exception("cache format %s isn't supported" % (conf.cache_format,)) if rsync: self._trg_cache.raise_stat_collision = True try: @@ -226,13 +229,16 @@ class GenCache(object): pass self._existing_nodes = set() - def _metadata_callback(self, cpv, ebuild_path, repo_path, metadata): + def _metadata_callback(self, cpv, repo_path, metadata, ebuild_hash): self._existing_nodes.add(cpv) self._cp_missing.discard(cpv_getkey(cpv)) if metadata is not None: if metadata.get('EAPI') == '0': del metadata['EAPI'] try: + chf = self._trg_cache.validation_chf + if chf != 'mtime': + metadata['_%s_' % chf] = getattr(ebuild_hash, chf) try: self._trg_cache[cpv] = metadata except StatCollision as sc: @@ -251,7 +257,7 @@ class GenCache(object): max_mtime += 1 max_mtime = long(max_mtime) try: - os.utime(ebuild_path, (max_mtime, max_mtime)) + os.utime(ebuild_hash.location, (max_mtime, max_mtime)) except OSError as e: self.returncode |= 1 writemsg_level( diff --git a/pym/_emerge/EbuildMetadataPhase.py b/pym/_emerge/EbuildMetadataPhase.py index e53298bae..aeff2f0e8 100644 --- a/pym/_emerge/EbuildMetadataPhase.py +++ b/pym/_emerge/EbuildMetadataPhase.py @@ -20,8 +20,8 @@ class EbuildMetadataPhase(SubProcess): used to extract metadata from the ebuild. """ - __slots__ = ("cpv", "ebuild_path", "fd_pipes", "metadata_callback", - "ebuild_mtime", "metadata", "portdb", "repo_path", "settings") + \ + __slots__ = ("cpv", "ebuild_hash", "fd_pipes", "metadata_callback", + "metadata", "portdb", "repo_path", "settings") + \ ("_raw_metadata",) _file_names = ("ebuild",) @@ -31,7 +31,7 @@ class EbuildMetadataPhase(SubProcess): def _start(self): settings = self.settings settings.setcpv(self.cpv) - ebuild_path = self.ebuild_path + ebuild_path = self.ebuild_hash.location eapi = None if eapi is None and \ @@ -44,8 +44,8 @@ class EbuildMetadataPhase(SubProcess): if eapi is not None: if not portage.eapi_is_supported(eapi): - self.metadata_callback(self.cpv, self.ebuild_path, - self.repo_path, {'EAPI' : eapi}, self.ebuild_mtime) + self.metadata_callback(self.cpv, ebuild_path, + self.repo_path, {'EAPI' : eapi}, self.ebuild_hash.mtime) self._set_returncode((self.pid, os.EX_OK << 8)) self.wait() return @@ -128,6 +128,5 @@ class EbuildMetadataPhase(SubProcess): else: metadata = zip(portage.auxdbkeys, metadata_lines) self.metadata = self.metadata_callback(self.cpv, - self.ebuild_path, self.repo_path, metadata, - self.ebuild_mtime) + self.repo_path, metadata, self.ebuild_hash) diff --git a/pym/_emerge/MetadataRegen.py b/pym/_emerge/MetadataRegen.py index 810317533..b3380562b 100644 --- a/pym/_emerge/MetadataRegen.py +++ b/pym/_emerge/MetadataRegen.py @@ -3,6 +3,7 @@ import portage from portage import os +from portage.eclass_cache import hashed_path from _emerge.EbuildMetadataPhase import EbuildMetadataPhase from _emerge.PollScheduler import PollScheduler @@ -68,16 +69,15 @@ class MetadataRegen(PollScheduler): ebuild_path, repo_path = portdb.findname2(cpv) if ebuild_path is None: raise AssertionError("ebuild not found for '%s'" % cpv) - metadata, st, emtime = portdb._pull_valid_cache( + metadata, ebuild_hash = portdb._pull_valid_cache( cpv, ebuild_path, repo_path) if metadata is not None: if consumer is not None: - consumer(cpv, ebuild_path, - repo_path, metadata) + consumer(cpv, repo_path, metadata, ebuild_hash) continue - yield EbuildMetadataPhase(cpv=cpv, ebuild_path=ebuild_path, - ebuild_mtime=emtime, + yield EbuildMetadataPhase(cpv=cpv, + ebuild_hash=ebuild_hash, metadata_callback=portdb._metadata_callback, portdb=portdb, repo_path=repo_path, settings=portdb.doebuild_settings) @@ -176,9 +176,9 @@ class MetadataRegen(PollScheduler): # On failure, still notify the consumer (in this case the metadata # argument is None). self._consumer(metadata_process.cpv, - metadata_process.ebuild_path, metadata_process.repo_path, - metadata_process.metadata) + metadata_process.metadata, + metadata_process.ebuild_hash) self._schedule() diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py index 844cf28ed..c449b5874 100644 --- a/pym/_emerge/actions.py +++ b/pym/_emerge/actions.py @@ -1737,8 +1737,9 @@ def action_metadata(settings, portdb, myopts, porttrees=None): if dest is not None: if not (dest['_mtime_'] == src['_mtime_'] and \ - tree_data.eclass_db.is_eclass_data_valid( - dest['_eclasses_']) and \ + tree_data.eclass_db.validate_and_rewrite_cache( + dest['_eclasses_'], tree_data.dest_db.validation_chf, + tree_data.dest_db.store_eclass_paths) and \ set(dest['_eclasses_']) == set(src['_eclasses_'])): dest = None else: @@ -1763,8 +1764,9 @@ def action_metadata(settings, portdb, myopts, porttrees=None): continue if eclasses is not None: - if not tree_data.eclass_db.is_eclass_data_valid( - src['_eclasses_']): + if not tree_data.eclass_db.validate_and_rewrite_cache( + src['_eclasses_'], tree_data.src_db.validation_chf, + tree_data.src_db.store_eclass_paths): continue inherited = eclasses else: diff --git a/pym/portage/cache/metadata.py b/pym/portage/cache/metadata.py index 4c735d7e3..07ec20ebc 100644 --- a/pym/portage/cache/metadata.py +++ b/pym/portage/cache/metadata.py @@ -6,6 +6,7 @@ import errno import re import stat import sys +from operator import attrgetter from portage import os from portage import _encodings from portage import _unicode_encode @@ -63,9 +64,11 @@ class database(flat_hash.database): if "INHERITED" in d: if self.ec is None: self.ec = portage.eclass_cache.cache(self.location[:-15]) + getter = attrgetter(self.validation_chf) try: - d["_eclasses_"] = self.ec.get_eclass_data( - d["INHERITED"].split()) + ec_data = self.ec.get_eclass_data(d["INHERITED"].split()) + d["_eclasses_"] = dict((k, (v.eclass_dir, getter(v))) + for k,v in ec_data.items()) except KeyError as e: # INHERITED contains a non-existent eclass. raise cache_errors.CacheCorruption(cpv, e) diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py index f84d8f4b9..a76a5f59f 100644 --- a/pym/portage/cache/template.py +++ b/pym/portage/cache/template.py @@ -7,6 +7,7 @@ from portage.cache.cache_errors import InvalidRestriction from portage.cache.mappings import ProtectedDict import sys import warnings +import operator if sys.hexversion >= 0x3000000: basestring = str @@ -21,6 +22,8 @@ class database(object): autocommits = False cleanse_keys = False serialize_eclasses = True + validation_chf = 'mtime' + store_eclass_paths = True def __init__(self, location, label, auxdbkeys, readonly=False): """ initialize the derived class; specifically, store label/keys""" @@ -40,7 +43,8 @@ class database(object): self.updates = 0 d=self._getitem(cpv) if self.serialize_eclasses and "_eclasses_" in d: - d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"]) + d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"], + self.validation_chf, paths=self.store_eclass_paths) elif "_eclasses_" not in d: d["_eclasses_"] = {} mtime = d.get('_mtime_') @@ -71,10 +75,12 @@ class database(object): if not v: del d[k] if self.serialize_eclasses and "_eclasses_" in values: - d["_eclasses_"] = serialize_eclasses(d["_eclasses_"]) + d["_eclasses_"] = serialize_eclasses(d["_eclasses_"], + self.validation_chf, paths=self.store_eclass_paths) elif self.serialize_eclasses and "_eclasses_" in values: d = ProtectedDict(values) - d["_eclasses_"] = serialize_eclasses(d["_eclasses_"]) + d["_eclasses_"] = serialize_eclasses(d["_eclasses_"], + self.validation_chf, paths=self.store_eclass_paths) else: d = values self._setitem(cpv, d) @@ -159,6 +165,18 @@ class database(object): except KeyError: return x + def validate_entry(self, entry, ebuild_hash, eclass_db): + hash_key = '_%s_' % self.validation_chf + if entry[hash_key] != getattr(ebuild_hash, self.validation_chf): + return False + update = eclass_db.validate_and_rewrite_cache(entry['_eclasses_'], self.validation_chf, + self.store_eclass_paths) + if update is None: + return False + if update: + entry['_eclasses_'] = update + return True + def get_matches(self, match_dict): """generic function for walking the entire cache db, matching restrictions to filter what cpv's are returned. Derived classes should override this if they @@ -195,7 +213,9 @@ class database(object): keys = __iter__ items = iteritems -def serialize_eclasses(eclass_dict): +_keysorter = operator.itemgetter(0) + +def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True): """takes a dict, returns a string representing said dict""" """The "new format", which causes older versions of <portage-2.1.2 to traceback with a ValueError due to failed long() conversion. This format @@ -206,27 +226,40 @@ def serialize_eclasses(eclass_dict): """ if not eclass_dict: return "" - return "\t".join(k + "\t%s\t%s" % eclass_dict[k] \ - for k in sorted(eclass_dict)) + getter = operator.attrgetter(chf_type) + if paths: + return "\t".join("%s\t%s\t%s" % (k, v.eclass_dir, getter(v)) + for k, v in sorted(eclass_dict.items(), key=_keysorter)) + return "\t".join("%s\t%s" % (k, getter(v)) + for k, v in sorted(eclass_dict.items(), key=_keysorter)) -def reconstruct_eclasses(cpv, eclass_string): + +def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True): """returns a dict when handed a string generated by serialize_eclasses""" eclasses = eclass_string.rstrip().lstrip().split("\t") if eclasses == [""]: # occasionally this occurs in the fs backends. they suck. return {} - - if len(eclasses) % 2 != 0 and len(eclasses) % 3 != 0: + + converter = str + if chf_type == 'mtime': + converter = long + + if paths: + if len(eclasses) % 3 != 0: + raise cache_errors.CacheCorruption(cpv, "_eclasses_ was of invalid len %i" % len(eclasses)) + elif len(eclasses) % 2 != 0: raise cache_errors.CacheCorruption(cpv, "_eclasses_ was of invalid len %i" % len(eclasses)) d={} try: - if eclasses[1].isdigit(): - for x in range(0, len(eclasses), 2): - d[eclasses[x]] = ("", long(eclasses[x + 1])) - else: + i = iter(eclasses) + if paths: # The old format contains paths that will be discarded. - for x in range(0, len(eclasses), 3): - d[eclasses[x]] = (eclasses[x + 1], long(eclasses[x + 2])) + for name, path, val in zip(i, i, i): + d[name] = (path, converter(val)) + else: + for name, val in zip(i, i): + d[name] = converter(val) except IndexError: raise cache_errors.CacheCorruption(cpv, "_eclasses_ was of invalid len %i" % len(eclasses)) diff --git a/pym/portage/dbapi/porttree.py b/pym/portage/dbapi/porttree.py index dd627e3a7..bffae36bd 100644 --- a/pym/portage/dbapi/porttree.py +++ b/pym/portage/dbapi/porttree.py @@ -358,16 +358,16 @@ class portdbapi(dbapi): @returns: A new EbuildMetadataPhase instance, or None if the metadata cache is already valid. """ - metadata, st, emtime = self._pull_valid_cache(cpv, ebuild_path, repo_path) + metadata, ebuild_hash = self._pull_valid_cache(cpv, ebuild_path, repo_path) if metadata is not None: return None - process = EbuildMetadataPhase(cpv=cpv, ebuild_path=ebuild_path, - ebuild_mtime=emtime, metadata_callback=self._metadata_callback, + process = EbuildMetadataPhase(cpv=cpv, + ebuild_hash=ebuild_hash, metadata_callback=self._metadata_callback, portdb=self, repo_path=repo_path, settings=self.doebuild_settings) return process - def _metadata_callback(self, cpv, ebuild_path, repo_path, metadata, mtime): + def _metadata_callback(self, cpv, repo_path, metadata, ebuild_hash): i = metadata if hasattr(metadata, "items"): @@ -380,8 +380,17 @@ class portdbapi(dbapi): else: metadata["_eclasses_"] = {} + try: + cache = self.auxdb[repo_path] + chf = cache.validation_chf + metadata['_%s_' % chf] = getattr(ebuild_hash, chf) + except CacheError: + # Normally this shouldn't happen, so we'll show + # a traceback for debugging purposes. + traceback.print_exc() + cache = None + metadata.pop("INHERITED", None) - metadata["_mtime_"] = mtime eapi = metadata.get("EAPI") if not eapi or not eapi.strip(): @@ -392,20 +401,24 @@ class portdbapi(dbapi): metadata[k] = "" metadata["EAPI"] = "-" + eapi.lstrip("-") - try: - self.auxdb[repo_path][cpv] = metadata - except CacheError: - # Normally this shouldn't happen, so we'll show - # a traceback for debugging purposes. - traceback.print_exc() + if cache is not None: + try: + cache[cpv] = metadata + except CacheError: + # Normally this shouldn't happen, so we'll show + # a traceback for debugging purposes. + traceback.print_exc() return metadata def _pull_valid_cache(self, cpv, ebuild_path, repo_path): try: # Don't use unicode-wrapped os module, for better performance. - st = _os.stat(_unicode_encode(ebuild_path, - encoding=_encodings['fs'], errors='strict')) - emtime = st[stat.ST_MTIME] + path = _unicode_encode(ebuild_path, + encoding=_encodings['fs'], errors='strict') + ebuild_hash = eclass_cache.hashed_path(path) + # snag mtime since we use it later, and to trigger stat failure + # if it doesn't exist + ebuild_hash.mtime except OSError: writemsg(_("!!! aux_get(): ebuild for " \ "'%s' does not exist at:\n") % (cpv,), noiselevel=-1) @@ -422,36 +435,29 @@ class portdbapi(dbapi): auxdbs.append(self.auxdb[repo_path]) eclass_db = self._repo_info[repo_path].eclass_db - doregen = True for auxdb in auxdbs: try: metadata = auxdb[cpv] except KeyError: - pass + continue except CacheError: if not auxdb.readonly: try: del auxdb[cpv] - except KeyError: - pass - except CacheError: + except (KeyError, CacheError): pass - else: - eapi = metadata.get('EAPI', '').strip() - if not eapi: - eapi = '0' - if not (eapi[:1] == '-' and eapi_is_supported(eapi[1:])) and \ - emtime == metadata['_mtime_'] and \ - eclass_db.is_eclass_data_valid(metadata['_eclasses_']): - doregen = False - - if not doregen: + continue + eapi = metadata.get('EAPI', '').strip() + if not eapi: + eapi = '0' + if eapi[:1] == '-' and eapi_is_supported(eapi[1:]): + continue + if auxdb.validate_entry(metadata, ebuild_hash, eclass_db): break - - if doregen: + else: metadata = None - return (metadata, st, emtime) + return (metadata, ebuild_hash) def aux_get(self, mycpv, mylist, mytree=None, myrepo=None): "stub code for returning auxilliary db information, such as SLOT, DEPEND, etc." @@ -492,7 +498,7 @@ class portdbapi(dbapi): _("ebuild not found for '%s'") % mycpv, noiselevel=1) raise KeyError(mycpv) - mydata, st, emtime = self._pull_valid_cache(mycpv, myebuild, mylocation) + mydata, ebuild_hash = self._pull_valid_cache(mycpv, myebuild, mylocation) doregen = mydata is None if doregen: @@ -515,10 +521,10 @@ class portdbapi(dbapi): if eapi is not None and not portage.eapi_is_supported(eapi): mydata = self._metadata_callback( - mycpv, myebuild, mylocation, {'EAPI':eapi}, emtime) + mycpv, ebuild_hash, mylocation, {'EAPI':eapi}, emtime) else: - proc = EbuildMetadataPhase(cpv=mycpv, ebuild_path=myebuild, - ebuild_mtime=emtime, + proc = EbuildMetadataPhase(cpv=mycpv, + ebuild_hash=ebuild_hash, metadata_callback=self._metadata_callback, portdb=self, repo_path=mylocation, scheduler=PollScheduler().sched_iface, @@ -536,15 +542,17 @@ class portdbapi(dbapi): # do we have a origin repository name for the current package mydata["repository"] = self.repositories.get_name_for_location(mylocation) mydata["INHERITED"] = ' '.join(mydata.get("_eclasses_", [])) - mydata["_mtime_"] = st[stat.ST_MTIME] + mydata["_mtime_"] = ebuild_hash.mtime eapi = mydata.get("EAPI") if not eapi: eapi = "0" mydata["EAPI"] = eapi if not eapi_is_supported(eapi): - for k in set(mydata).difference(("_mtime_", "_eclasses_")): - mydata[k] = "" + keys = set(mydata) + keys.discard("_eclasses_") + keys.discard("_mtime_") + mydata.update((k, '') for k in keys) mydata["EAPI"] = "-" + eapi.lstrip("-") #finally, we look at our internal cache entry and return the requested data. diff --git a/pym/portage/eclass_cache.py b/pym/portage/eclass_cache.py index 1374f1d9b..fb187416d 100644 --- a/pym/portage/eclass_cache.py +++ b/pym/portage/eclass_cache.py @@ -6,21 +6,50 @@ __all__ = ["cache"] import stat import sys +import operator from portage.util import normalize_path import errno from portage.exception import PermissionDenied from portage import os +from portage import checksum if sys.hexversion >= 0x3000000: long = int + +class hashed_path(object): + + def __init__(self, location): + self.location = location + + def __getattr__(self, attr): + if attr == 'mtime': + # use stat.ST_MTIME; accessing .st_mtime gets you a float + # depending on the python version, and long(float) introduces + # some rounding issues that aren't present for people using + # the straight c api. + # thus use the defacto python compatibility work around; + # access via index, which gurantees you get the raw long. + self.mtime = obj = os.stat(self.location)[stat.ST_MTIME] + return obj + if not attr.islower(): + # we don't care to allow .mD5 as an alias for .md5 + raise AttributeError(attr) + try: + val = checksum.perform_checksum(self.location, attr.upper())[0] + except KeyError: + raise AttributeError(attr) + setattr(self, attr, val) + return val + + class cache(object): """ Maintains the cache information about eclasses used in ebuild. """ def __init__(self, porttree_root, overlays=[]): - self.eclasses = {} # {"Name": ("location","_mtime_")} + self.eclasses = {} # {"Name": hashed_path} self._eclass_locations = {} # screw with the porttree ordering, w/out having bash inherit match it, and I'll hurt you. @@ -80,14 +109,16 @@ class cache(object): for y in eclass_filenames: if not y.endswith(".eclass"): continue + obj = hashed_path(os.path.join(x, y)) + obj.eclass_dir = x try: - mtime = os.stat(os.path.join(x, y))[stat.ST_MTIME] + mtime = obj.mtime except OSError: continue ys=y[:-eclass_len] if x == self._master_eclass_root: master_eclasses[ys] = mtime - self.eclasses[ys] = (x, mtime) + self.eclasses[ys] = obj self._eclass_locations[ys] = x continue @@ -98,22 +129,25 @@ class cache(object): # so prefer the master entry. continue - self.eclasses[ys] = (x, mtime) + self.eclasses[ys] = obj self._eclass_locations[ys] = x - def is_eclass_data_valid(self, ec_dict): + def validate_and_rewrite_cache(self, ec_dict, chf_type, stores_paths): if not isinstance(ec_dict, dict): - return False - for eclass, tup in ec_dict.items(): - cached_data = self.eclasses.get(eclass, None) - """ Only use the mtime for validation since the probability of a - collision is small and, depending on the cache implementation, the - path may not be specified (cache from rsync mirrors, for example). - """ - if cached_data is None or tup[1] != cached_data[1]: - return False - - return True + return None + our_getter = operator.attrgetter(chf_type) + cache_getter = lambda x:x + if stores_paths: + key_getter = operator.itemgetter(1) + d = {} + for eclass, ec_data in ec_dict.items(): + cached_data = self.eclasses.get(eclass) + if cached_data is None: + return None + if cache_getter(ec_data) != our_getter(cached_data): + return None + d[eclass] = cached_data + return d def get_eclass_data(self, inherits): ec_dict = {} diff --git a/pym/portage/repository/config.py b/pym/portage/repository/config.py index 9a5473820..a67e7f138 100644 --- a/pym/portage/repository/config.py +++ b/pym/portage/repository/config.py @@ -128,14 +128,18 @@ class RepoConfig(object): self.manifest_hashes = None self.cache_format = None - def get_pregenerated_cache(self, auxdbkeys, readonly=True): - if self.cache_format is None: - return None - elif self.cache_format == 'pms': + def get_pregenerated_cache(self, auxdbkeys, readonly=True, force=False): + format = self.cache_format + if format is None: + if not force: + return None + format = 'pms' + if format == 'pms': from portage.cache.metadata import database - return database(self.location, 'metadata/cache', - auxdbkeys, readonly=readonly) - return None + else: + return None + return database(self.location, 'metadata/cache', + auxdbkeys, readonly=readonly) def load_manifest(self, *args, **kwds): kwds['thin'] = self.thin_manifest |