summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZac Medico <zmedico@gentoo.org>2007-05-22 11:16:50 +0000
committerZac Medico <zmedico@gentoo.org>2007-05-22 11:16:50 +0000
commitf45516f92d9cfa12736783ec62273e78f3805120 (patch)
tree890bb30d207fbf16d1746db58166bf65360b2352
parent29fd2e335caa465372eb35a1b65ecdf9a37f83a6 (diff)
downloadportage-f45516f92d9cfa12736783ec62273e78f3805120.tar.gz
portage-f45516f92d9cfa12736783ec62273e78f3805120.tar.bz2
portage-f45516f92d9cfa12736783ec62273e78f3805120.zip
Cache xpak metadata in ${PKGDIR}/Packages in order to minimize disk load when binarytree.populate() is called. This does not write all of the keys that genpkgindex does, but it will copy metadata from the existing Packages file if it appears valid. The binarytree.populate() code is performance critical, so the MD5 calculation will certainly have to be done elsewhere (for example, just after a package is built).
svn path=/main/trunk/; revision=6569
-rw-r--r--pym/portage/dbapi/bintree.py104
-rw-r--r--pym/portage/getbinpkg.py20
2 files changed, 122 insertions, 2 deletions
diff --git a/pym/portage/dbapi/bintree.py b/pym/portage/dbapi/bintree.py
index 2a9651df5..d15f00b8b 100644
--- a/pym/portage/dbapi/bintree.py
+++ b/pym/portage/dbapi/bintree.py
@@ -115,6 +115,7 @@ class binarytree(object):
self._pkg_paths = {}
self._all_directory = os.path.isdir(
os.path.join(self.pkgdir, "All"))
+ self._pkgindex_keys = set(["CPV", "SLOT", "MTIME", "SIZE"])
def move_ent(self, mylist):
if not self.populated:
@@ -371,20 +372,66 @@ class binarytree(object):
dirs.remove("All")
dirs.sort()
dirs.insert(0, "All")
+ pkgfile = os.path.join(self.pkgdir, "Packages")
+ metadata = {}
+ header = {}
+ try:
+ f = open(pkgfile)
+ except EnvironmentError:
+ pass
+ else:
+ try:
+ header = portage.getbinpkg.readpkgindex(f)
+ while True:
+ d = portage.getbinpkg.readpkgindex(f)
+ if not d:
+ break
+ mycpv = d.get("CPV")
+ if not mycpv:
+ continue
+ d.setdefault("SLOT", "0")
+ metadata[mycpv] = d
+ finally:
+ f.close()
+ del f
+ update_pkgindex = False
for mydir in dirs:
for myfile in listdir(os.path.join(self.pkgdir, mydir)):
if not myfile.endswith(".tbz2"):
continue
mypath = os.path.join(mydir, myfile)
full_path = os.path.join(self.pkgdir, mypath)
- if os.path.islink(full_path):
+ s = os.lstat(full_path)
+ if stat.S_ISLNK(s.st_mode):
continue
+ if mydir != "All":
+ # Validate data from the package index and try to avoid
+ # reading the xpak if possible.
+ mycpv = mydir + "/" + myfile[:-5]
+ d = metadata.get(mycpv)
+ skip = False
+ if d:
+ try:
+ if long(d.get("MTIME")) == long(s.st_mtime):
+ skip = True
+ except ValueError:
+ pass
+ if skip and not self._pkgindex_keys.difference(d):
+ pkg_paths[mycpv] = mypath
+ self.dbapi.cpv_inject(mycpv)
+ if not self.dbapi._aux_cache_keys.difference(d):
+ aux_cache = {}
+ for k in self.dbapi._aux_cache_keys:
+ aux_cache[k] = d[k]
+ self.dbapi._aux_cache[mycpv] = aux_cache
+ continue
mytbz2 = portage.xpak.tbz2(full_path)
# For invalid packages, mycat could be None.
mycat = mytbz2.getfile("CATEGORY")
mypf = mytbz2.getfile("PF")
+ slot = mytbz2.getfile("SLOT")
mypkg = myfile[:-5]
- if not mycat or not mypf:
+ if not mycat or not mypf or not slot:
#old-style or corrupt package
writemsg("!!! Invalid binary package: '%s'\n" % full_path,
noiselevel=-1)
@@ -394,6 +441,7 @@ class binarytree(object):
self.invalids.append(mypkg)
continue
mycat = mycat.strip()
+ slot = slot.strip()
if mycat != mydir and mydir != "All":
continue
if mypkg != mypf.strip():
@@ -412,7 +460,59 @@ class binarytree(object):
continue
pkg_paths[mycpv] = mypath
self.dbapi.cpv_inject(mycpv)
+ update_pkgindex = True
+ d = metadata.get(mycpv, {})
+ if d:
+ # Reuse metadata such as MD5, since we won't calculate
+ # MD5 here due to the performance hit.
+ mtime = d.get("MTIME")
+ if mtime:
+ # genpkgindex really should include the mtime and
+ # then this mtime check should be forced.
+ try:
+ if long(mtime) != long(s.st_mtime):
+ d.clear()
+ except ValueError:
+ d.clear()
+ if d:
+ try:
+ if long(d.get("SIZE")) != long(s.st_size):
+ d.clear()
+ except ValueError:
+ d.clear()
+
+ d["CPV"] = mycpv
+ d["SLOT"] = slot
+ d["MTIME"] = str(long(s.st_mtime))
+ d["SIZE"] = str(s.st_size)
+ metadata[mycpv] = d
+ if not self.dbapi._aux_cache_keys.difference(d):
+ aux_cache = {}
+ for k in self.dbapi._aux_cache_keys:
+ aux_cache[k] = d[k]
+ self.dbapi._aux_cache[mycpv] = aux_cache
+
self._pkg_paths = pkg_paths
+ if update_pkgindex and os.access(self.pkgdir, os.W_OK):
+ cpv_all = self._pkg_paths.keys()
+ stale = set(metadata).difference(cpv_all)
+ for cpv in stale:
+ del metadata[cpv]
+ cpv_all.sort()
+ import time
+ from portage.util import atomic_ofstream
+ header["TIMESTAMP"] = str(long(time.time()))
+ header["PACKAGES"] = str(len(cpv_all))
+ f = atomic_ofstream(pkgfile)
+ try:
+ portage.getbinpkg.writepkgindex(f, header.iteritems())
+ for cpv in cpv_all:
+ d = metadata[cpv]
+ if d["SLOT"] == "0":
+ del d["SLOT"]
+ portage.getbinpkg.writepkgindex(f, d.iteritems())
+ finally:
+ f.close()
if getbinpkgs and not self.settings["PORTAGE_BINHOST"]:
writemsg(red("!!! PORTAGE_BINHOST unset, but use is requested.\n"),
diff --git a/pym/portage/getbinpkg.py b/pym/portage/getbinpkg.py
index b0d5ca9eb..070b036b5 100644
--- a/pym/portage/getbinpkg.py
+++ b/pym/portage/getbinpkg.py
@@ -570,3 +570,23 @@ def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=
conn.close()
return metadata[baseurl]["data"]
+
def readpkgindex(pkgfile):
	"""Read one stanza of "key: value" lines from pkgfile.

	Lines are consumed from the iterable pkgfile until an empty line or
	the end of the input is reached, so consecutive calls on the same
	file object return consecutive stanzas.

	@param pkgfile: an iterable of text lines (such as an open file)
	@return: a dict mapping each key to its value; it is empty when no
		"key: value" line was read before the stanza ended. If a key
		occurs more than once, the last occurrence wins.
	"""
	d = {}
	for line in pkgfile:
		line = line.rstrip("\n")
		if not line:
			# A blank line terminates the current stanza.
			break
		fields = line.split(":", 1)
		if len(fields) != 2:
			# No colon at all: not a key/value line, so ignore it.
			continue
		k, v = fields
		# Drop the single separator space written after the colon, but
		# only when it is really there, so that "KEY:value" does not
		# lose the first character of its value.
		if v.startswith(" "):
			v = v[1:]
		d[k] = v
	return d
+
def writepkgindex(pkgfile, items):
	"""Write one stanza of "key: value" lines to pkgfile.

	@param pkgfile: an object with a write() method
	@param items: an iterable of (key, value) pairs; each pair is written
		as a single "key: value" line, in iteration order

	A single empty line is written after the last pair to terminate the
	stanza, even when items yields nothing.
	"""
	for pair in items:
		key, value = pair
		pkgfile.write("%s: %s\n" % (key, value))
	# Blank line marks the end of this stanza.
	pkgfile.write("\n")