From 6415910cb04e773c91b0f8b41704a5e9611f4198 Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Wed, 26 Nov 2008 22:40:41 +0000 Subject: Bug #235642 - Create hardlinks when merging identical files. This works by using a tuple of (md5, st_size) as a key to a list of hardlink candidates. Multiple candidates are used in case some happen to be merged to separate devices. (trunk r12109) svn path=/main/branches/2.1.6/; revision=12110 --- pym/portage/__init__.py | 53 +++++++++++++++++++++++++++++++++++++++----- pym/portage/dbapi/vartree.py | 12 +++++++++- 2 files changed, 58 insertions(+), 7 deletions(-) diff --git a/pym/portage/__init__.py b/pym/portage/__init__.py index 186363235..2fcb4b143 100644 --- a/pym/portage/__init__.py +++ b/pym/portage/__init__.py @@ -5958,7 +5958,8 @@ def _movefile(src, dest, **kwargs): raise portage.exception.PortageException( "mv '%s' '%s'" % (src, dest)) -def movefile(src,dest,newmtime=None,sstat=None,mysettings=None): +def movefile(src, dest, newmtime=None, sstat=None, mysettings=None, + hardlink_candidates=None): """moves a file from src to dest, preserving all permissions and attributes; mtime will be preserved even when moving across filesystems. Returns true on success and false on failure. Move is atomic.""" @@ -6030,8 +6031,45 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None): print "!!!",e return None + hardlinked = False + # Since identical files might be merged to multiple filesystems, + # os.link() calls might fail for some paths, so try them all. + # For atomic replacement, first create the link as a temp file + # and then use os.rename() to replace the destination. + if hardlink_candidates is not None: + head, tail = os.path.split(dest) + hardlink_tmp = os.path.join(head, ".%s._portage_merge_.%s" % \ + (tail, os.getpid())) + try: + os.unlink(hardlink_tmp) + except OSError, e: + if e.errno != errno.ENOENT: + writemsg("!!! 
Failed to remove hardlink temp file: %s\n" % \ + (hardlink_tmp,), noiselevel=-1) + writemsg("!!! %s\n" % (e,), noiselevel=-1) + return None + del e + for hardlink_src in hardlink_candidates: + try: + os.link(hardlink_src, hardlink_tmp) + except OSError: + continue + else: + try: + os.rename(hardlink_tmp, dest) + except OSError, e: + writemsg("!!! Failed to rename %s to %s\n" % \ + (hardlink_tmp, dest), noiselevel=-1) + writemsg("!!! %s\n" % (e,), noiselevel=-1) + return None + hardlinked = True + break + renamefailed=1 - if sstat[stat.ST_DEV]==dstat[stat.ST_DEV] or selinux_enabled: + if hardlinked: + renamefailed = False + if not hardlinked and \ + (selinux_enabled or sstat[stat.ST_DEV] == dstat[stat.ST_DEV]): try: if selinux_enabled: ret=selinux.secure_rename(src,dest) @@ -6092,11 +6130,14 @@ def movefile(src,dest,newmtime=None,sstat=None,mysettings=None): return None try: - if newmtime is not None: - os.utime(dest, (newmtime, newmtime)) + if hardlinked: + newmtime = long(os.stat(dest).st_mtime) else: - os.utime(dest, (sstat.st_atime, sstat.st_mtime)) - newmtime = long(sstat.st_mtime) + if newmtime is not None: + os.utime(dest, (newmtime, newmtime)) + else: + os.utime(dest, (sstat.st_atime, sstat.st_mtime)) + newmtime = long(sstat.st_mtime) except OSError: # The utime can fail here with EPERM even though the move succeeded. # Instead of failing, use stat to return the mtime if possible. 
diff --git a/pym/portage/dbapi/vartree.py b/pym/portage/dbapi/vartree.py index 2555657ba..b27f30fe3 100644 --- a/pym/portage/dbapi/vartree.py +++ b/pym/portage/dbapi/vartree.py @@ -1100,6 +1100,7 @@ class dblink(object): self.contentscache = None self._contents_inodes = None self._contents_basenames = None + self._md5_merge_map = {} def lockdb(self): if self._lock_vdb: @@ -2377,6 +2378,7 @@ class dblink(object): if self.mergeme(srcroot, destroot, outfile, None, secondhand, cfgfiledict, mymtime): return 1 + self._md5_merge_map.clear() #restore umask os.umask(prevmask) @@ -2734,9 +2736,17 @@ class dblink(object): # whether config protection or not, we merge the new file the # same way. Unless moveme=0 (blocking directory) if moveme: - mymtime = movefile(mysrc, mydest, newmtime=thismtime, sstat=mystat, mysettings=self.settings) + hardlink_key = (mymd5, mystat.st_size) + hardlink_candidates = self._md5_merge_map.get(hardlink_key) + if hardlink_candidates is None: + hardlink_candidates = [] + self._md5_merge_map[hardlink_key] = hardlink_candidates + mymtime = movefile(mysrc, mydest, newmtime=thismtime, + sstat=mystat, mysettings=self.settings, + hardlink_candidates=hardlink_candidates) if mymtime is None: return 1 + hardlink_candidates.append(mydest) zing = ">>>" if mymtime != None: -- cgit v1.2.3-1-g7c22