From 89bcc67e2930a841e1120492634ebc20530bc1d7 Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Tue, 13 Dec 2011 20:24:35 -0800 Subject: locks.py: fix hardlink locks for bug #394195 This updates the hardlink locking code to support the non-blocking, lockfile(wantnewlockfile=False), and lockfile(file_object) behaviors which are used by portage code. --- pym/portage/locks.py | 181 ++++++++++++++------- .../package/ebuild/_config/special_env_vars.py | 2 +- pym/portage/tests/ebuild/test_doebuild_spawn.py | 5 + pym/portage/tests/ebuild/test_ipc_daemon.py | 9 + pym/portage/tests/emerge/test_simple.py | 4 + pym/portage/tests/locks/test_asynchronous_lock.py | 48 +++++- pym/portage/tests/locks/test_lock_nonblock.py | 12 +- 7 files changed, 200 insertions(+), 61 deletions(-) diff --git a/pym/portage/locks.py b/pym/portage/locks.py index 41539ba88..cab0fa3b5 100644 --- a/pym/portage/locks.py +++ b/pym/portage/locks.py @@ -9,13 +9,12 @@ __all__ = ["lockdir", "unlockdir", "lockfile", "unlockfile", \ import errno import fcntl import platform -import stat import sys import time +import warnings import portage -from portage import os -from portage.const import PORTAGE_BIN_PATH +from portage import os, _encodings, _unicode_decode from portage.exception import DirectoryNotFound, FileNotFound, \ InvalidData, TryAgain, OperationNotPermitted, PermissionDenied from portage.data import portage_gid @@ -26,6 +25,7 @@ if sys.hexversion >= 0x3000000: basestring = str HARDLINK_FD = -2 +_HARDLINK_POLL_LATENCY = 3 # seconds _default_lock_fn = fcntl.lockf if platform.python_implementation() == 'PyPy': @@ -54,7 +54,9 @@ def lockfile(mypath, wantnewlockfile=0, unlinkfile=0, if isinstance(mypath, basestring) and mypath[-1] == '/': mypath = mypath[:-1] + lockfilename_path = mypath if hasattr(mypath, 'fileno'): + lockfilename_path = getattr(mypath, 'name', None) mypath = mypath.fileno() if isinstance(mypath, int): lockfilename = mypath @@ -63,7 +65,7 @@ def lockfile(mypath, wantnewlockfile=0, unlinkfile=0, elif wantnewlockfile: base, tail = os.path.split(mypath) lockfilename = os.path.join(base, "." + tail + ".portage_lockfile") - del base, tail + lockfilename_path = lockfilename unlinkfile = 1 else: lockfilename = mypath @@ -117,6 +119,8 @@ def lockfile(mypath, wantnewlockfile=0, unlinkfile=0, # we're waiting on lockfile and use a blocking attempt. locking_method = _default_lock_fn try: + if "__PORTAGE_TEST_HARDLINK_LOCKS" in os.environ: + raise IOError(errno.ENOSYS, "Function not implemented") locking_method(myfd, fcntl.LOCK_EX|fcntl.LOCK_NB) except IOError as e: if not hasattr(e, "errno"): @@ -148,20 +152,22 @@ def lockfile(mypath, wantnewlockfile=0, unlinkfile=0, raise if out is not None: out.eend(os.EX_OK) - elif e.errno == errno.ENOLCK: + elif e.errno in (errno.ENOSYS, errno.ENOLCK): # We're not allowed to lock on this FS. - os.close(myfd) - link_success = False - if lockfilename == str(lockfilename): - if wantnewlockfile: - try: - if os.stat(lockfilename)[stat.ST_NLINK] == 1: - os.unlink(lockfilename) - except OSError: - pass - link_success = hardlink_lockfile(lockfilename) + if not isinstance(lockfilename, int): + # If a file object was passed in, it's not safe + # to close the file descriptor because it may + # still be in use (for example, see emergelog). + os.close(myfd) + lockfilename_path = _unicode_decode(lockfilename_path, + encoding=_encodings['fs'], errors='strict') + if not isinstance(lockfilename_path, basestring): + raise + link_success = hardlink_lockfile(lockfilename_path, + waiting_msg=waiting_msg, flags=flags) if not link_success: raise + lockfilename = lockfilename_path locking_method = None myfd = HARDLINK_FD else: @@ -208,7 +214,7 @@ def unlockfile(mytuple): raise InvalidData if(myfd == HARDLINK_FD): - unhardlink_lockfile(lockfilename) + unhardlink_lockfile(lockfilename, unlinkfile=unlinkfile) return True # myfd may be None here due to myfd = mypath in lockfile() @@ -273,61 +279,126 @@ def hardlock_name(path): def hardlink_is_mine(link,lock): try: - return os.stat(link).st_nlink == 2 + lock_st = os.stat(lock) + if lock_st.st_nlink == 2: + link_st = os.stat(link) + return lock_st.st_ino == link_st.st_ino and \ + lock_st.st_dev == link_st.st_dev except OSError: return False -def hardlink_lockfile(lockfilename, max_wait=14400): +def hardlink_lockfile(lockfilename, max_wait=DeprecationWarning, + waiting_msg=None, flags=0): """Does the NFS, hardlink shuffle to ensure locking on the disk. - We create a PRIVATE lockfile, that is just a placeholder on the disk. - Then we HARDLINK the real lockfile to that private file. + We create a PRIVATE hardlink to the real lockfile, that is just a + placeholder on the disk. If our file can 2 references, then we have the lock. :) Otherwise we lather, rise, and repeat. - We default to a 4 hour timeout. """ - start_time = time.time() + if max_wait is not DeprecationWarning: + warnings.warn("The 'max_wait' parameter of " + "portage.locks.hardlink_lockfile() is now unused. Use " + "flags=os.O_NONBLOCK instead.", + DeprecationWarning, stacklevel=2) + + global _quiet + out = None + displayed_waiting_msg = False myhardlock = hardlock_name(lockfilename) - reported_waiting = False - - while(time.time() < (start_time + max_wait)): - # We only need it to exist. - myfd = os.open(myhardlock, os.O_CREAT|os.O_RDWR,0o660) - os.close(myfd) - - if not os.path.exists(myhardlock): - raise FileNotFound( - _("Created lockfile is missing: %(filename)s") % \ - {"filename" : myhardlock}) - try: - res = os.link(myhardlock, lockfilename) - except OSError: + # myhardlock must not exist prior to our link() call, and we can + # can safely unlink it since its file name is unique to our PID + try: + os.unlink(myhardlock) + except OSError as e: + if e.errno in (errno.ENOENT, errno.ESTALE): pass + else: + func_call = "unlink('%s')" % myhardlock + if e.errno == OperationNotPermitted.errno: + raise OperationNotPermitted(func_call) + elif e.errno == PermissionDenied.errno: + raise PermissionDenied(func_call) + else: + raise + + while True: + # create lockfilename if it doesn't exist yet + try: + myfd = os.open(lockfilename, os.O_CREAT|os.O_RDWR, 0o660) + except OSError as e: + func_call = "open('%s')" % lockfilename + if e.errno == OperationNotPermitted.errno: + raise OperationNotPermitted(func_call) + elif e.errno == PermissionDenied.errno: + raise PermissionDenied(func_call) + else: + raise + else: + try: + if os.fstat(myfd).st_gid != portage_gid: + os.fchown(myfd, -1, portage_gid) + except OSError as e: + if e.errno not in (errno.ENOENT, errno.ESTALE): + writemsg("%s: fchown('%s', -1, %d)\n" % \ + (e, lockfilename, portage_gid), noiselevel=-1) + writemsg(_("Cannot chown a lockfile: '%s'\n") % \ + lockfilename, noiselevel=-1) + writemsg(_("Group IDs of current user: %s\n") % \ + " ".join(str(n) for n in os.getgroups()), + noiselevel=-1) + finally: + os.close(myfd) + + try: + os.link(lockfilename, myhardlock) + except OSError as e: + func_call = "link('%s', '%s')" % (lockfilename, myhardlock) + if e.errno == OperationNotPermitted.errno: + raise OperationNotPermitted(func_call) + elif e.errno == PermissionDenied.errno: + raise PermissionDenied(func_call) + elif e.errno in (errno.ESTALE, errno.ENOENT): + # another process has removed the file, so we'll have + # to create it again + continue + else: + raise if hardlink_is_mine(myhardlock, lockfilename): - # We have the lock. - if reported_waiting: - writemsg("\n", noiselevel=-1) - return True + if out is not None: + out.eend(os.EX_OK) + break - if reported_waiting: - writemsg(".", noiselevel=-1) - else: - reported_waiting = True - msg = _("\nWaiting on (hardlink) lockfile: (one '.' per 3 seconds)\n" - "%(bin_path)s/clean_locks can fix stuck locks.\n" - "Lockfile: %(lockfilename)s\n") % \ - {"bin_path": PORTAGE_BIN_PATH, "lockfilename": lockfilename} - writemsg(msg, noiselevel=-1) - time.sleep(3) - - os.unlink(myhardlock) - return False + try: + os.unlink(myhardlock) + except OSError as e: + # This should not happen, since the file name of + # myhardlock is unique to our host and PID, + # and the above link() call succeeded. + if e.errno not in (errno.ENOENT, errno.ESTALE): + raise + raise FileNotFound(myhardlock) + + if flags & os.O_NONBLOCK: + raise TryAgain(lockfilename) + + if out is None and not _quiet: + out = portage.output.EOutput() + if out is not None and not displayed_waiting_msg: + displayed_waiting_msg = True + if waiting_msg is None: + waiting_msg = _("waiting for lock on %s\n") % lockfilename + out.ebegin(waiting_msg) + + time.sleep(_HARDLINK_POLL_LATENCY) + + return True -def unhardlink_lockfile(lockfilename): +def unhardlink_lockfile(lockfilename, unlinkfile=True): myhardlock = hardlock_name(lockfilename) - if hardlink_is_mine(myhardlock, lockfilename): + if unlinkfile and hardlink_is_mine(myhardlock, lockfilename): # Make sure not to touch lockfilename unless we really have a lock. try: os.unlink(lockfilename) diff --git a/pym/portage/package/ebuild/_config/special_env_vars.py b/pym/portage/package/ebuild/_config/special_env_vars.py index 6b378b3db..d6ee647e8 100644 --- a/pym/portage/package/ebuild/_config/special_env_vars.py +++ b/pym/portage/package/ebuild/_config/special_env_vars.py @@ -66,7 +66,7 @@ environ_whitelist += [ "REPLACING_VERSIONS", "REPLACED_BY_VERSION", "ROOT", "ROOTPATH", "T", "TMP", "TMPDIR", "USE_EXPAND", "USE_ORDER", "WORKDIR", - "XARGS", + "XARGS", "__PORTAGE_TEST_HARDLINK_LOCKS", ] # user config variables diff --git a/pym/portage/tests/ebuild/test_doebuild_spawn.py b/pym/portage/tests/ebuild/test_doebuild_spawn.py index daa6e7528..89e27a331 100644 --- a/pym/portage/tests/ebuild/test_doebuild_spawn.py +++ b/pym/portage/tests/ebuild/test_doebuild_spawn.py @@ -26,6 +26,11 @@ class DoebuildSpawnTestCase(TestCase): playground = ResolverPlayground() try: settings = config(clone=playground.settings) + if "__PORTAGE_TEST_HARDLINK_LOCKS" in os.environ: + settings["__PORTAGE_TEST_HARDLINK_LOCKS"] = \ + os.environ["__PORTAGE_TEST_HARDLINK_LOCKS"] + settings.backup_changes("__PORTAGE_TEST_HARDLINK_LOCKS") + cpv = 'sys-apps/portage-2.1' metadata = { 'EAPI' : '2', diff --git a/pym/portage/tests/ebuild/test_ipc_daemon.py b/pym/portage/tests/ebuild/test_ipc_daemon.py index 379953d4b..c63843792 100644 --- a/pym/portage/tests/ebuild/test_ipc_daemon.py +++ b/pym/portage/tests/ebuild/test_ipc_daemon.py @@ -10,6 +10,7 @@ from portage.tests import TestCase from portage.const import PORTAGE_BIN_PATH from portage.const import PORTAGE_PYM_PATH from portage.const import BASH_BINARY +from portage.locks import hardlock_cleanup from portage.package.ebuild._ipc.ExitCommand import ExitCommand from portage.util import ensure_dirs from _emerge.SpawnProcess import SpawnProcess @@ -39,6 +40,10 @@ class IpcDaemonTestCase(TestCase): env['PORTAGE_PYM_PATH'] = PORTAGE_PYM_PATH env['PORTAGE_BUILDDIR'] = os.path.join(tmpdir, 'cat', 'pkg-1') + if "__PORTAGE_TEST_HARDLINK_LOCKS" in os.environ: + env["__PORTAGE_TEST_HARDLINK_LOCKS"] = \ + os.environ["__PORTAGE_TEST_HARDLINK_LOCKS"] + task_scheduler = TaskScheduler(max_jobs=2) build_dir = EbuildBuildDir( scheduler=task_scheduler.sched_iface, @@ -75,6 +80,8 @@ class IpcDaemonTestCase(TestCase): start_time = time.time() task_scheduler.run(timeout=self._SCHEDULE_TIMEOUT) task_scheduler.clear() + hardlock_cleanup(env['PORTAGE_BUILDDIR'], + remove_all_locks=True) self.assertEqual(self.received_command, True, "command not received after %d seconds" % \ @@ -110,6 +117,8 @@ class IpcDaemonTestCase(TestCase): start_time = time.time() task_scheduler.run(timeout=short_timeout_ms) task_scheduler.clear() + hardlock_cleanup(env['PORTAGE_BUILDDIR'], + remove_all_locks=True) self.assertEqual(self.received_command, False, "command received after %d seconds" % \ diff --git a/pym/portage/tests/emerge/test_simple.py b/pym/portage/tests/emerge/test_simple.py index a3efa62ca..86919509d 100644 --- a/pym/portage/tests/emerge/test_simple.py +++ b/pym/portage/tests/emerge/test_simple.py @@ -318,6 +318,10 @@ pkg_preinst() { "PYTHONPATH" : pythonpath, } + if "__PORTAGE_TEST_HARDLINK_LOCKS" in os.environ: + env["__PORTAGE_TEST_HARDLINK_LOCKS"] = \ + os.environ["__PORTAGE_TEST_HARDLINK_LOCKS"] + updates_dir = os.path.join(portdir, "profiles", "updates") dirs = [cachedir, cachedir_pregen, distdir, fake_bin, portage_tmpdir, updates_dir, diff --git a/pym/portage/tests/locks/test_asynchronous_lock.py b/pym/portage/tests/locks/test_asynchronous_lock.py index 0d6f11656..c896cb45c 100644 --- a/pym/portage/tests/locks/test_asynchronous_lock.py +++ b/pym/portage/tests/locks/test_asynchronous_lock.py @@ -12,7 +12,7 @@ from _emerge.PollScheduler import PollScheduler class AsynchronousLockTestCase(TestCase): - def testAsynchronousLock(self): + def _testAsynchronousLock(self): scheduler = PollScheduler().sched_iface tempdir = tempfile.mkdtemp() try: @@ -39,7 +39,17 @@ class AsynchronousLockTestCase(TestCase): finally: shutil.rmtree(tempdir) - def testAsynchronousLockWait(self): + def testAsynchronousLock(self): + self._testAsynchronousLock() + + def testAsynchronousLockHardlink(self): + os.environ["__PORTAGE_TEST_HARDLINK_LOCKS"] = "1" + try: + self._testAsynchronousLock() + finally: + os.environ.pop("__PORTAGE_TEST_HARDLINK_LOCKS", None) + + def _testAsynchronousLockWait(self): scheduler = PollScheduler().sched_iface tempdir = tempfile.mkdtemp() try: @@ -67,7 +77,17 @@ class AsynchronousLockTestCase(TestCase): finally: shutil.rmtree(tempdir) - def testAsynchronousLockWaitCancel(self): + def testAsynchronousLockWait(self): + self._testAsynchronousLockWait() + + def testAsynchronousLockWaitHardlink(self): + os.environ["__PORTAGE_TEST_HARDLINK_LOCKS"] = "1" + try: + self._testAsynchronousLockWait() + finally: + os.environ.pop("__PORTAGE_TEST_HARDLINK_LOCKS", None) + + def _testAsynchronousLockWaitCancel(self): scheduler = PollScheduler().sched_iface tempdir = tempfile.mkdtemp() try: @@ -92,7 +112,17 @@ class AsynchronousLockTestCase(TestCase): finally: shutil.rmtree(tempdir) - def testAsynchronousLockWaitKill(self): + def testAsynchronousLockWaitCancel(self): + self._testAsynchronousLockWaitCancel() + + def testAsynchronousLockWaitCancelHardlink(self): + os.environ["__PORTAGE_TEST_HARDLINK_LOCKS"] = "1" + try: + self._testAsynchronousLockWaitCancel() + finally: + os.environ.pop("__PORTAGE_TEST_HARDLINK_LOCKS", None) + + def _testAsynchronousLockWaitKill(self): scheduler = PollScheduler().sched_iface tempdir = tempfile.mkdtemp() try: @@ -122,3 +152,13 @@ class AsynchronousLockTestCase(TestCase): lock1.unlock() finally: shutil.rmtree(tempdir) + + def testAsynchronousLockWaitKill(self): + self._testAsynchronousLockWaitKill() + + def testAsynchronousLockWaitKillHardlink(self): + os.environ["__PORTAGE_TEST_HARDLINK_LOCKS"] = "1" + try: + self._testAsynchronousLockWaitKill() + finally: + os.environ.pop("__PORTAGE_TEST_HARDLINK_LOCKS", None) diff --git a/pym/portage/tests/locks/test_lock_nonblock.py b/pym/portage/tests/locks/test_lock_nonblock.py index cc1b28e82..914084ca7 100644 --- a/pym/portage/tests/locks/test_lock_nonblock.py +++ b/pym/portage/tests/locks/test_lock_nonblock.py @@ -11,7 +11,7 @@ from portage.tests import TestCase class LockNonblockTestCase(TestCase): - def testLockNonblock(self): + def _testLockNonblock(self): tempdir = tempfile.mkdtemp() try: path = os.path.join(tempdir, 'lock_me') @@ -44,3 +44,13 @@ class LockNonblockTestCase(TestCase): finally: shutil.rmtree(tempdir) + def testLockNonblock(self): + self._testLockNonblock() + + def testLockNonblockHardlink(self): + os.environ["__PORTAGE_TEST_HARDLINK_LOCKS"] = "1" + try: + self._testLockNonblock() + finally: + os.environ.pop("__PORTAGE_TEST_HARDLINK_LOCKS", None) + -- cgit v1.2.3-1-g7c22