From 2217fa6295070f137988006c4bb00d25dfc0cb5e Mon Sep 17 00:00:00 2001 From: "Chris St. Pierre" Date: Mon, 15 Jul 2013 15:32:21 -0400 Subject: Read-only yum cache This makes the yum cache read-only so that bcfg2-yum-helper cannot update the cache on the fly, which should help avoid locking issues with the yum caches that can cause client runs to fail. It also makes the Packages plugin behave more consistently, since use of yum libraries won't cause the cache to be refreshed at random times on the fly, but rather more predictably as with the Apt cache or the yum cache without using yum libraries. Unlike those two cases, though, the caches will not all be downloaded initially, but rather opportunistically as needed. In order for this to work, the Bcfg2 server must not run as root. Root ignores the 'w' permissions bit, so the cache cannot be made read-only. --- .../Bcfg2/Server/Plugins/Packages/Collection.py | 4 + src/lib/Bcfg2/Server/Plugins/Packages/Yum.py | 60 ++++++++++----- src/lib/Bcfg2/Server/Plugins/Packages/__init__.py | 85 +++++++++++++++++++--- 3 files changed, 122 insertions(+), 27 deletions(-) (limited to 'src/lib/Bcfg2/Server') diff --git a/src/lib/Bcfg2/Server/Plugins/Packages/Collection.py b/src/lib/Bcfg2/Server/Plugins/Packages/Collection.py index b25cb0fc4..39c51f351 100644 --- a/src/lib/Bcfg2/Server/Plugins/Packages/Collection.py +++ b/src/lib/Bcfg2/Server/Plugins/Packages/Collection.py @@ -614,6 +614,10 @@ class Collection(list, Bcfg2.Server.Plugin.Debuggable): self.filter_unknown(unknown) return packages, unknown + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, + list.__repr__(self)) + def get_collection_class(source_type): """ Given a source type, determine the class of Collection object diff --git a/src/lib/Bcfg2/Server/Plugins/Packages/Yum.py b/src/lib/Bcfg2/Server/Plugins/Packages/Yum.py index 4608bcca5..55787681f 100644 --- a/src/lib/Bcfg2/Server/Plugins/Packages/Yum.py +++ b/src/lib/Bcfg2/Server/Plugins/Packages/Yum.py @@ -53,6 +53,7 @@ The Yum Backend import os import re import sys +import stat import copy import errno import socket @@ -282,13 +283,15 @@ class YumCollection(Collection): #: for cached yum metadata self.cachefile = os.path.join(self.cachepath, "cache-%s" % self.cachekey) - if not os.path.exists(self.cachefile): - os.mkdir(self.cachefile) #: The path to the server-side config file used when #: resolving packages with the Python yum libraries self.cfgfile = os.path.join(self.cachefile, "yum.conf") - self.write_config() + + if not os.path.exists(self.cachefile): + self.debug_log("Creating common cache %s" % self.cachefile) + os.mkdir(self.cachefile) + self.setup_data() else: self.cachefile = None @@ -924,6 +927,28 @@ class YumCollection(Collection): "output: %s" % err) raise + def _set_cache_writeable(self, writeable): + """ Set the writeability of the yum cache. + + :param writeable: If True, the cache will be made writeable. + If False, the cache will be made read-only. + :type writeable: bool + """ + fmode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH + if writeable: + self.debug_log("Packages: Making cache %s writeable" % + self.cachefile) + fmode |= stat.S_IWUSR + else: + self.debug_log("Packages: Making cache %s read-only" % + self.cachefile) + dmode = fmode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH + for root, dirs, files in os.walk(self.cachefile): + for dname in dirs: + os.chmod(os.path.join(root, dname), dmode) + for fname in files: + os.chmod(os.path.join(root, fname), fmode) + def setup_data(self, force_update=False): """ Do any collection-level data setup tasks. This is called when sources are loaded or reloaded by @@ -931,11 +956,11 @@ class YumCollection(Collection): If the builtin yum parsers are in use, this defers to :func:`Bcfg2.Server.Plugins.Packages.Collection.Collection.setup_data`. - If using the yum Python libraries, this cleans up cached yum - metadata, regenerates the server-side yum config (in order to - catch any new sources that have been added to this server), - and then cleans up cached yum metadata again, in case the new - config has any preexisting cache. + If using the yum Python libraries, this makes the cache + writeable, cleans up cached yum metadata, regenerates the + server-side yum config (in order to catch any new sources that + have been added to this server), regenerates the yum cache, + and then sets the cache back to read-only. :param force_update: Ignore all local cache and setup data from its original upstream sources (i.e., @@ -945,24 +970,25 @@ class YumCollection(Collection): if not self.use_yum: return Collection.setup_data(self, force_update) + self._set_cache_writeable(True) if force_update: - # we call this twice: one to clean up data from the old - # config, and once to clean up data from the new config + # clean up data from the old config try: self.call_helper("clean") except ValueError: # error reported by call_helper pass - os.unlink(self.cfgfile) + if os.path.exists(self.cfgfile): + os.unlink(self.cfgfile) self.write_config() - if force_update: - try: - self.call_helper("clean") - except ValueError: - # error reported by call_helper - pass + try: + self.call_helper("makecache") + except ValueError: + # error reported by call_helper + pass + self._set_cache_writeable(False) class YumSource(Source): diff --git a/src/lib/Bcfg2/Server/Plugins/Packages/__init__.py b/src/lib/Bcfg2/Server/Plugins/Packages/__init__.py index f93bd0932..aea447520 100644 --- a/src/lib/Bcfg2/Server/Plugins/Packages/__init__.py +++ b/src/lib/Bcfg2/Server/Plugins/Packages/__init__.py @@ -9,7 +9,8 @@ import shutil import lxml.etree import Bcfg2.Logger import Bcfg2.Server.Plugin -from Bcfg2.Compat import ConfigParser, urlopen, HTTPError, URLError +from Bcfg2.Compat import ConfigParser, urlopen, HTTPError, URLError, \ + MutableMapping from Bcfg2.Server.Plugins.Packages.Collection import Collection, \ get_collection_class from Bcfg2.Server.Plugins.Packages.PackagesSources import PackagesSources @@ -22,6 +23,52 @@ APT_CONFIG_DEFAULT = \ "/etc/apt/sources.list.d/bcfg2-packages-generated-sources.list" +class OnDemandDict(MutableMapping): + """ This maps a set of keys to a set of value-getting functions; + the values are populated on-the-fly by the functions as the values + are needed (and not before). This is used by + :func:`Bcfg2.Server.Plugins.Packages.Packages.get_additional_data`; + see the docstring for that function for details on why. + + Unlike a dict, you should not specify values for for the righthand + side of this mapping, but functions that get values. E.g.: + + .. code-block:: python + + d = OnDemandDict(foo=load_foo, + bar=lambda: "bar"); + """ + + def __init__(self, **getters): + self._values = dict() + self._getters = dict(**getters) + + def __getitem__(self, key): + if key not in self._values: + self._values[key] = self._getters[key]() + return self._values[key] + + def __setitem__(self, key, getter): + self._getters[key] = getter + + def __delitem__(self, key): + del self._values[key] + del self._getters[key] + + def __len__(self): + return len(self._getters) + + def __iter__(self): + return iter(self._getters.keys()) + + def __str__(self): + rv = dict(self._values) + for key in self._getters.keys(): + if key not in rv: + rv[key] = 'unknown' + return str(rv) + + class Packages(Bcfg2.Server.Plugin.Plugin, Bcfg2.Server.Plugin.StructureValidator, Bcfg2.Server.Plugin.Generator, @@ -535,20 +582,38 @@ class Packages(Bcfg2.Server.Plugin.Plugin, def get_additional_data(self, metadata): """ Return additional data for the given client. This will be - a dict containing a single key, ``sources``, whose value is a - list of data returned from - :func:`Bcfg2.Server.Plugins.Packages.Collection.Collection.get_additional_data`, - namely, a list of - :attr:`Bcfg2.Server.Plugins.Packages.Source.Source.url_map` - data. + an :class:`Bcfg2.Server.Plugins.Packages.OnDemandDict` + containing two keys: + + * ``sources``, whose value is a list of data returned from + :func:`Bcfg2.Server.Plugins.Packages.Collection.Collection.get_additional_data`, + namely, a list of + :attr:`Bcfg2.Server.Plugins.Packages.Source.Source.url_map` + data; and + * ``get_config``, whose value is the + :func:`Bcfg2.Server.Plugins.Packages.Packages.get_config` + function, which can be used to get the Packages config for + other systems. + + This uses an OnDemandDict instead of just a normal dict + because loading a source collection can be a fairly + time-consuming process, particularly for the first time. As a + result, when all metadata objects are built at once (such as + after the server is restarted, or far more frequently if + Metadata caching is disabled), this function would be a major + bottleneck if we tried to build all collections at the same + time. Instead, they're merely built on-demand. :param metadata: The client metadata :type metadata: Bcfg2.Server.Plugins.Metadata.ClientMetadata :return: dict of lists of ``url_map`` data """ - collection = self.get_collection(metadata) - return dict(sources=collection.get_additional_data(), - get_config=self.get_config) + def get_sources(): + return self.get_collection(metadata).get_additional_data + + return OnDemandDict( + sources=get_sources, + get_config=lambda: self.get_config) def end_client_run(self, metadata): """ Hook to clear the cache for this client in -- cgit v1.2.3-1-g7c22