From bd0fd1c4c32864414b60b51828c79198503cb3f6 Mon Sep 17 00:00:00 2001 From: "Chris St. Pierre" Date: Fri, 7 Oct 2011 08:37:17 -0400 Subject: * Added support for yum libraries (if available and configured). This can dramatically reduce memory usage, and fixed several bugs: * #1014 (Package plugin can't resolve dependencies for rpms with Require: tags for full paths that aren't Provided explicitly) * #991 (Dependency Resolution difference between Package and yum) * #996 (Packages high memory usage) * Added support for Yum package groups when using yum libraries (#1039) * Fixed #911 (bcfg2 output for wrong package version with Packages is misleading) * YUMng turns down the Yum debug level itself depending on the debug/verbosity level requested by bcfg2 so you don't have to reduce the Yum debug level on a global basis * Added support for Pulp repositories, including registering Pulp consumers and binding to repositories * Added ability to disable magic OS groups --- src/lib/Server/Plugins/Packages/Apt.py | 142 +++ src/lib/Server/Plugins/Packages/Collection.py | 336 ++++++++ src/lib/Server/Plugins/Packages/Pac.py | 122 +++ src/lib/Server/Plugins/Packages/PackagesConfig.py | 28 + src/lib/Server/Plugins/Packages/PackagesSources.py | 66 ++ src/lib/Server/Plugins/Packages/Source.py | 262 ++++++ src/lib/Server/Plugins/Packages/Yum.py | 950 +++++++++++++++++++++ src/lib/Server/Plugins/Packages/__init__.py | 239 ++++++ 8 files changed, 2145 insertions(+) create mode 100644 src/lib/Server/Plugins/Packages/Apt.py create mode 100644 src/lib/Server/Plugins/Packages/Collection.py create mode 100644 src/lib/Server/Plugins/Packages/Pac.py create mode 100644 src/lib/Server/Plugins/Packages/PackagesConfig.py create mode 100644 src/lib/Server/Plugins/Packages/PackagesSources.py create mode 100644 src/lib/Server/Plugins/Packages/Source.py create mode 100644 src/lib/Server/Plugins/Packages/Yum.py create mode 100644 src/lib/Server/Plugins/Packages/__init__.py (limited to 'src/lib/Server/Plugins/Packages') diff --git a/src/lib/Server/Plugins/Packages/Apt.py b/src/lib/Server/Plugins/Packages/Apt.py new file mode 100644 index 000000000..5c80200a4 --- /dev/null +++ b/src/lib/Server/Plugins/Packages/Apt.py @@ -0,0 +1,142 @@ +import re +import gzip +import logging +from Bcfg2.Server.Plugins.Packages.Collection import Collection +from Bcfg2.Server.Plugins.Packages.Source import Source +from Bcfg2.Bcfg2Py3k import cPickle, file + +logger = logging.getLogger("Packages") + +class AptCollection(Collection): + def get_group(self, group): + self.logger.warning("Package groups are not supported by APT") + return [] + +class AptSource(Source): + basegroups = ['apt', 'debian', 'ubuntu', 'nexenta'] + ptype = 'deb' + + def __init__(self, basepath, xsource, config): + Source.__init__(self, basepath, xsource, config) + self.pkgnames = set() + + self.url_map = [{'rawurl': self.rawurl, 'url': self.url, + 'version': self.version, + 'components': self.components, 'arches': self.arches}] + + def save_state(self): + cache = file(self.cachefile, 'wb') + cPickle.dump((self.pkgnames, self.deps, self.provides), + cache, 2) + cache.close() + + def load_state(self): + data = file(self.cachefile) + self.pkgnames, self.deps, self.provides = cPickle.load(data) + + def filter_unknown(self, unknown): + filtered = set([u for u in unknown if u.startswith('choice')]) + unknown.difference_update(filtered) + + def get_urls(self): + if not self.rawurl: + rv = [] + for part in self.components: + for arch in self.arches: + 
rv.append("%sdists/%s/%s/binary-%s/Packages.gz" % + (self.url, self.version, part, arch)) + return rv + else: + return ["%sPackages.gz" % self.rawurl] + urls = property(get_urls) + + def read_files(self): + bdeps = dict() + bprov = dict() + if self.recommended: + depfnames = ['Depends', 'Pre-Depends', 'Recommends'] + else: + depfnames = ['Depends', 'Pre-Depends'] + for fname in self.files: + if not self.rawurl: + barch = [x + for x in fname.split('@') + if x.startswith('binary-')][0][7:] + else: + # RawURL entries assume that they only have one + # element and that it is the architecture of the source. + barch = self.arches[0] + if barch not in bdeps: + bdeps[barch] = dict() + bprov[barch] = dict() + try: + reader = gzip.GzipFile(fname) + except: + print("Failed to read file %s" % fname) + raise + for line in reader.readlines(): + words = str(line.strip()).split(':', 1) + if words[0] == 'Package': + pkgname = words[1].strip().rstrip() + self.pkgnames.add(pkgname) + bdeps[barch][pkgname] = [] + elif words[0] in depfnames: + vindex = 0 + for dep in words[1].split(','): + if '|' in dep: + cdeps = [re.sub('\s+', '', + re.sub('\(.*\)', '', cdep)) + for cdep in dep.split('|')] + dyn_dname = "choice-%s-%s-%s" % (pkgname, + barch, + vindex) + vindex += 1 + bdeps[barch][pkgname].append(dyn_dname) + bprov[barch][dyn_dname] = set(cdeps) + else: + raw_dep = re.sub('\(.*\)', '', dep) + raw_dep = raw_dep.rstrip().strip() + bdeps[barch][pkgname].append(raw_dep) + elif words[0] == 'Provides': + for pkg in words[1].split(','): + dname = pkg.rstrip().strip() + if dname not in bprov[barch]: + bprov[barch][dname] = set() + bprov[barch][dname].add(pkgname) + + self.deps['global'] = dict() + self.provides['global'] = dict() + for barch in bdeps: + self.deps[barch] = dict() + self.provides[barch] = dict() + for pkgname in self.pkgnames: + pset = set() + for barch in bdeps: + if pkgname not in bdeps[barch]: + bdeps[barch][pkgname] = [] + pset.add(tuple(bdeps[barch][pkgname])) + if len(pset) == 1: + self.deps['global'][pkgname] = pset.pop() + else: + for barch in bdeps: + self.deps[barch][pkgname] = bdeps[barch][pkgname] + provided = set() + for bprovided in list(bprov.values()): + provided.update(set(bprovided)) + for prov in provided: + prset = set() + for barch in bprov: + if prov not in bprov[barch]: + continue + prset.add(tuple(bprov[barch].get(prov, ()))) + if len(prset) == 1: + self.provides['global'][prov] = prset.pop() + else: + for barch in bprov: + self.provides[barch][prov] = bprov[barch].get(prov, ()) + self.save_state() + + def is_package(self, _, pkg): + return (pkg in self.pkgnames and + pkg not in self.blacklist and + (len(self.whitelist) == 0 or pkg in self.whitelist)) diff --git a/src/lib/Server/Plugins/Packages/Collection.py b/src/lib/Server/Plugins/Packages/Collection.py new file mode 100644 index 000000000..69e38134e --- /dev/null +++ b/src/lib/Server/Plugins/Packages/Collection.py @@ -0,0 +1,336 @@ +import copy +import logging + +try: + from hashlib import md5 +except ImportError: + import md5 + +logger = logging.getLogger("Packages") + +_collections = dict() + +class Collection(object): + def __init__(self, metadata, sources, basepath): + """ don't call this directly; use the Factory method """ + self.metadata = metadata + self.sources = sources + self.logger = logging.getLogger("Packages") + self.basepath = basepath + self.virt_pkgs = dict() + + try: + self.config = sources[0].config + self.cachepath = sources[0].basepath + self.ptype = sources[0].ptype + except IndexError: + self.config = 
None + self.cachepath = None + self.ptype = "unknown" + + self.cachefile = None + + @property + def cachekey(self): + return md5(self.get_config()).hexdigest() + + def get_config(self): + self.logger.error("Cannot generate config for host with multiple " + "source types (%s)" % self.metadata.hostname) + return "" + + def get_relevant_groups(self): + groups = [] + for source in self.sources: + groups.extend(source.get_relevant_groups(self.metadata)) + return sorted(list(set(groups))) + + @property + def basegroups(self): + groups = set() + for source in self.sources: + groups.update(source.basegroups) + return list(groups) + + @property + def cachefiles(self): + cachefiles = set([self.cachefile]) + for source in self.sources: + cachefiles.add(source.cachefile) + return list(cachefiles) + + def get_group(self, group): + for source in self.sources: + pkgs = source.get_group(self.metadata, group) + if pkgs: + return pkgs + self.logger.warning("'%s' is not a valid group" % group) + return [] + + def is_package(self, package): + for source in self.sources: + if source.is_package(self.metadata, package): + return True + return False + + def is_virtual_package(self, package): + for source in self.sources: + if source.is_virtual_package(self.metadata, package): + return True + return False + + def get_deps(self, package): + for source in self.sources: + if source.is_package(self.metadata, package): + return source.get_deps(self.metadata, package) + return [] + + def get_provides(self, package): + for source in self.sources: + providers = source.get_provides(self.metadata, package) + if providers: + return providers + return [] + + def get_vpkgs(self): + """ get virtual packages """ + vpkgs = dict() + for source in self.sources: + s_vpkgs = source.get_vpkgs(self.metadata) + for name, prov_set in list(s_vpkgs.items()): + if name not in vpkgs: + vpkgs[name] = set(prov_set) + else: + vpkgs[name].update(prov_set) + return vpkgs + + def filter_unknown(self, unknown): + for source in self.sources: + source.filter_unknown(unknown) + + def magic_groups_match(self): + for source in self.sources: + if source.magic_groups_match(self.metadata): + return True + + def build_extra_structures(self, independent): + pass + + def get_additional_data(self): + sdata = [] + for source in self.sources: + sdata.extend(copy.deepcopy(source.url_map)) + return sdata + + def setup_data(self, force_update=False): + """ do any collection-level data setup tasks """ + for source in self.sources: + source.setup_data(force_update) + + def complete(self, packagelist): + '''Build the transitive closure of all package dependencies + + Arguments: + packageslist - set of package names + returns => (set(packages), set(unsatisfied requirements)) + ''' + + # setup vpkg cache + pgrps = tuple(self.get_relevant_groups()) + if pgrps not in self.virt_pkgs: + self.virt_pkgs[pgrps] = self.get_vpkgs() + vpkg_cache = self.virt_pkgs[pgrps] + + # unclassified is set of unsatisfied requirements (may be pkg + # for vpkg) + unclassified = set(packagelist) + vpkgs = set() + both = set() + pkgs = set(packagelist) + + packages = set() + examined = set() + unknown = set() + + final_pass = False + really_done = False + # do while unclassified or vpkgs or both or pkgs + while unclassified or pkgs or both or final_pass: + if really_done: + break + if len(unclassified) + len(pkgs) + len(both) == 0: + # one more pass then exit + really_done = True + + while unclassified: + current = unclassified.pop() + examined.add(current) + is_pkg = False + if 
self.is_package(current): + is_pkg = True + + is_vpkg = current in vpkg_cache + + if is_pkg and is_vpkg: + both.add(current) + elif is_pkg and not is_vpkg: + pkgs.add(current) + elif is_vpkg and not is_pkg: + vpkgs.add(current) + elif not is_vpkg and not is_pkg: + unknown.add(current) + + while pkgs: + # direct packages; current can be added, and all deps + # should be resolved + current = pkgs.pop() + self.logger.debug("Packages: handling package requirement %s" % + current) + packages.add(current) + deps = self.get_deps(current) + newdeps = set(deps).difference(examined) + if newdeps: + self.logger.debug("Packages: Package %s added " + "requirements %s" % (current, newdeps)) + unclassified.update(newdeps) + + satisfied_vpkgs = set() + for current in vpkgs: + # virtual dependencies, satisfied if one of N in the + # config, or can be forced if only one provider + if len(vpkg_cache[current]) == 1: + self.logger.debug("Packages: requirement %s satisfied by " + "%s" % (current, + vpkg_cache[current])) + unclassified.update(vpkg_cache[current].difference(examined)) + satisfied_vpkgs.add(current) + else: + satisfiers = [item for item in vpkg_cache[current] + if item in packages] + self.logger.debug("Packages: requirement %s satisfied by " + "%s" % (current, satisfiers)) + satisfied_vpkgs.add(current) + vpkgs.difference_update(satisfied_vpkgs) + + satisfied_both = set() + for current in both: + # packages that are both have virtual providers as + # well as a package with that name. allow use of virt + # through explicit specification, then fall back to + # forcing current on last pass + satisfiers = [item for item in vpkg_cache[current] + if item in packages] + if satisfiers: + self.logger.debug("Packages: requirement %s satisfied by " + "%s" % (current, satisfiers)) + satisfied_both.add(current) + elif current in packagelist or final_pass: + pkgs.add(current) + satisfied_both.add(current) + both.difference_update(satisfied_both) + + if len(unclassified) + len(pkgs) == 0: + final_pass = True + else: + final_pass = False + + self.filter_unknown(unknown) + + return packages, unknown + + def __len__(self): + return len(self.sources) + + def __getitem__(self, item): + return self.sources[item] + + def __setitem__(self, item, value): + self.sources[item] = value + + def __delitem__(self, item): + del self.sources[item] + + def append(self, item): + self.sources.append(item) + + def count(self): + return self.sources.count() + + def index(self, item): + return self.sources.index(item) + + def extend(self, items): + self.sources.extend(items) + + def insert(self, index, item): + self.sources.insert(index, item) + + def pop(self, index=None): + self.sources.pop(index) + + def remove(self, item): + self.sources.remove(item) + + def reverse(self): + self.sources.reverse() + + def sort(self, cmp=None, key=None, reverse=False): + self.sources.sort(cmp, key, reverse) + +def clear_cache(): + global _collections + _collections = dict() + +def factory(metadata, sources, basepath): + global _collections + + if not sources.loaded: + # if sources.xml has not received a FAM event yet, defer; + # instantiate a dummy Collection object, but do not cache it + # in _collections + return Collection(metadata, [], basepath) + + sclasses = set() + relevant = list() + + for source in sources: + if source.applies(metadata): + relevant.append(source) + sclasses.update([source.__class__]) + + # _collections is a cache dict of Collection objects that is keyed + # off of the set of source urls that apply to each Collection + 
ckeydata = set()
+    for source in relevant:
+        ckeydata.update(source.urls)
+    ckey = tuple(sorted(list(ckeydata)))
+    if ckey not in _collections:
+        if len(sclasses) > 1:
+            logger.warning("Multiple source types found for %s: %s" %
+                           (metadata.hostname,
+                            ",".join([s.__name__ for s in sclasses])))
+            cclass = Collection
+        elif len(sclasses) == 0:
+            logger.warning("No sources found for %s" % metadata.hostname)
+            cclass = Collection
+        else:
+            stype = sclasses.pop().__name__.replace("Source", "")
+            try:
+                module = \
+                    getattr(__import__("Bcfg2.Server.Plugins.Packages.%s" %
+                                       stype.title()).Server.Plugins.Packages,
+                            stype.title())
+                cclass = getattr(module, "%sCollection" % stype.title())
+            except ImportError:
+                logger.error("Unknown source type %s" % stype)
+            except AttributeError:
+                logger.warning("No collection class found for %s sources" %
+                               stype)
+
+        logger.debug("Using %s for Collection of sources for %s" %
+                     (cclass.__name__, metadata.hostname))
+
+        collection = cclass(metadata, relevant, basepath)
+        # reverse so that file order determines precedence
+        collection.reverse()
+        _collections[ckey] = collection
+    return _collections[ckey]
diff --git a/src/lib/Server/Plugins/Packages/Pac.py b/src/lib/Server/Plugins/Packages/Pac.py
new file mode 100644
index 000000000..8b75c1e1d
--- /dev/null
+++ b/src/lib/Server/Plugins/Packages/Pac.py
@@ -0,0 +1,122 @@
+import gzip
+import tarfile
+import logging
+from Bcfg2.Bcfg2Py3k import cPickle, file
+from Bcfg2.Server.Plugins.Packages.Collection import Collection
+from Bcfg2.Server.Plugins.Packages.Source import Source
+
+logger = logging.getLogger("Packages")
+
+class PacCollection(Collection):
+    def get_group(self, group):
+        self.logger.warning("Package groups are not supported by pacman")
+        return []
+
+class PacSource(Source):
+    basegroups = ['arch', 'parabola']
+    ptype = 'pacman'
+
+    def __init__(self, basepath, xsource, config):
+        Source.__init__(self, basepath, xsource, config)
+        self.pkgnames = set()
+
+        self.url_map = [{'rawurl': self.rawurl, 'url': self.url,
+                         'version': self.version,
+                         'components': self.components, 'arches': self.arches}]
+
+    def save_state(self):
+        cache = file(self.cachefile, 'wb')
+        cPickle.dump((self.pkgnames, self.deps, self.provides),
+                     cache, 2)
+        cache.close()
+
+    def load_state(self):
+        data = file(self.cachefile)
+        self.pkgnames, self.deps, self.provides = cPickle.load(data)
+
+    def filter_unknown(self, unknown):
+        filtered = set([u for u in unknown if u.startswith('choice')])
+        unknown.difference_update(filtered)
+
+    def get_urls(self):
+        if not self.rawurl:
+            rv = []
+            for part in self.components:
+                for arch in self.arches:
+                    rv.append("%s%s/os/%s/%s.db.tar.gz" %
+                              (self.url, part, arch, part))
+            return rv
+        else:
+            raise Exception("PacSource: rawurl not supported (yet)")
+    urls = property(get_urls)
+
+    def read_files(self):
+        bdeps = dict()
+        bprov = dict()
+
+        if self.recommended:
+            depfnames = ['Depends', 'Pre-Depends', 'Recommends']
+        else:
+            depfnames = ['Depends', 'Pre-Depends']
+
+        for fname in self.files:
+            if not self.rawurl:
+                barch = [x for x in fname.split('@') if x in self.arches][0]
+            else:
+                # RawURL entries assume that they only have one
+                # element and that it is the architecture of the source.
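+                # (so a rawurl source should define exactly one
+                # Arch in sources.xml)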
+ barch = self.arches[0] + + if barch not in bdeps: + bdeps[barch] = dict() + bprov[barch] = dict() + try: + print("try to read : " + fname) + tar = tarfile.open(fname, "r") + reader = gzip.GzipFile(fname) + except: + print("Failed to read file %s" % fname) + raise + + for tarinfo in tar: + if tarinfo.isdir(): + self.pkgnames.add(tarinfo.name.rsplit("-", 2)[0]) + print("added : " + tarinfo.name.rsplit("-", 2)[0]) + tar.close() + + self.deps['global'] = dict() + self.provides['global'] = dict() + for barch in bdeps: + self.deps[barch] = dict() + self.provides[barch] = dict() + for pkgname in self.pkgnames: + pset = set() + for barch in bdeps: + if pkgname not in bdeps[barch]: + bdeps[barch][pkgname] = [] + pset.add(tuple(bdeps[barch][pkgname])) + if len(pset) == 1: + self.deps['global'][pkgname] = pset.pop() + else: + for barch in bdeps: + self.deps[barch][pkgname] = bdeps[barch][pkgname] + provided = set() + for bprovided in list(bprov.values()): + provided.update(set(bprovided)) + for prov in provided: + prset = set() + for barch in bprov: + if prov not in bprov[barch]: + continue + prset.add(tuple(bprov[barch].get(prov, ()))) + if len(prset) == 1: + self.provides['global'][prov] = prset.pop() + else: + for barch in bprov: + self.provides[barch][prov] = bprov[barch].get(prov, ()) + self.save_state() + + def is_package(self, _, pkg): + return (pkg in self.pkgnames and + pkg not in self.blacklist and + (len(self.whitelist) == 0 or pkg in self.whitelist)) diff --git a/src/lib/Server/Plugins/Packages/PackagesConfig.py b/src/lib/Server/Plugins/Packages/PackagesConfig.py new file mode 100644 index 000000000..1bb250007 --- /dev/null +++ b/src/lib/Server/Plugins/Packages/PackagesConfig.py @@ -0,0 +1,28 @@ +import os +import logging +from Bcfg2.Bcfg2Py3k import ConfigParser +from Bcfg2.Server.Plugins.Packages import * + +logger = logging.getLogger('Packages') + +class PackagesConfig(Bcfg2.Server.Plugin.FileBacked, + ConfigParser.SafeConfigParser): + def __init__(self, filename, fam, packages): + Bcfg2.Server.Plugin.FileBacked.__init__(self, filename) + ConfigParser.SafeConfigParser.__init__(self) + + self.fam = fam + # packages.conf isn't strictly necessary, so only set a + # monitor if it exists. if it gets added, that will require a + # server restart + if os.path.exists(self.name): + self.fam.AddMonitor(self.name, self) + + self.pkg_obj = packages + + def Index(self): + """ Build local data structures """ + for section in self.sections(): + self.remove_section(section) + self.read(self.name) + self.pkg_obj.Reload() diff --git a/src/lib/Server/Plugins/Packages/PackagesSources.py b/src/lib/Server/Plugins/Packages/PackagesSources.py new file mode 100644 index 000000000..5f82deb1f --- /dev/null +++ b/src/lib/Server/Plugins/Packages/PackagesSources.py @@ -0,0 +1,66 @@ +import os +import sys +import lxml.etree +import logging +import Bcfg2.Server.Plugin + +logger = logging.getLogger("Packages") + + +class PackagesSources(Bcfg2.Server.Plugin.SingleXMLFileBacked, + Bcfg2.Server.Plugin.StructFile): + __identifier__ = None + + def __init__(self, filename, cachepath, fam, packages, config): + try: + Bcfg2.Server.Plugin.SingleXMLFileBacked.__init__(self, + filename, + fam) + except OSError: + err = sys.exc_info()[1] + msg = "Packages: Failed to read configuration file: %s" % err + if not os.path.exists(self.name): + msg += " Have you created it?" 
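+            # sources.xml is mandatory: without it the plugin has no
+            # repositories to work from, so refuse to start at all
+            # rather than limp along silently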
+ logger.error(msg) + raise Bcfg2.Server.Plugin.PluginInitError(msg) + Bcfg2.Server.Plugin.StructFile.__init__(self, filename) + self.cachepath = cachepath + self.config = config + if not os.path.exists(self.cachepath): + # create cache directory if needed + os.makedirs(self.cachepath) + self.pkg_obj = packages + self.loaded = False + + def Index(self): + Bcfg2.Server.Plugin.SingleXMLFileBacked.Index(self) + self.entries = [] + for xsource in self.xdata.findall('.//Source'): + source = self.source_from_xml(xsource) + if source is not None: + self.entries.append(source) + + self.pkg_obj.Reload() + self.loaded = True + + def source_from_xml(self, xsource): + """ create a *Source object from its XML representation in + sources.xml """ + stype = xsource.get("type") + if stype is None: + logger.error("No type specified for source, skipping") + return None + + try: + module = getattr(__import__("Bcfg2.Server.Plugins.Packages.%s" % + stype.title()).Server.Plugins.Packages, + stype.title()) + cls = getattr(module, "%sSource" % stype.title()) + except (ImportError, AttributeError): + logger.error("Unknown source type %s" % stype) + return None + + return cls(self.cachepath, xsource, self.config) + + def __getitem__(self, key): + return self.entries[key] diff --git a/src/lib/Server/Plugins/Packages/Source.py b/src/lib/Server/Plugins/Packages/Source.py new file mode 100644 index 000000000..255f3ea7a --- /dev/null +++ b/src/lib/Server/Plugins/Packages/Source.py @@ -0,0 +1,262 @@ +import os +import re +import sys +import base64 +import logging +from Bcfg2.Bcfg2Py3k import HTTPError, HTTPBasicAuthHandler, \ + HTTPPasswordMgrWithDefaultRealm, install_opener, build_opener, \ + urlopen, file, cPickle + +try: + from hashlib import md5 +except ImportError: + import md5 + +logger = logging.getLogger('Packages') + +def fetch_url(url): + if '@' in url: + mobj = re.match('(\w+://)([^:]+):([^@]+)@(.*)$', url) + if not mobj: + raise ValueError + user = mobj.group(2) + passwd = mobj.group(3) + url = mobj.group(1) + mobj.group(4) + auth = HTTPBasicAuthHandler(HTTPPasswordMgrWithDefaultRealm()) + auth.add_password(None, url, user, passwd) + install_opener(build_opener(auth)) + return urlopen(url).read() + + +class Source(object): + reponame_re = re.compile(r'.*/(?:RPMS\.)?([^/]+)') + basegroups = [] + + def __init__(self, basepath, xsource, config): + self.basepath = basepath + self.xsource = xsource + self.config = config + + try: + self.version = xsource.find('Version').text + except AttributeError: + pass + + for key, tag in [('components', 'Component'), ('arches', 'Arch'), + ('blacklist', 'Blacklist'), + ('whitelist', 'Whitelist')]: + self.__dict__[key] = [item.text for item in xsource.findall(tag)] + + self.gpgkeys = [el.text for el in xsource.findall("GPGKey")] + + self.recommended = xsource.get('recommended', 'false').lower() == 'true' + + self.rawurl = xsource.get('rawurl', '') + if self.rawurl and not self.rawurl.endswith("/"): + self.rawurl += "/" + self.url = xsource.get('url', '') + if self.url and not self.url.endswith("/"): + self.url += "/" + self.version = xsource.get('version', '') + + # build the set of conditions to see if this source applies to + # a given set of metadata + self.conditions = [] + self.groups = [] # provided for some limited backwards compat + for el in xsource.iterancestors(): + if el.tag == "Group": + if el.get("negate", "false").lower() == "true": + self.conditions.append(lambda m, el=el: + el.get("name") not in m.groups) + else: + self.groups.append(el.get("name")) + 
self.conditions.append(lambda m, el=el: + el.get("name") in m.groups) + elif el.tag == "Client": + if el.get("negate", "false").lower() == "true": + self.conditions.append(lambda m, el=el: + el.get("name") != m.hostname) + else: + self.conditions.append(lambda m, el=el: + el.get("name") == m.hostname) + + self.deps = dict() + self.provides = dict() + + self.cachefile = os.path.join(self.basepath, + "cache-%s" % self.cachekey) + self.url_map = [] + + @property + def cachekey(self): + return md5(cPickle.dumps([self.version, self.components, self.url, + self.rawurl, self.arches])).hexdigest() + + def get_relevant_groups(self, metadata): + return sorted(list(set([g for g in metadata.groups + if (g in self.basegroups or + g in self.groups or + g in self.arches)]))) + + def load_state(self): + pass + + def setup_data(self, force_update=False): + should_read = True + should_download = False + if os.path.exists(self.cachefile): + try: + self.load_state() + should_read = False + except: + logger.error("Cachefile %s load failed; " + "falling back to file read" % self.cachefile) + if should_read: + try: + self.read_files() + except: + logger.error("Packages: File read failed; " + "falling back to file download") + should_download = True + + if should_download or force_update: + try: + self.update() + self.read_files() + except: + logger.error("Failed to update source", exc_info=1) + + def get_repo_name(self, url_map): + # try to find a sensible name for a repo + match = self.reponame_re.search(url_map['url']) + if url_map['component']: + return url_map['component'] + elif match: + return match.group(1) + else: + # couldn't figure out the name from the URL or URL map + # (which probably means its a screwy URL), so we just + # generate a random one + name = base64.b64encode(os.urandom(16))[:-2] + return "%s-%s" % (self.groups[0], name) + + def __str__(self): + if self.rawurl: + return "%s at %s" % (self.__class__.__name__, self.rawurl) + elif self.url: + return "%s at %s" % (self.__class__.__name__, self.url) + else: + return self.__class__.__name__ + + def get_urls(self): + return [] + urls = property(get_urls) + + def get_files(self): + return [self.escape_url(url) for url in self.urls] + files = property(get_files) + + def get_vpkgs(self, metadata): + agroups = ['global'] + [a for a in self.arches + if a in metadata.groups] + vdict = dict() + for agrp in agroups: + for key, value in list(self.provides[agrp].items()): + if key not in vdict: + vdict[key] = set(value) + else: + vdict[key].update(value) + return vdict + + def is_virtual_package(self, metadata, package): + """ called to determine if a package is a virtual package. + this is only invoked if the package is not listed in the dict + returned by get_vpkgs """ + return False + + def escape_url(self, url): + return os.path.join(self.basepath, url.replace('/', '@')) + + def file_init(self): + pass + + def read_files(self): + pass + + def filter_unknown(self, unknown): + pass + + def update(self): + for url in self.urls: + logger.info("Packages: Updating %s" % url) + fname = self.escape_url(url) + try: + data = fetch_url(url) + except ValueError: + logger.error("Packages: Bad url string %s" % url) + continue + except HTTPError: + err = sys.exc_info()[1] + logger.error("Packages: Failed to fetch url %s. 
code=%s" % + (url, err.code)) + continue + file(fname, 'w').write(data) + + def applies(self, metadata): + # check base groups + if not self.magic_groups_match(metadata): + return False + + # check Group/Client tags from sources.xml + for condition in self.conditions: + if not condition(metadata): + return False + + return True + + def get_arches(self, metadata): + return ['global'] + [a for a in self.arches if a in metadata.groups] + + def get_deps(self, metadata, pkgname): + for arch in self.get_arches(metadata): + if pkgname in self.deps[arch]: + return self.deps[arch][pkgname] + return [] + + def get_provides(self, metadata, required): + for arch in self.get_arches(metadata): + if required in self.provides[arch]: + return self.provides[arch][required] + return [] + + def is_package(self, metadata, _): + return False + + def get_package(self, metadata, package): + return package + + def get_group(self, metadata, package): + return [] + + def magic_groups_match(self, metadata): + """ check to see if this source applies to the given host + metadata by checking 'magic' (base) groups only, or if magic + groups are off """ + # we always check that arch matches + found_arch = False + for arch in self.arches: + if arch in metadata.groups: + found_arch = True + break + if not found_arch: + return False + + if (self.config.has_section("global") and + self.config.has_option("global", "magic_groups") and + self.config.getboolean("global", "magic_groups") == False): + return True + else: + for group in self.basegroups: + if group in metadata.groups: + return True + return False diff --git a/src/lib/Server/Plugins/Packages/Yum.py b/src/lib/Server/Plugins/Packages/Yum.py new file mode 100644 index 000000000..ae1fcd956 --- /dev/null +++ b/src/lib/Server/Plugins/Packages/Yum.py @@ -0,0 +1,950 @@ +import os +import sys +import time +import copy +import glob +import socket +import random +import logging +import threading +import lxml.etree +from UserDict import DictMixin +import Bcfg2.Server.Plugin +from Bcfg2.Bcfg2Py3k import StringIO, cPickle, HTTPError, ConfigParser, file +from Bcfg2.Server.Plugins.Packages.Collection import Collection +from Bcfg2.Server.Plugins.Packages.Source import Source, fetch_url + +logger = logging.getLogger("Packages") + +try: + from pulp.client.consumer.config import ConsumerConfig + from pulp.client.api.repository import RepositoryAPI + from pulp.client.api.consumer import ConsumerAPI + from pulp.client.api import server + has_pulp = True +except ImportError: + has_pulp = False + +try: + import yum + has_yum = True +except ImportError: + has_yum = False + logger.info("No yum libraries found; forcing use of internal dependency " + "resolver") + +XP = '{http://linux.duke.edu/metadata/common}' +RP = '{http://linux.duke.edu/metadata/rpm}' +RPO = '{http://linux.duke.edu/metadata/repo}' +FL = '{http://linux.duke.edu/metadata/filelists}' + +PULPSERVER = None +PULPCONFIG = None + +def _setup_pulp(config): + global PULPSERVER, PULPCONFIG + if not has_pulp: + logger.error("Cannot create Pulp collection: Pulp libraries not " + "found") + raise Bcfg2.Server.Plugin.PluginInitError + + if PULPSERVER is None: + try: + username = config.get("pulp", "username") + password = config.get("pulp", "password") + except ConfigParser.NoSectionError: + logger.error("No [pulp] section found in Packages/packages.conf") + raise Bcfg2.Server.Plugin.PluginInitError + except ConfigParser.NoOptionError: + err = sys.exc_info()[1] + logger.error("Required option not found in " + "Packages/packages.conf: %s" % 
err)
+            raise Bcfg2.Server.Plugin.PluginInitError
+
+        PULPCONFIG = ConsumerConfig()
+        serveropts = PULPCONFIG.server
+
+        PULPSERVER = server.PulpServer(serveropts['host'],
+                                       int(serveropts['port']),
+                                       serveropts['scheme'],
+                                       serveropts['path'])
+        PULPSERVER.set_basic_auth_credentials(username, password)
+        server.set_active_server(PULPSERVER)
+    return PULPSERVER
+
+
+class CacheItem(object):
+    def __init__(self, value, expiration=None):
+        self.value = value
+        if expiration:
+            self.expiration = time.time() + expiration
+        else:
+            self.expiration = None
+
+    def expired(self):
+        if self.expiration:
+            return time.time() > self.expiration
+        else:
+            return False
+
+
+class Cache(DictMixin):
+    def __init__(self, expiration=None, tidy=None):
+        """ params:
+        - expiration: How many seconds a cache entry stays alive for.
+          Specify None for no expiration.
+        - tidy: How frequently to tidy the cache (remove all expired
+          entries).  Without this, entries are only expired as they
+          are accessed.  The cache will be tidied once per every <tidy>
+          accesses to cache data; a sensible value might be, e.g.,
+          10000.  Specify 0 to fully tidy the cache every access; this
+          makes the cache much slower, but also smaller in memory.
+          Specify None to never tidy the cache; this makes the cache
+          faster, but potentially much larger in memory, especially if
+          cache items are accessed infrequently. """
+        self.cache = dict()
+        self.expiration = expiration
+        self.tidy = tidy
+        self.access_count = 0
+
+    def __getitem__(self, key):
+        self._expire(key)
+        if key in self.cache:
+            return self.cache[key].value
+        else:
+            raise KeyError(key)
+
+    def __setitem__(self, key, value):
+        self.cache[key] = CacheItem(value, self.expiration)
+
+    def __delitem__(self, key):
+        del self.cache[key]
+
+    def __contains__(self, key):
+        self._expire(key)
+        return key in self.cache
+
+    def keys(self):
+        return self.cache.keys()
+
+    def __iter__(self):
+        for k in self.cache.keys():
+            try:
+                yield k
+            except KeyError:
+                pass
+
+    def iteritems(self):
+        for k in self:
+            try:
+                yield (k, self[k])
+            except KeyError:
+                pass
+
+    def _expire(self, *args):
+        if args:
+            self.access_count += 1
+            if self.tidy is not None and self.access_count >= self.tidy:
+                self.access_count = 0
+                candidates = self.cache.items()
+            else:
+                candidates = [(k, self.cache[k]) for k in args]
+        else:
+            candidates = self.cache.items()
+
+        expire = []
+        for key, item in candidates:
+            if item.expired():
+                expire.append(key)
+        for key in expire:
+            del self.cache[key]
+
+    def clear(self):
+        self.cache = dict()
+
+
+class YumCollection(Collection):
+    def __init__(self, metadata, sources, basepath):
+        Collection.__init__(self, metadata, sources, basepath)
+        self.keypath = os.path.join(self.basepath, "keys")
+
+        if len(sources):
+            config = sources[0].config
+            self.use_yum = has_yum
+            try:
+                self.use_yum &= config.getboolean("yum", "use_yum_libraries")
+            except (ConfigParser.NoOptionError, ConfigParser.NoSectionError):
+                self.use_yum = False
+        else:
+            self.use_yum = False
+
+        if self.use_yum:
+            self._yb = None
+            self.cachefile = os.path.join(self.cachepath,
+                                          "cache-%s" % self.cachekey)
+            if not os.path.exists(self.cachefile):
+                os.mkdir(self.cachefile)
+
+            self.configdir = os.path.join(self.basepath, "yum")
+            if not os.path.exists(self.configdir):
+                os.mkdir(self.configdir)
+            self.cfgfile = os.path.join(self.configdir,
+                                        "%s-yum.conf" % self.cachekey)
+            if self.config.has_option("yum", "metadata_expire"):
+                cache_expire = self.config.getint("yum", "metadata_expire")
+            else:
+                cache_expire = 21600
+
+            self.pkgs_cache = Cache(expiration=cache_expire)
+            self.deps_cache = Cache(expiration=cache_expire)
+            self.vpkgs_cache = Cache(expiration=cache_expire)
+            self.group_cache = Cache(expiration=cache_expire)
+            self.pkgset_cache = Cache(expiration=cache_expire)
+
+        if has_pulp:
+            _setup_pulp(self.config)
+
+    @property
+    def yumbase(self):
+        """ if we try to access a Yum SQLitePackageSack object in a
+        different thread from the one it was created in, we get a
+        nasty error.  but I can't find a way to detect when a new
+        thread is started (which happens for every new client
+        connection, I think), so this property creates a new YumBase
+        object if the old YumBase object was created in a different
+        thread than the current one.  (I definitely don't want to
+        create a new YumBase object every time it's used, because that
+        involves writing a temp file, at least for now.) """
+        if not self.use_yum:
+            self._yb = None
+            self._yb_thread = None
+        elif (self._yb is None or
+              self._yb_thread != threading.current_thread().ident):
+            self._yb = yum.YumBase()
+            self._yb_thread = threading.current_thread().ident
+
+            if not os.path.exists(self.cfgfile):
+                # todo: detect yum version.  Supposedly very new
+                # versions of yum have better support for
+                # reconfiguring on the fly using the RepoStorage API
+                yumconf = self.get_config(raw=True)
+                yumconf.add_section("main")
+
+                mainopts = dict(cachedir=self.cachefile,
+                                keepcache="0",
+                                sslverify="0",
+                                reposdir="/dev/null")
+                try:
+                    for opt in self.config.options("yum"):
+                        if opt != "use_yum_libraries":
+                            mainopts[opt] = self.config.get("yum", opt)
+                except ConfigParser.NoSectionError:
+                    pass
+
+                for opt, val in list(mainopts.items()):
+                    yumconf.set("main", opt, val)
+
+                yumconf.write(open(self.cfgfile, 'w'))
+
+            # it'd be nice if we could change this to be more verbose
+            # if -v was given, but Collection objects don't get setup.
+            # It'd also be nice if we could tell yum to log to syslog,
+            # but so would a unicorn.
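+            # preconf options are consumed when the YumBase object
+            # first loads its configuration, so they must be set
+            # before anything touches conf or pkgSack; debuglevel 1
+            # keeps yum's own console output to a minimum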
+ self._yb.preconf.debuglevel = 1 + self._yb.preconf.fn = self.cfgfile + return self._yb + + def get_config(self, raw=False): + config = ConfigParser.SafeConfigParser() + for source in self.sources: + # get_urls() loads url_map as a side-effect + source.get_urls() + for url_map in source.url_map: + if url_map['arch'] in self.metadata.groups: + reponame = source.get_repo_name(url_map) + config.add_section(reponame) + config.set(reponame, "name", reponame) + config.set(reponame, "baseurl", url_map['url']) + config.set(reponame, "enabled", "1") + if len(source.gpgkeys): + config.set(reponame, "gpgcheck", "1") + config.set(reponame, "gpgkey", + " ".join(source.gpgkeys)) + else: + config.set(reponame, "gpgcheck", "0") + + if len(source.blacklist): + config.set(reponame, "exclude", + " ".join(source.blacklist)) + if len(source.whitelist): + config.set(reponame, "includepkgs", + " ".join(source.whitelist)) + + if raw: + return config + else: + # configparser only writes to file, so we have to use a + # StringIO object to get the data out as a string + buf = StringIO() + config.write(buf) + return "# This config was generated automatically by the Bcfg2 " \ + "Packages plugin\n\n" + buf.getvalue() + + def build_extra_structures(self, independent): + """ build list of gpg keys to be added to the specification by + validate_structures() """ + needkeys = set() + for source in self.sources: + for key in source.gpgkeys: + needkeys.add(key) + + if len(needkeys): + keypkg = lxml.etree.Element('BoundPackage', name="gpg-pubkey", + type=self.ptype, origin='Packages') + + for key in needkeys: + # figure out the path of the key on the client + try: + keydir = self.config.get("global", "gpg_keypath") + except (ConfigParser.NoOptionError, + ConfigParser.NoSectionError): + keydir = "/etc/pki/rpm-gpg" + remotekey = os.path.join(keydir, os.path.basename(key)) + localkey = os.path.join(self.keypath, os.path.basename(key)) + kdata = open(localkey).read() + + # copy the key to the client + keypath = lxml.etree.Element("BoundPath", name=remotekey, + encoding='ascii', + owner='root', group='root', + type='file', perms='0644', + important='true') + keypath.text = kdata + + # hook to add version/release info if possible + self._add_gpg_instances(keypkg, kdata, localkey, remotekey) + independent.append(keypath) + independent.append(keypkg) + + # see if there are any pulp sources to handle + has_pulp_sources = False + for source in self.sources: + if source.pulp_id: + has_pulp_sources = True + break + + if has_pulp_sources: + consumerapi = ConsumerAPI() + consumer = self._get_pulp_consumer(consumerapi=consumerapi) + if consumer is None: + consumer = consumerapi.create(self.metadata.hostname, + self.metadata.hostname) + lxml.etree.SubElement(independent, "BoundAction", + name="pulp-update", timing="pre", + when="always", status="check", + command="pulp-consumer consumer update") + + for source in self.sources: + # each pulp source can only have one arch, so we don't + # have to check the arch in url_map + if (source.pulp_id and + source.pulp_id not in consumer['repoids']): + consumerapi.bind(self.metadata.hostname, source.pulp_id) + + crt = lxml.etree.SubElement(independent, "BoundPath", + name="/etc/pki/consumer/cert.pem", + type="file", owner="root", + group="root", perms="0644") + crt.text = consumerapi.certificate(self.metadata.hostname) + + def _get_pulp_consumer(self, consumerapi=None): + if consumerapi is None: + consumerapi = ConsumerAPI() + consumer = None + try: + consumer = 
consumerapi.consumer(self.metadata.hostname)
+        except server.ServerRequestError:
+            # consumer does not exist
+            pass
+        except socket.error:
+            err = sys.exc_info()[1]
+            logger.error("Could not contact Pulp server: %s" % err)
+        except:
+            err = sys.exc_info()[1]
+            logger.error("Unknown error querying Pulp server: %s" % err)
+        return consumer
+
+    def _add_gpg_instances(self, keyentry, keydata, localkey, remotekey):
+        """ add gpg keys to the specification to ensure they get
+        installed """
+        if self.use_yum:
+            try:
+                kinfo = yum.misc.getgpgkeyinfo(keydata)
+                version = yum.misc.keyIdToRPMVer(kinfo['keyid'])
+                release = yum.misc.keyIdToRPMVer(kinfo['timestamp'])
+
+                lxml.etree.SubElement(keyentry, 'Instance',
+                                      version=version,
+                                      release=release,
+                                      simplefile=remotekey)
+            except ValueError:
+                err = sys.exc_info()[1]
+                self.logger.error("Could not read GPG key %s: %s" %
+                                  (localkey, err))
+
+    def is_package(self, package):
+        if not self.use_yum:
+            return Collection.is_package(self, package)
+
+        if isinstance(package, tuple):
+            if package[1] is None and package[2] == (None, None, None):
+                package = package[0]
+            else:
+                return None
+
+        try:
+            return self.pkgs_cache[package]
+        except KeyError:
+            pass
+
+        self.pkgs_cache[package] = bool(self.get_package_object(package,
+                                                                silent=True))
+        return self.pkgs_cache[package]
+
+    def is_virtual_package(self, package):
+        if self.use_yum:
+            try:
+                return bool(self.vpkgs_cache[package])
+            except KeyError:
+                return bool(self.get_provides(package, silent=True))
+        else:
+            return Collection.is_virtual_package(self, package)
+
+    def get_package_object(self, package, silent=False):
+        """ package objects cannot be cached since they are sqlite
+        objects, so they can't be reused between threads. """
+        try:
+            matches = self.yumbase.pkgSack.returnNewestByName(name=package)
+        except yum.Errors.PackageSackError:
+            if not silent:
+                self.logger.warning("Packages: Package '%s' not found" %
+                                    self.get_package_name(package))
+            matches = []
+        except yum.Errors.RepoError:
+            err = sys.exc_info()[1]
+            self.logger.error("Packages: Temporary failure loading metadata "
+                              "for '%s': %s" %
+                              (self.get_package_name(package), err))
+            matches = []
+
+        pkgs = self._filter_arch(matches)
+        if pkgs:
+            return pkgs[0]
+        else:
+            return None
+
+    def get_deps(self, package):
+        if not self.use_yum:
+            return Collection.get_deps(self, package)
+
+        try:
+            return self.deps_cache[package]
+        except KeyError:
+            pass
+
+        pkg = self.get_package_object(package)
+        deps = []
+        if pkg:
+            deps = set(pkg.requires)
+            # filter out things the package itself provides
+            deps.difference_update([dep for dep in deps
+                                    if pkg.checkPrco('provides', dep)])
+        else:
+            self.logger.error("Packages: No package available: %s" %
+                              self.get_package_name(package))
+        self.deps_cache[package] = deps
+        return self.deps_cache[package]
+
+    def get_provides(self, required, all=False, silent=False):
+        if not self.use_yum:
+            return Collection.get_provides(self, required)
+
+        if not isinstance(required, tuple):
+            required = (required, None, (None, None, None))
+
+        try:
+            return self.vpkgs_cache[required]
+        except KeyError:
+            pass
+
+        try:
+            prov = \
+                self.yumbase.whatProvides(*required).returnNewestByNameArch()
+        except yum.Errors.NoMoreMirrorsRepoError:
+            err = sys.exc_info()[1]
+            self.logger.error("Packages: Temporary failure loading metadata "
+                              "for '%s': %s" %
+                              (self.get_package_name(required),
+                               err))
+            self.vpkgs_cache[required] = None
+            return []
+
+        if prov and not all:
+            prov = self._filter_provides(required, prov)
+        elif not prov and not silent:
+            self.logger.error("Packages: No package provides %s" %
+                              self.get_package_name(required))
+        self.vpkgs_cache[required] = prov
+        return self.vpkgs_cache[required]
+
+    def get_group(self, group):
+        if not self.use_yum:
+            self.logger.warning("Package groups are not supported by Bcfg2's "
+                                "internal Yum dependency generator")
+            return []
+
+        if group.startswith("@"):
+            group = group[1:]
+
+        try:
+            return self.group_cache[group]
+        except KeyError:
+            pass
+
+        try:
+            if self.yumbase.comps.has_group(group):
+                pkgs = self.yumbase.comps.return_group(group).packages
+            else:
+                self.logger.warning("Packages: '%s' is not a valid group" %
+                                    group)
+                pkgs = []
+        except yum.Errors.GroupsError:
+            err = sys.exc_info()[1]
+            self.logger.warning("Packages: %s" % err)
+            pkgs = []
+
+        self.group_cache[group] = pkgs
+        return self.group_cache[group]
+
+    def _filter_provides(self, package, providers):
+        providers = [pkg for pkg in self._filter_arch(providers)]
+        if len(providers) > 1:
+            # go through each provider and make sure it's the newest
+            # package of its name available.  If we have multiple
+            # providers, avoid installing old packages.
+            #
+            # For instance: on Fedora 14,
+            # perl-Sub-WrapPackages-2.0-2.fc14 erroneously provided
+            # perl(lib), which should not have been provided;
+            # perl(lib) is provided by the "perl" package.  The bogus
+            # provide was removed in perl-Sub-WrapPackages-2.0-4.fc14,
+            # but if we just queried to resolve the "perl(lib)"
+            # dependency, we'd get both packages.  By performing this
+            # check, we learn that there's a newer
+            # perl-Sub-WrapPackages available, so it can't be the best
+            # provider of perl(lib).
+            rv = []
+            for pkg in providers:
+                if self.get_package_object(pkg.name) == pkg:
+                    rv.append(pkg)
+        else:
+            rv = providers
+        return [p.name for p in rv]
+
+    def _filter_arch(self, packages):
+        groups = set(list(self.get_relevant_groups()) + ["noarch"])
+        matching = [pkg for pkg in packages if pkg.arch in groups]
+        if matching:
+            return matching
+        else:
+            # no packages match architecture; we'll assume that the
+            # user knows what s/he is doing and this is a multiarch
+            # box.
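+            # returning everything here errs on the side of resolving
+            # too much rather than silently dropping the requirement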
+ return packages + + def get_package_name(self, package): + """ get the name of a package or virtual package from the + internal representation used by this Collection class """ + if self.use_yum and isinstance(package, tuple): + return yum.misc.prco_tuple_to_string(package) + else: + return str(package) + + def complete(self, packagelist): + if not self.use_yum: + return Collection.complete(self, packagelist) + + cachekey = cPickle.dumps(sorted(packagelist)) + try: + return self.pkgset_cache[cachekey] + except KeyError: pass + + packages = set() + pkgs = set(packagelist) + requires = set() + satisfied = set() + unknown = set() + final_pass = False + + while requires or pkgs: + # infinite loop protection + start_reqs = len(requires) + + while pkgs: + package = pkgs.pop() + if package in packages: + continue + + if not self.is_package(package): + # try this package out as a requirement + requires.add((package, None, (None, None, None))) + continue + + packages.add(package) + reqs = set(self.get_deps(package)).difference(satisfied) + if reqs: + requires.update(reqs) + + reqs_satisfied = set() + for req in requires: + if req in satisfied: + reqs_satisfied.add(req) + continue + + if req[1] is None and self.is_package(req[0]): + if req[0] not in packages: + pkgs.add(req[0]) + reqs_satisfied.add(req) + continue + + self.logger.debug("Packages: Handling requirement '%s'" % + self.get_package_name(req)) + providers = list(set(self.get_provides(req))) + if len(providers) > 1: + # hopefully one of the providing packages is already + # included + best = [p for p in providers if p in packages] + if best: + providers = best + else: + # pick a provider whose name matches the requirement + best = [p for p in providers if p == req[0]] + if len(best) == 1: + providers = best + elif not final_pass: + # found no "best" package, so defer + providers = None + # else: found no "best" package, but it's the + # final pass, so include them all + + if providers: + self.logger.debug("Packages: Requirement '%s' satisfied " + "by %s" % + (self.get_package_name(req), + ",".join([self.get_package_name(p) + for p in providers]))) + newpkgs = set(providers).difference(packages) + if newpkgs: + for package in newpkgs: + if self.is_package(package): + pkgs.add(package) + else: + unknown.add(package) + reqs_satisfied.add(req) + elif providers is not None: + # nothing provided this requirement at all + unknown.add(req) + reqs_satisfied.add(req) + # else, defer + requires.difference_update(reqs_satisfied) + + # infinite loop protection + if len(requires) == start_reqs and len(pkgs) == 0: + final_pass = True + + if final_pass and requires: + unknown.update(requires) + requires = set() + + self.filter_unknown(unknown) + unknown = [self.get_package_name(p) for p in unknown] + + self.pkgset_cache[cachekey] = (packages, unknown) + + return packages, unknown + + def setup_data(self, force_update=False): + if not self.use_yum: + return Collection.setup_data(self, force_update) + + for cfile in glob.glob(os.path.join(self.configdir, "*-yum.conf")): + os.unlink(cfile) + self._yb = None + + self.pkgs_cache.clear() + self.deps_cache.clear() + self.vpkgs_cache.clear() + self.group_cache.clear() + self.pkgset_cache.clear() + + if force_update: + for mdtype in ["Headers", "Packages", "Sqlite", "Metadata", + "ExpireCache"]: + # for reasons that are entirely obvious, all of the + # yum API clean* methods return a tuple of 0 (zero, + # always zero) and a list containing a single message + # about how many files were deleted. so useful. 
+ # thanks, yum. + self.logger.info("Packages: %s" % + getattr(self.yumbase, + "clean%s" % mdtype)()[1][0]) + + +class YumSource(Source): + basegroups = ['yum', 'redhat', 'centos', 'fedora'] + ptype = 'yum' + + def __init__(self, basepath, xsource, config): + Source.__init__(self, basepath, xsource, config) + self.pulp_id = None + if has_pulp and xsource.get("pulp_id"): + self.pulp_id = xsource.get("pulp_id") + + _setup_pulp(self.config) + repoapi = RepositoryAPI() + try: + self.repo = repoapi.repository(self.pulp_id) + self.gpgkeys = ["%s/%s" % (PULPCONFIG.cds['keyurl'], key) + for key in repoapi.listkeys(self.pulp_id)] + except server.ServerRequestError: + err = sys.exc_info()[1] + if err[0] == 401: + msg = "Error authenticating to Pulp: %s" % err[1] + elif err[0] == 404: + msg = "Pulp repo id %s not found: %s" % (self.pulp_id, + err[1]) + else: + msg = "Error %d fetching pulp repo %s: %s" % (err[0], + self.pulp_id, + err[1]) + logger.error(msg) + raise Bcfg2.Server.Plugin.PluginInitError + except socket.error: + err = sys.exc_info()[1] + logger.error("Could not contact Pulp server: %s" % err) + raise Bcfg2.Server.Plugin.PluginInitError + except: + err = sys.exc_info()[1] + logger.error("Unknown error querying Pulp server: %s" % err) + raise Bcfg2.Server.Plugin.PluginInitError + self.rawurl = "%s/%s" % (PULPCONFIG.cds['baseurl'], + self.repo['relative_path']) + self.arches = [self.repo['arch']] + + if not self.rawurl: + self.baseurl = self.url + "%(version)s/%(component)s/%(arch)s/" + else: + self.baseurl = self.rawurl + self.packages = dict() + self.deps = dict([('global', dict())]) + self.provides = dict([('global', dict())]) + self.filemap = dict([(x, dict()) + for x in ['global'] + self.arches]) + self.needed_paths = set() + self.file_to_arch = dict() + + self.use_yum = has_yum + try: + self.use_yum &= config.getboolean("yum", "use_yum_libraries") + except (ConfigParser.NoOptionError, ConfigParser.NoSectionError): + self.use_yum = False + + def save_state(self): + if not self.use_yum: + cache = file(self.cachefile, 'wb') + cPickle.dump((self.packages, self.deps, self.provides, + self.filemap, self.url_map), cache, 2) + cache.close() + + + def load_state(self): + if not self.use_yum: + data = file(self.cachefile) + (self.packages, self.deps, self.provides, + self.filemap, self.url_map) = cPickle.load(data) + + def get_urls(self): + surls = list() + self.url_map = [] + for arch in self.arches: + if self.url: + usettings = [{'version':self.version, 'component':comp, + 'arch':arch} + for comp in self.components] + else: # rawurl given + usettings = [{'version':self.version, 'component':None, + 'arch':arch}] + + for setting in usettings: + setting['url'] = self.baseurl % setting + self.url_map.append(copy.deepcopy(setting)) + surls.append((arch, [setting['url'] for setting in usettings])) + urls = [] + for (sarch, surl_list) in surls: + for surl in surl_list: + urls.extend(self._get_urls_from_repodata(surl, sarch)) + return urls + urls = property(get_urls) + + def _get_urls_from_repodata(self, url, arch): + if self.use_yum: + return [url] + + rmdurl = '%srepodata/repomd.xml' % url + try: + repomd = fetch_url(rmdurl) + xdata = lxml.etree.XML(repomd) + except ValueError: + logger.error("Packages: Bad url string %s" % rmdurl) + return [] + except HTTPError: + err = sys.exc_info()[1] + logger.error("Packages: Failed to fetch url %s. 
code=%s" % + (rmdurl, err.code)) + return [] + except lxml.etree.XMLSyntaxError: + err = sys.exc_info()[1] + logger.error("Packages: Failed to process metadata at %s: %s" % + (rmdurl, err)) + return [] + + urls = [] + for elt in xdata.findall(RPO + 'data'): + if elt.get('type') in ['filelists', 'primary']: + floc = elt.find(RPO + 'location') + fullurl = url + floc.get('href') + urls.append(fullurl) + self.file_to_arch[self.escape_url(fullurl)] = arch + return urls + + def read_files(self): + # we have to read primary.xml first, and filelists.xml afterwards; + primaries = list() + filelists = list() + for fname in self.files: + if fname.endswith('primary.xml.gz'): + primaries.append(fname) + elif fname.endswith('filelists.xml.gz'): + filelists.append(fname) + + for fname in primaries: + farch = self.file_to_arch[fname] + fdata = lxml.etree.parse(fname).getroot() + self.parse_primary(fdata, farch) + for fname in filelists: + farch = self.file_to_arch[fname] + fdata = lxml.etree.parse(fname).getroot() + self.parse_filelist(fdata, farch) + + # merge data + sdata = list(self.packages.values()) + try: + self.packages['global'] = copy.deepcopy(sdata.pop()) + except IndexError: + logger.error("No packages in repo") + while sdata: + self.packages['global'] = \ + self.packages['global'].intersection(sdata.pop()) + + for key in self.packages: + if key == 'global': + continue + self.packages[key] = \ + self.packages[key].difference(self.packages['global']) + self.save_state() + + def parse_filelist(self, data, arch): + if arch not in self.filemap: + self.filemap[arch] = dict() + for pkg in data.findall(FL + 'package'): + for fentry in pkg.findall(FL + 'file'): + if fentry.text in self.needed_paths: + if fentry.text in self.filemap[arch]: + self.filemap[arch][fentry.text].add(pkg.get('name')) + else: + self.filemap[arch][fentry.text] = \ + set([pkg.get('name')]) + + def parse_primary(self, data, arch): + if arch not in self.packages: + self.packages[arch] = set() + if arch not in self.deps: + self.deps[arch] = dict() + if arch not in self.provides: + self.provides[arch] = dict() + for pkg in data.getchildren(): + if not pkg.tag.endswith('package'): + continue + pkgname = pkg.find(XP + 'name').text + self.packages[arch].add(pkgname) + + pdata = pkg.find(XP + 'format') + pre = pdata.find(RP + 'requires') + self.deps[arch][pkgname] = set() + for entry in pre.getchildren(): + self.deps[arch][pkgname].add(entry.get('name')) + if entry.get('name').startswith('/'): + self.needed_paths.add(entry.get('name')) + pro = pdata.find(RP + 'provides') + if pro != None: + for entry in pro.getchildren(): + prov = entry.get('name') + if prov not in self.provides[arch]: + self.provides[arch][prov] = list() + self.provides[arch][prov].append(pkgname) + + def is_package(self, metadata, item): + arch = [a for a in self.arches if a in metadata.groups] + if not arch: + return False + return ((item in self.packages['global'] or + item in self.packages[arch[0]]) and + item not in self.blacklist and + (len(self.whitelist) == 0 or item in self.whitelist)) + + def get_vpkgs(self, metadata): + if self.use_yum: + return dict() + + rv = Source.get_vpkgs(self, metadata) + for arch, fmdata in list(self.filemap.items()): + if arch not in metadata.groups and arch != 'global': + continue + for filename, pkgs in list(fmdata.items()): + rv[filename] = pkgs + return rv + + def filter_unknown(self, unknown): + if self.use_yum: + filtered = set() + for unk in unknown: + try: + if unk.startswith('rpmlib'): + filtered.update(unk) + except 
AttributeError: + try: + if unk[0].startswith('rpmlib'): + filtered.update(unk) + except (IndexError, AttributeError): + pass + else: + filtered = set([u for u in unknown if u.startswith('rpmlib')]) + unknown.difference_update(filtered) + + def setup_data(self, force_update=False): + if not self.use_yum: + Source.setup_data(self, force_update=force_update) + + def get_repo_name(self, url_map): + if self.pulp_id: + return self.pulp_id + else: + return Source.get_repo_name(self, url_map) diff --git a/src/lib/Server/Plugins/Packages/__init__.py b/src/lib/Server/Plugins/Packages/__init__.py new file mode 100644 index 000000000..7dd5d25db --- /dev/null +++ b/src/lib/Server/Plugins/Packages/__init__.py @@ -0,0 +1,239 @@ +import os +import sys +import time +import copy +import glob +import shutil +import logging +import lxml.etree +import Bcfg2.Logger +import Bcfg2.Server.Plugin +from Bcfg2.Bcfg2Py3k import ConfigParser, urlopen +from Bcfg2.Server.Plugins.Packages import Collection +from Bcfg2.Server.Plugins.Packages.PackagesSources import PackagesSources +from Bcfg2.Server.Plugins.Packages.PackagesConfig import PackagesConfig + +logger = logging.getLogger('Packages') + +class Packages(Bcfg2.Server.Plugin.Plugin, + Bcfg2.Server.Plugin.StructureValidator, + Bcfg2.Server.Plugin.Generator, + Bcfg2.Server.Plugin.Connector): + name = 'Packages' + conflicts = ['Pkgmgr'] + experimental = True + __rmi__ = Bcfg2.Server.Plugin.Plugin.__rmi__ + ['Refresh', 'Reload'] + + def __init__(self, core, datastore): + Bcfg2.Server.Plugin.Plugin.__init__(self, core, datastore) + Bcfg2.Server.Plugin.StructureValidator.__init__(self) + Bcfg2.Server.Plugin.Generator.__init__(self) + Bcfg2.Server.Plugin.Connector.__init__(self) + Bcfg2.Server.Plugin.Probing.__init__(self) + + self.collections = dict() + self.sentinels = set() + self.cachepath = os.path.join(self.data, 'cache') + self.keypath = os.path.join(self.data, 'keys') + if not os.path.exists(self.keypath): + # create key directory if needed + os.makedirs(self.keypath) + + # set up config files + self.config = PackagesConfig(os.path.join(self.data, "packages.conf"), + core.fam, self) + self.sources = PackagesSources(os.path.join(self.data, "sources.xml"), + self.cachepath, core.fam, self, + self.config) + + @property + def disableResolver(self): + try: + return self.config.get("global", "resolver").lower() == "disabled" + except (ConfigParser.NoSectionError, ConfigParser.NoOptionError): + return False + + @property + def disableMetaData(self): + try: + return self.config.get("global", "metadata").lower() == "disabled" + except (ConfigParser.NoSectionError, ConfigParser.NoOptionError): + return False + + def create_config(self, entry, metadata): + """ create yum/apt config for the specified host """ + attrib = {'encoding': 'ascii', + 'owner': 'root', + 'group': 'root', + 'type': 'file', + 'perms': '0644'} + + collection = self._get_collection(metadata) + entry.text = collection.get_config() + for (key, value) in list(attrib.items()): + entry.attrib.__setitem__(key, value) + + def HandleEntry(self, entry, metadata): + if entry.tag == 'Package': + collection = self._get_collection(metadata) + entry.set('version', 'auto') + entry.set('type', collection.ptype) + elif entry.tag == 'Path': + if (self.config.has_section("global") and + ((self.config.has_option("global", "yum_config") and + entry.get("name") == self.config.get("global", + "yum_config")) or + (self.config.has_option("global", "apt_config") and + entry.get("name") == self.config.get("global", + 
"apt_config")))): + self.create_config(entry, metadata) + + def HandlesEntry(self, entry, metadata): + if entry.tag == 'Package': + collection = self._get_collection(metadata) + if collection.magic_groups_match(): + return True + elif entry.tag == 'Path': + # managed entries for yum/apt configs + if ((self.config.has_option("global", "yum_config") and + entry.get("name") == self.config.get("global", + "yum_config")) or + (self.config.has_option("global", "apt_config") and + entry.get("name") == self.config.get("global", "apt_config"))): + return True + return False + + def validate_structures(self, metadata, structures): + '''Ensure client configurations include all needed prerequisites + + Arguments: + metadata - client metadata instance + structures - a list of structure-stage entry combinations + ''' + collection = self._get_collection(metadata) + indep = lxml.etree.Element('Independent') + self._build_packages(metadata, indep, structures, + collection=collection) + collection.build_extra_structures(indep) + structures.append(indep) + + def _build_packages(self, metadata, independent, structures, + collection=None): + """ build list of packages that need to be included in the + specification by validate_structures() """ + if self.disableResolver: + # Config requests no resolver + return + + if collection is None: + collection = self._get_collection(metadata) + initial = set() + to_remove = [] + for struct in structures: + for pkg in struct.xpath('//Package | //BoundPackage'): + if pkg.get("name"): + initial.add(pkg.get("name")) + elif pkg.get("group"): + initial.update(collection.get_group(pkg.get("group"))) + to_remove.append(pkg) + else: + self.logger.error("Malformed Package: %s" % + lxml.etree.tostring(pkg)) + for el in to_remove: + el.getparent().remove(el) + + packages, unknown = collection.complete(initial) + if unknown: + self.logger.info("Got %d unknown entries" % len(unknown)) + self.logger.info(list(unknown)) + newpkgs = list(packages.difference(initial)) + self.logger.debug("%d initial, %d complete, %d new" % + (len(initial), len(packages), len(newpkgs))) + newpkgs.sort() + for pkg in newpkgs: + lxml.etree.SubElement(independent, 'BoundPackage', name=pkg, + version='auto', type=collection.ptype, + origin='Packages') + + def Refresh(self): + '''Packages.Refresh() => True|False\nReload configuration + specification and download sources\n''' + self._load_config(force_update=True) + return True + + def Reload(self): + '''Packages.Refresh() => True|False\nReload configuration + specification and sources\n''' + self._load_config() + return True + + def _load_config(self, force_update=False): + ''' + Load the configuration data and setup sources + + Keyword args: + force_update Force downloading repo data + ''' + Collection.clear_cache() + self._load_sources(force_update) + self._load_gpg_keys(force_update) + + def _load_sources(self, force_update): + """ Load sources from the config """ + self.sentinels = set() + cachefiles = [] + + for hostname, collection in list(self.collections.items()): + cachefiles.extend(collection.cachefiles) + if not self.disableMetaData: + collection.setup_data(force_update) + self.sentinels.update(collection.basegroups) + + self.collections = dict() + + for cfile in glob.glob(os.path.join(self.cachepath, "cache-*")): + if cfile not in cachefiles: + try: + if os.path.isdir(cfile): + shutil.rmtree(cfile) + else: + os.unlink(cfile) + except OSError: + err = sys.exc_info()[1] + logger.error("Packages: Could not remove cache file %s: %s" + % (cfile, err)) + 
+
+    def _load_gpg_keys(self, force_update):
+        """ Load GPG keys from the configured sources """
+        keyfiles = []
+        keys = []
+        for source in self.sources:
+            for key in source.gpgkeys:
+                localfile = os.path.join(self.keypath, os.path.basename(key))
+                if localfile not in keyfiles:
+                    keyfiles.append(localfile)
+                if ((force_update and key not in keys) or
+                    not os.path.exists(localfile)):
+                    self.logger.info("Downloading and parsing %s" % key)
+                    response = urlopen(key)
+                    localfh = open(localfile, 'w')
+                    try:
+                        localfh.write(response.read())
+                    finally:
+                        localfh.close()
+                    keys.append(key)
+
+        for kfile in glob.glob(os.path.join(self.keypath, "*")):
+            if kfile not in keyfiles:
+                os.unlink(kfile)
+
+    def _get_collection(self, metadata):
+        if not self.sources.loaded:
+            # do not cache a collection object instantiated before
+            # sources have been loaded
+            return Collection.factory(metadata, self.sources, self.data)
+
+        if metadata.hostname not in self.collections:
+            self.collections[metadata.hostname] = \
+                Collection.factory(metadata, self.sources, self.data)
+        return self.collections[metadata.hostname]
+
+    def get_additional_data(self, metadata):
+        collection = self._get_collection(metadata)
+        return dict(sources=collection.get_additional_data())
-- 
cgit v1.2.3-1-g7c22
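The 'global' merge performed in YumSource.read_files() is easier to see in isolation: package names present in every architecture's repository collapse into a single 'global' set, and each architecture retains only its arch-specific names. A minimal standalone sketch, using hypothetical package names rather than real repository data:

# Standalone sketch of the read_files() merge; the package data here
# is invented for illustration (real data comes from parsed primary.xml).
packages = {
    'x86_64': set(['kernel', 'bash', 'grub-efi']),
    'i386': set(['kernel', 'bash', 'grub-pc']),
}

# intersect all per-arch sets into 'global'
sdata = list(packages.values())
packages['global'] = set(sdata.pop())
while sdata:
    packages['global'] = packages['global'].intersection(sdata.pop())

# strip the common packages back out of each per-arch set
for key in packages:
    if key == 'global':
        continue
    packages[key] = packages[key].difference(packages['global'])

print(packages['global'])   # set(['kernel', 'bash'])
print(packages['x86_64'])   # set(['grub-efi'])
print(packages['i386'])     # set(['grub-pc'])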
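The [global] options consulted above by disableResolver, disableMetaData, HandleEntry, and HandlesEntry come from the plugin's packages.conf, read via PackagesConfig. A sample configuration along these lines should work; the values and paths below are illustrative assumptions, not anything mandated by the patch (note that the code only tests for the literal value "disabled", so any other value leaves the feature enabled):

[global]
resolver = enabled
metadata = enabled
yum_config = /etc/yum.repos.d/bcfg2.repo
apt_config = /etc/apt/sources.list.d/bcfg2.list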