summaryrefslogtreecommitdiffstats
path: root/src/lib/Bcfg2/Server/Plugins/Packages/Source.py
blob: b57d1b0cc4f567ff0bbeaa0d2f9b48e61af40dbc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
""" ``Source`` objects represent a single <Source> tag in
``sources.xml``.  Note that a single Source tag can itself describe
multiple repositories (if it uses the "url" attribute instead of
"rawurl"), and so can the ``Source`` object.  This can be the source
(har har) of some confusion.  See
:func:`Bcfg2.Server.Plugins.Packages.Collection._Collection.sourcelist`
for the proper way to get all repos from a ``Source`` object.

Source objects are aggregated into
:class:`Bcfg2.Server.Plugins.Packages.Collection._Collection`
objects, which are actually called by
:class:`Bcfg2.Server.Plugins.Packages.Packages`.  This way a more
advanced subclass can query repositories in aggregate rather than
individually, which may give faster or more accurate results.

The base ``Source`` object must be subclassed to handle each
repository type.  How you subclass ``Source`` will depend on how you
subclassed
:class:`Bcfg2.Server.Plugins.Packages.Collection._Collection`; see
:mod:`Bcfg2.Server.Plugins.Packages.Collection` for more details on
different methods for doing that.

If you are using the stock (or a near-stock)
:class:`Bcfg2.Server.Plugins.Packages.Collection._Collection` object,
then you will need to implement the following methods and attributes
in your ``Source`` subclass:

* :func:`Source.get_urls`
* :func:`Source.read_files`
* :attr:`Source.basegroups`
* :attr:`Source.ptype`

Additionally, you may want to consider overriding the following
methods and attributes:

* :func:`Source.is_virtual_package`
* :func:`Source.get_group`
* :attr:`Source.unknown_filter`
* :func:`Source.load_state`
* :func:`Source.save_state`

If you are overriding the ``_Collection`` object in more depth, then
you have more leeway in what you might want to override or implement
in your ``Source`` subclass.
"""

import os
import re
import sys
import Bcfg2.Server.Plugin
from Bcfg2.Compat import HTTPError, HTTPBasicAuthHandler, \
     HTTPPasswordMgrWithDefaultRealm, install_opener, build_opener, \
     urlopen, cPickle, md5


def fetch_url(url):
    """ Return the content of the given URL.

    If the URL contains an ``@`` it must have the form
    ``scheme://user:password@rest``.  The credentials are stripped
    from the URL and an HTTP basic auth handler carrying them is
    installed as the default opener before the request is made.

    :param url: The URL to fetch content from.
    :type url: string
    :raises: ValueError - Malformed URL
    :raises: URLError - Failure fetching URL
    :returns: string - the content of the page at the given URL """
    if '@' in url:
        mobj = re.match(r'(\w+://)([^:]+):([^@]+)@(.*)$', url)
        if not mobj:
            # the URL itself is left out of the message because it may
            # carry credentials
            raise ValueError("Malformed URL: expected "
                             "scheme://user:password@host/path")
        user = mobj.group(2)
        passwd = mobj.group(3)
        # rebuild the URL without the user:password@ part
        url = mobj.group(1) + mobj.group(4)
        auth = HTTPBasicAuthHandler(HTTPPasswordMgrWithDefaultRealm())
        auth.add_password(None, url, user, passwd)
        install_opener(build_opener(auth))
    response = urlopen(url)
    try:
        return response.read()
    finally:
        # release the connection even if read() fails
        response.close()


class SourceInitError(Exception):
    """ Signals that a :class:`Source` object could not be
    instantiated. """


class Source(Bcfg2.Server.Plugin.Debuggable):
    """ Base class for package sources.  One ``Source`` object
    represents a single <Source> tag in ``sources.xml``; it is meant
    to be subclassed for each repository type. """

    # The three regexes below are tried in this order by
    # get_repo_name() to pull a short repository name out of a URL.

    # captures <name> from a "/RPMS.<name>" path segment
    mrepo_re = re.compile(r'/RPMS\.([^/]+)')
    # captures the path segment that follows "pulp/repos/"
    pulprepo_re = re.compile(r'pulp/repos/([^/]+)')
    # captures the last path segment of an http(s) URL
    genericrepo_re = re.compile(r'https?://.*?/([^/]+)/?$')

    # groups consulted by get_relevant_groups() and
    # magic_groups_match(); empty in the base class
    basegroups = []

    # Predicate used by filter_unknown() to decide which package names
    # to drop.  It is wrapped in staticmethod() because a bare function
    # stored on the class is bound on instance access, which would pass
    # ``self`` as the package name and make the one-argument call in
    # filter_unknown() raise TypeError.
    unknown_filter = staticmethod(lambda p: p.startswith("choice"))

    def __init__(self, basepath, xsource, setup):
        """ Build a source from one <Source> element.

        :param basepath: Directory under which the cache file and the
                         downloaded files of this source are stored
        :param xsource: The XML element describing this source.  It
                        must support ``find``, ``findall``, ``get``
                        and ``iterancestors``.
        :param setup: Stored as ``self.setup``; its ``cfp`` member is
                      consulted by :func:`magic_groups_match`
        """
        Bcfg2.Server.Plugin.Debuggable.__init__(self)
        self.basepath = basepath
        self.xsource = xsource
        self.setup = setup
        self.essentialpkgs = set()
        self.pkgnames = set()

        def child_texts(tag):
            """ text of every ``tag`` child of the source element """
            return [child.text for child in xsource.findall(tag)]

        def flag(attr, default):
            """ True if the (case-insensitive) attribute is "true" """
            return xsource.get(attr, default).lower() == 'true'

        def slashed(addr):
            """ append a trailing slash to a non-empty address """
            if addr and not addr.endswith("/"):
                return addr + "/"
            return addr

        # NOTE(review): this value never survives; self.version is
        # reassigned from the "version" attribute further down.
        try:
            self.version = xsource.find('Version').text
        except AttributeError:
            self.version = None

        self.components = child_texts('Component')
        self.arches = child_texts('Arch')
        self.blacklist = child_texts('Blacklist')
        self.whitelist = child_texts('Whitelist')

        # <Options> attributes go to the server side, the client side,
        # or both, depending on the clientonly/serveronly markers
        self.server_options = dict()
        self.client_options = dict()
        for opt_el in xsource.findall("Options"):
            repoopts = dict()
            for attr, val in opt_el.attrib.items():
                if attr not in ("clientonly", "serveronly"):
                    repoopts[attr] = val
            if opt_el.get("clientonly", "false").lower() == "false":
                self.server_options.update(repoopts)
            if opt_el.get("serveronly", "false").lower() == "false":
                self.client_options.update(repoopts)

        self.gpgkeys = child_texts("GPGKey")

        self.essential = flag('essential', 'true')
        self.recommended = flag('recommended', 'false')

        self.rawurl = slashed(xsource.get('rawurl', ''))
        self.url = slashed(xsource.get('url', ''))
        self.version = xsource.get('version', '')

        # Every enclosing <Group>/<Client> tag becomes a predicate over
        # client metadata; applies() requires all of them to hold.
        self.conditions = []
        self.groups = []  # provided for some limited backwards compat
        for ancestor in xsource.iterancestors():
            if ancestor.tag not in ("Group", "Client"):
                continue
            negated = ancestor.get("negate", "false").lower() == "true"
            if ancestor.tag == "Group":
                if negated:
                    check = lambda m, el=ancestor: \
                        el.get("name") not in m.groups
                else:
                    self.groups.append(ancestor.get("name"))
                    check = lambda m, el=ancestor: \
                        el.get("name") in m.groups
            else:
                if negated:
                    check = lambda m, el=ancestor: \
                        el.get("name") != m.hostname
                else:
                    check = lambda m, el=ancestor: \
                        el.get("name") == m.hostname
            self.conditions.append(check)

        self.deps = dict()
        self.provides = dict()

        self.cachefile = os.path.join(self.basepath,
                                      "cache-%s" % self.cachekey)

        # a rawurl is used verbatim; a url is expanded per version,
        # component and arch
        if self.rawurl:
            self.baseurl = self.rawurl
            root = self.rawurl
        else:
            self.baseurl = self.url + "%(version)s/%(component)s/%(arch)s/"
            root = self.url

        self.url_map = []
        for arch in self.arches:
            if self.url:
                comps = self.components
            else:  # rawurl given
                comps = [None]
            for comp in comps:
                setting = dict(version=self.version, component=comp,
                               arch=arch)
                setting['baseurl'] = root
                setting['url'] = self.baseurl % setting
                self.url_map.append(setting)

    @property
    def cachekey(self):
        return md5(cPickle.dumps([self.version, self.components, self.url,
                                  self.rawurl, self.arches])).hexdigest()

    def get_relevant_groups(self, metadata):
        return sorted(list(set([g for g in metadata.groups
                                if (g in self.basegroups or
                                    g in self.groups or
                                    g in self.arches)])))

    def load_state(self):
        data = open(self.cachefile)
        (self.pkgnames, self.deps, self.provides,
         self.essentialpkgs) = cPickle.load(data)

    def save_state(self):
        cache = open(self.cachefile, 'wb')
        cPickle.dump((self.pkgnames, self.deps, self.provides,
                      self.essentialpkgs), cache, 2)
        cache.close()

    def setup_data(self, force_update=False):
        should_read = True
        should_download = False
        if os.path.exists(self.cachefile):
            try:
                self.load_state()
                should_read = False
            except:
                self.logger.error("Packages: Cachefile %s load failed; "
                                  "falling back to file read" % self.cachefile)
        if should_read:
            try:
                self.read_files()
            except:
                self.logger.error("Packages: File read failed; "
                                  "falling back to file download")
                should_download = True

        if should_download or force_update:
            try:
                self.update()
                self.read_files()
            except:
                self.logger.error("Packages: Failed to load data for Source "
                                  "of %s. Some Packages will be missing." %
                                  self.urls)

    def get_repo_name(self, url_map):
        # try to find a sensible name for a repo
        if url_map['component']:
            rname = url_map['component']
        else:
            name = None
            for repo_re in (self.mrepo_re,
                            self.pulprepo_re,
                            self.genericrepo_re):
                match = repo_re.search(url_map['url'])
                if match:
                    name = match.group(1)
                    break
            if name and self.groups:
                rname = "%s-%s" % (self.groups[0], name)
            elif self.groups:
                rname = self.groups[0]
            else:
                # a global source with no reasonable name.  just use
                # the full url and let the regex below make it even
                # uglier.
                rname = url_map['url']
        # see yum/__init__.py in the yum source, lines 441-449, for
        # the source of this regex.  yum doesn't like anything but
        # string.ascii_letters, string.digits, and [-_.:].  There
        # doesn't seem to be a reason for this, because yum.
        return re.sub(r'[^A-Za-z0-9-_.:]', '-', rname)

    def __str__(self):
        if self.rawurl:
            return "%s at %s" % (self.__class__.__name__, self.rawurl)
        elif self.url:
            return "%s at %s" % (self.__class__.__name__, self.url)
        else:
            return self.__class__.__name__

    def __repr__(self):
        return str(self)

    def get_urls(self):
        return []
    urls = property(get_urls)

    def get_files(self):
        return [self.escape_url(url) for url in self.urls]
    files = property(get_files)

    def get_vpkgs(self, metadata):
        agroups = ['global'] + [a for a in self.arches
                                if a in metadata.groups]
        vdict = dict()
        for agrp in agroups:
            if agrp not in self.provides:
                self.logger.warning("%s provides no packages for %s" %
                                    (self, agrp))
                continue
            for key, value in list(self.provides[agrp].items()):
                if key not in vdict:
                    vdict[key] = set(value)
                else:
                    vdict[key].update(value)
        return vdict

    def is_virtual_package(self, metadata, package):
        """ called to determine if a package is a virtual package.
        this is only invoked if the package is not listed in the dict
        returned by get_vpkgs """
        return False

    def escape_url(self, url):
        return os.path.join(self.basepath, url.replace('/', '@'))

    def read_files(self):
        pass

    def process_files(self, deps, prov):
        self.deps['global'] = dict()
        self.provides['global'] = dict()
        for barch in deps:
            self.deps[barch] = dict()
            self.provides[barch] = dict()
        for pkgname in self.pkgnames:
            pset = set()
            for barch in deps:
                if pkgname not in deps[barch]:
                    deps[barch][pkgname] = []
                pset.add(tuple(deps[barch][pkgname]))
            if len(pset) == 1:
                self.deps['global'][pkgname] = pset.pop()
            else:
                for barch in deps:
                    self.deps[barch][pkgname] = deps[barch][pkgname]
        provided = set()
        for bprovided in list(prov.values()):
            provided.update(set(bprovided))
        for prov in provided:
            prset = set()
            for barch in prov:
                if prov not in prov[barch]:
                    continue
                prset.add(tuple(prov[barch].get(prov, ())))
            if len(prset) == 1:
                self.provides['global'][prov] = prset.pop()
            else:
                for barch in prov:
                    self.provides[barch][prov] = prov[barch].get(prov, ())
        self.save_state()

    def filter_unknown(self, unknown):
        unknown.difference_update(set([u for u in unknown
                                       if self.unknown_filter(u)]))

    def update(self):
        """ Download every URL in ``self.urls`` and store its content
        in the file named by :func:`escape_url`.

        :raises: ValueError - a URL was malformed (logged, re-raised)
        :raises: HTTPError - a download failed (logged, re-raised)
        """
        for url in self.urls:
            self.logger.info("Packages: Updating %s" % url)
            fname = self.escape_url(url)
            try:
                data = fetch_url(url)
                # the payload is raw bytes off the wire; write it in
                # binary mode and make sure the handle is closed
                cache = open(fname, 'wb')
                try:
                    cache.write(data)
                finally:
                    cache.close()
            except ValueError:
                self.logger.error("Packages: Bad url string %s" % url)
                raise
            except HTTPError:
                err = sys.exc_info()[1]
                self.logger.error("Packages: Failed to fetch url %s. HTTP "
                                  "response code=%s" % (url, err.code))
                raise

    def applies(self, metadata):
        # check base groups
        if not self.magic_groups_match(metadata):
            return False

        # check Group/Client tags from sources.xml
        for condition in self.conditions:
            if not condition(metadata):
                return False

        return True

    def get_arches(self, metadata):
        return ['global'] + [a for a in self.arches if a in metadata.groups]

    def get_deps(self, metadata, pkgname):
        for arch in self.get_arches(metadata):
            if pkgname in self.deps[arch]:
                return self.deps[arch][pkgname]
        return []

    def get_provides(self, metadata, required):
        for arch in self.get_arches(metadata):
            if required in self.provides[arch]:
                return self.provides[arch][required]
        return []

    def is_package(self, metadata, pkg):
        return (pkg in self.pkgnames and
                pkg not in self.blacklist and
                (len(self.whitelist) == 0 or pkg in self.whitelist))

    def get_package(self, metadata, package):
        return package

    def get_group(self, metadata, group, ptype=None):
        return []

    def magic_groups_match(self, metadata):
        """ check to see if this source applies to the given host
        metadata by checking 'magic' (base) groups only, or if magic
        groups are off """
        # we always check that arch matches
        found_arch = False
        for arch in self.arches:
            if arch in metadata.groups:
                found_arch = True
                break
        if not found_arch:
            return False

        if not self.setup.cfp.getboolean("packages", "magic_groups",
                                         default=True):
            return True
        else:
            for group in self.basegroups:
                if group in metadata.groups:
                    return True
            return False