summaryrefslogtreecommitdiffstats
path: root/src/lib/Bcfg2/Server/Plugins/Packages/Source.py
blob: 2c2783394e99dcaecc8f50dcafbe3cd0e87e486f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
""" ``Source`` objects represent a single <Source> tag in
``sources.xml``.  Note that a single Source tag can itself describe
multiple repositories (if it uses the "url" attribute instead of
"rawurl"), and so can the ``Source`` object.  This can be the source
(har har) of some confusion.  See
:func:`Bcfg2.Server.Plugins.Packages.Collection.Collection.sourcelist`
for the proper way to get all repos from a ``Source`` object.

Source objects are aggregated into
:class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`
objects, which are actually called by
:class:`Bcfg2.Server.Plugins.Packages.Packages`.  This way a more
advanced subclass can query repositories in aggregate rather than
individually, which may give faster or more accurate results.

The base ``Source`` object must be subclassed to handle each
repository type.  How you subclass ``Source`` will depend on how you
subclassed
:class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`; see
:mod:`Bcfg2.Server.Plugins.Packages.Collection` for more details on
different methods for doing that.

If you are using the stock (or a near-stock)
:class:`Bcfg2.Server.Plugins.Packages.Collection.Collection` object,
then you will need to implement the following methods and attributes
in your ``Source`` subclass:

* :func:`Source.urls`
* :func:`Source.read_files`

Additionally, you may want to consider overriding the following
methods and attributes:

* :func:`Source.is_virtual_package`
* :func:`Source.get_group`
* :attr:`Source.unknown_filter`
* :attr:`Source.load_state`
* :attr:`Source.save_state`

For an example of this kind of ``Source`` object, see
:mod:`Bcfg2.Server.Plugins.Packages.Apt`.

If you are overriding the ``Collection`` object in more depth, then
you have more leeway in what you might want to override or implement
in your ``Source`` subclass.  For an example of this kind of
``Source`` object, see :mod:`Bcfg2.Server.Plugins.Packages.Yum`.
"""

import os
import re
import sys
from Bcfg2.Logger import Debuggable
from Bcfg2.Compat import HTTPError, HTTPBasicAuthHandler, \
    HTTPPasswordMgrWithDefaultRealm, install_opener, build_opener, urlopen, \
    cPickle, md5
from Bcfg2.Server.Statistics import track_statistics


def fetch_url(url):
    """ Return the content of the given URL.

    If the URL contains an ``@``, it is expected to embed credentials
    in the form ``scheme://user:password@rest``.  The credentials are
    stripped from the URL and registered with an HTTP basic auth
    handler, which is then installed via ``install_opener`` before the
    request is made.

    :param url: The URL to fetch content from.
    :type url: string
    :raises: ValueError - Malformed URL
    :raises: URLError - Failure fetching URL
    :returns: string - the content of the page at the given URL """
    if '@' in url:
        mobj = re.match(r'(\w+://)([^:]+):([^@]+)@(.*)$', url)
        if not mobj:
            raise ValueError("Invalid URL")
        user = mobj.group(2)
        passwd = mobj.group(3)
        # rebuild the URL without the embedded credentials
        url = mobj.group(1) + mobj.group(4)
        auth = HTTPBasicAuthHandler(HTTPPasswordMgrWithDefaultRealm())
        auth.add_password(None, url, user, passwd)
        install_opener(build_opener(auth))
    response = urlopen(url)
    # always release the connection, even if reading fails
    try:
        return response.read()
    finally:
        response.close()


class SourceInitError(Exception):
    """ Signals that a :class:`Source` object could not be
    instantiated. """


#: A regular expression used to determine the base name of a repo from
#: its URL.  This is used when generating repo configs and by
#: :func:`Source.get_repo_name`.  It handles `Pulp
#: <http://www.pulpproject.org/>`_ and `mrepo
#: <http://dag.wieers.com/home-made/mrepo/>`_ repositories specially,
#: and otherwise grabs the last component of the URL (as delimited by
#: slashes).  The name is captured in group 1; a single trailing
#: slash on the URL is tolerated and not included in the capture.
REPO_RE = re.compile(r'(?:pulp/repos/|/RPMS\.|/)([^/]+)/?$')


class Source(Debuggable):  # pylint: disable=R0902
    """ ``Source`` objects represent a single <Source> tag in
    ``sources.xml``.  Note that a single Source tag can itself
    describe multiple repositories (if it uses the "url" attribute
    instead of "rawurl"), and so can the ``Source`` object.

    Note that a number of the attributes of this object may be more or
    less specific to one backend (e.g., :attr:`essentialpkgs`,
    :attr:`recommended`, :attr:`gpgkeys`), but they are included in the
    superclass to make the parsing of sources from XML more
    consistent, and to make it trivial for other backends to support
    those features.
    """

    #: The Package type handled by this Source class.  The ``type``
    #: attribute of Package entries will be set to the value ``ptype``
    #: when they are handled by :mod:`Bcfg2.Server.Plugins.Packages`.
    ptype = None

    def __init__(self, basepath, xsource):  # pylint: disable=R0912
        """ Parse a single <Source> tag into attributes on this object
        and build the list of repository URLs it describes.

        :param basepath: The base filesystem path under which cache
                         data for this source should be stored
        :type basepath: string
        :param xsource: The XML tag that describes this source
        :type xsource: lxml.etree._Element
        :raises: :class:`Bcfg2.Server.Plugins.Packages.Source.SourceInitError`
        """
        Debuggable.__init__(self)

        #: The base filesystem path under which cache data for this
        #: source should be stored
        self.basepath = basepath

        #: The XML tag that describes this source
        self.xsource = xsource

        #: A set of package names that are deemed "essential" by this
        #: source
        self.essentialpkgs = set()

        #: A list of the text of all 'Component' attributes of this
        #: source from XML
        self.components = [item.text for item in xsource.findall('Component')]

        #: A list of the arches supported by this source
        self.arches = [item.text for item in xsource.findall('Arch')]

        #: A list of the names of packages that are blacklisted
        #: from this source
        self.blacklist = [item.text for item in xsource.findall('Blacklist')]

        #: A list of the names of packages that are whitelisted in
        #: this source
        self.whitelist = [item.text for item in xsource.findall('Whitelist')]

        #: Whether or not to include deb-src lines in the generated APT
        #: configuration
        self.debsrc = xsource.get('debsrc', 'false') == 'true'

        #: A dict of repository options that will be included in the
        #: configuration generated on the server side (if such is
        #: applicable; most backends do not generate any sort of
        #: repository configuration on the Bcfg2 server)
        self.server_options = dict()

        #: A dict of repository options that will be included in the
        #: configuration generated for the client (if that is
        #: supported by the backend)
        self.client_options = dict()
        opts = xsource.findall("Options")
        for el in opts:
            # every attribute except the two routing flags is an option
            repoopts = dict([(k, v)
                             for k, v in el.attrib.items()
                             if k != "clientonly" and k != "serveronly"])
            # options apply to both sides unless explicitly restricted
            if el.get("clientonly", "false").lower() == "false":
                self.server_options.update(repoopts)
            if el.get("serveronly", "false").lower() == "false":
                self.client_options.update(repoopts)

        #: A list of URLs to GPG keys that apply to this source
        self.gpgkeys = [el.text for el in xsource.findall("GPGKey")]

        #: Whether or not to include essential packages from this source
        self.essential = xsource.get('essential', 'true').lower() == 'true'

        #: Whether or not to include recommended packages from this source
        self.recommended = xsource.get('recommended',
                                       'false').lower() == 'true'

        #: The "rawurl" attribute from :attr:`xsource`, if applicable.
        #: A trailing slash is automatically appended to this if there
        #: wasn't one already present.
        self.rawurl = xsource.get('rawurl', '')
        if self.rawurl and not self.rawurl.endswith("/"):
            self.rawurl += "/"

        #: The "url" attribute from :attr:`xsource`, if applicable.  A
        #: trailing slash is automatically appended to this if there
        #: wasn't one already present.
        self.url = xsource.get('url', '')
        if self.url and not self.url.endswith("/"):
            self.url += "/"

        #: The "version" attribute from :attr:`xsource`
        self.version = xsource.get('version', '')

        #: The "name" attribute from :attr:`xsource`
        self.name = xsource.get('name', None)

        #: The "priority" attribute from :attr:`xsource`
        # NOTE(review): the default is the int 500, but a value read
        # from the XML attribute is presumably a string -- confirm
        # that consumers cope with both types.
        self.priority = xsource.get('priority', 500)

        #: A list of predicates that are used to determine if this
        #: source applies to a given
        #: :class:`Bcfg2.Server.Plugins.Metadata.ClientMetadata`
        #: object.
        self.conditions = []
        #: Formerly, :ref:`server-plugins-generators-packages` only
        #: supported applying package sources to groups; that is, they
        #: could not be assigned by more complicated logic like
        #: per-client repositories and group or client negation.  This
        #: attribute attempts to provide for some limited backwards
        #: compat with older code that relies on this.
        self.groups = []
        # Each enclosing <Group>/<Client> tag contributes one
        # predicate.  ``el=el`` binds the current element as a default
        # argument so each lambda keeps its own tag rather than the
        # last one seen by the loop.  Negated groups are deliberately
        # not recorded in self.groups.
        for el in xsource.iterancestors():
            if el.tag == "Group":
                if el.get("negate", "false").lower() == "true":
                    self.conditions.append(lambda m, el=el:
                                           el.get("name") not in m.groups)
                else:
                    self.groups.append(el.get("name"))
                    self.conditions.append(lambda m, el=el:
                                           el.get("name") in m.groups)
            elif el.tag == "Client":
                if el.get("negate", "false").lower() == "true":
                    self.conditions.append(lambda m, el=el:
                                           el.get("name") != m.hostname)
                else:
                    self.conditions.append(lambda m, el=el:
                                           el.get("name") == m.hostname)

        #: A set of all package names in this source.  This will not
        #: necessarily be populated, particularly by backends that
        #: reimplement large portions of
        #: :class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`
        self.pkgnames = set()

        #: A dict of ``<package name>`` -> ``<list of dependencies>``.
        #: This will not necessarily be populated, particularly by
        #: backends that reimplement large portions of
        #: :class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`
        self.deps = dict()

        #: A dict of ``<package name>`` -> ``<list of provided
        #: symbols>``.  This will not necessarily be populated,
        #: particularly by backends that reimplement large portions of
        #: :class:`Bcfg2.Server.Plugins.Packages.Collection.Collection`
        self.provides = dict()

        #: A dict of ``<package name>`` -> ``<list of recommended
        #: symbols>``.  This will not necessarily be populated.
        self.recommends = dict()

        #: The file (or directory) used for this source's cache data
        # (cachekey is derived from version, components, url, rawurl
        # and arches, all of which have been set above)
        self.cachefile = os.path.join(self.basepath,
                                      "cache-%s" % self.cachekey)
        # "url" sources are expanded per version/component/arch;
        # "rawurl" sources are used verbatim
        if not self.rawurl:
            baseurl = self.url + "%(version)s/%(component)s/%(arch)s/"
        else:
            baseurl = self.rawurl

        #: A list of dicts, each of which describes the URL to one
        #: repository contained in this source.  Each dict contains
        #: the following keys:
        #:
        #: * ``version``: The "version" attribute of this source
        #: * ``component``: The component used to form this URL
        #:   (``None`` for ``rawurl`` repos)
        #: * ``arch``: The architecture of this repo
        #: * ``debsrc``: The value of :attr:`debsrc`
        #: * ``baseurl``: Either the ``rawurl`` attribute or the
        #:   ``url`` attribute
        #: * ``url``: The actual URL to the repository
        #: * ``name``: The repository name as returned by
        #:   :func:`get_repo_name`
        self.url_map = []
        for arch in self.arches:
            if self.url:
                usettings = [dict(version=self.version, component=comp,
                                  arch=arch, debsrc=self.debsrc)
                             for comp in self.components]
            else:  # rawurl given
                usettings = [dict(version=self.version, component=None,
                                  arch=arch, debsrc=self.debsrc)]

            for setting in usettings:
                if not self.rawurl:
                    setting['baseurl'] = self.url
                else:
                    setting['baseurl'] = self.rawurl
                # fill the format string (a no-op for rawurl, which
                # normally contains no placeholders)
                setting['url'] = baseurl % setting
                setting['name'] = self.get_repo_name(setting)
            self.url_map.extend(usettings)

    @property
    def cachekey(self):
        """ A unique key for this source that will be used to generate
        :attr:`cachefile` and other cache paths """
        return md5(cPickle.dumps([self.version, self.components, self.url,
                                  self.rawurl, self.arches])).hexdigest()

    def get_relevant_groups(self, metadata):
        """ Get all groups that might be relevant to determining which
        sources apply to this collection's client.

        :return: list of strings - group names
        """
        return sorted(list(set([g for g in metadata.groups
                                if (g in self.groups or
                                    g in self.arches)])))

    def load_state(self):
        """ Load saved state from :attr:`cachefile`.  If caching and
        state is handled by the package library, then this function
        does not need to be implemented.

        :raises: OSError - If the saved data cannot be read
        :raises: cPickle.UnpicklingError - If the saved data is corrupt """
        data = open(self.cachefile, 'rb')
        (self.pkgnames, self.deps, self.provides,
         self.essentialpkgs, self.recommends) = cPickle.load(data)

    def save_state(self):
        """ Save state to :attr:`cachefile`.  If caching and
        state is handled by the package library, then this function
        does not need to be implemented. """
        cache = open(self.cachefile, 'wb')
        cPickle.dump((self.pkgnames, self.deps, self.provides,
                      self.essentialpkgs, self.recommends), cache, 2)
        cache.close()

    @track_statistics()
    def setup_data(self, force_update=False):
        """Fetch, parse and cache all data needed by this source.

        For most backends, this means downloading the repository
        metadata, parsing it, and caching the parsed result locally.
        The steps are tried in this order:

        #. :func:`load_state`, to load data from the local cache (only
           if :attr:`cachefile` exists).
        #. If that fails, :func:`read_files`, to read and parse the
           metadata files that were downloaded previously.
        #. If that fails too (or there is no cache file), :func:`update`
           to fetch the metadata, followed by :func:`read_files` to
           parse it.

        Backends that use repo access libraries instead of downloading
        all metadata can make many of these functions no-ops (or
        nearly so).

        :param force_update: Skip the local cache and previously
                             downloaded data, and fetch the metadata
                             anew from the upstream repository.
        :type force_update: bool
        """
        # The wildcard except statements below are intentional:
        # ``load_state``, ``read_files`` and ``update`` are implemented
        # by the individual Packages backends, so we cannot know which
        # exceptions they raise.
        #
        # TODO: we should define an exception subclass that each of
        # these functions can raise when an *expected* error condition
        # is encountered.
        #
        # pylint: disable=W0702
        refetch = force_update
        if not refetch:
            if not os.path.exists(self.cachefile):
                # nothing cached yet, so go straight to the download
                refetch = True
            else:
                try:
                    self.load_state()
                except (OSError, cPickle.UnpicklingError):
                    load_err = sys.exc_info()[1]
                    self.logger.error("Packages: Cachefile %s load failed: %s"
                                      % (self.cachefile, load_err))
                    self.logger.error("Falling back to file read")

                    try:
                        self.read_files()
                    except:
                        read_err = sys.exc_info()[1]
                        self.logger.error("Packages: File read failed: %s" %
                                          read_err)
                        self.logger.error("Falling back to file download")
                        refetch = True

        if not refetch:
            return

        try:
            self.update()
            self.read_files()
        except:
            fetch_err = sys.exc_info()[1]
            self.logger.error("Packages: Failed to load data for %s: %s" %
                              (self, fetch_err))
            self.logger.error("Some Packages will be missing")
        # pylint: enable=W0702

    def get_repo_name(self, url_map):
        """ Try to find a sensible name for a repository. Since
        ``sources.xml`` doesn't provide for repository names, we have
        to try to guess at the names when generating config files or
        doing other operations that require repository names.  This
        function tries several approaches:

        #. First, if the source element containts a ``name`` attribute,
           use that as the name.
        #. If the map contains a ``component`` key, use that as the
           name.
        #. If not, then try to match the repository URL against
           :attr:`Bcfg2.Server.Plugins.Packages.Source.REPO_RE`.  If
           that succeeds, use the first matched group; additionally,
           if the Source tag that describes this repo is contained in
           a ``<Group>`` tag, prepend that to the name.
        #. If :attr:`Bcfg2.Server.Plugins.Packages.Source.REPO_RE`
           does not match the repository, and the Source tag that
           describes this repo is contained in a ``<Group>`` tag, use
           the name of the group.
        #. Failing that, use the full URL to this repository, with the
           protocol and trailing slash stripped off if possible.

        Once that is done, all characters disallowed in yum source
        names are replaced by dashes.  See below for the exact regex.
        The yum rules are used here because they are so restrictive.

        ``get_repo_name`` is **not** guaranteed to return a unique
        name.  If you require a unique name, then you will need to
        generate all repo names and make them unique through the
        approach of your choice, e.g., appending numbers to non-unique
        repository names.  See
        :func:`Bcfg2.Server.Plugins.Packages.Yum.Source.get_repo_name`
        for an example.

        :param url_map: A single :attr:`url_map` dict, i.e., any
                        single element of :attr:`url_map`.
        :type url_map: dict
        :returns: string - the name of the repository.
        """
        if self.name:
            return self.name

        if url_map['component']:
            rname = url_map['component']
        else:
            match = REPO_RE.search(url_map['url'])
            if match:
                rname = match.group(1)
                if self.groups:
                    rname = "%s-%s" % (self.groups[0], rname)
            elif self.groups:
                rname = self.groups[0]
            else:
                # a global source with no reasonable name.  Try to
                # strip off the protocol and trailing slash.
                match = re.search(r'^[A-z]://(.*?)/?', url_map['url'])
                if match:
                    rname = match.group(1)
                else:
                    # what kind of crazy url is this?  I give up!
                    # just use the full url and let the regex below
                    # make it even uglier.
                    rname = url_map['url']
        # see yum/__init__.py in the yum source, lines 441-449, for
        # the source of this regex.  yum doesn't like anything but
        # string.ascii_letters, string.digits, and [-_.:].  There
        # doesn't seem to be a reason for this, because yum.
        return re.sub(r'[^A-Za-z0-9-_.:]', '-', rname)

    def __repr__(self):
        if self.rawurl:
            return "%s at %s" % (self.__class__.__name__, self.rawurl)
        elif self.url:
            return "%s at %s" % (self.__class__.__name__, self.url)
        else:
            return self.__class__.__name__

    @property
    def urls(self):
        """ A list of URLs to the base metadata file for each
        repository described by this source. """
        return []

    @property
    def files(self):
        """ A list of files stored in the local cache by this backend.
        """
        return [self.escape_url(url) for url in self.urls]

    def get_vpkgs(self, metadata):
        """ Get a list of all virtual packages provided by all sources.

        :returns: list of strings
        """
        agroups = ['global'] + [a for a in self.arches
                                if a in metadata.groups]
        vdict = dict()
        for agrp in agroups:
            if agrp not in self.provides:
                self.logger.warning("%s provides no packages for %s" %
                                    (self, agrp))
                continue
            for key, value in list(self.provides[agrp].items()):
                if key not in vdict:
                    vdict[key] = set(value)
                else:
                    vdict[key].update(value)
        return vdict

    def is_virtual_package(self, metadata, package):  # pylint: disable=W0613
        """ Return True if a name is a virtual package (i.e., is a
        symbol provided by a real package), False otherwise.

        :param package: The name of the symbol, but see :ref:`pkg-objects`
        :type package: string
        :returns: bool
        """
        return False

    def escape_url(self, url):
        """ Given a URL to a repository metadata file, return the full
        path to a file suitable for storing that file locally.  This
        is acheived by replacing all forward slashes in the URL with
        ``@``.

        :param url: The URL to escape
        :type url: string
        :returns: string
        """
        return os.path.join(self.basepath, url.replace('/', '@'))

    def read_files(self):
        """ Read and parse locally downloaded metadata files and
        populates
        :attr:`Bcfg2.Server.Plugins.Packages.Source.Source.pkgnames`. Should
        call
        :func:`Bcfg2.Server.Plugins.Packages.Source.Source.process_files`
        as its final step."""
        pass

    def process_files(self, dependencies,  # pylint: disable=R0912,W0102
                      provides, recommends=dict()):
        """ Given dicts of depends and provides generated by
        :func:`read_files`, this generates :attr:`deps` and
        :attr:`provides` and calls :func:`save_state` to save the
        cached data to disk.

        All arguments are dicts of dicts of lists.  Keys are the
        arches of packages contained in this source; values are dicts
        whose keys are package names and values are lists of either
        dependencies for each package the symbols provided by each
        package.

        :param dependencies: A dict of dependencies found in the
                             metadata for this source.
        :type dependencies: dict; see above.
        :param provides: A dict of symbols provided by packages in
                        this repository.
        :type provides: dict; see above.
        :param recommends: A dict of recommended dependencies
                           found for this source.
        :type recommends: dict; see above.
        """
        self.deps['global'] = dict()
        self.recommends['global'] = dict()
        self.provides['global'] = dict()
        for barch in dependencies:
            self.deps[barch] = dict()
            self.recommends[barch] = dict()
            self.provides[barch] = dict()
        for pkgname in self.pkgnames:
            pset = set()
            rset = set()
            for barch in dependencies:
                if pkgname not in dependencies[barch]:
                    dependencies[barch][pkgname] = []
                pset.add(tuple(dependencies[barch][pkgname]))
            if len(pset) == 1:
                self.deps['global'][pkgname] = pset.pop()
            else:
                for barch in dependencies:
                    self.deps[barch][pkgname] = dependencies[barch][pkgname]

            for barch in recommends:
                if pkgname not in recommends[barch]:
                    recommends[barch][pkgname] = []
                rset.add(tuple(recommends[barch][pkgname]))
            if len(rset) == 1:
                self.recommends['global'][pkgname] = rset.pop()
            else:
                for barch in recommends:
                    self.recommends[barch][pkgname] = \
                        recommends[barch][pkgname]

        provided = set()
        for bprovided in list(provides.values()):
            provided.update(set(bprovided))
        for prov in provided:
            prset = set()
            for barch in provides:
                if prov not in provides[barch]:
                    continue
                prset.add(tuple(provides[barch].get(prov, ())))
            if len(prset) == 1:
                self.provides['global'][prov] = prset.pop()
            else:
                for barch in provides:
                    self.provides[barch][prov] = provides[barch].get(prov, ())
        self.save_state()

    def unknown_filter(self, package):
        """ A predicate that is used by :func:`filter_unknown` to
        filter packages from the results of
        :func:`Bcfg2.Server.Plugins.Packages.Collection.Collection.complete`
        that should not be shown to the end user (i.e., that are not
        truly unknown, but are rather packaging system artifacts).  By
        default, excludes any package whose name starts with "choice"

        :param package: The name of a package that was unknown to the
                        backend
        :type package: string
        :returns: bool
        """
        return package.startswith("choice")

    def filter_unknown(self, unknown):
        """ After
        :func:`Bcfg2.Server.Plugins.Packages.Collection.Collection.complete`,
        filter out packages that appear in the list of unknown
        packages but should not be presented to the user.
        :attr:`unknown_filter` is called to assess whether or not a
        package is expected to be unknown.

        :param unknown: A set of unknown packages.  The set should be
                        modified in place.
        :type unknown: set of strings
        """
        unknown.difference_update(set([u for u in unknown
                                       if self.unknown_filter(u)]))

    def update(self):
        """ Download metadata from the upstream repository and cache
        it locally.

        :raises: ValueError - If any URL in :attr:`urls` is malformed
        :raises: OSError - If there is an error writing the local
                 cache
        :raises: HTTPError - If there is an error fetching the remote
                 data
        """
        for url in self.urls:
            self.logger.info("Packages: Updating %s" % url)
            fname = self.escape_url(url)
            try:
                open(fname, 'wb').write(fetch_url(url))
            except ValueError:
                self.logger.error("Packages: Bad url string %s" % url)
                raise
            except OSError:
                err = sys.exc_info()[1]
                self.logger.error("Packages: Could not write data from %s to "
                                  "local cache at %s: %s" % (url, fname, err))
                raise
            except HTTPError:
                err = sys.exc_info()[1]
                self.logger.error("Packages: Failed to fetch url %s. HTTP "
                                  "response code=%s" % (url, err.code))
                raise

    def applies(self, metadata):
        """ Return true if this source applies to the given client,
        i.e., the client is in all necessary groups.

        :param metadata: The client metadata to check to see if this
                         source applies
        :type metadata: Bcfg2.Server.Plugins.Metadata.ClientMetadata
        :returns: bool
        """
        # check arch groups
        if not self.arch_groups_match(metadata):
            return False

        # check Group/Client tags from sources.xml
        for condition in self.conditions:
            if not condition(metadata):
                return False

        return True

    def get_arches(self, metadata):
        """ Get a list of architectures that the given client has and
        for which this source provides packages for.  The return value
        will always include ``global``.

        :param metadata: The client metadata to get matching
                         architectures for
        :type metadata: Bcfg2.Server.Plugins.Metadata.ClientMetadata
        :returns: list of strings
        """
        return ['global'] + [a for a in self.arches if a in metadata.groups]

    def get_deps(self, metadata, package, recommended=None):
        """ Get a list of the dependencies of the given package.

        :param package: The name of the symbol
        :type package: string
        :returns: list of strings
        """
        recs = []
        if ((recommended is None and self.recommended) or
            (recommended and recommended.lower() == 'true')):
            for arch in self.get_arches(metadata):
                if package in self.recommends[arch]:
                    recs.extend(self.recommends[arch][package])

        for arch in self.get_arches(metadata):
            if package in self.deps[arch]:
                recs.extend(self.deps[arch][package])
        return recs

    def get_provides(self, metadata, package):
        """ Get a list of all symbols provided by the given package.

        :param package: The name of the package
        :type package: string
        :returns: list of strings
        """
        for arch in self.get_arches(metadata):
            if package in self.provides[arch]:
                return self.provides[arch][package]
        return []

    def is_package(self, metadata, package):  # pylint: disable=W0613
        """ Return True if a package is a package, False otherwise.

        :param package: The name of the package
        :type package: string
        :returns: bool
        """
        return (package in self.pkgnames and
                package not in self.blacklist and
                (len(self.whitelist) == 0 or package in self.whitelist))

    def get_group(self, metadata, group, ptype=None):  # pylint: disable=W0613
        """ Get the list of packages of the given type in a package
        group.

        :param group: The name of the group to query
        :type group: string
        :param ptype: The type of packages to get, for backends that
                      support multiple package types in package groups
                      (e.g., "recommended," "optional," etc.)
        :type ptype: string
        :returns: list of strings - package names
        """
        return []

    def arch_groups_match(self, metadata):
        """ Returns True if the client is in an arch group that
        matches the arch of this source.

        :returns: bool
        """
        for arch in self.arches:
            if arch in metadata.groups:
                return True
        return False