# path: src/lib/Bcfg2/Server/Lint/MergeFiles.py
# blob: 972475d913dfe07d732d8c06d241bdc45d1a9f89
""" find Probes or Cfg files with multiple similar files that might be
merged into one """

import os
import copy
from difflib import SequenceMatcher
import Bcfg2.Server.Lint
from Bcfg2.Server.Plugins.Cfg import CfgGenerator


def threshold(val):
    """ Option type processor that converts ``val`` to a float ratio.

    Values greater than 1 are read as percentages and divided by 100
    (e.g., "threshold=75" gives 0.75); any other value is returned as
    given (e.g., "threshold=.75" gives 0.75).  ``float()`` raises
    ValueError if ``val`` is not numeric. """
    ratio = float(val)
    if ratio > 1:
        # anything above 1 cannot be a ratio, so treat it as a percentage
        return ratio / 100
    return ratio


class MergeFiles(Bcfg2.Server.Lint.ServerPlugin):
    """ Find Probes or Cfg files with multiple similar files that
    might be merged into one.

    Two files are considered similar when the
    :class:`difflib.SequenceMatcher` ratio of their ``data``
    attributes is greater than ``Bcfg2.Options.setup.threshold``. """

    options = Bcfg2.Server.Lint.ServerPlugin.options + [
        Bcfg2.Options.Option(
            cf=("MergeFiles", "threshold"), default="0.75", type=threshold,
            help="The threshold at which to suggest merging files and probes")]

    def Run(self):
        """ Run the Cfg check and the Probes check, each only if the
        corresponding plugin is present in ``self.core.plugins`` """
        if 'Cfg' in self.core.plugins:
            self.check_cfg()
        if 'Probes' in self.core.plugins:
            self.check_probes()

    @classmethod
    def Errors(cls):
        """ Return a dict mapping the error names used by this plugin
        ("merge-cfg" and "merge-probes") to "warning" """
        return {"merge-cfg": "warning",
                "merge-probes": "warning"}

    def check_cfg(self):
        """ Check Cfg for similar files.  Each Cfg entry set is
        examined on its own, and a "merge-cfg" error is reported for
        every group of similar files found in it """
        for filename, entryset in self.core.plugins['Cfg'].entries.items():
            # only CfgGenerator entries are compared with each other
            candidates = dict([(f, e) for f, e in entryset.entries.items()
                               if isinstance(e, CfgGenerator)])
            for mset in self.get_similar(candidates):
                self.LintError("merge-cfg",
                               "The following files are similar: %s. "
                               "Consider merging them into a single Genshi "
                               "template." %
                               ", ".join([os.path.join(filename, p)
                                          for p in mset]))

    def check_probes(self):
        """ Check Probes for similar files.  A "merge-probes" error is
        reported for every group of similar probes """
        probes = self.core.plugins['Probes'].probes.entries
        for mset in self.get_similar(probes):
            self.LintError("merge-probes",
                           "The following probes are similar: %s. "
                           "Consider merging them into a single probe." %
                           ", ".join(mset))

    def get_similar(self, entries):
        """ Get a list of similar files from the entry dict.

        ``entries`` maps filenames to objects that have a ``data``
        attribute.  Return value is a list of lists, each of which
        gives the filenames of similar files; a filename appears in
        at most one of those lists, and files that are not similar to
        anything are left out """
        rv = []
        elist = list(entries.items())
        while elist:
            result = self._find_similar(elist.pop(0), elist)
            if len(result) > 1:
                # files that have been grouped are not considered again
                grouped = set(result)
                elist = [(fname, fdata)
                         for fname, fdata in elist
                         if fname not in grouped]
                rv.append(result)
        return rv

    def _find_similar(self, ftuple, others):
        """ Find files similar to the one described by ftuple in the
        list of other files.

        ftuple is a tuple of (filename, data object); others is a list
        of such tuples and is not modified.  Similarity is chained: a
        file from ``others`` joins the group if it is similar to
        ftuple or to any file that joined the group before it.

        Returns a list of filenames that starts with the filename
        from ftuple; each filename appears once.  A list of length 1
        means nothing similar was found """
        fname, fdata = ftuple
        limit = Bcfg2.Options.setup.threshold
        names = [fname]
        members = [fdata]
        # A single ordered pass replaces the earlier recursive search,
        # which listed a file once per chain of similar files leading
        # to it and took exponential time on many alike files.
        for cname, cdata in others:
            for member in members:
                if self._is_similar(member, cdata, limit):
                    names.append(cname)
                    members.append(cdata)
                    # stop scanning: ``members`` was just extended
                    break
        return names

    @staticmethod
    def _is_similar(first, second, limit):
        """ Return True if the ``data`` of ``first`` and ``second``
        match with a SequenceMatcher ratio greater than ``limit`` """
        seqmatch = SequenceMatcher(None, first.data, second.data)
        # perform progressively more expensive comparisons; the two
        # quick ratios are upper bounds on ratio(), so a failure of
        # either one rules out a match without computing ratio()
        return (seqmatch.real_quick_ratio() > limit and
                seqmatch.quick_ratio() > limit and
                seqmatch.ratio() > limit)