# path: root/src/lib/Server/Lint/MergeFiles.py
# blob: 27e7aa99ac997ae136ba0f36f36afe902123531e (plain)
import os
from copy import deepcopy
from difflib import SequenceMatcher
import Bcfg2.Options
import Bcfg2.Server.Lint

class MergeFiles(Bcfg2.Server.Lint.ServerPlugin):
    """ find Probes or Cfg files with multiple similar files that
    might be merged into one """

    def Run(self):
        """ run the similarity check for each of the Cfg and Probes
        plugins that is present in the server core """
        if 'Cfg' in self.core.plugins:
            self.check_cfg()
        if 'Probes' in self.core.plugins:
            self.check_probes()

    def check_cfg(self):
        """ report a lint error for every group of similar files found
        inside a single Cfg entry set """
        for filename, entryset in self.core.plugins['Cfg'].entries.items():
            for mset in self.get_similar(entryset.entries):
                self.LintError("merge-cfg",
                               "The following files are similar: %s. "
                               "Consider merging them into a single Genshi "
                               "template." %
                               ", ".join([os.path.join(filename, p)
                                          for p in mset]))

    def check_probes(self):
        """ report a lint error for every group of similar probes """
        probes = self.core.plugins['Probes'].probes.entries
        for mset in self.get_similar(probes):
            # NOTE(review): probes are reported under the same
            # "merge-cfg" error name as Cfg files -- confirm whether a
            # dedicated error name is intended before changing it
            self.LintError("merge-cfg",
                           "The following probes are similar: %s. "
                           "Consider merging them into a single probe." %
                           ", ".join(mset))

    def get_similar(self, entries):
        """ group the entries whose data is similar to one another

        entries is a dict mapping a name to an object whose ``data``
        attribute holds the content to compare.  The return value is
        a list of lists of names; every inner list holds at least two
        names, and a name appears in at most one list.

        The similarity threshold is read from the ``threshold`` key of
        self.config if present, and defaults to 0.75. """
        if "threshold" in self.config:
            # accept threshold either as a percent (e.g., "threshold=75") or
            # as a ratio (e.g., "threshold=.75")
            threshold = float(self.config['threshold'])
            if threshold > 1:
                threshold /= 100
        else:
            threshold = 0.75
        rv = []
        # dict.items() is a non-poppable view on Python 3, so build a
        # real list to use as the work queue
        elist = list(entries.items())
        while elist:
            # _find_similar consumes the list it is given, so hand it
            # a shallow copy; the entries themselves are only read
            result = self._find_similar(elist.pop(0), list(elist),
                                        threshold)
            if len(result) > 1:
                matched = set(result)
                # entries already placed in a group are not considered again
                elist = [(fname, fdata)
                         for fname, fdata in elist
                         if fname not in matched]
                rv.append(result)
        return rv

    def _find_similar(self, ftuple, others, threshold):
        """ find the entries in others that are similar to ftuple

        ftuple is a (name, entry) pair and others is a list of such
        pairs; others is emptied by this call.  An entry counts as
        similar when its SequenceMatcher ratio against ftuple's data
        exceeds threshold; each such entry is then matched, in turn,
        against the candidates that follow it in others.

        Returns a list of names that starts with ftuple's own name
        and contains each name at most once. """
        fname, fdata = ftuple
        rv = [fname]
        seen = set(rv)
        while others:
            cname, cdata = others.pop(0)
            if cname in seen:
                # already pulled in through an earlier match, along
                # with everything reachable from it in the remaining
                # candidates; comparing again would only add duplicates
                continue
            sm = SequenceMatcher(None, fdata.data, cdata.data)
            # perform progressively more expensive comparisons
            if (sm.real_quick_ratio() > threshold and
                sm.quick_ratio() > threshold and
                sm.ratio() > threshold):
                for name in self._find_similar((cname, cdata), list(others),
                                               threshold):
                    if name not in seen:
                        seen.add(name)
                        rv.append(name)
        return rv