1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
""" find Probes or Cfg files with multiple similar files that might be
merged into one """
import os
import copy
from difflib import SequenceMatcher
import Bcfg2.Server.Lint
from Bcfg2.Server.Plugins.Cfg import CfgGenerator
from Bcfg2.Utils import is_string
def threshold(val):
    """ Option type processor for the similarity threshold.

    Accepts either a percentage (e.g., "threshold=75") or a ratio
    (e.g., "threshold=.75") and returns it as a float.  Any value
    greater than 1 is treated as a percentage and divided by 100;
    everything else is returned unchanged. """
    ratio = float(val)
    # a value above 1 cannot be a ratio, so it must be a percentage
    return ratio / 100 if ratio > 1 else ratio
class MergeFiles(Bcfg2.Server.Lint.ServerPlugin):
    """ Find Probes or Cfg files with multiple similar files that
    might be merged into one.

    Two files are reported as *identical* when their data compares
    equal, and as *similar* when their difflib similarity ratio is
    greater than the ``threshold`` option. """

    options = Bcfg2.Server.Lint.ServerPlugin.options + [
        Bcfg2.Options.Option(
            cf=("MergeFiles", "threshold"), default="0.75", type=threshold,
            help="The threshold at which to suggest merging files and probes")]

    def Run(self):
        """ Run the Cfg and Probes checks for whichever of those
        plugins is present in the core's plugin list. """
        if 'Cfg' in self.core.plugins:
            self.check_cfg()
        if 'Probes' in self.core.plugins:
            self.check_probes()

    @classmethod
    def Errors(cls):
        """ Return a dict mapping each lint error name raised by this
        plugin to its level ("warning" or "error"). """
        return {"merge-cfg": "warning",
                "identical-cfg": "error",
                "merge-probes": "warning",
                "identical-probes": "error"}

    def check_cfg(self):
        """ Check Cfg for similar files.

        Each Cfg entry set is examined on its own; only string-valued
        CfgGenerator entries that are neither ignored basenames nor
        ``.crypt`` files are compared with each other. """
        # ignore non-specific Cfg entries, e.g., privkey.xml
        ignore = []
        for hdlr in Bcfg2.Options.setup.cfg_handlers:
            if not hdlr.__specific__:
                ignore.extend(hdlr.__basenames__)

        for filename, entryset in self.core.plugins['Cfg'].entries.items():
            # the isinstance() test must stay first: the later tests
            # read e.data, which is only looked at for generators
            candidates = dict(
                (f, e) for f, e in entryset.entries.items()
                if (isinstance(e, CfgGenerator) and
                    is_string(e.data, Bcfg2.Options.setup.encoding) and
                    f not in ignore and
                    not f.endswith(".crypt")))
            similar, identical = self.get_similar(candidates)
            # names are sorted so that the message is reproducible
            # from one run to the next (sets have no stable order)
            for mset in similar:
                self.LintError("merge-cfg",
                               "The following files are similar: %s. "
                               "Consider merging them into a single Genshi "
                               "template." %
                               ", ".join(sorted(os.path.join(filename, p)
                                                for p in mset)))
            for mset in identical:
                self.LintError("identical-cfg",
                               "The following files are identical: %s. "
                               "Strongly consider merging them into a single "
                               "Genshi template." %
                               ", ".join(sorted(os.path.join(filename, p)
                                                for p in mset)))

    def check_probes(self):
        """ Check Probes for similar files """
        probes = self.core.plugins['Probes'].probes.entries
        similar, identical = self.get_similar(probes)
        # names are sorted so that the message is reproducible from
        # one run to the next (sets have no stable order)
        for mset in similar:
            self.LintError("merge-probes",
                           "The following probes are similar: %s. "
                           "Consider merging them into a single probe." %
                           ", ".join(sorted(mset)))
        for mset in identical:
            self.LintError("identical-probes",
                           "The following probes are identical: %s. "
                           "Strongly consider merging them into a single "
                           "probe." %
                           ", ".join(sorted(mset)))

    def get_similar(self, entries):
        """ Group the entries of the entry dict by similarity.

        entries is a dict of name -> object; each object's ``data``
        attribute is what gets compared.  The return value is a tuple
        ``(similar, identical)``.  Each member is a list of sets, and
        each set holds the names of files that are similar
        (respectively identical) to one another. """
        similar = []
        identical = []
        elist = list(entries.items())
        while elist:
            # compare the first remaining entry against all the others;
            # the helper gets a copy so it can never disturb our list
            sim, ident = self._find_similar(elist.pop(0), copy.copy(elist))
            if sim:
                similar.append(sim)
            if ident:
                identical.append(ident)
            # anything already placed in a group is not compared again
            matched = sim | ident
            elist = [(fname, fdata)
                     for fname, fdata in elist
                     if fname not in matched]
        return similar, identical

    def _find_similar(self, ftuple, others):
        """ Find files similar to the one described by ftuple in the
        list of other files.

        ftuple is a tuple of (filename, entry); others is a list of
        such tuples.  Each entry's ``data`` attribute is compared.
        How similar two files must be to rate as 'similar' is read
        from the ``threshold`` option, a float between 0 and 1.

        Returns a tuple ``(similar, identical)`` of sets of filenames.
        A set that is not empty also contains the filename from
        ftuple itself. """
        fname, fdata = ftuple
        limit = Bcfg2.Options.setup.threshold
        similar = set()
        identical = set()
        for cname, cdata in others:
            # Identity is decided by comparing the data directly.
            # SequenceMatcher.real_quick_ratio() is only an upper
            # bound computed from the two lengths, so it is 1.0 for
            # *any* two files of equal length and must not be used
            # to detect identical files.
            if fdata.data == cdata.data:
                identical.add(cname)
                continue
            seqmatch = SequenceMatcher(None, fdata.data, cdata.data)
            # perform progressively more expensive comparisons; the
            # cheaper ratios are upper bounds on ratio(), so failing
            # one of them rules the pair out early
            if (seqmatch.real_quick_ratio() > limit and
                    seqmatch.quick_ratio() > limit and
                    seqmatch.ratio() > limit):
                similar.add(cname)
        if similar:
            similar.add(fname)
        if identical:
            identical.add(fname)
        return (similar, identical)
|