summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Sulfrian <alexander.sulfrian@fu-berlin.de>2015-07-21 20:48:04 +0200
committerAlexander Sulfrian <alexander.sulfrian@fu-berlin.de>2015-07-21 21:09:09 +0200
commit0bd8cd384bbbb2062d2850923dfb33dc9c25a0b9 (patch)
treeffa5b22cc5db4b1ecd4b16b63cecff675a2bd631
parenta120c653a8bf2c380cc2d158e5169a9b7d2020a8 (diff)
downloadbcfg2-0bd8cd384bbbb2062d2850923dfb33dc9c25a0b9.tar.gz
bcfg2-0bd8cd384bbbb2062d2850923dfb33dc9c25a0b9.tar.bz2
bcfg2-0bd8cd384bbbb2062d2850923dfb33dc9c25a0b9.zip
Lint/MergeFiles: Ignore binary files
Ignore files with binary content, because SequenceMatcher seems to have problems with them and sometimes detects files with different content as identical.
-rw-r--r--src/lib/Bcfg2/Server/Lint/MergeFiles.py7
1 files changed, 7 insertions, 0 deletions
diff --git a/src/lib/Bcfg2/Server/Lint/MergeFiles.py b/src/lib/Bcfg2/Server/Lint/MergeFiles.py
index 8e6a926ae..bdb97cee2 100644
--- a/src/lib/Bcfg2/Server/Lint/MergeFiles.py
+++ b/src/lib/Bcfg2/Server/Lint/MergeFiles.py
@@ -17,6 +17,12 @@ def threshold(val):
return rv
+def is_binary(data):
+ """ Check if a given string contains only text or binary data. """
+ text_chars = bytearray([7, 8, 9, 10, 12, 13, 27] + range(0x20, 0x100))
+ return bool(data.translate(None, text_chars))
+
+
class MergeFiles(Bcfg2.Server.Lint.ServerPlugin):
""" find Probes or Cfg files with multiple similar files that
might be merged into one """
@@ -50,6 +56,7 @@ class MergeFiles(Bcfg2.Server.Lint.ServerPlugin):
for filename, entryset in self.core.plugins['Cfg'].entries.items():
candidates = dict([(f, e) for f, e in entryset.entries.items()
if (isinstance(e, CfgGenerator) and
+ not is_binary(e.data) and
f not in ignore and
not f.endswith(".crypt"))])
similar, identical = self.get_similar(candidates)