From 0bd8cd384bbbb2062d2850923dfb33dc9c25a0b9 Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian Date: Tue, 21 Jul 2015 20:48:04 +0200 Subject: Lint/MergeFiles: Ignore binary files Ignore files with binary content, because SequenceMatcher seems to have problems with such data and sometimes detects files with different content as identical. --- src/lib/Bcfg2/Server/Lint/MergeFiles.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lib/Bcfg2/Server/Lint/MergeFiles.py b/src/lib/Bcfg2/Server/Lint/MergeFiles.py index 8e6a926ae..bdb97cee2 100644 --- a/src/lib/Bcfg2/Server/Lint/MergeFiles.py +++ b/src/lib/Bcfg2/Server/Lint/MergeFiles.py @@ -17,6 +17,12 @@ def threshold(val): return rv +def is_binary(data): + """ Check if a given string contains only text or binary data. """ + text_chars = bytearray([7, 8, 9, 10, 12, 13, 27] + range(0x20, 0x100)) + return bool(data.translate(None, text_chars)) + + class MergeFiles(Bcfg2.Server.Lint.ServerPlugin): """ find Probes or Cfg files with multiple similar files that might be merged into one """ @@ -50,6 +56,7 @@ class MergeFiles(Bcfg2.Server.Lint.ServerPlugin): for filename, entryset in self.core.plugins['Cfg'].entries.items(): candidates = dict([(f, e) for f, e in entryset.entries.items() if (isinstance(e, CfgGenerator) and + not is_binary(e.data) and f not in ignore and not f.endswith(".crypt"))]) similar, identical = self.get_similar(candidates) -- cgit v1.2.3-1-g7c22