From 0bd8cd384bbbb2062d2850923dfb33dc9c25a0b9 Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian Date: Tue, 21 Jul 2015 20:48:04 +0200 Subject: Lint/MergeFiles: Ignore binary files Ignore files with binary content, because SequenceMatcher seems to have problems and sometimes detects files with different content as identical. --- src/lib/Bcfg2/Server/Lint/MergeFiles.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/lib/Bcfg2/Server/Lint') diff --git a/src/lib/Bcfg2/Server/Lint/MergeFiles.py b/src/lib/Bcfg2/Server/Lint/MergeFiles.py index 8e6a926ae..bdb97cee2 100644 --- a/src/lib/Bcfg2/Server/Lint/MergeFiles.py +++ b/src/lib/Bcfg2/Server/Lint/MergeFiles.py @@ -17,6 +17,12 @@ def threshold(val): return rv +def is_binary(data): + """ Check if a given string contains only text or binary data. """ + text_chars = bytearray([7, 8, 9, 10, 12, 13, 27] + range(0x20, 0x100)) + return bool(data.translate(None, text_chars)) + + class MergeFiles(Bcfg2.Server.Lint.ServerPlugin): """ find Probes or Cfg files with multiple similar files that might be merged into one """ @@ -50,6 +56,7 @@ class MergeFiles(Bcfg2.Server.Lint.ServerPlugin): for filename, entryset in self.core.plugins['Cfg'].entries.items(): candidates = dict([(f, e) for f, e in entryset.entries.items() if (isinstance(e, CfgGenerator) and + not is_binary(e.data) and f not in ignore and not f.endswith(".crypt"))]) similar, identical = self.get_similar(candidates) -- cgit v1.2.3-1-g7c22 From 06a6fce3f2f5c78a12937d4e52de3d824e3dd5e0 Mon Sep 17 00:00:00 2001 From: Alexander Sulfrian Date: Wed, 22 Jul 2015 16:23:07 +0200 Subject: Utils: Generalize is_string from POSIX/File is_string from POSIX/File could be used in other situations, too. So we move it to Utils, use it from Lint/MergeFiles and replace the custom is_binary function. 
--- src/lib/Bcfg2/Server/Lint/MergeFiles.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'src/lib/Bcfg2/Server/Lint') diff --git a/src/lib/Bcfg2/Server/Lint/MergeFiles.py b/src/lib/Bcfg2/Server/Lint/MergeFiles.py index bdb97cee2..3a6251594 100644 --- a/src/lib/Bcfg2/Server/Lint/MergeFiles.py +++ b/src/lib/Bcfg2/Server/Lint/MergeFiles.py @@ -6,6 +6,7 @@ import copy from difflib import SequenceMatcher import Bcfg2.Server.Lint from Bcfg2.Server.Plugins.Cfg import CfgGenerator +from Bcfg2.Utils import is_string def threshold(val): @@ -17,12 +18,6 @@ def threshold(val): return rv -def is_binary(data): - """ Check if a given string contains only text or binary data. """ - text_chars = bytearray([7, 8, 9, 10, 12, 13, 27] + range(0x20, 0x100)) - return bool(data.translate(None, text_chars)) - - class MergeFiles(Bcfg2.Server.Lint.ServerPlugin): """ find Probes or Cfg files with multiple similar files that might be merged into one """ @@ -56,7 +51,8 @@ class MergeFiles(Bcfg2.Server.Lint.ServerPlugin): for filename, entryset in self.core.plugins['Cfg'].entries.items(): candidates = dict([(f, e) for f, e in entryset.entries.items() if (isinstance(e, CfgGenerator) and - not is_binary(e.data) and + is_string(e.data, + Bcfg2.Options.setup.encoding) and f not in ignore and not f.endswith(".crypt"))]) similar, identical = self.get_similar(candidates) -- cgit v1.2.3-1-g7c22