diff options
Diffstat (limited to 'forum/utils/diff.py')
-rw-r--r-- | forum/utils/diff.py | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/forum/utils/diff.py b/forum/utils/diff.py new file mode 100644 index 00000000..d741d788 --- /dev/null +++ b/forum/utils/diff.py @@ -0,0 +1,66 @@ +#!/usr/bin/python2.2 +"""HTML Diff: http://www.aaronsw.com/2002/diff +Rough code, badly documented. Send me comments and patches.""" + +__author__ = 'Aaron Swartz <me@aaronsw.com>' +__copyright__ = '(C) 2003 Aaron Swartz. GNU GPL 2.' +__version__ = '0.22' + +import difflib, string + +def isTag(x): return x[0] == "<" and x[-1] == ">" + +def textDiff(a, b): + """Takes in strings a and b and returns a human-readable HTML diff.""" + + out = [] + a, b = html2list(a), html2list(b) + s = difflib.SequenceMatcher(None, a, b) + for e in s.get_opcodes(): + if e[0] == "replace": + # @@ need to do something more complicated here + # call textDiff but not for html, but for some html... ugh + # gonna cop-out for now + out.append('<del>'+''.join(a[e[1]:e[2]]) + '</del><ins>'+''.join(b[e[3]:e[4]])+"</ins>") + elif e[0] == "delete": + out.append('<del >'+ ''.join(a[e[1]:e[2]]) + "</del>") + elif e[0] == "insert": + out.append('<ins >'+''.join(b[e[3]:e[4]]) + "</ins>") + elif e[0] == "equal": + out.append(''.join(b[e[3]:e[4]])) + else: + raise "Um, something's broken. I didn't expect a '" + `e[0]` + "'." + return ''.join(out) + +def html2list(x, b=0): + mode = 'char' + cur = '' + out = [] + for c in x: + if mode == 'tag': + if c == '>': + if b: cur += ']' + else: cur += c + out.append(cur); cur = ''; mode = 'char' + else: cur += c + elif mode == 'char': + if c == '<': + out.append(cur) + if b: cur = '[' + else: cur = c + mode = 'tag' + elif c in string.whitespace: out.append(cur+c); cur = '' + else: cur += c + out.append(cur) + return filter(lambda x: x is not '', out) + +if __name__ == '__main__': + import sys + try: + a, b = sys.argv[1:3] + except ValueError: + print "htmldiff: highlight the differences between two html files" + print "usage: " + sys.argv[0] + " a b" + sys.exit(1) + print textDiff(open(a).read(), open(b).read()) + |