From 4dd5fc4a1fc5f5a9676c212c93244ec8c50288dc Mon Sep 17 00:00:00 2001
From: Zac Medico
Date: Fri, 21 Oct 2011 00:06:32 -0700
Subject: update_copyright: process files as raw bytes

This function will work correctly with files encoded in any character
set, as long as the copyright statements consist of plain ASCII.
---
 pym/repoman/utilities.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'pym')

diff --git a/pym/repoman/utilities.py b/pym/repoman/utilities.py
index eec6fdfee..9be69011b 100644
--- a/pym/repoman/utilities.py
+++ b/pym/repoman/utilities.py
@@ -524,8 +524,8 @@ def FindVCS():
 
 	return outvcs
 
-_copyright_re1 = re.compile(r'^(# Copyright \d\d\d\d)-\d\d\d\d ')
-_copyright_re2 = re.compile(r'^(# Copyright )(\d\d\d\d) ')
+_copyright_re1 = re.compile(br'^(# Copyright \d\d\d\d)-\d\d\d\d ')
+_copyright_re2 = re.compile(br'^(# Copyright )(\d\d\d\d) ')
 
 
 class _copyright_repl(object):
@@ -536,8 +536,8 @@ class _copyright_repl(object):
 		if matchobj.group(2) == self.year:
 			return matchobj.group(0)
 		else:
-			return '%s%s-%s ' % \
-				(matchobj.group(1), matchobj.group(2), self.year)
+			return matchobj.group(1) + matchobj.group(2) + \
+				b'-' + self.year + b' '
 
 def _update_copyright_year(year, line):
 	"""
@@ -545,8 +545,14 @@ def _update_copyright_year(year, line):
 	update_copyright(), except that we don't hardcode
 	1999 here (in order to be more generic).
 	"""
-	line = _copyright_re1.sub(r'\1-%s ' % year, line)
+	is_bytes = isinstance(line, bytes)
+	year = _unicode_encode(year)
+	line = _unicode_encode(line)
+
+	line = _copyright_re1.sub(br'\1-' + year + b' ', line)
 	line = _copyright_re2.sub(_copyright_repl(year), line)
+	if not is_bytes:
+		line = _unicode_decode(line)
 	return line
 
 def update_copyright(fn_path, year, pretend):
@@ -555,12 +561,15 @@ def update_copyright(fn_path, year, pretend):
 	patterns used for replacing copyrights are taken from echangelog.
 	Only the first lines of each file that start with a hash ('#') are
 	considered, until a line is found that doesn't start with a hash.
+	Files are read and written in binary mode, so that this function
+	will work correctly with files encoded in any character set, as
+	long as the copyright statements consist of plain ASCII.
 	"""
 
 	try:
 		fn_hdl = io.open(_unicode_encode(fn_path,
 			encoding=_encodings['fs'], errors='strict'),
-			mode='r', encoding=_encodings['repo.content'], errors='replace')
+			mode='rb')
 	except EnvironmentError:
 		return
 
@@ -570,7 +579,7 @@ def update_copyright(fn_path, year, pretend):
 	for line in fn_hdl:
 		line_strip = line.strip()
 		orig_header.append(line)
-		if not line_strip or line_strip[:1] != '#':
+		if not line_strip or line_strip[:1] != b'#':
 			new_header.append(line)
 			break
 
@@ -578,7 +587,9 @@ def update_copyright(fn_path, year, pretend):
 		new_header.append(line)
 
 	difflines = 0
-	for line in difflib.unified_diff(orig_header, new_header,
+	for line in difflib.unified_diff(
+		[_unicode_decode(line) for line in orig_header],
+		[_unicode_decode(line) for line in new_header],
 		fromfile=fn_path, tofile=fn_path, n=0):
 		util.writemsg_stdout(line, noiselevel=-1)
 		difflines += 1
@@ -588,8 +599,7 @@ def update_copyright(fn_path, year, pretend):
 	if difflines > 3 and not pretend:
 		# write new file with changed header
 		f, fnnew_path = mkstemp()
-		f = io.open(f, mode='w', encoding=_encodings['repo.content'],
-			errors='backslashreplace')
+		f = io.open(f, mode='wb')
 		for line in new_header:
 			f.write(line)
 		for line in fn_hdl:
-- 
cgit v1.2.3-1-g7c22