diff options
author | Zac Medico <zmedico@gentoo.org> | 2011-10-21 00:06:32 -0700 |
---|---|---|
committer | Zac Medico <zmedico@gentoo.org> | 2011-10-21 00:06:32 -0700 |
commit | 4dd5fc4a1fc5f5a9676c212c93244ec8c50288dc (patch) | |
tree | 8cdb7aa81496c99355133796d40ce7b16d9d10b2 /pym | |
parent | 1e1413717df6ed6809833004bf47088e021ccb46 (diff) | |
download | portage-4dd5fc4a1fc5f5a9676c212c93244ec8c50288dc.tar.gz portage-4dd5fc4a1fc5f5a9676c212c93244ec8c50288dc.tar.bz2 portage-4dd5fc4a1fc5f5a9676c212c93244ec8c50288dc.zip |
update_copyright: process files as raw bytes
This function will work correctly with files encoded in any character
set, as long as the copyright statements consist of plain ASCII.
Diffstat (limited to 'pym')
-rw-r--r-- | pym/repoman/utilities.py | 30 |
1 file changed, 20 insertions, 10 deletions
```diff
diff --git a/pym/repoman/utilities.py b/pym/repoman/utilities.py
index eec6fdfee..9be69011b 100644
--- a/pym/repoman/utilities.py
+++ b/pym/repoman/utilities.py
@@ -524,8 +524,8 @@ def FindVCS():
 
 	return outvcs
 
-_copyright_re1 = re.compile(r'^(# Copyright \d\d\d\d)-\d\d\d\d ')
-_copyright_re2 = re.compile(r'^(# Copyright )(\d\d\d\d) ')
+_copyright_re1 = re.compile(br'^(# Copyright \d\d\d\d)-\d\d\d\d ')
+_copyright_re2 = re.compile(br'^(# Copyright )(\d\d\d\d) ')
 
 
 class _copyright_repl(object):
@@ -536,8 +536,8 @@ class _copyright_repl(object):
 		if matchobj.group(2) == self.year:
 			return matchobj.group(0)
 		else:
-			return '%s%s-%s ' % \
-				(matchobj.group(1), matchobj.group(2), self.year)
+			return matchobj.group(1) + matchobj.group(2) + \
+				b'-' + self.year + b' '
 
 def _update_copyright_year(year, line):
 	"""
@@ -545,8 +545,14 @@ def _update_copyright_year(year, line):
 	update_copyright(), except that we don't hardcode
 	1999 here (in order to be more generic).
 	"""
-	line = _copyright_re1.sub(r'\1-%s ' % year, line)
+	is_bytes = isinstance(line, bytes)
+	year = _unicode_encode(year)
+	line = _unicode_encode(line)
+
+	line = _copyright_re1.sub(br'\1-' + year + b' ', line)
 	line = _copyright_re2.sub(_copyright_repl(year), line)
+	if not is_bytes:
+		line = _unicode_decode(line)
 	return line
 
 def update_copyright(fn_path, year, pretend):
@@ -555,12 +561,15 @@ def update_copyright(fn_path, year, pretend):
 	patterns used for replacing copyrights are taken from echangelog.
 	Only the first lines of each file that start with a hash ('#') are
 	considered, until a line is found that doesn't start with a hash.
+	Files are read and written in binary mode, so that this function
+	will work correctly with files encoded in any character set, as
+	long as the copyright statements consist of plain ASCII.
 	"""
 
 	try:
 		fn_hdl = io.open(_unicode_encode(fn_path,
 			encoding=_encodings['fs'], errors='strict'),
-			mode='r', encoding=_encodings['repo.content'], errors='replace')
+			mode='rb')
 	except EnvironmentError:
 		return
 
@@ -570,7 +579,7 @@ def update_copyright(fn_path, year, pretend):
 	for line in fn_hdl:
 		line_strip = line.strip()
 		orig_header.append(line)
-		if not line_strip or line_strip[:1] != '#':
+		if not line_strip or line_strip[:1] != b'#':
 			new_header.append(line)
 			break
 
@@ -578,7 +587,9 @@ def update_copyright(fn_path, year, pretend):
 		new_header.append(line)
 
 	difflines = 0
-	for line in difflib.unified_diff(orig_header, new_header,
+	for line in difflib.unified_diff(
+		[_unicode_decode(line) for line in orig_header],
+		[_unicode_decode(line) for line in new_header],
 		fromfile=fn_path, tofile=fn_path, n=0):
 		util.writemsg_stdout(line, noiselevel=-1)
 		difflines += 1
@@ -588,8 +599,7 @@ def update_copyright(fn_path, year, pretend):
 	if difflines > 3 and not pretend:
 		# write new file with changed header
 		f, fnnew_path = mkstemp()
-		f = io.open(f, mode='w', encoding=_encodings['repo.content'],
-			errors='backslashreplace')
+		f = io.open(f, mode='wb')
 		for line in new_header:
 			f.write(line)
 		for line in fn_hdl:
```