From 4dd5fc4a1fc5f5a9676c212c93244ec8c50288dc Mon Sep 17 00:00:00 2001
From: Zac Medico <zmedico@gentoo.org>
Date: Fri, 21 Oct 2011 00:06:32 -0700
Subject: update_copyright: process files as raw bytes

This function will work correctly with files encoded in any character
set, as long as the copyright statements consist of plain ASCII.
---
 pym/repoman/utilities.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'pym')

diff --git a/pym/repoman/utilities.py b/pym/repoman/utilities.py
index eec6fdfee..9be69011b 100644
--- a/pym/repoman/utilities.py
+++ b/pym/repoman/utilities.py
@@ -524,8 +524,8 @@ def FindVCS():
 
 	return outvcs
 
-_copyright_re1 = re.compile(r'^(# Copyright \d\d\d\d)-\d\d\d\d ')
-_copyright_re2 = re.compile(r'^(# Copyright )(\d\d\d\d) ')
+_copyright_re1 = re.compile(br'^(# Copyright \d\d\d\d)-\d\d\d\d ')
+_copyright_re2 = re.compile(br'^(# Copyright )(\d\d\d\d) ')
 
 
 class _copyright_repl(object):
@@ -536,8 +536,8 @@ class _copyright_repl(object):
 		if matchobj.group(2) == self.year:
 			return matchobj.group(0)
 		else:
-			return '%s%s-%s ' % \
-				(matchobj.group(1), matchobj.group(2), self.year)
+			return matchobj.group(1) + matchobj.group(2) + \
+				b'-' + self.year + b' '
 
 def _update_copyright_year(year, line):
 	"""
@@ -545,8 +545,14 @@ def _update_copyright_year(year, line):
 	update_copyright(), except that we don't hardcode
 	1999 here (in order to be more generic).
 	"""
-	line = _copyright_re1.sub(r'\1-%s ' % year, line)
+	is_bytes = isinstance(line, bytes)
+	year = _unicode_encode(year)
+	line = _unicode_encode(line)
+	
+	line = _copyright_re1.sub(br'\1-' + year + b' ', line)
 	line = _copyright_re2.sub(_copyright_repl(year), line)
+	if not is_bytes:
+		line = _unicode_decode(line)
 	return line
 
 def update_copyright(fn_path, year, pretend):
@@ -555,12 +561,15 @@ def update_copyright(fn_path, year, pretend):
 	patterns used for replacing copyrights are taken from echangelog.
 	Only the first lines of each file that start with a hash ('#') are
 	considered, until a line is found that doesn't start with a hash.
+	Files are read and written in binary mode, so that this function
+	will work correctly with files encoded in any character set, as
+	long as the copyright statements consist of plain ASCII.
 	"""
 
 	try:
 		fn_hdl = io.open(_unicode_encode(fn_path,
 			encoding=_encodings['fs'], errors='strict'),
-			mode='r', encoding=_encodings['repo.content'], errors='replace')
+			mode='rb')
 	except EnvironmentError:
 		return
 
@@ -570,7 +579,7 @@ def update_copyright(fn_path, year, pretend):
 	for line in fn_hdl:
 		line_strip = line.strip()
 		orig_header.append(line)
-		if not line_strip or line_strip[:1] != '#':
+		if not line_strip or line_strip[:1] != b'#':
 			new_header.append(line)
 			break
 
@@ -578,7 +587,9 @@ def update_copyright(fn_path, year, pretend):
 		new_header.append(line)
 
 	difflines = 0
-	for line in difflib.unified_diff(orig_header, new_header,
+	for line in difflib.unified_diff(
+		[_unicode_decode(line) for line in orig_header],
+		[_unicode_decode(line) for line in new_header],
 			fromfile=fn_path, tofile=fn_path, n=0):
 		util.writemsg_stdout(line, noiselevel=-1)
 		difflines += 1
@@ -588,8 +599,7 @@ def update_copyright(fn_path, year, pretend):
 	if difflines > 3 and not pretend:
 		# write new file with changed header
 		f, fnnew_path = mkstemp()
-		f = io.open(f, mode='w', encoding=_encodings['repo.content'],
-			errors='backslashreplace')
+		f = io.open(f, mode='wb')
 		for line in new_header:
 			f.write(line)
 		for line in fn_hdl:
-- 
cgit v1.2.3-1-g7c22