From 8cc8d12a674ab6271183e5c35202263a36497279 Mon Sep 17 00:00:00 2001
From: Zac Medico
Date: Sun, 10 Jul 2011 16:26:24 -0700
Subject: Migrate from codecs.open() to io.open().

The io.open() function is the same as the built-in open() function in
python3, and its implementation is optimized in python-2.7 and later.
In addition to the possible performance improvement, this also allows
us to avoid any future compatibility issues with codecs.open() that
may arise if it is delegated to the built-in open() function as
discussed in PEP 400.

The main caveat involved with io.open() is that TextIOWrapper.write()
raises TypeError if given raw bytes, unlike the streams returned from
codecs.open(). This is mainly an issue for python2 since literal
strings are raw bytes. We handle this by wrapping
TextIOWrapper.write() arguments with our _unicode_decode() function.
Also, the atomic_ofstream class overrides the write() method in
python2 so that it performs automatic coercion to unicode when
necessary.
--- bin/binhost-snapshot | 6 +++--- bin/egencache | 41 +++++++++++++++++++++++------------------ bin/repoman | 10 +++++----- 3 files changed, 31 insertions(+), 26 deletions(-) (limited to 'bin') diff --git a/bin/binhost-snapshot b/bin/binhost-snapshot index 825a11672..9d2697d03 100755 --- a/bin/binhost-snapshot +++ b/bin/binhost-snapshot @@ -1,8 +1,8 @@ #!/usr/bin/python -# Copyright 2010 Gentoo Foundation +# Copyright 2010-2011 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -import codecs +import io import optparse import os import sys @@ -109,7 +109,7 @@ def main(argv): if not (os.WIFEXITED(ret) and os.WEXITSTATUS(ret) == os.EX_OK): return 1 - infile = codecs.open(portage._unicode_encode(src_pkgs_index, + infile = io.open(portage._unicode_encode(src_pkgs_index, encoding=portage._encodings['fs'], errors='strict'), mode='r', encoding=portage._encodings['repo.content'], errors='strict') diff --git a/bin/egencache b/bin/egencache index 5307cd5a2..1b4265df1 100755 --- a/bin/egencache +++ b/bin/egencache @@ -20,7 +20,7 @@ try: except KeyboardInterrupt: sys.exit(128 + signal.SIGINT) -import codecs +import io import logging import optparse import subprocess @@ -391,10 +391,10 @@ class GenUseLocalDesc(object): output = open(_unicode_encode(desc_path, encoding=_encodings['fs'], errors='strict'), 'r+b') else: - output = codecs.open(_unicode_encode(desc_path, + output = io.open(_unicode_encode(desc_path, encoding=_encodings['fs'], errors='strict'), mode='w', encoding=_encodings['repo.content'], - errors='replace') + errors='backslashreplace') except IOError as e: if not self._preserve_comments or \ os.path.isfile(desc_path): @@ -413,10 +413,10 @@ class GenUseLocalDesc(object): level=logging.WARNING, noiselevel=-1) self._preserve_comments = False try: - output = codecs.open(_unicode_encode(desc_path, + output = io.open(_unicode_encode(desc_path, encoding=_encodings['fs'], errors='strict'), mode='w', encoding=_encodings['repo.content'], - 
errors='replace') + errors='backslashreplace') except IOError as e: writemsg_level( "ERROR: failed to open output file %s: %s\n" \ @@ -437,18 +437,18 @@ class GenUseLocalDesc(object): # Finished probing comments in binary mode, now append # in text mode. - output = codecs.open(_unicode_encode(desc_path, + output = io.open(_unicode_encode(desc_path, encoding=_encodings['fs'], errors='strict'), mode='a', encoding=_encodings['repo.content'], - errors='replace') - output.write('\n') + errors='backslashreplace') + output.write(_unicode_decode('\n')) else: - output.write(''' + output.write(_unicode_decode(''' # This file is deprecated as per GLEP 56 in favor of metadata.xml. Please add # your descriptions to your package's metadata.xml ONLY. # * generated automatically using egencache * -'''.lstrip()) +'''.lstrip())) # The cmp function no longer exists in python3, so we'll # implement our own here under a slightly different name @@ -522,7 +522,8 @@ class GenUseLocalDesc(object): resatoms = sorted(reskeys, key=cmp_sort_key(atomcmp)) resdesc = resdict[reskeys[resatoms[-1]]] - output.write('%s:%s - %s\n' % (cp, flag, resdesc)) + output.write(_unicode_decode( + '%s:%s - %s\n' % (cp, flag, resdesc))) output.close() @@ -609,9 +610,9 @@ class GenChangeLogs(object): def generate_changelog(self, cp): try: - output = codecs.open('ChangeLog', + output = io.open('ChangeLog', mode='w', encoding=_encodings['repo.content'], - errors='replace') + errors='backslashreplace') except IOError as e: writemsg_level( "ERROR: failed to open ChangeLog for %s: %s\n" % (cp,e,), @@ -619,7 +620,7 @@ class GenChangeLogs(object): self.returncode |= 2 return - output.write((''' + output.write(_unicode_decode(''' # ChangeLog for %s # Copyright 1999-%s Gentoo Foundation; Distributed under the GPL v2 # $Header: $ @@ -688,10 +689,11 @@ class GenChangeLogs(object): # Reverse the sort order for headers. 
for c in reversed(changed): if c.startswith('+') and c.endswith('.ebuild'): - output.write('*%s (%s)\n' % (c[1:-7], date)) + output.write(_unicode_decode( + '*%s (%s)\n' % (c[1:-7], date))) wroteheader = True if wroteheader: - output.write('\n') + output.write(_unicode_decode('\n')) # strip ': ', '[] ', and similar body[0] = re.sub(r'^\W*' + re.escape(cp) + r'\W+', '', body[0]) @@ -711,10 +713,13 @@ class GenChangeLogs(object): # don't break filenames on hyphens self._wrapper.break_on_hyphens = False - output.write(self._wrapper.fill('%s; %s %s:' % (date, author, ', '.join(changed)))) + output.write(_unicode_decode( + self._wrapper.fill( + '%s; %s %s:' % (date, author, ', '.join(changed))))) # but feel free to break commit messages there self._wrapper.break_on_hyphens = True - output.write('\n%s\n\n' % '\n'.join([self._wrapper.fill(x) for x in body])) + output.write(_unicode_decode( + '\n%s\n\n' % '\n'.join(self._wrapper.fill(x) for x in body))) output.close() diff --git a/bin/repoman b/bin/repoman index d1d393a82..3e0203681 100755 --- a/bin/repoman +++ b/bin/repoman @@ -9,10 +9,10 @@ from __future__ import print_function import calendar -import codecs import copy import errno import formatter +import io import logging import optparse import re @@ -700,7 +700,7 @@ for path in portdb.porttrees: desc_path = os.path.join(path, 'profiles', 'profiles.desc') try: - desc_file = codecs.open(_unicode_encode(desc_path, + desc_file = io.open(_unicode_encode(desc_path, encoding=_encodings['fs'], errors='strict'), mode='r', encoding=_encodings['repo.content'], errors='replace') except EnvironmentError: @@ -1209,7 +1209,7 @@ for x in scanlist: continue try: line = 1 - for l in codecs.open(_unicode_encode(os.path.join(checkdir, y), + for l in io.open(_unicode_encode(os.path.join(checkdir, y), encoding=_encodings['fs'], errors='strict'), mode='r', encoding=_encodings['repo.content']): line +=1 @@ -1822,7 +1822,7 @@ for x in scanlist: pkg.mtime = None try: # All ebuilds should have 
utf_8 encoding. - f = codecs.open(_unicode_encode(full_path, + f = io.open(_unicode_encode(full_path, encoding=_encodings['fs'], errors='strict'), mode='r', encoding=_encodings['repo.content']) try: @@ -2319,7 +2319,7 @@ else: commitmessage = options.commitmsg if options.commitmsgfile: try: - f = codecs.open(_unicode_encode(options.commitmsgfile, + f = io.open(_unicode_encode(options.commitmsgfile, encoding=_encodings['fs'], errors='strict'), mode='r', encoding=_encodings['content'], errors='replace') commitmessage = f.read() -- cgit v1.2.3-1-g7c22