summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZac Medico <zmedico@gentoo.org>2009-07-20 23:50:20 +0000
committerZac Medico <zmedico@gentoo.org>2009-07-20 23:50:20 +0000
commit581381acffbedfdc553b9643d47924bf44e32238 (patch)
tree878d3777e6b79c5e0712e38f63505dc6e7079f21
parent8c70715b17035030d5729972e01b2c2d3d842967 (diff)
downloadportage-581381acffbedfdc553b9643d47924bf44e32238.tar.gz
portage-581381acffbedfdc553b9643d47924bf44e32238.tar.bz2
portage-581381acffbedfdc553b9643d47924bf44e32238.zip
Always pass encoding='utf_8' to codecs.open(), since otherwise it can
return non-unicode strings (at least in some cases, observed with python-2.6.2). Don't use unicode in portage.util.getconfig() for now, since shlex doesn't seem to support it (spurious \0 characters). If we use unicode for config variables, it breaks shlex.split() calls on those variables due to the same issue (spurious \0 characters). svn path=/main/trunk/; revision=13845
-rwxr-xr-xbin/repoman6
-rw-r--r--pym/portage/__init__.py4
-rw-r--r--pym/portage/env/loaders.py2
-rw-r--r--pym/portage/output.py3
-rw-r--r--pym/portage/util.py14
5 files changed, 17 insertions, 12 deletions
diff --git a/bin/repoman b/bin/repoman
index 3ea879c35..c8f32f4a4 100755
--- a/bin/repoman
+++ b/bin/repoman
@@ -590,7 +590,8 @@ for path in portdb.porttrees:
desc_path = os.path.join(path, 'profiles', 'profiles.desc')
try:
- desc_file = codecs.open(desc_path, mode='r', errors='replace')
+ desc_file = codecs.open(desc_path, mode='r',
+ encoding='utf_8', errors='replace')
except EnvironmentError:
pass
else:
@@ -960,7 +961,8 @@ for x in scanlist:
continue
try:
line = 1
- for l in codecs.open(checkdir+"/"+y, "r", "utf8"):
+ for l in codecs.open(os.path.join(checkdir, y), mode='r',
+ encoding='utf_8'):
line +=1
except UnicodeDecodeError, ue:
stats["file.UTF8"] += 1
diff --git a/pym/portage/__init__.py b/pym/portage/__init__.py
index 3a969bbce..d897af0e8 100644
--- a/pym/portage/__init__.py
+++ b/pym/portage/__init__.py
@@ -1682,8 +1682,8 @@ class config(object):
repo_conf_parser = SafeConfigParser()
try:
repo_conf_parser.readfp(
- codecs.open(self._local_repo_conf_path,
- mode='r', errors='replace'))
+ codecs.open(self._local_repo_conf_path, mode='r',
+ encoding='utf_8', errors='replace'))
except EnvironmentError, e:
if e.errno != errno.ENOENT:
raise
diff --git a/pym/portage/env/loaders.py b/pym/portage/env/loaders.py
index 7cd2600ae..7b4d72721 100644
--- a/pym/portage/env/loaders.py
+++ b/pym/portage/env/loaders.py
@@ -139,7 +139,7 @@ class FileLoader(DataLoader):
# once, which may be expensive due to digging in child classes.
func = self.lineParser
for fn in RecursiveFileLoader(self.fname):
- f = codecs.open(fn, mode='r', errors='replace')
+ f = codecs.open(fn, mode='r', encoding='utf_8', errors='replace')
for line_num, line in enumerate(f):
func(line, line_num, data, errors)
return (data, errors)
diff --git a/pym/portage/output.py b/pym/portage/output.py
index 4c533c427..eb0d247b7 100644
--- a/pym/portage/output.py
+++ b/pym/portage/output.py
@@ -165,7 +165,8 @@ def _parse_color_map(onerror=None):
return token
try:
lineno=0
- for line in codecs.open( myfile, mode = 'r', errors = 'replace' ):
+ for line in codecs.open( myfile, mode='r',
+ encoding='utf_8', errors='replace' ):
lineno += 1
commenter_pos = line.find("#")
diff --git a/pym/portage/util.py b/pym/portage/util.py
index 6d9a23cd1..63b504cf9 100644
--- a/pym/portage/util.py
+++ b/pym/portage/util.py
@@ -318,7 +318,7 @@ def grablines(myfilename,recursive=0):
else:
try:
myfile = codecs.open(myfilename, mode='r',
- encoding=sys.getdefaultencoding(), errors='replace')
+ encoding='utf_8', errors='replace')
mylines = myfile.readlines()
myfile.close()
except IOError, e:
@@ -368,10 +368,11 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True):
# Workaround for avoiding a silent error in shlex that
# is triggered by a source statement at the end of the file without a
# trailing newline after the source statement
- content = codecs.open(mycfg, mode='r', errors='replace').read()
- if content and content[-1] != u'\n':
- content += u'\n'
- f = StringIO(content)
+ # NOTE: shlex doesn't seem to support unicode objects
+ # (produces spurious \0 characters with python-2.6.2)
+ content = open(mycfg).read()
+ if content and content[-1] != '\n':
+ content += '\n'
except IOError, e:
if e.errno == PermissionDenied.errno:
raise PermissionDenied(mycfg)
@@ -387,7 +388,7 @@ def getconfig(mycfg, tolerant=0, allow_sourcing=False, expand=True):
# The default shlex.sourcehook() implementation
# only joins relative paths when the infile
# attribute is properly set.
- lex = shlex_class(f, infile=mycfg, posix=True)
+ lex = shlex_class(content, infile=mycfg, posix=True)
lex.wordchars = string.digits + string.ascii_letters + \
"~!@#$%*_\:;?,./-+{}"
lex.quotes="\"'"
@@ -874,6 +875,7 @@ class atomic_ofstream(ObjectProxy):
open_func = open
else:
open_func = codecs.open
+ kargs.setdefault('encoding', 'utf_8')
kargs.setdefault('errors', 'replace')
if follow_links: