diff options
Diffstat (limited to 'web.py')
-rwxr-xr-x | web.py | 18 |
1 files changed, 17 insertions, 1 deletions
@@ -5,7 +5,8 @@ Author: Sean B. Palmer, inamidst.com
 About: http://inamidst.com/phenny/
 """
 
-import urllib
+import re, urllib
+from htmlentitydefs import name2codepoint
 
 class Grab(urllib.URLopener):
    def __init__(self, *args):
@@ -40,5 +41,20 @@ def post(uri, query):
    u.close()
    return bytes
 
+r_entity = re.compile(r'&([^;\s]+);')
+
+def entity(match):
+   value = match.group(1).lower()
+   if value.startswith('#x'):
+      return unichr(int(value[2:], 16))
+   elif value.startswith('#'):
+      return unichr(int(value[1:]))
+   elif name2codepoint.has_key(value):
+      return unichr(name2codepoint[value])
+   return '[' + value + ']'
+
+def decode(html):
+   return r_entity.sub(entity, html)
+
 if __name__=="__main__":
    main()