diff options
Diffstat (limited to 'modules/etymology.py')
-rwxr-xr-x | modules/etymology.py | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/modules/etymology.py b/modules/etymology.py
index 55c5deb..cc93cfe 100755
--- a/modules/etymology.py
+++ b/modules/etymology.py
@@ -7,17 +7,25 @@ Licensed under the Eiffel Forum License 2.
 http://inamidst.com/phenny/
 """
 
-import re
+import re, urllib
 import web
 from tools import deprecated
 
-etyuri = 'http://etymonline.com/?term=%s'
-etysearch = 'http://etymonline.com/?search=%s'
+etysite = 'http://www.etymonline.com/index.php?'
+etyuri = etysite + 'allowed_in_frame=0&term=%s'
+etysearch = etysite + 'allowed_in_frame=0&search=%s'
 
 r_definition = re.compile(r'(?ims)<dd[^>]*>.*?</dd>')
 r_tag = re.compile(r'<(?!!)[^>]+>')
 r_whitespace = re.compile(r'[\t\r\n ]+')
 
+class Grab(urllib.URLopener):
+   def __init__(self, *args):
+      self.version = 'Mozilla/5.0 (Phenny)'
+      urllib.URLopener.__init__(self, *args)
+   def http_error_default(self, url, fp, errcode, errmsg, headers):
+      return urllib.addinfourl(fp, [headers, errcode], "http:" + url)
+
 abbrs = [
    'cf', 'lit', 'etc', 'Ger', 'Du', 'Skt', 'Rus', 'Eng', 'Amer.Eng',
    'Sp', 'Fr', 'N', 'E', 'S', 'W', 'L', 'Gen', 'J.C', 'dial', 'Gk',
@@ -46,7 +54,11 @@ def etymology(word):
       raise ValueError("Word too long: %s[...]" % word[:10])
    word = {'axe': 'ax/axe'}.get(word, word)
 
+   grab = urllib._urlopener
+   urllib._urlopener = Grab()
+   urllib._urlopener.addheader("Referer", "http://www.etymonline.com/")
    bytes = web.get(etyuri % web.urllib.quote(word))
+   urllib._urlopener = grab
    definitions = r_definition.findall(bytes)
 
    if not definitions:
@@ -58,10 +70,11 @@ def etymology(word):
       return None
    sentence = m.group(0)
 
-   try:
-      sentence = unicode(sentence, 'iso-8859-1')
-      sentence = sentence.encode('utf-8')
-   except: pass
+   # try:
+   # sentence = unicode(sentence, 'iso-8859-1')
+   # sentence = sentence.encode('utf-8')
+   # except: pass
+   sentence = web.decode(sentence)
 
    maxlength = 275
    if len(sentence) > maxlength:
@@ -71,7 +84,7 @@ def etymology(word):
       sentence = ' '.join(words) + ' [...]'
 
    sentence = '"' + sentence.replace('"', "'") + '"'
-   return sentence + ' - ' + (etyuri % word)
+   return sentence + ' - ' + ('http://etymonline.com/index.php?term=%s' % web.urllib.quote(word))
 
 @deprecated
 def f_etymology(self, origin, match, args):
@@ -89,7 +102,7 @@ def f_etymology(self, origin, match, args):
       self.msg(origin.sender, result)
    else:
       uri = etysearch % word
-      msg = 'Can\'t find the etymology for "%s". Try %s' % (word, uri)
+      msg = 'Can\'t find the etymology for "%s". Try %s' % (word, ('http://etymonline.com/index.php?term=%s' % web.urllib.quote(word)))
       self.msg(origin.sender, msg)
 # @@ Cf. http://swhack.com/logs/2006-01-04#T01-50-22
 f_etymology.rule = (['ety'], r"(.+?)$")