From c5fe137b01a0b0fe01b4f2d6b85afc28fc4a1f8a Mon Sep 17 00:00:00 2001
From: "Sean B. Palmer"
Date: Mon, 27 Feb 2012 00:10:33 +0000
Subject: Updated etymology interface

---
 modules/etymology.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/modules/etymology.py b/modules/etymology.py
index 55c5deb..37d1316 100755
--- a/modules/etymology.py
+++ b/modules/etymology.py
@@ -7,17 +7,25 @@ Licensed under the Eiffel Forum License 2.
 http://inamidst.com/phenny/
 """
 
-import re
+import re, urllib
 import web
 from tools import deprecated
 
-etyuri = 'http://etymonline.com/?term=%s'
-etysearch = 'http://etymonline.com/?search=%s'
+etysite = 'http://www.etymonline.com/index.php?'
+etyuri = etysite + 'allowed_in_frame=0&term=%s'
+etysearch = etysite + 'allowed_in_frame=0&search=%s'
 
 r_definition = re.compile(r'(?ims)<dd[^>]*>.*?</dd>')
 r_tag = re.compile(r'<(?!!)[^>]+>')
 r_whitespace = re.compile(r'[\t\r\n ]+')
 
+class Grab(urllib.URLopener):
+   def __init__(self, *args):
+      self.version = 'Mozilla/5.0 (Phenny)'
+      urllib.URLopener.__init__(self, *args)
+   def http_error_default(self, url, fp, errcode, errmsg, headers):
+      return urllib.addinfourl(fp, [headers, errcode], "http:" + url)
+
 abbrs = [
    'cf', 'lit', 'etc', 'Ger', 'Du', 'Skt', 'Rus', 'Eng', 'Amer.Eng', 'Sp',
    'Fr', 'N', 'E', 'S', 'W', 'L', 'Gen', 'J.C', 'dial', 'Gk',
@@ -46,7 +54,11 @@ def etymology(word):
       raise ValueError("Word too long: %s[...]" % word[:10])
    word = {'axe': 'ax/axe'}.get(word, word)
 
+   grab = urllib._urlopener
+   urllib._urlopener = Grab()
+   urllib._urlopener.addheader("Referer", "http://www.etymonline.com/")
    bytes = web.get(etyuri % web.urllib.quote(word))
+   urllib._urlopener = grab
    definitions = r_definition.findall(bytes)
 
    if not definitions:
@@ -62,6 +74,7 @@ def etymology(word):
       sentence = unicode(sentence, 'iso-8859-1')
       sentence = sentence.encode('utf-8')
    except: pass
+   sentence = web.decode(sentence)
 
    maxlength = 275
    if len(sentence) > maxlength:
@@ -71,7 +84,7 @@ def etymology(word):
       sentence = ' '.join(words) + ' [...]'
 
    sentence = '"' + sentence.replace('"', "'") + '"'
-   return sentence + ' - ' + (etyuri % word)
+   return sentence + ' - etymonline.com'
 
 @deprecated
 def f_etymology(self, origin, match, args):
-- 
cgit v1.2.3-1-g7c22