summary | refs | log | tree | commit | diff | stats
path: root/modules/etymology.py
diff options
context:
space:
mode:
Diffstat (limited to 'modules/etymology.py')
-rwxr-xr-x modules/etymology.py 31
1 files changed, 22 insertions, 9 deletions
diff --git a/modules/etymology.py b/modules/etymology.py
index 55c5deb..cc93cfe 100755
--- a/modules/etymology.py
+++ b/modules/etymology.py
@@ -7,17 +7,25 @@ Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/
"""
-import re
+import re, urllib
import web
from tools import deprecated
-etyuri = 'http://etymonline.com/?term=%s'
-etysearch = 'http://etymonline.com/?search=%s'
+etysite = 'http://www.etymonline.com/index.php?'
+etyuri = etysite + 'allowed_in_frame=0&term=%s'
+etysearch = etysite + 'allowed_in_frame=0&search=%s'
r_definition = re.compile(r'(?ims)<dd[^>]*>.*?</dd>')
r_tag = re.compile(r'<(?!!)[^>]+>')
r_whitespace = re.compile(r'[\t\r\n ]+')
+class Grab(urllib.URLopener):  # URL opener with a custom User-Agent whose default HTTP-error handler returns the response rather than raising
+ def __init__(self, *args):
+ self.version = 'Mozilla/5.0 (Phenny)'  # assigned before the base __init__ runs; urllib documents `version` as the opener's user-agent string
+ urllib.URLopener.__init__(self, *args)
+ def http_error_default(self, url, fp, errcode, errmsg, headers):
+ return urllib.addinfourl(fp, [headers, errcode], "http:" + url)  # wraps the error body; the "headers" slot carries [headers, errcode] so callers can read the status
+
abbrs = [
'cf', 'lit', 'etc', 'Ger', 'Du', 'Skt', 'Rus', 'Eng', 'Amer.Eng', 'Sp',
'Fr', 'N', 'E', 'S', 'W', 'L', 'Gen', 'J.C', 'dial', 'Gk',
@@ -46,7 +54,11 @@ def etymology(word):
raise ValueError("Word too long: %s[...]" % word[:10])
word = {'axe': 'ax/axe'}.get(word, word)
+ grab = urllib._urlopener
+ urllib._urlopener = Grab()
+ urllib._urlopener.addheader("Referer", "http://www.etymonline.com/")
bytes = web.get(etyuri % web.urllib.quote(word))
+ urllib._urlopener = grab
definitions = r_definition.findall(bytes)
if not definitions:
@@ -58,10 +70,11 @@ def etymology(word):
return None
sentence = m.group(0)
- try:
- sentence = unicode(sentence, 'iso-8859-1')
- sentence = sentence.encode('utf-8')
- except: pass
+ # try:
+ # sentence = unicode(sentence, 'iso-8859-1')
+ # sentence = sentence.encode('utf-8')
+ # except: pass
+ sentence = web.decode(sentence)
maxlength = 275
if len(sentence) > maxlength:
@@ -71,7 +84,7 @@ def etymology(word):
sentence = ' '.join(words) + ' [...]'
sentence = '"' + sentence.replace('"', "'") + '"'
- return sentence + ' - ' + (etyuri % word)
+ return sentence + ' - ' + ('http://etymonline.com/index.php?term=%s' % web.urllib.quote(word))
@deprecated
def f_etymology(self, origin, match, args):
@@ -89,7 +102,7 @@ def f_etymology(self, origin, match, args):
self.msg(origin.sender, result)
else:
uri = etysearch % word
- msg = 'Can\'t find the etymology for "%s". Try %s' % (word, uri)
+ msg = 'Can\'t find the etymology for "%s". Try %s' % (word, ('http://etymonline.com/index.php?term=%s' % web.urllib.quote(word)))
self.msg(origin.sender, msg)
# @@ Cf. http://swhack.com/logs/2006-01-04#T01-50-22
f_etymology.rule = (['ety'], r"(.+?)$")