Diffstat (limited to 'modules/dict.py')
-rwxr-xr-x  modules/dict.py  120
1 file changed, 120 insertions, 0 deletions
diff --git a/modules/dict.py b/modules/dict.py
new file mode 100755
index 0000000..7ecaf0a
--- /dev/null
+++ b/modules/dict.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+"""
+dict.py - Phenny Dictionary Module
+Copyright 2008, Sean B. Palmer, inamidst.com
+Licensed under the Eiffel Forum License 2.
+
+http://inamidst.com/phenny/
+"""
+
+import re, urllib
+import web
+from tools import deprecated
+
+formuri = 'http://wordnet.princeton.edu/perl/webwn?s='
+
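+# Patterns for scraping the WordNet results page: r_li pulls each <li> entry,
+# r_tag strips markup, r_parens captures parenthesised chunks (the part-of-speech
+# tag and the gloss), and r_word whitelists acceptable query terms.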
+r_li = re.compile(r'(?ims)<li>.*?</li>')
+r_tag = re.compile(r'<[^>]+>')
+r_parens = re.compile(r'(?<=\()(?:[^()]+|\([^)]+\))*(?=\))')
+r_word = re.compile(r'^[A-Za-z0-9\' -]+$')
+
+@deprecated
+def f_wordnet(self, origin, match, args):
+    """Gives the definition of a word using Wordnet."""
+    command = 'w'
+    term = match.group(2)
+    term = term.encode('utf-8')
+
+    if origin.sender != '#inamidst':
+        if not r_word.match(term):
+            msg = "Words must match the regexp %s" % r'^[A-Za-z0-9\' -]+$'
+            return self.msg(origin.sender, origin.nick + ": " + msg)
+        if ('--' in term) or ("''" in term) or (' ' in term):
+            self.msg(origin.sender, origin.nick + ": That's not in WordNet.")
+            return
+
+    bytes = web.get(formuri + web.urllib.quote(term)) # @@ ugh!
+    items = r_li.findall(bytes)
+
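+    # Sort the scraped definitions by part of speech: chunks[0] is the tag
+    # ('n', 'v' or 'adj') and chunks[1] is the gloss, cut at the first ';'.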
+    nouns, verbs, adjectives = [], [], []
+    for item in items:
+        item = r_tag.sub('', item)
+        chunks = r_parens.findall(item)
+        # self.msg(origin.sender, item)
+        if len(chunks) < 2: continue
+
+        kind, defn = chunks[0], chunks[1]
+        if command != 'wordnet':
+            defn = defn.split(';')[0]
+        if not defn: continue
+        defn = defn[0].upper() + defn[1:]
+
+        if kind == 'n':
+            nouns.append(defn)
+        elif kind == 'v':
+            verbs.append(defn)
+        elif kind == 'adj':
+            adjectives.append(defn)
+
+    if not (nouns or verbs or adjectives):
+        self.msg(origin.sender, "I couldn't find '%s' in WordNet." % term)
+        return
+
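+    # Keep at most three definitions in total, dropping extras from whichever
+    # part of speech currently has the most.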
+    while len(nouns + verbs + adjectives) > 3:
+        if len(nouns) >= len(verbs) and len(nouns) >= len(adjectives):
+            nouns.pop()
+        elif len(verbs) >= len(nouns) and len(verbs) >= len(adjectives):
+            verbs.pop()
+        elif len(adjectives) >= len(nouns) and len(adjectives) >= len(verbs):
+            adjectives.pop()
+
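+    # End the last definition that will be reported with a full stop.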
+    if adjectives:
+        adjectives[-1] = adjectives[-1] + '.'
+    elif verbs:
+        verbs[-1] = verbs[-1] + '.'
+    elif nouns:
+        nouns[-1] = nouns[-1] + '.'
+
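+    # One line per definition, numbered within each part of speech.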
+    for (i, defn) in enumerate(nouns):
+        self.msg(origin.sender, '%s n. %r: %s' % (term, i+1, defn))
+    for (i, defn) in enumerate(verbs):
+        self.msg(origin.sender, '%s v. %r: %s' % (term, i+1, defn))
+    for (i, defn) in enumerate(adjectives):
+        self.msg(origin.sender, '%s a. %r: %s' % (term, i+1, defn))
+f_wordnet.commands = ['wordnet']
+f_wordnet.priority = 'low'
+
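+# The dict command below scrapes Encarta's dictionary pages: r_info's first
+# group is taken as a part-of-speech heading, its second as a bold-tagged
+# sense listed under that heading.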
+uri = 'http://encarta.msn.com/dictionary_/%s.html'
+r_info = re.compile(
+    r'(?:ResultBody"><br /><br />(.*?)&nbsp;)|(?:<b>(.*?)</b>)'
+)
+
+def dict(phenny, input):
+    word = input.group(2)
+    word = urllib.quote(word.encode('utf-8'))
+
+    def trim(thing):
+        if thing.endswith('&nbsp;'):
+            thing = thing[:-6]
+        return thing.strip(' :.')
+
+    bytes = web.get(uri % word)
+    results = {}
+    wordkind = None
+    for kind, sense in r_info.findall(bytes):
+        kind, sense = trim(kind), trim(sense)
+        if kind: wordkind = kind
+        elif sense:
+            results.setdefault(wordkind, []).append(sense)
+    result = input.group(2).encode('utf-8') + ' - '
+    for key in sorted(results.keys()):
+        if results[key]:
+            result += key + ' 1. ' + results[key][0]
+            if len(results[key]) > 1:
+                result += ', 2. ' + results[key][1]
+            result += '; '
+    phenny.say(result.rstrip('; '))
+dict.commands = ['dict']
+
+if __name__ == '__main__':
+    print __doc__.strip()