From cbdf9ebd7312bf570a212057ad793ae520bac38f Mon Sep 17 00:00:00 2001 From: "Sean B. Palmer" Date: Sat, 23 Feb 2008 12:17:06 +0000 Subject: And some new modules too... --- modules/dict.py | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100755 modules/dict.py (limited to 'modules/dict.py') diff --git a/modules/dict.py b/modules/dict.py new file mode 100755 index 0000000..7ecaf0a --- /dev/null +++ b/modules/dict.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +""" +dict.py - Phenny Dictionary Module +Copyright 2008, Sean B. Palmer, inamidst.com +Licensed under the Eiffel Forum License 2. + +http://inamidst.com/phenny/ +""" + +import re, urllib +import web +from tools import deprecated + +formuri = 'http://wordnet.princeton.edu/perl/webwn?s=' + +r_li = re.compile(r'(?ims)
  • .*?
  • ') +r_tag = re.compile(r'<[^>]+>') +r_parens = re.compile(r'(?<=\()(?:[^()]+|\([^)]+\))*(?=\))') +r_word = re.compile(r'^[A-Za-z0-9\' -]+$') + +@deprecated +def f_wordnet(self, origin, match, args): + """Gives the definition of a word using Wordnet.""" + command = 'w' + term = match.group(2) + term = term.encode('utf-8') + + if origin.sender != '#inamidst': + if not r_word.match(term): + msg = "Words must match the regexp %s" % r'^[A-Za-z0-9\' -]+$' + return self.msg(origin.sender, origin.nick + ": " + msg) + if ('--' in term) or ("''" in term) or (' ' in term): + self.msg(origin.sender, origin.nick + ": That's not in WordNet.") + return + + bytes = web.get(formuri + web.urllib.quote(term)) # @@ ugh! + items = r_li.findall(bytes) + + nouns, verbs, adjectives = [], [], [] + for item in items: + item = r_tag.sub('', item) + chunks = r_parens.findall(item) + # self.msg(origin.sender, item) + if len(chunks) < 2: continue + + kind, defn = chunks[0], chunks[1] + if command != 'wordnet': + defn = defn.split(';')[0] + if not defn: continue + defn = defn[0].upper() + defn[1:] + + if kind == 'n': + nouns.append(defn) + elif kind == 'v': + verbs.append(defn) + elif kind == 'adj': + adjectives.append(defn) + + if not (nouns or verbs or adjectives): + self.msg(origin.sender, "I couldn't find '%s' in WordNet." % term) + return + + while len(nouns + verbs + adjectives) > 3: + if len(nouns) >= len(verbs) and len(nouns) >= len(adjectives): + nouns.pop() + elif len(verbs) >= len(nouns) and len(verbs) >= len(adjectives): + verbs.pop() + elif len(adjectives) >= len(nouns) and len(adjectives) >= len(verbs): + adjectives.pop() + + if adjectives: + adjectives[-1] = adjectives[-1] + '.' + elif verbs: + verbs[-1] = verbs[-1] + '.' + elif nouns: + nouns[-1] = nouns[-1] + '.' + + for (i, defn) in enumerate(nouns): + self.msg(origin.sender, '%s n. %r: %s' % (term, i+1, defn)) + for (i, defn) in enumerate(verbs): + self.msg(origin.sender, '%s v. %r: %s' % (term, i+1, defn)) + for (i, defn) in enumerate(adjectives): + self.msg(origin.sender, '%s a. %r: %s' % (term, i+1, defn)) +f_wordnet.commands = ['wordnet'] +f_wordnet.priority = 'low' + +uri = 'http://encarta.msn.com/dictionary_/%s.html' +r_info = re.compile( + r'(?:ResultBody">

    (.*?) )|(?:(.*?))' +) + +def dict(phenny, input): + word = input.group(2) + word = urllib.quote(word.encode('utf-8')) + + def trim(thing): + if thing.endswith(' '): + thing = thing[:-6] + return thing.strip(' :.') + + bytes = web.get(uri % word) + results = {} + wordkind = None + for kind, sense in r_info.findall(bytes): + kind, sense = trim(kind), trim(sense) + if kind: wordkind = kind + elif sense: + results.setdefault(wordkind, []).append(sense) + result = input.group(2).encode('utf-8') + ' - ' + for key in sorted(results.keys()): + if results[key]: + result += key + ' 1. ' + results[key][0] + if len(results[key]) > 1: + result += ', 2. ' + results[key][1] + result += '; ' + phenny.say(result.rstrip('; ')) +dict.commands = ['dict'] + +if __name__ == '__main__': + print __doc__.strip() -- cgit v1.2.3-1-g7c22