diff options
Diffstat (limited to 'modules/wiktionary.py')
-rwxr-xr-x | modules/wiktionary.py | 140 |
1 files changed, 70 insertions, 70 deletions
diff --git a/modules/wiktionary.py b/modules/wiktionary.py index 9229194..d3321c5 100755 --- a/modules/wiktionary.py +++ b/modules/wiktionary.py @@ -14,87 +14,87 @@ uri = 'http://en.wiktionary.org/w/index.php?title=%s&printable=yes' r_tag = re.compile(r'<[^>]+>') r_ul = re.compile(r'(?ims)<ul>.*?</ul>') -def text(html): - text = r_tag.sub('', html).strip() - text = text.replace('\n', ' ') - text = text.replace('\r', '') - text = text.replace('(intransitive', '(intr.') - text = text.replace('(transitive', '(trans.') - return text +def text(html): + text = r_tag.sub('', html).strip() + text = text.replace('\n', ' ') + text = text.replace('\r', '') + text = text.replace('(intransitive', '(intr.') + text = text.replace('(transitive', '(trans.') + return text -def wiktionary(word): - bytes = web.get(uri % web.urllib.quote(word.encode('utf-8'))) - bytes = r_ul.sub('', bytes) +def wiktionary(word): + bytes = web.get(uri % web.urllib.quote(word.encode('utf-8'))) + bytes = r_ul.sub('', bytes) - mode = None - etymology = None - definitions = {} - for line in bytes.splitlines(): - if 'id="Etymology"' in line: - mode = 'etymology' - elif 'id="Noun"' in line: - mode = 'noun' - elif 'id="Verb"' in line: - mode = 'verb' - elif 'id="Adjective"' in line: - mode = 'adjective' - elif 'id="Adverb"' in line: - mode = 'adverb' - elif 'id="Interjection"' in line: - mode = 'interjection' - elif 'id="Particle"' in line: - mode = 'particle' - elif 'id="Preposition"' in line: - mode = 'preposition' - elif 'id="' in line: - mode = None + mode = None + etymology = None + definitions = {} + for line in bytes.splitlines(): + if 'id="Etymology"' in line: + mode = 'etymology' + elif 'id="Noun"' in line: + mode = 'noun' + elif 'id="Verb"' in line: + mode = 'verb' + elif 'id="Adjective"' in line: + mode = 'adjective' + elif 'id="Adverb"' in line: + mode = 'adverb' + elif 'id="Interjection"' in line: + mode = 'interjection' + elif 'id="Particle"' in line: + mode = 'particle' + elif 'id="Preposition"' in line: + mode = 'preposition' + elif 'id="' in line: + mode = None - elif (mode == 'etmyology') and ('<p>' in line): - etymology = text(line) - elif (mode is not None) and ('<li>' in line): - definitions.setdefault(mode, []).append(text(line)) + elif (mode == 'etmyology') and ('<p>' in line): + etymology = text(line) + elif (mode is not None) and ('<li>' in line): + definitions.setdefault(mode, []).append(text(line)) - if '<hr' in line: - break - return etymology, definitions + if '<hr' in line: + break + return etymology, definitions -parts = ('preposition', 'particle', 'noun', 'verb', - 'adjective', 'adverb', 'interjection') +parts = ('preposition', 'particle', 'noun', 'verb', + 'adjective', 'adverb', 'interjection') -def format(word, definitions, number=2): - result = '%s' % word.encode('utf-8') - for part in parts: - if definitions.has_key(part): - defs = definitions[part][:number] - result += u' \u2014 '.encode('utf-8') + ('%s: ' % part) - n = ['%s. %s' % (i + 1, e.strip(' .')) for i, e in enumerate(defs)] - result += ', '.join(n) - return result.strip(' .,') +def format(word, definitions, number=2): + result = '%s' % word.encode('utf-8') + for part in parts: + if definitions.has_key(part): + defs = definitions[part][:number] + result += u' \u2014 '.encode('utf-8') + ('%s: ' % part) + n = ['%s. %s' % (i + 1, e.strip(' .')) for i, e in enumerate(defs)] + result += ', '.join(n) + return result.strip(' .,') -def w(phenny, input): - if not input.group(2): - return phenny.reply("Nothing to define.") - word = input.group(2) - etymology, definitions = wiktionary(word) - if not definitions: - phenny.say("Couldn't get any definitions for %s." % word) - return +def w(phenny, input): + if not input.group(2): + return phenny.reply("Nothing to define.") + word = input.group(2) + etymology, definitions = wiktionary(word) + if not definitions: + phenny.say("Couldn't get any definitions for %s." % word) + return - result = format(word, definitions) - if len(result) < 150: - result = format(word, definitions, 3) - if len(result) < 150: - result = format(word, definitions, 5) + result = format(word, definitions) + if len(result) < 150: + result = format(word, definitions, 3) + if len(result) < 150: + result = format(word, definitions, 5) - if len(result) > 300: - result = result[:295] + '[...]' - phenny.say(result) + if len(result) > 300: + result = result[:295] + '[...]' + phenny.say(result) w.commands = ['w'] w.example = '.w bailiwick' -def encarta(phenny, input): - return phenny.reply('Microsoft removed Encarta, try .w instead!') +def encarta(phenny, input): + return phenny.reply('Microsoft removed Encarta, try .w instead!') encarta.commands = ['dict'] -if __name__ == '__main__': - print __doc__.strip() +if __name__ == '__main__': + print __doc__.strip() |