From 87b9866c7c2ebeaa3c1c010176fa3bc24e017300 Mon Sep 17 00:00:00 2001 From: "Sean B. Palmer" Date: Fri, 23 May 2008 19:16:38 +0100 Subject: New translation module, using the Google Ajax interface. --- modules/translate.py | 126 ++++++++++++++++++--------------------------------- 1 file changed, 43 insertions(+), 83 deletions(-) diff --git a/modules/translate.py b/modules/translate.py index 27bd094..cf960aa 100755 --- a/modules/translate.py +++ b/modules/translate.py @@ -8,107 +8,67 @@ Licensed under the Eiffel Forum License 2. http://inamidst.com/phenny/ """ -import re, time +import re, urllib import web -r_translation = re.compile(r'
([^<]+)
') +r_json = re.compile(r'^[,:{}\[\]0-9.\-+Eaeflnr-u \n\r\t]+$') +r_string = re.compile(r'("(\\.|[^"\\])*")') +env = {'__builtins__': None, 'null': None, + 'true': True, 'false': False} -def guess_language(phrase): - languages = { - 'english': 'en', - 'french': 'fr', - 'spanish': 'es', - 'portuguese': 'pt', - 'german': 'de', - 'italian': 'it', - 'korean': 'ko', - 'japanese': 'ja', - 'chinese': 'zh', - 'dutch': 'nl', - 'greek': 'el', - 'russian': 'ru' - } +def json(text): + """Evaluate JSON text safely (we hope).""" + if r_json.match(r_string.sub('', text)): + text = r_string.sub(lambda m: 'u' + m.group(1), text) + return eval(text.strip(' \t\r\n'), env, {}) + raise ValueError('Input must be serialised JSON.') - uri = 'http://www.xrce.xerox.com/cgi-bin/mltt/LanguageGuesser' - form = {'Text': phrase} - bytes = web.post(uri, form) - for line in bytes.splitlines(): - if '' in line: - i = line.find('') - lang = line[i+len(''):].strip() - lang = lang.lower() - if '_' in lang: - j = lang.find('_') - lang = lang[:j] - try: return languages[lang].lower() - except KeyError: - return lang.lower() - return 'Moon Language' +def detect(text): + uri = 'http://ajax.googleapis.com/ajax/services/language/detect' + q = urllib.quote(text) + bytes = web.get(uri + '?q=' + q + '&v=1.0') + result = json(bytes) + try: return result['responseData']['language'] + except Exception: return None -def translate(phrase, lang, target='en'): - babelfish = 'http://uk.babelfish.yahoo.com/translate_txt' - form = { - 'ei': 'UTF-8', - 'doit': 'done', - 'fr': 'bf-home', - 'intl': '1', - 'tt': 'urltext', - 'trtext': phrase, - 'lp': lang + '_' + target - } +def translate(text, input, output): + uri = 'http://ajax.googleapis.com/ajax/services/language/translate' + q = urllib.quote(text) + pair = input + '%7C' + output + bytes = web.get(uri + '?q=' + q + '&v=1.0&langpair=' + pair) + result = json(bytes) + try: msg = result['responseData']['translatedText'] + except Exception: + msg = 'The %s to %s 
translation failed, sorry!' % (input, output) + else: + msg = msg.encode('cp1252').replace('&#39;', "'") + msg = '"%s" (%s to %s, translate.google.com)' % (msg, input, output) + return msg - bytes = web.post(babelfish, form) - m = r_translation.search(bytes) - if m: - translation = m.group(1) - translation = translation.replace('\r', ' ') - translation = translation.replace('\n', ' ') - while ' ' in translation: - translation = translation.replace(' ', ' ') - return translation.lower() - return None - -def tr(phenny, input): +def tr(phenny, context): """Translates a phrase, with an optional language hint.""" - original_input = input - input, output, phrase = original_input.groups() + input, output, phrase = context.groups() + phrase = phrase.encode('utf-8') - if (len(phrase) > 350) and (not original_input.admin): + + if (len(phrase) > 350) and (not context.admin): return phenny.reply('Phrase must be under 350 characters.') - input = input or guess_language(phrase) + input = input or detect(phrase) if not input: - return phenny.reply('Unable to guess the language, sorry.') + err = 'Unable to guess your crazy moon language, sorry.' + return phenny.reply(err) input = input.encode('utf-8') output = (output or 'en').encode('utf-8') - if not ((input == 'en') and (output == 'en')): - translation = translate(phrase, input, output) - if translation is not None: - translation = translation.decode('utf-8').encode('utf-8') - if output == 'en': - return phenny.reply('"%s" (%s)' % (translation, input)) - else: return phenny.reply('"%s" (%s -> %s)' % \ - (translation, input, output)) + if input != output: + msg = translate(phrase, input, output) + phenny.reply(msg) + else: phenny.reply('Ehwhatnow?') - error = "I think it's %s, but I can't translate it currently." 
- return phenny.reply(error % input.title()) - - # Otherwise, it's English, so mangle it for fun - for other in ['de', 'ja', 'de', 'ja', 'de', 'ja', 'de', 'ja', 'de', 'ja']: - phrase = translate(phrase, 'en', other) - phrase = translate(phrase, other, 'en') - time.sleep(0.1) - - if phrase is not None: - return phenny.reply(u'"%s" (en-unmangled)' % phrase) - return phenny.reply("I think it's English already.") - # @@ or 'Why but that be English, sire.' tr.rule = ('$nick', ur'(?:([a-z]{2}) +)?(?:([a-z]{2}) +)?["“](.+?)["”]\? *$') tr.example = '$nickname: "mon chien"? or $nickname: fr "mon chien"?' tr.priority = 'low' -# @@ mangle - if __name__ == '__main__': print __doc__.strip() -- cgit v1.2.3-1-g7c22