summaryrefslogtreecommitdiffstats
path: root/modules/translate.py
blob: ed3589f0066741d830fe5c3574f6d0a6b4df289a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python
# coding=utf-8
"""
translate.py - Phenny Translation Module
Copyright 2008, Sean B. Palmer, inamidst.com
Licensed under the Eiffel Forum License 2.

http://inamidst.com/phenny/
"""

import re
import web

# Captures the text (anything up to the next '<') inside a
# <div style=padding:10px;> element. translate() searches the translation
# service's response with this and uses the first match as the result.
r_translation = re.compile(r'<div style=padding:10px;>([^<]+)</div>')

def guess_language(phrase):
   """Ask the Xerox language guesser which language phrase is written in.

   Returns the two-letter code for the languages in the table below, the
   guesser's own lowercased name (cut at the first underscore) for any other
   language, or the string 'unknown' when no result line is found in the
   response.
   """
   codes = {
      'english': 'en',
      'french': 'fr',
      'spanish': 'es',
      'portuguese': 'pt',
      'german': 'de',
      'italian': 'it',
      'korean': 'ko',
      'japanese': 'ja',
      'chinese': 'zh',
      'dutch': 'nl',
      'greek': 'el',
      'russian': 'ru'
   }
   # The guessed language name follows this markup on the result line.
   marker = '<listing><font size=+1>'

   response = web.post(
      'http://www.xrce.xerox.com/cgi-bin/mltt/LanguageGuesser',
      {'Text': phrase}
   )
   for line in response.splitlines():
      pieces = line.split(marker, 1)
      if len(pieces) < 2:
         # Marker not on this line; keep looking.
         continue
      # Keep only the part before any '_' suffix in the reported name.
      name = pieces[1].strip().lower().split('_', 1)[0]
      # Unrecognised names are passed through unchanged.
      return codes.get(name, name)
   return 'unknown'

def translate(phrase, lang, target='en'):
   """Translate phrase from lang into target via the Babel Fish web form.

   Returns the translated text with carriage returns and newlines turned
   into spaces and runs of spaces collapsed to a single space, or None when
   the response contains no match for r_translation.
   """
   fields = {
      'doit': 'done',
      'intl': '1',
      'tt': 'urltext',
      'trtext': phrase,
      # Language pair, e.g. 'fr_en'.
      'lp': '_'.join((lang, target))
   }

   response = web.post('http://world.altavista.com/tr', fields)
   found = r_translation.search(response)
   if found is None:
      return None

   text = found.group(1)
   for line_break in ('\r', '\n'):
      text = text.replace(line_break, ' ')
   # Squeeze every run of two or more spaces down to one.
   return re.sub(' {2,}', ' ', text)

def tr(phenny, input):
   # Translate the quoted phrase into English and reply with the result.
   #
   # phenny: bot object; this function uses phenny.admin(nick) and
   #         phenny.reply(text).
   # input:  triggering message; input.groups() yields (lang, phrase) from
   #         tr.rule and input.nick is the sender.
   #
   # Every path returns whatever phenny.reply() returns.
   #
   # Deliberately documented with comments rather than a docstring so that
   # tr.__doc__ stays unset; the user-facing help text lives in tr.doc.
   lang, phrase = input.groups()

   if (len(phrase) > 350) and (not phenny.admin(input.nick)):
      return phenny.reply('Phrase must be under 350 characters.')

   # An explicit language code forces the interpretation, as tr.doc
   # promises; the language is only guessed when none was given.
   language = lang or guess_language(phrase)
   # guess_language() reports failure with the string 'unknown', not None,
   # so both have to be treated as "could not guess".
   if language is None or language == 'unknown':
      return phenny.reply('Unable to guess the language, sorry.')

   if language != 'en':
      translation = translate(phrase, language)
      if translation is not None:
         return phenny.reply(u'"%s" (%s)' % (translation, language))

      error = "I think it's %s, but I can't translate that language."
      return phenny.reply(error % language.title())

   # Otherwise, it's English, so mangle it for fun
   mangled = phrase
   for other in ['de', 'ja']:
      mangled = translate(mangled, 'en', other)
      if mangled is None:
         # A failed hop must not be fed back in as the next phrase.
         break
      mangled = translate(mangled, other, 'en')
      if mangled is None:
         break

   if mangled is not None:
      return phenny.reply(u'"%s" (en-unmangled)' % mangled)
   return phenny.reply("I think it's English already.")
   # @@ or 'Why but that be English, sire.'
# Help text: the usage pattern, then a one-line description.
tr.doc = ('phenny: "<phrase>"? or phenny: <lang> "<phrase>"?', 
   'Translate <phrase>, optionally forcing the <lang> interpretation.')
# Trigger pattern: an optional two-letter lowercase language code followed by
# spaces (group 1), then a phrase enclosed in straight or curly double quotes
# (group 2), a literal '?' and optional trailing spaces.
# NOTE(review): '$nick' is presumably replaced by the bot with its own
# nickname so the rule only fires when the bot is addressed -- confirm.
tr.rule = ('$nick', ur'(?:([a-z]{2}) +)?["“](.+?)["”]\? *$')
tr.priority = 'low'

if __name__ == '__main__': 
   # Run as a script: just print the module docstring.
   print __doc__.strip()