h/plugins/babel.py

94 lines
2.4 KiB
Python
Raw Normal View History

2009-03-25 04:45:33 +00:00
import urllib
2009-03-28 03:42:19 +00:00
import htmlentitydefs
import re
import json
2009-03-25 04:45:33 +00:00
from util import hook
2009-03-28 03:42:19 +00:00
########### from http://effbot.org/zone/re-sub.htm#unescape-html #############
2009-03-28 03:42:19 +00:00
def unescape(text):
    """Replace HTML character references and named entities in *text*.

    Decimal (``&#65;``) and hexadecimal (``&#x41;``) character references
    and named entities (``&amp;``) are substituted with the character they
    denote.  Anything that cannot be resolved -- an unknown entity name, a
    malformed number, or a number no character exists for -- is left in
    the output exactly as it appeared.

    Returns the text with every resolvable reference replaced.
    """
    # Resolve the version-specific names locally so the helper runs on both
    # Python 2 (unichr / htmlentitydefs) and Python 3 (chr / html.entities).
    try:
        to_char = unichr
    except NameError:
        to_char = chr
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint

    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return to_char(int(text[3:-1], 16))
                return to_char(int(text[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: digits are malformed or the code point is out
                # of range; OverflowError: the number is too large to be
                # converted at all.  Either way, keep the original text.
                pass
        else:
            # named entity
            try:
                return to_char(name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text  # leave as is

    # Raw string so that \w reaches the regex engine untouched.
    return re.sub(r"&#?\w+;", fixup, text)
2009-03-28 03:42:19 +00:00
##############################################################################
2009-03-25 04:45:33 +00:00
# Language codes the input sentence is translated through, in this order.
languages = 'ja fr de ko ru zh'.split()

# Adjacent (first, second) pairs taken from `languages`.  Built as a real
# list so it can be indexed and iterated repeatedly even where zip() returns
# a one-shot iterator.
language_pairs = list(zip(languages[:-1], languages[1:]))
2009-03-28 03:42:19 +00:00
def goog_trans(text, slang, tlang):
    """Translate *text* from language *slang* to *tlang* over HTTP.

    text  -- the string to translate; it is percent-quoted into the query
             string with no characters treated as safe
    slang -- source language code
    tlang -- target language code

    Returns the ``translatedText`` field of the JSON reply with HTML
    entities unescaped.

    Raises IOError when the reply's ``responseStatus`` is outside 200-299.
    """
    req_url = ('http://ajax.googleapis.com/ajax/services/language/translate'
               '?v=1.0&q=%s&langpair=%s')

    # '%7C' is a percent-encoded '|' separating the two language codes.
    url = req_url % (urllib.quote(text, safe=''), slang + '%7C' + tlang)

    # Close the connection explicitly instead of leaving it to the garbage
    # collector.
    response = urllib.urlopen(url)
    try:
        page = response.read()
    finally:
        response.close()
    parsed = json.loads(page)

    status = parsed['responseStatus']
    if not 200 <= status < 300:
        # NOTE(review): assumes the reply carries its error description
        # under 'responseDetails' -- confirm against the service's response
        # format.  A missing or empty value falls back to ''.
        details = parsed.get('responseDetails') or ''
        raise IOError('error with the translation server: %d: %s' % (
            status, details))

    return unescape(parsed['responseData']['translatedText'])
2009-03-25 04:45:33 +00:00
2009-03-28 03:42:19 +00:00
def babel_gen(inp):
    """Bounce *inp* between English and every language in ``languages``.

    For each language in turn, the current English text is translated into
    that language and straight back again; the text that comes back is the
    input for the next language.

    Yields ``(language, trans, inp)`` tuples: the language code, the UTF-8
    encoded translation into that language, and the text translated back
    to English as returned by ``goog_trans``.

    Errors raised by ``goog_trans`` (IOError on a translation-server error)
    propagate to the caller.
    """
    def to_utf8(value):
        # Encode text only.  Calling .encode() on a Python 2 byte string
        # triggers an implicit ASCII decode first, which fails as soon as
        # the bytes contain anything non-ASCII.
        return value if isinstance(value, bytes) else value.encode('utf8')

    for language in languages:
        trans = to_utf8(goog_trans(to_utf8(inp), 'en', language))
        inp = goog_trans(trans, language, 'en')
        yield language, trans, inp
2009-03-25 04:45:33 +00:00
2009-03-25 04:45:33 +00:00
@hook.command
def babel(inp):
    ".babel <sentence> -- translates <sentence> through multiple languages"
    # The docstring above is also the usage text returned for empty input,
    # so it stays a single user-facing line.
    if not inp:
        return babel.__doc__

    try:
        # Run the whole translation chain; only this call can raise IOError.
        rounds = list(babel_gen(inp))
    except IOError as e:
        # The exception object itself is handed back as the command result.
        return e

    # Third item of the last tuple: the English text after the final round.
    return rounds[-1][2]
2009-03-25 04:45:33 +00:00
2009-03-25 04:45:33 +00:00
@hook.command
def babelext(inp):
    ".babelext <sentence> -- like .babel, but with more detailed output"
    # The docstring above is also the usage text returned for empty input,
    # so it stays a single user-facing line.
    if not inp:
        return babelext.__doc__

    try:
        babels = list(babel_gen(inp))
    except IOError as e:
        # The exception object itself is handed back as the command result.
        return e

    def as_text(value):
        # Decode real byte strings only.  Calling .decode() on text that is
        # already unicode forces an implicit ASCII encode on Python 2 and
        # blows up on the first non-ASCII character.
        return value.decode('utf8') if isinstance(value, bytes) else value

    # NOTE(review): each language label is paired with the third tuple item
    # (the text translated back to English after that language), not with
    # the foreign-language translation itself -- confirm this is intended.
    parts = [u'%s:"%s", ' % (lang, as_text(text))
             for lang, _trans, text in babels]
    parts.append(u'en:"' + as_text(babels[-1][2]) + u'"')
    out = u''.join(parts)

    # Keep the reply short: show only the first and last 150 characters.
    if len(out) > 300:
        out = out[:150] + ' ... ' + out[-150:]

    return out