diff --git a/plugins/babel.py b/plugins/babel.py index 225abd3..17f1add 100644 --- a/plugins/babel.py +++ b/plugins/babel.py @@ -1,46 +1,76 @@ import yaml import urllib +import htmlentitydefs +import re import hook +########### from http://effbot.org/zone/re-sub.htm#unescape-html ############# +def unescape(text): + def fixup(m): + text = m.group(0) + if text[:2] == "&#": + # character reference + try: + if text[:3] == "&#x": + return unichr(int(text[3:-1], 16)) + else: + return unichr(int(text[2:-1])) + except ValueError: + pass + else: + # named entity + try: + text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) + except KeyError: + pass + return text # leave as is + return re.sub("&#?\w+;", fixup, text) +############################################################################## -languages = 'en ja de he es ko ru en zh en'.split(); +languages = 'ja fr de ko ru zh'.split(); language_pairs = zip(languages[:-1], languages[1:]) -def babel_gen(inp): +def goog_trans(text, slang, tlang): req_url = 'http://ajax.googleapis.com/ajax/services/language/translate' \ '?v=1.0&q=%s&langpair=%s' + url = req_url % (urllib.quote(text, safe=''), slang + '%7C' + tlang) + json = urllib.urlopen(url).read() + parsed = yaml.load(json) + if not 200 <= parsed['responseStatus'] < 300: + print parsed + raise IOError, 'error with the translation server: %d: %s' % ( + parsed['responseStatus'], '') + return unescape(parsed['responseData']['translatedText']) - yield 'en', inp - for slang, tlang in language_pairs: +def babel_gen(inp): + for language in languages: inp = inp.encode('utf8') - print slang, tlang, inp - json = urllib.urlopen(req_url % (urllib.quote(inp, safe=''), - slang + '%7C' + tlang)).read() - parsed = yaml.load(json) - if not 200 <= parsed['responseStatus'] < 300: - raise IOError, 'error with the translation server' - inp = parsed['responseData']['translatedText'] - yield tlang, inp + trans = goog_trans(inp, 'en', language).encode('utf8') + inp = goog_trans(trans, language, 'en') + print language, trans, inp + yield language, trans, inp @hook.command def babel(inp): try: - return list(babel_gen(inp))[-1][1] - except IOError: - return 'error with the translation server' + return list(babel_gen(inp))[-1][2] + except IOError, e: + return e @hook.command -def babelexp(inp): +def babelext(inp): try: babels = list(babel_gen(inp)) - except IOError: - return 'error with the translation server' + except IOError, e: + return e out = u'' - for lang, text in babels: + for lang, trans, text in babels: out += '%s:"%s", ' % (lang, text.decode('utf8')) + out += 'en:"' + babels[-1][2].decode('utf8') + '"' + if len(out) > 300: out = out[:150] + ' ... ' + out[-150:] - return out[:-2] + return out diff --git a/plugins/iambuttbot.py b/plugins/iambuttbot.py index 51d4606..91f925b 100644 --- a/plugins/iambuttbot.py +++ b/plugins/iambuttbot.py @@ -13,6 +13,6 @@ def iambuttbot(bot, input): password = open('iambuttbot_password').readlines()[0].strip() status = input.inp if len(input.inp) <= 140 else input.inp[:137] + "..." - data = urllib.urlencode({"status": status}) + data = urllib.urlencode({"status": status.encode('utf8')}) url = 'http://iambuttbot:%s@twitter.com/statuses/update.xml' % password response = urllib.urlopen(url, data)