babel fixes, iambuttbot utf8 fix

This commit is contained in:
Ryan Hitchman 2009-03-27 21:42:19 -06:00
parent 852bebe58c
commit b1d722fdf7
2 changed files with 51 additions and 21 deletions

View File

@@ -1,46 +1,76 @@
import yaml import yaml
import urllib import urllib
import htmlentitydefs
import re
import hook import hook
########### from http://effbot.org/zone/re-sub.htm#unescape-html #############
def unescape(text):
    """Replace HTML character references and named entities in *text*.

    Numeric references (``&#65;``, ``&#x41;``) and named entities found in
    ``htmlentitydefs.name2codepoint`` (``&amp;``) are replaced by the
    character they denote.  Anything that cannot be resolved -- an unknown
    entity name, malformed digits, or a number ``unichr`` rejects -- is
    left in the text exactly as written.

    Adapted from http://effbot.org/zone/re-sub.htm#unescape-html
    """
    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # numeric character reference: hex (&#x..;) or decimal (&#..;)
            try:
                if ref[:3].lower() == "&#x":
                    return unichr(int(ref[3:-1], 16))
                return unichr(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: bad digits or a code point out of range.
                # OverflowError: a number too large for unichr to accept.
                pass
        else:
            # named entity
            try:
                return unichr(htmlentitydefs.name2codepoint[ref[1:-1]])
            except KeyError:
                pass
        return ref  # unresolvable: leave as is

    return re.sub(r"&#?\w+;", fixup, text)
##############################################################################
# Language codes that babel_gen translates into (and back from), in order.
languages = 'ja fr de ko ru zh'.split()
# Adjacent (source, target) pairs taken from `languages`.
# NOTE(review): not referenced by the functions visible in this diff --
# confirm whether anything else still uses it.
language_pairs = zip(languages[:-1], languages[1:])
def babel_gen(inp): def goog_trans(text, slang, tlang):
req_url = 'http://ajax.googleapis.com/ajax/services/language/translate' \ req_url = 'http://ajax.googleapis.com/ajax/services/language/translate' \
'?v=1.0&q=%s&langpair=%s' '?v=1.0&q=%s&langpair=%s'
url = req_url % (urllib.quote(text, safe=''), slang + '%7C' + tlang)
json = urllib.urlopen(url).read()
parsed = yaml.load(json)
if not 200 <= parsed['responseStatus'] < 300:
print parsed
raise IOError, 'error with the translation server: %d: %s' % (
parsed['responseStatus'], '')
return unescape(parsed['responseData']['translatedText'])
yield 'en', inp def babel_gen(inp):
for slang, tlang in language_pairs: for language in languages:
inp = inp.encode('utf8') inp = inp.encode('utf8')
print slang, tlang, inp trans = goog_trans(inp, 'en', language).encode('utf8')
json = urllib.urlopen(req_url % (urllib.quote(inp, safe=''), inp = goog_trans(trans, language, 'en')
slang + '%7C' + tlang)).read() print language, trans, inp
parsed = yaml.load(json) yield language, trans, inp
if not 200 <= parsed['responseStatus'] < 300:
raise IOError, 'error with the translation server'
inp = parsed['responseData']['translatedText']
yield tlang, inp
@hook.command @hook.command
def babel(inp): def babel(inp):
try: try:
return list(babel_gen(inp))[-1][1] return list(babel_gen(inp))[-1][2]
except IOError: except IOError, e:
return 'error with the translation server' return e
@hook.command @hook.command
def babelexp(inp): def babelext(inp):
try: try:
babels = list(babel_gen(inp)) babels = list(babel_gen(inp))
except IOError: except IOError, e:
return 'error with the translation server' return e
out = u'' out = u''
for lang, text in babels: for lang, trans, text in babels:
out += '%s:"%s", ' % (lang, text.decode('utf8')) out += '%s:"%s", ' % (lang, text.decode('utf8'))
out += 'en:"' + babels[-1][2].decode('utf8') + '"'
if len(out) > 300: if len(out) > 300:
out = out[:150] + ' ... ' + out[-150:] out = out[:150] + ' ... ' + out[-150:]
return out[:-2] return out

View File

@@ -13,6 +13,6 @@ def iambuttbot(bot, input):
password = open('iambuttbot_password').readlines()[0].strip() password = open('iambuttbot_password').readlines()[0].strip()
status = input.inp if len(input.inp) <= 140 else input.inp[:137] + "..." status = input.inp if len(input.inp) <= 140 else input.inp[:137] + "..."
data = urllib.urlencode({"status": status}) data = urllib.urlencode({"status": status.encode('utf8')})
url = 'http://iambuttbot:%s@twitter.com/statuses/update.xml' % password url = 'http://iambuttbot:%s@twitter.com/statuses/update.xml' % password
response = urllib.urlopen(url, data) response = urllib.urlopen(url, data)