move babel.py to translate.py, add .translate -- use the google translate API for useful work, minor PEP8 fixes
This commit is contained in:
parent
bef56c0a93
commit
36ed83d664
|
@ -1,82 +0,0 @@
|
||||||
import htmlentitydefs
|
|
||||||
import re
|
|
||||||
|
|
||||||
from util import hook, http
|
|
||||||
|
|
||||||
########### from http://effbot.org/zone/re-sub.htm#unescape-html #############
|
|
||||||
|
|
||||||
|
|
||||||
def unescape(text):
    """Replace HTML/XML character references and named entities in *text*.

    Numeric references (``&#65;``, ``&#x41;``) and known named entities
    (``&amp;``) are replaced by the character they denote.  Anything that
    cannot be resolved -- unknown names, malformed or out-of-range
    numbers -- is left in the text exactly as written.

    Returns the text with the substitutions applied.
    """
    # Resolve the interpreter-specific names once per call so the helper
    # works under both Python 2 (unichr/htmlentitydefs) and Python 3.
    try:
        to_char = unichr
    except NameError:
        to_char = chr
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint

    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # character reference
            try:
                if ref[:3] == "&#x":
                    return to_char(int(ref[3:-1], 16))
                return to_char(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or outside the Unicode range.
                # OverflowError: number too large for the built-in to accept.
                pass
        else:
            # named entity
            try:
                return to_char(name2codepoint[ref[1:-1]])
            except KeyError:
                pass
        return ref  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
# Language codes that babel_gen bounces a sentence through, in this order.
languages = ['ja', 'fr', 'de', 'ko', 'ru', 'zh']
# Each code paired with the one that follows it in the list above.
language_pairs = zip(languages, languages[1:])
|
|
||||||
|
|
||||||
|
|
||||||
def goog_trans(text, slang, tlang):
    """Translate *text* from *slang* to *tlang* via the Google AJAX API.

    text  -- the sentence to translate
    slang -- source language code
    tlang -- target language code

    Returns the translated text with HTML entities unescaped.
    Raises IOError if the server reports a responseStatus outside 2xx.
    """
    url = 'http://ajax.googleapis.com/ajax/services/language/translate?v=1.0'
    parsed = http.get_json(url, q=text, langpair=(slang + '|' + tlang))

    status = parsed['responseStatus']
    if not 200 <= status < 300:
        # Pass the server's own explanation along instead of an empty
        # placeholder; fall back to '' when the field is missing or null.
        raise IOError('error with the translation server: %d: %s' % (
            status, parsed.get('responseDetails') or ''))

    return unescape(parsed['responseData']['translatedText'])
|
|
||||||
|
|
||||||
|
|
||||||
def babel_gen(inp):
    """Round-trip *inp* through every language in ``languages``.

    For each language code the current English text is translated into
    that language and back again; the back-translation becomes the input
    of the next round.

    Yields (language code, utf8-encoded translation, back-translation).
    """
    text = inp
    for code in languages:
        outbound = goog_trans(text.encode('utf8'), 'en', code).encode('utf8')
        text = goog_trans(outbound, code, 'en')
        yield code, outbound, text
|
|
||||||
|
|
||||||
|
|
||||||
@hook.command
def babel(inp):
    ".babel <sentence> -- translates <sentence> through multiple languages"

    # Run every round-trip; only the English text of the last round is shown.
    try:
        rounds = list(babel_gen(inp))
    except IOError as err:
        # The translation failure itself becomes the reply.
        return err

    _, _, final_text = rounds[-1]
    return final_text
|
|
||||||
|
|
||||||
|
|
||||||
@hook.command
def babelext(inp):
    ".babelext <sentence> -- like .babel, but with more detailed output"

    def as_unicode(value):
        # The back-translations presumably arrive as unicode already (they
        # are decoded JSON).  Calling .decode() on unicode makes Python 2
        # encode to ASCII first, which fails on any non-ASCII character,
        # so only genuine byte strings are decoded.
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    try:
        babels = list(babel_gen(inp))
    except IOError as e:
        # The translation failure itself becomes the reply.
        return e

    # One 'code:"english text"' entry per round, then the final result.
    out = u''
    for lang, _trans, text in babels:
        out += '%s:"%s", ' % (lang, as_unicode(text))

    out += 'en:"' + as_unicode(babels[-1][2]) + '"'

    # Keep the reply short: retain only the head and tail of long output.
    if len(out) > 300:
        out = out[:150] + ' ... ' + out[-150:]

    return out
|
|
|
@ -0,0 +1,186 @@
|
||||||
|
import htmlentitydefs
|
||||||
|
import re
|
||||||
|
|
||||||
|
from util import hook, http
|
||||||
|
|
||||||
|
########### from http://effbot.org/zone/re-sub.htm#unescape-html #############
|
||||||
|
|
||||||
|
|
||||||
|
def unescape(text):
    """Replace HTML/XML character references and named entities in *text*.

    Numeric references (``&#65;``, ``&#x41;``) and known named entities
    (``&amp;``) are replaced by the character they denote.  Anything that
    cannot be resolved -- unknown names, malformed or out-of-range
    numbers -- is left in the text exactly as written.

    Returns the text with the substitutions applied.
    """
    # Resolve the interpreter-specific names once per call so the helper
    # works under both Python 2 (unichr/htmlentitydefs) and Python 3.
    try:
        to_char = unichr
    except NameError:
        to_char = chr
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint

    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # character reference
            try:
                if ref[:3] == "&#x":
                    return to_char(int(ref[3:-1], 16))
                return to_char(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or outside the Unicode range.
                # OverflowError: number too large for the built-in to accept.
                pass
        else:
            # named entity
            try:
                return to_char(name2codepoint[ref[1:-1]])
            except KeyError:
                pass
        return ref  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
def goog_trans(text, slang, tlang):
    """Translate *text* from *slang* to *tlang* via the Google AJAX API.

    text  -- the sentence to translate
    slang -- source language code; when empty, the reply is expected to
             carry the language the server detected
    tlang -- target language code

    Returns the translated text with HTML entities unescaped.  When *slang*
    is empty the result is prefixed with the detected source language in
    parentheses.
    Raises IOError if the server reports a responseStatus outside 2xx.
    """
    url = 'http://ajax.googleapis.com/ajax/services/language/translate?v=1.0'
    parsed = http.get_json(url, q=text, langpair=(slang + '|' + tlang))

    status = parsed['responseStatus']
    if not 200 <= status < 300:
        # Pass the server's own explanation along instead of an empty
        # placeholder; fall back to '' when the field is missing or null.
        raise IOError('error with the translation server: %d: %s' % (
            status, parsed.get('responseDetails') or ''))

    data = parsed['responseData']
    if not slang:
        return unescape(
            '(%(detectedSourceLanguage)s) %(translatedText)s' % data)
    return unescape(data['translatedText'])
|
||||||
|
|
||||||
|
|
||||||
|
def match_language(fragment):
    """Resolve a user-supplied language hint to a language code.

    The hint is first compared, ignoring case, against every code listed
    in ``lang_pairs`` (an entry may list several space-separated codes).
    If no code matches, the hint is looked for as a substring of the
    full language names.  Entries are tried in table order and the first
    hit wins.

    Returns the first code of the matching entry, or None when nothing
    matches or the hint is empty.
    """
    fragment = fragment.strip().lower()
    if not fragment:
        # '' is a substring of every name and would otherwise "match"
        # the first table entry.
        return None

    # Codes such as "zh-CN" contain upper-case letters, so both sides are
    # compared in lower case.
    for short, _ in lang_pairs:
        codes = short.split()
        if fragment in [code.lower() for code in codes]:
            return codes[0]

    for short, full in lang_pairs:
        if fragment in full.lower():
            return short.split()[0]

    return None
|
||||||
|
|
||||||
|
|
||||||
|
@hook.command
def translate(inp):
    '.translate [source language [target language]] <sentence> -- translates' \
    ' <sentence> from source language (default autodetect) to target' \
    ' language (default English) using Google Translate'

    # At most two leading words can be language hints; the rest is text.
    args = inp.split(' ', 2)

    # goog_trans raises IOError when the server reports a failure; reply
    # with the error the same way babel and babelext do.
    try:
        if len(args) >= 2:
            sl = match_language(args[0])
            if not sl:
                # First word is not a language: translate everything.
                return goog_trans(inp, '', 'en')
            if len(args) >= 3:
                tl = match_language(args[1])
                if not tl:
                    if sl == 'en':
                        return 'unable to determine desired target language'
                    # Second word belongs to the sentence; target English.
                    return goog_trans(args[1] + ' ' + args[2], sl, 'en')
                return goog_trans(args[2], sl, tl)
        # Zero or one word, or exactly two words whose first one happens
        # to match a language: autodetect over the whole input.
        return goog_trans(inp, '', 'en')
    except IOError as e:
        return e
|
||||||
|
|
||||||
|
|
||||||
|
# Language codes that babel_gen bounces a sentence through, in this order.
languages = ['ja', 'fr', 'de', 'ko', 'ru', 'zh']
# Each code paired with the one that follows it in the list above.
language_pairs = zip(languages, languages[1:])
|
||||||
|
|
||||||
|
|
||||||
|
def babel_gen(inp):
    """Round-trip *inp* through every language in ``languages``.

    For each language code the current English text is translated into
    that language and back again; the back-translation becomes the input
    of the next round.

    Yields (language code, utf8-encoded translation, back-translation).
    """
    text = inp
    for code in languages:
        outbound = goog_trans(text.encode('utf8'), 'en', code).encode('utf8')
        text = goog_trans(outbound, code, 'en')
        yield code, outbound, text
|
||||||
|
|
||||||
|
|
||||||
|
@hook.command
def babel(inp):
    ".babel <sentence> -- translates <sentence> through multiple languages"

    # Run every round-trip; only the English text of the last round is shown.
    try:
        rounds = list(babel_gen(inp))
    except IOError as err:
        # The translation failure itself becomes the reply.
        return err

    _, _, final_text = rounds[-1]
    return final_text
|
||||||
|
|
||||||
|
|
||||||
|
@hook.command
def babelext(inp):
    ".babelext <sentence> -- like .babel, but with more detailed output"

    def as_unicode(value):
        # The back-translations presumably arrive as unicode already (they
        # are decoded JSON).  Calling .decode() on unicode makes Python 2
        # encode to ASCII first, which fails on any non-ASCII character,
        # so only genuine byte strings are decoded.
        if isinstance(value, bytes):
            return value.decode('utf8')
        return value

    try:
        babels = list(babel_gen(inp))
    except IOError as e:
        # The translation failure itself becomes the reply.
        return e

    # One 'code:"english text"' entry per round, then the final result.
    out = u''
    for lang, _trans, text in babels:
        out += '%s:"%s", ' % (lang, as_unicode(text))

    out += 'en:"' + as_unicode(babels[-1][2]) + '"'

    # Keep the reply short: retain only the head and tail of long output.
    if len(out) > 300:
        out = out[:150] + ' ... ' + out[-150:]

    return out
|
||||||
|
|
||||||
|
|
||||||
|
# Table of (codes, full name) used by match_language.  "codes" is one or
# more space-separated identifiers; the first one is the code handed to
# the translation service.
#
# match_language walks this list in order and returns the first hit, so
# order matters.  The three entries at the top also appear again further
# down in alphabetical position -- presumably to give them priority for
# ambiguous name fragments; confirm before de-duplicating.
lang_pairs = [
    ("no", "Norwegian"),
    ("it", "Italian"),
    ("ht", "Haitian Creole"),

    # Alphabetical by full name from here on.
    ("af", "Afrikaans"),
    ("sq", "Albanian"),
    ("ar", "Arabic"),
    ("hy", "Armenian"),
    ("az", "Azerbaijani"),
    ("eu", "Basque"),
    ("be", "Belarusian"),
    ("bg", "Bulgarian"),
    ("ca", "Catalan"),
    ("zh-CN", "Chinese"),
    ("hr", "Croatian"),
    ("cs", "Czech"),
    ("da", "Danish"),
    ("nl", "Dutch"),
    ("en", "English"),
    ("et", "Estonian"),
    ("tl", "Filipino"),
    ("fi", "Finnish"),
    ("fr", "French"),
    ("gl", "Galician"),
    ("ka", "Georgian"),
    ("de", "German"),
    ("el", "Greek"),
    ("ht", "Haitian Creole"),
    ("iw", "Hebrew"),
    ("hi", "Hindi"),
    ("hu", "Hungarian"),
    ("is", "Icelandic"),
    ("id", "Indonesian"),
    ("ga", "Irish"),
    ("it", "Italian"),
    ("ja jpn", "Japanese"),
    ("ko", "Korean"),
    ("lv", "Latvian"),
    ("lt", "Lithuanian"),
    ("mk", "Macedonian"),
    ("ms", "Malay"),
    ("mt", "Maltese"),
    ("no", "Norwegian"),
    ("fa", "Persian"),
    ("pl", "Polish"),
    ("pt", "Portuguese"),
    ("ro", "Romanian"),
    ("ru", "Russian"),
    ("sr", "Serbian"),
    ("sk", "Slovak"),
    ("sl", "Slovenian"),
    ("es", "Spanish"),
    ("sw", "Swahili"),
    ("sv", "Swedish"),
    ("th", "Thai"),
    ("tr", "Turkish"),
    ("uk", "Ukrainian"),
    ("ur", "Urdu"),
    ("vi", "Vietnamese"),
    ("cy", "Welsh"),
    ("yi", "Yiddish"),
]
|
|
@ -15,15 +15,17 @@ from util import hook, http
|
||||||
base_url = "http://thetvdb.com/api/"
|
base_url = "http://thetvdb.com/api/"
|
||||||
api_key = "469B73127CA0C411"
|
api_key = "469B73127CA0C411"
|
||||||
|
|
||||||
|
|
||||||
def get_zipped_xml(*args, **kwargs):
    """Download a zip archive and parse one XML member of it.

    The keyword argument ``path`` names the archive member to read and is
    required; all remaining positional and keyword arguments are passed
    straight to ``http.get``.

    Returns the result of ``etree.parse`` on the opened member.
    Raises KeyError when ``path`` is not supplied.
    """
    if "path" not in kwargs:
        raise KeyError("must specify a path for the zipped file to be read")
    member = kwargs.pop("path")

    # Wrap the downloaded bytes in a file-like buffer so ZipFile can seek.
    archive = ZipFile(StringIO(http.get(*args, **kwargs)), "r")
    return etree.parse(archive.open(member))
|
||||||
|
|
||||||
|
|
||||||
@hook.command
|
@hook.command
|
||||||
def tv_next(inp):
|
def tv_next(inp):
|
||||||
".tv_next <series> -- get the next episode of <series> from thetvdb.com"
|
".tv_next <series> -- get the next episode of <series> from thetvdb.com"
|
||||||
|
@ -33,7 +35,7 @@ def tv_next(inp):
|
||||||
query = http.get_xml(base_url + 'GetSeries.php', seriesname=inp)
|
query = http.get_xml(base_url + 'GetSeries.php', seriesname=inp)
|
||||||
except URLError:
|
except URLError:
|
||||||
return "error contacting thetvdb.com"
|
return "error contacting thetvdb.com"
|
||||||
|
|
||||||
series_id = query.xpath('//seriesid/text()')
|
series_id = query.xpath('//seriesid/text()')
|
||||||
|
|
||||||
if not series_id:
|
if not series_id:
|
||||||
|
@ -44,9 +46,9 @@ def tv_next(inp):
|
||||||
try:
|
try:
|
||||||
series = get_zipped_xml(base_url + '%s/series/%s/all/en.zip' %
|
series = get_zipped_xml(base_url + '%s/series/%s/all/en.zip' %
|
||||||
(api_key, series_id), path="en.xml")
|
(api_key, series_id), path="en.xml")
|
||||||
except URLError:
|
except URLError:
|
||||||
return "error contacting thetvdb.com"
|
return "error contacting thetvdb.com"
|
||||||
|
|
||||||
series_name = series.xpath('//SeriesName/text()')[0]
|
series_name = series.xpath('//SeriesName/text()')[0]
|
||||||
|
|
||||||
if series.xpath('//Status/text()')[0] == 'Ended':
|
if series.xpath('//Status/text()')[0] == 'Ended':
|
||||||
|
@ -57,12 +59,12 @@ def tv_next(inp):
|
||||||
|
|
||||||
for episode in reversed(series.xpath('//Episode')):
|
for episode in reversed(series.xpath('//Episode')):
|
||||||
first_aired = episode.findtext("FirstAired")
|
first_aired = episode.findtext("FirstAired")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
airdate = datetime.date(*map(int, first_aired.split('-')))
|
airdate = datetime.date(*map(int, first_aired.split('-')))
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
episode_num = "S%02dE%02d" % (int(episode.findtext("SeasonNumber")),
|
episode_num = "S%02dE%02d" % (int(episode.findtext("SeasonNumber")),
|
||||||
int(episode.findtext("EpisodeNumber")))
|
int(episode.findtext("EpisodeNumber")))
|
||||||
|
|
||||||
|
|
|
@ -87,8 +87,8 @@ def twitter(inp):
|
||||||
tweet = http.get_xml(url)
|
tweet = http.get_xml(url)
|
||||||
except http.HTTPError, e:
|
except http.HTTPError, e:
|
||||||
errors = {400: 'bad request (ratelimited?)',
|
errors = {400: 'bad request (ratelimited?)',
|
||||||
401: 'tweet is private',
|
401: 'tweet is private',
|
||||||
403: 'tweet is private',
|
403: 'tweet is private',
|
||||||
404: 'invalid user/id',
|
404: 'invalid user/id',
|
||||||
500: 'twitter is broken',
|
500: 'twitter is broken',
|
||||||
502: 'twitter is down ("getting upgraded")',
|
502: 'twitter is down ("getting upgraded")',
|
||||||
|
|
Loading…
Reference in New Issue