From 36ed83d6649f7ae65134765b6f5d71fa4bb3ae69 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Fri, 10 Sep 2010 22:01:20 -0500 Subject: [PATCH] move babel.py to translate.py, add .translate -- use the google translate API for useful work, minor PEP8 fixes --- plugins/babel.py | 82 ------------------- plugins/translate.py | 186 +++++++++++++++++++++++++++++++++++++++++++ plugins/tvdb.py | 14 ++-- plugins/twitter.py | 4 +- 4 files changed, 196 insertions(+), 90 deletions(-) delete mode 100644 plugins/babel.py create mode 100644 plugins/translate.py diff --git a/plugins/babel.py b/plugins/babel.py deleted file mode 100644 index e4880f1..0000000 --- a/plugins/babel.py +++ /dev/null @@ -1,82 +0,0 @@ -import htmlentitydefs -import re - -from util import hook, http - -########### from http://effbot.org/zone/re-sub.htm#unescape-html ############# - - -def unescape(text): - def fixup(m): - text = m.group(0) - if text[:2] == "&#": - # character reference - try: - if text[:3] == "&#x": - return unichr(int(text[3:-1], 16)) - else: - return unichr(int(text[2:-1])) - except ValueError: - pass - else: - # named entity - try: - text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) - except KeyError: - pass - return text # leave as is - - return re.sub("&#?\w+;", fixup, text) - -############################################################################## - -languages = 'ja fr de ko ru zh'.split() -language_pairs = zip(languages[:-1], languages[1:]) - - -def goog_trans(text, slang, tlang): - url = 'http://ajax.googleapis.com/ajax/services/language/translate?v=1.0' - parsed = http.get_json(url, q=text, langpair=(slang + '|' + tlang)) - if not 200 <= parsed['responseStatus'] < 300: - raise IOError('error with the translation server: %d: %s' % ( - parsed['responseStatus'], '')) - return unescape(parsed['responseData']['translatedText']) - - -def babel_gen(inp): - for language in languages: - inp = inp.encode('utf8') - trans = goog_trans(inp, 'en', language).encode('utf8') - inp = goog_trans(trans, language, 'en') - yield language, trans, inp - - -@hook.command -def babel(inp): - ".babel -- translates through multiple languages" - - try: - return list(babel_gen(inp))[-1][2] - except IOError, e: - return e - - -@hook.command -def babelext(inp): - ".babelext -- like .babel, but with more detailed output" - - try: - babels = list(babel_gen(inp)) - except IOError, e: - return e - - out = u'' - for lang, trans, text in babels: - out += '%s:"%s", ' % (lang, text.decode('utf8')) - - out += 'en:"' + babels[-1][2].decode('utf8') + '"' - - if len(out) > 300: - out = out[:150] + ' ... ' + out[-150:] - - return out diff --git a/plugins/translate.py b/plugins/translate.py new file mode 100644 index 0000000..9ed19dc --- /dev/null +++ b/plugins/translate.py @@ -0,0 +1,186 @@ +import htmlentitydefs +import re + +from util import hook, http + +########### from http://effbot.org/zone/re-sub.htm#unescape-html ############# + + +def unescape(text): + def fixup(m): + text = m.group(0) + if text[:2] == "&#": + # character reference + try: + if text[:3] == "&#x": + return unichr(int(text[3:-1], 16)) + else: + return unichr(int(text[2:-1])) + except ValueError: + pass + else: + # named entity + try: + text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) + except KeyError: + pass + return text # leave as is + + return re.sub("&#?\w+;", fixup, text) + +############################################################################## + + +def goog_trans(text, slang, tlang): + url = 'http://ajax.googleapis.com/ajax/services/language/translate?v=1.0' + parsed = http.get_json(url, q=text, langpair=(slang + '|' + tlang)) + print slang, tlang, parsed + if not 200 <= parsed['responseStatus'] < 300: + raise IOError('error with the translation server: %d: %s' % ( + parsed['responseStatus'], '')) + if not slang: + return unescape('(%(detectedSourceLanguage)s) %(translatedText)s' % + (parsed['responseData'])) + return unescape(parsed['responseData']['translatedText']) + + +def match_language(fragment): + fragment = fragment.lower() + for short, _ in lang_pairs: + if fragment in short.split(): + return short.split()[0] + + for short, full in lang_pairs: + if fragment in full.lower(): + return short.split()[0] + + return None + + +@hook.command +def translate(inp): + '.translate [source language [target language]] -- translates' \ + ' from source language (default autodetect) to target' \ + ' language (default English) using Google Translate' + + args = inp.split(' ', 2) + + if len(args) >= 2: + sl = match_language(args[0]) + if not sl: + return goog_trans(inp, '', 'en') + if len(args) >= 3: + tl = match_language(args[1]) + if not tl: + if sl == 'en': + return 'unable to determine desired target language' + return goog_trans(args[1] + ' ' + args[2], sl, 'en') + return goog_trans(args[2], sl, tl) + return goog_trans(inp, '', 'en') + + +languages = 'ja fr de ko ru zh'.split() +language_pairs = zip(languages[:-1], languages[1:]) + + +def babel_gen(inp): + for language in languages: + inp = inp.encode('utf8') + trans = goog_trans(inp, 'en', language).encode('utf8') + inp = goog_trans(trans, language, 'en') + yield language, trans, inp + + +@hook.command +def babel(inp): + ".babel -- translates through multiple languages" + + try: + return list(babel_gen(inp))[-1][2] + except IOError, e: + return e + + +@hook.command +def babelext(inp): + ".babelext -- like .babel, but with more detailed output" + + try: + babels = list(babel_gen(inp)) + except IOError, e: + return e + + out = u'' + for lang, trans, text in babels: + out += '%s:"%s", ' % (lang, text.decode('utf8')) + + out += 'en:"' + babels[-1][2].decode('utf8') + '"' + + if len(out) > 300: + out = out[:150] + ' ... ' + out[-150:] + + return out + + +lang_pairs = [ + ("no", "Norwegian"), + ("it", "Italian"), + ("ht", "Haitian Creole"), + ("af", "Afrikaans"), + ("sq", "Albanian"), + ("ar", "Arabic"), + ("hy", "Armenian"), + ("az", "Azerbaijani"), + ("eu", "Basque"), + ("be", "Belarusian"), + ("bg", "Bulgarian"), + ("ca", "Catalan"), + ("zh-CN", "Chinese"), + ("hr", "Croatian"), + ("cs", "Czech"), + ("da", "Danish"), + ("nl", "Dutch"), + ("en", "English"), + ("et", "Estonian"), + ("tl", "Filipino"), + ("fi", "Finnish"), + ("fr", "French"), + ("gl", "Galician"), + ("ka", "Georgian"), + ("de", "German"), + ("el", "Greek"), + ("ht", "Haitian Creole"), + ("iw", "Hebrew"), + ("hi", "Hindi"), + ("hu", "Hungarian"), + ("is", "Icelandic"), + ("id", "Indonesian"), + ("ga", "Irish"), + ("it", "Italian"), + ("ja jpn", "Japanese"), + ("ko", "Korean"), + ("lv", "Latvian"), + ("lt", "Lithuanian"), + ("mk", "Macedonian"), + ("ms", "Malay"), + ("mt", "Maltese"), + ("no", "Norwegian"), + ("fa", "Persian"), + ("pl", "Polish"), + ("pt", "Portuguese"), + ("ro", "Romanian"), + ("ru", "Russian"), + ("sr", "Serbian"), + ("sk", "Slovak"), + ("sl", "Slovenian"), + ("es", "Spanish"), + ("sw", "Swahili"), + ("sv", "Swedish"), + ("th", "Thai"), + ("tr", "Turkish"), + ("uk", "Ukrainian"), + ("ur", "Urdu"), + ("vi", "Vietnamese"), + ("cy", "Welsh"), + ("yi", "Yiddish") +] diff --git a/plugins/tvdb.py b/plugins/tvdb.py index 9fff39b..d2e7496 100644 --- a/plugins/tvdb.py +++ b/plugins/tvdb.py @@ -15,15 +15,17 @@ from util import hook, http base_url = "http://thetvdb.com/api/" api_key = "469B73127CA0C411" + def get_zipped_xml(*args, **kwargs): try: path = kwargs.pop("path") except KeyError: raise KeyError("must specify a path for the zipped file to be read") - + zip_buffer = StringIO(http.get(*args, **kwargs)) return etree.parse(ZipFile(zip_buffer, "r").open(path)) + @hook.command def tv_next(inp): ".tv_next -- get the next episode of from thetvdb.com" @@ -33,7 +35,7 @@ def tv_next(inp): query = http.get_xml(base_url + 'GetSeries.php', seriesname=inp) except URLError: return "error contacting thetvdb.com" - + series_id = query.xpath('//seriesid/text()') if not series_id: @@ -44,9 +46,9 @@ def tv_next(inp): try: series = get_zipped_xml(base_url + '%s/series/%s/all/en.zip' % (api_key, series_id), path="en.xml") - except URLError: + except URLError: return "error contacting thetvdb.com" - + series_name = series.xpath('//SeriesName/text()')[0] if series.xpath('//Status/text()')[0] == 'Ended': @@ -57,12 +59,12 @@ def tv_next(inp): for episode in reversed(series.xpath('//Episode')): first_aired = episode.findtext("FirstAired") - + try: airdate = datetime.date(*map(int, first_aired.split('-'))) except (ValueError, TypeError): continue - + episode_num = "S%02dE%02d" % (int(episode.findtext("SeasonNumber")), int(episode.findtext("EpisodeNumber"))) diff --git a/plugins/twitter.py b/plugins/twitter.py index 2681266..68238e3 100644 --- a/plugins/twitter.py +++ b/plugins/twitter.py @@ -87,8 +87,8 @@ def twitter(inp): tweet = http.get_xml(url) except http.HTTPError, e: errors = {400: 'bad request (ratelimited?)', - 401: 'tweet is private', - 403: 'tweet is private', + 401: 'tweet is private', + 403: 'tweet is private', 404: 'invalid user/id', 500: 'twitter is broken', 502: 'twitter is down ("getting upgraded")',