refactor http (and html, xml, json) handling into util/http.py (not done for dotnetpad), fix mtg, remove dict & goonsay

Ryan Hitchman 2010-04-22 21:47:41 -06:00
parent 148733567e
commit e55774b770
23 changed files with 191 additions and 291 deletions
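Most of the diff below repeats one pattern: per-plugin urllib/urllib2/json/lxml boilerplate is replaced by a call into the new plugins/util/http.py helpers. A minimal before/after sketch of that pattern (the URL and query parameter are illustrative, not taken from any one plugin):

# before: each plugin built and parsed its own request
import json
import urllib2
request = urllib2.Request('http://example.com/api?q=' + urllib2.quote('term', ''))
request.add_header('User-Agent', 'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/')
data = json.loads(urllib2.build_opener().open(request).read())

# after: quoting, the User-Agent header, and JSON parsing live in util/http.py
from util import http
data = http.get_json('http://example.com/api', q='term')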

View File

@ -1,9 +1,7 @@
import urllib
import htmlentitydefs
import re
import json
from util import hook
from util import hook, http
########### from http://effbot.org/zone/re-sub.htm#unescape-html #############
@ -37,11 +35,8 @@ language_pairs = zip(languages[:-1], languages[1:])
def goog_trans(text, slang, tlang):
req_url = 'http://ajax.googleapis.com/ajax/services/language/translate' \
'?v=1.0&q=%s&langpair=%s'
url = req_url % (urllib.quote(text, safe=''), slang + '%7C' + tlang)
page = urllib.urlopen(url).read()
parsed = json.loads(page)
url = 'http://ajax.googleapis.com/ajax/services/language/translate?v=1.0'
parsed = http.get_json(url, q=text, langpair=(slang + '|' + tlang))
if not 200 <= parsed['responseStatus'] < 300:
raise IOError('error with the translation server: %d: %s' % (
parsed['responseStatus'], ''))

View File

@ -1,8 +1,4 @@
import urllib
from lxml import etree
from util import hook
from util import hook, http
@hook.command
@ -22,9 +18,7 @@ def bam(inp):
params['theStyle'] = style
params['theMessage'] = message
url = host + path + urllib.urlencode(params)
response = etree.parse(url)
response = http.get_xml(host + path, params)
status = response.xpath('//status/text()')[0]
if status == 'ok':
return host + response.xpath('//msgid/text()')[0]

View File

@ -1,62 +0,0 @@
import re
import urllib
from lxml import html
from util import hook
@hook.command('u')
@hook.command
def urban(inp):
'''.u/.urban <phrase> -- looks up <phrase> on urbandictionary.com'''
if not inp:
return urban.__doc__
url = 'http://www.urbandictionary.com/define.php?term=' + \
urllib.quote(inp, safe='')
page = html.parse(url)
words = page.xpath("//td[@class='word']")
defs = page.xpath("//div[@class='definition']")
if not defs:
return 'no definitions found'
out = words[0].text_content().strip() + ': ' + ' '.join(
defs[0].text_content().split())
if len(out) > 400:
out = out[:out.rfind(' ', 0, 400)] + '...'
return out
## A dictionary look-up plugin for Skybot made by Ghetto Wizard and Scaevolus
@hook.command('dict')
@hook.command
def define(inp):
".define/.dict <word> -- fetches definition of <word>"
if not inp:
return define.__doc__
base_url = 'http://dictionary.reference.com/browse/'
raw_data = urllib.urlopen(base_url + urllib.quote(inp, '')).read()
raw_data = raw_data.decode('utf-8')
definition = html.fromstring(raw_data).xpath('//span[@class="dnindex"]'
' | //div[@class="dndata"]')
if not definition:
return 'No results for ' + inp
result = ' '.join(section.text_content() for section in definition)
result = re.sub(r'\s+', ' ', result)
if len(result) > 400:
result = result[:result.rfind(' ', 0, 400)]
result = re.sub(r'[^A-Za-z]+\.?$', '', result) + ' ...' #truncate
return result

View File

@ -1,7 +1,6 @@
import urllib2
import urlparse
from util import hook
from util import hook, http
@hook.command
@ -18,10 +17,7 @@ def down(inp):
# http://mail.python.org/pipermail/python-list/2006-December/589854.html
try:
request = urllib2.Request(inp)
request.get_method = lambda: "HEAD"
http_file = urllib2.urlopen(request)
head = http_file.read()
http.get(inp, get_method='HEAD')
return inp + ' seems to be up'
except urllib2.URLError:
except http.URLError:
return inp + ' seems to be down'
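For the down check above, the wrapper's get_method argument is what turns the request into a HEAD; a short sketch of the same check outside the plugin (example URL, assuming plugins/ is on the import path):

from util import http

try:
    # get_method='HEAD' overrides urllib2's default GET, so only headers are fetched
    http.get('http://example.com', get_method='HEAD')
    print 'http://example.com seems to be up'
except http.URLError:
    print 'http://example.com seems to be down'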

View File

@ -1,17 +1,11 @@
'''Searches Encyclopedia Dramatica and returns the first paragraph of the
article'''
import json
from lxml import html
import urllib2
from util import hook, http
from util import hook
api_url = "http://encyclopediadramatica.com/api.php?action=opensearch&search="
api_url = "http://encyclopediadramatica.com/api.php?action=opensearch"
ed_url = "http://encyclopediadramatica.com/"
ua_header = ('User-Agent', 'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/')
@hook.command('ed')
@hook.command
@ -21,18 +15,13 @@ def drama(inp):
if not inp:
return drama.__doc__
q = api_url + (urllib2.quote(inp, safe=''))
request = urllib2.Request(q)
request.add_header(*ua_header)
j = json.loads(urllib2.build_opener().open(request).read())
j = http.get_json(api_url, search=inp)
if not j[1]:
return 'no results found'
article_name = j[1][0].replace(' ', '_')
article_name = j[1][0].replace(' ', '_').encode('utf8')
url = ed_url + (urllib2.quote(article_name))
request = urllib2.Request(url)
request.add_header(*ua_header)
page = html.fromstring(urllib2.build_opener().open(request).read())
url = ed_url + http.quote(article_name, '')
page = http.get_html(url)
for p in page.xpath('//div[@id="bodyContent"]/p'):
if p.text_content():

View File

@ -1,7 +1,6 @@
import urllib2
import re
from util import hook
from util import hook, http
@hook.command
@ -10,10 +9,7 @@ def calc(inp):
if not inp:
return calc.__doc__
url = "http://www.google.com/search?q="
request = urllib2.Request(url + urllib2.quote(inp, ''))
request.add_header('User-Agent', 'skybot')
page = urllib2.build_opener().open(request).read()
page = http.get('http://www.google.com/search', q=inp)
# ugh, scraping HTML with regexes
m = re.search(r'<h2 class=r style="font-size:138%"><b>(.*?)</b>', page)

View File

@ -1,18 +1,14 @@
import urllib
import random
from lxml import html
import json
from util import hook
from lxml import html
from util import hook, http
def api_get(kind, query):
req_url = 'http://ajax.googleapis.com/ajax/services/search/%s?' \
'v=1.0&safe=off&q=%s'
query = query.encode('utf8')
url = req_url % (kind, urllib.quote(query, safe=''))
page = urllib.urlopen(url).read()
return json.loads(page)
url = 'http://ajax.googleapis.com/ajax/services/search/%s?' \
'v=1.0&safe=off'
return http.get_json(url % kind, q=query)
@hook.command

View File

@ -1,65 +0,0 @@
import urllib2
import json
from util import hook
#Scaevolus: factormystic if you commit a re-enabled goonsay I'm
# going to revoke your commit access
#@hook.command
#def goonsay(inp, say=None):
# say(' __________ /')
# say('(--[. ]-[ .] /')
# say('(_______o__)')
@hook.command
@hook.command('gs')
def goonsay(inp):
".gs/.goonsay <id|add [message]> -- Get's the goonsay.com result for <id> "
" or add a new :goonsay: to the database. With no args, random result."
url = "http://goonsay.com/api/goonsays"
req_headers = {
'User-Agent': 'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/',
'Content-Type': 'application/json',
}
q = inp.split(' ', 1)
print q
if len(q) == 2:
cmd = q[0]
args = q[1]
if cmd == 'add':
try:
data = json.dumps({'text': args})
req = urllib2.Request('%s/' % (url,), data, req_headers)
j = json.loads(urllib2.urlopen(req).read())
except urllib2.HTTPError, e:
return e
return '#%d - %s' % (j['id'], j['text'])
else:
return goonsay.__doc__
if len(inp):
try:
req = urllib2.Request('%s/%d/' % (url, int(inp)), None,
req_headers)
j = json.loads(urllib2.urlopen(req).read())
except urllib2.HTTPError, e:
if e.code == 410 or e.code == 404:
return 'There is no :goonsay: by that id'
return e
except ValueError:
return goonsay.__doc__
return '#%d - %s' % (j['id'], j['text'])
try:
req = urllib2.Request('%s/random/' % (url,), None, req_headers)
j = json.loads(urllib2.urlopen(req).read())
except urllib2.HTTPError, e:
return e
return '#%d - %s' % (j['id'], j['text'])

View File

@ -1,18 +1,21 @@
from lxml import html
import re
import urllib2
from util import hook
from lxml import html
from util import hook, http
@hook.command
def mtg(inp):
".mtg <name> -- gets information about Magic the Gathering card <name>"
url = 'http://magiccards.info/query.php?cardname='
url += urllib2.quote(inp, safe='')
h = html.parse(url)
name = h.find('/body/table/tr/td/span/a')
if not inp:
return mtg.__doc__
url = 'http://magiccards.info/query?v=card&s=cname'
h = http.get_html(url, q=inp)
name = h.find('body/table/tr/td/span/a')
if name is None:
return "no cards found"
card = name.getparent().getparent().getparent()
@ -37,7 +40,7 @@ def mtg(inp):
rarity_abbrevs.get(x[1], x[1]))
for x in printings)
name.make_links_absolute()
name.make_links_absolute(base_url=url)
link = name.attrib['href']
name = name.text_content().strip()
type = type.strip()

View File

@ -1,7 +1,6 @@
import urllib
import re
from util import hook
from util import hook, http
re_lineends = re.compile(r'[\r\n]*')
@ -14,12 +13,12 @@ def py(inp):
if not inp:
return py.__doc__
res = urllib.urlopen("http://eval.appspot.com/eval?statement=%s" %
urllib.quote(inp, safe='')).readlines()
res = http.get("http://eval.appspot.com/eval", statement=inp).splitlines()
if len(res) == 0:
return
res[0] = re_lineends.split(res[0])[0]
if not res[0] == 'Traceback (most recent call last):':
return res[0]
return res[0].decode('utf8', 'ignore')
else:
return res[-1]
return res[-1].decode('utf8', 'ignore')

View File

@ -1,8 +1,4 @@
import urllib
from lxml import html
from util import hook
from util import hook, http
@hook.command('god')
@ -13,13 +9,14 @@ def bible(inp):
if not inp:
return bible.__doc__
base_url = 'http://www.esvapi.org/v2/rest/passageQuery?key=IP&' \
'output-format=plain-text&include-heading-horizontal-lines&' \
'include-headings=false&include-passage-horizontal-lines=false&' \
'include-passage-references=false&include-short-copyright=false&' \
'include-footnotes=false&line-length=0&passage='
base_url = ('http://www.esvapi.org/v2/rest/passageQuery?key=IP&'
'output-format=plain-text&include-heading-horizontal-lines&'
'include-headings=false&include-passage-horizontal-lines=false&'
'include-passage-references=false&include-short-copyright=false&'
'include-footnotes=false&line-length=0&'
'include-heading-horizontal-lines=false')
text = urllib.urlopen(base_url + urllib.quote(inp)).read()
text = http.get(base_url, passage=inp)
text = ' '.join(text.split())
@ -38,11 +35,9 @@ def koran(inp):
if not inp:
return koran.__doc__
base_url = 'http://quod.lib.umich.edu/cgi/k/koran/koran-idx?type=simple&q1='
url = 'http://quod.lib.umich.edu/cgi/k/koran/koran-idx?type=simple'
raw_data = urllib.urlopen(base_url + urllib.quote(inp, '')).read()
results = html.fromstring(raw_data).xpath('//li')
results = http.get_html(url, q1=inp).xpath('//li')
if not results:
return 'No results for ' + inp

View File

@ -1,10 +1,8 @@
import random
import urllib
import urllib2
import re
import json
import random
import re
from util import hook
from util import hook, http
@hook.command
@ -23,8 +21,7 @@ def suggest(inp, inp_unstripped=''):
else:
num = 0
url = 'http://google.com/complete/search?q=' + urllib.quote(inp, safe='')
page = urllib2.urlopen(url).read()
page = http.get('http://google.com/complete/search', q=inp)
page_json = page.split('(', 1)[1][:-1]
suggestions = json.loads(page_json)[1]
if not suggestions:

View File

@ -3,10 +3,7 @@
# This skybot plugin retreives the number of items
# a given user has waiting from idling in Team Fortress 2.
import json
import urllib
from util import hook
from util import hook, http
@hook.command('hats')
@ -17,14 +14,16 @@ def tf(inp):
if not inp:
return tf.__doc__
if inp.isdigit(): link = 'profiles'
else: link = 'id'
if inp.isdigit():
link = 'profiles'
else:
link = 'id'
url = 'http://steamcommunity.com/%s/%s/tfitems?json=1' % \
(link,urllib.quote(inp, safe=''))
raw_data = urllib.urlopen(url).read().decode('utf-8')
(link, http.quote(inp.encode('utf8'), safe=''))
try:
inv = json.loads(raw_data)
inv = http.get_json(url)
except ValueError:
return '%s is not a valid profile' % inp
@ -41,4 +40,4 @@ def tf(inp):
hats += 1
return '%s has had %s items and %s hats drop (%s total hats)' % \
(inp,dropped,dhats,dhats+hats)
(inp, dropped, dhats, dhats + hats)

View File

@ -1,16 +1,9 @@
import re
import urllib2
from util import hook
from util import hook, http
tinyurl_re = (r'http://(?:www\.)?tinyurl.com/([A-Za-z0-9\-]+)',
re.IGNORECASE)
@hook.regex(*tinyurl_re)
@hook.regex(r'(?i)http://(?:www\.)?tinyurl.com/([A-Za-z0-9\-]+)')
def tinyurl(match):
try:
return urllib2.urlopen(match.group()).url.strip()
except urllib2.URLError:
return http.open(match.group()).url.strip()
except http.URLError, e:
pass

View File

@ -3,13 +3,11 @@ twitter.py: written by Scaevolus 2009
retrieves most recent tweets
"""
import re
import random
import urllib2
from lxml import etree
import re
from time import strptime, strftime
from util import hook
from util import hook, http
def unescape_xml(string):
@ -89,8 +87,8 @@ def twitter(inp):
return 'error: invalid request'
try:
xml = urllib2.urlopen(url).read()
except urllib2.HTTPError, e:
tweet = http.get_xml(url)
except http.HTTPError, e:
errors = {400: 'bad request (ratelimited?)',
401: 'tweet is private',
404: 'invalid user/id',
@ -102,11 +100,9 @@ def twitter(inp):
if e.code in errors:
return 'error: ' + errors[e.code]
return 'error: unknown'
except urllib2.URLerror, e:
except http.URLerror, e:
return 'error: timeout'
tweet = etree.fromstring(xml)
if searching_hashtag:
ns = '{http://www.w3.org/2005/Atom}'
tweets = tweet.findall(ns + 'entry/' + ns + 'id')

View File

@ -0,0 +1,27 @@
import re
from util import hook, http
@hook.command('u')
@hook.command
def urban(inp):
'''.u/.urban <phrase> -- looks up <phrase> on urbandictionary.com'''
if not inp:
return urban.__doc__
url = 'http://www.urbandictionary.com/define.php'
page = http.get_html(url, term=inp)
words = page.xpath("//td[@class='word']")
defs = page.xpath("//div[@class='definition']")
if not defs:
return 'no definitions found'
out = words[0].text_content().strip() + ': ' + ' '.join(
defs[0].text_content().split())
if len(out) > 400:
out = out[:out.rfind(' ', 0, 400)] + '...'
return out

View File

@ -69,6 +69,7 @@ def urlinput(match, nick='', chan='', db=None, bot=None):
db_init(db)
url = urlnorm.normalize(match.group().encode('utf-8'))
if url not in ignored_urls:
url = url.decode('utf-8')
history = get_history(db, chan, url)
insert_history(db, chan, url, nick)
if nick not in dict(history):

plugins/util/http.py (new file, 69 lines)
View File

@ -0,0 +1,69 @@
# convenience wrapper for urllib2 & friends
import json
import urllib
import urllib2
import urlparse
from urllib import quote, quote_plus
from urllib2 import HTTPError, URLError
import lxml
user_agent = 'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot'
ua_firefox = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) ' \
'Gecko/20070725 Firefox/2.0.0.6'
ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
def get(*args, **kwargs):
return open(*args, **kwargs).read()
def get_html(*args, **kwargs):
return lxml.html.fromstring(get(*args, **kwargs))
def get_xml(*args, **kwargs):
return lxml.etree.fromstring(get(*args, **kwargs))
def get_json(*args, **kwargs):
return json.loads(get(*args, **kwargs))
def open(url, query_params={}, user_agent=user_agent, post_data=None,
get_method=None, **kwargs):
query_params.update(kwargs)
url = prepare_url(url, query_params)
request = urllib2.Request(url, post_data)
if get_method is not None:
request.get_method = lambda: get_method
request.add_header('User-Agent', user_agent)
return urllib2.build_opener().open(request)
def prepare_url(url, queries):
if queries:
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
query = dict(urlparse.parse_qsl(query))
query.update(queries)
query = urllib.urlencode(dict((to_utf8(key), to_utf8(value))
for key, value in query.iteritems()))
url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
return url
def to_utf8(s):
if isinstance(s, str):
return s
else:
return s.encode('utf8', 'ignore')
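A minimal usage sketch of the new module (assuming plugins/ is on the import path; the translate endpoint comes from this commit's translate plugin, example.com is illustrative). Keyword arguments passed to get/get_html/get_xml/get_json/open are folded into the query string by prepare_url() and UTF-8 encoded:

from util import http

# keyword arguments become query parameters
j = http.get_json('http://ajax.googleapis.com/ajax/services/language/translate',
                  v='1.0', q='hello world', langpair='en|fr')

# override the HTTP method without constructing a Request by hand
resp = http.open('http://example.com/', get_method='HEAD')
print resp.info()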

View File

@ -4,17 +4,14 @@ Runs a given url through the w3c validator
by Vladi
'''
import urllib
import urllib2
from util import hook
from util import hook, http
@hook.command('val')
@hook.command('valid')
@hook.command
def validate(inp):
'''.val/.valid/.validate <url> -- runs url through w3c markup validator'''
".val/.valid/.validate <url> -- runs url through w3c markup validator"
if not inp:
return validate.__doc__
@ -22,10 +19,9 @@ def validate(inp):
if not inp.startswith('http://'):
inp = 'http://' + inp
url = 'http://validator.w3.org/check?uri=%s' % urllib.quote(inp, '')
info = dict(urllib2.urlopen(url).info())
url = 'http://validator.w3.org/check?uri=' + http.quote_plus(inp)
info = dict(http.open(url).info())
print info
status = info['x-w3c-validator-status'].lower()
if status in ("valid", "invalid"):
errorcount = info['x-w3c-validator-errors']

View File

@ -1,9 +1,6 @@
"weather, thanks to google"
from lxml import etree
import urllib
from util import hook
from util import hook, http
@hook.command
@ -25,9 +22,8 @@ def weather(inp, nick='', server='', reply=None, db=None):
return weather.__doc__
loc = loc[0]
data = urllib.urlencode({'weather': loc.encode('utf-8')})
url = 'http://www.google.com/ig/api?' + data
w = etree.parse(url).find('weather')
w = http.get_xml('http://www.google.com/ig/api', weather=loc)
w = w.find('weather')
if w.find('problem_cause') is not None:
return "Couldn't fetch weather data for '%s', try using a zip or " \

View File

@ -1,15 +1,13 @@
'''Searches wikipedia and returns first sentence of article
Scaevolus 2009'''
import urllib2
from lxml import etree
import re
from util import hook
from util import hook, http
api_prefix = "http://en.wikipedia.org/w/api.php"
search_url = api_prefix + "?action=opensearch&search=%s&format=xml"
search_url = api_prefix + "?action=opensearch&format=xml"
paren_re = re.compile('\s*\(.*\)$')
@ -23,14 +21,7 @@ def wiki(inp):
if not inp:
return wiki.__doc__
q = search_url % (urllib2.quote(inp, safe=''))
request = urllib2.Request(q)
request.add_header('User-Agent',
'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/')
opener = urllib2.build_opener()
xml = opener.open(request).read()
x = etree.fromstring(xml)
x = http.get_xml(search_url, search=inp)
ns = '{http://opensearch.org/searchsuggest2}'
items = x.findall(ns + 'Section/' + ns + 'Item')
@ -60,4 +51,10 @@ def wiki(inp):
if len(desc) > 300:
desc = desc[:300] + '...'
return '%s -- %s' % (desc, url)
return '%s -- %s' % (desc, http.quote(url, ':/'))
@hook.command
def dict(inp):
".dict/.define <word> -- gets definition of <word> from Wiktionary"
pass

View File

@ -1,9 +1,6 @@
import re
import urllib2
from lxml import html
from util import hook
from util import hook, http
@hook.command
@ -15,9 +12,9 @@ def wolframalpha(inp):
if not inp:
return wolframalpha.__doc__
url = "http://www.wolframalpha.com/input/?i=%s&asynchronous=false"
url = "http://www.wolframalpha.com/input/?asynchronous=false"
h = html.parse(url % urllib2.quote(inp, safe=''))
h = http.get_html(url, i=inp)
pods = h.xpath("//div[@class='pod ']")

View File

@ -1,11 +1,8 @@
import json
import locale
import re
import time
import urllib2
from util import hook
from urllib import quote_plus
from util import hook, http
locale.setlocale(locale.LC_ALL, '')
@ -15,12 +12,12 @@ youtube_re = (r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)'
base_url = 'http://gdata.youtube.com/feeds/api/'
url = base_url + 'videos/%s?v=2&alt=jsonc'
search_api_url = base_url + 'videos?v=2&alt=jsonc&max-results=1&q=%s'
search_api_url = base_url + 'videos?v=2&alt=jsonc&max-results=1'
video_url = "http://youtube.com/watch?v=%s"
def get_video_description(vid_id):
j = json.load(urllib2.urlopen(url % vid_id))
j = http.get_json(url % vid_id)
if j.get('error'):
return
@ -67,8 +64,7 @@ def youtube(inp):
if not inp:
return youtube.__doc__
inp = quote_plus(inp)
j = json.load(urllib2.urlopen(search_api_url % (inp)))
j = http.get_json(search_api_url, q=inp)
if 'error' in j:
return 'error performing search'