2010-04-23 03:47:41 +00:00
|
|
|
# convenience wrapper for urllib2 & friends
|
|
|
|
|
|
|
|
import json
|
|
|
|
import urllib
|
|
|
|
import urllib2
|
|
|
|
import urlparse
|
|
|
|
|
|
|
|
from urllib import quote, quote_plus
|
|
|
|
from urllib2 import HTTPError, URLError
|
|
|
|
|
2010-04-25 21:39:31 +00:00
|
|
|
from lxml import etree, html
|
2010-04-23 03:47:41 +00:00
|
|
|
|
|
|
|
user_agent = 'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot'
|
|
|
|
|
|
|
|
ua_firefox = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) ' \
|
|
|
|
'Gecko/20070725 Firefox/2.0.0.6'
|
|
|
|
ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
|
|
|
|
|
|
|
|
|
|
|
|
def get(*args, **kwargs):
|
|
|
|
return open(*args, **kwargs).read()
|
|
|
|
|
|
|
|
|
|
|
|
def get_html(*args, **kwargs):
|
2010-04-25 21:39:31 +00:00
|
|
|
return html.fromstring(get(*args, **kwargs))
|
2010-04-23 03:47:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_xml(*args, **kwargs):
|
2010-04-25 21:39:31 +00:00
|
|
|
return etree.fromstring(get(*args, **kwargs))
|
2010-04-23 03:47:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_json(*args, **kwargs):
|
|
|
|
return json.loads(get(*args, **kwargs))
|
|
|
|
|
|
|
|
|
2010-04-23 03:50:56 +00:00
|
|
|
def open(url, query_params={}, user_agent=user_agent, post_data=None,
|
2010-04-23 03:47:41 +00:00
|
|
|
get_method=None, **kwargs):
|
|
|
|
query_params.update(kwargs)
|
|
|
|
|
|
|
|
url = prepare_url(url, query_params)
|
|
|
|
|
|
|
|
request = urllib2.Request(url, post_data)
|
|
|
|
|
|
|
|
if get_method is not None:
|
|
|
|
request.get_method = lambda: get_method
|
|
|
|
|
|
|
|
request.add_header('User-Agent', user_agent)
|
|
|
|
return urllib2.build_opener().open(request)
|
|
|
|
|
2010-04-23 03:50:56 +00:00
|
|
|
|
2010-04-23 03:47:41 +00:00
|
|
|
def prepare_url(url, queries):
|
|
|
|
if queries:
|
|
|
|
scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
|
|
|
|
|
|
|
|
query = dict(urlparse.parse_qsl(query))
|
|
|
|
query.update(queries)
|
|
|
|
query = urllib.urlencode(dict((to_utf8(key), to_utf8(value))
|
|
|
|
for key, value in query.iteritems()))
|
|
|
|
|
|
|
|
url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
|
|
|
|
|
|
|
|
return url
|
|
|
|
|
|
|
|
|
|
|
|
def to_utf8(s):
|
|
|
|
if isinstance(s, str):
|
|
|
|
return s
|
|
|
|
else:
|
|
|
|
return s.encode('utf8', 'ignore')
|