2010-04-23 03:47:41 +00:00
|
|
|
# convenience wrapper for urllib2 & friends
|
2013-06-26 18:35:39 +00:00
|
|
|
import binascii
|
2010-08-25 20:26:12 +00:00
|
|
|
import cookielib
|
2013-06-26 18:35:39 +00:00
|
|
|
import hmac
|
2010-04-23 03:47:41 +00:00
|
|
|
import json
|
2013-06-26 18:35:39 +00:00
|
|
|
import random
|
|
|
|
import string
|
|
|
|
import time
|
2010-04-23 03:47:41 +00:00
|
|
|
import urllib
|
|
|
|
import urllib2
|
|
|
|
import urlparse
|
|
|
|
|
2014-01-14 21:12:37 +00:00
|
|
|
from hashlib import sha1
|
2010-07-24 23:38:28 +00:00
|
|
|
from urllib import quote, quote_plus as _quote_plus
|
2010-04-23 03:47:41 +00:00
|
|
|
from urllib2 import HTTPError, URLError
|
|
|
|
|
2010-04-25 21:39:31 +00:00
|
|
|
from lxml import etree, html
|
2010-04-23 03:47:41 +00:00
|
|
|
|
2010-08-25 20:26:12 +00:00
|
|
|
|
2011-03-29 13:48:44 +00:00
|
|
|
# User-Agent that open() sends when the caller does not supply one.
ua_skybot = 'Skybot/1.0 http://github.com/rmmh/skybot'

# Browser-style User-Agent strings that callers can pass as open()'s
# user_agent argument.
ua_firefox = ('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) '
              'Gecko/20070725 Firefox/2.0.0.6')
ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'

# Module-level cookie jar, shared by every open(..., cookies=True) call.
jar = cookielib.CookieJar()
|
|
|
|
|
2010-04-23 03:47:41 +00:00
|
|
|
|
|
|
|
def get(*args, **kwargs):
    """Fetch a URL and return the raw response body.

    All positional and keyword arguments are forwarded unchanged to this
    module's open(); see that function for their meaning.

    Returns whatever the response object's read() returns.
    Raises whatever open() or read() raise (e.g. HTTPError, URLError).
    """
    response = open(*args, **kwargs)
    try:
        return response.read()
    finally:
        # Close explicitly so the connection is released right away instead
        # of whenever the response object happens to be garbage collected.
        response.close()
|
|
|
|
|
|
|
|
|
|
|
|
def get_html(*args, **kwargs):
    """Fetch a URL with get() and parse the body using lxml.html.fromstring.

    Arguments are passed straight through to get().
    """
    body = get(*args, **kwargs)
    return html.fromstring(body)
|
2010-04-23 03:47:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_xml(*args, **kwargs):
    """Fetch a URL with get() and parse the body using lxml.etree.fromstring.

    Arguments are passed straight through to get().
    """
    body = get(*args, **kwargs)
    return etree.fromstring(body)
|
2010-04-23 03:47:41 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_json(*args, **kwargs):
    """Fetch a URL with get() and decode the body using json.loads.

    Arguments are passed straight through to get().
    """
    body = get(*args, **kwargs)
    return json.loads(body)
|
|
|
|
|
|
|
|
|
2012-06-02 23:08:30 +00:00
|
|
|
def open(url, query_params=None, user_agent=None, referer=None, post_data=None,
         get_method=None, cookies=False, oauth=False, oauth_keys=None, **kwargs):
    """Build a urllib2 request for *url* and return the opened response.

    Parameters:
        url: target URL; may already contain a query string.
        query_params: optional dict of query parameters merged into the URL
            by prepare_url(). The dict passed in is not modified.
        user_agent: User-Agent header value; defaults to ua_skybot.
        referer: optional Referer header value.
        post_data: optional request body handed to urllib2.Request.
        get_method: optional HTTP method name that overrides the request's
            get_method().
        cookies: when true, the request goes through an opener that uses the
            module-level cookie jar `jar`.
        oauth: when true, an OAuth "Authorization" header is added.
        oauth_keys: mapping with the keys 'consumer', 'consumer_secret',
            'access' and 'access_secret'; read only when oauth is true.
        **kwargs: additional query parameters; they override entries of
            query_params that have the same name.

    Returns the object returned by the opener's open().
    Raises whatever urllib2 raises (e.g. HTTPError, URLError).
    """
    # Merge into a private copy: updating the caller's dict in place would
    # leak this call's keyword parameters into later calls reusing the dict.
    params = dict(query_params) if query_params is not None else {}
    params.update(kwargs)

    if user_agent is None:
        user_agent = ua_skybot

    url = prepare_url(url, params)

    request = urllib2.Request(url, post_data)

    if get_method is not None:
        request.get_method = lambda: get_method

    request.add_header('User-Agent', user_agent)

    if referer is not None:
        request.add_header('Referer', referer)

    if oauth:
        nonce = oauth_nonce()
        timestamp = oauth_timestamp()

        # Split on the first "?" only, so a URL with no query string or with
        # a literal "?" inside the query does not blow up the unpacking.
        api_url, _, req_data = url.partition("?")

        unsigned_request = oauth_unsigned_request(
            nonce, timestamp, req_data,
            oauth_keys['consumer'], oauth_keys['access'])

        # NOTE(review): the signature is always computed for method "GET",
        # regardless of post_data/get_method -- confirm OAuth is only used
        # for GET requests.
        signature = oauth_sign_request(
            "GET", api_url, req_data, unsigned_request,
            oauth_keys['consumer_secret'], oauth_keys['access_secret'])

        header = oauth_build_header(
            nonce, signature, timestamp,
            oauth_keys['consumer'], oauth_keys['access'])
        request.add_header('Authorization', header)

    if cookies:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    else:
        opener = urllib2.build_opener()

    return opener.open(request)
|
2010-04-23 03:47:41 +00:00
|
|
|
|
2010-04-23 03:50:56 +00:00
|
|
|
|
2010-04-23 03:47:41 +00:00
|
|
|
def prepare_url(url, queries):
    """Return *url* with the parameters in *queries* merged into its query string.

    Parameters:
        url: the URL to extend; an existing query string is kept and merged.
        queries: mapping of parameter name -> value. Entries override
            parameters of the same name already present in the URL. When
            empty or None the URL is returned untouched.

    Keys and values are passed through to_utf8() before being urlencoded.
    """
    if not queries:
        return url

    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)

    # keep_blank_values=True: without it parse_qsl drops parameters such as
    # "flag=" that are already in the URL, but only when extra queries are
    # merged, so the result would depend on whether `queries` was empty.
    merged = dict(urlparse.parse_qsl(query, keep_blank_values=True))
    merged.update(queries)

    encoded = urllib.urlencode(dict((to_utf8(key), to_utf8(value))
                                    for key, value in merged.items()))

    return urlparse.urlunsplit((scheme, netloc, path, encoded, fragment))
|
|
|
|
|
|
|
|
|
|
|
|
def to_utf8(s):
    """Return *s* as a UTF-8 encoded string.

    unicode objects are encoded to UTF-8 (characters that cannot be encoded
    are dropped); any other object is converted with str().
    """
    if not isinstance(s, unicode):
        return str(s)
    return s.encode('utf8', 'ignore')
|
2010-07-24 23:38:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
def quote_plus(s):
    """URL-quote *s* with urllib's quote_plus after converting it via to_utf8()."""
    encoded = to_utf8(s)
    return _quote_plus(encoded)
|
2011-05-30 11:18:01 +00:00
|
|
|
|
2014-01-14 21:12:37 +00:00
|
|
|
|
2013-06-26 18:35:39 +00:00
|
|
|
def oauth_nonce():
    """Return a string of 8 random decimal digits for use as an OAuth nonce."""
    digits = []
    for _ in range(8):
        digits.append(str(random.randint(0, 9)))
    return ''.join(digits)
|
|
|
|
|
2014-01-14 21:12:37 +00:00
|
|
|
|
2013-06-26 18:35:39 +00:00
|
|
|
def oauth_timestamp():
    """Return the current time.time() value, truncated to an int, as a string."""
    seconds = int(time.time())
    return str(seconds)
|
|
|
|
|
2014-01-14 21:12:37 +00:00
|
|
|
|
2013-06-26 18:35:39 +00:00
|
|
|
def oauth_unsigned_request(nonce, timestamp, req, consumer, token):
    """Build the quoted parameter string that goes into the OAuth signature base.

    Parameters:
        nonce: nonce string for this request.
        timestamp: timestamp string for this request.
        req: the request's query string ("k=v" pairs joined by "&"), already
            urlencoded. May be empty.
        consumer: OAuth consumer key.
        token: OAuth access token.

    Returns all OAuth parameters plus the request parameters, sorted by
    name, joined as "name=value&..." and then passed through quote().
    """
    params = {
        'oauth_consumer_key': consumer,
        'oauth_nonce': nonce,
        'oauth_signature_method': 'HMAC-SHA1',
        'oauth_timestamp': timestamp,
        'oauth_token': token,
        'oauth_version': '1.0',
    }

    # Accept any number of request parameters. Splitting each pair on the
    # first "=" only keeps values that themselves contain "=" intact, and
    # empty segments (empty query, stray "&") are skipped instead of raising.
    for pair in req.split('&'):
        if not pair:
            continue
        name, _, value = pair.partition('=')
        params[name] = value

    joined = '&'.join(name + '=' + params[name] for name in sorted(params))
    return quote(joined)
|
|
|
|
|
2014-01-14 21:12:37 +00:00
|
|
|
|
2013-06-26 18:35:39 +00:00
|
|
|
def oauth_build_header(nonce, signature, timestamp, consumer, token):
    """Return the value of the OAuth "Authorization" request header.

    Parameters:
        nonce: nonce string used for this request.
        signature: request signature as returned by oauth_sign_request().
        timestamp: timestamp string used for this request.
        consumer: OAuth consumer key.
        token: OAuth access token.

    The result has the form 'OAuth name="value", name="value", ...' with the
    parameters sorted by name.
    """
    fields = {
        'oauth_consumer_key': consumer,
        'oauth_nonce': nonce,
        'oauth_signature': signature,
        'oauth_signature_method': 'HMAC-SHA1',
        'oauth_timestamp': timestamp,
        'oauth_token': token,
        'oauth_version': '1.0',
    }

    # Join with ", " rather than appending a separator after every field and
    # slicing: the old slice removed only the final space and left a dangling
    # comma at the end of the header.
    pairs = ['%s="%s"' % (name, fields[name]) for name in sorted(fields)]
    return 'OAuth ' + ', '.join(pairs)
|
|
|
|
|
2014-01-14 21:12:37 +00:00
|
|
|
|
2013-06-26 18:35:39 +00:00
|
|
|
def oauth_sign_request(method, url, params, unsigned_request, consumer_secret, token_secret):
    """Compute the HMAC-SHA1 OAuth signature for a request.

    Parameters:
        method: HTTP method name placed at the start of the signature base.
        url: request URL without its query string; it is fully
            percent-encoded before being added to the base string.
        params: unused; kept so existing callers keep working.
        unsigned_request: quoted parameter string, as returned by
            oauth_unsigned_request().
        consumer_secret: OAuth consumer secret.
        token_secret: OAuth access token secret.

    Returns the base64 encoded HMAC-SHA1 digest, percent-encoded so it can
    be embedded in the Authorization header.
    """
    key = consumer_secret + "&" + token_secret
    base = method + "&" + quote(url, '') + "&" + unsigned_request

    # hmac works on byte strings; secrets loaded from JSON configuration
    # arrive as unicode, so encode before hashing.
    if not isinstance(key, bytes):
        key = key.encode('utf-8')
    if not isinstance(base, bytes):
        base = base.encode('utf-8')

    digest = hmac.new(key, base, sha1).digest()

    # b2a_base64 appends a newline, which [:-1] removes. safe='' so that "/"
    # in the base64 text is percent-encoded as well, matching how the URL
    # is quoted above.
    return quote(binascii.b2a_base64(digest)[:-1], '')
|
2011-05-30 11:18:01 +00:00
|
|
|
|
2014-01-14 21:12:37 +00:00
|
|
|
|
2011-05-30 11:18:01 +00:00
|
|
|
def unescape(s):
    """Parse *s* as HTML with lxml and return its text content.

    Strings that are empty or contain only whitespace are returned unchanged
    without being handed to the parser.
    """
    if s.strip():
        return html.fromstring(s).text_content()
    # Nothing but whitespace: skip the parser (presumably because lxml
    # rejects empty documents -- confirm).
    return s
|