2010-02-01 05:49:52 +00:00
|
|
|
import re
|
|
|
|
import urllib2
|
|
|
|
|
|
|
|
from lxml import html
|
|
|
|
|
|
|
|
from util import hook
|
|
|
|
|
|
|
|
@hook.command
@hook.command('wa')
def wolframalpha(inp):
    ".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's " \
        "results for <query>"
    # The docstring above doubles as the usage text returned to the user when
    # no query is supplied, so it is kept as one line of user-facing text.
    #
    # inp: the raw query text typed after the command.
    # Returns the usage text, 'no results' / 'no result', or a single-line
    # summary of the result pods, cut to at most max_len characters plus a
    # trailing '...'.

    if not inp:
        return wolframalpha.__doc__

    url = "http://www.wolframalpha.com/input/?i=%s&asynchronous=false"

    # safe='' so that '/' in the query is percent-encoded as well.
    h = html.parse(url % urllib2.quote(inp, safe=''))

    # The trailing space in 'pod ' is intentional: the XPath compares the
    # whole class attribute value exactly.
    pods = h.xpath("//div[@class='pod ']")

    pod_texts = []
    for pod in pods:
        heading = pod.find('h1/span')
        if heading is None:
            # Pods without a heading are ignored.
            continue

        heading = heading.text_content().strip()
        if heading.startswith('Input'):
            # The input pod only echoes the query back.
            continue

        results = []
        for image in pod.xpath('div/div[@class="output"]/img'):
            # The text of a result is read from the image's alt attribute.
            # An image without one is skipped rather than raising KeyError
            # and aborting the whole command.
            alt = image.attrib.get('alt', '').strip()
            # A literal backslash followed by 'n' (not a real newline)
            # separates lines inside the alt text.
            alt = alt.replace('\\n', '; ')
            alt = re.sub(r'\s+', ' ', alt)
            if alt:
                results.append(alt)

        if results:
            pod_texts.append(heading + ' ' + '|'.join(results))

    if not pod_texts:
        return 'no results'

    ret = '. '.join(pod_texts)

    # Drop one level of backslash escaping.
    ret = re.sub(r'\\(.)', r'\1', ret)

    def unicode_sub(match):
        # Convert the four hex digits of a '\:XXXX' escape to its character.
        return unichr(int(match.group(1), 16))

    # NOTE(review): this pass only matches if a backslash survives the
    # unescaping above, i.e. if the page double-escapes these sequences.
    # Order kept as in the original; confirm against live output.
    ret = re.sub(r'\\:([0-9a-z]{4})', unicode_sub, ret)

    # Presumably sized to fit a single IRC message; verify against the bot's
    # output limits.
    max_len = 430
    if len(ret) > max_len:
        # Prefer to cut at the last space before the limit. rfind() returns
        # -1 when there is none, which as a slice bound would remove only the
        # final character, so fall back to a hard cut at the limit.
        cut = ret.rfind(' ', 0, max_len)
        if cut == -1:
            cut = max_len
        ret = ret[:cut]
        # Strip trailing punctuation/whitespace before adding the ellipsis.
        ret = re.sub(r'\W+$', '', ret) + '...'

    if not ret:
        return 'no result'

    return ret
|