2010-02-01 05:49:52 +00:00
|
|
|
import re
|
|
|
|
|
2010-04-23 03:47:41 +00:00
|
|
|
from util import hook, http
|
2010-02-01 05:49:52 +00:00
|
|
|
|
2010-03-01 02:32:41 +00:00
|
|
|
|
2010-02-01 05:49:52 +00:00
|
|
|
@hook.command('wa')
@hook.command
def wolframalpha(inp):
    ".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's" \
        " results for <query>"
    # NOTE: the string above is the function's docstring; its text is kept
    # unchanged because the command framework may display it as help text
    # (assumption -- confirm against util.hook).
    #
    # Overview:
    #   1. Fetch the Wolfram Alpha results page for `inp`.
    #   2. For every result "pod" except the "Input..." one, gather the alt
    #      text of its output images.
    #   3. Join everything into one line, undo backslash escapes, and trim
    #      the line to at most 430 characters plus a trailing '...'.
    #
    # Returns the formatted result line, or 'no results' when no pod
    # produced any text.

    url = "http://www.wolframalpha.com/input/?asynchronous=false"

    # The query is handed over as the `i` keyword argument; presumably
    # http.get_html turns it into a URL query parameter -- TODO confirm.
    h = http.get_html(url, i=inp)

    # Note the trailing space inside the class attribute value.
    pods = h.xpath("//div[@class='pod ']")

    pod_texts = []
    for pod in pods:
        heading = pod.find('h2')
        if heading is None:
            # Pods without an <h2> heading are skipped.
            continue

        heading = heading.text_content().strip()
        if heading.startswith('Input'):
            # Skip the pod(s) whose heading begins with "Input".
            continue

        results = []
        for alt in pod.xpath('div/div[@class="output pnt"]/img/@alt'):
            # A literal backslash followed by 'n' in the alt text is turned
            # into '; ', then all whitespace runs collapse to one space.
            alt = alt.strip().replace('\\n', '; ')
            alt = re.sub(r'\s+', ' ', alt)
            if alt:
                results.append(alt)

        if results:
            pod_texts.append(heading + ' ' + '|'.join(results))

    if not pod_texts:
        return 'no results'

    ret = '. '.join(pod_texts)

    # Drop the backslash from every backslash-escaped character.
    ret = re.sub(r'\\(.)', r'\1', ret)

    def unicode_sub(match):
        # Convert the four hex digits of a "\:xxxx" sequence to a character.
        return unichr(int(match.group(1), 16))

    # Only hexadecimal digits are accepted: the previous character class
    # ([0-9a-z]) also matched letters such as 'z', which made int(..., 16)
    # raise ValueError.
    # NOTE(review): the substitution above already consumes one leading
    # backslash, so this only fires for sequences that were double-escaped
    # in the page text -- confirm that is intended.
    ret = re.sub(r'\\:([0-9a-f]{4})', unicode_sub, ret)

    if len(ret) > 430:
        # Prefer cutting at the last space before the limit; when there is
        # no space at all, rfind() returns -1 and slicing with it would only
        # drop the final character, so fall back to a hard cut at 430.
        cut = ret.rfind(' ', 0, 430)
        if cut == -1:
            cut = 430
        ret = ret[:cut]
        # Strip trailing non-word characters before adding the ellipsis.
        ret = re.sub(r'\W+$', '', ret) + '...'

    if not ret:
        return 'no result'

    return ret
|