h/plugins/wolframalpha.py

import re

from util import hook, http


@hook.command
@hook.command('wa')
def wolframalpha(inp):
    ".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's " \
            "results for <query>"
    # NOTE: the docstring above is also the usage text returned when the
    # command is invoked without a query, so it stays a short one-liner and
    # the implementation notes live in comments.
    #
    # inp: the query text. Returns a single string: the scraped results,
    # the usage text, or 'no results' / 'no result' when nothing was found.

    if not inp:
        return wolframalpha.__doc__

    url = "http://www.wolframalpha.com/input/?asynchronous=false"

    # The query is passed to http.get_html as the `i` keyword argument.
    h = http.get_html(url, i=inp)

    # @class='pod ' is an exact string comparison, trailing space included
    # (presumably mirroring the page markup -- confirm before "fixing" it).
    pod_texts = []
    for pod in h.xpath("//div[@class='pod ']"):
        heading = pod.find('h2')
        if heading is None:
            # No title to label the results with; skip the pod.
            continue
        heading = heading.text_content().strip()
        if heading.startswith('Input'):
            # Pods titled "Input..." are skipped (presumably they only echo
            # the query back).
            continue

        results = []
        for image in pod.xpath('div/div[@class="output"]/img'):
            # An <img> without an alt attribute contributes nothing rather
            # than raising KeyError and aborting the whole command.
            alt = image.attrib.get('alt', '').strip()
            # Literal backslash-n sequences in the alt text become '; '.
            alt = alt.replace('\\n', '; ')
            alt = re.sub(r'\s+', ' ', alt)
            if alt:
                results.append(alt)
        if results:
            pod_texts.append(heading + ' ' + '|'.join(results))

    if not pod_texts:
        return 'no results'

    ret = '. '.join(pod_texts)

    # First unescape pass: drop the backslash in front of any character.
    # A doubled backslash collapses to a single one here, which is what
    # leaves "\:xxxx" sequences available for the second pass below.
    ret = re.sub(r'\\(.)', r'\1', ret)

    def unicode_sub(match):
        # Convert the four hex digits to the corresponding character
        # (unichr is the Python 2 builtin).
        return unichr(int(match.group(1), 16))

    # Only real hex digits are accepted so int(..., 16) cannot raise
    # ValueError on sequences such as "\:text".
    ret = re.sub(r'\\:([0-9a-f]{4})', unicode_sub, ret)

    if len(ret) > 430:
        # Cut at the last space before the limit; if there is no space at
        # all, rfind returns -1, so fall back to a hard cut at the limit
        # instead of slicing off only the final character.
        cut = ret.rfind(' ', 0, 430)
        if cut == -1:
            cut = 430
        ret = ret[:cut]
        # Trim trailing non-word characters before adding the ellipsis.
        ret = re.sub(r'\W+$', '', ret) + '...'

    if not ret:
        return 'no result'

    return ret
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00			`import re`

refactor http (and html, xml, json) handling into util/http.py (not done for dotnetpad), fix mtg, remove dict & goonsay 2010-04-23 03:47:41 +00:00			`from util import hook, http`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00
PEP8 compliance + clean dotnetpad 2010-03-01 02:32:41 +00:00
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00			`@hook.command`
			`@hook.command('wa')`
			`def wolframalpha(inp):`
			`".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's" \`
			`"results for <query>"`

			`if not inp:`
rewrite tell, simplify db access in quote, seen, urlhistory. fix wolframalpha for the last time 2010-02-01 07:29:50 +00:00			`return wolframalpha.__doc__`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00
refactor http (and html, xml, json) handling into util/http.py (not done for dotnetpad), fix mtg, remove dict & goonsay 2010-04-23 03:47:41 +00:00			`url = "http://www.wolframalpha.com/input/?asynchronous=false"`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00
refactor http (and html, xml, json) handling into util/http.py (not done for dotnetpad), fix mtg, remove dict & goonsay 2010-04-23 03:47:41 +00:00			`h = http.get_html(url, i=inp)`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00
			`pods = h.xpath("//div[@class='pod ']")`

			`pod_texts = []`
PEP8 compliance + clean dotnetpad 2010-03-01 02:32:41 +00:00			`for pod in pods:`
fix wolframalpha parsing 2010-03-13 07:04:59 +00:00			`heading = pod.find('h2')`
fix wolfralpha scraping when a pod is empty 2010-02-02 04:52:09 +00:00			`if heading is not None:`
rewrote remember.py, improved WA scraping 2010-02-02 05:41:51 +00:00			`heading = heading.text_content().strip()`
			`if heading.startswith('Input'):`
			`continue`
fix wolfralpha scraping when a pod is empty 2010-02-02 04:52:09 +00:00			`else:`
			`continue`

wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00			`results = []`
			`for image in pod.xpath('div/div[@class="output"]/img'):`
			`alt = image.attrib['alt'].strip()`
			`alt = alt.replace('\\n', '; ')`
			`alt = re.sub(r'\s+', ' ', alt)`
			`if alt:`
			`results.append(alt)`
			`if results:`
			`pod_texts.append(heading + ' ' + '\|'.join(results))`

PEP8 compliance + clean dotnetpad 2010-03-01 02:32:41 +00:00			`ret = '. '.join(pod_texts)`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00
rewrote remember.py, improved WA scraping 2010-02-02 05:41:51 +00:00			`if not pod_texts:`
			`return 'no results'`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00
improve wolframalpha unescaping 2010-02-17 23:24:52 +00:00			`ret = re.sub(r'\\(.)', r'\1', ret)`

			`def unicode_sub(match):`
			`return unichr(int(match.group(1), 16))`

			`ret = re.sub(r'\\:([0-9a-z]{4})', unicode_sub, ret)`

PEP8 compliance + clean dotnetpad 2010-03-01 02:32:41 +00:00			`if len(ret) > 430:`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00			`ret = ret[:ret.rfind(' ', 0, 430)]`
			`ret = re.sub(r'\W+$', '', ret) + '...'`

			`if not ret:`
			`return 'no result'`

			`return ret`