h/plugins/wolframalpha.py

import re
import urllib2

from lxml import html

from util import hook

@hook.command
@hook.command('wa')
def wolframalpha(inp):
    ".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's " \
            "results for <query>"
    # NOTE: the docstring above is returned verbatim as the usage message
    # when no query is given, so it is kept as one short line and the rest
    # of the documentation lives in comments.
    #
    # inp -- the query text.
    # Returns the usage message (empty query), 'no results' (nothing
    # scraped), or a single line of the form
    #   "<heading> <result>|<result>. <heading> <result>"
    # shortened to at most max_len characters plus a trailing '...'.

    if not inp:
        return wolframalpha.__doc__

    max_len = 430  # longest reply allowed before it is cut and '...' added

    url = "http://www.wolframalpha.com/input/?i=%s&asynchronous=false"

    # safe='' so that '/' inside the query is percent-encoded as well
    h = html.parse(url % urllib2.quote(inp, safe=''))

    pod_texts = []
    for pod in h.xpath("//div[@class='pod ']"):
        heading = pod.find('h1/span')
        if heading is None:
            continue  # pod without a heading element
        heading = heading.text_content().strip()
        if heading.startswith('Input'):
            continue  # headings starting with 'Input' are not reported

        results = []
        for image in pod.xpath('div/div[@class="output"]/img'):
            # The result text is taken from the image's alt attribute; an
            # <img> without one contributes nothing instead of raising.
            alt = image.attrib.get('alt', '').strip()
            # literal backslash-n sequences in the alt text become '; '
            alt = alt.replace('\\n', '; ')
            alt = re.sub(r'\s+', ' ', alt)
            if alt:
                results.append(alt)
        if results:
            pod_texts.append(heading + ' ' + '|'.join(results))

    if not pod_texts:
        return 'no results'

    ret = '. '.join(pod_texts)

    if len(ret) > max_len:
        # Prefer cutting at the last space inside the limit. rfind returns
        # -1 when there is none, in which case cut hard at the limit
        # (slicing with -1 would only drop the final character).
        cut = ret.rfind(' ', 0, max_len)
        if cut == -1:
            cut = max_len
        # strip trailing punctuation/whitespace before adding the ellipsis
        ret = re.sub(r'\W+$', '', ret[:cut]) + '...'

    return ret
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00			`import re`
			`import urllib2`

			`from lxml import html`

			`from util import hook`

			`@hook.command`
			`@hook.command('wa')`
			`def wolframalpha(inp):`
			`".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's" \`
			`"results for <query>"`

			`if not inp:`
rewrite tell, simplify db access in quote, seen, urlhistory. fix wolframalpha for the last time 2010-02-01 07:29:50 +00:00			`return wolframalpha.__doc__`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00
			`url = "http://www.wolframalpha.com/input/?i=%s&asynchronous=false"`

			`h = html.parse(url % urllib2.quote(inp, safe=''))`

			`pods = h.xpath("//div[@class='pod ']")`

			`pod_texts = []`
			`for pod in pods:`
fix wolframalpha scraping when a pod is empty 2010-02-02 04:52:09 +00:00			`heading = pod.find('h1/span')`
			`if heading is not None:`
rewrote remember.py, improved WA scraping 2010-02-02 05:41:51 +00:00			`heading = heading.text_content().strip()`
			`if heading.startswith('Input'):`
			`continue`
fix wolframalpha scraping when a pod is empty 2010-02-02 04:52:09 +00:00			`else:`
			`continue`

wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00			`results = []`
			`for image in pod.xpath('div/div[@class="output"]/img'):`
			`alt = image.attrib['alt'].strip()`
			`alt = alt.replace('\\n', '; ')`
			`alt = re.sub(r'\s+', ' ', alt)`
			`if alt:`
			`results.append(alt)`
			`if results:`
			`pod_texts.append(heading + ' ' + '\|'.join(results))`

rewrote remember.py, improved WA scraping 2010-02-02 05:41:51 +00:00			`ret = '. '.join(pod_texts) # first pod is the input`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00
rewrote remember.py, improved WA scraping 2010-02-02 05:41:51 +00:00			`if not pod_texts:`
			`return 'no results'`
wolframalpha.py: gets results from WA (not using the commercial API)-- works pretty well 2010-02-01 05:49:52 +00:00
			`if len(ret) > 430:`
			`ret = ret[:ret.rfind(' ', 0, 430)]`
			`ret = re.sub(r'\W+$', '', ret) + '...'`

			`if not ret:`
			`return 'no result'`

			`return ret`