h/plugins/wolframalpha.py

import re
import urllib2

from lxml import html

from util import hook

@hook.command
@hook.command('wa')
def wolframalpha(inp):
    ".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's " \
            "results for <query>"

    # The docstring above doubles as the usage text handed back when the
    # command is invoked without a query, so it stays a single help line.
    if not inp:
        return wolframalpha.__doc__

    url = "http://www.wolframalpha.com/input/?i=%s&asynchronous=false"

    # safe='' makes quote() percent-encode '/' too, so the whole query stays
    # inside the single `i` parameter.
    h = html.parse(url % urllib2.quote(inp, safe=''))

    # Matches the exact attribute value 'pod ' (trailing space included).
    pods = h.xpath("//div[@class='pod ']")

    # One "<heading> <result>|<result>..." string per pod that has results.
    pod_texts = []
    for pod in pods:
        # find() returns None when the pod has no h1/span heading; fall back
        # to an empty heading instead of crashing on .text_content().
        heading_el = pod.find('h1/span')
        heading = ''
        if heading_el is not None:
            heading = heading_el.text_content().strip()

        results = []
        for image in pod.xpath('div/div[@class="output"]/img'):
            # Results are read from the images' alt text; tolerate images
            # that carry no alt attribute at all.
            alt = image.attrib.get('alt', '').strip()
            # The alt text uses a literal backslash-n as a line separator.
            alt = alt.replace('\\n', '; ')
            alt = re.sub(r'\s+', ' ', alt)
            if alt:
                results.append(alt)

        if results:
            text = '|'.join(results)
            if heading:
                text = heading + ' ' + text
            pod_texts.append(text)

    if not pod_texts:
        return 'no results'

    ret = '. '.join(pod_texts[1:])  # first pod is the input

    if not ret:
        # definite integrals have only the result pod first
        ret = pod_texts[0]

    # NOTE(review): 430 presumably keeps the reply within a single IRC
    # message -- confirm against the bot's line-length handling.
    max_len = 430
    if len(ret) > max_len:
        # Prefer cutting at the last space before the limit; if there is
        # none (rfind() == -1), hard-cut at the limit rather than slicing
        # with -1, which would only drop the final character.
        cut = ret.rfind(' ', 0, max_len)
        if cut == -1:
            cut = max_len
        # Drop dangling punctuation/whitespace before appending the ellipsis.
        ret = re.sub(r'\W+$', '', ret[:cut]) + '...'

    return ret
wolframalpha.py: gets results from WA (not using the commercial API) -- works pretty well 2010-02-01 05:49:52 +00:00			`import re`
			`import urllib2`

			`from lxml import html`

			`from util import hook`

			`@hook.command`
			`@hook.command('wa')`
			`def wolframalpha(inp):`
			`".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's" \`
			`"results for <query>"`

			`if not inp:`
			`return wolframalpha.__query__`

			`url = "http://www.wolframalpha.com/input/?i=%s&asynchronous=false"`

			`h = html.parse(url % urllib2.quote(inp, safe=''))`

			`pods = h.xpath("//div[@class='pod ']")`

			`pod_texts = []`
			`for pod in pods:`
			`heading = pod.find('h1/span').text_content().strip()`
			`results = []`
			`for image in pod.xpath('div/div[@class="output"]/img'):`
			`alt = image.attrib['alt'].strip()`
			`alt = alt.replace('\\n', '; ')`
			`alt = re.sub(r'\s+', ' ', alt)`
			`if alt:`
			`results.append(alt)`
			`if results:`
			`pod_texts.append(heading + ' ' + '\|'.join(results))`

			`ret = '. '.join(pod_texts[1:]) # first pod is the input`

			`if not ret:`
			`if not pod_texts:`
			`return 'no results'`
			`ret = pod_texts[0] # definite integrals have only the result pod first`

			`if len(ret) > 430:`
			`ret = ret[:ret.rfind(' ', 0, 430)]`
			`ret = re.sub(r'\W+$', '', ret) + '...'`

			`if not ret:`
			`return 'no result'`

			`return ret`