import re
import urllib2

from lxml import html

from util import hook

# Longest reply, in characters, before the text is cut and '...' is appended.
_MAX_REPLY_LEN = 430


def _truncate(text, limit=_MAX_REPLY_LEN):
    """Cut text at the last space before limit and append '...'.

    Text no longer than limit is returned unchanged.  Trailing non-word
    characters left at the cut point are stripped before the ellipsis.
    """
    if len(text) <= limit:
        return text
    cut = text.rfind(' ', 0, limit)
    if cut <= 0:
        # No usable space before the limit.  Slicing with rfind's -1 would
        # drop only the last character and leave the text over-long, and a
        # cut at 0 would discard everything, so hard-cut at the limit.
        cut = limit
    return re.sub(r'\W+$', '', text[:cut]) + '...'


def _pod_text(pod):
    """Return 'heading result|result...' for one pod, or None if it has no results."""
    results = []
    for image in pod.xpath('div/div[@class="output"]/img'):
        # Results are read from the images' alt text; an image without an
        # alt attribute contributes nothing instead of raising KeyError.
        alt = image.get('alt', '').strip()
        # The alt text carries line breaks as a literal backslash + 'n'.
        alt = alt.replace('\\n', '; ')
        alt = re.sub(r'\s+', ' ', alt)
        if alt:
            results.append(alt)
    if not results:
        return None

    joined = '|'.join(results)
    heading_el = pod.find('h1/span')
    # A pod without the expected heading markup keeps its results; only the
    # heading prefix is omitted.
    heading = heading_el.text_content().strip() if heading_el is not None else ''
    if heading:
        return heading + ' ' + joined
    return joined


# NOTE: the docstring below is user-facing -- it is returned verbatim as the
# usage message when the command is invoked without a query.
@hook.command
@hook.command('wa')
def wolframalpha(inp):
    ".wa/.wolframalpha <query> -- scrapes Wolfram Alpha's results for <query>"

    if not inp:
        return wolframalpha.__doc__

    url = "http://www.wolframalpha.com/input/?i=%s&asynchronous=false"

    # html.parse fetches and parses the page; the query is fully
    # percent-encoded (safe='') so characters such as '/' are escaped too.
    h = html.parse(url % urllib2.quote(inp, safe=''))

    pods = h.xpath("//div[@class='pod ']")

    pod_texts = []
    for pod in pods:
        text = _pod_text(pod)
        if text is not None:
            pod_texts.append(text)

    ret = '. '.join(pod_texts[1:])  # first pod is the input

    if not ret:
        if not pod_texts:
            return 'no results'
        ret = pod_texts[0]  # definite integrals have only the result pod first

    return _truncate(ret)