'''Searches Wikipedia and returns the first sentence of an article.

Scaevolus 2009'''

import re
import urllib2

from lxml import etree

from util import hook


api_prefix = "http://en.wikipedia.org/w/api.php"
search_url = api_prefix + "?action=opensearch&search=%s&format=xml"

paren_re = re.compile(r'\s*\(.*\)$')


@hook.command('w')
@hook.command
def wiki(inp):
    '''.w/.wiki <phrase> -- gets first sentence of Wikipedia ''' \
        '''article on <phrase>'''

    if not inp:
        return wiki.__doc__

    q = search_url % (urllib2.quote(inp, safe=''))

    request = urllib2.Request(q)
    request.add_header('User-Agent',
                       'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/')
    opener = urllib2.build_opener()
    xml = opener.open(request).read()
    x = etree.fromstring(xml)

    # opensearch results look like
    # <SearchSuggestion><Section><Item><Text/><Description/><Url/>...,
    # with every element in the searchsuggest2 namespace
    ns = '{http://opensearch.org/searchsuggest2}'
    items = x.findall(ns + 'Section/' + ns + 'Item')

    if items == []:
        if x.find('error') is not None:
            return 'error: %(code)s: %(info)s' % x.find('error').attrib
        else:
            return 'no results found'

    def extract(item):
        return [item.find(ns + x).text for x in
                ('Text', 'Description', 'Url')]

    title, desc, url = extract(items[0])

    # skip disambiguation pages and take the next result instead
    if 'may refer to' in desc and len(items) > 1:
        title, desc, url = extract(items[1])

    # strip any trailing parenthetical, e.g. " (programming language)"
    title = paren_re.sub('', title)

    if title.lower() not in desc.lower():
        desc = title + desc

    desc = re.sub(r'\s+', ' ', desc).strip()  # remove excess spaces

    if len(desc) > 300:
        desc = desc[:300] + '...'

    return '%s -- %s' % (desc, url)
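

# A minimal manual-test sketch (an assumption, not part of the bot's plugin
# loader): it only works if util.hook's decorators return the original
# function unchanged, so wiki() can be called directly with a query string.
if __name__ == '__main__':
    print wiki('python (programming language)')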