'''Searches Wikipedia and returns the first sentence of an article.

Scaevolus 2009'''

import re
import urllib2

from lxml import etree

from util import hook


api_prefix = "http://en.wikipedia.org/w/api.php"
search_url = api_prefix + "?action=opensearch&search=%s&format=xml"

paren_re = re.compile(r'\s*\(.*\)$')


@hook.command('w')
@hook.command
def wiki(inp):
    '''.w/.wiki <phrase> -- gets first sentence of Wikipedia ''' \
        '''article on <phrase>'''

    if not inp:
        return wiki.__doc__

    q = search_url % (urllib2.quote(inp, safe=''))

    request = urllib2.Request(q)
    request.add_header('User-Agent',
                       'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/')
    opener = urllib2.build_opener()
    xml = opener.open(request).read()
    x = etree.fromstring(xml)

    # opensearch results look like
    # <SearchSuggestion><Section><Item><Text/><Description/><Url/>...,
    # with every element in the searchsuggest2 namespace
    ns = '{http://opensearch.org/searchsuggest2}'
    items = x.findall(ns + 'Section/' + ns + 'Item')

    if items == []:
        if x.find('error') is not None:
            return 'error: %(code)s: %(info)s' % x.find('error').attrib
        else:
            return 'no results found'

    def extract(item):
        return [item.find(ns + x).text for x in
                ('Text', 'Description', 'Url')]

    title, desc, url = extract(items[0])

    # skip disambiguation pages and take the next result instead
    if 'may refer to' in desc and len(items) > 1:
        title, desc, url = extract(items[1])

    # strip any trailing parenthetical, e.g. " (programming language)"
    title = paren_re.sub('', title)

    if title.lower() not in desc.lower():
        desc = title + desc

    desc = re.sub(r'\s+', ' ', desc).strip()  # remove excess spaces

    if len(desc) > 300:
        desc = desc[:300] + '...'

    return '%s -- %s' % (desc, url)
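

# A minimal manual-test sketch (an assumption, not part of the bot's plugin
# loader): it only works if util.hook's decorators return the original
# function unchanged, so wiki() can be called directly with a query string.
if __name__ == '__main__':
    print wiki('python (programming language)')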