h/plugins/wikipedia.py

'''Searches wikipedia and returns first sentence of article
Scaevolus 2009'''

import urllib
from lxml import etree
import re

from util import hook


# Base endpoint of the English Wikipedia MediaWiki API.
api_prefix = "http://en.wikipedia.org/w/api.php"
# OpenSearch query template: %s receives the URL-quoted search phrase and
# the response is requested in XML format.
search_url = api_prefix + "?action=opensearch&search=%s&format=xml"

# Matches a trailing parenthesised qualifier plus any whitespace before it,
# e.g. the " (planet)" in "Mercury (planet)".  Raw string so the regex
# escapes are not interpreted (or warned about) as string escapes.
paren_re = re.compile(r'\s*\(.*\)$')


@hook.command(hook=r'w(\s+.*|$)')
@hook.command
def wiki(query):
    '''.w/.wiki <phrase> -- gets first sentence of wikipedia ''' \
    '''article on <phrase>'''
    # NOTE: the docstring above doubles as the usage message returned to the
    # caller when no phrase is supplied, so its wording is user-facing.
    #
    # query -- the text following the command; surrounding whitespace is
    #          ignored.
    # Returns a one-line string: the usage message, an API error message,
    # 'no results found', or '<description> -- <url>' for the chosen article.

    phrase = query.strip()
    if not phrase:
        return wiki.__doc__

    # safe='' so that '/' in the phrase is percent-encoded as well.
    request_url = search_url % urllib.quote(phrase, safe='')
    # etree.parse is handed the request URL itself as its source.
    tree = etree.parse(request_url)

    ns = '{http://opensearch.org/searchsuggest2}'
    items = tree.findall(ns + 'Section/' + ns + 'Item')

    if not items:
        # Either the API reported an <error code=... info=...> element or
        # the search simply matched nothing.
        error = tree.find('error')
        if error is not None:
            return 'error: %(code)s: %(info)s' % error.attrib
        return 'no results found'

    def extract(item):
        # Return [title, description, url] for one result item.  A child
        # element that is missing or has no text yields '' instead of None
        # so the string operations below cannot fail on it.
        fields = []
        for tag in ('Text', 'Description', 'Url'):
            node = item.find(ns + tag)
            text = node.text if node is not None else None
            fields.append(text or '')
        return fields

    title, desc, url = extract(items[0])

    # Skip a leading disambiguation entry in favour of the next result, but
    # only when a next result actually exists.
    if 'may refer to' in desc and len(items) > 1:
        title, desc, url = extract(items[1])

    # Drop a trailing "(qualifier)" from the title.
    title = paren_re.sub('', title)

    # Make sure the article title appears somewhere in the reply.
    if title.lower() not in desc.lower():
        desc = title + desc

    desc = re.sub(r'\s+', ' ', desc).strip()  # remove excess spaces

    # Cap the description at 300 characters and mark the truncation.
    if len(desc) > 300:
        desc = desc[:300] + '...'

    return '%s -- %s' % (desc, url)
adding wikipedia plugin 2009-03-19 23:13:22 +00:00			`'''Searches wikipedia and returns first sentence of article`
			`Scaevolus 2009'''`

			`import urllib`
			`from lxml import etree`
			`import re`

restructure utilities, harmonize imports, add doc on other bots 2009-07-08 17:04:30 +00:00			`from util import hook`

adding wikipedia plugin 2009-03-19 23:13:22 +00:00
			`api_prefix = "http://en.wikipedia.org/w/api.php"`
			`search_url = api_prefix + "?action=opensearch&search=%s&format=xml"`

			`paren_re = re.compile('\s*\(.*\)$')`

PEP8 compliance (only whitespace changes) 2009-04-18 00:57:18 +00:00
in-irc documentation for wikipedia 2009-04-03 18:40:53 +00:00			`@hook.command(hook='w(\s+.*|$)')`
adding wikipedia plugin 2009-03-19 23:13:22 +00:00			`@hook.command`
			`def wiki(query):`
PEP8 compliance (only whitespace changes) 2009-04-18 00:57:18 +00:00			`'''.w/.wiki <phrase> -- gets first sentence of wikipedia ''' \`
			`'''article on <phrase>'''`

in-irc documentation for wikipedia 2009-04-03 18:40:53 +00:00			`if not query.strip():`
			`return wiki.__doc__`

			`q = search_url % (urllib.quote(query.strip(), safe=''))`
adding wikipedia plugin 2009-03-19 23:13:22 +00:00			`x = etree.parse(q)`

			`ns = '{http://opensearch.org/searchsuggest2}'`
			`items = x.findall(ns + 'Section/' + ns + 'Item')`

minor fixes for twitter/wikipedia 2009-03-24 22:53:56 +00:00			`if items == []:`
better error reporting in wikipedia plugin 2009-03-30 23:32:52 +00:00			`if x.find('error') is not None:`
			`return 'error: %(code)s: %(info)s' % x.find('error').attrib`
			`else:`
			`return 'no results found'`
minor fixes for twitter/wikipedia 2009-03-24 22:53:56 +00:00
adding wikipedia plugin 2009-03-19 23:13:22 +00:00			`def extract(item):`
PEP8 compliance (only whitespace changes) 2009-04-18 00:57:18 +00:00			`return [item.find(ns + x).text for x in`
adding wikipedia plugin 2009-03-19 23:13:22 +00:00			`('Text', 'Description', 'Url')]`
PEP8 compliance (only whitespace changes) 2009-04-18 00:57:18 +00:00
adding wikipedia plugin 2009-03-19 23:13:22 +00:00			`title, desc, url = extract(items[0])`

			`if 'may refer to' in desc:`
			`title, desc, url = extract(items[1])`
PEP8 compliance (only whitespace changes) 2009-04-18 00:57:18 +00:00
adding wikipedia plugin 2009-03-19 23:13:22 +00:00			`title = paren_re.sub('', title)`

			`if title.lower() not in desc.lower():`
			`desc = title + desc`
PEP8 compliance (only whitespace changes) 2009-04-18 00:57:18 +00:00
adding wikipedia plugin 2009-03-19 23:13:22 +00:00			`desc = re.sub('\s+', ' ', desc).strip() #remove excess spaces`

			`if len(desc) > 300:`
			`desc = desc[:300] + '...'`

			`return '%s -- %s' % (desc, url)`