wikipedia: send a User-Agent header when fetching XML from the Wikipedia API. Without one, the API frequently responds with 403 errors.

This commit is contained in:
Chris Skalenda 2010-02-21 22:12:55 -06:00
parent ca8b24518e
commit 8531bdd76c
1 changed files with 7 additions and 2 deletions

View File

@@ -1,7 +1,7 @@
 '''Searches wikipedia and returns first sentence of article
 Scaevolus 2009'''
-import urllib
+import urllib2
 from lxml import etree
 import re
@@ -24,7 +24,12 @@ def wiki(inp):
         return wiki.__doc__
     q = search_url % (urllib.quote(inp, safe=''))
-    x = etree.parse(q)
+    request = urllib2.Request(q)
+    request.add_header('User-Agent', 'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/')
+    opener = urllib2.build_opener()
+    xml = opener.open(request).read()
+    x = etree.fromstring(xml)
     ns = '{http://opensearch.org/searchsuggest2}'
     items = x.findall(ns + 'Section/' + ns + 'Item')