twitter: fix typo. drama: use a real json parser instead of literal_eval, rewrite to not use BeautifulSoup

Ryan Hitchman 2010-03-03 22:30:54 -07:00
parent 997ad976e8
commit c396be96d2
2 changed files with 30 additions and 40 deletions
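A note on the parser swap: the opensearch endpoint returns JSON, and ast.literal_eval only appeared to work because a JSON array of plain strings happens to also be a valid Python literal. It breaks on JSON-only literals such as true or null, which is exactly what json.loads is for. A minimal sketch (not part of the commit; the sample payload is made up):

    import json
    from ast import literal_eval

    # Made-up opensearch-style response: ["query", [matching titles]]
    raw = '["lulz", ["Lulz", "Lulz/Archive"]]'

    json.loads(raw)       # ['lulz', ['Lulz', 'Lulz/Archive']]
    literal_eval(raw)     # only works because this JSON is also a Python literal

    json.loads('[true, false, null]')      # [True, False, None]
    # literal_eval('[true, false, null]')  # ValueError: malformed node or string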

drama.py

@@ -1,10 +1,11 @@
 '''Searches Encyclopedia Dramatica and returns the first paragraph of the
 article'''
-from ast import literal_eval
+import json
+from lxml import html
 import urllib2
 from util import hook
-from util import BeautifulSoup
 
 api_url = "http://encyclopediadramatica.com/api.php?action=opensearch&search=%s"
 ed_url = "http://encyclopediadramatica.com/%s"
@@ -12,43 +13,32 @@ ed_url = "http://encyclopediadramatica.com/%s"
 ua_header = ('User-Agent','Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/')
 
-def get_article_name(query):
-    q = api_url % (urllib2.quote(query, safe=''))
-    request = urllib2.Request(q)
-    request.add_header(*ua_header)
-    opener = urllib2.build_opener()
-    try:
-        results = literal_eval(opener.open(request).read())
-        if isinstance(results,list) and len(results[1]):
-            return results[1][0].replace(' ','_')
-    except:
-        return None
-
 @hook.command('ed')
 @hook.command
 def drama(inp):
     '''.drama <phrase> -- gets first paragraph of Encyclopedia Dramatica ''' \
     '''article on <phrase>'''
     if not inp:
         return drama.__doc__
 
-    article_name = get_article_name(inp)
-    if not article_name:
-        return 'no results found'
+    q = api_url % (urllib2.quote(inp, safe=''))
+    request = urllib2.Request(q)
+    request.add_header(*ua_header)
+    j = json.loads(urllib2.build_opener().open(request).read())
+    if not j[1]:
+        return 'no results found'
+    article_name = j[1][0].replace(' ', '_')
 
     url = ed_url % (urllib2.quote(article_name))
     request = urllib2.Request(url)
     request.add_header(*ua_header)
-    opener = urllib2.build_opener()
-    result = opener.open(request).read()
-
-    bs = BeautifulSoup.BeautifulSoup(result)
-    content = bs.find('div', {"id":"bodyContent"})
-    for p in content.findAll('p'):
-        if p.text:
-            summary = ''.join(''.join(p.findAll(text=True)).splitlines())
+    page = html.fromstring(urllib2.build_opener().open(request).read())
+
+    for p in page.xpath('//div[@id="bodyContent"]/p'):
+        if p.text_content():
+            summary = ' '.join(p.text_content().splitlines())
             if len(summary) > 300:
-                summary = summary[:300] + "..."
-            return '%s -- %s' % (summary, url)
+                summary = summary[:summary.rfind(' ', 0, 300)] + "..."
+            return '%s :: \x02%s\x02' % (summary, url)
 
     return "error"
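For reference, the lxml idiom that replaces the BeautifulSoup traversal: html.fromstring() parses the page into an element tree, the XPath selects the paragraphs under bodyContent, and text_content() flattens nested tags the way ''.join(p.findAll(text=True)) used to. A standalone sketch against a made-up fragment:

    from lxml import html

    # Made-up fragment standing in for the fetched article HTML
    doc = html.fromstring(
        '<div id="bodyContent"><p></p><p>First <b>real</b> paragraph.</p></div>')

    for p in doc.xpath('//div[@id="bodyContent"]/p'):
        if p.text_content():          # skips the empty leading paragraph
            print(p.text_content())   # First real paragraph.
            break

The truncation also changed: summary[:summary.rfind(' ', 0, 300)] cuts at the last space before the 300-character limit instead of mid-word, and the IRC reply now wraps the URL in \x02 (bold) markers.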

twitter.py

@@ -127,7 +127,7 @@ def twitter(inp):
         reply_name = tweet.find(reply_name).text
         reply_id = tweet.find(reply_id).text
         reply_user = tweet.find(reply_user).text
-        if reply_name is not None and (reply_id is Not None or
+        if reply_name is not None and (reply_id is not None or
                 reply_user is not None):
             add_reply(reply_name, reply_id if reply_id else -1)
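The twitter hunk is one token, but it is a crash fix rather than cosmetics: Not is not a Python keyword, so the old comparison raised a NameError whenever a reply tweet reached that branch. Illustration:

    x = None
    x is not None    # False -- 'is not' is the operator
    # x is Not None  # NameError: name 'Not' is not defined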