twitter: fix typo. drama: use a real json parser instead of literal_eval, rewrite to not use BeautifulSoup

2010-03-03 22:30:54 -07:00 · 2010-03-03 22:30:54 -07:00 · c396be96d2
parent 997ad976e8
commit c396be96d2
2 changed files with 30 additions and 40 deletions
--- a/plugins/drama.py
+++ b/plugins/drama.py
@@ -1,10 +1,11 @@
 '''Searches Encyclopedia Dramatica and returns the first paragraph of the 
 article'''

-from ast import literal_eval
+import json
+from lxml import html
 import urllib2
+
 from util import hook
-from util import BeautifulSoup

 api_url = "http://encyclopediadramatica.com/api.php?action=opensearch&search=%s"
 ed_url = "http://encyclopediadramatica.com/%s"
@@ -12,43 +13,32 @@ ed_url = "http://encyclopediadramatica.com/%s"
 ua_header = ('User-Agent','Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/')


-def get_article_name(query):
-   q = api_url % (urllib2.quote(query, safe=''))
-   request = urllib2.Request(q)
-   request.add_header(*ua_header)
-   opener = urllib2.build_opener()
-   try:
-      results = literal_eval(opener.open(request).read())
-      if isinstance(results,list) and len(results[1]):
-         return results[1][0].replace(' ','_')
-   except:
-      return None
-
@hook.command('ed')   
@hook.command
 def drama(inp):
-   '''.drama <phrase> -- gets first paragraph of Encyclopedia Dramatica ''' \
-   '''article on <phrase>'''
-   if not inp:
-      return drama.__doc__
-   
-   article_name = get_article_name(inp)
-   if not article_name:
-      return 'no results found'
-   
-   url = ed_url % (urllib2.quote(article_name))
-   request = urllib2.Request(url)
-   request.add_header(*ua_header)
-   opener = urllib2.build_opener()
-   result = opener.open(request).read()
-   
-   bs = BeautifulSoup.BeautifulSoup(result)
-   content = bs.find('div', {"id":"bodyContent"})
-   
-   for p in content.findAll('p'):
-      if p.text:
-         summary = ''.join(''.join(p.findAll(text=True)).splitlines())
-         if len(summary) > 300:
-            summary = summary[:300] + "..."
-         return '%s -- %s' % (summary, url)
-   return "error"
+    '''.drama <phrase> -- gets first paragraph of Encyclopedia Dramatica ''' \
+    '''article on <phrase>'''
+    if not inp:
+        return drama.__doc__
+    
+    q = api_url % (urllib2.quote(inp, safe=''))
+    request = urllib2.Request(q)
+    request.add_header(*ua_header)
+    j = json.loads(urllib2.build_opener().open(request).read())
+    if not j[1]:
+        return 'no results found'
+    article_name = j[1][0].replace(' ', '_')
+    
+    url = ed_url % (urllib2.quote(article_name))
+    request = urllib2.Request(url)
+    request.add_header(*ua_header)
+    page = html.fromstring(urllib2.build_opener().open(request).read())
+    
+    for p in page.xpath('//div[@id="bodyContent"]/p'):
+        if p.text_content():
+            summary = ' '.join(p.text_content().splitlines())
+            if len(summary) > 300:
+                summary = summary[:summary.rfind(' ', 0, 300)] + "..."
+            return '%s :: \x02%s\x02' % (summary, url)
+
+    return "error"
--- a/plugins/twitter.py
+++ b/plugins/twitter.py
@@ -127,7 +127,7 @@ def twitter(inp):
    reply_name = tweet.find(reply_name).text
    reply_id = tweet.find(reply_id).text
    reply_user = tweet.find(reply_user).text
-    if reply_name is not None and (reply_id is Not None or
+    if reply_name is not None and (reply_id is not None or
            reply_user is not None):
        add_reply(reply_name, reply_id if reply_id else -1)