twitter: fix typo. drama: use a real json parser instead of literal_eval, rewrite to not use BeautifulSoup

Ryan Hitchman 2010-03-03 22:30:54 -07:00
parent 997ad976e8
commit c396be96d2
2 changed files with 30 additions and 40 deletions
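A note on the parser swap: the opensearch endpoint returns JSON, and ast.literal_eval only appeared to work because a JSON array of plain strings happens to also be a valid Python literal. It breaks on JSON-only literals such as true or null, which is exactly what json.loads is for. A minimal sketch (not part of the commit; the sample payload is made up):

    import json
    from ast import literal_eval

    # Made-up opensearch-style response: ["query", [matching titles]]
    raw = '["lulz", ["Lulz", "Lulz/Archive"]]'

    json.loads(raw)       # ['lulz', ['Lulz', 'Lulz/Archive']]
    literal_eval(raw)     # only works because this JSON is also a Python literal

    json.loads('[true, false, null]')      # [True, False, None]
    # literal_eval('[true, false, null]')  # ValueError: malformed node or string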

drama.py

@@ -1,10 +1,11 @@
 '''Searches Encyclopedia Dramatica and returns the first paragraph of the
 article'''
-from ast import literal_eval
+import json
+from lxml import html
 import urllib2
 from util import hook
-from util import BeautifulSoup
 
 api_url = "http://encyclopediadramatica.com/api.php?action=opensearch&search=%s"
 ed_url = "http://encyclopediadramatica.com/%s"
@@ -12,43 +13,32 @@ ed_url = "http://encyclopediadramatica.com/%s"
 ua_header = ('User-Agent','Skybot/1.0 http://bitbucket.org/Scaevolus/skybot/')
 
-def get_article_name(query):
-    q = api_url % (urllib2.quote(query, safe=''))
-    request = urllib2.Request(q)
-    request.add_header(*ua_header)
-    opener = urllib2.build_opener()
-    try:
-        results = literal_eval(opener.open(request).read())
-        if isinstance(results,list) and len(results[1]):
-            return results[1][0].replace(' ','_')
-    except:
-        return None
-
 @hook.command('ed')
 @hook.command
 def drama(inp):
     '''.drama <phrase> -- gets first paragraph of Encyclopedia Dramatica ''' \
     '''article on <phrase>'''
     if not inp:
         return drama.__doc__
 
-    article_name = get_article_name(inp)
-    if not article_name:
-        return 'no results found'
+    q = api_url % (urllib2.quote(inp, safe=''))
+    request = urllib2.Request(q)
+    request.add_header(*ua_header)
+    j = json.loads(urllib2.build_opener().open(request).read())
+    if not j[1]:
+        return 'no results found'
+    article_name = j[1][0].replace(' ', '_')
 
     url = ed_url % (urllib2.quote(article_name))
     request = urllib2.Request(url)
     request.add_header(*ua_header)
-    opener = urllib2.build_opener()
-    result = opener.open(request).read()
-
-    bs = BeautifulSoup.BeautifulSoup(result)
-    content = bs.find('div', {"id":"bodyContent"})
-    for p in content.findAll('p'):
-        if p.text:
-            summary = ''.join(''.join(p.findAll(text=True)).splitlines())
+    page = html.fromstring(urllib2.build_opener().open(request).read())
+
+    for p in page.xpath('//div[@id="bodyContent"]/p'):
+        if p.text_content():
+            summary = ' '.join(p.text_content().splitlines())
             if len(summary) > 300:
-                summary = summary[:300] + "..."
-            return '%s -- %s' % (summary, url)
+                summary = summary[:summary.rfind(' ', 0, 300)] + "..."
+            return '%s :: \x02%s\x02' % (summary, url)
 
     return "error"
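For reference, the lxml idiom that replaces the BeautifulSoup traversal: html.fromstring() parses the page into an element tree, the XPath selects the paragraphs under bodyContent, and text_content() flattens nested tags the way ''.join(p.findAll(text=True)) used to. A standalone sketch against a made-up fragment:

    from lxml import html

    # Made-up fragment standing in for the fetched article HTML
    doc = html.fromstring(
        '<div id="bodyContent"><p></p><p>First <b>real</b> paragraph.</p></div>')

    for p in doc.xpath('//div[@id="bodyContent"]/p'):
        if p.text_content():          # skips the empty leading paragraph
            print(p.text_content())   # First real paragraph.
            break

The truncation also changed: summary[:summary.rfind(' ', 0, 300)] cuts at the last space before the 300-character limit instead of mid-word, and the IRC reply now wraps the URL in \x02 (bold) markers.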

twitter.py

@@ -127,7 +127,7 @@ def twitter(inp):
         reply_name = tweet.find(reply_name).text
         reply_id = tweet.find(reply_id).text
         reply_user = tweet.find(reply_user).text
-        if reply_name is not None and (reply_id is Not None or
+        if reply_name is not None and (reply_id is not None or
                 reply_user is not None):
             add_reply(reply_name, reply_id if reply_id else -1)
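The twitter hunk is one token, but it is a crash fix rather than cosmetics: Not is not a Python keyword, so the old comparison raised a NameError whenever a reply tweet reached that branch. Illustration:

    x = None
    x is not None    # False -- 'is not' is the operator
    # x is Not None  # NameError: name 'Not' is not defined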