From f3f4dac7e9a851b51f9467c70ef629e6e7132bef Mon Sep 17 00:00:00 2001
From: Ryan Hitchman
Date: Wed, 25 Aug 2010 15:26:12 -0500
Subject: [PATCH] make http able to handle cookies, somethingawful.py: give thread titles for SA forums links

---
NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy, and plugins/somethingawful.py was revised
during review, so the commit id above and the blob id on the "index" line
below no longer describe this content.  If the plugins/util/http.py context
does not match your tree, try `git apply --ignore-whitespace`.

 plugins/drama.py          |    0
 plugins/explain.py        |    0
 plugins/somethingawful.py |   83 +++++++++++++++++++++++++++++++++++++++
 plugins/util/http.py      |   16 +++++++--
 4 files changed, 96 insertions(+), 3 deletions(-)
 mode change 100755 => 100644 plugins/drama.py
 mode change 100755 => 100644 plugins/explain.py
 create mode 100644 plugins/somethingawful.py

diff --git a/plugins/drama.py b/plugins/drama.py
old mode 100755
new mode 100644
diff --git a/plugins/explain.py b/plugins/explain.py
old mode 100755
new mode 100644
diff --git a/plugins/somethingawful.py b/plugins/somethingawful.py
new file mode 100644
index 0000000..d332ad8
--- /dev/null
+++ b/plugins/somethingawful.py
@@ -0,0 +1,83 @@
+"""Announce thread titles for Something Awful forums links."""
+
+import urllib
+
+from util import hook, http
+
+
+thread_re = r"(?i)forums\.somethingawful\.com/\S+threadid=(\d+)"
+showthread = "http://forums.somethingawful.com/showthread.php?noseen=1"
+
+
+def login(user, password):
+    """Log in to the SA forums unless the cookie jar already has a session.
+
+    The session counts as live only when both the ``bbuserid`` and
+    ``bbpassword`` cookies for the forums domain are in ``http.jar``;
+    otherwise the login form is posted with cookie handling enabled.
+    """
+    http.jar.clear_expired_cookies()
+    names = set(cookie.name for cookie in http.jar
+                if cookie.domain == 'forums.somethingawful.com')
+    # A jar holding only one of the two cookies used to hit
+    # `assert("malformed cookie jar")`, which can never fail (a non-empty
+    # string is true) and so fell through to a fresh login.  Keep that
+    # behaviour, minus the dead statement.
+    if 'bbuserid' in names and 'bbpassword' in names:
+        return
+    # urlencode so '&', '=', '%' or spaces in the credentials cannot
+    # corrupt the form body.
+    post_data = urllib.urlencode([('action', 'login'),
+                                  ('username', user),
+                                  ('password', password)])
+    http.get("http://forums.somethingawful.com/account.php", cookies=True,
+             post_data=post_data)
+
+
+@hook.regex(thread_re)
+def forum_link(inp, bot=None):
+    """Summarise a linked SA thread: forum, title, poster and post count.
+
+    Returns None when SA credentials are not configured or the fetched
+    page lacks the expected breadcrumb/author markup.
+    """
+    if 'sa_user' not in bot.config or \
+            'sa_password' not in bot.config:
+        return
+
+    login(bot.config['sa_user'], bot.config['sa_password'])
+
+    thread = http.get_html(showthread, threadid=inp.group(1), perpage='1',
+                           cookies=True)
+
+    breadcrumbs = thread.xpath('//div[@class="breadcrumbs"]//a/text()')
+    authors = thread.xpath('//dt[@class="author"]/text()')
+
+    # Both a forum and a thread crumb plus one author are needed below;
+    # bail out quietly instead of raising IndexError on an odd page.
+    if len(breadcrumbs) < 2 or not authors:
+        return
+
+    thread_title = breadcrumbs[-1]
+    forum_title = forum_abbrevs.get(breadcrumbs[-2], breadcrumbs[-2])
+    poster = authors[0]
+
+    # 1 post per page => n_pages = n_posts
+    last_page = thread.xpath('//a[@title="last page"]/@href')
+
+    if not last_page:
+        num_posts = 1
+    else:
+        num_posts = int(last_page[0].rsplit('=', 1)[1])
+
+    return '\x02%s\x02 > \x02%s\x02 by \x02%s\x02, %s post%s' % (
+        forum_title, thread_title, poster, num_posts,
+        's' if num_posts > 1 else '')
+
+
+forum_abbrevs = {
+    'Serious Hardware / Software Crap': 'SHSC',
+    'The Cavern of COBOL': 'CoC',
+    'General Bullshit': 'GBS',
+    'Haus of Tech Support': 'HoTS'
+}
diff --git a/plugins/util/http.py b/plugins/util/http.py
index 548afeb..4acf19c 100644
--- a/plugins/util/http.py
+++ b/plugins/util/http.py
@@ -1,5 +1,6 @@
 # convenience wrapper for urllib2 & friends
 
+import cookielib
 import json
 import urllib
 import urllib2
@@ -10,12 +11,15 @@ from urllib2 import HTTPError, URLError
 
 from lxml import etree, html
 
-user_agent = 'Skybot/1.0 http://bitbucket.org/Scaevolus/skybot'
+
+user_agent = 'Skybot/1.0 http://github.com/rmmh/skybot'
 
 ua_firefox = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) ' \
              'Gecko/20070725 Firefox/2.0.0.6'
 ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
 
+jar = cookielib.CookieJar()
+
 
 def get(*args, **kwargs):
     return open(*args, **kwargs).read()
@@ -34,7 +38,7 @@ def get_json(*args, **kwargs):
 
 
 def open(url, query_params={}, user_agent=user_agent, post_data=None,
-         get_method=None, **kwargs):
+         get_method=None, cookies=False, **kwargs):
     query_params.update(kwargs)
 
     url = prepare_url(url, query_params)
@@ -45,7 +49,13 @@ def open(url, query_params={}, user_agent=user_agent, post_data=None,
         request.get_method = lambda: get_method
 
     request.add_header('User-Agent', user_agent)
-    return urllib2.build_opener().open(request)
+
+    if cookies:
+        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
+    else:
+        opener = urllib2.build_opener()
+
+    return opener.open(request)
 
 
 def prepare_url(url, queries):