From 1bc0a3497a0aa23eb07f6c075117a3cc9a566ffd Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Sun, 17 Jan 2010 13:20:11 -0700 Subject: [PATCH 01/13] make user/realname/port possible to change in config file --- bot.py | 2 +- core/config.py | 16 +++++++++++++--- core/irc.py | 5 +++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/bot.py b/bot.py index 18ecc57..a56856e 100755 --- a/bot.py +++ b/bot.py @@ -32,7 +32,7 @@ try: print 'ERROR: more than one connection named "%s"' % name raise ValueError bot.conns[name] = irc(conf['server'], conf['nick'], - channels=conf['channels'], conf=conf) + port=conf.get('port', 6667), channels=conf['channels'], conf=conf) except Exception, e: print 'ERROR: malformed config file', Exception, e sys.exit() diff --git a/core/config.py b/core/config.py index 0fc8dee..12336bc 100644 --- a/core/config.py +++ b/core/config.py @@ -4,9 +4,19 @@ from util import yaml if not os.path.exists('config'): conf = {'connections': [ - {'local irc': {'nick': 'skybot', - 'server': 'localhost', - 'channels': ["#test"]}}]} + {'local irc': + {'nick': 'skybot', + #'user': 'skybot', + #'realname': 'Python bot - http://bitbucket.org/Scaevolus/skybot/', + 'server': 'localhost', + #'port': 6667, + 'channels': ["#test"], + #'nickserv_password', 'password', + #'nickserv_name': 'nickserv', + #'nickserv_command': 'IDENTIFY %s' + } + } + ]} yaml.dump(conf, open('config', 'w')) del conf diff --git a/core/irc.py b/core/irc.py index b86ff60..c8c050c 100644 --- a/core/irc.py +++ b/core/irc.py @@ -93,8 +93,9 @@ class irc(object): self.conn = crlf_tcp(self.server, self.port) thread.start_new_thread(self.conn.run, ()) self.set_nick(self.nick) - self.cmd("USER", ["skybot", "3", "*", - ":Python bot - http://bitbucket.org/Scaevolus/skybot/"]) + self.cmd("USER", + [conf.get('user', 'skybot'), "3", "*", ':' + conf.get('realname', + 'Python bot - http://bitbucket.org/Scaevolus/skybot/')]) def parse_loop(self): while True: From 92c6d798b5a2c233fe29dbcc23bdf95ebba65e4b Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Sun, 17 Jan 2010 13:38:37 -0700 Subject: [PATCH 02/13] improve windows compatibility --- bot.py | 3 ++- core/reload.py | 11 ++++++----- plugins/log.py | 3 ++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/bot.py b/bot.py index a56856e..6066049 100755 --- a/bot.py +++ b/bot.py @@ -18,7 +18,8 @@ bot = Bot() print 'Loading plugins' # bootstrap the reloader -eval(compile(open('core/reload.py', 'U').read(), 'core/reload.py', 'exec')) +eval(compile(open(os.path.join('core', 'reload.py'), 'U').read(), + os.path.join('core', 'reload.py'), 'exec')) reload(init=True) print 'Connecting to IRC' diff --git a/core/reload.py b/core/reload.py index dadec02..80510bf 100644 --- a/core/reload.py +++ b/core/reload.py @@ -1,7 +1,8 @@ -import glob import collections -import traceback +import glob +import os import sys +import traceback if 'mtimes' not in globals(): mtimes = {} @@ -19,7 +20,7 @@ def reload(init=False): if init: bot.plugs = collections.defaultdict(lambda: []) - for filename in glob.glob("core/*.py"): + for filename in glob.glob(os.path.join("core", "*.py")): mtime = os.stat(filename).st_mtime if mtime != mtimes.get(filename): mtimes[filename] = mtime @@ -32,11 +33,11 @@ def reload(init=False): sys.exit() # script on startup continue - if filename == 'core/reload.py': + if filename == os.path.join('core', 'reload.py'): reload(init=init) return - fileset = set(glob.glob("plugins/*py")) + fileset = set(glob.glob(os.path.join('plugins', '*py'))) for name, data in 
bot.plugs.iteritems(): # remove deleted/moved plugins bot.plugs[name] = filter(lambda x: x[0][0] in fileset, data) diff --git a/plugins/log.py b/plugins/log.py index ce27906..339fdf3 100644 --- a/plugins/log.py +++ b/plugins/log.py @@ -33,6 +33,8 @@ irc_color_re = re.compile(r'(\x03(\d+,\d+|\d)|[\x0f\x02\x16\x1f])') def get_log_filename(dir, server, chan): + if chan.startswith(':'): + chan = chan[1:] return os.path.join(dir, 'log', gmtime('%Y'), server, gmtime('%%s.%m-%d.log') % chan).lower() @@ -53,7 +55,6 @@ def beautify(input): args['msg'] = irc_color_re.sub('', args['msg']) if input.command == 'PRIVMSG' and input.msg.count('\x01') >= 2: - #ctcp ctcp = input.msg.split('\x01', 2)[1].split(' ', 1) if len(ctcp) == 1: ctcp += [''] From 33585f3190c2f44c0dda9e6654015692c7013c19 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Sun, 17 Jan 2010 16:07:08 -0700 Subject: [PATCH 03/13] put input.inp.strip() in sieve, remove repetitive stripping in plugins --- plugins/dice.py | 2 +- plugins/down.py | 1 - plugins/explain.py | 2 +- plugins/help.py | 2 +- plugins/pyexec.py | 2 +- plugins/remember.py | 7 +++---- plugins/seen.py | 2 +- plugins/sieve.py | 3 ++- plugins/suggest.py | 5 +++-- plugins/tell.py | 2 +- plugins/twitter.py | 1 - plugins/urbandictionary.py | 4 ++-- plugins/weather.py | 2 +- plugins/wikipedia.py | 6 +++--- 14 files changed, 20 insertions(+), 21 deletions(-) diff --git a/plugins/dice.py b/plugins/dice.py index 13bb040..d4f6523 100644 --- a/plugins/dice.py +++ b/plugins/dice.py @@ -33,7 +33,7 @@ def nrolls(count, n): def dice(inp): ".dice -- simulates dicerolls, e.g. .dice 2d20-d5+4 roll 2 " \ "D20s, subtract 1D5, add 4" - if not inp.strip(): + if not inp: return dice.__doc__ spec = whitespace_re.sub('', inp) diff --git a/plugins/down.py b/plugins/down.py index a6247d7..4842242 100644 --- a/plugins/down.py +++ b/plugins/down.py @@ -6,7 +6,6 @@ from util import hook @hook.command def down(inp): '''.down -- checks to see if the site is down''' - inp = inp.strip() if not inp: return down.__doc__ diff --git a/plugins/explain.py b/plugins/explain.py index ba24501..33b78a1 100755 --- a/plugins/explain.py +++ b/plugins/explain.py @@ -10,6 +10,6 @@ def explain(inp): inp = inp.encode('utf8', 'ignore') try: - return explain_c_declaration(inp.rstrip()) + return explain_c_declaration(inp) except Exception, e: return 'error: %s' % e diff --git a/plugins/help.py b/plugins/help.py index af73ce1..af55d2d 100644 --- a/plugins/help.py +++ b/plugins/help.py @@ -10,7 +10,7 @@ def help(bot, input): if func.__doc__ is not None: funcs[csig[1]] = func - if not input.inp.strip(): + if not input.inp: input.pm('available commands: ' + ' '.join(sorted(funcs))) else: if input.inp in funcs: diff --git a/plugins/pyexec.py b/plugins/pyexec.py index f6e3e78..3887779 100644 --- a/plugins/pyexec.py +++ b/plugins/pyexec.py @@ -15,7 +15,7 @@ def py(inp): return py.__doc__ res = urllib.urlopen("http://eval.appspot.com/eval?statement=%s" % - urllib.quote(inp.strip(), safe='')).readlines() + urllib.quote(inp, safe='')).readlines() if len(res) == 0: return res[0] = re_lineends.split(res[0])[0] diff --git a/plugins/remember.py b/plugins/remember.py index 4d2904e..e15e22b 100644 --- a/plugins/remember.py +++ b/plugins/remember.py @@ -46,14 +46,13 @@ def remember(bot, input): except ValueError: return remember.__doc__ - tail = tail.strip() low = head.lower() if low not in memory[filename]: input.reply("done.") else: input.reply('forgetting that "%s", remembering this instead.' 
% memory[filename][low]) - memory[filename][low] = input.inp.strip() + memory[filename][low] = input.inp save_memory(filename, memory[filename]) @@ -64,10 +63,10 @@ def forget(bot, input): filename = make_filename(bot.persist_dir, input.chan) memory.setdefault(filename, load_memory(filename)) - if not input.inp.strip(): + if not input.inp: return forget.__doc__ - low = input.inp.strip().lower() + low = input.inp.lower() if low not in memory[filename]: return "I don't know about that." if not hasattr(input, 'chan'): diff --git a/plugins/seen.py b/plugins/seen.py index 64308f5..72b29ef 100644 --- a/plugins/seen.py +++ b/plugins/seen.py @@ -40,7 +40,7 @@ def seen(bot, input): if len(input.msg) < 6: return seen.__doc__ - query = input.inp.strip() + query = input.inp if query.lower() == input.nick.lower(): return "Have you looked in a mirror lately?" diff --git a/plugins/sieve.py b/plugins/sieve.py index bb3fde3..197a040 100644 --- a/plugins/sieve.py +++ b/plugins/sieve.py @@ -24,6 +24,7 @@ def sieve_suite(bot, input, func, args): if input.re is None: return None - input.inp = ' '.join(input.re.groups()) + input.inp_unstripped = ' '.join(input.re.groups()) + input.inp = input.inp_unstripped.strip() return input diff --git a/plugins/suggest.py b/plugins/suggest.py index 55cbf12..c92c821 100644 --- a/plugins/suggest.py +++ b/plugins/suggest.py @@ -7,11 +7,12 @@ import json from util import hook @hook.command -def suggest(inp): +def suggest(bot, input): ".suggest [#n] -- gets a random/the nth suggested google search" - if not inp.strip(): + if not input.inp: return suggest.__doc__ + inp = input.inp_unstripped m = re.match('^#(\d+) (.+)$', inp) if m: num, inp = m.groups() diff --git a/plugins/tell.py b/plugins/tell.py index bdfc9ec..b2f1237 100644 --- a/plugins/tell.py +++ b/plugins/tell.py @@ -81,7 +81,7 @@ def tell(bot, input): if len(input.msg) < 6: return tell.__doc__ - query = input.msg[6:].strip().partition(" ") + query = input.inp.partition(" ") if query[0] == input.nick: return "No." 
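
A note on what this patch guarantees to plugin authors: sieve.py now sets input.inp_unstripped to the raw text matched by the hook and input.inp to its stripped form, which is why the plugins in this patch can test "if not inp:" rather than "if not inp.strip():". Below is a minimal sketch of a plugin relying on that behaviour; the command name and its output are invented for illustration and are not part of the patch series.

    from util import hook

    @hook.command
    def shout(bot, input):
        ".shout <text> -- echoes <text> in upper case (illustrative example only)"
        if not input.inp:                    # whitespace already stripped by the sieve
            return shout.__doc__
        return input.inp_unstripped.upper()  # raw spacing still available when needed
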
diff --git a/plugins/twitter.py b/plugins/twitter.py index 245360f..239fc0f 100644 --- a/plugins/twitter.py +++ b/plugins/twitter.py @@ -31,7 +31,6 @@ def twitter(inp): ".twitter / //#/@ -- gets last/th tweet from"\ "/gets tweet /gets random tweet with #/gets replied tweet from @" - inp = inp.strip() if not inp: return twitter.__doc__ diff --git a/plugins/urbandictionary.py b/plugins/urbandictionary.py index 2327df6..7021a49 100644 --- a/plugins/urbandictionary.py +++ b/plugins/urbandictionary.py @@ -8,11 +8,11 @@ from util import hook @hook.command def urban(inp): '''.u/.urban -- looks up on urbandictionary.com''' - if not inp.strip(): + if not inp: return urban.__doc__ url = 'http://www.urbandictionary.com/define.php?term=' + \ - urllib.quote(inp.strip(), safe='') + urllib.quote(inp, safe='') page = html.parse(url) words = page.xpath("//td[@class='word']") defs = page.xpath("//div[@class='definition']") diff --git a/plugins/weather.py b/plugins/weather.py index 9556b9f..9068b8e 100644 --- a/plugins/weather.py +++ b/plugins/weather.py @@ -41,7 +41,7 @@ def weather(bot, input): stalk = load_stalk(filename) nick = input.nick.lower() - loc = input.inp.strip() + loc = input.inp dontsave = loc.endswith(" dontsave") if dontsave: loc = loc[:-9].strip().lower() diff --git a/plugins/wikipedia.py b/plugins/wikipedia.py index 8c560ec..93eaa87 100644 --- a/plugins/wikipedia.py +++ b/plugins/wikipedia.py @@ -16,14 +16,14 @@ paren_re = re.compile('\s*\(.*\)$') @hook.command(hook='w(\s+.*|$)') @hook.command -def wiki(query): +def wiki(inp): '''.w/.wiki -- gets first sentence of wikipedia ''' \ '''article on ''' - if not query.strip(): + if not inp: return wiki.__doc__ - q = search_url % (urllib.quote(query.strip(), safe='')) + q = search_url % (urllib.quote(inp, safe='')) x = etree.parse(q) ns = '{http://opensearch.org/searchsuggest2}' From 7380470cf0ecc82dac1efb00aee409daf6a18a53 Mon Sep 17 00:00:00 2001 From: melonhead Date: Mon, 18 Jan 2010 12:47:55 -0500 Subject: [PATCH 04/13] -Add "timeline" URL history plugin -Create persist dir automatically if it does not exist --- bot.py | 2 + plugins/urlhistory.py | 111 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 plugins/urlhistory.py diff --git a/bot.py b/bot.py index 6066049..3654ca6 100755 --- a/bot.py +++ b/bot.py @@ -39,6 +39,8 @@ except Exception, e: sys.exit() bot.persist_dir = os.path.abspath('persist') +if not os.path.exists(bot.persist_dir): + os.mkdir(bot.persist_dir) print 'Running main loop' diff --git a/plugins/urlhistory.py b/plugins/urlhistory.py new file mode 100644 index 0000000..0bd33d2 --- /dev/null +++ b/plugins/urlhistory.py @@ -0,0 +1,111 @@ +import os +import time +from datetime import datetime +import sqlite3 +import pickle +from datetime import timedelta +import re + +from util import hook, timesince + +url_re = re.compile(r'([a-zA-Z]+://|www\.)[^ ]*') + + +dbname = "skybot.db" + +expiration_period = timedelta(days=1) + +#TODO: Generate expiration_period_text from expiration_period +expiration_period_text = "24 hours" + +def adapt_datetime(ts): + return time.mktime(ts.timetuple()) + +sqlite3.register_adapter(datetime, adapt_datetime) + + +def insert_history(conn, url, channel, nick): + cursor = conn.cursor() + now = datetime.now() + cursor.execute("insert into urlhistory(url, nick, chan, time) values(?,?,?,?)", (url, nick, channel, now)) + conn.commit() + +def select_history_for_url_and_channel(conn, url, channel): + cursor = conn.cursor() + results = cursor.execute("select nick, 
time from urlhistory where url=? and chan=?", (url, channel)).fetchall() + j = 0 + now = datetime.now() + nicks = [] + for i in xrange(len(results)): + reltime = datetime.fromtimestamp(results[j][1]) + if (now - reltime) > expiration_period: + conn.execute("delete from urlhistory where url=? and chan=? and nick=? and time=?", (url, channel, results[j][0], results[j][1])) + results.remove(results[j]) + else: + nicks.append(results[j][0]) + j += 1 + return nicks + +def get_nicklist(nicks): + nicks = list(set(nicks)) + nicks.sort() + l = len(nicks) + if l == 0: + return "" + elif l == 1: + return nicks[0] + elif l == 2: + return nicks[0] + " and " + nicks[1] + else: + result = "" + for i in xrange(l-1): + result += nicks[i] + ", " + result += "and " + nicks[-1] + return result + +def dbconnect(db): + "check to see that our db has the the seen table and return a connection." + conn = sqlite3.connect(db) + + results = conn.execute("select count(*) from sqlite_master where name=?", + ("urlhistory",)).fetchone() + if(results[0] == 0): + conn.execute("create table if not exists urlhistory(url text not null, nick text not null, chan text not null, time datetime not null, primary key(url, nick, chan, time));") + conn.commit() + return conn + +def normalize_url(url): + # TODO: do something so that: + # - http://www.google.com + # - www.google.com + # - http://google.com + # - http://google.com/ + # etc are all considered to be the same URL + return url + +def get_once_twice(count): + if count == 1: + return "once" + elif count == 2: + return "twice" + else: + return str(count) + " times" + +@hook.command(hook=r'(.*)', prefix=False, ignorebots=True) +def urlinput(bot, input): + dbpath = os.path.join(bot.persist_dir, dbname) + m = url_re.search(input.msg) + if m: + # URL detected + url = normalize_url(m.group(0)) + conn = dbconnect(dbpath) + dupes = select_history_for_url_and_channel(conn, url, input.chan) + num_dupes = len(dupes) + if num_dupes > 0 and input.nick not in dupes: + nicks = get_nicklist(dupes) + reply = "That link has been posted " + get_once_twice(num_dupes) + reply += " in the past " + expiration_period_text + " by " + nicks + input.reply(reply) + insert_history(conn, url, input.chan, input.nick) + conn.close() + From bb709a74bfd7f3943a0202928da1940e33235601 Mon Sep 17 00:00:00 2001 From: melonhead Date: Mon, 18 Jan 2010 15:07:06 -0500 Subject: [PATCH 05/13] Added URL normalization to urlhistory module to allow better detection of duplicates Added configurable ignored URLs to urlhistory module --- plugins/urlhistory.py | 33 +++--- plugins/util/urlnorm.py | 215 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+), 17 deletions(-) create mode 100644 plugins/util/urlnorm.py diff --git a/plugins/urlhistory.py b/plugins/urlhistory.py index 0bd33d2..ed02ec8 100644 --- a/plugins/urlhistory.py +++ b/plugins/urlhistory.py @@ -7,6 +7,8 @@ from datetime import timedelta import re from util import hook, timesince +from util import urlnorm +#from util import texttime url_re = re.compile(r'([a-zA-Z]+://|www\.)[^ ]*') @@ -15,6 +17,8 @@ dbname = "skybot.db" expiration_period = timedelta(days=1) +ignored_urls = [ urlnorm.normalize("http://google.com") ] + #TODO: Generate expiration_period_text from expiration_period expiration_period_text = "24 hours" @@ -75,13 +79,7 @@ def dbconnect(db): return conn def normalize_url(url): - # TODO: do something so that: - # - http://www.google.com - # - www.google.com - # - http://google.com - # - http://google.com/ - # etc are all considered to 
be the same URL - return url + return urlnorm.normalize(url) def get_once_twice(count): if count == 1: @@ -98,14 +96,15 @@ def urlinput(bot, input): if m: # URL detected url = normalize_url(m.group(0)) - conn = dbconnect(dbpath) - dupes = select_history_for_url_and_channel(conn, url, input.chan) - num_dupes = len(dupes) - if num_dupes > 0 and input.nick not in dupes: - nicks = get_nicklist(dupes) - reply = "That link has been posted " + get_once_twice(num_dupes) - reply += " in the past " + expiration_period_text + " by " + nicks - input.reply(reply) - insert_history(conn, url, input.chan, input.nick) - conn.close() + if url not in ignored_urls: + conn = dbconnect(dbpath) + dupes = select_history_for_url_and_channel(conn, url, input.chan) + num_dupes = len(dupes) + if num_dupes > 0 and input.nick not in dupes: + nicks = get_nicklist(dupes) + reply = "That link has been posted " + get_once_twice(num_dupes) + reply += " in the past " + expiration_period_text + " by " + nicks + input.reply(reply) + insert_history(conn, url, input.chan, input.nick) + conn.close() diff --git a/plugins/util/urlnorm.py b/plugins/util/urlnorm.py new file mode 100644 index 0000000..3a07621 --- /dev/null +++ b/plugins/util/urlnorm.py @@ -0,0 +1,215 @@ +""" +URI Normalization function: + * Always provide the URI scheme in lowercase characters. + * Always provide the host, if any, in lowercase characters. + * Only perform percent-encoding where it is essential. + * Always use uppercase A-through-F characters when percent-encoding. + * Prevent dot-segments appearing in non-relative URI paths. + * For schemes that define a default authority, use an empty authority if the + default is desired. + * For schemes that define an empty path to be equivalent to a path of "/", + use "/". + * For schemes that define a port, use an empty port if the default is desired + * All portions of the URI must be utf-8 encoded NFC from Unicode strings + +implements: + http://gbiv.com/protocols/uri/rev-2002/rfc2396bis.html#canonical-form + http://www.intertwingly.net/wiki/pie/PaceCanonicalIds + +inspired by: + Tony J. Ibbs, http://starship.python.net/crew/tibs/python/tji_url.py + Mark Nottingham, http://www.mnot.net/python/urlnorm.py +""" + +__license__ = "Python" + +import re, unicodedata, urlparse +from urllib import quote, unquote + +default_port = { + 'ftp': 21, + 'telnet': 23, + 'http': 80, + 'gopher': 70, + 'news': 119, + 'nntp': 119, + 'prospero': 191, + 'https': 443, + 'snews': 563, + 'snntp': 563, +} + +def normalize(url): + """Normalize a URL.""" + + scheme,auth,path,query,fragment = urlparse.urlsplit(url.strip()) + (userinfo,host,port)=re.search('([^@]*@)?([^:]*):?(.*)',auth).groups() + + # Always provide the URI scheme in lowercase characters. + scheme = scheme.lower() + + # Always provide the host, if any, in lowercase characters. + host = host.lower() + if host and host[-1] == '.': host = host[:-1] + if host and host.startswith("www."): + if not scheme: scheme = "http" + host = host[4:] + elif path and path.startswith("www."): + if not scheme: scheme = "http" + path = path[4:] + + # Only perform percent-encoding where it is essential. + # Always use uppercase A-through-F characters when percent-encoding. 
+ # All portions of the URI must be utf-8 encoded NFC from Unicode strings + def clean(string): + string=unicode(unquote(string),'utf-8','replace') + return unicodedata.normalize('NFC',string).encode('utf-8') + path=quote(clean(path),"~:/?#[]@!$&'()*+,;=") + fragment=quote(clean(fragment),"~") + + # note care must be taken to only encode & and = characters as values + query="&".join(["=".join([quote(clean(t) ,"~:/?#[]@!$'()*+,;=") + for t in q.split("=",1)]) for q in query.split("&")]) + + # Prevent dot-segments appearing in non-relative URI paths. + if scheme in ["","http","https","ftp","file"]: + output=[] + for input in path.split('/'): + if input=="": + if not output: output.append(input) + elif input==".": + pass + elif input=="..": + if len(output)>1: output.pop() + else: + output.append(input) + if input in ["",".",".."]: output.append("") + path='/'.join(output) + + # For schemes that define a default authority, use an empty authority if + # the default is desired. + if userinfo in ["@",":@"]: userinfo="" + + # For schemes that define an empty path to be equivalent to a path of "/", + # use "/". + if path=="" and scheme in ["http","https","ftp","file"]: + path="/" + + # For schemes that define a port, use an empty port if the default is + # desired + if port and scheme in default_port.keys(): + if port.isdigit(): + port=str(int(port)) + if int(port)==default_port[scheme]: + port = '' + + # Put it all back together again + auth=(userinfo or "") + host + if port: auth+=":"+port + if url.endswith("#") and query=="" and fragment=="": path+="#" + return urlparse.urlunsplit((scheme,auth,path,query,fragment)).replace("http:///", "http://") + +if __name__ == "__main__": + import unittest + suite = unittest.TestSuite() + + """ from http://www.intertwingly.net/wiki/pie/PaceCanonicalIds """ + tests= [ + (False, "http://:@example.com/"), + (False, "http://@example.com/"), + (False, "http://example.com"), + (False, "HTTP://example.com/"), + (False, "http://EXAMPLE.COM/"), + (False, "http://example.com/%7Ejane"), + (False, "http://example.com/?q=%C7"), + (False, "http://example.com/?q=%5c"), + (False, "http://example.com/?q=C%CC%A7"), + (False, "http://example.com/a/../a/b"), + (False, "http://example.com/a/./b"), + (False, "http://example.com:80/"), + (True, "http://example.com/"), + (True, "http://example.com/?q=%C3%87"), + (True, "http://example.com/?q=%E2%85%A0"), + (True, "http://example.com/?q=%5C"), + (True, "http://example.com/~jane"), + (True, "http://example.com/a/b"), + (True, "http://example.com:8080/"), + (True, "http://user:password@example.com/"), + + # from rfc2396bis + (True, "ftp://ftp.is.co.za/rfc/rfc1808.txt"), + (True, "http://www.ietf.org/rfc/rfc2396.txt"), + (True, "ldap://[2001:db8::7]/c=GB?objectClass?one"), + (True, "mailto:John.Doe@example.com"), + (True, "news:comp.infosystems.www.servers.unix"), + (True, "tel:+1-816-555-1212"), + (True, "telnet://192.0.2.16:80/"), + (True, "urn:oasis:names:specification:docbook:dtd:xml:4.1.2"), + + # other + (True, "http://127.0.0.1/"), + (False, "http://127.0.0.1:80/"), + (True, "http://www.w3.org/2000/01/rdf-schema#"), + (False, "http://example.com:081/"), + ] + + def testcase(expected,value): + class test(unittest.TestCase): + def runTest(self): + assert (normalize(value)==value)==expected, \ + (expected, value, normalize(value)) + return test() + + for (expected,value) in tests: + suite.addTest(testcase(expected,value)) + + """ mnot test suite; three tests updated for rfc2396bis. 
""" + tests = { + '/foo/bar/.': '/foo/bar/', + '/foo/bar/./': '/foo/bar/', + '/foo/bar/..': '/foo/', + '/foo/bar/../': '/foo/', + '/foo/bar/../baz': '/foo/baz', + '/foo/bar/../..': '/', + '/foo/bar/../../': '/', + '/foo/bar/../../baz': '/baz', + '/foo/bar/../../../baz': '/baz', #was: '/../baz', + '/foo/bar/../../../../baz': '/baz', + '/./foo': '/foo', + '/../foo': '/foo', #was: '/../foo', + '/foo.': '/foo.', + '/.foo': '/.foo', + '/foo..': '/foo..', + '/..foo': '/..foo', + '/./../foo': '/foo', #was: '/../foo', + '/./foo/.': '/foo/', + '/foo/./bar': '/foo/bar', + '/foo/../bar': '/bar', + '/foo//': '/foo/', + '/foo///bar//': '/foo/bar/', + 'http://www.foo.com:80/foo': 'http://www.foo.com/foo', + 'http://www.foo.com:8000/foo': 'http://www.foo.com:8000/foo', + 'http://www.foo.com./foo/bar.html': 'http://www.foo.com/foo/bar.html', + 'http://www.foo.com.:81/foo': 'http://www.foo.com:81/foo', + 'http://www.foo.com/%7ebar': 'http://www.foo.com/~bar', + 'http://www.foo.com/%7Ebar': 'http://www.foo.com/~bar', + 'ftp://user:pass@ftp.foo.net/foo/bar': + 'ftp://user:pass@ftp.foo.net/foo/bar', + 'http://USER:pass@www.Example.COM/foo/bar': + 'http://USER:pass@www.example.com/foo/bar', + 'http://www.example.com./': 'http://www.example.com/', + '-': '-', + } + + def testcase(original,normalized): + class test(unittest.TestCase): + def runTest(self): + assert normalize(original)==normalized, \ + (original, normalized, normalize(original)) + return test() + + for (original,normalized) in tests.items(): + suite.addTest(testcase(original,normalized)) + + """ execute tests """ + unittest.TextTestRunner().run(suite) From 977c62f7efe9ca7a9ca20fd7952d86f98b27b766 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Mon, 18 Jan 2010 19:42:35 -0700 Subject: [PATCH 06/13] commenting Input() more --- core/main.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/core/main.py b/core/main.py index eed71c8..eaf3974 100644 --- a/core/main.py +++ b/core/main.py @@ -5,19 +5,19 @@ class Input(object): def __init__(self, conn, raw, prefix, command, params, nick, user, host, paraml, msg): - self.conn = conn - self.server = conn.server - self.raw = raw - self.prefix = prefix - self.command = command - self.params = params + self.conn = conn # irc object + self.server = conn.server # hostname of server + self.raw = raw # unprocessed line of text + self.prefix = prefix # usually hostmask + self.command = command # PRIVMSG, JOIN, etc. 
+ self.params = params self.nick = nick - self.user = user + self.user = user # user@host self.host = host - self.paraml = paraml + self.paraml = paraml # params[-1] without the : self.msg = msg self.chan = paraml[0] - if self.chan == conn.nick: + if self.chan == conn.nick: # is a PM self.chan = nick def say(self, msg): From 8271b23cb63a86fb6165d87b4c3d42399e78dcaa Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Mon, 18 Jan 2010 20:16:40 -0700 Subject: [PATCH 07/13] make urlhistory.py record irc servers as well --- plugins/urlhistory.py | 179 ++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 110 deletions(-) diff --git a/plugins/urlhistory.py b/plugins/urlhistory.py index ed02ec8..ebfd313 100644 --- a/plugins/urlhistory.py +++ b/plugins/urlhistory.py @@ -1,110 +1,69 @@ -import os -import time -from datetime import datetime -import sqlite3 -import pickle -from datetime import timedelta -import re - -from util import hook, timesince -from util import urlnorm -#from util import texttime - -url_re = re.compile(r'([a-zA-Z]+://|www\.)[^ ]*') - - -dbname = "skybot.db" - -expiration_period = timedelta(days=1) - -ignored_urls = [ urlnorm.normalize("http://google.com") ] - -#TODO: Generate expiration_period_text from expiration_period -expiration_period_text = "24 hours" - -def adapt_datetime(ts): - return time.mktime(ts.timetuple()) - -sqlite3.register_adapter(datetime, adapt_datetime) - - -def insert_history(conn, url, channel, nick): - cursor = conn.cursor() - now = datetime.now() - cursor.execute("insert into urlhistory(url, nick, chan, time) values(?,?,?,?)", (url, nick, channel, now)) - conn.commit() - -def select_history_for_url_and_channel(conn, url, channel): - cursor = conn.cursor() - results = cursor.execute("select nick, time from urlhistory where url=? and chan=?", (url, channel)).fetchall() - j = 0 - now = datetime.now() - nicks = [] - for i in xrange(len(results)): - reltime = datetime.fromtimestamp(results[j][1]) - if (now - reltime) > expiration_period: - conn.execute("delete from urlhistory where url=? and chan=? and nick=? and time=?", (url, channel, results[j][0], results[j][1])) - results.remove(results[j]) - else: - nicks.append(results[j][0]) - j += 1 - return nicks - -def get_nicklist(nicks): - nicks = list(set(nicks)) - nicks.sort() - l = len(nicks) - if l == 0: - return "" - elif l == 1: - return nicks[0] - elif l == 2: - return nicks[0] + " and " + nicks[1] - else: - result = "" - for i in xrange(l-1): - result += nicks[i] + ", " - result += "and " + nicks[-1] - return result - -def dbconnect(db): - "check to see that our db has the the seen table and return a connection." 
- conn = sqlite3.connect(db) - - results = conn.execute("select count(*) from sqlite_master where name=?", - ("urlhistory",)).fetchone() - if(results[0] == 0): - conn.execute("create table if not exists urlhistory(url text not null, nick text not null, chan text not null, time datetime not null, primary key(url, nick, chan, time));") - conn.commit() - return conn - -def normalize_url(url): - return urlnorm.normalize(url) - -def get_once_twice(count): - if count == 1: - return "once" - elif count == 2: - return "twice" - else: - return str(count) + " times" - -@hook.command(hook=r'(.*)', prefix=False, ignorebots=True) -def urlinput(bot, input): - dbpath = os.path.join(bot.persist_dir, dbname) - m = url_re.search(input.msg) - if m: - # URL detected - url = normalize_url(m.group(0)) - if url not in ignored_urls: - conn = dbconnect(dbpath) - dupes = select_history_for_url_and_channel(conn, url, input.chan) - num_dupes = len(dupes) - if num_dupes > 0 and input.nick not in dupes: - nicks = get_nicklist(dupes) - reply = "That link has been posted " + get_once_twice(num_dupes) - reply += " in the past " + expiration_period_text + " by " + nicks - input.reply(reply) - insert_history(conn, url, input.chan, input.nick) - conn.close() - +import os +import time +import sqlite3 +import pickle +import re + +from util import hook, urlnorm + +url_re = re.compile(r'([a-zA-Z]+://|www\.)[^ ]*') + +dbname = "skybot.db" + +expiration_period = 60 * 60 * 24 # 1 day +expiration_period_text = "24 hours" + +ignored_urls = [urlnorm.normalize("http://google.com")] + +def dbconnect(db): + "check to see that our db has the the seen table and return a connection." + conn = sqlite3.connect(db) + conn.execute("create table if not exists urlhistory" + "(server, chan, url, nick, time)") + conn.commit() + return conn + +def insert_history(conn, server, chan, url, nick): + now = time.time() + conn.execute("insert into urlhistory(server, chan, url, nick, time) " + "values(?,?,?,?,?)", (server, chan, url, nick, time.time())) + conn.commit() + +def get_history(conn, server, chan, url): + conn.execute("delete from urlhistory where time < ?", + (time.time() - expiration_period,)) + nicks = conn.execute("select nick from urlhistory where server=? " + "and chan=? 
and url=?", (server, chan, url)).fetchall() + return [x[0] for x in nicks] + +def get_nicklist(nicks): + nicks = sorted(set(nicks)) + if len(nicks) <= 2: + return ' and '.join(nicks) + else: + return ', and '.join((', '.join(nicks[:-1]), nicks[-1])) + +def ordinal(count): + return ["once", "twice", "%d times" % count][min(count, 3) - 1] + +@hook.command(hook=r'(.*)', prefix=False) +def urlinput(bot, input): + dbpath = os.path.join(bot.persist_dir, dbname) + m = url_re.search(input.msg.encode('utf8')) + if not m: + return + + # URL detected + conn = dbconnect(dbpath) + try: + url = urlnorm.normalize(m.group(0)) + if url not in ignored_urls: + dupes = get_history(conn, input.server, input.chan, url) + insert_history(conn, input.server, input.chan, url, input.nick) + if dupes and input.nick not in dupes: + input.reply("That link has been posted " + ordinal(len(dupes)) + + " in the past " + expiration_period_text + " by " + + get_nicklist(dupes)) + finally: + conn.commit() + conn.close() From c6c94c9ecb594ecd07cfc89d1a0ec9ffefc044f6 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Mon, 18 Jan 2010 22:14:49 -0700 Subject: [PATCH 08/13] make urlhistory nick sorting case-insensitive, simplify a few decorator calls --- plugins/explain.py | 2 +- plugins/tell.py | 2 +- plugins/urlhistory.py | 2 +- plugins/wikipedia.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/explain.py b/plugins/explain.py index 33b78a1..6d65082 100755 --- a/plugins/explain.py +++ b/plugins/explain.py @@ -1,7 +1,7 @@ from util import hook from pycparser.cdecl import explain_c_declaration -@hook.command('explain') +@hook.command def explain(inp): ".explain -- gives an explanation of C expression" if not inp: diff --git a/plugins/tell.py b/plugins/tell.py index b2f1237..114b1b5 100644 --- a/plugins/tell.py +++ b/plugins/tell.py @@ -17,7 +17,7 @@ def adapt_datetime(ts): sqlite3.register_adapter(datetime, adapt_datetime) -@hook.command(hook=r'(.*)', prefix=False, ignorebots=True) +@hook.command(hook=r'(.*)', prefix=False) def tellinput(bot, input): dbpath = os.path.join(bot.persist_dir, dbname) conn = dbconnect(dbpath) diff --git a/plugins/urlhistory.py b/plugins/urlhistory.py index ebfd313..2988659 100644 --- a/plugins/urlhistory.py +++ b/plugins/urlhistory.py @@ -37,7 +37,7 @@ def get_history(conn, server, chan, url): return [x[0] for x in nicks] def get_nicklist(nicks): - nicks = sorted(set(nicks)) + nicks = sorted(set(nicks), key=unicode.lower) if len(nicks) <= 2: return ' and '.join(nicks) else: diff --git a/plugins/wikipedia.py b/plugins/wikipedia.py index 93eaa87..5ddf19f 100644 --- a/plugins/wikipedia.py +++ b/plugins/wikipedia.py @@ -14,7 +14,7 @@ search_url = api_prefix + "?action=opensearch&search=%s&format=xml" paren_re = re.compile('\s*\(.*\)$') -@hook.command(hook='w(\s+.*|$)') +@hook.command('w') @hook.command def wiki(inp): '''.w/.wiki -- gets first sentence of wikipedia ''' \ From 9f0c2fe9f1fc5673517c52626591223dff0c56ce Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Fri, 22 Jan 2010 02:56:20 -0700 Subject: [PATCH 09/13] quote.py: a simple quote database --- plugins/quote.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 plugins/quote.py diff --git a/plugins/quote.py b/plugins/quote.py new file mode 100644 index 0000000..d38302f --- /dev/null +++ b/plugins/quote.py @@ -0,0 +1,80 @@ +import os +import sqlite3 +import random +import re +import time + +from util import hook + +dbname = "skybot.db" + +def db_connect(db): + 
conn = sqlite3.connect(db) + conn.execute("create table if not exists quote" + "(server, nick, adder, msg unique, time real)") + conn.commit() + return conn + +def add_quote(conn, server, adder, nick, msg): + now = time.time() + print repr((conn, server, adder, nick, msg, time)) + conn.execute("insert or fail into quote(server, nick, adder, msg, time) " + "values(?,?,?,?,?)", (server, nick, adder, msg, now)) + conn.commit() + +def get_quotes(conn, server, nick): + return conn.execute("select time, nick, msg from quote where server=?" + " and nick LIKE ? order by time", (server, nick)).fetchall() + # note: nick_name matches nick-name -- _ in a LIKE indicates any character + # this will probably be unnoticeable, and the fix is easy enough + +@hook.command('q') +@hook.command +def quote(bot, input): + ".q/.quote [#n]/.quote add -- retrieves " \ + "random/numbered quote, adds quote" + + dbpath = os.path.join(bot.persist_dir, dbname) + conn = db_connect(dbpath) + + try: + add = re.match(r"add\s+?\s+(.*)", input.inp, re.I) + retrieve = re.match(r"(\S+)(?:\s+#?(\d+))?", input.inp) + + if add: + nick, msg = add.groups() + try: + add_quote(conn, input.server, input.nick, nick, msg) + except sqlite3.IntegrityError: # message already in DB + return "message already stored, doing nothing." + return "quote added." + elif retrieve: + nick, num = retrieve.groups() + + quotes = get_quotes(conn, input.server, nick) + n_quotes = len(quotes) + + if not n_quotes: + return "no quotes found" + + if num: + num = int(num) + + if num: + if num > n_quotes: + return "I only have %d quote%s for %s" % (n_quotes, + ('s', '')[n_quotes == 1], nick) + else: + selected_quote = quotes[num - 1] + else: + num = random.randint(1, n_quotes) + selected_quote = quotes[num - 1] + + ctime, nick, msg = selected_quote + return "[%d/%d] %s <%s> %s" % (num, n_quotes, + time.strftime("%Y-%m-%d", time.gmtime(ctime)), nick, msg) + else: + return quote.__doc__ + finally: + conn.commit() + conn.close() From 26515cf14bd44b9041e7e3cf2014ddc99f8f5dce Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Fri, 22 Jan 2010 04:19:16 -0700 Subject: [PATCH 10/13] improve quote db schema, add channel random quotes --- plugins/quote.py | 63 ++++++++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/plugins/quote.py b/plugins/quote.py index d38302f..17b0a4c 100644 --- a/plugins/quote.py +++ b/plugins/quote.py @@ -10,48 +10,67 @@ dbname = "skybot.db" def db_connect(db): conn = sqlite3.connect(db) - conn.execute("create table if not exists quote" - "(server, nick, adder, msg unique, time real)") + conn.execute('''create table if not exists quotes + (server, chan, nick, add_nick, msg, time real, deleted default 0, + primary key (server, chan, nick, msg))''') conn.commit() return conn -def add_quote(conn, server, adder, nick, msg): +def add_quote(conn, server, chan, nick, add_nick, msg): now = time.time() - print repr((conn, server, adder, nick, msg, time)) - conn.execute("insert or fail into quote(server, nick, adder, msg, time) " - "values(?,?,?,?,?)", (server, nick, adder, msg, now)) + print repr((conn, server, add_nick, nick, msg, time)) + conn.execute('''insert or fail into quotes (server, chan, nick, add_nick, + msg, time) values(?,?,?,?,?,?)''', + (server, chan, nick, add_nick, msg, now)) conn.commit() -def get_quotes(conn, server, nick): - return conn.execute("select time, nick, msg from quote where server=?" - " and nick LIKE ? 
order by time", (server, nick)).fetchall() - # note: nick_name matches nick-name -- _ in a LIKE indicates any character - # this will probably be unnoticeable, and the fix is easy enough - +def get_quotes_by_nick(conn, server, chan, nick): + return conn.execute("select time, nick, msg from quotes where deleted!=1 " + "and server=? and chan=? and lower(nick)=lower(?) order by time", + (server, chan, nick)).fetchall() + +def get_quotes_by_chan(conn, server, chan): + return conn.execute("select time, nick, msg from quotes where deleted!=1 " + "and server=? and chan=? order by time", (server, chan)).fetchall() + + +def format_quote(q, num, n_quotes): + ctime, nick, msg = q + return "[%d/%d] %s <%s> %s" % (num, n_quotes, + time.strftime("%Y-%m-%d", time.gmtime(ctime)), nick, msg) + + @hook.command('q') @hook.command def quote(bot, input): - ".q/.quote [#n]/.quote add -- retrieves " \ - "random/numbered quote, adds quote" + ".q/.quote [#n]/.quote add -- gets " \ + "random or [#n]th quote by or from <#chan>/adds quote" dbpath = os.path.join(bot.persist_dir, dbname) conn = db_connect(dbpath) try: add = re.match(r"add\s+?\s+(.*)", input.inp, re.I) - retrieve = re.match(r"(\S+)(?:\s+#?(\d+))?", input.inp) + retrieve = re.match(r"(\S+)(?:\s+#?(-?\d+))?", input.inp) + chan = input.chan if add: nick, msg = add.groups() try: - add_quote(conn, input.server, input.nick, nick, msg) - except sqlite3.IntegrityError: # message already in DB + add_quote(conn, input.server, chan, nick, input.nick, msg) + except sqlite3.IntegrityError: return "message already stored, doing nothing." return "quote added." elif retrieve: - nick, num = retrieve.groups() + select, num = retrieve.groups() + + by_chan = False + if select.startswith('#'): + by_chan = True + quotes = get_quotes_by_chan(conn, input.server, select) + else: + quotes = get_quotes_by_nick(conn, input.server, chan, select) - quotes = get_quotes(conn, input.server, nick) n_quotes = len(quotes) if not n_quotes: @@ -63,16 +82,14 @@ def quote(bot, input): if num: if num > n_quotes: return "I only have %d quote%s for %s" % (n_quotes, - ('s', '')[n_quotes == 1], nick) + ('s', '')[n_quotes == 1], select) else: selected_quote = quotes[num - 1] else: num = random.randint(1, n_quotes) selected_quote = quotes[num - 1] - ctime, nick, msg = selected_quote - return "[%d/%d] %s <%s> %s" % (num, n_quotes, - time.strftime("%Y-%m-%d", time.gmtime(ctime)), nick, msg) + return format_quote(selected_quote, num, n_quotes) else: return quote.__doc__ finally: From 7663b3e5d374deda284741f38b75e03e8155a851 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Sun, 24 Jan 2010 17:30:00 -0700 Subject: [PATCH 11/13] simple acls (deny-except and allow-except), force config rewrite on start --- core/config.py | 2 +- plugins/sieve.py | 21 ++++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/core/config.py b/core/config.py index 12336bc..2f788cf 100644 --- a/core/config.py +++ b/core/config.py @@ -21,7 +21,7 @@ if not os.path.exists('config'): del conf bot.config = yaml.load(open('config')) -bot._config_dirty = False +bot._config_dirty = True # force a rewrite on start bot._config_mtime = os.stat('config').st_mtime def config_dirty(self): diff --git a/plugins/sieve.py b/plugins/sieve.py index 197a040..4fa147e 100644 --- a/plugins/sieve.py +++ b/plugins/sieve.py @@ -1,3 +1,4 @@ +import os import re from util import hook @@ -16,14 +17,28 @@ def sieve_suite(bot, input, func, args): hook = args.get('hook', r'(.*)') if args.get('prefix', True): - # add a prefix, 
unless it's a private message - hook = (r'^(?:[.!]|' if input.chan != input.nick else r'^(?:[.!]?|') \ - + input.conn.nick + r'[:,]*\s*)' + hook + if input.chan == input.nick: # private message, prefix not required + prefix = r'^(?:[.!]?|' + else: + prefix = r'^(?:[.!]|' + hook = prefix + input.conn.nick + r'[:,]*\s)' + hook input.re = re.match(hook, input.msg, flags=re.I) if input.re is None: return None + acl = bot.config.get('acls', {}).get(func.__name__) + if acl: + print acl + if 'deny-except' in acl: + allowed_channels = map(str.lower, acl['deny-except']) + if input.chan.lower() not in allowed_channels: + return None + if 'allow-except' in acl: + denied_channels = map(str.lower, acl['allow-except']) + if input.chan.lower() in denied_channels: + return None + input.inp_unstripped = ' '.join(input.re.groups()) input.inp = input.inp_unstripped.strip() From 4ffc5fe5bf16ec3b5716f637a8cd55a688925918 Mon Sep 17 00:00:00 2001 From: ipsum Date: Mon, 25 Jan 2010 23:39:26 -0500 Subject: [PATCH 12/13] Added regular expressions plugin --- plugins/regular.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 plugins/regular.py diff --git a/plugins/regular.py b/plugins/regular.py new file mode 100644 index 0000000..ccfba87 --- /dev/null +++ b/plugins/regular.py @@ -0,0 +1,38 @@ +''' +regular.py + +skybot plugin for testing regular expressions +by Ipsum +''' + +import thread +import codecs +import re + +from util import hook + + + +@hook.command +def reg(bot, input): + ".reg -- matches regular expression in given (seperate regex and string by 2 spaces)" + + m = "" + + if len(input.msg) < 4: + return reg.__doc__ + + query = input.inp.partition(" ") + + + if query[2] != "": + r = re.compile(query[0]) + + matches = r.findall(query[2]) + for match in matches: + m += match + "|" + + return m.rstrip('|') + + else: + return reg.__doc__ \ No newline at end of file From 768bde1f93208fe77a9eafab7dacd6e7fa7c6410 Mon Sep 17 00:00:00 2001 From: ipsum Date: Tue, 26 Jan 2010 17:57:47 -0500 Subject: [PATCH 13/13] changed command to .re --- plugins/regular.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/regular.py b/plugins/regular.py index ccfba87..f8d1d15 100644 --- a/plugins/regular.py +++ b/plugins/regular.py @@ -13,13 +13,13 @@ from util import hook -@hook.command +@hook.command('re') def reg(bot, input): - ".reg -- matches regular expression in given (seperate regex and string by 2 spaces)" + ".re -- matches regular expression in given (seperate regex and string by 2 spaces)" m = "" - if len(input.msg) < 4: + if len(input.msg) < 3: return reg.__doc__ query = input.inp.partition(" ")
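
For reference, the ACL check added to sieve.py in patch 11 looks up bot.config.get('acls', {}) by the hooked function's name and understands two list-valued keys: 'deny-except' (the command only runs in the listed channels) and 'allow-except' (the command runs everywhere except the listed channels). The patches ship no sample config entry, so the snippet below is only a sketch of how such an entry might be written, shown as the Python structure yaml.load would produce; the channel names are invented for illustration.

    conf = {
        'acls': {
            # urlhistory's hook function is named urlinput; restrict it to one channel
            'urlinput': {'deny-except': ['#bots']},
            # let .quote run in every channel except this one
            'quote': {'allow-except': ['#serious']},
        },
    }

Channel comparison is case-insensitive, per the map(str.lower, ...) calls in the sieve.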