From 61c42e7d8a40d7fbe35876f507cc7595e6a7d2db Mon Sep 17 00:00:00 2001 From: melonhead Date: Wed, 14 Jul 2010 16:45:26 -0400 Subject: [PATCH 1/3] urlhistory: main regex no longer matches 'http://' or 'www.' urlhistory: added URL normalization for Amazon, Waffleimages, and Youtube --- plugins/urlhistory.py | 2 +- plugins/util/urlnorm.py | 26 +++++++++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/plugins/urlhistory.py b/plugins/urlhistory.py index fe839da..b850c02 100644 --- a/plugins/urlhistory.py +++ b/plugins/urlhistory.py @@ -4,7 +4,7 @@ import time from util import hook, urlnorm, timesince -url_re = r'([a-zA-Z]+://|www\.)[^ ]*' +url_re = r'([a-zA-Z]+://|www\.)[^ ]+' expiration_period = 60 * 60 * 24 # 1 day diff --git a/plugins/util/urlnorm.py b/plugins/util/urlnorm.py index a51a76e..6cdc858 100644 --- a/plugins/util/urlnorm.py +++ b/plugins/util/urlnorm.py @@ -29,7 +29,20 @@ from urllib import quote, unquote default_port = { 'http': 80, } - + +class Normalizer(object): + def __init__(self, regex, normalize_func): + self.regex = regex + self.normalize = normalize_func + +normalizers = ( Normalizer( re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P[0-9A-Za-z]{10})'), + lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))), + Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'), + lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ), + Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-z0-9]+)'), + lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ), + ) + def normalize(url): """Normalize a URL.""" @@ -100,5 +113,12 @@ def normalize(url): auth+=":"+port if url.endswith("#") and query == "" and fragment == "": path += "#" - return urlparse.urlunsplit((scheme, auth, path, query, fragment)).replace( - "http:///", "http://") + normal_url = urlparse.urlunsplit((scheme, auth, path, query, fragment)).replace( + "http:///", "http://") + for norm in normalizers: + m = norm.regex.match(normal_url) + if m: + print 'Normalized %s to %s' % (url, norm.normalize(m)) + return norm.normalize(m) + print 'Normalized %s to %s' % (url, normal_url) + return normal_url From 93f626c482e86a4c58d7544472c7e442af236ede Mon Sep 17 00:00:00 2001 From: melonhead Date: Wed, 14 Jul 2010 16:48:18 -0400 Subject: [PATCH 2/3] Removed debug print lines from URL normalizer --- plugins/util/urlnorm.py | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/plugins/util/urlnorm.py b/plugins/util/urlnorm.py index 6cdc858..024fc32 100644 --- a/plugins/util/urlnorm.py +++ b/plugins/util/urlnorm.py @@ -29,20 +29,20 @@ from urllib import quote, unquote default_port = { 'http': 80, } - -class Normalizer(object): - def __init__(self, regex, normalize_func): - self.regex = regex - self.normalize = normalize_func - -normalizers = ( Normalizer( re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P[0-9A-Za-z]{10})'), - lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))), - Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'), - lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ), - Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-z0-9]+)'), - lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ), - ) - + +class Normalizer(object): + def __init__(self, regex, normalize_func): + self.regex = regex + self.normalize = normalize_func + +normalizers = ( Normalizer( re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P[0-9A-Za-z]{10})'), + lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))), + Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'), + lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ), + Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-z0-9]+)'), + lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ), + ) + def normalize(url): """Normalize a URL.""" @@ -114,11 +114,9 @@ def normalize(url): if url.endswith("#") and query == "" and fragment == "": path += "#" normal_url = urlparse.urlunsplit((scheme, auth, path, query, fragment)).replace( - "http:///", "http://") - for norm in normalizers: - m = norm.regex.match(normal_url) - if m: - print 'Normalized %s to %s' % (url, norm.normalize(m)) - return norm.normalize(m) - print 'Normalized %s to %s' % (url, normal_url) + "http:///", "http://") + for norm in normalizers: + m = norm.regex.match(normal_url) + if m: + return norm.normalize(m) return normal_url From 4f5cf197bec150fc6236339c82caaa61ff1572ec Mon Sep 17 00:00:00 2001 From: Chris Skalenda Date: Sat, 17 Jul 2010 16:52:21 -0600 Subject: [PATCH 3/3] dotnetpad: make end of line comments work. --- plugins/dotnetpad.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/dotnetpad.py b/plugins/dotnetpad.py index e38b6af..8ae18f5 100644 --- a/plugins/dotnetpad.py +++ b/plugins/dotnetpad.py @@ -62,12 +62,12 @@ def cs(snippet): class_template = ('public class Default ' '{' - ' %s ' + ' %s \n' '}') main_template = ('public static void Main(String[] args) ' '{' - ' %s ' + ' %s \n' '}') # There are probably better ways to do the following, but I'm feeling lazy