diff --git a/plugins/dotnetpad.py b/plugins/dotnetpad.py
index e38b6af..8ae18f5 100644
--- a/plugins/dotnetpad.py
+++ b/plugins/dotnetpad.py
@@ -62,12 +62,12 @@ def cs(snippet):
 
     class_template = ('public class Default '
                       '{'
-                      ' %s '
+                      ' %s \n'
                       '}')
 
     main_template = ('public static void Main(String[] args) '
                      '{'
-                     ' %s '
+                     ' %s \n'
                      '}')
 
     # There are probably better ways to do the following, but I'm feeling lazy
diff --git a/plugins/urlhistory.py b/plugins/urlhistory.py
index fe839da..b850c02 100644
--- a/plugins/urlhistory.py
+++ b/plugins/urlhistory.py
@@ -4,7 +4,7 @@ import time
 
 from util import hook, urlnorm, timesince
 
-url_re = r'([a-zA-Z]+://|www\.)[^ ]*'
+url_re = r'([a-zA-Z]+://|www\.)[^ ]+'
 
 
 expiration_period = 60 * 60 * 24 # 1 day
diff --git a/plugins/util/urlnorm.py b/plugins/util/urlnorm.py
index a51a76e..024fc32 100644
--- a/plugins/util/urlnorm.py
+++ b/plugins/util/urlnorm.py
@@ -30,6 +30,19 @@ default_port = {
     'http': 80,
 }
 
+class Normalizer(object):
+    def __init__(self, regex, normalize_func):
+        self.regex = regex
+        self.normalize = normalize_func
+
+normalizers = ( Normalizer( re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P<tld>[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P<ASIN>[0-9A-Za-z]{10})'),
+                            lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))),
+                Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'),
+                            lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ),
+                Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-z0-9]+)'),
+                            lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ),
+    )
+
 def normalize(url):
     """Normalize a URL."""
 
@@ -100,5 +113,10 @@ def normalize(url):
         auth+=":"+port
     if url.endswith("#") and query == "" and fragment == "":
         path += "#"
-    return urlparse.urlunsplit((scheme, auth, path, query, fragment)).replace(
+    normal_url = urlparse.urlunsplit((scheme, auth, path, query, fragment)).replace(
         "http:///", "http://")
+    for norm in normalizers:
+        m = norm.regex.match(normal_url)
+        if m:
+            return norm.normalize(m)
+    return normal_url