From db7ecdbbfe051c49e7435386c9fa1d6296caee51 Mon Sep 17 00:00:00 2001
From: Christine Dodrill
Date: Mon, 8 Feb 2016 17:57:28 -0800
Subject: [PATCH] Add scraper for @reednj's list

---
 src/onetime/.gitignore |  2 ++
 src/onetime/reednj.nim | 45 +++++++++++++++++++++++++++++++++++++++++++++
 twtxtlist.nimble       |  2 +-
 3 files changed, 48 insertions(+), 1 deletion(-)
 create mode 100644 src/onetime/.gitignore
 create mode 100644 src/onetime/reednj.nim

diff --git a/src/onetime/.gitignore b/src/onetime/.gitignore
new file mode 100644
index 0000000..87de803
--- /dev/null
+++ b/src/onetime/.gitignore
@@ -0,0 +1,2 @@
+nimcache
+reednj
diff --git a/src/onetime/reednj.nim b/src/onetime/reednj.nim
new file mode 100644
index 0000000..e6de151
--- /dev/null
+++ b/src/onetime/reednj.nim
@@ -0,0 +1,45 @@
+## One-time import: scrape the user table at twtxt.reednj.com and insert each
+## (username, feed URL) pair into the local twtxt SQLite database.
+
+import db_sqlite, future, htmlparser, httpclient, q, strtabs, xmltree, ../twtxt
+
+let
+  reshtml = "http://twtxt.reednj.com/".getContent
+  doc = reshtml.q
+  users = doc.select "table tbody tr a.user-link"
+  feedurls = doc.select "table tbody tr td a.wide-only"
+
+var
+  usernames = newSeq[string]()
+  feeds = newSeq[string]()
+
+for user in users.items:
+  # Drop the first character of the link text (presumably a leading "@";
+  # confirm against the page markup).
+  usernames.add user.innerText[1..^1]
+
+for feed in feedurls.items:
+  feeds.add feed.attrs["href"]
+
+# Names and URLs come from two separate selectors and are paired by position.
+# Stop before touching the database if the counts differ: feeds[i] would
+# otherwise go out of bounds or pair a user with someone else's feed.
+if usernames.len != feeds.len:
+  quit("reednj scrape: found " & $usernames.len & " users but " &
+       $feeds.len & " feed links; not importing")
+
+let db = open("../../data/twtxt.db", nil, nil, nil)
+
+try:
+  for i, user in usernames.pairs:
+    let url = feeds[i]
+    echo i, " ", user, " ", url
+
+    try:
+      db.exec(sql"insert into users values(null, ?, ?)", user, url)
+    except:
+      # Best effort: report the failed insert and carry on with the rest.
+      echo getCurrentExceptionMsg()
+finally:
+  # Release the database handle even if the loop raises.
+  db.close()
diff --git a/twtxtlist.nimble b/twtxtlist.nimble
index 4eef1dd..6e2ceb3 100644
--- a/twtxtlist.nimble
+++ b/twtxtlist.nimble
@@ -9,7 +9,7 @@ bin = @["twtxtlist", "worker"]
 
 # Dependencies
 
-requires "nim >= 0.13.0", "jester", "moustachu", "shorturl"
+requires "nim >= 0.13.0", "jester", "moustachu", "shorturl", "q"
 
 mode = ScriptMode.Verbose
 