Add scraper for @reednj's list

2016-02-08 17:57:28 -08:00 · 2016-02-08 17:57:28 -08:00 · db7ecdbbfe
parent 79ba928de3
commit db7ecdbbfe
3 changed files with 30 additions and 1 deletions
--- a/src/onetime/.gitignore
+++ b/src/onetime/.gitignore
@@ -0,0 +1,2 @@
+nimcache
+reednj
--- a/src/onetime/reednj.nim
+++ b/src/onetime/reednj.nim
@@ -0,0 +1,27 @@
+import db_sqlite, future, htmlparser, httpclient, q, strtabs, xmltree, ../twtxt
+
+# One-time import script: download the page at twtxt.reednj.com, pull the
+# user links and feed links out of its table, and insert each
+# (username, feed url) pair into the `users` table of the local database.
+let
+  reshtml = "http://twtxt.reednj.com/".getContent
+  doc = reshtml.q
+  users = doc.select "table tbody tr a.user-link"
+  feedurls = doc.select "table tbody tr td a.wide-only"
+
+var
+  usernames = newSeq[string]()
+  feeds = newSeq[string]()
+
+for user in users.items:
+  # Drops the first character of the link text (presumably a leading "@";
+  # TODO confirm against the page markup).
+  usernames.add user.innerText[1..^1]
+
+for feed in feedurls.items:
+  feeds.add feed.attrs["href"]
+
+# The names and urls come from two independent selectors, so their counts
+# are not guaranteed to match. Only walk the common prefix instead of
+# indexing `feeds` out of range.
+let total = min(usernames.len, feeds.len)
+if usernames.len != feeds.len:
+  echo "warning: found ", usernames.len, " usernames but ", feeds.len,
+    " feed urls; importing only the first ", total
+
+let db = open("../../data/twtxt.db", nil, nil, nil)
+
+try:
+  for i in 0 .. total - 1:
+    let
+      user = usernames[i]
+      url = feeds[i]
+    echo i, " ", user, " ", url
+
+    try:
+      db.exec(sql"insert into users values(null, ?, ?)", user, url)
+    except DbError:
+      # Best effort: report the failed insert and carry on with the next
+      # row. Only database errors are handled here; anything else
+      # propagates.
+      echo getCurrentExceptionMsg()
+finally:
+  # Release the database handle even if the loop aborts.
+  db.close()
--- a/twtxtlist.nimble
+++ b/twtxtlist.nimble
@@ -9,7 +9,7 @@ bin           = @["twtxtlist", "worker"]

 # Dependencies

-requires "nim >= 0.13.0", "jester", "moustachu", "shorturl"
+requires "nim >= 0.13.0", "jester", "moustachu", "shorturl", "q"

 mode = ScriptMode.Verbose