Add scraper for @reednj's list

This commit is contained in:
Christine Dodrill 2016-02-08 17:57:28 -08:00
parent 79ba928de3
commit db7ecdbbfe
3 changed files with 30 additions and 1 deletions

2
src/onetime/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
nimcache
reednj

27
src/onetime/reednj.nim Normal file
View File

@ -0,0 +1,27 @@
import db_sqlite, future, htmlparser, httpclient, q, strtabs, xmltree, ../twtxt
# One-off import script: fetches the user listing served at
# http://twtxt.reednj.com/, extracts each listed user's name and feed URL
# from the HTML table, and inserts one row per user into the `users` table
# of the local sqlite database.

let
  reshtml = "http://twtxt.reednj.com/".getContent
  doc = reshtml.q
  # Two independent selections over the same table; entries are paired up
  # by position further down.
  users = doc.select "table tbody tr a.user-link"
  feedurls = doc.select "table tbody tr td a.wide-only"

var
  usernames = newSeq[string]()
  feeds = newSeq[string]()

for user in users.items:
  # Drop the first character of the link text (presumably a leading '@';
  # confirm against the page markup).
  usernames.add user.innerText[1..^1]

for feed in feedurls.items:
  feeds.add feed.attrs["href"]

# The name/URL pairing below is purely positional, so it is only meaningful
# when both selectors matched the same number of elements. Bail out before
# touching the database rather than inserting misaligned rows or failing
# with an index error part-way through.
if usernames.len != feeds.len:
  quit("scraped " & $usernames.len & " usernames but " & $feeds.len &
       " feed urls; refusing to import mismatched data")

let db = open("../../data/twtxt.db", nil, nil, nil)
try:
  for i, user in usernames.pairs:
    let url = feeds[i]
    echo i, " ", user, " ", url
    try:
      db.exec(sql"insert into users values(null, ?, ?)", user, url)
    except DbError:
      # Best effort: report the failed insert and carry on with the
      # remaining users. Only database errors are handled here; anything
      # else is left to propagate.
      echo getCurrentExceptionMsg()
finally:
  # Always release the database handle, even if the loop raised.
  db.close()

View File

@ -9,7 +9,7 @@ bin = @["twtxtlist", "worker"]
# Dependencies # Dependencies
requires "nim >= 0.13.0", "jester", "moustachu", "shorturl" requires "nim >= 0.13.0", "jester", "moustachu", "shorturl", "q"
mode = ScriptMode.Verbose mode = ScriptMode.Verbose