scrape.moon: LUDICROUS SPEED

This commit is contained in:
Christine Dodrill 2015-07-27 19:17:43 -07:00
parent c74614a565
commit 2c687120b5
1 changed file with 8 additions and 2 deletions

View File

@@ -5,14 +5,18 @@ db = assert sqlite3.open "../db/posts.db"
insert_stmt = assert db\prepare "INSERT INTO Posts VALUES (NULL, ?, ?, '', ?, ?)" insert_stmt = assert db\prepare "INSERT INTO Posts VALUES (NULL, ?, ?, '', ?, ?)"
db\exec [[ BEGIN TRANSACTION; ]]
for page=1,8002 for page=1,8002
print "Scraping page #{page}..." print "Scraping page #{page}..."
with fin = assert io.open "../raw/pages/#{page}.json", "r" with fin = assert io.open "../raw/pages/#{page}.json", "r"
data = fin\read "*a" data = fin\read "*a"
posts = json.decode data posts, _, err = json.decode data
error err unless posts
for _, post in pairs posts.topics for _, post in pairs posts.topics
continue unless post
do do
insert_stmt\bind_values post.id, post.body, post.author, page insert_stmt\bind_values post.id, post.body, post.author, page
insert_stmt\step! insert_stmt\step!
@@ -20,4 +24,6 @@ for page=1,8002
fin\close! fin\close!
print "done" print "done at #{os.date!}"
db\exec [[ COMMIT; ]]