From 2c687120b5af740a52b85d260ed6ee04a07b933f Mon Sep 17 00:00:00 2001
From: Christine Dodrill
Date: Mon, 27 Jul 2015 19:17:43 -0700
Subject: [PATCH] scrape.moon: LUDICROUS SPEED

---
Review notes (text between the three-dash line and the diffstat is not part
of the commit message and is ignored by git am):

* All inserts now run between a single BEGIN TRANSACTION issued before the
  page loop and a single COMMIT issued after it.
* The third return value of json.decode is captured as err and raised with
  error when the first return value is falsy.
* Falsy entries of posts.topics are skipped before any values are bound.
* The "done" message now includes the result of os.date!.
* NOTE(review): the results of both db\exec calls are discarded, so a failed
  BEGIN or COMMIT would go unnoticed. Consider checking them.
* NOTE(review): in the lines shown, nothing issues COMMIT or ROLLBACK when
  error fires part-way through the loop. Confirm that is intended.
* NOTE(review): leading indentation and blank context lines in the hunks
  below were reconstructed from the hunk line counts and 2-space MoonScript
  nesting. Confirm against the repository before applying.

 scripts/scrape.moon | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/scripts/scrape.moon b/scripts/scrape.moon
index 33ee919..962d742 100644
--- a/scripts/scrape.moon
+++ b/scripts/scrape.moon
@@ -5,14 +5,18 @@ db = assert sqlite3.open "../db/posts.db"
 insert_stmt = assert db\prepare "INSERT INTO Posts VALUES (NULL, ?, ?, '', ?, ?)"
 
 
+db\exec [[ BEGIN TRANSACTION; ]]
+
 for page=1,8002
   print "Scraping page #{page}..."
 
   with fin = assert io.open "../raw/pages/#{page}.json", "r"
     data = fin\read "*a"
-    posts = json.decode data
+    posts, _, err = json.decode data
+    error err unless posts
 
     for _, post in pairs posts.topics
+      continue unless post
       do
         insert_stmt\bind_values post.id, post.body, post.author, page
         insert_stmt\step!
@@ -20,4 +24,6 @@ for page=1,8002
 
     fin\close!
 
-  print "done"
+  print "done at #{os.date!}"
+
+db\exec [[ COMMIT; ]]