scrape.moon: LUDICROUS SPEED
This commit is contained in:
parent
c74614a565
commit
2c687120b5
|
@ -5,14 +5,18 @@ db = assert sqlite3.open "../db/posts.db"
|
||||||
|
|
||||||
insert_stmt = assert db\prepare "INSERT INTO Posts VALUES (NULL, ?, ?, '', ?, ?)"
|
insert_stmt = assert db\prepare "INSERT INTO Posts VALUES (NULL, ?, ?, '', ?, ?)"
|
||||||
|
|
||||||
|
db\exec [[ BEGIN TRANSACTION; ]]
|
||||||
|
|
||||||
for page=1,8002
|
for page=1,8002
|
||||||
print "Scraping page #{page}..."
|
print "Scraping page #{page}..."
|
||||||
|
|
||||||
with fin = assert io.open "../raw/pages/#{page}.json", "r"
|
with fin = assert io.open "../raw/pages/#{page}.json", "r"
|
||||||
data = fin\read "*a"
|
data = fin\read "*a"
|
||||||
posts = json.decode data
|
posts, _, err = json.decode data
|
||||||
|
error err unless posts
|
||||||
|
|
||||||
for _, post in pairs posts.topics
|
for _, post in pairs posts.topics
|
||||||
|
continue unless post
|
||||||
do
|
do
|
||||||
insert_stmt\bind_values post.id, post.body, post.author, page
|
insert_stmt\bind_values post.id, post.body, post.author, page
|
||||||
insert_stmt\step!
|
insert_stmt\step!
|
||||||
|
@ -20,4 +24,6 @@ for page=1,8002
|
||||||
|
|
||||||
fin\close!
|
fin\close!
|
||||||
|
|
||||||
print "done"
|
print "done at #{os.date!}"
|
||||||
|
|
||||||
|
db\exec [[ COMMIT; ]]
|
||||||
|
|
Loading…
Reference in New Issue