scrape the posts into a sqlite database
This commit is contained in:
parent
d1f2fb5a7b
commit
81279be189
|
@ -1,4 +1,5 @@
|
|||
*.db
|
||||
*.db-journal
|
||||
*.a
|
||||
*.o
|
||||
*.so
|
||||
|
|
|
@ -4,7 +4,7 @@ CREATE TABLE IF NOT EXISTS Posts (
|
|||
, body TEXT NOT NULL
|
||||
, mdown TEXT NOT NULL
|
||||
, author TEXT NOT NULL
|
||||
, page TEXT NOT NULL
|
||||
, page INTEGER
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS Users (
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
#!/bin/bash
# Rebuild posts.db from scratch by replaying the schema in db.sql.
# Both paths are relative, so run this from the directory containing db.sql.

# Abort on the first failing command and echo each command as it runs.
set -e
set -x

# -f: a missing database (first run, fresh checkout) must not abort the
# script under `set -e`.
rm -f ./posts.db
sqlite3 ./posts.db < ./db.sql
|
|
@ -0,0 +1,23 @@
|
|||
json = require "dkjson"
|
||||
sqlite3 = require "lsqlite3"
|
||||
|
||||
-- Import the scraped page dumps (../raw/pages/<n>.json) into the Posts table
-- of ../db/posts.db. Each element of a page's `topics` collection becomes one
-- row, tagged with the page number it was found on.
db = assert sqlite3.open "../db/posts.db"

-- One prepared statement is reused for every row. Values are bound
-- positionally, so post content never becomes part of the SQL text.
insert_stmt = assert db\prepare "INSERT INTO Posts VALUES (NULL, ?, ?, '', ?, ?)"

-- Raise a descriptive error unless an lsqlite3 call returned sqlite3.OK.
check_ok = (rc, what) ->
  error "#{what} failed (#{rc}): #{db\errmsg!}" unless rc == sqlite3.OK

for page=1,8002
  print "Scraping page #{page}..."

  -- Read the whole file and close it immediately, so the handle is not held
  -- open (or leaked on error) while the rows are being inserted.
  fin = assert io.open "../raw/pages/#{page}.json", "r"
  data = fin\read "*a"
  fin\close!

  -- dkjson signals malformed input by returning nil, position, message.
  posts, _, err = json.decode data
  assert posts, "page #{page}: invalid JSON: #{err}"

  -- One transaction per page: avoids a commit for every single row while
  -- still persisting all fully processed pages if a later page fails.
  check_ok db\exec("BEGIN"), "BEGIN"
  for _, post in pairs posts.topics
    insert_stmt\bind_values post.id, post.body, post.author, page
    rc = insert_stmt\step!
    -- A failed insert is reported but does not stop the import.
    if rc != sqlite3.DONE
      io.stderr\write "page #{page}: insert of post #{post.id} failed (#{rc}): #{db\errmsg!}\n"
    insert_stmt\reset!
  check_ok db\exec("COMMIT"), "COMMIT"

-- Release the statement and the connection explicitly instead of relying on
-- garbage collection at interpreter exit.
insert_stmt\finalize!
db\close!

print "done"
|
Loading…
Reference in New Issue