Scrape the posts into a SQLite database

This commit is contained in:
Christine Dodrill 2015-07-27 18:44:26 -07:00
parent d1f2fb5a7b
commit 81279be189
4 changed files with 32 additions and 1 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
*.db
*.db-journal
*.a
*.o
*.so

View File

@ -4,7 +4,7 @@ CREATE TABLE IF NOT EXISTS Posts (
, body TEXT NOT NULL
, mdown TEXT NOT NULL
, author TEXT NOT NULL
, page TEXT NOT NULL
, page INTEGER
);
CREATE TABLE IF NOT EXISTS Users (

7
db/rebuilddb.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/bash
# Rebuild posts.db from scratch using the schema in db.sql.
# All paths are relative, so run this from the directory that contains db.sql.
set -e
set -x
# -f: under `set -e` a plain `rm` aborts the whole script when posts.db does
# not exist yet (e.g. on the very first run), so the database never gets built.
rm -f ./posts.db
# Feed the schema straight to sqlite3; no need to pipe it through cat.
sqlite3 ./posts.db < ./db.sql

23
scripts/scrape.moon Normal file
View File

@ -0,0 +1,23 @@
-- Load the scraped page dumps in ../raw/pages/<n>.json into the Posts table
-- of ../db/posts.db. Each page file is decoded as JSON and every entry of its
-- `topics` table is inserted as one row, tagged with the page number.
json = require "dkjson"
sqlite3 = require "lsqlite3"

db = assert sqlite3.open "../db/posts.db"
insert_stmt = assert db\prepare "INSERT INTO Posts VALUES (NULL, ?, ?, '', ?, ?)"

-- Batch all inserts into a single transaction: otherwise SQLite commits (and
-- syncs to disk) after every row. If the script aborts, the uncommitted rows
-- are rolled back, so a rerun does not start from a half-filled table.
assert db\exec("BEGIN") == sqlite3.OK, "could not begin transaction"

for page=1,8002
  print "Scraping page #{page}..."

  -- Read the whole file and close it right away so the handle is not held
  -- open (or leaked on error) while decoding and inserting.
  fin = assert io.open "../raw/pages/#{page}.json", "r"
  data = fin\read "*a"
  fin\close!

  -- dkjson returns nil, position, message on malformed input.
  posts, _, err = json.decode data
  unless posts
    error "page #{page}: could not decode JSON: #{err}"

  for _, post in pairs posts.topics
    insert_stmt\bind_values post.id, post.body, post.author, page
    -- A successful INSERT yields DONE; anything else is a failure that
    -- would otherwise go unnoticed.
    unless insert_stmt\step! == sqlite3.DONE
      reason = db\errmsg!
      error "page #{page}: inserting post #{post.id} failed: #{reason}"
    insert_stmt\reset!

insert_stmt\finalize!
assert db\exec("COMMIT") == sqlite3.OK, "could not commit transaction"
db\close!
print "done"