parallelize markdown parsing

Signed-off-by: Christine Dodrill <me@christine.website>
This commit is contained in:
Cadey Ratio 2021-02-15 15:09:25 -05:00
parent 3f3bb17921
commit b7c2687ca8
4 changed files with 86 additions and 41 deletions

35
Cargo.lock generated
View File

@ -574,6 +574,7 @@ checksum = "c70be434c505aee38639abccb918163b63158a4b4bb791b45b7023044bdc3c9c"
dependencies = [ dependencies = [
"futures-channel", "futures-channel",
"futures-core", "futures-core",
"futures-executor",
"futures-io", "futures-io",
"futures-sink", "futures-sink",
"futures-task", "futures-task",
@ -596,12 +597,35 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db8d3b0917ff63a2a96173133c02818fac4a746b0a57569d3baca9ec0e945e08" checksum = "db8d3b0917ff63a2a96173133c02818fac4a746b0a57569d3baca9ec0e945e08"
[[package]]
name = "futures-executor"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ee9ca2f7eb4475772cf39dd1cd06208dce2670ad38f4d9c7262b3e15f127068"
dependencies = [
"futures-core",
"futures-task",
"futures-util",
]
[[package]] [[package]]
name = "futures-io" name = "futures-io"
version = "0.3.9" version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e37c1a51b037b80922864b8eed90692c5cd8abd4c71ce49b77146caa47f3253b" checksum = "e37c1a51b037b80922864b8eed90692c5cd8abd4c71ce49b77146caa47f3253b"
[[package]]
name = "futures-macro"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f8719ca0e1f3c5e34f3efe4570ef2c0610ca6da85ae7990d472e9cbfba13664"
dependencies = [
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "futures-sink" name = "futures-sink"
version = "0.3.9" version = "0.3.9"
@ -623,13 +647,17 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "036a2107cdeb57f6d7322f1b6c363dad67cd63ca3b7d1b925bdf75bd5d96cda9" checksum = "036a2107cdeb57f6d7322f1b6c363dad67cd63ca3b7d1b925bdf75bd5d96cda9"
dependencies = [ dependencies = [
"futures-channel",
"futures-core", "futures-core",
"futures-io", "futures-io",
"futures-macro",
"futures-sink", "futures-sink",
"futures-task", "futures-task",
"memchr", "memchr",
"pin-project-lite 0.2.3", "pin-project-lite 0.2.3",
"pin-utils", "pin-utils",
"proc-macro-hack",
"proc-macro-nested",
"slab", "slab",
] ]
@ -1582,6 +1610,12 @@ version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro-nested"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.24" version = "1.0.24"
@ -2790,6 +2824,7 @@ dependencies = [
"comrak", "comrak",
"envy", "envy",
"eyre", "eyre",
"futures",
"glob", "glob",
"go_vanity", "go_vanity",
"hyper 0.14.2", "hyper 0.14.2",

View File

@ -13,6 +13,7 @@ color-eyre = "0.5"
chrono = "0.4" chrono = "0.4"
comrak = "0.9" comrak = "0.9"
envy = "0.4" envy = "0.4"
futures = "0.3"
glob = "0.3" glob = "0.3"
hyper = "0.14" hyper = "0.14"
kankyo = "0.3" kankyo = "0.3"

View File

@ -70,9 +70,9 @@ pub async fn init(cfg: PathBuf) -> Result<State> {
let resume = fs::read_to_string(cfg.resume_fname.clone())?; let resume = fs::read_to_string(cfg.resume_fname.clone())?;
let resume: String = markdown::render(&resume)?; let resume: String = markdown::render(&resume)?;
let mi = mi::Client::new(cfg.mi_token.clone(), crate::APPLICATION_NAME.to_string())?; let mi = mi::Client::new(cfg.mi_token.clone(), crate::APPLICATION_NAME.to_string())?;
let blog = crate::post::load("blog", Some(&mi)).await?; let blog = crate::post::load("blog").await?;
let gallery = crate::post::load("gallery", None).await?; let gallery = crate::post::load("gallery").await?;
let talks = crate::post::load("talks", None).await?; let talks = crate::post::load("talks").await?;
let mut everything: Vec<Post> = vec![]; let mut everything: Vec<Post> = vec![];
{ {

View File

@ -1,7 +1,8 @@
use chrono::prelude::*; use chrono::prelude::*;
use color_eyre::eyre::{eyre, Result, WrapErr}; use color_eyre::eyre::{eyre, Result, WrapErr};
use glob::glob; use glob::glob;
use std::{cmp::Ordering, fs}; use std::{cmp::Ordering, path::PathBuf};
use tokio::fs;
pub mod frontmatter; pub mod frontmatter;
@ -70,46 +71,54 @@ impl Post {
} }
} }
// NOTE(review): this span is a side-by-side diff rendering. On lines that carry two
// columns, the left column is the removed body of the old `load` and the right column
// is the added `read_post` helper. The comments below describe the added code only.
//
// read_post: builds a single `Post` from the markdown file at `fname`.
//   dir   - prefix used when forming the post's `link` ("{dir}/{file stem}").
//   fname - path of the file to read.
// Returns `Ok(Post)` on success. Returns an error when the file cannot be read, the
// front matter cannot be parsed, the front matter date does not match "%Y-%m-%d",
// the markdown cannot be rendered, or `mi::Client::new` fails.
pub async fn load(dir: &str, mi: Option<&mi::Client>) -> Result<Vec<Post>> { async fn read_post(dir: &str, fname: PathBuf) -> Result<Post> {
let mut result: Vec<Post> = vec![]; let body = fs::read_to_string(fname.clone())
.await
.wrap_err_with(|| format!("can't read {:?}", fname))?;
let (front_matter, content_offset) = frontmatter::Data::parse(body.clone().as_str())
.wrap_err_with(|| format!("can't parse frontmatter of {:?}", fname))?;
// Keep only the text from `content_offset` onward (the offset comes from the front matter parser).
let body = &body[content_offset..];
let date = NaiveDate::parse_from_str(&front_matter.clone().date, "%Y-%m-%d")
.map_err(|why| eyre!("error parsing date in {:?}: {}", fname, why))?;
// Panics if `fname` has no file stem or the stem is not valid UTF-8 (both `unwrap`s).
let link = format!("{}/{}", dir, fname.file_stem().unwrap().to_str().unwrap());
let body_html = crate::app::markdown::render(&body)
.wrap_err_with(|| format!("can't parse markdown for {:?}", fname))?;
let body = body.to_string();
// Midnight (00:00:00) UTC on the front matter date, converted into DateTime<FixedOffset>.
let date: DateTime<FixedOffset> =
DateTime::<Utc>::from_utc(NaiveDateTime::new(date, NaiveTime::from_hms(0, 0, 0)), Utc)
.with_timezone(&Utc)
.into();
// Added code: mentions are looked up only when the MI_TOKEN environment variable can be
// read; a new `mi::Client` is constructed on every call, i.e. once per post.
for path in glob(&format!("{}/*.markdown", dir))?.filter_map(Result::ok) { let mentions: Vec<mi::WebMention> = match std::env::var("MI_TOKEN") {
log::debug!("loading {:?}", path); Ok(token) => mi::Client::new(token.to_string(), crate::APPLICATION_NAME.to_string())?
let body =
fs::read_to_string(path.clone()).wrap_err_with(|| format!("can't read {:?}", path))?;
let (fm, content_offset) = frontmatter::Data::parse(body.clone().as_str())
.wrap_err_with(|| format!("can't parse frontmatter of {:?}", path))?;
let markup = &body[content_offset..];
let date = NaiveDate::parse_from_str(&fm.clone().date, "%Y-%m-%d")
.map_err(|why| eyre!("error parsing date in {:?}: {}", path, why))?;
let link = format!("{}/{}", dir, path.file_stem().unwrap().to_str().unwrap());
let mentions: Vec<mi::WebMention> = match mi {
None => vec![],
Some(mi) => mi
.mentioners(format!("https://christine.website/{}", link)) .mentioners(format!("https://christine.website/{}", link))
.await .await
// A failed mention lookup is logged and replaced by an empty list; it does not fail the post.
.map_err(|why| tracing::error!("error: can't load mentions for {}: {}", link, why)) .map_err(|why| tracing::error!("error: can't load mentions for {}: {}", link, why))
.unwrap_or(vec![]), .unwrap_or(vec![]),
Err(_) => vec![],
}; };
result.push(Post { Ok(Post {
front_matter: fm, front_matter,
link: link, link,
body: markup.to_string(), body,
body_html: crate::app::markdown::render(&markup) body_html,
.wrap_err_with(|| format!("can't parse markdown for {:?}", path))?, date,
date: { mentions,
DateTime::<Utc>::from_utc(
NaiveDateTime::new(date, NaiveTime::from_hms(0, 0, 0)),
Utc,
)
.with_timezone(&Utc)
.into()
},
mentions: mentions,
}) })
} }
pub async fn load(dir: &str) -> Result<Vec<Post>> {
let futs = glob(&format!("{}/*.markdown", dir))?
.filter_map(Result::ok)
.map(|fname| read_post(dir, fname));
let mut result: Vec<Post> = futures::future::join_all(futs)
.await
.into_iter()
.map(Result::unwrap)
.collect();
if result.len() == 0 { if result.len() == 0 {
Err(eyre!("no posts loaded")) Err(eyre!("no posts loaded"))
} else { } else {