From 272b3a0bb1afe17ff32379d3a846a26a510daff6 Mon Sep 17 00:00:00 2001 From: Christine Dodrill Date: Thu, 16 Sep 2021 19:46:57 -0400 Subject: [PATCH] add dirhash crate to make h1 hashes Signed-off-by: Christine Dodrill --- Cargo.lock | 39 +++++++++++++++++++++ cmd/mincatcher/src/server/mod.rs | 34 ++++++++++++------ crates/dirhash/Cargo.toml | 12 +++++++ crates/dirhash/go_cmp/main.go | 17 +++++++++ crates/dirhash/src/lib.rs | 60 ++++++++++++++++++++++++++++++++ crates/dirhash/testdata/foo | 1 + go.mod | 5 +++ go.sum | 13 +++++++ shell.nix | 1 + 9 files changed, 172 insertions(+), 10 deletions(-) create mode 100644 crates/dirhash/Cargo.toml create mode 100644 crates/dirhash/go_cmp/main.go create mode 100644 crates/dirhash/src/lib.rs create mode 100644 crates/dirhash/testdata/foo create mode 100644 go.mod create mode 100644 go.sum diff --git a/Cargo.lock b/Cargo.lock index 965dc7c..e966f50 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -288,6 +288,16 @@ dependencies = [ "generic-array", ] +[[package]] +name = "dirhash" +version = "0.1.0" +dependencies = [ + "base64", + "hex", + "sha2", + "walkdir", +] + [[package]] name = "encoding_rs" version = "0.8.28" @@ -1240,6 +1250,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.1.0" @@ -1779,6 +1798,17 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" @@ -1908,6 +1938,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/cmd/mincatcher/src/server/mod.rs b/cmd/mincatcher/src/server/mod.rs index 3d67afb..2ef29c9 100644 --- a/cmd/mincatcher/src/server/mod.rs +++ b/cmd/mincatcher/src/server/mod.rs @@ -9,7 +9,7 @@ use headers::{Header, HeaderName, HeaderValue}; use hyper::http::StatusCode; use logtail::PublicID; use prometheus::{Encoder, IntCounterVec, IntGauge}; -use std::net::SocketAddr; +use std::{io::Write, net::SocketAddr}; use tokio::sync::Semaphore; use tower_http::{ trace::{DefaultOnResponse, TraceLayer}, @@ -108,7 +108,8 @@ impl Header for OrigContentLength { #[instrument(skip(body))] async fn put_logs( TypedHeader(size): TypedHeader, - TypedHeader(orig_size): TypedHeader, + orig_size: Option>, + compression_algo: Option>, Path((collection, private_id)): Path<(String, logtail::PrivateID)>, body: axum::body::Body, ) -> StatusCode { @@ -116,17 +117,30 @@ async fn put_logs( error!("error acquiring buffer: {}", why); return StatusCode::INSUFFICIENT_STORAGE; } - if let Err(why) = BUF_SEM.try_acquire_many(orig_size.0) { - error!("error acquiring buffer: {}", why); - return StatusCode::INSUFFICIENT_STORAGE; + let mut decompressed_body: Vec = Vec::new(); + let actual_size = match orig_size.clone() { + Some(orig_size) => orig_size.0 .0 as usize, + None => size.0 as usize, + }; + decompressed_body.resize(actual_size, 0); + + if let Some(orig_size) = orig_size { + if let Err(why) = BUF_SEM.try_acquire_many(orig_size.0 .0) { + error!("error acquiring buffer: {}", why); + return StatusCode::INSUFFICIENT_STORAGE; + } + + let compressed_body = hyper::body::to_bytes(body).await.unwrap(); + zstd::block::decompress_to_buffer(&compressed_body, &mut decompressed_body).unwrap(); + } else { + let body = hyper::body::to_bytes(body).await.unwrap(); + decompressed_body.write(&body).unwrap(); } + BYTES_IN_USE.set(BUF_SEM.available_permits() as i64 - BYTE_MAX as i64); - let mut decompressed_body: Vec = Vec::new(); - decompressed_body.resize(orig_size.0 as usize, 0); - - let compressed_body = hyper::body::to_bytes(body).await.unwrap(); - zstd::block::decompress_to_buffer(&compressed_body, &mut decompressed_body).unwrap(); + #[cfg(debug_assertions)] + std::io::stdout().lock().write(&decompressed_body).unwrap(); let vals: Vec = serde_json::from_slice(&decompressed_body).unwrap(); diff --git a/crates/dirhash/Cargo.toml b/crates/dirhash/Cargo.toml new file mode 100644 index 0000000..833abf8 --- /dev/null +++ b/crates/dirhash/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "dirhash" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +base64 = "0.13" +hex = "0.4" +sha2 = "0.9" +walkdir = "2" diff --git a/crates/dirhash/go_cmp/main.go b/crates/dirhash/go_cmp/main.go new file mode 100644 index 0000000..25015fb --- /dev/null +++ b/crates/dirhash/go_cmp/main.go @@ -0,0 +1,17 @@ +package main + +import ( + "fmt" + "log" + + "golang.org/x/mod/sumdb/dirhash" +) + +func main() { + hash, err := dirhash.HashDir("./testdata", "", dirhash.Hash1) + if err != nil { + log.Fatal(err) + } + + fmt.Print(hash) +} diff --git a/crates/dirhash/src/lib.rs b/crates/dirhash/src/lib.rs new file mode 100644 index 0000000..bd1813e --- /dev/null +++ b/crates/dirhash/src/lib.rs @@ -0,0 +1,60 @@ +use sha2::{Digest, Sha256}; +use std::{ + fs::File, + io::{self, Read, Write}, + path::Path, +}; +use walkdir::WalkDir; + +pub fn hash_reader(name: &str, reader: &mut dyn Read) -> io::Result { + let mut hasher = Sha256::new(); + io::copy(reader, &mut hasher)?; + Ok(format!("{} {}\n", hex::encode(hasher.finalize()), name)) +} + +fn hash_file(fname: &Path, name: &Path) -> io::Result { + let mut fin = File::open(name)?; + hash_reader(fname.as_os_str().to_str().unwrap(), &mut fin) +} + +pub fn hash_dir(name: &str) -> io::Result { + let mut result = String::new(); + + for entry in WalkDir::new(name) + .into_iter() + .filter(Result::is_ok) + .map(Result::unwrap) + .filter(|dir_entry| !dir_entry.file_type().is_dir()) + .map(|de| de.into_path()) + .map(|de| hash_file(&de.strip_prefix(name).unwrap(), &de)) + { + result.push_str(&entry?); + } + + let mut hasher = Sha256::new(); + hasher.write_all(&result.as_bytes())?; + Ok(format!("h1:{}", base64::encode(hasher.finalize()))) +} + +#[cfg(test)] +mod tests { + use std::process::Command; + + #[test] + fn hash_dir() { + let hash = super::hash_dir("./testdata").unwrap(); + assert_eq!("h1:nJxVO77692drULxxVJvOrMJE8fl8vG1NLjoDDv9e7+E=", &hash); + } + + #[test] + fn test_compare_go() { + let rust_hash = super::hash_dir("./testdata").unwrap(); + let go_output = Command::new("go") + .arg("run") + .arg("./go_cmp") + .output() + .expect("go to run"); + let go_hash: String = String::from_utf8(go_output.stdout).expect("go output to be utf-8"); + assert_eq!(go_hash, rust_hash); + } +} diff --git a/crates/dirhash/testdata/foo b/crates/dirhash/testdata/foo new file mode 100644 index 0000000..82fa9da --- /dev/null +++ b/crates/dirhash/testdata/foo @@ -0,0 +1 @@ +this is some data \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..60a4797 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module tulpa.dev/cadey/rebterlai + +go 1.16 + +require golang.org/x/mod v0.5.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ba6ec3f --- /dev/null +++ b/go.sum @@ -0,0 +1,13 @@ +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q= +golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/shell.nix b/shell.nix index 54bca02..fb0069f 100644 --- a/shell.nix +++ b/shell.nix @@ -3,6 +3,7 @@ pkgs.mkShell { buildInputs = with pkgs; [ rustc cargo rust-analyzer rustfmt + go goimports gopls # keep this line if you use bash bashInteractive