add dirhash crate to make h1 hashes

Signed-off-by: Christine Dodrill <me@christine.website>
This commit is contained in:
Cadey Ratio 2021-09-16 19:46:57 -04:00
parent 993053098b
commit 272b3a0bb1
9 changed files with 172 additions and 10 deletions

39
Cargo.lock generated
View File

@ -288,6 +288,16 @@ dependencies = [
"generic-array",
]
[[package]]
name = "dirhash"
version = "0.1.0"
dependencies = [
"base64",
"hex",
"sha2",
"walkdir",
]
[[package]]
name = "encoding_rs"
version = "0.8.28"
@ -1240,6 +1250,15 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -1779,6 +1798,17 @@ version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "want"
version = "0.3.0"
@ -1908,6 +1938,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"

View File

@ -9,7 +9,7 @@ use headers::{Header, HeaderName, HeaderValue};
use hyper::http::StatusCode;
use logtail::PublicID;
use prometheus::{Encoder, IntCounterVec, IntGauge};
use std::net::SocketAddr;
use std::{io::Write, net::SocketAddr};
use tokio::sync::Semaphore;
use tower_http::{
trace::{DefaultOnResponse, TraceLayer},
@ -108,7 +108,8 @@ impl Header for OrigContentLength {
#[instrument(skip(body))]
async fn put_logs(
TypedHeader(size): TypedHeader<headers::ContentLength>,
TypedHeader(orig_size): TypedHeader<OrigContentLength>,
orig_size: Option<TypedHeader<OrigContentLength>>,
compression_algo: Option<TypedHeader<headers::ContentEncoding>>,
Path((collection, private_id)): Path<(String, logtail::PrivateID)>,
body: axum::body::Body,
) -> StatusCode {
@ -116,17 +117,30 @@ async fn put_logs(
error!("error acquiring buffer: {}", why);
return StatusCode::INSUFFICIENT_STORAGE;
}
if let Err(why) = BUF_SEM.try_acquire_many(orig_size.0) {
error!("error acquiring buffer: {}", why);
return StatusCode::INSUFFICIENT_STORAGE;
let mut decompressed_body: Vec<u8> = Vec::new();
let actual_size = match orig_size.clone() {
Some(orig_size) => orig_size.0 .0 as usize,
None => size.0 as usize,
};
decompressed_body.resize(actual_size, 0);
if let Some(orig_size) = orig_size {
if let Err(why) = BUF_SEM.try_acquire_many(orig_size.0 .0) {
error!("error acquiring buffer: {}", why);
return StatusCode::INSUFFICIENT_STORAGE;
}
let compressed_body = hyper::body::to_bytes(body).await.unwrap();
zstd::block::decompress_to_buffer(&compressed_body, &mut decompressed_body).unwrap();
} else {
let body = hyper::body::to_bytes(body).await.unwrap();
decompressed_body.write(&body).unwrap();
}
BYTES_IN_USE.set(BUF_SEM.available_permits() as i64 - BYTE_MAX as i64);
let mut decompressed_body: Vec<u8> = Vec::new();
decompressed_body.resize(orig_size.0 as usize, 0);
let compressed_body = hyper::body::to_bytes(body).await.unwrap();
zstd::block::decompress_to_buffer(&compressed_body, &mut decompressed_body).unwrap();
#[cfg(debug_assertions)]
std::io::stdout().lock().write(&decompressed_body).unwrap();
let vals: Vec<serde_json::Value> = serde_json::from_slice(&decompressed_body).unwrap();

12
crates/dirhash/Cargo.toml Normal file
View File

@ -0,0 +1,12 @@
[package]
name = "dirhash"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
base64 = "0.13"
hex = "0.4"
sha2 = "0.9"
walkdir = "2"

View File

@ -0,0 +1,17 @@
package main
import (
"fmt"
"log"
"golang.org/x/mod/sumdb/dirhash"
)
func main() {
hash, err := dirhash.HashDir("./testdata", "", dirhash.Hash1)
if err != nil {
log.Fatal(err)
}
fmt.Print(hash)
}

60
crates/dirhash/src/lib.rs Normal file
View File

@ -0,0 +1,60 @@
use sha2::{Digest, Sha256};
use std::{
fs::File,
io::{self, Read, Write},
path::Path,
};
use walkdir::WalkDir;
pub fn hash_reader(name: &str, reader: &mut dyn Read) -> io::Result<String> {
let mut hasher = Sha256::new();
io::copy(reader, &mut hasher)?;
Ok(format!("{} {}\n", hex::encode(hasher.finalize()), name))
}
fn hash_file(fname: &Path, name: &Path) -> io::Result<String> {
let mut fin = File::open(name)?;
hash_reader(fname.as_os_str().to_str().unwrap(), &mut fin)
}
pub fn hash_dir(name: &str) -> io::Result<String> {
let mut result = String::new();
for entry in WalkDir::new(name)
.into_iter()
.filter(Result::is_ok)
.map(Result::unwrap)
.filter(|dir_entry| !dir_entry.file_type().is_dir())
.map(|de| de.into_path())
.map(|de| hash_file(&de.strip_prefix(name).unwrap(), &de))
{
result.push_str(&entry?);
}
let mut hasher = Sha256::new();
hasher.write_all(&result.as_bytes())?;
Ok(format!("h1:{}", base64::encode(hasher.finalize())))
}
#[cfg(test)]
mod tests {
use std::process::Command;
#[test]
fn hash_dir() {
let hash = super::hash_dir("./testdata").unwrap();
assert_eq!("h1:nJxVO77692drULxxVJvOrMJE8fl8vG1NLjoDDv9e7+E=", &hash);
}
#[test]
fn test_compare_go() {
let rust_hash = super::hash_dir("./testdata").unwrap();
let go_output = Command::new("go")
.arg("run")
.arg("./go_cmp")
.output()
.expect("go to run");
let go_hash: String = String::from_utf8(go_output.stdout).expect("go output to be utf-8");
assert_eq!(go_hash, rust_hash);
}
}

1
crates/dirhash/testdata/foo vendored Normal file
View File

@ -0,0 +1 @@
this is some data

5
go.mod Normal file
View File

@ -0,0 +1,5 @@
module tulpa.dev/cadey/rebterlai
go 1.16
require golang.org/x/mod v0.5.0

13
go.sum Normal file
View File

@ -0,0 +1,13 @@
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/mod v0.5.0 h1:UG21uOlmZabA4fW5i7ZX6bjw1xELEGg/ZLgZq9auk/Q=
golang.org/x/mod v0.5.0/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

View File

@ -3,6 +3,7 @@
pkgs.mkShell {
buildInputs = with pkgs; [
rustc cargo rust-analyzer rustfmt
go goimports gopls
# keep this line if you use bash
bashInteractive