From 94b797de44eebf4309b5ccac7a977fa07f5c7b6c Mon Sep 17 00:00:00 2001 From: Sergey Pepyakin Date: Tue, 19 Jun 2018 16:29:33 +0300 Subject: [PATCH 1/2] Add some more benches (#97) * Add rev_complement test # Conflicts: # benches/src/lib.rs # benches/wasm-kernel/src/lib.rs * Add redux_regex test. # Conflicts: # benches/wasm-kernel/Cargo.toml * Fmt and return an original header * Use ManuallyDrop * Really initialize lazy static. --- benches/Cargo.toml | 1 + benches/src/lib.rs | 235 ++++++++++++++++++++++ benches/src/revcomp-input.txt | 171 ++++++++++++++++ benches/src/revcomp-output.txt | 171 ++++++++++++++++ benches/wasm-kernel/Cargo.toml | 3 +- benches/wasm-kernel/src/lib.rs | 101 ++++++++-- benches/wasm-kernel/src/regex_redux.rs | 17 ++ benches/wasm-kernel/src/rev_complement.rs | 164 +++++++++++++++ 8 files changed, 848 insertions(+), 15 deletions(-) create mode 100644 benches/src/revcomp-input.txt create mode 100644 benches/src/revcomp-output.txt create mode 100644 benches/wasm-kernel/src/regex_redux.rs create mode 100644 benches/wasm-kernel/src/rev_complement.rs diff --git a/benches/Cargo.toml b/benches/Cargo.toml index 0df6e79..3626446 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -6,3 +6,4 @@ authors = ["Sergey Pepyakin "] [dependencies] wasmi = { path = ".." } assert_matches = "1.2" +wabt = "0.3" diff --git a/benches/src/lib.rs b/benches/src/lib.rs index db01ccd..8eead71 100644 --- a/benches/src/lib.rs +++ b/benches/src/lib.rs @@ -4,6 +4,7 @@ extern crate test; extern crate wasmi; #[macro_use] extern crate assert_matches; +extern crate wabt; use std::error; use std::fs::File; @@ -20,6 +21,9 @@ fn load_from_file(filename: &str) -> Result> { Ok(Module::from_buffer(buf)?) 
} +const REVCOMP_INPUT: &'static [u8] = include_bytes!("./revcomp-input.txt"); +const REVCOMP_OUTPUT: &'static [u8] = include_bytes!("./revcomp-output.txt"); + #[bench] fn bench_tiny_keccak(b: &mut Bencher) { let wasm_kernel = load_from_file( @@ -41,3 +45,234 @@ fn bench_tiny_keccak(b: &mut Bencher) { .unwrap(); }); } + +#[bench] +fn bench_rev_comp(b: &mut Bencher) { + let wasm_kernel = load_from_file( + "./wasm-kernel/target/wasm32-unknown-unknown/release/wasm_kernel.wasm", + ).expect("failed to load wasm_kernel. Is `build.rs` broken?"); + + let instance = ModuleInstance::new(&wasm_kernel, &ImportsBuilder::default()) + .expect("failed to instantiate wasm module") + .assert_no_start(); + + // Allocate buffers for the input and output. + let test_data_ptr: RuntimeValue = { + let input_size = RuntimeValue::I32(REVCOMP_INPUT.len() as i32); + assert_matches!( + instance.invoke_export("prepare_rev_complement", &[input_size], &mut NopExternals), + Ok(Some(v @ RuntimeValue::I32(_))) => v, + "", + ) + }; + + // Get the pointer to the input buffer. + let input_data_mem_offset = assert_matches!( + instance.invoke_export("rev_complement_input_ptr", &[test_data_ptr], &mut NopExternals), + Ok(Some(RuntimeValue::I32(v))) => v as u32, + "", + ); + + // Copy test data inside the wasm memory. + let memory = instance.export_by_name("memory") + .expect("Expected export with a name 'memory'") + .as_memory() + .expect("'memory' should be a memory instance") + .clone(); + memory + .set(input_data_mem_offset, REVCOMP_INPUT) + .expect("can't load test data into a wasm memory"); + + b.iter(|| { + instance + .invoke_export("bench_rev_complement", &[test_data_ptr], &mut NopExternals) + .unwrap(); + }); + + // Verify the result. 
+ let output_data_mem_offset = assert_matches!( + instance.invoke_export("rev_complement_output_ptr", &[test_data_ptr], &mut NopExternals), + Ok(Some(RuntimeValue::I32(v))) => v as u32, + "", + ); + let result = memory + .get(output_data_mem_offset, REVCOMP_OUTPUT.len()) + .expect("can't get result data from a wasm memory"); + assert_eq!(&*result, REVCOMP_OUTPUT); +} + +#[bench] +fn bench_regex_redux(b: &mut Bencher) { + let wasm_kernel = load_from_file( + "./wasm-kernel/target/wasm32-unknown-unknown/release/wasm_kernel.wasm", + ).expect("failed to load wasm_kernel. Is `build.rs` broken?"); + + let instance = ModuleInstance::new(&wasm_kernel, &ImportsBuilder::default()) + .expect("failed to instantiate wasm module") + .assert_no_start(); + + // Allocate buffers for the input and output. + let test_data_ptr: RuntimeValue = { + let input_size = RuntimeValue::I32(REVCOMP_INPUT.len() as i32); + assert_matches!( + instance.invoke_export("prepare_regex_redux", &[input_size], &mut NopExternals), + Ok(Some(v @ RuntimeValue::I32(_))) => v, + "", + ) + }; + + // Get the pointer to the input buffer. + let input_data_mem_offset = assert_matches!( + instance.invoke_export("regex_redux_input_ptr", &[test_data_ptr], &mut NopExternals), + Ok(Some(RuntimeValue::I32(v))) => v as u32, + "", + ); + + // Copy test data inside the wasm memory. 
+ let memory = instance.export_by_name("memory") + .expect("Expected export with a name 'memory'") + .as_memory() + .expect("'memory' should be a memory instance") + .clone(); + memory + .set(input_data_mem_offset, REVCOMP_INPUT) + .expect("can't load test data into a wasm memory"); + + b.iter(|| { + instance + .invoke_export("bench_regex_redux", &[test_data_ptr], &mut NopExternals) + .unwrap(); + }); +} + +#[bench] +fn fac_recursive(b: &mut Bencher) { + let wasm = wabt::wat2wasm( +r#" + ;; Recursive factorial +(func (export "fac-rec") (param i64) (result i64) + (if (result i64) (i64.eq (get_local 0) (i64.const 0)) + (then (i64.const 1)) + (else + (i64.mul (get_local 0) (call 0 (i64.sub (get_local 0) (i64.const 1)))) + ) + ) +) +"# + ).unwrap(); + + let module = Module::from_buffer(&wasm).unwrap(); + + let instance = ModuleInstance::new(&module, &ImportsBuilder::default()) + .expect("failed to instantiate wasm module") + .assert_no_start(); + + b.iter(|| { + let value = instance + .invoke_export("fac-rec", &[RuntimeValue::I64(25)], &mut NopExternals); + assert_matches!(value, Ok(Some(RuntimeValue::I64(7034535277573963776)))); + }); +} + +#[bench] +fn fac_opt(b: &mut Bencher) { + let wasm = wabt::wat2wasm( +r#" +;; Optimized factorial. 
+(func (export "fac-opt") (param i64) (result i64) + (local i64) + (set_local 1 (i64.const 1)) + (block + (br_if 0 (i64.lt_s (get_local 0) (i64.const 2))) + (loop + (set_local 1 (i64.mul (get_local 1) (get_local 0))) + (set_local 0 (i64.add (get_local 0) (i64.const -1))) + (br_if 0 (i64.gt_s (get_local 0) (i64.const 1))) + ) + ) + (get_local 1) +) +"# + ).unwrap(); + + let module = Module::from_buffer(&wasm).unwrap(); + + let instance = ModuleInstance::new(&module, &ImportsBuilder::default()) + .expect("failed to instantiate wasm module") + .assert_no_start(); + + b.iter(|| { + let value = instance + .invoke_export("fac-opt", &[RuntimeValue::I64(25)], &mut NopExternals); + assert_matches!(value, Ok(Some(RuntimeValue::I64(7034535277573963776)))); + }); +} + +// This is used for testing overhead of a function call +// is not too large. +#[bench] +fn recursive_ok(b: &mut Bencher) { + let wasm = wabt::wat2wasm( + r#" +(module + (func $call (export "call") (param i32) (result i32) + block (result i32) + get_local 0 + get_local 0 + i32.eqz + br_if 0 + + i32.const 1 + i32.sub + call $call + end + ) +) + "# + ).unwrap(); + let module = Module::from_buffer(&wasm).unwrap(); + + let instance = ModuleInstance::new(&module, &ImportsBuilder::default()) + .expect("failed to instantiate wasm module") + .assert_no_start(); + + b.iter(|| { + let value = instance + .invoke_export("call", &[RuntimeValue::I32(8000)], &mut NopExternals); + assert_matches!(value, Ok(Some(RuntimeValue::I32(0)))); + }); +} + +#[bench] +fn recursive_trap(b: &mut Bencher) { + let wasm = wabt::wat2wasm( + r#" +(module + (func $call (export "call") (param i32) (result i32) + block (result i32) + get_local 0 + get_local 0 + i32.eqz + br_if 0 + + i32.const 1 + i32.sub + call $call + end + unreachable + ) +) + "# + ).unwrap(); + let module = Module::from_buffer(&wasm).unwrap(); + + let instance = ModuleInstance::new(&module, &ImportsBuilder::default()) + .expect("failed to instantiate wasm module") + 
.assert_no_start(); + + b.iter(|| { + let value = instance + .invoke_export("call", &[RuntimeValue::I32(1000)], &mut NopExternals); + assert_matches!(value, Err(_)); + }); +} diff --git a/benches/src/revcomp-input.txt b/benches/src/revcomp-input.txt new file mode 100644 index 0000000..f1caba0 --- /dev/null +++ b/benches/src/revcomp-input.txt @@ -0,0 +1,171 @@ +>ONE Homo sapiens alu +GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA +TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT +AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG +GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG +CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT +GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA +GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA +TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG +AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA +GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT +AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC +AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG +GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC +CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG +AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT +TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA +TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT +GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG +TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT +CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG +CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG +TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA +CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG +AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG +GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC +TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA 
+TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA +GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT +GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC +ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT +TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC +CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG +CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG +GGCGACAGAGCGAGACTCCG +>TWO IUB ambiguity codes +cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg +tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa +NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt +cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga +gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa +HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca +tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt +tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt +acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct +tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt +gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa +accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt +RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt +tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag +cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg +ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat +actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg +YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa +KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata +aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa +aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg +gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc +tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK +tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt 
+ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg +ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa +BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt +aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc +tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc +cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac +aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga +tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga +aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD +gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg +ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV +taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa +ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat +gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg +gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa +tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt +tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt +taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca +cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag +aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt +cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt +ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW +attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag +ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa +attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc +tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta +>THREE Homo sapiens frequency +aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga +atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc +ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc +atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa +tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca 
+tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag +gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat +tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt +gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc +gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc +atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc +taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta +ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg +acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag +ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg +ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt +cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg +ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt +aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag +attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac +acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat +tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca +attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt +aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt +tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg +ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga +gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac +caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct +taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga +ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg +ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat +gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga +ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact +aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc +cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt +gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat +ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt 
+tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata +tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac +ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga +tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac +gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat +ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc +actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc +gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca +ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata +tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca +atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata +aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat +tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt +ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat +acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga +gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata +gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg +tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac +gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga +gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat +tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta +acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga +tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata +catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga +attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt +ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt +ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg +gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa +tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg +tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct +ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc +gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta 
+ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact +tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc +ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc +tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt +ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca +actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac +gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc +gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag +accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga +gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct +cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta +tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat +atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt +ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta +ggaagtgaaaagataaatat diff --git a/benches/src/revcomp-output.txt b/benches/src/revcomp-output.txt new file mode 100644 index 0000000..14d792a --- /dev/null +++ b/benches/src/revcomp-output.txt @@ -0,0 +1,171 @@ +>ONE Homo sapiens alu +CGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAC +CTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACA +GGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCAT +GTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAA +AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTC +TGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGG +GTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACC +ACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTG +GTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTA +CAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCT +GGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTC +TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAAT +TTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCT +GACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA 
+CCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGC +GCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCC +TCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTA +GTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGAT +CCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCT +TTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTC +ACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTG +GGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGT +TTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGG +CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAG +TCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCG +CCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGC +GCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGG +CCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGC +TGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCG +CCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCA +AGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCC +CGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTC +GAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGC +GTGAGCCACCGCGCCCGGCC +>TWO IUB ambiguity codes +TAGGDHACHATCRGTRGVTGAGWTATGYTGCTGTCABACDWVTRTAAGAVVAGATTTNDA +GASMTCTGCATBYTTCAAKTTACMTATTACTTCATARGGYACMRTGTTTTYTATACVAAT +TTCTAKGDACKADACTATATNTANTCGTTCACGBCGYSCBHTANGGTGATCGTAAAGTAA +CTATBAAAAGATSTGWATBCSGAKHTTABBAACGTSYCATGCAAVATKTSKTASCGGAAT +WVATTTNTCCTTCTTCTTDDAGTGGTTGGATACVGTTAYMTMTBTACTTTHAGCTAGBAA +AAGAGKAAGTTRATWATCAGATTMDDTTTAAAVAAATATTKTCYTAAATTVCNKTTRACG +ADTATATTTATGATSADSCAATAWAGCGRTAGTGTAAGTGACVGRADYGTGCTACHVSDT +CTVCARCSYTTAATATARAAAATTTAATTTACDAATTGBACAGTAYAABATBTGCAGBVG +TGATGGDCAAAATBNMSTTABKATTGGSTCCTAGBTTACTTGTTTAGTTTATHCGATSTA +AAGTCGAKAAASTGTTTTAWAKCAGATATACTTTTMTTTTGBATAGAGGAGCMATGATRA +AAGGNCAYDCCDDGAAAGTHGBTAATCKYTBTACBGTBCTTTTTGDTAASSWTAAWAARA +TTGGCTAAGWGRADTYACATAGCTCBTAGATAWAGCAATNGTATMATGTTKMMAGTAWTC 
+CCNTSGAAWATWCAAAAMACTGAADNTYGATNAATCCGAYWNCTAACGTTAGAGDTTTTC +ATCTGGKRTAVGAABVCTGWGBTCTDVGKATTBTCTAAGGVADAAAVWTCTAGGGGAGGG +TTAGAACAATTAAHTAATNAAATGCATKATCTAAYRTDTCAGSAYTTYHGATRTTWAVTA +BGNTCDACAGBCCRCAGWCRTCABTGMMAWGMCTCAACCGATRTGBCAVAATCGTDWDAA +CAYAWAATWCTGGTAHCCCTAAGATAACSCTTAGTGSAACAWTBGTCDTTDGACWDBAAC +HTTTNGSKTYYAAYGGATNTGATTTAARTTAMBAATCTAAGTBTCATYTAACTTADTGTT +TCGATACGAAHGGCYATATACCWDTKYATDCSHTDTCAAAATGTGBACTGSCCVGATGTA +TCMMAGCCTTDAAABAATGAAGAGTAACTHATMGVTTAATAACCCGGTTVSANTGCAATT +GTGAGATTTAMGTTTAMAAYGCTGACAYAAAAAGGCACAMYTAAGVGGCTGGAABVTACG +GATTSTYGTBVAKTATWACCGTGTKAGTDTGTATGTTTAAAGGAAAAAGTAACATARAAA +GGTYCAMNYAAABTATAGNTSATANAGTCATCCTATWADKAACTRGTMSACDGTATSAYT +AAHSHGTAABYGACTYTATADTGSTATAGAGAAATCGNTAAAGGAAATCAGTTGTNCYMV +TNACDRTATBNATATASTAGAAMSCGGGANRCKKMCAAACATTNAGTCTRMAATBMTACC +CGTACTTCTBGDSYAATWGAAAATGACADDCHAKAAAYATATTKTTTTCACANACWAGAA +AKATCCTTATTAYKHKCTAAACARTATTTTDATBTVWCYGCAATACTAGGKAAASTTDGA +MGGCHTTHAATVCAHDRYAGGRCTATACGTCMAGAGAGCTBTHGNACARTCCBDCTAAGA +GCGGCTTTARTAAAGAATCCNAGTAWBTGACTTGAATTACWTVACAGAAABCAATNAAAC +CGTNTRANTTGAYCMAWBADTANABRGGTKTHTWTAGTTVCTMBKTAGMTVKCCAGCANT +TVAGSWTTAGCCGCRHTTTCCTTHNTATTAAGAAGAATAGGMTRAARTCTABGTACDTTT +TATAAVDHAHTATAGATCCTAGTAAGYTWATDWCATGAGGGATAGTAAMDMNGBASTWAM +TSTATRBAYDABATGTATATYCGCACTGTTTTAACMCWBTATAWAGTATBTSTATVTTAR +CCTMTTAAKADATCAACTAATYTSVTAKGDATTATGCKTCAYCAKAATACTTKAANGAGT +ATTSDAGATCGGAAATACTTAAYAAVGTATMCGCTTGTGTDCTAATYTATTTTATTTWAA +CAGWRCTATGTAGMTGTTTGTTYKTNGTTKTCAGAACNTRACCTACKTGSRATGTGGGGG +CTGTCATTAAGTAAATNGSTTABCCCCTCGCAGCTCWHTCGCGAAGCAVATGCKACGHCA +ACAKTTAATAACASAAADATTWNYTGTAATTGTTCGTMHACHTWATGTGCWTTTTGAAHY +ACTTTGTAYAMSAAACTTAADAAATATAGTABMATATYAATGSGGTAGTTTGTGTBYGGT +TWSGSVGWMATTDMTCCWWCABTCSVACAGBAATGTTKATBGTCAATAATCTTCTTAAAC +ARVAATHAGYBWCTRWCABGTWWAATCTAAGTCASTAAAKTAAGVKBAATTBGABACGTA +AGGTTAAATAAAAACTRMDTWBCTTTTTAATAAAAGATMGCCTACKAKNTBAGYRASTGT +ASSTCGTHCGAAKTTATTATATTYTTTGTAGAACATGTCAAAACTWTWTHGKTCCYAATA +AAGTGGAYTMCYTAARCSTAAATWAKTGAATTTRAGTCTSSATACGACWAKAASATDAAA 
+TGYYACTSAACAAHAKTSHYARGASTATTATTHAGGYGGASTTTBGAKGATSANAACACD +TRGSTTRAAAAAAAACAAGARTCVTAGTAAGATAWATGVHAAKATWGAAAAGTYAHVTAC +TCTGRTGTCAWGATRVAAKTCGCAAVCGASWGGTTRTCSAMCCTAACASGWKKAWDAATG +ACRCBACTATGTGTCTTCAAAHGSCTATATTTCGTVWAGAAGTAYCKGARAKSGKAGTAN +TTTCYACATWATGTCTAAAADMDTWCAATSTKDACAMAADADBSAAATAGGCTHAHAGTA +CGACVGAATTATAAAGAHCCVAYHGHTTTACATSTTTATGNCCMTAGCATATGATAVAAG +>THREE Homo sapiens frequency +ATATTTATCTTTTCACTTCCTACATTGGTCAGACCATTATTCGACACGTGGCGTCATTTT +GTCATACCGGGTAATGTTGGAAACAAAACGTACTGATAAAATACTGAGTTGTAAACTCTA +ATCAGATAACGCGCTTGGATATTAAGATTCACACAGGGGTTTCGGCTGTAAAAAAACTTG +TGGAGCTGTTCTGGGACAGATAAGTTGTACCTCGTACTTAGCTAATTAATGAACCAACTG +ATTACGATAGAACAATTCTGAGGCCGCCAGGACAGCCAAATTTTAATCTTATAAAGCTGG +AAACAGCCGGTATTAGCTTCTCGCATACTTTGCCTGCATTGGTACCTTACAGATATCAGC +GTAGTCATATACACCTCGGTCTCAGCTAAGCTTGTATCTCTTAGAGTAGTTCAAAGATAG +TGGACAATACCTGTGGAATCGATTGCAGATATGGATTTATTTAACTACTGAGTCTCATTC +ACAAGCTAAGCAAGGAGCACGTTTTGGTGCCGGCATACCGATTTGCTATCATGTCAGCAA +ATTTGCGTTGTATTCCTAGTTGCACCCATTAAGGCCACACTCCGAACCTAATTATTACAT +CGCAAAGACATGTACGAAGGACCCGATGTCGAATAGAAGGGAGGACTGTTCATTGGAAGC +TAGACCAGAGGAATCGCAAAGATGCAACTCTTACAATAAAAATCTAATTTCAGTCAACAC +GCAATTTCTATAAGGTTTCCGATAATAATGAACCGTCTTCCACAGGGGAATTTGCCATGC +TCGTAAAAGTAGTTAATCCAAGTAGAAGAAATTTTGATAATGTTTTAAGTTGGCACGAAG +GAATTCAGAGAGATCTTACCTAACAAAGGCATTAGTAGATGTTCCTTGGTTCACACTCGG +TCAATCAGAGCACATACTACGGGCGATACCGGGAATGACACAACATCAATGAGATTGTTA +AGTGAGGTAATTGACTTTAGAGGACTCGATCAGTATACTGTCACTATGAACATCGTATTA +ATTGTTATCCGATATATACACCACCGATTTGCTTGTGCAAGGTTACAGACCCATTCGATA +AATACAAACACGGAGCGATATTATTTAAGGAGTGCTGTCTTCAAAAGAATTATTCCCACA +CCGACATAAGAACTTCGCTCCGTCATTCCAGATTTAAATAACATAACGTAACGCTTTGCT +GATAACATAACATAACCGAGAATTTGCTTAGGAAATTTGGAGCAATATTGCATTGTTTCT +CAGTCATCACAAGGCCCGCCAAAGAACTCTGAGAATCAGGATTCAACATGATTGGTAAGA +CTCTATATATATAACTTAATTCTTGTGTCCGGAGATAGAAAGAGGACGAGAGATACTACG +AAAGAAAGTGTACTTCGATGTATCAATTCAGACGCCTTCTCTATCATCAACATTATAGGT +CTCGTATATGCTCGGCGCGATCTGCTTCTCTCCGCCAATAGCCCCATAGTGTATTTCAAG 
+CGCAGTAACAGTGAAATCGTTACGAAGGTAGGGATGTTGCTTATAATTGTCGTAACTTAT +CGCTTATGTATCTTTCAAGAATGAACGGCAGCATATACATACGTTCTACCTTTAGCTACA +AAGCATCCATATACTCCCTCTCATGATTGAAACTCTTCCCTATTTTGTAGCCAATAGTGA +AAGCGTATTAGTATAAATTCGTCGGTTTTTCACTCGCAACTGTTATACTCTGCAAACAAA +CGAAAGCCTCATAGTACAAACCTAAAGCTACATACTTCATCATTGGCAGACCAGTGGCGG +TATTTCTACGGAAGCATCACTATAGATATAAAGTTTCCCTTCATGTACGTCTGTTAACCA +TATCACAAGAAACTGCTATCTCTGTCACGTAACAATTCACGCGCCTTATCGCCAAATGTT +CATATATGCGCGGTATACGTATGAACGAATACTAATTAGTATAACGGAGGATTCACGGGA +GGGATACTTGGGGCATTTATAAATCGTCTAAAAATTTTCTATCAGCACTTGCGGGTTATA +GTGGATTACTAGGCAACATAATATTCTGTATTGGTCCAAATGACGCTATAGATAAATTAG +CAAAATACATTGTTTCCATTTATGTAAGTCGAAACTCCAGGACTCCCGGGAACCAGTTAA +ACCGTCTGGAAAAGACACATTGTGAGCGGGACTTCAATGATAGCTTTCAATGAGCTTCTC +ATGCTTGGGGTCTGTACATATATGTTGGCGAAATTATCGTCTGTATTCTGTTATGCTTTG +ATCATGGGTTATTAGTATAGTGTCCGGTTAAGTACCAATACCGCTAGAGACCCGACCTAA +GTCGATAACTAACGATCATCGACGTAAGGATCGTCTCGATCAGTACTTCAGTCTAGATCT +GGGAATAGTAACTCGTTAGTGAACTATGTCGTGTCATAACTCTAAAATGCAATCAAATCT +TATTATTGAGTATTGATTATATAAAGCATCCGCTTAGCTTTACCCTCAAATGTTATATGC +AATTTAAAGCGCTTGATATCGTCTACTCAAGTTCAGGTTTCACATGGCCGCAACGTGACG +TTATTAGAGGTGGGTCATCATCTCTGAGGCTAGTGATGTTGAATACTCATTGAATGGGAA +GTGGAATACCATGCTCGTAGGTAACAGCATGACCTATAAAATATACTATGGGTGTGTGGT +AGATCAATATTGTTCAAGCATATCGTAACAATAACGGCTGAAATGTTACTGACATGAAAG +AGGGAGTCCAAACCATTCTAACAGCTGATCAAGTCGTCTAAAAACGCCTGGTTCAGCCTT +AAGAGTTATAAGCCAGACAAATTGTATCAATAGAGAATCCGTAAATTCCTCGGCCAACCT +CTTGCAAAGACATCACTATCAATATACTACCGTGATCTTAATTAGTGAACTTATATAAAT +ATCTACAACCAGATTCAACGGAAAAGCTTTAGTGGATTAGAAATTGCCAAGAATCACATT +CATGTGGGTTCGAATGCTTTAGTAATACCATTTCGCCGAGTAGTCACTTCGCTGAACTGT +CGTAAATTGCTATGACATAATCGAAAAGGATTGTCAAGAGTCGATTACTGCGGACTAATA +ATCCCCACGGGGGTGGTCTCATGTCTCCCCAGGCGAGTGGGGACGGTTGATAAACACGCT +GCATCGCGGACTGATGTTCCCAGTATTACATAGTCACATTGGATTGCGAGTAGTCTACCT +ATTTATGAGCGAGAGATGCCTCTAACTACTTCGACTTTTAAAACCTTTCCACGCCAGTAT +TCGGCGAAAGGGAAGTATTAAGGGTTGTCATAATTAAGCTGATACCACTTCAGACTTTGC +TCTACTTCTGTCTTTCATTGGTTTAGTAAAGTCTGTCCATTCGTCGAGACCGTCTTTTGC 
+AGCCTCATTCTACCAACTGCTCCGACTCTTAGTCTGCTTCTCCCAGCGTTATAACAAGAG +GCATTTTGTCATCCTTAAAACAATAATAAAGAACTCGGAGCACTGATATAATGACTGAAT +TAGAACCGCTTAAAAATACAACGAATAGATAAGACTATCGGATAAGATCTAATATGTAGT +GATTAAGCCCTTTATTAATTAATAATAGTTACCCTTTCTGATGTAACGCGACATATTACG +ATTTAGTGGCACGTCTGAATTGCAAAGCAGATCTCTACCCGATTTTTATTATAAATCCCG +TATACATCTTGACTTGAGTAATTGTTCATCTTTTTATATCTCTTCGTACTACAAATAATT +AATATCTCAACCCGTATTGTGTGATTCTAATTACCAACAGAATACGAGGAGGTTTTTGCT +TAGGGCCATATATAATGAATCTATCTCGTTTATTCGCGGAACCCGAGATAACATTACGAT +GTAACTATTTTAGAGAACTTAATACAAGAAACATTGCTGATTACTCATAACTAAATGCTT +GGTAATATATCCTCAGTGCCCCTACCATCTTTTACGCAGGGATGTAATTACTTAGGATTC +ATTGTGTAAGAATTACAATGAACGATGGATATGAAGGCATGTTGCGAGGTGTTCCTTGGT +ATGTGAAGTTCGCAGGGCAACAAAAATTTCGCAGAATAGGCCTCAAAGTATTGGTAAAGA +AGACAACTAATCATCACGAGCTTCTGATATCAATACGAACGAGTCCTGTGATGGATGAAA +GAAAGTCGTATCGAAAATGTCAAGAGTCTGCCCAATGTAACTTACTTCAAAAAATAACGC +TTCCGCCAAGTACGTTCGAATAAACGTAATTTTAAAAATACATAAGGGGTGTTAGAAAGT +AAGCGACGGGATATAAGTTAGACTCAAGATTCCGCCGTAAAACGAGACTGATTCCGAAGA +TTGTTCGTGGATCTGGTCATGACTTTCACTGAGTAAGGAGTTTCGACATATGTCAATAAA +CACAAAAATAGAAGCTATTCGATCTGAAAAATATTAGGACAAGAAACTATCTCACGCTAG +CCCAGAATATTCACTCACCCACGGGCGATACTAAAGCACTATATAGTCGCGTGATTACTA +TACATATGGTACACATAAGAATCACGATCAGGTTCTCAATTTTCAACAATATATGTTTAT +TTGCATAGGTAATATTAGGCCTTTAAGAGAAGGATGGGTGAGATACTCCGGGGATGGCGG +CAATAAAGAAAAACACGATATGAGTAATAGGATCCTAATATCTTGGCGAGAGACTTAAGG +TACGAATTTTGCGCAATCTATTTTTTACTTGGCCAGAATTCATGTATGGTATAAGTACGA +ACTTTTTTGATCACTTTCATGGCTACCTGATTAGGATAGTTTGAGGAATTTCCCAAATAT +ACCGATTTAATATACACTAGGGCTTGTCACTTTGAGTCAGAAAAAGAATATAATTACTTA +GGGTAATGCTGCATACATATTCTTATATTGCAAAGGTTCTCTGGGTAATCTTGAGCCTTC +ACGATACCTGGTGAAGTGTT diff --git a/benches/wasm-kernel/Cargo.toml b/benches/wasm-kernel/Cargo.toml index f9717f5..f6ea6dc 100644 --- a/benches/wasm-kernel/Cargo.toml +++ b/benches/wasm-kernel/Cargo.toml @@ -8,7 +8,8 @@ crate-type = ["cdylib"] [dependencies] tiny-keccak = "1.4.2" -rlibc = "1.0" +regex = "0.2.10" +lazy_static = "1.0" [profile.release] panic = "abort" diff --git 
a/benches/wasm-kernel/src/lib.rs b/benches/wasm-kernel/src/lib.rs index ee172ff..17ba845 100644 --- a/benches/wasm-kernel/src/lib.rs +++ b/benches/wasm-kernel/src/lib.rs @@ -1,21 +1,13 @@ -#![no_std] -#![feature(lang_items)] -#![feature(core_intrinsics)] -#![feature(panic_implementation)] - -extern crate rlibc; extern crate tiny_keccak; +extern crate regex; +#[macro_use] +extern crate lazy_static; +use std::mem::ManuallyDrop; use tiny_keccak::Keccak; -#[no_mangle] -#[panic_implementation] -pub fn panic_fmt(_info: &::core::panic::PanicInfo) -> ! { - use core::intrinsics; - unsafe { - intrinsics::abort(); - } -} +mod rev_complement; +mod regex_redux; pub struct TinyKeccakTestData { data: &'static [u8], @@ -48,3 +40,84 @@ pub extern "C" fn bench_tiny_keccak(test_data: *const TinyKeccakTestData) { keccak.finalize((*test_data).result); } } + +pub struct RevComplementTestData { + input: ManuallyDrop<Box<[u8]>>, + output: ManuallyDrop<Box<[u8]>>, +} + +#[no_mangle] +pub extern "C" fn prepare_rev_complement(size: usize) -> *mut RevComplementTestData { + let input = vec![0; size]; + let output = vec![0; size]; + + let test_data = Box::new( + RevComplementTestData { + input: ManuallyDrop::new(input.into_boxed_slice()), + output: ManuallyDrop::new(output.into_boxed_slice()), + } + ); + + // Basically leak the pointer to the test data. This shouldn't be harmful since `prepare` is called + // only once per bench run (not for the iteration), and afterwards whole memory instance is discarded. 
+ Box::into_raw(test_data) +} + +#[no_mangle] +pub extern "C" fn rev_complement_input_ptr(test_data: *mut RevComplementTestData) -> *mut u8 { + unsafe { + (*test_data).input.as_mut_ptr() + } +} + +#[no_mangle] +pub extern "C" fn rev_complement_output_ptr(test_data: *mut RevComplementTestData) -> *const u8 { + unsafe { + (*test_data).output.as_ptr() + } +} + +#[no_mangle] +pub extern "C" fn bench_rev_complement(test_data: *mut RevComplementTestData) { + unsafe { + let result = rev_complement::run(&*(*test_data).input); + (*test_data).output.copy_from_slice(&result); + } +} + +pub struct RegexReduxTestData { + input: ManuallyDrop<Box<[u8]>>, + output: Option<usize>, +} + +#[no_mangle] +pub extern "C" fn prepare_regex_redux(size: usize) -> *mut RegexReduxTestData { + regex_redux::prepare(); + + let input = vec![0; size]; + let test_data = Box::new( + RegexReduxTestData { + input: ManuallyDrop::new(input.into_boxed_slice()), + output: None, + } + ); + + // Basically leak the pointer to the test data. This shouldn't be harmful since `prepare` is called + // only once per bench run (not for the iteration), and afterwards whole memory instance is discarded. + Box::into_raw(test_data) +} + +#[no_mangle] +pub extern "C" fn regex_redux_input_ptr(test_data: *mut RegexReduxTestData) -> *mut u8 { + unsafe { + (*test_data).input.as_mut_ptr() + } +} + +#[no_mangle] +pub extern "C" fn bench_regex_redux(test_data: *mut RegexReduxTestData) { + unsafe { + let result = regex_redux::run(&*(*test_data).input); + (*test_data).output = Some(result); + } +} diff --git a/benches/wasm-kernel/src/regex_redux.rs b/benches/wasm-kernel/src/regex_redux.rs new file mode 100644 index 0000000..fae1f5f --- /dev/null +++ b/benches/wasm-kernel/src/regex_redux.rs @@ -0,0 +1,17 @@ +//! Initially it was supposed to be like [1]. However, it turned out +//! that executing this code in wasmi is way too slow. +//! +//! [1]: https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/regexredux-rust-2.html + +lazy_static! 
{ + static ref REGEX: ::regex::bytes::Regex = + { ::regex::bytes::Regex::new("agggtaa[cgt]|[acg]ttaccct").unwrap() }; +} + +pub fn prepare() { + ::lazy_static::initialize(&REGEX); +} + +pub fn run(seq: &[u8]) -> usize { + REGEX.find_iter(seq).count() +} diff --git a/benches/wasm-kernel/src/rev_complement.rs b/benches/wasm-kernel/src/rev_complement.rs new file mode 100644 index 0000000..1d4c03a --- /dev/null +++ b/benches/wasm-kernel/src/rev_complement.rs @@ -0,0 +1,164 @@ +// Adapted version from benchmarks game. In particular +// rayon is removed. +// +// https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/revcomp-rust-3.html + +// The Computer Language Benchmarks Game +// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ +// +// contributed by the Rust Project Developers +// contributed by Cristi Cobzarenco +// contributed by TeXitoi +// contributed by Matt Brubeck + +use std::io::BufRead; +use std::mem::replace; +use std::{cmp, io}; + +/// Lookup table to find the complement of a single FASTA code. +fn build_table() -> [u8; 256] { + let mut table = [0; 256]; + for (i, x) in table.iter_mut().enumerate() { + *x = match i as u8 as char { + 'A' | 'a' => 'T', + 'C' | 'c' => 'G', + 'G' | 'g' => 'C', + 'T' | 't' => 'A', + 'U' | 'u' => 'A', + 'M' | 'm' => 'K', + 'R' | 'r' => 'Y', + 'W' | 'w' => 'W', + 'S' | 's' => 'S', + 'Y' | 'y' => 'R', + 'K' | 'k' => 'M', + 'V' | 'v' => 'B', + 'H' | 'h' => 'D', + 'D' | 'd' => 'H', + 'B' | 'b' => 'V', + 'N' | 'n' => 'N', + i => i, + } as u8; + } + table +} + +/// Utilities for splitting chunks off of slices. +trait SplitOff { + fn split_off_left(&mut self, n: usize) -> Self; + fn split_off_right(&mut self, n: usize) -> Self; +} +impl<'a, T> SplitOff for &'a mut [T] { + /// Split the left `n` items from self and return them as a separate slice. 
+ fn split_off_left(&mut self, n: usize) -> Self { + let n = cmp::min(self.len(), n); + let data = replace(self, &mut []); + let (left, data) = data.split_at_mut(n); + *self = data; + left + } + /// Split the right `n` items from self and return them as a separate slice. + fn split_off_right(&mut self, n: usize) -> Self { + let len = self.len(); + let n = cmp::min(len, n); + let data = replace(self, &mut []); + let (data, right) = data.split_at_mut(len - n); + *self = data; + right + } +} + +/// Length of a normal line including the terminating \n. +const LINE_LEN: usize = 61; + +/// Compute the reverse complement for two contiguous chunks without line breaks. +fn reverse_chunks(left: &mut [u8], right: &mut [u8], table: &[u8; 256]) { + for (x, y) in left.iter_mut().zip(right.iter_mut().rev()) { + *y = table[replace(x, table[*y as usize]) as usize]; + } +} + +/// Compute the reverse complement on chunks from opposite ends of a sequence. +/// +/// `left` must start at the beginning of a line. If there are an odd number of +/// bytes, `right` will initially be 1 byte longer than `left`; otherwise they +/// will have equal lengths. +fn reverse_complement_left_right( + mut left: &mut [u8], + mut right: &mut [u8], + trailing_len: usize, + table: &[u8; 256], +) { + // Each iteration swaps one line from the start of the sequence with one + // from the end. + while left.len() > 0 || right.len() > 0 { + // Get the chunk up to the newline in `right`. + let mut a = left.split_off_left(trailing_len); + let mut b = right.split_off_right(trailing_len); + right.split_off_right(1); // Skip the newline in `right`. + + // If we've reached the middle of the sequence here and there is an + // odd number of bytes remaining, the odd one will be on the right. + if b.len() > a.len() { + let mid = b.split_off_left(1); + mid[0] = table[mid[0] as usize]; + } + + reverse_chunks(a, b, table); + + // Get the chunk up to the newline in `left`. 
+ let n = LINE_LEN - 1 - trailing_len; + a = left.split_off_left(n); + b = right.split_off_right(n); + left.split_off_left(1); // Skip the newline in `left`. + + // If we've reached the middle of the sequence and there is an odd + // number of bytes remaining, the odd one will now be on the left. + if a.len() > b.len() { + let mid = a.split_off_right(1); + mid[0] = table[mid[0] as usize] + } + + reverse_chunks(a, b, table); + } +} + +/// Compute the reverse complement of one sequence. +fn reverse_complement(seq: &mut [u8], table: &[u8; 256]) { + let len = seq.len() - 1; + let seq = &mut seq[..len]; // Drop the last newline + let trailing_len = len % LINE_LEN; + let (left, right) = seq.split_at_mut(len / 2); + reverse_complement_left_right(left, right, trailing_len, table); +} + +/// Read sequences from stdin and print the reverse complement to stdout. +pub fn run(input: &[u8]) -> Vec<u8> { + let mut buf = Vec::with_capacity(input.len()); + + let mut input = io::Cursor::new(input); + + // Read the first header line. + input.read_until(b'\n', &mut buf).unwrap(); + + // Read sequence data line-by-line, splitting on headers. + let mut line_start = buf.len(); + let mut seq_start = line_start; + let mut seqs = vec![]; + while input.read_until(b'\n', &mut buf).unwrap() > 0 { + if buf[line_start] == b'>' { + // Found the start of a new sequence. + seqs.push(seq_start..line_start); + seq_start = buf.len(); + } + line_start = buf.len(); + } + seqs.push(seq_start..buf.len()); + + // Compute the reverse complements of each sequence. + let table = build_table(); + for seq in seqs { + reverse_complement(&mut buf[seq], &table); + } + + buf +} From 75406dd8ff0551c87bdea67d73b77532f0aa4db3 Mon Sep 17 00:00:00 2001 From: Leonardo Yvens Date: Thu, 21 Jun 2018 09:47:49 -0300 Subject: [PATCH 2/2] Use transmute instead of casts In RuntimeValue conversion. (#102) Casts have arithmetic semantics, and under some build configurations Rust will panic when encountering an arithmetic overflow. 
Use a transmute instead since it's what we mean. The previous code worked, but still I added a test for good measure. --- src/tests/mod.rs | 13 +++++++++++++ src/value.rs | 16 +++++++++++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/tests/mod.rs b/src/tests/mod.rs index e07c3a8..3ad8ba8 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -17,6 +17,19 @@ fn assert_error_properties() { assert_std_err_impl::(); } +/// Test that converting an u32 (u64) that does not fit in an i32 (i64) +/// to a RuntimeValue and back works as expected and the number remains unchanged. +#[test] +fn unsigned_to_runtime_value() { + use super::RuntimeValue; + + let overflow_i32: u32 = ::std::i32::MAX as u32 + 1; + assert_eq!(RuntimeValue::from(overflow_i32).try_into::<u32>().unwrap(), overflow_i32); + + let overflow_i64: u64 = ::std::i64::MAX as u64 + 1; + assert_eq!(RuntimeValue::from(overflow_i64).try_into::<u64>().unwrap(), overflow_i64); +} + pub fn parse_wat(source: &str) -> Module { let wasm_binary = wabt::wat2wasm(source).expect("Failed to parse wat source"); Module::from_buffer(wasm_binary).expect("Failed to load parsed module") diff --git a/src/value.rs b/src/value.rs index 08cdff2..908829b 100644 --- a/src/value.rs +++ b/src/value.rs @@ -1,6 +1,7 @@ use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; use nan_preserving_float::{F32, F64}; use std::io; +use std::mem::transmute; use std::{f32, i32, i64, u32, u64}; use TrapKind; @@ -188,13 +189,13 @@ impl From for RuntimeValue { impl From<u32> for RuntimeValue { fn from(val: u32) -> Self { - RuntimeValue::I32(val as i32) + RuntimeValue::I32(val.transmute_into()) } } impl From<u64> for RuntimeValue { fn from(val: u64) -> Self { - RuntimeValue::I64(val as i64) + RuntimeValue::I64(val.transmute_into()) } } @@ -403,11 +404,8 @@ macro_rules! 
impl_transmute_into_as { } impl_transmute_into_as!(i8, u8); -impl_transmute_into_as!(u8, i8); impl_transmute_into_as!(i32, u32); -impl_transmute_into_as!(u32, i32); impl_transmute_into_as!(i64, u64); -impl_transmute_into_as!(u64, i64); macro_rules! impl_transmute_into_npf { ($npf:ident, $float:ident, $signed:ident, $unsigned:ident) => { @@ -468,6 +466,14 @@ impl TransmuteInto<f64> for i64 { fn transmute_into(self) -> f64 { f64::from_bits(self as u64) } } +impl TransmuteInto<i32> for u32 { + fn transmute_into(self) -> i32 { unsafe { transmute(self) } } +} + +impl TransmuteInto<i64> for u64 { + fn transmute_into(self) -> i64 { unsafe { transmute(self) } } +} + impl LittleEndianConvert for i8 { fn into_little_endian(self) -> Vec<u8> { vec![self as u8]