Add some more benches (#97)

* Add rev_complement test

# Conflicts:
#	benches/src/lib.rs
#	benches/wasm-kernel/src/lib.rs

* Add redux_regex test.

# Conflicts:
#	benches/wasm-kernel/Cargo.toml

* Fmt and return an original header

* Use ManuallyDrop

* Really initialize lazy static.
This commit is contained in:
Sergey Pepyakin 2018-06-19 16:29:33 +03:00 committed by GitHub
parent f305b3cd1f
commit 94b797de44
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 848 additions and 15 deletions

View File

@ -6,3 +6,4 @@ authors = ["Sergey Pepyakin <s.pepyakin@gmail.com>"]
[dependencies]
wasmi = { path = ".." }
assert_matches = "1.2"
wabt = "0.3"

View File

@ -4,6 +4,7 @@ extern crate test;
extern crate wasmi;
#[macro_use]
extern crate assert_matches;
extern crate wabt;
use std::error;
use std::fs::File;
@ -20,6 +21,9 @@ fn load_from_file(filename: &str) -> Result<Module, Box<error::Error>> {
Ok(Module::from_buffer(buf)?)
}
const REVCOMP_INPUT: &'static [u8] = include_bytes!("./revcomp-input.txt");
const REVCOMP_OUTPUT: &'static [u8] = include_bytes!("./revcomp-output.txt");
#[bench]
fn bench_tiny_keccak(b: &mut Bencher) {
let wasm_kernel = load_from_file(
@ -41,3 +45,234 @@ fn bench_tiny_keccak(b: &mut Bencher) {
.unwrap();
});
}
#[bench]
fn bench_rev_comp(b: &mut Bencher) {
let wasm_kernel = load_from_file(
"./wasm-kernel/target/wasm32-unknown-unknown/release/wasm_kernel.wasm",
).expect("failed to load wasm_kernel. Is `build.rs` broken?");
let instance = ModuleInstance::new(&wasm_kernel, &ImportsBuilder::default())
.expect("failed to instantiate wasm module")
.assert_no_start();
// Allocate buffers for the input and output.
let test_data_ptr: RuntimeValue = {
let input_size = RuntimeValue::I32(REVCOMP_INPUT.len() as i32);
assert_matches!(
instance.invoke_export("prepare_rev_complement", &[input_size], &mut NopExternals),
Ok(Some(v @ RuntimeValue::I32(_))) => v,
"",
)
};
// Get the pointer to the input buffer.
let input_data_mem_offset = assert_matches!(
instance.invoke_export("rev_complement_input_ptr", &[test_data_ptr], &mut NopExternals),
Ok(Some(RuntimeValue::I32(v))) => v as u32,
"",
);
// Copy test data inside the wasm memory.
let memory = instance.export_by_name("memory")
.expect("Expected export with a name 'memory'")
.as_memory()
.expect("'memory' should be a memory instance")
.clone();
memory
.set(input_data_mem_offset, REVCOMP_INPUT)
.expect("can't load test data into a wasm memory");
b.iter(|| {
instance
.invoke_export("bench_rev_complement", &[test_data_ptr], &mut NopExternals)
.unwrap();
});
// Verify the result.
let output_data_mem_offset = assert_matches!(
instance.invoke_export("rev_complement_output_ptr", &[test_data_ptr], &mut NopExternals),
Ok(Some(RuntimeValue::I32(v))) => v as u32,
"",
);
let result = memory
.get(output_data_mem_offset, REVCOMP_OUTPUT.len())
.expect("can't get result data from a wasm memory");
assert_eq!(&*result, REVCOMP_OUTPUT);
}
#[bench]
fn bench_regex_redux(b: &mut Bencher) {
let wasm_kernel = load_from_file(
"./wasm-kernel/target/wasm32-unknown-unknown/release/wasm_kernel.wasm",
).expect("failed to load wasm_kernel. Is `build.rs` broken?");
let instance = ModuleInstance::new(&wasm_kernel, &ImportsBuilder::default())
.expect("failed to instantiate wasm module")
.assert_no_start();
// Allocate buffers for the input and output.
let test_data_ptr: RuntimeValue = {
let input_size = RuntimeValue::I32(REVCOMP_INPUT.len() as i32);
assert_matches!(
instance.invoke_export("prepare_regex_redux", &[input_size], &mut NopExternals),
Ok(Some(v @ RuntimeValue::I32(_))) => v,
"",
)
};
// Get the pointer to the input buffer.
let input_data_mem_offset = assert_matches!(
instance.invoke_export("regex_redux_input_ptr", &[test_data_ptr], &mut NopExternals),
Ok(Some(RuntimeValue::I32(v))) => v as u32,
"",
);
// Copy test data inside the wasm memory.
let memory = instance.export_by_name("memory")
.expect("Expected export with a name 'memory'")
.as_memory()
.expect("'memory' should be a memory instance")
.clone();
memory
.set(input_data_mem_offset, REVCOMP_INPUT)
.expect("can't load test data into a wasm memory");
b.iter(|| {
instance
.invoke_export("bench_regex_redux", &[test_data_ptr], &mut NopExternals)
.unwrap();
});
}
#[bench]
fn fac_recursive(b: &mut Bencher) {
let wasm = wabt::wat2wasm(
r#"
;; Recursive factorial
(func (export "fac-rec") (param i64) (result i64)
(if (result i64) (i64.eq (get_local 0) (i64.const 0))
(then (i64.const 1))
(else
(i64.mul (get_local 0) (call 0 (i64.sub (get_local 0) (i64.const 1))))
)
)
)
"#
).unwrap();
let module = Module::from_buffer(&wasm).unwrap();
let instance = ModuleInstance::new(&module, &ImportsBuilder::default())
.expect("failed to instantiate wasm module")
.assert_no_start();
b.iter(|| {
let value = instance
.invoke_export("fac-rec", &[RuntimeValue::I64(25)], &mut NopExternals);
assert_matches!(value, Ok(Some(RuntimeValue::I64(7034535277573963776))));
});
}
#[bench]
fn fac_opt(b: &mut Bencher) {
let wasm = wabt::wat2wasm(
r#"
;; Optimized factorial.
(func (export "fac-opt") (param i64) (result i64)
(local i64)
(set_local 1 (i64.const 1))
(block
(br_if 0 (i64.lt_s (get_local 0) (i64.const 2)))
(loop
(set_local 1 (i64.mul (get_local 1) (get_local 0)))
(set_local 0 (i64.add (get_local 0) (i64.const -1)))
(br_if 0 (i64.gt_s (get_local 0) (i64.const 1)))
)
)
(get_local 1)
)
"#
).unwrap();
let module = Module::from_buffer(&wasm).unwrap();
let instance = ModuleInstance::new(&module, &ImportsBuilder::default())
.expect("failed to instantiate wasm module")
.assert_no_start();
b.iter(|| {
let value = instance
.invoke_export("fac-opt", &[RuntimeValue::I64(25)], &mut NopExternals);
assert_matches!(value, Ok(Some(RuntimeValue::I64(7034535277573963776))));
});
}
// This is used for testing overhead of a function call
// is not too large.
#[bench]
fn recursive_ok(b: &mut Bencher) {
let wasm = wabt::wat2wasm(
r#"
(module
(func $call (export "call") (param i32) (result i32)
block (result i32)
get_local 0
get_local 0
i32.eqz
br_if 0
i32.const 1
i32.sub
call $call
end
)
)
"#
).unwrap();
let module = Module::from_buffer(&wasm).unwrap();
let instance = ModuleInstance::new(&module, &ImportsBuilder::default())
.expect("failed to instantiate wasm module")
.assert_no_start();
b.iter(|| {
let value = instance
.invoke_export("call", &[RuntimeValue::I32(8000)], &mut NopExternals);
assert_matches!(value, Ok(Some(RuntimeValue::I32(0))));
});
}
#[bench]
fn recursive_trap(b: &mut Bencher) {
let wasm = wabt::wat2wasm(
r#"
(module
(func $call (export "call") (param i32) (result i32)
block (result i32)
get_local 0
get_local 0
i32.eqz
br_if 0
i32.const 1
i32.sub
call $call
end
unreachable
)
)
"#
).unwrap();
let module = Module::from_buffer(&wasm).unwrap();
let instance = ModuleInstance::new(&module, &ImportsBuilder::default())
.expect("failed to instantiate wasm module")
.assert_no_start();
b.iter(|| {
let value = instance
.invoke_export("call", &[RuntimeValue::I32(1000)], &mut NopExternals);
assert_matches!(value, Err(_));
});
}

View File

@ -0,0 +1,171 @@
>ONE Homo sapiens alu
GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
GGCGACAGAGCGAGACTCCG
>TWO IUB ambiguity codes
cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg
tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa
NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt
cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga
gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa
HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca
tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt
tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt
acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct
tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt
gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa
accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt
RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt
tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag
cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg
ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat
actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg
YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa
KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata
aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa
aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg
gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc
tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK
tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt
ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg
ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa
BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt
aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc
tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc
cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac
aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga
tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga
aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD
gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg
ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV
taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa
ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat
gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg
gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa
tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt
tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt
taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca
cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag
aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt
cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt
ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW
attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag
ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa
attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc
tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta
>THREE Homo sapiens frequency
aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga
atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc
ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc
atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa
tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca
tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag
gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat
tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt
gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc
gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc
atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc
taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta
ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg
acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag
ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg
ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt
cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg
ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt
aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag
attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac
acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat
tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca
attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt
aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt
tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg
ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga
gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac
caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct
taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga
ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg
ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat
gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga
ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact
aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc
cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt
gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat
ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt
tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata
tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac
ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga
tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac
gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat
ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc
actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc
gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca
ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata
tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca
atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata
aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat
tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt
ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat
acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga
gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata
gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg
tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac
gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga
gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat
tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta
acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga
tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata
catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga
attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt
ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt
ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg
gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa
tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg
tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct
ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc
gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta
ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact
tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc
ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc
tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt
ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca
actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac
gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc
gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag
accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga
gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct
cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta
tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat
atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt
ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta
ggaagtgaaaagataaatat

View File

@ -0,0 +1,171 @@
>ONE Homo sapiens alu
CGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAC
CTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACA
GGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCAT
GTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAA
AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTC
TGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGG
GTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACC
ACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTG
GTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTA
CAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCT
GGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTC
TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAAT
TTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCT
GACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA
CCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGC
GCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCC
TCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTA
GTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGAT
CCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCT
TTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTC
ACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTG
GGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGT
TTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGG
CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAG
TCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCG
CCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGC
GCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGG
CCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGC
TGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCG
CCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCA
AGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCC
CGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTC
GAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGC
GTGAGCCACCGCGCCCGGCC
>TWO IUB ambiguity codes
TAGGDHACHATCRGTRGVTGAGWTATGYTGCTGTCABACDWVTRTAAGAVVAGATTTNDA
GASMTCTGCATBYTTCAAKTTACMTATTACTTCATARGGYACMRTGTTTTYTATACVAAT
TTCTAKGDACKADACTATATNTANTCGTTCACGBCGYSCBHTANGGTGATCGTAAAGTAA
CTATBAAAAGATSTGWATBCSGAKHTTABBAACGTSYCATGCAAVATKTSKTASCGGAAT
WVATTTNTCCTTCTTCTTDDAGTGGTTGGATACVGTTAYMTMTBTACTTTHAGCTAGBAA
AAGAGKAAGTTRATWATCAGATTMDDTTTAAAVAAATATTKTCYTAAATTVCNKTTRACG
ADTATATTTATGATSADSCAATAWAGCGRTAGTGTAAGTGACVGRADYGTGCTACHVSDT
CTVCARCSYTTAATATARAAAATTTAATTTACDAATTGBACAGTAYAABATBTGCAGBVG
TGATGGDCAAAATBNMSTTABKATTGGSTCCTAGBTTACTTGTTTAGTTTATHCGATSTA
AAGTCGAKAAASTGTTTTAWAKCAGATATACTTTTMTTTTGBATAGAGGAGCMATGATRA
AAGGNCAYDCCDDGAAAGTHGBTAATCKYTBTACBGTBCTTTTTGDTAASSWTAAWAARA
TTGGCTAAGWGRADTYACATAGCTCBTAGATAWAGCAATNGTATMATGTTKMMAGTAWTC
CCNTSGAAWATWCAAAAMACTGAADNTYGATNAATCCGAYWNCTAACGTTAGAGDTTTTC
ATCTGGKRTAVGAABVCTGWGBTCTDVGKATTBTCTAAGGVADAAAVWTCTAGGGGAGGG
TTAGAACAATTAAHTAATNAAATGCATKATCTAAYRTDTCAGSAYTTYHGATRTTWAVTA
BGNTCDACAGBCCRCAGWCRTCABTGMMAWGMCTCAACCGATRTGBCAVAATCGTDWDAA
CAYAWAATWCTGGTAHCCCTAAGATAACSCTTAGTGSAACAWTBGTCDTTDGACWDBAAC
HTTTNGSKTYYAAYGGATNTGATTTAARTTAMBAATCTAAGTBTCATYTAACTTADTGTT
TCGATACGAAHGGCYATATACCWDTKYATDCSHTDTCAAAATGTGBACTGSCCVGATGTA
TCMMAGCCTTDAAABAATGAAGAGTAACTHATMGVTTAATAACCCGGTTVSANTGCAATT
GTGAGATTTAMGTTTAMAAYGCTGACAYAAAAAGGCACAMYTAAGVGGCTGGAABVTACG
GATTSTYGTBVAKTATWACCGTGTKAGTDTGTATGTTTAAAGGAAAAAGTAACATARAAA
GGTYCAMNYAAABTATAGNTSATANAGTCATCCTATWADKAACTRGTMSACDGTATSAYT
AAHSHGTAABYGACTYTATADTGSTATAGAGAAATCGNTAAAGGAAATCAGTTGTNCYMV
TNACDRTATBNATATASTAGAAMSCGGGANRCKKMCAAACATTNAGTCTRMAATBMTACC
CGTACTTCTBGDSYAATWGAAAATGACADDCHAKAAAYATATTKTTTTCACANACWAGAA
AKATCCTTATTAYKHKCTAAACARTATTTTDATBTVWCYGCAATACTAGGKAAASTTDGA
MGGCHTTHAATVCAHDRYAGGRCTATACGTCMAGAGAGCTBTHGNACARTCCBDCTAAGA
GCGGCTTTARTAAAGAATCCNAGTAWBTGACTTGAATTACWTVACAGAAABCAATNAAAC
CGTNTRANTTGAYCMAWBADTANABRGGTKTHTWTAGTTVCTMBKTAGMTVKCCAGCANT
TVAGSWTTAGCCGCRHTTTCCTTHNTATTAAGAAGAATAGGMTRAARTCTABGTACDTTT
TATAAVDHAHTATAGATCCTAGTAAGYTWATDWCATGAGGGATAGTAAMDMNGBASTWAM
TSTATRBAYDABATGTATATYCGCACTGTTTTAACMCWBTATAWAGTATBTSTATVTTAR
CCTMTTAAKADATCAACTAATYTSVTAKGDATTATGCKTCAYCAKAATACTTKAANGAGT
ATTSDAGATCGGAAATACTTAAYAAVGTATMCGCTTGTGTDCTAATYTATTTTATTTWAA
CAGWRCTATGTAGMTGTTTGTTYKTNGTTKTCAGAACNTRACCTACKTGSRATGTGGGGG
CTGTCATTAAGTAAATNGSTTABCCCCTCGCAGCTCWHTCGCGAAGCAVATGCKACGHCA
ACAKTTAATAACASAAADATTWNYTGTAATTGTTCGTMHACHTWATGTGCWTTTTGAAHY
ACTTTGTAYAMSAAACTTAADAAATATAGTABMATATYAATGSGGTAGTTTGTGTBYGGT
TWSGSVGWMATTDMTCCWWCABTCSVACAGBAATGTTKATBGTCAATAATCTTCTTAAAC
ARVAATHAGYBWCTRWCABGTWWAATCTAAGTCASTAAAKTAAGVKBAATTBGABACGTA
AGGTTAAATAAAAACTRMDTWBCTTTTTAATAAAAGATMGCCTACKAKNTBAGYRASTGT
ASSTCGTHCGAAKTTATTATATTYTTTGTAGAACATGTCAAAACTWTWTHGKTCCYAATA
AAGTGGAYTMCYTAARCSTAAATWAKTGAATTTRAGTCTSSATACGACWAKAASATDAAA
TGYYACTSAACAAHAKTSHYARGASTATTATTHAGGYGGASTTTBGAKGATSANAACACD
TRGSTTRAAAAAAAACAAGARTCVTAGTAAGATAWATGVHAAKATWGAAAAGTYAHVTAC
TCTGRTGTCAWGATRVAAKTCGCAAVCGASWGGTTRTCSAMCCTAACASGWKKAWDAATG
ACRCBACTATGTGTCTTCAAAHGSCTATATTTCGTVWAGAAGTAYCKGARAKSGKAGTAN
TTTCYACATWATGTCTAAAADMDTWCAATSTKDACAMAADADBSAAATAGGCTHAHAGTA
CGACVGAATTATAAAGAHCCVAYHGHTTTACATSTTTATGNCCMTAGCATATGATAVAAG
>THREE Homo sapiens frequency
ATATTTATCTTTTCACTTCCTACATTGGTCAGACCATTATTCGACACGTGGCGTCATTTT
GTCATACCGGGTAATGTTGGAAACAAAACGTACTGATAAAATACTGAGTTGTAAACTCTA
ATCAGATAACGCGCTTGGATATTAAGATTCACACAGGGGTTTCGGCTGTAAAAAAACTTG
TGGAGCTGTTCTGGGACAGATAAGTTGTACCTCGTACTTAGCTAATTAATGAACCAACTG
ATTACGATAGAACAATTCTGAGGCCGCCAGGACAGCCAAATTTTAATCTTATAAAGCTGG
AAACAGCCGGTATTAGCTTCTCGCATACTTTGCCTGCATTGGTACCTTACAGATATCAGC
GTAGTCATATACACCTCGGTCTCAGCTAAGCTTGTATCTCTTAGAGTAGTTCAAAGATAG
TGGACAATACCTGTGGAATCGATTGCAGATATGGATTTATTTAACTACTGAGTCTCATTC
ACAAGCTAAGCAAGGAGCACGTTTTGGTGCCGGCATACCGATTTGCTATCATGTCAGCAA
ATTTGCGTTGTATTCCTAGTTGCACCCATTAAGGCCACACTCCGAACCTAATTATTACAT
CGCAAAGACATGTACGAAGGACCCGATGTCGAATAGAAGGGAGGACTGTTCATTGGAAGC
TAGACCAGAGGAATCGCAAAGATGCAACTCTTACAATAAAAATCTAATTTCAGTCAACAC
GCAATTTCTATAAGGTTTCCGATAATAATGAACCGTCTTCCACAGGGGAATTTGCCATGC
TCGTAAAAGTAGTTAATCCAAGTAGAAGAAATTTTGATAATGTTTTAAGTTGGCACGAAG
GAATTCAGAGAGATCTTACCTAACAAAGGCATTAGTAGATGTTCCTTGGTTCACACTCGG
TCAATCAGAGCACATACTACGGGCGATACCGGGAATGACACAACATCAATGAGATTGTTA
AGTGAGGTAATTGACTTTAGAGGACTCGATCAGTATACTGTCACTATGAACATCGTATTA
ATTGTTATCCGATATATACACCACCGATTTGCTTGTGCAAGGTTACAGACCCATTCGATA
AATACAAACACGGAGCGATATTATTTAAGGAGTGCTGTCTTCAAAAGAATTATTCCCACA
CCGACATAAGAACTTCGCTCCGTCATTCCAGATTTAAATAACATAACGTAACGCTTTGCT
GATAACATAACATAACCGAGAATTTGCTTAGGAAATTTGGAGCAATATTGCATTGTTTCT
CAGTCATCACAAGGCCCGCCAAAGAACTCTGAGAATCAGGATTCAACATGATTGGTAAGA
CTCTATATATATAACTTAATTCTTGTGTCCGGAGATAGAAAGAGGACGAGAGATACTACG
AAAGAAAGTGTACTTCGATGTATCAATTCAGACGCCTTCTCTATCATCAACATTATAGGT
CTCGTATATGCTCGGCGCGATCTGCTTCTCTCCGCCAATAGCCCCATAGTGTATTTCAAG
CGCAGTAACAGTGAAATCGTTACGAAGGTAGGGATGTTGCTTATAATTGTCGTAACTTAT
CGCTTATGTATCTTTCAAGAATGAACGGCAGCATATACATACGTTCTACCTTTAGCTACA
AAGCATCCATATACTCCCTCTCATGATTGAAACTCTTCCCTATTTTGTAGCCAATAGTGA
AAGCGTATTAGTATAAATTCGTCGGTTTTTCACTCGCAACTGTTATACTCTGCAAACAAA
CGAAAGCCTCATAGTACAAACCTAAAGCTACATACTTCATCATTGGCAGACCAGTGGCGG
TATTTCTACGGAAGCATCACTATAGATATAAAGTTTCCCTTCATGTACGTCTGTTAACCA
TATCACAAGAAACTGCTATCTCTGTCACGTAACAATTCACGCGCCTTATCGCCAAATGTT
CATATATGCGCGGTATACGTATGAACGAATACTAATTAGTATAACGGAGGATTCACGGGA
GGGATACTTGGGGCATTTATAAATCGTCTAAAAATTTTCTATCAGCACTTGCGGGTTATA
GTGGATTACTAGGCAACATAATATTCTGTATTGGTCCAAATGACGCTATAGATAAATTAG
CAAAATACATTGTTTCCATTTATGTAAGTCGAAACTCCAGGACTCCCGGGAACCAGTTAA
ACCGTCTGGAAAAGACACATTGTGAGCGGGACTTCAATGATAGCTTTCAATGAGCTTCTC
ATGCTTGGGGTCTGTACATATATGTTGGCGAAATTATCGTCTGTATTCTGTTATGCTTTG
ATCATGGGTTATTAGTATAGTGTCCGGTTAAGTACCAATACCGCTAGAGACCCGACCTAA
GTCGATAACTAACGATCATCGACGTAAGGATCGTCTCGATCAGTACTTCAGTCTAGATCT
GGGAATAGTAACTCGTTAGTGAACTATGTCGTGTCATAACTCTAAAATGCAATCAAATCT
TATTATTGAGTATTGATTATATAAAGCATCCGCTTAGCTTTACCCTCAAATGTTATATGC
AATTTAAAGCGCTTGATATCGTCTACTCAAGTTCAGGTTTCACATGGCCGCAACGTGACG
TTATTAGAGGTGGGTCATCATCTCTGAGGCTAGTGATGTTGAATACTCATTGAATGGGAA
GTGGAATACCATGCTCGTAGGTAACAGCATGACCTATAAAATATACTATGGGTGTGTGGT
AGATCAATATTGTTCAAGCATATCGTAACAATAACGGCTGAAATGTTACTGACATGAAAG
AGGGAGTCCAAACCATTCTAACAGCTGATCAAGTCGTCTAAAAACGCCTGGTTCAGCCTT
AAGAGTTATAAGCCAGACAAATTGTATCAATAGAGAATCCGTAAATTCCTCGGCCAACCT
CTTGCAAAGACATCACTATCAATATACTACCGTGATCTTAATTAGTGAACTTATATAAAT
ATCTACAACCAGATTCAACGGAAAAGCTTTAGTGGATTAGAAATTGCCAAGAATCACATT
CATGTGGGTTCGAATGCTTTAGTAATACCATTTCGCCGAGTAGTCACTTCGCTGAACTGT
CGTAAATTGCTATGACATAATCGAAAAGGATTGTCAAGAGTCGATTACTGCGGACTAATA
ATCCCCACGGGGGTGGTCTCATGTCTCCCCAGGCGAGTGGGGACGGTTGATAAACACGCT
GCATCGCGGACTGATGTTCCCAGTATTACATAGTCACATTGGATTGCGAGTAGTCTACCT
ATTTATGAGCGAGAGATGCCTCTAACTACTTCGACTTTTAAAACCTTTCCACGCCAGTAT
TCGGCGAAAGGGAAGTATTAAGGGTTGTCATAATTAAGCTGATACCACTTCAGACTTTGC
TCTACTTCTGTCTTTCATTGGTTTAGTAAAGTCTGTCCATTCGTCGAGACCGTCTTTTGC
AGCCTCATTCTACCAACTGCTCCGACTCTTAGTCTGCTTCTCCCAGCGTTATAACAAGAG
GCATTTTGTCATCCTTAAAACAATAATAAAGAACTCGGAGCACTGATATAATGACTGAAT
TAGAACCGCTTAAAAATACAACGAATAGATAAGACTATCGGATAAGATCTAATATGTAGT
GATTAAGCCCTTTATTAATTAATAATAGTTACCCTTTCTGATGTAACGCGACATATTACG
ATTTAGTGGCACGTCTGAATTGCAAAGCAGATCTCTACCCGATTTTTATTATAAATCCCG
TATACATCTTGACTTGAGTAATTGTTCATCTTTTTATATCTCTTCGTACTACAAATAATT
AATATCTCAACCCGTATTGTGTGATTCTAATTACCAACAGAATACGAGGAGGTTTTTGCT
TAGGGCCATATATAATGAATCTATCTCGTTTATTCGCGGAACCCGAGATAACATTACGAT
GTAACTATTTTAGAGAACTTAATACAAGAAACATTGCTGATTACTCATAACTAAATGCTT
GGTAATATATCCTCAGTGCCCCTACCATCTTTTACGCAGGGATGTAATTACTTAGGATTC
ATTGTGTAAGAATTACAATGAACGATGGATATGAAGGCATGTTGCGAGGTGTTCCTTGGT
ATGTGAAGTTCGCAGGGCAACAAAAATTTCGCAGAATAGGCCTCAAAGTATTGGTAAAGA
AGACAACTAATCATCACGAGCTTCTGATATCAATACGAACGAGTCCTGTGATGGATGAAA
GAAAGTCGTATCGAAAATGTCAAGAGTCTGCCCAATGTAACTTACTTCAAAAAATAACGC
TTCCGCCAAGTACGTTCGAATAAACGTAATTTTAAAAATACATAAGGGGTGTTAGAAAGT
AAGCGACGGGATATAAGTTAGACTCAAGATTCCGCCGTAAAACGAGACTGATTCCGAAGA
TTGTTCGTGGATCTGGTCATGACTTTCACTGAGTAAGGAGTTTCGACATATGTCAATAAA
CACAAAAATAGAAGCTATTCGATCTGAAAAATATTAGGACAAGAAACTATCTCACGCTAG
CCCAGAATATTCACTCACCCACGGGCGATACTAAAGCACTATATAGTCGCGTGATTACTA
TACATATGGTACACATAAGAATCACGATCAGGTTCTCAATTTTCAACAATATATGTTTAT
TTGCATAGGTAATATTAGGCCTTTAAGAGAAGGATGGGTGAGATACTCCGGGGATGGCGG
CAATAAAGAAAAACACGATATGAGTAATAGGATCCTAATATCTTGGCGAGAGACTTAAGG
TACGAATTTTGCGCAATCTATTTTTTACTTGGCCAGAATTCATGTATGGTATAAGTACGA
ACTTTTTTGATCACTTTCATGGCTACCTGATTAGGATAGTTTGAGGAATTTCCCAAATAT
ACCGATTTAATATACACTAGGGCTTGTCACTTTGAGTCAGAAAAAGAATATAATTACTTA
GGGTAATGCTGCATACATATTCTTATATTGCAAAGGTTCTCTGGGTAATCTTGAGCCTTC
ACGATACCTGGTGAAGTGTT

View File

@ -8,7 +8,8 @@ crate-type = ["cdylib"]
[dependencies]
tiny-keccak = "1.4.2"
rlibc = "1.0"
regex = "0.2.10"
lazy_static = "1.0"
[profile.release]
panic = "abort"

View File

@ -1,21 +1,13 @@
#![no_std]
#![feature(lang_items)]
#![feature(core_intrinsics)]
#![feature(panic_implementation)]
extern crate rlibc;
extern crate tiny_keccak;
extern crate regex;
#[macro_use]
extern crate lazy_static;
use std::mem::ManuallyDrop;
use tiny_keccak::Keccak;
#[no_mangle]
#[panic_implementation]
pub fn panic_fmt(_info: &::core::panic::PanicInfo) -> ! {
use core::intrinsics;
unsafe {
intrinsics::abort();
}
}
mod rev_complement;
mod regex_redux;
pub struct TinyKeccakTestData {
data: &'static [u8],
@ -48,3 +40,84 @@ pub extern "C" fn bench_tiny_keccak(test_data: *const TinyKeccakTestData) {
keccak.finalize((*test_data).result);
}
}
pub struct RevComplementTestData {
input: ManuallyDrop<Box<[u8]>>,
output: ManuallyDrop<Box<[u8]>>,
}
#[no_mangle]
pub extern "C" fn prepare_rev_complement(size: usize) -> *mut RevComplementTestData {
let input = vec![0; size];
let output = vec![0; size];
let test_data = Box::new(
RevComplementTestData {
input: ManuallyDrop::new(input.into_boxed_slice()),
output: ManuallyDrop::new(output.into_boxed_slice()),
}
);
// Basically leak the pointer to the test data. This shouldn't be harmful since `prepare` is called
// only once per bench run (not for the iteration), and afterwards whole memory instance is discarded.
Box::into_raw(test_data)
}
#[no_mangle]
pub extern "C" fn rev_complement_input_ptr(test_data: *mut RevComplementTestData) -> *mut u8 {
unsafe {
(*test_data).input.as_mut_ptr()
}
}
#[no_mangle]
pub extern "C" fn rev_complement_output_ptr(test_data: *mut RevComplementTestData) -> *const u8 {
unsafe {
(*test_data).output.as_ptr()
}
}
#[no_mangle]
pub extern "C" fn bench_rev_complement(test_data: *mut RevComplementTestData) {
unsafe {
let result = rev_complement::run(&*(*test_data).input);
(*test_data).output.copy_from_slice(&result);
}
}
pub struct RegexReduxTestData {
input: ManuallyDrop<Box<[u8]>>,
output: Option<usize>,
}
#[no_mangle]
pub extern "C" fn prepare_regex_redux(size: usize) -> *mut RegexReduxTestData {
regex_redux::prepare();
let input = vec![0; size];
let test_data = Box::new(
RegexReduxTestData {
input: ManuallyDrop::new(input.into_boxed_slice()),
output: None,
}
);
// Basically leak the pointer to the test data. This shouldn't be harmful since `prepare` is called
// only once per bench run (not for the iteration), and afterwards whole memory instance is discarded.
Box::into_raw(test_data)
}
#[no_mangle]
pub extern "C" fn regex_redux_input_ptr(test_data: *mut RegexReduxTestData) -> *mut u8 {
unsafe {
(*test_data).input.as_mut_ptr()
}
}
#[no_mangle]
pub extern "C" fn bench_regex_redux(test_data: *mut RegexReduxTestData) {
unsafe {
let result = regex_redux::run(&*(*test_data).input);
(*test_data).output = Some(result);
}
}

View File

@ -0,0 +1,17 @@
//! Initially it supposed to be like [1]. However it turned out
//! that executing this code in wasmi way too slow.
//!
//! [1]: https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/regexredux-rust-2.html
lazy_static! {
static ref REGEX: ::regex::bytes::Regex =
{ ::regex::bytes::Regex::new("agggtaa[cgt]|[acg]ttaccct").unwrap() };
}
pub fn prepare() {
::lazy_static::initialize(&REGEX);
}
pub fn run(seq: &[u8]) -> usize {
REGEX.find_iter(seq).count()
}

View File

@ -0,0 +1,164 @@
// Adapted version from benchmarks game. In particular
// rayon is removed.
//
// https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/revcomp-rust-3.html
// The Computer Language Benchmarks Game
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
//
// contributed by the Rust Project Developers
// contributed by Cristi Cobzarenco
// contributed by TeXitoi
// contributed by Matt Brubeck
use std::io::BufRead;
use std::mem::replace;
use std::{cmp, io};
/// Lookup table to find the complement of a single FASTA code.
fn build_table() -> [u8; 256] {
let mut table = [0; 256];
for (i, x) in table.iter_mut().enumerate() {
*x = match i as u8 as char {
'A' | 'a' => 'T',
'C' | 'c' => 'G',
'G' | 'g' => 'C',
'T' | 't' => 'A',
'U' | 'u' => 'A',
'M' | 'm' => 'K',
'R' | 'r' => 'Y',
'W' | 'w' => 'W',
'S' | 's' => 'S',
'Y' | 'y' => 'R',
'K' | 'k' => 'M',
'V' | 'v' => 'B',
'H' | 'h' => 'D',
'D' | 'd' => 'H',
'B' | 'b' => 'V',
'N' | 'n' => 'N',
i => i,
} as u8;
}
table
}
/// Utilities for splitting chunks off of slices.
trait SplitOff {
fn split_off_left(&mut self, n: usize) -> Self;
fn split_off_right(&mut self, n: usize) -> Self;
}
impl<'a, T> SplitOff for &'a mut [T] {
/// Split the left `n` items from self and return them as a separate slice.
fn split_off_left(&mut self, n: usize) -> Self {
let n = cmp::min(self.len(), n);
let data = replace(self, &mut []);
let (left, data) = data.split_at_mut(n);
*self = data;
left
}
/// Split the right `n` items from self and return them as a separate slice.
fn split_off_right(&mut self, n: usize) -> Self {
let len = self.len();
let n = cmp::min(len, n);
let data = replace(self, &mut []);
let (data, right) = data.split_at_mut(len - n);
*self = data;
right
}
}
/// Length of a normal line including the terminating \n.
const LINE_LEN: usize = 61;
/// Compute the reverse complement for two contiguous chunks without line breaks.
fn reverse_chunks(left: &mut [u8], right: &mut [u8], table: &[u8; 256]) {
for (x, y) in left.iter_mut().zip(right.iter_mut().rev()) {
*y = table[replace(x, table[*y as usize]) as usize];
}
}
/// Compute the reverse complement on chunks from opposite ends of a sequence.
///
/// `left` must start at the beginning of a line. If there are an odd number of
/// bytes, `right` will initially be 1 byte longer than `left`; otherwise they
/// will have equal lengths.
fn reverse_complement_left_right(
mut left: &mut [u8],
mut right: &mut [u8],
trailing_len: usize,
table: &[u8; 256],
) {
// Each iteration swaps one line from the start of the sequence with one
// from the end.
while left.len() > 0 || right.len() > 0 {
// Get the chunk up to the newline in `right`.
let mut a = left.split_off_left(trailing_len);
let mut b = right.split_off_right(trailing_len);
right.split_off_right(1); // Skip the newline in `right`.
// If we've reached the middle of the sequence here and there is an
// odd number of bytes remaining, the odd one will be on the right.
if b.len() > a.len() {
let mid = b.split_off_left(1);
mid[0] = table[mid[0] as usize];
}
reverse_chunks(a, b, table);
// Get the chunk up to the newline in `left`.
let n = LINE_LEN - 1 - trailing_len;
a = left.split_off_left(n);
b = right.split_off_right(n);
left.split_off_left(1); // Skip the newline in `left`.
// If we've reached the middle of the sequence and there is an odd
// number of bytes remaining, the odd one will now be on the left.
if a.len() > b.len() {
let mid = a.split_off_right(1);
mid[0] = table[mid[0] as usize]
}
reverse_chunks(a, b, table);
}
}
/// Compute the reverse complement of one sequence.
fn reverse_complement(seq: &mut [u8], table: &[u8; 256]) {
let len = seq.len() - 1;
let seq = &mut seq[..len]; // Drop the last newline
let trailing_len = len % LINE_LEN;
let (left, right) = seq.split_at_mut(len / 2);
reverse_complement_left_right(left, right, trailing_len, table);
}
/// Read sequences from stdin and print the reverse complement to stdout.
pub fn run(input: &[u8]) -> Vec<u8> {
let mut buf = Vec::with_capacity(input.len());
let mut input = io::Cursor::new(input);
// Read the first header line.
input.read_until(b'\n', &mut buf).unwrap();
// Read sequence data line-by-line, splitting on headers.
let mut line_start = buf.len();
let mut seq_start = line_start;
let mut seqs = vec![];
while input.read_until(b'\n', &mut buf).unwrap() > 0 {
if buf[line_start] == b'>' {
// Found the start of a new sequence.
seqs.push(seq_start..line_start);
seq_start = buf.len();
}
line_start = buf.len();
}
seqs.push(seq_start..buf.len());
// Compute the reverse complements of each sequence.
let table = build_table();
for seq in seqs {
reverse_complement(&mut buf[seq], &table);
}
buf
}