Add some more benches (#97)
* Add rev_complement test # Conflicts: # benches/src/lib.rs # benches/wasm-kernel/src/lib.rs * Add redux_regex test. # Conflicts: # benches/wasm-kernel/Cargo.toml * Fmt and return an original header * Use ManuallyDrop * Really initialize lazy static.
This commit is contained in:
parent
f305b3cd1f
commit
94b797de44
|
@ -6,3 +6,4 @@ authors = ["Sergey Pepyakin <s.pepyakin@gmail.com>"]
|
|||
[dependencies]
|
||||
wasmi = { path = ".." }
|
||||
assert_matches = "1.2"
|
||||
wabt = "0.3"
|
||||
|
|
|
@ -4,6 +4,7 @@ extern crate test;
|
|||
extern crate wasmi;
|
||||
#[macro_use]
|
||||
extern crate assert_matches;
|
||||
extern crate wabt;
|
||||
|
||||
use std::error;
|
||||
use std::fs::File;
|
||||
|
@ -20,6 +21,9 @@ fn load_from_file(filename: &str) -> Result<Module, Box<error::Error>> {
|
|||
Ok(Module::from_buffer(buf)?)
|
||||
}
|
||||
|
||||
const REVCOMP_INPUT: &'static [u8] = include_bytes!("./revcomp-input.txt");
|
||||
const REVCOMP_OUTPUT: &'static [u8] = include_bytes!("./revcomp-output.txt");
|
||||
|
||||
#[bench]
|
||||
fn bench_tiny_keccak(b: &mut Bencher) {
|
||||
let wasm_kernel = load_from_file(
|
||||
|
@ -41,3 +45,234 @@ fn bench_tiny_keccak(b: &mut Bencher) {
|
|||
.unwrap();
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_rev_comp(b: &mut Bencher) {
|
||||
let wasm_kernel = load_from_file(
|
||||
"./wasm-kernel/target/wasm32-unknown-unknown/release/wasm_kernel.wasm",
|
||||
).expect("failed to load wasm_kernel. Is `build.rs` broken?");
|
||||
|
||||
let instance = ModuleInstance::new(&wasm_kernel, &ImportsBuilder::default())
|
||||
.expect("failed to instantiate wasm module")
|
||||
.assert_no_start();
|
||||
|
||||
// Allocate buffers for the input and output.
|
||||
let test_data_ptr: RuntimeValue = {
|
||||
let input_size = RuntimeValue::I32(REVCOMP_INPUT.len() as i32);
|
||||
assert_matches!(
|
||||
instance.invoke_export("prepare_rev_complement", &[input_size], &mut NopExternals),
|
||||
Ok(Some(v @ RuntimeValue::I32(_))) => v,
|
||||
"",
|
||||
)
|
||||
};
|
||||
|
||||
// Get the pointer to the input buffer.
|
||||
let input_data_mem_offset = assert_matches!(
|
||||
instance.invoke_export("rev_complement_input_ptr", &[test_data_ptr], &mut NopExternals),
|
||||
Ok(Some(RuntimeValue::I32(v))) => v as u32,
|
||||
"",
|
||||
);
|
||||
|
||||
// Copy test data inside the wasm memory.
|
||||
let memory = instance.export_by_name("memory")
|
||||
.expect("Expected export with a name 'memory'")
|
||||
.as_memory()
|
||||
.expect("'memory' should be a memory instance")
|
||||
.clone();
|
||||
memory
|
||||
.set(input_data_mem_offset, REVCOMP_INPUT)
|
||||
.expect("can't load test data into a wasm memory");
|
||||
|
||||
b.iter(|| {
|
||||
instance
|
||||
.invoke_export("bench_rev_complement", &[test_data_ptr], &mut NopExternals)
|
||||
.unwrap();
|
||||
});
|
||||
|
||||
// Verify the result.
|
||||
let output_data_mem_offset = assert_matches!(
|
||||
instance.invoke_export("rev_complement_output_ptr", &[test_data_ptr], &mut NopExternals),
|
||||
Ok(Some(RuntimeValue::I32(v))) => v as u32,
|
||||
"",
|
||||
);
|
||||
let result = memory
|
||||
.get(output_data_mem_offset, REVCOMP_OUTPUT.len())
|
||||
.expect("can't get result data from a wasm memory");
|
||||
assert_eq!(&*result, REVCOMP_OUTPUT);
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn bench_regex_redux(b: &mut Bencher) {
|
||||
let wasm_kernel = load_from_file(
|
||||
"./wasm-kernel/target/wasm32-unknown-unknown/release/wasm_kernel.wasm",
|
||||
).expect("failed to load wasm_kernel. Is `build.rs` broken?");
|
||||
|
||||
let instance = ModuleInstance::new(&wasm_kernel, &ImportsBuilder::default())
|
||||
.expect("failed to instantiate wasm module")
|
||||
.assert_no_start();
|
||||
|
||||
// Allocate buffers for the input and output.
|
||||
let test_data_ptr: RuntimeValue = {
|
||||
let input_size = RuntimeValue::I32(REVCOMP_INPUT.len() as i32);
|
||||
assert_matches!(
|
||||
instance.invoke_export("prepare_regex_redux", &[input_size], &mut NopExternals),
|
||||
Ok(Some(v @ RuntimeValue::I32(_))) => v,
|
||||
"",
|
||||
)
|
||||
};
|
||||
|
||||
// Get the pointer to the input buffer.
|
||||
let input_data_mem_offset = assert_matches!(
|
||||
instance.invoke_export("regex_redux_input_ptr", &[test_data_ptr], &mut NopExternals),
|
||||
Ok(Some(RuntimeValue::I32(v))) => v as u32,
|
||||
"",
|
||||
);
|
||||
|
||||
// Copy test data inside the wasm memory.
|
||||
let memory = instance.export_by_name("memory")
|
||||
.expect("Expected export with a name 'memory'")
|
||||
.as_memory()
|
||||
.expect("'memory' should be a memory instance")
|
||||
.clone();
|
||||
memory
|
||||
.set(input_data_mem_offset, REVCOMP_INPUT)
|
||||
.expect("can't load test data into a wasm memory");
|
||||
|
||||
b.iter(|| {
|
||||
instance
|
||||
.invoke_export("bench_regex_redux", &[test_data_ptr], &mut NopExternals)
|
||||
.unwrap();
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn fac_recursive(b: &mut Bencher) {
|
||||
let wasm = wabt::wat2wasm(
|
||||
r#"
|
||||
;; Recursive factorial
|
||||
(func (export "fac-rec") (param i64) (result i64)
|
||||
(if (result i64) (i64.eq (get_local 0) (i64.const 0))
|
||||
(then (i64.const 1))
|
||||
(else
|
||||
(i64.mul (get_local 0) (call 0 (i64.sub (get_local 0) (i64.const 1))))
|
||||
)
|
||||
)
|
||||
)
|
||||
"#
|
||||
).unwrap();
|
||||
|
||||
let module = Module::from_buffer(&wasm).unwrap();
|
||||
|
||||
let instance = ModuleInstance::new(&module, &ImportsBuilder::default())
|
||||
.expect("failed to instantiate wasm module")
|
||||
.assert_no_start();
|
||||
|
||||
b.iter(|| {
|
||||
let value = instance
|
||||
.invoke_export("fac-rec", &[RuntimeValue::I64(25)], &mut NopExternals);
|
||||
assert_matches!(value, Ok(Some(RuntimeValue::I64(7034535277573963776))));
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn fac_opt(b: &mut Bencher) {
|
||||
let wasm = wabt::wat2wasm(
|
||||
r#"
|
||||
;; Optimized factorial.
|
||||
(func (export "fac-opt") (param i64) (result i64)
|
||||
(local i64)
|
||||
(set_local 1 (i64.const 1))
|
||||
(block
|
||||
(br_if 0 (i64.lt_s (get_local 0) (i64.const 2)))
|
||||
(loop
|
||||
(set_local 1 (i64.mul (get_local 1) (get_local 0)))
|
||||
(set_local 0 (i64.add (get_local 0) (i64.const -1)))
|
||||
(br_if 0 (i64.gt_s (get_local 0) (i64.const 1)))
|
||||
)
|
||||
)
|
||||
(get_local 1)
|
||||
)
|
||||
"#
|
||||
).unwrap();
|
||||
|
||||
let module = Module::from_buffer(&wasm).unwrap();
|
||||
|
||||
let instance = ModuleInstance::new(&module, &ImportsBuilder::default())
|
||||
.expect("failed to instantiate wasm module")
|
||||
.assert_no_start();
|
||||
|
||||
b.iter(|| {
|
||||
let value = instance
|
||||
.invoke_export("fac-opt", &[RuntimeValue::I64(25)], &mut NopExternals);
|
||||
assert_matches!(value, Ok(Some(RuntimeValue::I64(7034535277573963776))));
|
||||
});
|
||||
}
|
||||
|
||||
// This is used for testing overhead of a function call
|
||||
// is not too large.
|
||||
#[bench]
|
||||
fn recursive_ok(b: &mut Bencher) {
|
||||
let wasm = wabt::wat2wasm(
|
||||
r#"
|
||||
(module
|
||||
(func $call (export "call") (param i32) (result i32)
|
||||
block (result i32)
|
||||
get_local 0
|
||||
get_local 0
|
||||
i32.eqz
|
||||
br_if 0
|
||||
|
||||
i32.const 1
|
||||
i32.sub
|
||||
call $call
|
||||
end
|
||||
)
|
||||
)
|
||||
"#
|
||||
).unwrap();
|
||||
let module = Module::from_buffer(&wasm).unwrap();
|
||||
|
||||
let instance = ModuleInstance::new(&module, &ImportsBuilder::default())
|
||||
.expect("failed to instantiate wasm module")
|
||||
.assert_no_start();
|
||||
|
||||
b.iter(|| {
|
||||
let value = instance
|
||||
.invoke_export("call", &[RuntimeValue::I32(8000)], &mut NopExternals);
|
||||
assert_matches!(value, Ok(Some(RuntimeValue::I32(0))));
|
||||
});
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn recursive_trap(b: &mut Bencher) {
|
||||
let wasm = wabt::wat2wasm(
|
||||
r#"
|
||||
(module
|
||||
(func $call (export "call") (param i32) (result i32)
|
||||
block (result i32)
|
||||
get_local 0
|
||||
get_local 0
|
||||
i32.eqz
|
||||
br_if 0
|
||||
|
||||
i32.const 1
|
||||
i32.sub
|
||||
call $call
|
||||
end
|
||||
unreachable
|
||||
)
|
||||
)
|
||||
"#
|
||||
).unwrap();
|
||||
let module = Module::from_buffer(&wasm).unwrap();
|
||||
|
||||
let instance = ModuleInstance::new(&module, &ImportsBuilder::default())
|
||||
.expect("failed to instantiate wasm module")
|
||||
.assert_no_start();
|
||||
|
||||
b.iter(|| {
|
||||
let value = instance
|
||||
.invoke_export("call", &[RuntimeValue::I32(1000)], &mut NopExternals);
|
||||
assert_matches!(value, Err(_));
|
||||
});
|
||||
}
|
||||
|
|
|
@ -0,0 +1,171 @@
|
|||
>ONE Homo sapiens alu
|
||||
GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
|
||||
TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
|
||||
AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
|
||||
GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
|
||||
CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
|
||||
GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
|
||||
GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
|
||||
TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
|
||||
AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
|
||||
GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
|
||||
AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
|
||||
AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
|
||||
GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
|
||||
CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
|
||||
AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
|
||||
TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
|
||||
TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
|
||||
GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
|
||||
TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
|
||||
CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
|
||||
CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
|
||||
TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
|
||||
CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
|
||||
AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
|
||||
GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
|
||||
TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
|
||||
TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
|
||||
GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
|
||||
GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
|
||||
ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
|
||||
TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
|
||||
CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
|
||||
CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
|
||||
GGCGACAGAGCGAGACTCCG
|
||||
>TWO IUB ambiguity codes
|
||||
cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg
|
||||
tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa
|
||||
NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt
|
||||
cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga
|
||||
gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa
|
||||
HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca
|
||||
tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt
|
||||
tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt
|
||||
acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct
|
||||
tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt
|
||||
gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa
|
||||
accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt
|
||||
RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt
|
||||
tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag
|
||||
cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg
|
||||
ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat
|
||||
actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg
|
||||
YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa
|
||||
KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata
|
||||
aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa
|
||||
aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg
|
||||
gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc
|
||||
tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK
|
||||
tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt
|
||||
ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg
|
||||
ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa
|
||||
BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt
|
||||
aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc
|
||||
tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc
|
||||
cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac
|
||||
aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga
|
||||
tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga
|
||||
aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD
|
||||
gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg
|
||||
ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV
|
||||
taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa
|
||||
ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat
|
||||
gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg
|
||||
gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa
|
||||
tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt
|
||||
tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt
|
||||
taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca
|
||||
cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag
|
||||
aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt
|
||||
cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt
|
||||
ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW
|
||||
attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag
|
||||
ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa
|
||||
attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc
|
||||
tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta
|
||||
>THREE Homo sapiens frequency
|
||||
aacacttcaccaggtatcgtgaaggctcaagattacccagagaacctttgcaatataaga
|
||||
atatgtatgcagcattaccctaagtaattatattctttttctgactcaaagtgacaagcc
|
||||
ctagtgtatattaaatcggtatatttgggaaattcctcaaactatcctaatcaggtagcc
|
||||
atgaaagtgatcaaaaaagttcgtacttataccatacatgaattctggccaagtaaaaaa
|
||||
tagattgcgcaaaattcgtaccttaagtctctcgccaagatattaggatcctattactca
|
||||
tatcgtgtttttctttattgccgccatccccggagtatctcacccatccttctcttaaag
|
||||
gcctaatattacctatgcaaataaacatatattgttgaaaattgagaacctgatcgtgat
|
||||
tcttatgtgtaccatatgtatagtaatcacgcgactatatagtgctttagtatcgcccgt
|
||||
gggtgagtgaatattctgggctagcgtgagatagtttcttgtcctaatatttttcagatc
|
||||
gaatagcttctatttttgtgtttattgacatatgtcgaaactccttactcagtgaaagtc
|
||||
atgaccagatccacgaacaatcttcggaatcagtctcgttttacggcggaatcttgagtc
|
||||
taacttatatcccgtcgcttactttctaacaccccttatgtatttttaaaattacgttta
|
||||
ttcgaacgtacttggcggaagcgttattttttgaagtaagttacattgggcagactcttg
|
||||
acattttcgatacgactttctttcatccatcacaggactcgttcgtattgatatcagaag
|
||||
ctcgtgatgattagttgtcttctttaccaatactttgaggcctattctgcgaaatttttg
|
||||
ttgccctgcgaacttcacataccaaggaacacctcgcaacatgccttcatatccatcgtt
|
||||
cattgtaattcttacacaatgaatcctaagtaattacatccctgcgtaaaagatggtagg
|
||||
ggcactgaggatatattaccaagcatttagttatgagtaatcagcaatgtttcttgtatt
|
||||
aagttctctaaaatagttacatcgtaatgttatctcgggttccgcgaataaacgagatag
|
||||
attcattatatatggccctaagcaaaaacctcctcgtattctgttggtaattagaatcac
|
||||
acaatacgggttgagatattaattatttgtagtacgaagagatataaaaagatgaacaat
|
||||
tactcaagtcaagatgtatacgggatttataataaaaatcgggtagagatctgctttgca
|
||||
attcagacgtgccactaaatcgtaatatgtcgcgttacatcagaaagggtaactattatt
|
||||
aattaataaagggcttaatcactacatattagatcttatccgatagtcttatctattcgt
|
||||
tgtatttttaagcggttctaattcagtcattatatcagtgctccgagttctttattattg
|
||||
ttttaaggatgacaaaatgcctcttgttataacgctgggagaagcagactaagagtcgga
|
||||
gcagttggtagaatgaggctgcaaaagacggtctcgacgaatggacagactttactaaac
|
||||
caatgaaagacagaagtagagcaaagtctgaagtggtatcagcttaattatgacaaccct
|
||||
taatacttccctttcgccgaatactggcgtggaaaggttttaaaagtcgaagtagttaga
|
||||
ggcatctctcgctcataaataggtagactactcgcaatccaatgtgactatgtaatactg
|
||||
ggaacatcagtccgcgatgcagcgtgtttatcaaccgtccccactcgcctggggagacat
|
||||
gagaccacccccgtggggattattagtccgcagtaatcgactcttgacaatccttttcga
|
||||
ttatgtcatagcaatttacgacagttcagcgaagtgactactcggcgaaatggtattact
|
||||
aaagcattcgaacccacatgaatgtgattcttggcaatttctaatccactaaagcttttc
|
||||
cgttgaatctggttgtagatatttatataagttcactaattaagatcacggtagtatatt
|
||||
gatagtgatgtctttgcaagaggttggccgaggaatttacggattctctattgatacaat
|
||||
ttgtctggcttataactcttaaggctgaaccaggcgtttttagacgacttgatcagctgt
|
||||
tagaatggtttggactccctctttcatgtcagtaacatttcagccgttattgttacgata
|
||||
tgcttgaacaatattgatctaccacacacccatagtatattttataggtcatgctgttac
|
||||
ctacgagcatggtattccacttcccattcaatgagtattcaacatcactagcctcagaga
|
||||
tgatgacccacctctaataacgtcacgttgcggccatgtgaaacctgaacttgagtagac
|
||||
gatatcaagcgctttaaattgcatataacatttgagggtaaagctaagcggatgctttat
|
||||
ataatcaatactcaataataagatttgattgcattttagagttatgacacgacatagttc
|
||||
actaacgagttactattcccagatctagactgaagtactgatcgagacgatccttacgtc
|
||||
gatgatcgttagttatcgacttaggtcgggtctctagcggtattggtacttaaccggaca
|
||||
ctatactaataacccatgatcaaagcataacagaatacagacgataatttcgccaacata
|
||||
tatgtacagaccccaagcatgagaagctcattgaaagctatcattgaagtcccgctcaca
|
||||
atgtgtcttttccagacggtttaactggttcccgggagtcctggagtttcgacttacata
|
||||
aatggaaacaatgtattttgctaatttatctatagcgtcatttggaccaatacagaatat
|
||||
tatgttgcctagtaatccactataacccgcaagtgctgatagaaaatttttagacgattt
|
||||
ataaatgccccaagtatccctcccgtgaatcctccgttatactaattagtattcgttcat
|
||||
acgtataccgcgcatatatgaacatttggcgataaggcgcgtgaattgttacgtgacaga
|
||||
gatagcagtttcttgtgatatggttaacagacgtacatgaagggaaactttatatctata
|
||||
gtgatgcttccgtagaaataccgccactggtctgccaatgatgaagtatgtagctttagg
|
||||
tttgtactatgaggctttcgtttgtttgcagagtataacagttgcgagtgaaaaaccgac
|
||||
gaatttatactaatacgctttcactattggctacaaaatagggaagagtttcaatcatga
|
||||
gagggagtatatggatgctttgtagctaaaggtagaacgtatgtatatgctgccgttcat
|
||||
tcttgaaagatacataagcgataagttacgacaattataagcaacatccctaccttcgta
|
||||
acgatttcactgttactgcgcttgaaatacactatggggctattggcggagagaagcaga
|
||||
tcgcgccgagcatatacgagacctataatgttgatgatagagaaggcgtctgaattgata
|
||||
catcgaagtacactttctttcgtagtatctctcgtcctctttctatctccggacacaaga
|
||||
attaagttatatatatagagtcttaccaatcatgttgaatcctgattctcagagttcttt
|
||||
ggcgggccttgtgatgactgagaaacaatgcaatattgctccaaatttcctaagcaaatt
|
||||
ctcggttatgttatgttatcagcaaagcgttacgttatgttatttaaatctggaatgacg
|
||||
gagcgaagttcttatgtcggtgtgggaataattcttttgaagacagcactccttaaataa
|
||||
tatcgctccgtgtttgtatttatcgaatgggtctgtaaccttgcacaagcaaatcggtgg
|
||||
tgtatatatcggataacaattaatacgatgttcatagtgacagtatactgatcgagtcct
|
||||
ctaaagtcaattacctcacttaacaatctcattgatgttgtgtcattcccggtatcgccc
|
||||
gtagtatgtgctctgattgaccgagtgtgaaccaaggaacatctactaatgcctttgtta
|
||||
ggtaagatctctctgaattccttcgtgccaacttaaaacattatcaaaatttcttctact
|
||||
tggattaactacttttacgagcatggcaaattcccctgtggaagacggttcattattatc
|
||||
ggaaaccttatagaaattgcgtgttgactgaaattagatttttattgtaagagttgcatc
|
||||
tttgcgattcctctggtctagcttccaatgaacagtcctcccttctattcgacatcgggt
|
||||
ccttcgtacatgtctttgcgatgtaataattaggttcggagtgtggccttaatgggtgca
|
||||
actaggaatacaacgcaaatttgctgacatgatagcaaatcggtatgccggcaccaaaac
|
||||
gtgctccttgcttagcttgtgaatgagactcagtagttaaataaatccatatctgcaatc
|
||||
gattccacaggtattgtccactatctttgaactactctaagagatacaagcttagctgag
|
||||
accgaggtgtatatgactacgctgatatctgtaaggtaccaatgcaggcaaagtatgcga
|
||||
gaagctaataccggctgtttccagctttataagattaaaatttggctgtcctggcggcct
|
||||
cagaattgttctatcgtaatcagttggttcattaattagctaagtacgaggtacaactta
|
||||
tctgtcccagaacagctccacaagtttttttacagccgaaacccctgtgtgaatcttaat
|
||||
atccaagcgcgttatctgattagagtttacaactcagtattttatcagtacgttttgttt
|
||||
ccaacattacccggtatgacaaaatgacgccacgtgtcgaataatggtctgaccaatgta
|
||||
ggaagtgaaaagataaatat
|
|
@ -0,0 +1,171 @@
|
|||
>ONE Homo sapiens alu
|
||||
CGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAAC
|
||||
CTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACA
|
||||
GGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCAT
|
||||
GTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAA
|
||||
AGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTC
|
||||
TGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGG
|
||||
GTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACC
|
||||
ACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTG
|
||||
GTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTA
|
||||
CAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCT
|
||||
GGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTC
|
||||
TCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAAT
|
||||
TTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCT
|
||||
GACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCA
|
||||
CCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGC
|
||||
GCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCC
|
||||
TCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTA
|
||||
GTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGAT
|
||||
CCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCT
|
||||
TTTTGAGACGGAGTCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTC
|
||||
ACTGCAACCTCCGCCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTG
|
||||
GGATTACAGGCGCGCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGT
|
||||
TTCACCATGTTGGCCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGG
|
||||
CCTCCCAAAGTGCTGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAG
|
||||
TCTCGCTCTGTCGCCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCG
|
||||
CCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGC
|
||||
GCGCCACCACGCCCGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGG
|
||||
CCAGGCTGGTCTCGAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGC
|
||||
TGGGATTACAGGCGTGAGCCACCGCGCCCGGCCTTTTTGAGACGGAGTCTCGCTCTGTCG
|
||||
CCCAGGCTGGAGTGCAGTGGCGCGATCTCGGCTCACTGCAACCTCCGCCTCCCGGGTTCA
|
||||
AGCGATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGATTACAGGCGCGCGCCACCACGCC
|
||||
CGGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACCATGTTGGCCAGGCTGGTCTC
|
||||
GAACTCCTGACCTCAGGTGATCCGCCCGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGC
|
||||
GTGAGCCACCGCGCCCGGCC
|
||||
>TWO IUB ambiguity codes
|
||||
TAGGDHACHATCRGTRGVTGAGWTATGYTGCTGTCABACDWVTRTAAGAVVAGATTTNDA
|
||||
GASMTCTGCATBYTTCAAKTTACMTATTACTTCATARGGYACMRTGTTTTYTATACVAAT
|
||||
TTCTAKGDACKADACTATATNTANTCGTTCACGBCGYSCBHTANGGTGATCGTAAAGTAA
|
||||
CTATBAAAAGATSTGWATBCSGAKHTTABBAACGTSYCATGCAAVATKTSKTASCGGAAT
|
||||
WVATTTNTCCTTCTTCTTDDAGTGGTTGGATACVGTTAYMTMTBTACTTTHAGCTAGBAA
|
||||
AAGAGKAAGTTRATWATCAGATTMDDTTTAAAVAAATATTKTCYTAAATTVCNKTTRACG
|
||||
ADTATATTTATGATSADSCAATAWAGCGRTAGTGTAAGTGACVGRADYGTGCTACHVSDT
|
||||
CTVCARCSYTTAATATARAAAATTTAATTTACDAATTGBACAGTAYAABATBTGCAGBVG
|
||||
TGATGGDCAAAATBNMSTTABKATTGGSTCCTAGBTTACTTGTTTAGTTTATHCGATSTA
|
||||
AAGTCGAKAAASTGTTTTAWAKCAGATATACTTTTMTTTTGBATAGAGGAGCMATGATRA
|
||||
AAGGNCAYDCCDDGAAAGTHGBTAATCKYTBTACBGTBCTTTTTGDTAASSWTAAWAARA
|
||||
TTGGCTAAGWGRADTYACATAGCTCBTAGATAWAGCAATNGTATMATGTTKMMAGTAWTC
|
||||
CCNTSGAAWATWCAAAAMACTGAADNTYGATNAATCCGAYWNCTAACGTTAGAGDTTTTC
|
||||
ATCTGGKRTAVGAABVCTGWGBTCTDVGKATTBTCTAAGGVADAAAVWTCTAGGGGAGGG
|
||||
TTAGAACAATTAAHTAATNAAATGCATKATCTAAYRTDTCAGSAYTTYHGATRTTWAVTA
|
||||
BGNTCDACAGBCCRCAGWCRTCABTGMMAWGMCTCAACCGATRTGBCAVAATCGTDWDAA
|
||||
CAYAWAATWCTGGTAHCCCTAAGATAACSCTTAGTGSAACAWTBGTCDTTDGACWDBAAC
|
||||
HTTTNGSKTYYAAYGGATNTGATTTAARTTAMBAATCTAAGTBTCATYTAACTTADTGTT
|
||||
TCGATACGAAHGGCYATATACCWDTKYATDCSHTDTCAAAATGTGBACTGSCCVGATGTA
|
||||
TCMMAGCCTTDAAABAATGAAGAGTAACTHATMGVTTAATAACCCGGTTVSANTGCAATT
|
||||
GTGAGATTTAMGTTTAMAAYGCTGACAYAAAAAGGCACAMYTAAGVGGCTGGAABVTACG
|
||||
GATTSTYGTBVAKTATWACCGTGTKAGTDTGTATGTTTAAAGGAAAAAGTAACATARAAA
|
||||
GGTYCAMNYAAABTATAGNTSATANAGTCATCCTATWADKAACTRGTMSACDGTATSAYT
|
||||
AAHSHGTAABYGACTYTATADTGSTATAGAGAAATCGNTAAAGGAAATCAGTTGTNCYMV
|
||||
TNACDRTATBNATATASTAGAAMSCGGGANRCKKMCAAACATTNAGTCTRMAATBMTACC
|
||||
CGTACTTCTBGDSYAATWGAAAATGACADDCHAKAAAYATATTKTTTTCACANACWAGAA
|
||||
AKATCCTTATTAYKHKCTAAACARTATTTTDATBTVWCYGCAATACTAGGKAAASTTDGA
|
||||
MGGCHTTHAATVCAHDRYAGGRCTATACGTCMAGAGAGCTBTHGNACARTCCBDCTAAGA
|
||||
GCGGCTTTARTAAAGAATCCNAGTAWBTGACTTGAATTACWTVACAGAAABCAATNAAAC
|
||||
CGTNTRANTTGAYCMAWBADTANABRGGTKTHTWTAGTTVCTMBKTAGMTVKCCAGCANT
|
||||
TVAGSWTTAGCCGCRHTTTCCTTHNTATTAAGAAGAATAGGMTRAARTCTABGTACDTTT
|
||||
TATAAVDHAHTATAGATCCTAGTAAGYTWATDWCATGAGGGATAGTAAMDMNGBASTWAM
|
||||
TSTATRBAYDABATGTATATYCGCACTGTTTTAACMCWBTATAWAGTATBTSTATVTTAR
|
||||
CCTMTTAAKADATCAACTAATYTSVTAKGDATTATGCKTCAYCAKAATACTTKAANGAGT
|
||||
ATTSDAGATCGGAAATACTTAAYAAVGTATMCGCTTGTGTDCTAATYTATTTTATTTWAA
|
||||
CAGWRCTATGTAGMTGTTTGTTYKTNGTTKTCAGAACNTRACCTACKTGSRATGTGGGGG
|
||||
CTGTCATTAAGTAAATNGSTTABCCCCTCGCAGCTCWHTCGCGAAGCAVATGCKACGHCA
|
||||
ACAKTTAATAACASAAADATTWNYTGTAATTGTTCGTMHACHTWATGTGCWTTTTGAAHY
|
||||
ACTTTGTAYAMSAAACTTAADAAATATAGTABMATATYAATGSGGTAGTTTGTGTBYGGT
|
||||
TWSGSVGWMATTDMTCCWWCABTCSVACAGBAATGTTKATBGTCAATAATCTTCTTAAAC
|
||||
ARVAATHAGYBWCTRWCABGTWWAATCTAAGTCASTAAAKTAAGVKBAATTBGABACGTA
|
||||
AGGTTAAATAAAAACTRMDTWBCTTTTTAATAAAAGATMGCCTACKAKNTBAGYRASTGT
|
||||
ASSTCGTHCGAAKTTATTATATTYTTTGTAGAACATGTCAAAACTWTWTHGKTCCYAATA
|
||||
AAGTGGAYTMCYTAARCSTAAATWAKTGAATTTRAGTCTSSATACGACWAKAASATDAAA
|
||||
TGYYACTSAACAAHAKTSHYARGASTATTATTHAGGYGGASTTTBGAKGATSANAACACD
|
||||
TRGSTTRAAAAAAAACAAGARTCVTAGTAAGATAWATGVHAAKATWGAAAAGTYAHVTAC
|
||||
TCTGRTGTCAWGATRVAAKTCGCAAVCGASWGGTTRTCSAMCCTAACASGWKKAWDAATG
|
||||
ACRCBACTATGTGTCTTCAAAHGSCTATATTTCGTVWAGAAGTAYCKGARAKSGKAGTAN
|
||||
TTTCYACATWATGTCTAAAADMDTWCAATSTKDACAMAADADBSAAATAGGCTHAHAGTA
|
||||
CGACVGAATTATAAAGAHCCVAYHGHTTTACATSTTTATGNCCMTAGCATATGATAVAAG
|
||||
>THREE Homo sapiens frequency
|
||||
ATATTTATCTTTTCACTTCCTACATTGGTCAGACCATTATTCGACACGTGGCGTCATTTT
|
||||
GTCATACCGGGTAATGTTGGAAACAAAACGTACTGATAAAATACTGAGTTGTAAACTCTA
|
||||
ATCAGATAACGCGCTTGGATATTAAGATTCACACAGGGGTTTCGGCTGTAAAAAAACTTG
|
||||
TGGAGCTGTTCTGGGACAGATAAGTTGTACCTCGTACTTAGCTAATTAATGAACCAACTG
|
||||
ATTACGATAGAACAATTCTGAGGCCGCCAGGACAGCCAAATTTTAATCTTATAAAGCTGG
|
||||
AAACAGCCGGTATTAGCTTCTCGCATACTTTGCCTGCATTGGTACCTTACAGATATCAGC
|
||||
GTAGTCATATACACCTCGGTCTCAGCTAAGCTTGTATCTCTTAGAGTAGTTCAAAGATAG
|
||||
TGGACAATACCTGTGGAATCGATTGCAGATATGGATTTATTTAACTACTGAGTCTCATTC
|
||||
ACAAGCTAAGCAAGGAGCACGTTTTGGTGCCGGCATACCGATTTGCTATCATGTCAGCAA
|
||||
ATTTGCGTTGTATTCCTAGTTGCACCCATTAAGGCCACACTCCGAACCTAATTATTACAT
|
||||
CGCAAAGACATGTACGAAGGACCCGATGTCGAATAGAAGGGAGGACTGTTCATTGGAAGC
|
||||
TAGACCAGAGGAATCGCAAAGATGCAACTCTTACAATAAAAATCTAATTTCAGTCAACAC
|
||||
GCAATTTCTATAAGGTTTCCGATAATAATGAACCGTCTTCCACAGGGGAATTTGCCATGC
|
||||
TCGTAAAAGTAGTTAATCCAAGTAGAAGAAATTTTGATAATGTTTTAAGTTGGCACGAAG
|
||||
GAATTCAGAGAGATCTTACCTAACAAAGGCATTAGTAGATGTTCCTTGGTTCACACTCGG
|
||||
TCAATCAGAGCACATACTACGGGCGATACCGGGAATGACACAACATCAATGAGATTGTTA
|
||||
AGTGAGGTAATTGACTTTAGAGGACTCGATCAGTATACTGTCACTATGAACATCGTATTA
|
||||
ATTGTTATCCGATATATACACCACCGATTTGCTTGTGCAAGGTTACAGACCCATTCGATA
|
||||
AATACAAACACGGAGCGATATTATTTAAGGAGTGCTGTCTTCAAAAGAATTATTCCCACA
|
||||
CCGACATAAGAACTTCGCTCCGTCATTCCAGATTTAAATAACATAACGTAACGCTTTGCT
|
||||
GATAACATAACATAACCGAGAATTTGCTTAGGAAATTTGGAGCAATATTGCATTGTTTCT
|
||||
CAGTCATCACAAGGCCCGCCAAAGAACTCTGAGAATCAGGATTCAACATGATTGGTAAGA
|
||||
CTCTATATATATAACTTAATTCTTGTGTCCGGAGATAGAAAGAGGACGAGAGATACTACG
|
||||
AAAGAAAGTGTACTTCGATGTATCAATTCAGACGCCTTCTCTATCATCAACATTATAGGT
|
||||
CTCGTATATGCTCGGCGCGATCTGCTTCTCTCCGCCAATAGCCCCATAGTGTATTTCAAG
|
||||
CGCAGTAACAGTGAAATCGTTACGAAGGTAGGGATGTTGCTTATAATTGTCGTAACTTAT
|
||||
CGCTTATGTATCTTTCAAGAATGAACGGCAGCATATACATACGTTCTACCTTTAGCTACA
|
||||
AAGCATCCATATACTCCCTCTCATGATTGAAACTCTTCCCTATTTTGTAGCCAATAGTGA
|
||||
AAGCGTATTAGTATAAATTCGTCGGTTTTTCACTCGCAACTGTTATACTCTGCAAACAAA
|
||||
CGAAAGCCTCATAGTACAAACCTAAAGCTACATACTTCATCATTGGCAGACCAGTGGCGG
|
||||
TATTTCTACGGAAGCATCACTATAGATATAAAGTTTCCCTTCATGTACGTCTGTTAACCA
|
||||
TATCACAAGAAACTGCTATCTCTGTCACGTAACAATTCACGCGCCTTATCGCCAAATGTT
|
||||
CATATATGCGCGGTATACGTATGAACGAATACTAATTAGTATAACGGAGGATTCACGGGA
|
||||
GGGATACTTGGGGCATTTATAAATCGTCTAAAAATTTTCTATCAGCACTTGCGGGTTATA
|
||||
GTGGATTACTAGGCAACATAATATTCTGTATTGGTCCAAATGACGCTATAGATAAATTAG
|
||||
CAAAATACATTGTTTCCATTTATGTAAGTCGAAACTCCAGGACTCCCGGGAACCAGTTAA
|
||||
ACCGTCTGGAAAAGACACATTGTGAGCGGGACTTCAATGATAGCTTTCAATGAGCTTCTC
|
||||
ATGCTTGGGGTCTGTACATATATGTTGGCGAAATTATCGTCTGTATTCTGTTATGCTTTG
|
||||
ATCATGGGTTATTAGTATAGTGTCCGGTTAAGTACCAATACCGCTAGAGACCCGACCTAA
|
||||
GTCGATAACTAACGATCATCGACGTAAGGATCGTCTCGATCAGTACTTCAGTCTAGATCT
|
||||
GGGAATAGTAACTCGTTAGTGAACTATGTCGTGTCATAACTCTAAAATGCAATCAAATCT
|
||||
TATTATTGAGTATTGATTATATAAAGCATCCGCTTAGCTTTACCCTCAAATGTTATATGC
|
||||
AATTTAAAGCGCTTGATATCGTCTACTCAAGTTCAGGTTTCACATGGCCGCAACGTGACG
|
||||
TTATTAGAGGTGGGTCATCATCTCTGAGGCTAGTGATGTTGAATACTCATTGAATGGGAA
|
||||
GTGGAATACCATGCTCGTAGGTAACAGCATGACCTATAAAATATACTATGGGTGTGTGGT
|
||||
AGATCAATATTGTTCAAGCATATCGTAACAATAACGGCTGAAATGTTACTGACATGAAAG
|
||||
AGGGAGTCCAAACCATTCTAACAGCTGATCAAGTCGTCTAAAAACGCCTGGTTCAGCCTT
|
||||
AAGAGTTATAAGCCAGACAAATTGTATCAATAGAGAATCCGTAAATTCCTCGGCCAACCT
|
||||
CTTGCAAAGACATCACTATCAATATACTACCGTGATCTTAATTAGTGAACTTATATAAAT
|
||||
ATCTACAACCAGATTCAACGGAAAAGCTTTAGTGGATTAGAAATTGCCAAGAATCACATT
|
||||
CATGTGGGTTCGAATGCTTTAGTAATACCATTTCGCCGAGTAGTCACTTCGCTGAACTGT
|
||||
CGTAAATTGCTATGACATAATCGAAAAGGATTGTCAAGAGTCGATTACTGCGGACTAATA
|
||||
ATCCCCACGGGGGTGGTCTCATGTCTCCCCAGGCGAGTGGGGACGGTTGATAAACACGCT
|
||||
GCATCGCGGACTGATGTTCCCAGTATTACATAGTCACATTGGATTGCGAGTAGTCTACCT
|
||||
ATTTATGAGCGAGAGATGCCTCTAACTACTTCGACTTTTAAAACCTTTCCACGCCAGTAT
|
||||
TCGGCGAAAGGGAAGTATTAAGGGTTGTCATAATTAAGCTGATACCACTTCAGACTTTGC
|
||||
TCTACTTCTGTCTTTCATTGGTTTAGTAAAGTCTGTCCATTCGTCGAGACCGTCTTTTGC
|
||||
AGCCTCATTCTACCAACTGCTCCGACTCTTAGTCTGCTTCTCCCAGCGTTATAACAAGAG
|
||||
GCATTTTGTCATCCTTAAAACAATAATAAAGAACTCGGAGCACTGATATAATGACTGAAT
|
||||
TAGAACCGCTTAAAAATACAACGAATAGATAAGACTATCGGATAAGATCTAATATGTAGT
|
||||
GATTAAGCCCTTTATTAATTAATAATAGTTACCCTTTCTGATGTAACGCGACATATTACG
|
||||
ATTTAGTGGCACGTCTGAATTGCAAAGCAGATCTCTACCCGATTTTTATTATAAATCCCG
|
||||
TATACATCTTGACTTGAGTAATTGTTCATCTTTTTATATCTCTTCGTACTACAAATAATT
|
||||
AATATCTCAACCCGTATTGTGTGATTCTAATTACCAACAGAATACGAGGAGGTTTTTGCT
|
||||
TAGGGCCATATATAATGAATCTATCTCGTTTATTCGCGGAACCCGAGATAACATTACGAT
|
||||
GTAACTATTTTAGAGAACTTAATACAAGAAACATTGCTGATTACTCATAACTAAATGCTT
|
||||
GGTAATATATCCTCAGTGCCCCTACCATCTTTTACGCAGGGATGTAATTACTTAGGATTC
|
||||
ATTGTGTAAGAATTACAATGAACGATGGATATGAAGGCATGTTGCGAGGTGTTCCTTGGT
|
||||
ATGTGAAGTTCGCAGGGCAACAAAAATTTCGCAGAATAGGCCTCAAAGTATTGGTAAAGA
|
||||
AGACAACTAATCATCACGAGCTTCTGATATCAATACGAACGAGTCCTGTGATGGATGAAA
|
||||
GAAAGTCGTATCGAAAATGTCAAGAGTCTGCCCAATGTAACTTACTTCAAAAAATAACGC
|
||||
TTCCGCCAAGTACGTTCGAATAAACGTAATTTTAAAAATACATAAGGGGTGTTAGAAAGT
|
||||
AAGCGACGGGATATAAGTTAGACTCAAGATTCCGCCGTAAAACGAGACTGATTCCGAAGA
|
||||
TTGTTCGTGGATCTGGTCATGACTTTCACTGAGTAAGGAGTTTCGACATATGTCAATAAA
|
||||
CACAAAAATAGAAGCTATTCGATCTGAAAAATATTAGGACAAGAAACTATCTCACGCTAG
|
||||
CCCAGAATATTCACTCACCCACGGGCGATACTAAAGCACTATATAGTCGCGTGATTACTA
|
||||
TACATATGGTACACATAAGAATCACGATCAGGTTCTCAATTTTCAACAATATATGTTTAT
|
||||
TTGCATAGGTAATATTAGGCCTTTAAGAGAAGGATGGGTGAGATACTCCGGGGATGGCGG
|
||||
CAATAAAGAAAAACACGATATGAGTAATAGGATCCTAATATCTTGGCGAGAGACTTAAGG
|
||||
TACGAATTTTGCGCAATCTATTTTTTACTTGGCCAGAATTCATGTATGGTATAAGTACGA
|
||||
ACTTTTTTGATCACTTTCATGGCTACCTGATTAGGATAGTTTGAGGAATTTCCCAAATAT
|
||||
ACCGATTTAATATACACTAGGGCTTGTCACTTTGAGTCAGAAAAAGAATATAATTACTTA
|
||||
GGGTAATGCTGCATACATATTCTTATATTGCAAAGGTTCTCTGGGTAATCTTGAGCCTTC
|
||||
ACGATACCTGGTGAAGTGTT
|
|
@ -8,7 +8,8 @@ crate-type = ["cdylib"]
|
|||
|
||||
[dependencies]
|
||||
tiny-keccak = "1.4.2"
|
||||
rlibc = "1.0"
|
||||
regex = "0.2.10"
|
||||
lazy_static = "1.0"
|
||||
|
||||
[profile.release]
|
||||
panic = "abort"
|
||||
|
|
|
@ -1,21 +1,13 @@
|
|||
#![no_std]
|
||||
#![feature(lang_items)]
|
||||
#![feature(core_intrinsics)]
|
||||
#![feature(panic_implementation)]
|
||||
|
||||
extern crate rlibc;
|
||||
extern crate tiny_keccak;
|
||||
extern crate regex;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
use std::mem::ManuallyDrop;
|
||||
use tiny_keccak::Keccak;
|
||||
|
||||
#[no_mangle]
|
||||
#[panic_implementation]
|
||||
pub fn panic_fmt(_info: &::core::panic::PanicInfo) -> ! {
|
||||
use core::intrinsics;
|
||||
unsafe {
|
||||
intrinsics::abort();
|
||||
}
|
||||
}
|
||||
mod rev_complement;
|
||||
mod regex_redux;
|
||||
|
||||
pub struct TinyKeccakTestData {
|
||||
data: &'static [u8],
|
||||
|
@ -48,3 +40,84 @@ pub extern "C" fn bench_tiny_keccak(test_data: *const TinyKeccakTestData) {
|
|||
keccak.finalize((*test_data).result);
|
||||
}
|
||||
}
|
||||
|
||||
/// Buffers shared between the host and the `rev_complement` benchmark.
///
/// Both buffers are wrapped in `ManuallyDrop`, so dropping this struct does
/// not free them.
pub struct RevComplementTestData {
    /// Input bytes; exposed mutably through `rev_complement_input_ptr`.
    input: ManuallyDrop<Box<[u8]>>,
    /// Result bytes; exposed read-only through `rev_complement_output_ptr`.
    output: ManuallyDrop<Box<[u8]>>,
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn prepare_rev_complement(size: usize) -> *mut RevComplementTestData {
|
||||
let input = vec![0; size];
|
||||
let output = vec![0; size];
|
||||
|
||||
let test_data = Box::new(
|
||||
RevComplementTestData {
|
||||
input: ManuallyDrop::new(input.into_boxed_slice()),
|
||||
output: ManuallyDrop::new(output.into_boxed_slice()),
|
||||
}
|
||||
);
|
||||
|
||||
// Basically leak the pointer to the test data. This shouldn't be harmful since `prepare` is called
|
||||
// only once per bench run (not for the iteration), and afterwards whole memory instance is discarded.
|
||||
Box::into_raw(test_data)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rev_complement_input_ptr(test_data: *mut RevComplementTestData) -> *mut u8 {
|
||||
unsafe {
|
||||
(*test_data).input.as_mut_ptr()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn rev_complement_output_ptr(test_data: *mut RevComplementTestData) -> *const u8 {
|
||||
unsafe {
|
||||
(*test_data).output.as_ptr()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn bench_rev_complement(test_data: *mut RevComplementTestData) {
|
||||
unsafe {
|
||||
let result = rev_complement::run(&*(*test_data).input);
|
||||
(*test_data).output.copy_from_slice(&result);
|
||||
}
|
||||
}
|
||||
|
||||
/// State shared between the host and the `regex_redux` benchmark.
pub struct RegexReduxTestData {
    /// Input bytes; exposed mutably through `regex_redux_input_ptr`. Wrapped
    /// in `ManuallyDrop`, so dropping this struct does not free the buffer.
    input: ManuallyDrop<Box<[u8]>>,
    /// Match count stored by `bench_regex_redux`; `None` until it has run.
    output: Option<usize>,
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn prepare_regex_redux(size: usize) -> *mut RegexReduxTestData {
|
||||
regex_redux::prepare();
|
||||
|
||||
let input = vec![0; size];
|
||||
let test_data = Box::new(
|
||||
RegexReduxTestData {
|
||||
input: ManuallyDrop::new(input.into_boxed_slice()),
|
||||
output: None,
|
||||
}
|
||||
);
|
||||
|
||||
// Basically leak the pointer to the test data. This shouldn't be harmful since `prepare` is called
|
||||
// only once per bench run (not for the iteration), and afterwards whole memory instance is discarded.
|
||||
Box::into_raw(test_data)
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn regex_redux_input_ptr(test_data: *mut RegexReduxTestData) -> *mut u8 {
|
||||
unsafe {
|
||||
(*test_data).input.as_mut_ptr()
|
||||
}
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
pub extern "C" fn bench_regex_redux(test_data: *mut RegexReduxTestData) {
|
||||
unsafe {
|
||||
let result = regex_redux::run(&*(*test_data).input);
|
||||
(*test_data).output = Some(result);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
//! Initially it was supposed to be like [1]. However, it turned out
|
||||
//! that executing this code in wasmi is way too slow.
|
||||
//!
|
||||
//! [1]: https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/regexredux-rust-2.html
|
||||
|
||||
lazy_static! {
    /// Byte-oriented pattern used by the benchmark. It is compiled on first
    /// access (or eagerly via `prepare`) and reused afterwards.
    static ref REGEX: ::regex::bytes::Regex =
        ::regex::bytes::Regex::new("agggtaa[cgt]|[acg]ttaccct").unwrap();
}
|
||||
|
||||
pub fn prepare() {
|
||||
::lazy_static::initialize(®EX);
|
||||
}
|
||||
|
||||
pub fn run(seq: &[u8]) -> usize {
|
||||
REGEX.find_iter(seq).count()
|
||||
}
|
|
@ -0,0 +1,164 @@
|
|||
// Adapted version from benchmarks game. In particular
|
||||
// rayon is removed.
|
||||
//
|
||||
// https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/revcomp-rust-3.html
|
||||
|
||||
// The Computer Language Benchmarks Game
|
||||
// https://salsa.debian.org/benchmarksgame-team/benchmarksgame/
|
||||
//
|
||||
// contributed by the Rust Project Developers
|
||||
// contributed by Cristi Cobzarenco
|
||||
// contributed by TeXitoi
|
||||
// contributed by Matt Brubeck
|
||||
|
||||
use std::io::BufRead;
|
||||
use std::mem::replace;
|
||||
use std::{cmp, io};
|
||||
|
||||
/// Build the 256-entry lookup table that maps each byte to the complement of
/// the FASTA code it represents.
///
/// Letter codes are matched case-insensitively and always map to an
/// upper-case complement. Every other byte maps to itself.
fn build_table() -> [u8; 256] {
    let mut table = [0u8; 256];
    for idx in 0..table.len() {
        let code = idx as u8;
        table[idx] = match code.to_ascii_uppercase() {
            b'A' => b'T',
            b'C' => b'G',
            b'G' => b'C',
            b'T' => b'A',
            b'U' => b'A',
            b'M' => b'K',
            b'R' => b'Y',
            b'W' => b'W',
            b'S' => b'S',
            b'Y' => b'R',
            b'K' => b'M',
            b'V' => b'B',
            b'H' => b'D',
            b'D' => b'H',
            b'B' => b'V',
            b'N' => b'N',
            // Not a FASTA code: keep the original byte, including its case.
            _ => code,
        };
    }
    table
}
|
||||
|
||||
/// Helpers for carving chunks off either end of a slice reference.
trait SplitOff {
    fn split_off_left(&mut self, n: usize) -> Self;
    fn split_off_right(&mut self, n: usize) -> Self;
}

impl<'a, T> SplitOff for &'a mut [T] {
    /// Detach up to `n` items from the front of `self` and return them.
    /// `self` keeps the remainder. `n` is clamped to the slice length.
    fn split_off_left(&mut self, n: usize) -> Self {
        // Move the slice out temporarily so it can be split with the full
        // `'a` lifetime.
        let whole = replace(self, &mut []);
        let cut = cmp::min(n, whole.len());
        let (head, rest) = whole.split_at_mut(cut);
        *self = rest;
        head
    }

    /// Detach up to `n` items from the back of `self` and return them.
    /// `self` keeps the remainder. `n` is clamped to the slice length.
    fn split_off_right(&mut self, n: usize) -> Self {
        let whole = replace(self, &mut []);
        let keep = whole.len() - cmp::min(n, whole.len());
        let (rest, tail) = whole.split_at_mut(keep);
        *self = rest;
        tail
    }
}
|
||||
|
||||
/// Length of a normal line including the terminating \n.
|
||||
const LINE_LEN: usize = 61;
|
||||
|
||||
/// Reverse-complement two contiguous chunks that contain no line breaks.
///
/// Walks `left` forwards and `right` backwards in lock-step. Each pair of
/// bytes is swapped, and both are replaced by their complement from `table`.
/// If the chunks differ in length, only as many pairs as the shorter chunk
/// has are processed.
fn reverse_chunks(left: &mut [u8], right: &mut [u8], table: &[u8; 256]) {
    let pairs = left.iter_mut().zip(right.iter_mut().rev());
    for (front, back) in pairs {
        let front_complement = table[*front as usize];
        let back_complement = table[*back as usize];
        *front = back_complement;
        *back = front_complement;
    }
}
|
||||
|
||||
/// Compute the reverse complement on chunks from opposite ends of a sequence.
|
||||
///
|
||||
/// `left` must start at the beginning of a line. If there are an odd number of
|
||||
/// bytes, `right` will initially be 1 byte longer than `left`; otherwise they
|
||||
/// will have equal lengths.
|
||||
fn reverse_complement_left_right(
|
||||
mut left: &mut [u8],
|
||||
mut right: &mut [u8],
|
||||
trailing_len: usize,
|
||||
table: &[u8; 256],
|
||||
) {
|
||||
// Each iteration swaps one line from the start of the sequence with one
|
||||
// from the end.
|
||||
while left.len() > 0 || right.len() > 0 {
|
||||
// Get the chunk up to the newline in `right`.
|
||||
let mut a = left.split_off_left(trailing_len);
|
||||
let mut b = right.split_off_right(trailing_len);
|
||||
right.split_off_right(1); // Skip the newline in `right`.
|
||||
|
||||
// If we've reached the middle of the sequence here and there is an
|
||||
// odd number of bytes remaining, the odd one will be on the right.
|
||||
if b.len() > a.len() {
|
||||
let mid = b.split_off_left(1);
|
||||
mid[0] = table[mid[0] as usize];
|
||||
}
|
||||
|
||||
reverse_chunks(a, b, table);
|
||||
|
||||
// Get the chunk up to the newline in `left`.
|
||||
let n = LINE_LEN - 1 - trailing_len;
|
||||
a = left.split_off_left(n);
|
||||
b = right.split_off_right(n);
|
||||
left.split_off_left(1); // Skip the newline in `left`.
|
||||
|
||||
// If we've reached the middle of the sequence and there is an odd
|
||||
// number of bytes remaining, the odd one will now be on the left.
|
||||
if a.len() > b.len() {
|
||||
let mid = a.split_off_right(1);
|
||||
mid[0] = table[mid[0] as usize]
|
||||
}
|
||||
|
||||
reverse_chunks(a, b, table);
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the reverse complement of one sequence.
|
||||
fn reverse_complement(seq: &mut [u8], table: &[u8; 256]) {
|
||||
let len = seq.len() - 1;
|
||||
let seq = &mut seq[..len]; // Drop the last newline
|
||||
let trailing_len = len % LINE_LEN;
|
||||
let (left, right) = seq.split_at_mut(len / 2);
|
||||
reverse_complement_left_right(left, right, trailing_len, table);
|
||||
}
|
||||
|
||||
/// Read sequences from stdin and print the reverse complement to stdout.
|
||||
pub fn run(input: &[u8]) -> Vec<u8> {
|
||||
let mut buf = Vec::with_capacity(input.len());
|
||||
|
||||
let mut input = io::Cursor::new(input);
|
||||
|
||||
// Read the first header line.
|
||||
input.read_until(b'\n', &mut buf).unwrap();
|
||||
|
||||
// Read sequence data line-by-line, splitting on headers.
|
||||
let mut line_start = buf.len();
|
||||
let mut seq_start = line_start;
|
||||
let mut seqs = vec![];
|
||||
while input.read_until(b'\n', &mut buf).unwrap() > 0 {
|
||||
if buf[line_start] == b'>' {
|
||||
// Found the start of a new sequence.
|
||||
seqs.push(seq_start..line_start);
|
||||
seq_start = buf.len();
|
||||
}
|
||||
line_start = buf.len();
|
||||
}
|
||||
seqs.push(seq_start..buf.len());
|
||||
|
||||
// Compute the reverse complements of each sequence.
|
||||
let table = build_table();
|
||||
for seq in seqs {
|
||||
reverse_complement(&mut buf[seq], &table);
|
||||
}
|
||||
|
||||
buf
|
||||
}
|
Loading…
Reference in New Issue