tools: add some letter info tools for word parsing
This commit is contained in:
parent
9ef41d7a64
commit
e761cc0d7b
|
@ -1 +1,2 @@
|
|||
result*
|
||||
node_modules
|
||||
|
|
|
@ -30,9 +30,9 @@ in pkgs.mkShell {
|
|||
dhall-json
|
||||
|
||||
# deno
|
||||
nodejs-13_x
|
||||
nur.repos.xe.deno
|
||||
nodePackages.typescript
|
||||
nodePackages.typescript-language-server
|
||||
|
||||
# rust
|
||||
cargo
|
||||
|
|
|
@ -1,5 +1,125 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
|
||||
|
||||
[[package]]
|
||||
name = "lewa"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"rust-fsm",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "0.4.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
|
||||
dependencies = [
|
||||
"unicode-xid 0.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70a50b9351bfa8d65a7d93ce712dc63d2fd15ddbf2c36990fc7cac344859c04f"
|
||||
dependencies = [
|
||||
"unicode-xid 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "0.6.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.30",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.15",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-fsm"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c1b9ef6eff7d93f596b654be024cdb030dadc0bff93eb22e34a123cc2b13fcb6"
|
||||
dependencies = [
|
||||
"rust-fsm-dsl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-fsm-dsl"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86ee4c91af3bd97b9c6469f10cf2db993925cddb6f0de761b490c5e95c825ef2"
|
||||
dependencies = [
|
||||
"quote 0.6.13",
|
||||
"syn 0.15.44",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.110"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.110"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.15",
|
||||
"quote 1.0.6",
|
||||
"syn 1.0.23",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "0.15.44"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.30",
|
||||
"quote 0.6.13",
|
||||
"unicode-xid 0.1.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95b5f192649e48a5302a13f2feb224df883b98933222369e4b3b0fe2a5447269"
|
||||
dependencies = [
|
||||
"proc-macro2 1.0.15",
|
||||
"quote 1.0.6",
|
||||
"unicode-xid 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
||||
|
|
|
@ -8,3 +8,6 @@ license = "MIT"
|
|||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
rust-fsm = "0.3.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
use crate::Letter;
|
||||
use anyhow::{anyhow, Result};
|
||||
|
||||
fn letter2ipa(l: &String) -> Result<String> {
|
||||
let l = l.as_str();
|
||||
// /d f g h j k l m n p q s t w ʃ ʒ ʔ ʙ̥/
|
||||
// /a ɛ i o u/
|
||||
match l {
|
||||
"a" => Ok("a".into()),
|
||||
"b" => Ok("ʙ̥".into()),
|
||||
"d" => Ok("d".into()),
|
||||
"e" => Ok("ɛ".into()),
|
||||
"f" => Ok("f".into()),
|
||||
"g" => Ok("g".into()),
|
||||
"h" => Ok("h".into()),
|
||||
"i" => Ok("i".into()),
|
||||
"j" => Ok("j".into()),
|
||||
"k" => Ok("k".into()),
|
||||
"l" => Ok("l".into()),
|
||||
"m" => Ok("m".into()),
|
||||
"n" => Ok("n".into()),
|
||||
"o" => Ok("o".into()),
|
||||
"p" => Ok("p".into()),
|
||||
"q" => Ok("q".into()),
|
||||
"r" => Ok("r".into()),
|
||||
"s" => Ok("s".into()),
|
||||
"t" => Ok("t".into()),
|
||||
"u" => Ok("u".into()),
|
||||
"w" => Ok("w".into()),
|
||||
"x" => Ok("ʃ".into()),
|
||||
"z" => Ok("ʒ".into()),
|
||||
"'" => Ok("ʔ".into()),
|
||||
|
||||
_ => Err(anyhow!("didn't want {}", l)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether `l` is exactly one of the vowel letters `a`, `e`, `i`,
/// `o` or `u`.
///
/// The whole string is compared, so the empty string and multi-letter
/// strings are never vowels.
fn is_vowel(l: &str) -> bool {
    matches!(l, "a" | "e" | "i" | "o" | "u")
}
|
||||
|
||||
/// Reports whether `l` is exactly one of the stop letters `p`, `t`, `d`,
/// `k`, `g`, `q` or `'`.
///
/// The whole string is compared, so the empty string and multi-letter
/// strings are never stops.
fn is_stop(l: &str) -> bool {
    matches!(l, "p" | "t" | "d" | "k" | "g" | "q" | "'")
}
|
||||
|
||||
impl Letter {
|
||||
fn new(l: String) -> Result<Letter> {
|
||||
let ipa = letter2ipa(&l)?;
|
||||
let is_vowel = is_vowel(&l);
|
||||
let is_stop = is_stop(&l);
|
||||
Ok(Letter {
|
||||
latin: l,
|
||||
ipa: ipa,
|
||||
is_vowel: is_vowel,
|
||||
is_stop: is_stop,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::Letter;

    /// Every letter of the alphabet must be accepted by `Letter::new`.
    #[test]
    fn alphabet() {
        let alphabet = [
            "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
            "s", "t", "u", "w", "x", "z", "'",
        ];

        for &letter in alphabet.iter() {
            Letter::new(letter.to_string()).unwrap();
        }
    }

    /// Multi-letter strings and letters outside the alphabet must be rejected.
    #[test]
    fn invalid_letters() {
        let invalid = ["treason", "y"];

        for &candidate in invalid.iter() {
            let outcome = Letter::new(candidate.to_string());
            assert!(outcome.is_err());
        }
    }
}
|
|
@ -1,3 +1,15 @@
|
|||
use serde::{Serialize, Deserialize};
|
||||
|
||||
pub mod letters;
|
||||
|
||||
#[derive(Deserialize, Serialize)]
|
||||
pub struct Letter {
|
||||
latin: String,
|
||||
ipa: String,
|
||||
is_vowel: bool,
|
||||
is_stop: bool,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[test]
|
||||
|
|
Loading…
Reference in New Issue