tools: add some letter info tools for word parsing

This commit is contained in:
Cadey Ratio 2020-05-23 10:53:38 -04:00
parent 9ef41d7a64
commit e761cc0d7b
6 changed files with 230 additions and 1 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
result* result*
node_modules

View File

@ -30,9 +30,9 @@ in pkgs.mkShell {
dhall-json dhall-json
# deno # deno
nodejs-13_x
nur.repos.xe.deno nur.repos.xe.deno
nodePackages.typescript nodePackages.typescript
nodePackages.typescript-language-server
# rust # rust
cargo cargo

120
tools/Cargo.lock generated
View File

@ -1,5 +1,125 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
[[package]]
name = "anyhow"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
[[package]] [[package]]
name = "lewa" name = "lewa"
version = "0.1.0" version = "0.1.0"
dependencies = [
"anyhow",
"rust-fsm",
"serde",
]
[[package]]
name = "proc-macro2"
version = "0.4.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
dependencies = [
"unicode-xid 0.1.0",
]
[[package]]
name = "proc-macro2"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70a50b9351bfa8d65a7d93ce712dc63d2fd15ddbf2c36990fc7cac344859c04f"
dependencies = [
"unicode-xid 0.2.0",
]
[[package]]
name = "quote"
version = "0.6.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
dependencies = [
"proc-macro2 0.4.30",
]
[[package]]
name = "quote"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea"
dependencies = [
"proc-macro2 1.0.15",
]
[[package]]
name = "rust-fsm"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1b9ef6eff7d93f596b654be024cdb030dadc0bff93eb22e34a123cc2b13fcb6"
dependencies = [
"rust-fsm-dsl",
]
[[package]]
name = "rust-fsm-dsl"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86ee4c91af3bd97b9c6469f10cf2db993925cddb6f0de761b490c5e95c825ef2"
dependencies = [
"quote 0.6.13",
"syn 0.15.44",
]
[[package]]
name = "serde"
version = "1.0.110"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.110"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984"
dependencies = [
"proc-macro2 1.0.15",
"quote 1.0.6",
"syn 1.0.23",
]
[[package]]
name = "syn"
version = "0.15.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
dependencies = [
"proc-macro2 0.4.30",
"quote 0.6.13",
"unicode-xid 0.1.0",
]
[[package]]
name = "syn"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95b5f192649e48a5302a13f2feb224df883b98933222369e4b3b0fe2a5447269"
dependencies = [
"proc-macro2 1.0.15",
"quote 1.0.6",
"unicode-xid 0.2.0",
]
[[package]]
name = "unicode-xid"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
[[package]]
name = "unicode-xid"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"

View File

@ -8,3 +8,6 @@ license = "MIT"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
anyhow = "1.0"
rust-fsm = "0.3.0"
serde = { version = "1.0", features = ["derive"] }

93
tools/src/letters.rs Normal file
View File

@ -0,0 +1,93 @@
use crate::Letter;
use anyhow::{anyhow, Result};
/// Maps a single romanized letter to its IPA transcription.
///
/// Most letters stand for themselves. The exceptions are:
///
/// | latin | IPA |
/// |-------|-----|
/// | `b`   | ʙ̥   |
/// | `e`   | ɛ   |
/// | `x`   | ʃ   |
/// | `z`   | ʒ   |
/// | `'`   | ʔ   |
///
/// Resulting inventory:
/// consonants /d f g h j k l m n p q r s t w ʃ ʒ ʔ ʙ̥/, vowels /a ɛ i o u/.
///
/// # Errors
///
/// Returns an error reading `didn't want <input>` when `l` is not exactly one
/// of the known letters. This includes the empty string, uppercase letters
/// and any multi-letter input.
fn letter2ipa(l: &str) -> Result<String> {
    let ipa = match l {
        // Letters whose IPA symbol is the letter itself.
        "a" | "d" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q"
        | "r" | "s" | "t" | "u" | "w" => l,
        // Letters that are respelled in IPA.
        "b" => "ʙ̥",
        "e" => "ɛ",
        "x" => "ʃ",
        "z" => "ʒ",
        "'" => "ʔ",
        _ => return Err(anyhow!("didn't want {}", l)),
    };
    Ok(ipa.to_string())
}
/// Reports whether `l` is exactly one of the five vowel letters
/// `a`, `e`, `i`, `o` or `u`.
///
/// The comparison is case-sensitive and applies to the whole string, so the
/// empty string, uppercase letters and multi-letter inputs all yield `false`.
fn is_vowel(l: &str) -> bool {
    matches!(l, "a" | "e" | "i" | "o" | "u")
}
/// Reports whether `l` is exactly one of the stop consonants
/// `p`, `t`, `d`, `k`, `g`, `q` or `'` (the glottal stop).
///
/// The comparison is case-sensitive and applies to the whole string, so the
/// empty string, uppercase letters and multi-letter inputs all yield `false`.
fn is_stop(l: &str) -> bool {
    matches!(l, "p" | "t" | "d" | "k" | "g" | "q" | "'")
}
impl Letter {
    /// Builds a [`Letter`] from its romanized spelling `l`.
    ///
    /// The IPA transcription and the vowel/stop flags are all derived from
    /// `l`; the spelling itself is stored unchanged in `latin`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `letter2ipa` when `l` is not a known letter.
    //
    // NOTE(review): this constructor is not `pub`, so it is only callable
    // from inside this module — confirm whether that is intended.
    fn new(l: String) -> Result<Letter> {
        Ok(Self {
            // The lookup runs first so an unknown letter bails out before
            // anything else is computed.
            ipa: letter2ipa(&l)?,
            is_vowel: is_vowel(&l),
            is_stop: is_stop(&l),
            // Moved last: the borrows above must end before `l` is consumed.
            latin: l,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::Letter;

    /// Every letter of the alphabet must be accepted by `Letter::new`.
    #[test]
    fn alphabet() {
        const ALPHABET: &[&str] = &[
            "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r",
            "s", "t", "u", "w", "x", "z", "'",
        ];

        for latin in ALPHABET.iter() {
            Letter::new(latin.to_string()).unwrap();
        }
    }

    /// Multi-letter strings and letters outside the alphabet must be rejected.
    #[test]
    fn invalid_letters() {
        const INVALID: &[&str] = &["treason", "y"];

        for latin in INVALID.iter() {
            let parsed = Letter::new(latin.to_string());
            assert!(parsed.is_err());
        }
    }
}

View File

@ -1,3 +1,15 @@
use serde::{Serialize, Deserialize};
pub mod letters;
/// A single letter of the language's romanization, together with its
/// pronunciation and phonological classification.
///
/// `Debug`, `Clone`, `PartialEq` and `Eq` are derived alongside the serde
/// traits so values can be printed in diagnostics, copied, and compared in
/// tests.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub struct Letter {
    /// Romanized (Latin-alphabet) spelling of the letter.
    latin: String,
    /// IPA transcription of the letter.
    ipa: String,
    /// Whether the letter is a vowel.
    is_vowel: bool,
    /// Whether the letter is a stop consonant.
    is_stop: bool,
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
#[test] #[test]