// src/gemini.rs
//
// A simple text/gemini parser based on the description here:
// https://gemini.circumlunar.space/docs/specification.html
//
// Provenance: introduced by commit 479fb697ee5bc681e663d53576f8c8ddb388e8d3
// ("add text/gemini parser", Christine Dodrill, 2020-07-25). The same commit
// also:
//   * majc/src/main.rs: extends the "About" dialog text to
//     "{} {}\n\nby {}\n\nSee https://tulpa.dev/cadey/maj for more information"
//     (package name, package version, package authors);
//   * src/lib.rs: adds `pub mod gemini;`;
//   * src/response.rs: removes the `log::trace!("buf: {:?}: {:?}", ...)` line.

/// Individual nodes of the document. Each node correlates to a line in the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Node {
    /// Text lines are the most fundamental line type - any line which does not
    /// match the definition of another line type defined below defaults to
    /// being a text line. The majority of lines in a typical text/gemini
    /// document will be text lines.
    Text(String),

    /// Lines beginning with the two characters "=>" are link lines, which have
    /// the following syntax:
    ///
    /// ```gemini
    /// =>[<whitespace>]<URL>[<whitespace><USER-FRIENDLY LINK NAME>]
    /// ```
    ///
    /// where:
    ///
    /// * `<whitespace>` is any non-zero number of consecutive spaces or tabs
    /// * Square brackets indicate that the enclosed content is optional.
    /// * `<URL>` is a URL, which may be absolute or relative. If the URL
    ///   does not include a scheme, a scheme of `gemini://` is implied.
    Link { to: String, name: Option<String> },

    /// Any line whose first three characters are "```" (i.e. three consecutive
    /// back ticks with no leading whitespace) are preformatted toggle lines.
    /// These lines should NOT be included in the rendered output shown to the
    /// user. Instead, these lines toggle the parser between preformatted mode
    /// being "on" or "off". Preformatted mode should be "off" at the beginning
    /// of a document. The current status of preformatted mode is the only
    /// internal state a parser is required to maintain. When preformatted mode
    /// is "on", the usual rules for identifying line types are suspended, and
    /// all lines should be identified as preformatted text lines (see 5.4.4).
    ///
    /// Preformatted text lines should be presented to the user in a "neutral",
    /// monowidth font without any alteration to whitespace or stylistic
    /// enhancements. Graphical clients should use scrolling mechanisms to present
    /// preformatted text lines which are longer than the client viewport, in
    /// preference to wrapping. In displaying preformatted text lines, clients
    /// should keep in mind applications like ASCII art and computer source
    /// code: in particular, source code in languages with significant whitespace
    /// (e.g. Python) should be able to be copied and pasted from the client into
    /// a file and interpreted/compiled without any problems arising from the
    /// client's manner of displaying them.
    ///
    /// The payload holds every line of one block, each terminated by `\n`.
    Preformatted(String),

    /// Lines beginning with "#" are heading lines. Heading lines consist of one,
    /// two or three consecutive "#" characters, followed by optional whitespace,
    /// followed by heading text. The number of # characters indicates the "level"
    /// of header; #, ## and ### can be thought of as analogous to `<h1>`, `<h2>`
    /// and `<h3>` in HTML.
    ///
    /// Heading text should be presented to the user, and clients MAY use special
    /// formatting, e.g. a larger or bold font, to indicate its status as a header
    /// (simple clients may simply print the line, including its leading #s,
    /// without any styling at all). However, the main motivation for the
    /// definition of heading lines is not stylistic but to provide a
    /// machine-readable representation of the internal structure of the document.
    /// Advanced clients can use this information to, e.g. display an automatically
    /// generated and hierarchically formatted "table of contents" for a long
    /// document in a side-pane, allowing users to easily jump to specific sections
    /// without excessive scrolling. CMS-style tools automatically generating menus
    /// or Atom/RSS feeds for a directory of text/gemini files can use first
    /// heading in the file as a human-friendly title.
    Heading { level: u8, body: String },

    /// Lines beginning with "* " are unordered list items. This line type exists
    /// purely for stylistic reasons. The * may be replaced in advanced clients by
    /// a bullet symbol. Any text after the "* " should be presented to the user as
    /// if it were a text line, i.e. wrapped to fit the viewport and formatted
    /// "nicely". Advanced clients can take the space of the bullet symbol into
    /// account when wrapping long list items to ensure that all lines of text
    /// corresponding to the item are offset an equal distance from the left of
    /// the screen.
    ListItem(String),

    /// Lines beginning with ">" are quote lines. This line type exists so that
    /// advanced clients may use distinct styling to convey to readers the important
    /// semantic information that certain text is being quoted from an external
    /// source. For example, when wrapping long lines to the viewport, each
    /// resultant line may have a ">" symbol placed at the front.
    Quote(String),
}

/// Prefix that switches preformatted mode on or off.
const PREFORMATTED_TOGGLE: &str = "```";

/// Heading markers, longest first so that `###` is not mistaken for `#`.
const HEADING_MARKERS: [(&str, u8); 3] = [("###", 3), ("##", 2), ("#", 1)];

/// Parses a text/gemini document into one [`Node`] per logical line.
///
/// Lines are split with [`str::lines`], so both `\n` and `\r\n` endings are
/// accepted. Consecutive lines between two toggle lines are collected into a
/// single [`Node::Preformatted`]; the toggle lines themselves produce no node.
///
/// Behaviour worth knowing:
///
/// * Any line *starting with* three back ticks toggles preformatted mode, so
///   an opening fence may carry alt text (for example a language name). The
///   alt text is discarded because [`Node::Preformatted`] has no field for it.
/// * A preformatted block that is still open at the end of the input is
///   emitted rather than dropped, provided it contains at least one line.
/// * A `=>` line without a URL is returned as [`Node::Text`] holding the
///   original line.
///
/// This function never panics.
pub fn parse(doc: &str) -> Vec<Node> {
    let mut result: Vec<Node> = Vec::new();
    let mut collect_preformatted = false;
    let mut preformatted_buffer = String::new();

    for line in doc.lines() {
        if line.starts_with(PREFORMATTED_TOGGLE) {
            collect_preformatted = !collect_preformatted;
            if !collect_preformatted {
                // Closing fence: hand over the collected text and leave an
                // empty buffer behind for the next block.
                result.push(Node::Preformatted(std::mem::take(
                    &mut preformatted_buffer,
                )));
            }
            continue;
        }

        if collect_preformatted {
            preformatted_buffer.push_str(line);
            preformatted_buffer.push('\n');
            continue;
        }

        result.push(parse_line(line));
    }

    // The document ended inside a block: keep what was collected.
    if collect_preformatted && !preformatted_buffer.is_empty() {
        result.push(Node::Preformatted(preformatted_buffer));
    }

    result
}

/// Classifies a single line that is outside of any preformatted block.
fn parse_line(line: &str) -> Node {
    if let Some(rest) = line.strip_prefix('>') {
        return Node::Quote(rest.trim().to_string());
    }

    // NOTE: the specification asks for "* " (asterisk plus space); a bare "*"
    // is accepted here as well, which the `list` unit test relies on.
    if let Some(rest) = line.strip_prefix('*') {
        return Node::ListItem(rest.trim().to_string());
    }

    for &(marker, level) in HEADING_MARKERS.iter() {
        if let Some(rest) = line.strip_prefix(marker) {
            return Node::Heading {
                level,
                body: rest.trim().to_string(),
            };
        }
    }

    if let Some(rest) = line.strip_prefix("=>") {
        let mut parts = rest.split_ascii_whitespace();
        // No token after "=>" means there is no URL; fall through to `Text`.
        if let Some(to) = parts.next() {
            // Remaining tokens form the link name, joined by single spaces.
            let name = parts.collect::<Vec<&str>>().join(" ");
            return Node::Link {
                to: to.to_string(),
                name: if name.is_empty() { None } else { Some(name) },
            };
        }
    }

    Node::Text(line.to_string())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn basic() {
        // Inline sample so the test does not depend on a file from another crate.
        let msg = "# Help\n\nSome text\n=> / Go home\n* item\n> quote\n";
        let doc = parse(msg);
        assert_ne!(doc.len(), 0);
    }

    #[test]
    fn quote() {
        let msg = ">hi there";
        let expected: Vec<Node> = vec![Node::Quote("hi there".to_string())];
        assert_eq!(expected, parse(msg));
    }

    #[test]
    fn list() {
        let msg = "*hi there";
        let expected: Vec<Node> = vec![Node::ListItem("hi there".to_string())];
        assert_eq!(expected, parse(msg));
    }

    #[test]
    fn preformatted() {
        let msg = "```\n\
                   hi there\n\
                   ```\n\
                   \n\
                   Test\n";
        let expected: Vec<Node> = vec![
            Node::Preformatted("hi there\n".to_string()),
            Node::Text(String::new()),
            Node::Text("Test".to_string()),
        ];
        assert_eq!(expected, parse(msg));
    }

    #[test]
    fn preformatted_with_alt_text() {
        let msg = "```rust\nfn main() {}\n```\nafter";
        let expected: Vec<Node> = vec![
            Node::Preformatted("fn main() {}\n".to_string()),
            Node::Text("after".to_string()),
        ];
        assert_eq!(expected, parse(msg));
    }

    #[test]
    fn preformatted_unterminated() {
        let msg = "```\nhi there";
        let expected: Vec<Node> = vec![Node::Preformatted("hi there\n".to_string())];
        assert_eq!(expected, parse(msg));
    }

    #[test]
    fn header() {
        let msg = "#hi\n##there\n### my friends";
        let expected: Vec<Node> = vec![
            Node::Heading {
                level: 1,
                body: "hi".to_string(),
            },
            Node::Heading {
                level: 2,
                body: "there".to_string(),
            },
            Node::Heading {
                level: 3,
                body: "my friends".to_string(),
            },
        ];
        assert_eq!(expected, parse(msg));
    }

    #[test]
    fn link() {
        let msg = "=>/\n=> / Go home";
        let expected: Vec<Node> = vec![
            Node::Link {
                to: "/".to_string(),
                name: None,
            },
            Node::Link {
                to: "/".to_string(),
                name: Some("Go home".to_string()),
            },
        ];
        assert_eq!(expected, parse(msg));
    }

    #[test]
    fn link_without_url() {
        let expected: Vec<Node> = vec![Node::Text("=>".to_string())];
        assert_eq!(expected, parse("=>"));
    }
}