maj/gemtext/src/lib.rs

487 lines
18 KiB
Rust
Raw Normal View History

//! This module implements a simple text/gemini parser based on the description
//! here: <https://gemini.circumlunar.space/docs/specification.html>
use std::io::{self, Write};
/// Build a gemini document up from a series of nodes.
///
/// Each builder method consumes the builder, appends one [`Node`] and returns
/// the builder again so calls can be chained. Finish with `build()` to get the
/// collected nodes, or `to_string()` to get the rendered document.
#[derive(Default)]
pub struct Builder {
// Nodes collected so far, in the order they were appended.
nodes: Vec<Node>,
}
impl Builder {
    /// Create a builder holding no nodes.
    pub fn new() -> Self {
        Self::default()
    }

    /// Append `node` to the document and hand the builder back for chaining.
    fn push(mut self, node: Node) -> Self {
        self.nodes.push(node);
        self
    }

    /// Append a text line.
    pub fn text<T: Into<String>>(self, data: T) -> Self {
        self.push(Node::Text(data.into()))
    }

    /// Append a single blank line to the document
    ///
    /// This is equivalent to calling [`text()`] with an empty string, or pushing a blank
    /// [`Node`]
    ///
    /// ```
    /// # use gemtext::Builder;
    /// let greeting = Builder::new()
    ///     .text("Hello")
    ///     .blank_line()
    ///     .text("universe")
    ///     .to_string();
    ///
    /// assert_eq!(greeting.trim(), "Hello\n\nuniverse");
    /// ```
    ///
    /// [`text()`]: Self::text()
    pub fn blank_line(self) -> Self {
        self.push(Node::blank())
    }

    /// Append a link line pointing at `to`, optionally labelled with `name`.
    pub fn link<T: Into<String>>(self, to: T, name: Option<String>) -> Self {
        let to = to.into();
        self.push(Node::Link { to, name })
    }

    /// Append a preformatted block with the given alt text and body.
    ///
    /// Pass an empty string as `alt_text` for a block without alt text.
    pub fn preformatted<A, T>(self, alt_text: A, data: T) -> Self
    where
        A: Into<String>,
        T: Into<String>,
    {
        let alt = alt_text.into();
        let body = data.into();
        self.push(Node::Preformatted { alt, body })
    }

    /// Append a heading line of the given `level` with `body` as its text.
    pub fn heading<T: Into<String>>(self, level: u8, body: T) -> Self {
        let body = body.into();
        self.push(Node::Heading { level, body })
    }

    /// Append an unordered list item.
    pub fn list_item<T: Into<String>>(self, item: T) -> Self {
        self.push(Node::ListItem(item.into()))
    }

    /// Append a quote line.
    pub fn quote<T: Into<String>>(self, body: T) -> Self {
        self.push(Node::Quote(body.into()))
    }

    /// Consume the builder and return the collected nodes in insertion order.
    pub fn build(self) -> Vec<Node> {
        self.nodes
    }
}
Impl ToString for Builder, accept AsRef<[Node]> in `render()` This adds a to_string method to the `Builder` allowing for the easy conversion of a Vec<Node> into a String, for any usecases where a library might not be directly writing to an io::Write, or may want to do String-y things with your document first. Without this, users would have to write to a Vec<u8> and convert to a String, which is kinda unintuitive, takes a lot of steps, and doesn't produce very readable code. This simplifies it to one method call. * Implementation of the std::str::ToString method for Builder * Accepting any AsRef<[Node]> in render (including accepting the old Vec<Node>, so not breaking) * Addition of estimate_len() to Node, used to pre-allocate the correct size of the String buffer * `estimate_len` has some quick doctests and examples. I know most of the rest of the project uses test methods, but I hope this is alright given that the tests may add some more clarity to the purpose and function of the method. * `to_string` has a single line of unsafe code. As the associated comment explains, this is provably safe, and exists just to avoid having to choose between having a bunch of duplicate code or inefficiently performing a UTF-8 check on a whole bunch bytes that we already know are safe. That said, I totally get it if you're just generally against unsafe code and will change it to be an alternative if you so wish * ToString is implemented instead of Display. This is to discourage users from directly using this in a println!() or write!() macro, which would not be a thing you would normally expect to do with this. It also gives us the advantage of being able to pre-allocate a buffer size, meaning less expensive String resizing. 
* I couldn't think of a clever way to get `render()` to work with both `io::Write`s or `fmt::Write`s without duplicating the code, but I'm dumb and might be missing something, so if there's a way to do that instead of doing my funky unsafe hack that's cool and I can do that instead.
2020-11-30 04:17:15 +00:00
impl ToString for Builder {
/// Render a document to a string
///
/// This produces a text/gemini compliant text document, represented as a string
fn to_string(&self) -> String {
let len: usize = self.nodes.iter().map(Node::estimate_len).sum(); // sum up node lengths
let mut bytes = Vec::with_capacity(len + self.nodes.len()); // add in inter-node newlines
2020-11-30 05:52:08 +00:00
render(self, &mut bytes).unwrap(); // Writing to a string shouldn't produce errors
Impl ToString for Builder, accept AsRef<[Node]> in `render()` This adds a to_string method to the `Builder` allowing for the easy conversion of a Vec<Node> into a String, for any usecases where a library might not be directly writing to an io::Write, or may want to do String-y things with your document first. Without this, users would have to write to a Vec<u8> and convert to a String, which is kinda unintuitive, takes a lot of steps, and doesn't produce very readable code. This simplifies it to one method call. * Implementation of the std::str::ToString method for Builder * Accepting any AsRef<[Node]> in render (including accepting the old Vec<Node>, so not breaking) * Addition of estimate_len() to Node, used to pre-allocate the correct size of the String buffer * `estimate_len` has some quick doctests and examples. I know most of the rest of the project uses test methods, but I hope this is alright given that the tests may add some more clarity to the purpose and function of the method. * `to_string` has a single line of unsafe code. As the associated comment explains, this is provably safe, and exists just to avoid having to choose between having a bunch of duplicate code or inefficiently performing a UTF-8 check on a whole bunch bytes that we already know are safe. That said, I totally get it if you're just generally against unsafe code and will change it to be an alternative if you so wish * ToString is implemented instead of Display. This is to discourage users from directly using this in a println!() or write!() macro, which would not be a thing you would normally expect to do with this. It also gives us the advantage of being able to pre-allocate a buffer size, meaning less expensive String resizing. 
* I couldn't think of a clever way to get `render()` to work with both `io::Write`s or `fmt::Write`s without duplicating the code, but I'm dumb and might be missing something, so if there's a way to do that instead of doing my funky unsafe hack that's cool and I can do that instead.
2020-11-30 04:17:15 +00:00
unsafe {
// This is safe because bytes is composed of Strings. We could have this as
// pure safe code by replicating the `render()` method and switching it to use
// a fmt::Write (or even `String::push()`)instead of a io::Write, but this has
// the same effect, with much DRYer code.
String::from_utf8_unchecked(bytes)
}
}
}
2020-11-30 05:52:08 +00:00
impl AsRef<[Node]> for Builder {
    /// Borrow the nodes collected so far as a slice.
    fn as_ref(&self) -> &[Node] {
        self.nodes.as_slice()
    }
}
impl AsMut<[Node]> for Builder {
    /// Mutably borrow the nodes collected so far as a slice.
    ///
    /// Existing nodes can be edited in place through the slice, but nodes cannot
    /// be added or removed this way.
    fn as_mut(&mut self) -> &mut [Node] {
        self.nodes.as_mut_slice()
    }
}
2020-11-30 06:40:23 +00:00
impl From<Builder> for Vec<Node> {
/// Convert into a collection of [`Node`]s.
///
/// Equivilent to calling [`Builder::build()`]
fn from(builder: Builder) -> Self {
builder.build()
}
}
/// Render a set of nodes as a document to a writer.
Impl ToString for Builder, accept AsRef<[Node]> in `render()` This adds a to_string method to the `Builder` allowing for the easy conversion of a Vec<Node> into a String, for any usecases where a library might not be directly writing to an io::Write, or may want to do String-y things with your document first. Without this, users would have to write to a Vec<u8> and convert to a String, which is kinda unintuitive, takes a lot of steps, and doesn't produce very readable code. This simplifies it to one method call. * Implementation of the std::str::ToString method for Builder * Accepting any AsRef<[Node]> in render (including accepting the old Vec<Node>, so not breaking) * Addition of estimate_len() to Node, used to pre-allocate the correct size of the String buffer * `estimate_len` has some quick doctests and examples. I know most of the rest of the project uses test methods, but I hope this is alright given that the tests may add some more clarity to the purpose and function of the method. * `to_string` has a single line of unsafe code. As the associated comment explains, this is provably safe, and exists just to avoid having to choose between having a bunch of duplicate code or inefficiently performing a UTF-8 check on a whole bunch bytes that we already know are safe. That said, I totally get it if you're just generally against unsafe code and will change it to be an alternative if you so wish * ToString is implemented instead of Display. This is to discourage users from directly using this in a println!() or write!() macro, which would not be a thing you would normally expect to do with this. It also gives us the advantage of being able to pre-allocate a buffer size, meaning less expensive String resizing. 
* I couldn't think of a clever way to get `render()` to work with both `io::Write`s or `fmt::Write`s without duplicating the code, but I'm dumb and might be missing something, so if there's a way to do that instead of doing my funky unsafe hack that's cool and I can do that instead.
2020-11-30 04:17:15 +00:00
pub fn render(nodes: impl AsRef<[Node]>, out: &mut impl Write) -> io::Result<()> {
use Node::*;
Impl ToString for Builder, accept AsRef<[Node]> in `render()` This adds a to_string method to the `Builder` allowing for the easy conversion of a Vec<Node> into a String, for any usecases where a library might not be directly writing to an io::Write, or may want to do String-y things with your document first. Without this, users would have to write to a Vec<u8> and convert to a String, which is kinda unintuitive, takes a lot of steps, and doesn't produce very readable code. This simplifies it to one method call. * Implementation of the std::str::ToString method for Builder * Accepting any AsRef<[Node]> in render (including accepting the old Vec<Node>, so not breaking) * Addition of estimate_len() to Node, used to pre-allocate the correct size of the String buffer * `estimate_len` has some quick doctests and examples. I know most of the rest of the project uses test methods, but I hope this is alright given that the tests may add some more clarity to the purpose and function of the method. * `to_string` has a single line of unsafe code. As the associated comment explains, this is provably safe, and exists just to avoid having to choose between having a bunch of duplicate code or inefficiently performing a UTF-8 check on a whole bunch bytes that we already know are safe. That said, I totally get it if you're just generally against unsafe code and will change it to be an alternative if you so wish * ToString is implemented instead of Display. This is to discourage users from directly using this in a println!() or write!() macro, which would not be a thing you would normally expect to do with this. It also gives us the advantage of being able to pre-allocate a buffer size, meaning less expensive String resizing. 
* I couldn't think of a clever way to get `render()` to work with both `io::Write`s or `fmt::Write`s without duplicating the code, but I'm dumb and might be missing something, so if there's a way to do that instead of doing my funky unsafe hack that's cool and I can do that instead.
2020-11-30 04:17:15 +00:00
for node in nodes.as_ref() {
match node {
Text(body) => {
let special_prefixes = ["=>", "```", "#", "*", ">"];
if special_prefixes.iter().any(|prefix| body.starts_with(prefix)) {
write!(out, " ")?;
}
write!(out, "{}\n", body)?
},
Link { to, name } => match name {
Some(name) => write!(out, "=> {} {}\n", to, name)?,
None => write!(out, "=> {}\n", to)?,
},
Preformatted { alt, body } => write!(out, "```{}\n{}\n```\n", alt, body)?,
Impl ToString for Builder, accept AsRef<[Node]> in `render()` This adds a to_string method to the `Builder` allowing for the easy conversion of a Vec<Node> into a String, for any usecases where a library might not be directly writing to an io::Write, or may want to do String-y things with your document first. Without this, users would have to write to a Vec<u8> and convert to a String, which is kinda unintuitive, takes a lot of steps, and doesn't produce very readable code. This simplifies it to one method call. * Implementation of the std::str::ToString method for Builder * Accepting any AsRef<[Node]> in render (including accepting the old Vec<Node>, so not breaking) * Addition of estimate_len() to Node, used to pre-allocate the correct size of the String buffer * `estimate_len` has some quick doctests and examples. I know most of the rest of the project uses test methods, but I hope this is alright given that the tests may add some more clarity to the purpose and function of the method. * `to_string` has a single line of unsafe code. As the associated comment explains, this is provably safe, and exists just to avoid having to choose between having a bunch of duplicate code or inefficiently performing a UTF-8 check on a whole bunch bytes that we already know are safe. That said, I totally get it if you're just generally against unsafe code and will change it to be an alternative if you so wish * ToString is implemented instead of Display. This is to discourage users from directly using this in a println!() or write!() macro, which would not be a thing you would normally expect to do with this. It also gives us the advantage of being able to pre-allocate a buffer size, meaning less expensive String resizing. 
* I couldn't think of a clever way to get `render()` to work with both `io::Write`s or `fmt::Write`s without duplicating the code, but I'm dumb and might be missing something, so if there's a way to do that instead of doing my funky unsafe hack that's cool and I can do that instead.
2020-11-30 04:17:15 +00:00
Heading { level, body } => write!(out, "{} {}\n", "#".repeat(*level as usize), body)?,
ListItem(body) => write!(out, "* {}\n", body)?,
Quote(body) => write!(out, "> {}\n", body)?,
};
}
Ok(())
}
/// Individual nodes of the document. Each node correlates to a line in the file.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Node {
    /// Text lines are the most fundamental line type - any line which does not
    /// match the definition of another line type defined below defaults to
    /// being a text line. The majority of lines in a typical text/gemini document will be text lines.
    Text(String),

    /// Lines beginning with the two characters "=>" are link lines, which have the following syntax:
    ///
    /// ```gemini
    /// =>[<whitespace>]<URL>[<whitespace><USER-FRIENDLY LINK NAME>]
    /// ```
    ///
    /// where:
    ///
    /// * `<whitespace>` is any non-zero number of consecutive spaces or tabs
    /// * Square brackets indicate that the enclosed content is optional.
    /// * `<URL>` is a URL, which may be absolute or relative. If the URL
    ///   does not include a scheme, a scheme of `gemini://` is implied.
    Link { to: String, name: Option<String> },

    /// Any line whose first three characters are "```" (i.e. three consecutive
    /// back ticks with no leading whitespace) are preformatted toggle lines.
    /// These lines should NOT be included in the rendered output shown to the
    /// user. Instead, these lines toggle the parser between preformatted mode
    /// being "on" or "off". Preformatted mode should be "off" at the beginning
    /// of a document. The current status of preformatted mode is the only
    /// internal state a parser is required to maintain. When preformatted mode
    /// is "on", the usual rules for identifying line types are suspended, and
    /// all lines should be identified as preformatted text lines (see 5.4.4).
    ///
    /// Preformatted text lines should be presented to the user in a "neutral",
    /// monowidth font without any alteration to whitespace or stylistic
    /// enhancements. Graphical clients should use scrolling mechanisms to present
    /// preformatted text lines which are longer than the client viewport, in
    /// preference to wrapping. In displaying preformatted text lines, clients
    /// should keep in mind applications like ASCII art and computer source
    /// code: in particular, source code in languages with significant whitespace
    /// (e.g. Python) should be able to be copied and pasted from the client into
    /// a file and interpreted/compiled without any problems arising from the
    /// client's manner of displaying them.
    ///
    /// The first preformatted toggle of a document is often followed by a short
    /// string, which acts as alt-text for the preformatted block. This is also
    /// often used to denote the language of code in a block of text. For example,
    /// a block starting with the text `\`\`\`rust` may be interpreted as rust
    /// code, and a block starting with `\`\`\` An ascii art owl` would be
    /// described aptly to visually impaired users using a screen reader. The alt
    /// text may be separated from the toggle by whitespace. `gemtext` currently
    /// renders alt text without this separation.
    ///
    /// To create a preformatted block with no alt text, simply pass a zero-length
    /// string as alt text.
    Preformatted { alt: String, body: String },

    /// Lines beginning with "#" are heading lines. Heading lines consist of one,
    /// two or three consecutive "#" characters, followed by optional whitespace,
    /// followed by heading text. The number of # characters indicates the "level"
    /// of header; #, ## and ### can be thought of as analogous to `<h1>`, `<h2>`
    /// and `<h3>` in HTML.
    ///
    /// Heading text should be presented to the user, and clients MAY use special
    /// formatting, e.g. a larger or bold font, to indicate its status as a header
    /// (simple clients may simply print the line, including its leading #s,
    /// without any styling at all). However, the main motivation for the
    /// definition of heading lines is not stylistic but to provide a
    /// machine-readable representation of the internal structure of the document.
    /// Advanced clients can use this information to, e.g. display an automatically
    /// generated and hierarchically formatted "table of contents" for a long
    /// document in a side-pane, allowing users to easily jump to specific sections
    /// without excessive scrolling. CMS-style tools automatically generating menus
    /// or Atom/RSS feeds for a directory of text/gemini files can use first
    /// heading in the file as a human-friendly title.
    Heading { level: u8, body: String },

    /// Lines beginning with "* " are unordered list items. This line type exists
    /// purely for stylistic reasons. The * may be replaced in advanced clients by
    /// a bullet symbol. Any text after the "* " should be presented to the user as
    /// if it were a text line, i.e. wrapped to fit the viewport and formatted
    /// "nicely". Advanced clients can take the space of the bullet symbol into
    /// account when wrapping long list items to ensure that all lines of text
    /// corresponding to the item are offset an equal distance from the left of the screen.
    ListItem(String),

    /// Lines beginning with ">" are quote lines. This line type exists so that
    /// advanced clients may use distinct styling to convey to readers the important
    /// semantic information that certain text is being quoted from an external
    /// source. For example, when wrapping long lines to the viewport, each
    /// resultant line may have a ">" symbol placed at the front.
    Quote(String),
}

impl Node {
    /// Line-type markers that `render()` escapes with one leading space when a
    /// text node starts with them. Kept here so `estimate_len` can count that
    /// extra byte.
    const ESCAPED_TEXT_PREFIXES: [&'static str; 5] = ["=>", "```", "#", "*", ">"];

    /// Create an empty text node, which renders as a blank line.
    pub fn blank() -> Node {
        Node::Text(String::new())
    }

    /// Cheaply estimate the length of this node
    ///
    /// This measures length in bytes, *not characters*. So if the user includes
    /// non-ascii characters, a single one of these characters may add several bytes to
    /// the length, despite only displaying as one character.
    ///
    /// This does include any newlines inside the node, but not the trailing newline
    /// that ends it. For example, a preformatted text block containing a single line
    /// reading "trans rights! 🏳️‍⚧️" would have a length of 38: 3 backticks, a newline,
    /// the 30 bytes of text (including 16 bytes for the trans flag), another newline,
    /// and another 3 backticks.
    ///
    /// A text node that starts with the marker of another line type is rendered
    /// with one leading space, and that space is counted as well.
    ///
    /// ```
    /// # use gemtext::Node;
    /// let simple_text = Node::Text(String::from("Henlo worl"));
    /// let escaped_text = Node::Text(String::from("#1 World's Best Coder"));
    /// let linky_link = Node::Link { to: "gemini://cetacean.club/maj/".to_string(), name: Some("Maj".to_string()) };
    /// let flag_text = "trans rights! 🏳️‍⚧️";
    /// let human_rights = Node::Preformatted {
    ///     alt: "".to_string(),
    ///     body: flag_text.to_string(),
    /// };
    ///
    /// assert_eq!(
    ///     simple_text.estimate_len(),
    ///     "Henlo worl".len()
    /// );
    /// assert_eq!(
    ///     escaped_text.estimate_len(),
    ///     " #1 World's Best Coder".len()
    /// );
    /// assert_eq!(
    ///     linky_link.estimate_len(),
    ///     "=> gemini://cetacean.club/maj/ Maj".len()
    /// );
    /// assert_eq!(
    ///     human_rights.estimate_len(),
    ///     format!("```\n{}\n```", flag_text).len()
    /// );
    /// ```
    pub fn estimate_len(&self) -> usize {
        match self {
            Self::Text(text) => {
                let escaped = Self::ESCAPED_TEXT_PREFIXES
                    .iter()
                    .any(|prefix| text.starts_with(*prefix));
                // One extra byte for the escaping space, when present.
                text.len() + usize::from(escaped)
            }
            // "=> " + target, plus " " + label when a label is present.
            Self::Link { to, name } => {
                3 + to.len() + name.as_ref().map_or(0, |label| label.len() + 1)
            }
            // Two 3-byte fences and the two newlines between fence, body and fence.
            Self::Preformatted { alt, body } => alt.len() + body.len() + 8,
            // `level` '#' characters, one space, then the heading text.
            Self::Heading { level, body } => usize::from(*level) + 1 + body.len(),
            // Marker character, one space, then the text.
            Self::ListItem(item) | Self::Quote(item) => 2 + item.len(),
        }
    }
}
/// Parse a text/gemini document into a list of [`Node`]s.
///
/// The document is processed one line at a time, and the first matching rule wins:
///
/// * A line starting with "```" toggles preformatted mode. On the opening
///   toggle the rest of the line, trimmed, becomes the block's alt text. On the
///   closing toggle the collected lines are emitted as one
///   [`Node::Preformatted`] whose body has trailing whitespace removed; any
///   text after the closing toggle is ignored.
/// * While preformatted mode is on, every other line is collected verbatim.
/// * `>` starts a [`Node::Quote`] and `*` starts a [`Node::ListItem`]; the rest
///   of the line is trimmed.
/// * `###`, `##` and `#` start a [`Node::Heading`] of level 3, 2 and 1.
/// * `=>` starts a [`Node::Link`]. The first whitespace-separated word is the
///   target and the remaining words, joined by single spaces, are the name.
///   A link line without a target is kept as a text line.
/// * Anything else is a [`Node::Text`] holding the line unchanged.
///
/// A preformatted block that is still open when the document ends is discarded.
pub fn parse(doc: &str) -> Vec<Node> {
    let mut result: Vec<Node> = Vec::new();
    let mut in_preformatted = false;
    let mut preformatted_buffer = String::new();
    let mut alt = "";

    for line in doc.lines() {
        if let Some(trailing) = line.strip_prefix("```") {
            in_preformatted = !in_preformatted;
            if in_preformatted {
                alt = trailing.trim();
            } else {
                // Take the buffer so the next block starts from an empty one.
                let body = std::mem::take(&mut preformatted_buffer);
                result.push(Node::Preformatted {
                    alt: alt.to_string(),
                    body: body.trim_end().to_string(),
                });
            }
            continue;
        }

        if in_preformatted {
            preformatted_buffer.push_str(line);
            preformatted_buffer.push('\n');
            continue;
        }

        let node = if let Some(rest) = line.strip_prefix('>') {
            Node::Quote(rest.trim().to_string())
        } else if let Some(rest) = line.strip_prefix('*') {
            Node::ListItem(rest.trim().to_string())
        } else if let Some(rest) = line.strip_prefix("###") {
            Node::Heading {
                level: 3,
                body: rest.trim().to_string(),
            }
        } else if let Some(rest) = line.strip_prefix("##") {
            Node::Heading {
                level: 2,
                body: rest.trim().to_string(),
            }
        } else if let Some(rest) = line.strip_prefix('#') {
            Node::Heading {
                level: 1,
                body: rest.trim().to_string(),
            }
        } else if let Some(rest) = line.strip_prefix("=>") {
            let mut words = rest.split_ascii_whitespace();
            match words.next() {
                Some(to) => {
                    let name = words.collect::<Vec<&str>>().join(" ");
                    Node::Link {
                        to: to.to_string(),
                        name: if name.is_empty() { None } else { Some(name) },
                    }
                }
                // "=>" with nothing after it has no target; indexing the first
                // word here used to panic. Keep the line as plain text instead.
                None => Node::Text(line.to_string()),
            }
        } else {
            Node::Text(line.to_string())
        };

        result.push(node);
    }

    result
}
// Unit tests for `parse` and `render`. Every test first calls
// `pretty_env_logger::try_init()` and ignores its result.
#[cfg(test)]
mod tests {
use super::*;
// Smoke test: the bundled majc help document parses into at least one node.
#[test]
fn basic() {
let _ = pretty_env_logger::try_init();
let msg = include_str!("../../majc/src/help.gmi");
let doc = super::parse(msg);
assert_ne!(doc.len(), 0);
}
// A quote marker needs no space before the quoted text.
#[test]
fn quote() {
let _ = pretty_env_logger::try_init();
let msg = ">hi there";
let expected: Vec<Node> = vec![Node::Quote("hi there".to_string())];
assert_eq!(expected, parse(msg));
}
// A list marker without a following space is still parsed as a list item.
#[test]
fn list() {
let _ = pretty_env_logger::try_init();
let msg = "*hi there";
let expected: Vec<Node> = vec![Node::ListItem("hi there".to_string())];
assert_eq!(expected, parse(msg));
}
// Alt text comes from the opening toggle line; lines after the closing
// toggle (including the blank one) are parsed as ordinary text.
#[test]
fn preformatted() {
let _ = pretty_env_logger::try_init();
let msg = "```hi there\n\
obi-wan kenobi\n\
```\n\
\n\
Test\n";
let expected: Vec<Node> = vec![
Node::Preformatted{ alt: "hi there".to_string(), body: "obi-wan kenobi".to_string() },
Node::Text(String::new()),
Node::Text("Test".to_string()),
];
assert_eq!(expected, parse(msg));
}
// Heading levels 1-3, with and without whitespace after the marker.
#[test]
fn header() {
let _ = pretty_env_logger::try_init();
let msg = "#hi\n##there\n### my friends";
let expected: Vec<Node> = vec![
Node::Heading {
level: 1,
body: "hi".to_string(),
},
Node::Heading {
level: 2,
body: "there".to_string(),
},
Node::Heading {
level: 3,
body: "my friends".to_string(),
},
];
assert_eq!(expected, parse(msg));
}
// Links with and without a name; whitespace after "=>" is optional.
#[test]
fn link() {
let _ = pretty_env_logger::try_init();
let msg = "=>/\n=> / Go home";
let expected: Vec<Node> = vec![
Node::Link {
to: "/".to_string(),
name: None,
},
Node::Link {
to: "/".to_string(),
name: Some("Go home".to_string()),
},
];
assert_eq!(expected, parse(msg));
}
// Regression fixture for preformatted toggles; the fixture file itself is
// not visible from this module.
#[test]
fn ambiguous_preformatted() {
let _ = pretty_env_logger::try_init();
let msg = include_str!("../../testdata/ambig_preformatted.gmi");
let expected: Vec<Node> = vec![
Node::Preformatted { alt: "foo".to_string(), body: "FOO".to_string() },
Node::Text("Foo bar".to_string()),
];
assert_eq!(expected, parse(msg));
}
// Text that starts with a line-type marker is rendered with a leading space.
#[test]
fn ambiguous_text() {
let _ = pretty_env_logger::try_init();
let original = Node::Text("#1 World's Best Coder".to_string());
let expected = " #1 World's Best Coder\n";
let mut rendered: Vec<u8> = vec![];
render(vec![original], &mut rendered).unwrap();
let rendered = String::from_utf8(rendered).unwrap();
assert_eq!(expected, rendered)
}
}