From 34dca8d92df769b330af7c332b5cc551436a136b Mon Sep 17 00:00:00 2001 From: Emii Tatsuo Date: Sun, 29 Nov 2020 23:17:15 -0500 Subject: [PATCH 1/3] Impl ToString for Builder, accept AsRef<[Node]> in `render()` This adds a to_string method to the `Builder` allowing for the easy conversion of a Vec<Node> into a String, for any use cases where a library might not be directly writing to an io::Write, or may want to do String-y things with your document first. Without this, users would have to write to a Vec<u8> and convert to a String, which is kinda unintuitive, takes a lot of steps, and doesn't produce very readable code. This simplifies it to one method call. * Implementation of the std::string::ToString method for Builder * Accepting any AsRef<[Node]> in render (including accepting the old Vec<Node>, so not breaking) * Addition of estimate_len() to Node, used to pre-allocate the correct size of the String buffer * `estimate_len` has some quick doctests and examples. I know most of the rest of the project uses test methods, but I hope this is alright given that the tests may add some more clarity to the purpose and function of the method. * `to_string` has a single line of unsafe code. As the associated comment explains, this is provably safe, and exists just to avoid having to choose between having a bunch of duplicate code or inefficiently performing a UTF-8 check on a whole bunch of bytes that we already know are safe. That said, I totally get it if you're just generally against unsafe code, and I will change it to an alternative if you so wish. * ToString is implemented instead of Display. This is to discourage users from directly using this in a println!() or write!() macro, which would not be a thing you would normally expect to do with this. It also gives us the advantage of being able to pre-allocate a buffer size, meaning less expensive String resizing.
* I couldn't think of a clever way to get `render()` to work with both `io::Write`s or `fmt::Write`s without duplicating the code, but I'm dumb and might be missing something, so if there's a way to do that instead of doing my funky unsafe hack that's cool and I can do that instead. --- gemtext/src/lib.rs | 66 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/gemtext/src/lib.rs b/gemtext/src/lib.rs index e350b95..71ab545 100644 --- a/gemtext/src/lib.rs +++ b/gemtext/src/lib.rs @@ -54,11 +54,30 @@ impl Builder { } } +impl ToString for Builder { + /// Render a document to a string + /// + /// This produces a text/gemini compliant text document, represented as a string + fn to_string(&self) -> String { + let len: usize = self.nodes.iter().map(Node::estimate_len).sum(); // sum up node lengths + let mut bytes = Vec::with_capacity(len + self.nodes.len()); // add in inter-node newlines + render(&self.nodes, &mut bytes).unwrap(); // Writing to a string shouldn't produce errors + + unsafe { + // This is safe because bytes is composed of Strings. We could have this as + // pure safe code by replicating the `render()` method and switching it to use + // a fmt::Write (or even `String::push()`)instead of a io::Write, but this has + // the same effect, with much DRYer code. + String::from_utf8_unchecked(bytes) + } + } +} + /// Render a set of nodes as a document to a writer. 
-pub fn render(nodes: Vec, out: &mut impl Write) -> io::Result<()> { +pub fn render(nodes: impl AsRef<[Node]>, out: &mut impl Write) -> io::Result<()> { use Node::*; - for node in nodes { + for node in nodes.as_ref() { match node { Text(body) => { let special_prefixes = ["=>", "```", "#", "*", ">"]; @@ -72,7 +91,7 @@ pub fn render(nodes: Vec, out: &mut impl Write) -> io::Result<()> { None => write!(out, "=> {}\n", to)?, }, Preformatted(body) => write!(out, "```\n{}\n```\n", body)?, - Heading { level, body } => write!(out, "{} {}\n", "#".repeat(level as usize), body)?, + Heading { level, body } => write!(out, "{} {}\n", "#".repeat(*level as usize), body)?, ListItem(body) => write!(out, "* {}\n", body)?, Quote(body) => write!(out, "> {}\n", body)?, }; @@ -166,6 +185,47 @@ impl Node { pub fn blank() -> Node { Node::Text("".to_string()) } + + /// Cheaply estimate the length of this node + /// + /// This measures length in bytes, *not characters*. So if the user includes + /// non-ascii characters, a single one of these characters may add several bytes to + /// the length, despite only displaying as one character. + /// + /// This does include any newlines, but not any trailing newlines. For example, a + /// preformatted text block containing a single line reading "trans rights! 🏳️‍⚧️" + /// would have a length of 30: 3 backticks, a newline, the text (including 16 bytes + /// for the trans flag), another newline, and another 3 backticks. + /// + /// ``` + /// # use gemtext::Node; + /// let simple_text = Node::Text(String::from("Henlo worl")); + /// let linky_link = Node::Link { to: "gemini://cetacean.club/maj/".to_string(), name: Some("Maj".to_string()) }; + /// let human_rights = Node::Preformatted("trans rights! 
🏳️‍⚧️".to_string());; + /// + /// assert_eq!( + /// simple_text.estimate_len(), + /// "Henlo worl".as_bytes().len() + /// ); + /// assert_eq!( + /// linky_link.estimate_len(), + /// "=> gemini://cetacean.club/maj/ Maj".as_bytes().len() + /// ); + /// assert_eq!( + /// human_rights.estimate_len(), + /// "```\ntrans rights! 🏳️‍⚧️\n```".as_bytes().len() + /// ); + /// ``` + pub fn estimate_len(&self) -> usize { + match self { + Self::Text(text) => text.len(), + Self::Link { to, name } => 3 + to.as_bytes().len() + + name.as_ref().map(|n| n.as_bytes().len() + 1).unwrap_or(0), + Self::Preformatted(text) => text.as_bytes().len() + 8, + Self::Heading { level, body } => *level as usize + 1 + body.as_bytes().len(), + Self::ListItem(item) | Self::Quote(item)=> 2 + item.as_bytes().len(), + } + } } pub fn parse(doc: &str) -> Vec { From 2f3dd72d90e559d601e984747e8f6cb9ad372669 Mon Sep 17 00:00:00 2001 From: Emii Tatsuo Date: Mon, 30 Nov 2020 00:52:08 -0500 Subject: [PATCH 2/3] Add AsRef and AsMut<[Node]> to builder --- gemtext/src/lib.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/gemtext/src/lib.rs b/gemtext/src/lib.rs index 71ab545..f9cc830 100644 --- a/gemtext/src/lib.rs +++ b/gemtext/src/lib.rs @@ -61,7 +61,7 @@ impl ToString for Builder { fn to_string(&self) -> String { let len: usize = self.nodes.iter().map(Node::estimate_len).sum(); // sum up node lengths let mut bytes = Vec::with_capacity(len + self.nodes.len()); // add in inter-node newlines - render(&self.nodes, &mut bytes).unwrap(); // Writing to a string shouldn't produce errors + render(self, &mut bytes).unwrap(); // Writing to a string shouldn't produce errors unsafe { // This is safe because bytes is composed of Strings. 
We could have this as @@ -73,6 +73,20 @@ impl ToString for Builder { } } +impl AsRef<[Node]> for Builder { + /// Get a reference to the internal node list of this builder + fn as_ref(&self) -> &[Node] { + self.nodes.as_ref() + } +} + +impl AsMut<[Node]> for Builder { + /// Get a mutable reference to the internal node list of this builder + fn as_mut(&mut self) -> &mut [Node] { + self.nodes.as_mut() + } +} + /// Render a set of nodes as a document to a writer. pub fn render(nodes: impl AsRef<[Node]>, out: &mut impl Write) -> io::Result<()> { use Node::*; From a7fabdc9098afba04f40cf5bb669f09d1ee37213 Mon Sep 17 00:00:00 2001 From: Emii Tatsuo Date: Mon, 30 Nov 2020 01:40:23 -0500 Subject: [PATCH 3/3] Allow Builder as Into<Vec<Node>> --- gemtext/src/lib.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/gemtext/src/lib.rs b/gemtext/src/lib.rs index f9cc830..8d532cd 100644 --- a/gemtext/src/lib.rs +++ b/gemtext/src/lib.rs @@ -87,6 +87,15 @@ impl AsMut<[Node]> for Builder { } } +impl From<Builder> for Vec<Node> { + /// Convert into a collection of [`Node`]s. + /// + /// Equivalent to calling [`Builder::build()`] + fn from(builder: Builder) -> Self { + builder.build() + } +} + /// Render a set of nodes as a document to a writer. pub fn render(nodes: impl AsRef<[Node]>, out: &mut impl Write) -> io::Result<()> { use Node::*;