From b7f2f8bf4aaa8039c4bd7c9acd0b0878eb415751 Mon Sep 17 00:00:00 2001 From: Christine Dodrill Date: Thu, 23 Jul 2020 22:25:35 -0400 Subject: [PATCH] add basic gemeni response parsing --- .envrc | 1 + .gitignore | 2 + Cargo.toml | 20 ++++++ LICENSE | 12 ++++ README.md | 3 + shell.nix | 7 ++ src/lib.rs | 16 +++++ src/response.rs | 116 +++++++++++++++++++++++++++++++++ src/status_code.rs | 123 +++++++++++++++++++++++++++++++++++ testdata/simple_response.txt | 43 ++++++++++++ 10 files changed, 343 insertions(+) create mode 100644 .envrc create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 shell.nix create mode 100644 src/lib.rs create mode 100644 src/response.rs create mode 100644 src/status_code.rs create mode 100644 testdata/simple_response.txt diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..051d09d --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +eval "$(lorri direnv)" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..186a743 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "maj" +version = "0.1.0" +authors = ["Christine Dodrill "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +num = "0.2" +num-derive = "0.3" +num-traits = "0.2" +rustls = "0.18" +webpki = "0.21.0" +log = "0.4" +url = "2" +thiserror = "1" + +[dev-dependencies] +pretty_env_logger = "0.4" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1b4d5d0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,12 @@ +Copyright (c) 2020 Christine Dodrill + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. 
+ +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..f47b1bc --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# maj + +A gemini ecosystem for Rust. diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..bcc085d --- /dev/null +++ b/shell.nix @@ -0,0 +1,7 @@ +{ pkgs ? import {} }: + +pkgs.mkShell { + buildInputs = with pkgs; [ + rustc cargo rls rustfmt cargo-watch + ]; +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..36855f0 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,16 @@ +#[macro_use] +extern crate num_derive; + +mod status_code; +mod response; + +pub use status_code::StatusCode; +pub use response::Response; + +#[cfg(test)] +mod tests { + #[test] + fn it_works() { + assert_eq!(2 + 2, 4); + } +} diff --git a/src/response.rs b/src/response.rs new file mode 100644 index 0000000..0aea72e --- /dev/null +++ b/src/response.rs @@ -0,0 +1,116 @@ +use crate::StatusCode; +use num::FromPrimitive; +use std::io::prelude::*; + +/// A Gemini response as specified in [the spec](https://gemini.circumlunar.space/docs/specification.html). +#[derive(Default)] +pub struct Response { + status: StatusCode, + meta: String, + body: Vec, +} + +/// The parser state. 
+#[derive(Debug)] +enum State { + ReadStatusCode { data: Vec }, + ReadWhitespace, + ReadMeta { data: Vec }, + ReadBody { data: Vec }, +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("unexpected end of file found while parsing response")] + EOF, + + #[error("I/O error")] + IO(#[from] std::io::Error), + + #[error("invalid status code character {0}")] + InvalidStatusCode(u8), + + #[error("UTF-8 error: {0}")] + Utf8(#[from] std::str::Utf8Error), + + #[error("Number parsing error: {0}")] + NumParse(#[from] std::num::ParseIntError), + + #[error("None found when none should not be found")] + NoneFound, +} + +impl Response { + pub fn parse(inp: &mut impl Read) -> Result { + let mut state = State::ReadStatusCode { data: vec![] }; + let mut buf = [0; 1]; + let mut result = Response::default(); + + loop { + match inp.read(&mut buf) { + Ok(n) => { + if n == 0 { + if let State::ReadBody { data } = state { + result.body = data; + return Ok(result); + } + } + log::trace!("buf: {:?}: {:?}", buf, buf[0] as char); + } + Err(why) => return Err(Error::IO(why)), + } + + log::trace!("state: {:?}", state); + + match &mut state { + State::ReadStatusCode { data } => match buf[0] as char { + '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '0' => { + data.push(buf[0]); + } + ' ' | '\t' => { + let status_code: &str = std::str::from_utf8(data)?; + let status_code: u8 = status_code.parse()?; + result.status = StatusCode::from_u8(status_code).ok_or(Error::NoneFound)?; + state = State::ReadWhitespace; + } + foo => return Err(Error::InvalidStatusCode(foo as u8)), + }, + + State::ReadWhitespace => match buf[0] as char { + ' ' | '\t' => {} + _ => { + state = State::ReadMeta { data: vec![buf[0]] }; + } + }, + + State::ReadMeta { data } => match buf[0] as char { + '\r' => {} + '\n' => { + result.meta = std::str::from_utf8(data)?.to_string(); + state = State::ReadBody { data: vec![] }; + } + _ => data.push(buf[0]), + }, + + State::ReadBody { data } => data.push(buf[0]), + } + 
} + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::*; + use std::io::prelude::*; + + #[test] + fn parse() -> Result<(), Error> { + pretty_env_logger::init(); + let mut fin = std::fs::File::open("./testdata/simple_response.txt")?; + + Response::parse(&mut fin)?; + + Ok(()) + } +} diff --git a/src/status_code.rs b/src/status_code.rs new file mode 100644 index 0000000..c742848 --- /dev/null +++ b/src/status_code.rs @@ -0,0 +1,123 @@ +/// Status codes as specified in [the spec](https://gemini.circumlunar.space/docs/specification.html). +#[derive(Copy, Clone, num_derive::FromPrimitive)] +pub enum StatusCode { + /// The requested resource accepts a line of textual user input. + /// The <META> line is a prompt which should be displayed to the + /// user. The same resource should then be requested again with the + /// user's input included as a query component. Queries are + /// included in requests as per the usual generic URL definition in + /// RFC3986, i.e. separated from the path by a ?. Reserved characters + /// used in the user's input must be "percent-encoded" as per RFC3986, + /// and space characters should also be percent-encoded. + Input = 10, + + /// As per status code 10, but for use with sensitive input such as + /// passwords. Clients should present the prompt as per status code + /// 10, but the user's input should not be echoed to the screen to + /// prevent it being read by "shoulder surfers". + SensitiveInput = 11, + + /// The request was handled successfully and a response body will follow the response header. The <META> line is a MIME media type which applies to the response body. + Success = 20, + + /// The server is redirecting the client to a new location for the + /// requested resource. There is no response body. <META> is a new + /// URL for the requested resource. The URL may be absolute or + /// relative. The redirect should be considered temporary, i.e. 
+ /// clients should continue to request the resource at the original + /// address and should not perform convenience actions like + /// automatically updating bookmarks. There is no response body. + TemporaryRedirect = 30, + + /// The requested resource should be consistently requested from the + /// new URL provided in future. Tools like search engine indexers or + /// content aggregators should update their configurations to avoid + /// requesting the old URL, and end-user clients may automatically + /// update bookmarks, etc. Note that clients which only pay attention + /// to the initial digit of status codes will treat this as a temporary + /// redirect. They will still end up at the right place, they just + /// won't be able to make use of the knowledge that this redirect is + /// permanent, so they'll pay a small performance penalty by having + /// to follow the redirect each time. + PermanentRedirect = 31, + + /// The request has failed. There is no response body. The nature of + /// the failure is temporary, i.e. an identical request MAY succeed + /// in the future. The contents of <META> may provide additional + /// information on the failure, and should be displayed to human users. + TemporaryFailure = 40, + + /// The server is unavailable due to overload or maintenance. (cf HTTP + /// 503) + ServerUnavailable = 41, + + /// A CGI process, or similar system for generating dynamic content, + /// died unexpectedly or timed out. + CGIError = 42, + + /// A proxy request failed because the server was unable to + /// successfully complete a transaction with the remote host. (cf HTTP + /// 502, 504) + ProxyError = 43, + + /// Rate limiting is in effect. <META> is an integer number of seconds + /// which the client must wait before another request is made to this + /// server. (cf HTTP 429) + SlowDown = 44, + + /// The request has failed. There is no response body. The nature of + /// the failure is permanent, i.e. 
identical future requests will + /// reliably fail for the same reason. The contents of <META> may + /// provide additional information on the failure, and should be + /// displayed to human users. Automatic clients such as aggregators + /// or indexing crawlers should not repeat this request. + PermanentFailure = 50, + + /// The requested resource could not be found but may be available in + /// the future. (cf HTTP 404) (struggling to remember this important + /// status code? Easy: you can't find things hidden at Area 51!) + NotFound = 51, + + /// The resource requested is no longer available and will not be + /// available again. Search engines and similar tools should remove this + /// resource from their indices. Content aggregators should stop + /// requesting the resource and convey to their human users that the + /// subscribed resource is gone. (cf HTTP 410) + Gone = 52, + + /// The request was for a resource at a domain not served by the server + /// and the server does not accept proxy requests. + ProxyRequestRefused = 53, + + /// The server was unable to parse the client's request, presumably due + /// to a malformed request. (cf HTTP 400) + BadRequest = 59, + + /// The requested resource requires a client certificate to access. If + /// the request was made without a certificate, it should be repeated + /// with one. If the request was made with a certificate, the server + /// did not accept it and the request should be repeated with a + /// different certificate. The contents of <META> (and/or the specific + /// 6x code) may provide additional information on certificate + /// requirements or the reason a certificate was rejected. + ClientCertificateRequired = 60, + + /// The supplied client certificate is not authorised for accessing the + /// particular requested resource. The problem is not with the + /// certificate itself, which may be authorised for other resources. 
+ CertificateNotAuthorized = 61, + + /// The supplied client certificate was not accepted because it is not + /// valid. This indicates a problem with the certificate in and of itself, + /// with no consideration of the particular requested resource. The most + /// likely cause is that the certificate's validity start date is in the + /// future or its expiry date has passed, but this code may also + /// indicate an invalid signature, or a violation of X509 standard + /// requirements. The <META> should provide more information about the + /// exact error. + CertificateNotValid = 62, +} + +impl Default for StatusCode { + fn default() -> Self { StatusCode::Success } +} diff --git a/testdata/simple_response.txt b/testdata/simple_response.txt new file mode 100644 index 0000000..c4c73fd --- /dev/null +++ b/testdata/simple_response.txt @@ -0,0 +1,43 @@ +20 text/gemini +# Project Gemini + +## Overview + +Gemini is a new internet protocol which: + +* Is heavier than gopher +* Is lighter than the web +* Will not replace either +* Strives for maximum power to weight ratio +* Takes user privacy very seriously + +## Resources + +=> docs/ Gemini documentation +=> software/ Gemini software +=> servers/ Known Gemini servers +=> https://lists.orbitalfox.eu/listinfo/gemini Gemini mailing list +=> gemini://gemini.conman.org/test/torture/ Gemini client torture test + +## Web proxies + +=> https://portal.mozz.us/?url=gemini%3A%2F%2Fgemini.circumlunar.space%2F&fmt=fixed Gemini-to-web proxy service +=> https://proxy.vulpes.one/gemini/gemini.circumlunar.space Another Gemini-to-web proxy service + +## Search engines + +=> gemini://gus.guru/ Gemini Universal Search engine +=> gemini://houston.coder.town Houston search engine + +## Geminispace aggregators (experimental!) 
+ +=> capcom/ CAPCOM +=> gemini://rawtext.club:1965/~sloum/spacewalk.gmi Spacewalk + +## Gemini mirrors of web resources + +=> gemini://gempaper.strangled.net/mirrorlist/ A list of mirrored services + +## Free Gemini hosting + +=> users/ Users with Gemini content on this server