Browse Source

add basic gemini response parsing

majc-history
Cadey Ratio 6 months ago
commit
b7f2f8bf4a
10 changed files with 343 additions and 0 deletions
  1. +1
    -0
      .envrc
  2. +2
    -0
      .gitignore
  3. +20
    -0
      Cargo.toml
  4. +12
    -0
      LICENSE
  5. +3
    -0
      README.md
  6. +7
    -0
      shell.nix
  7. +16
    -0
      src/lib.rs
  8. +116
    -0
      src/response.rs
  9. +123
    -0
      src/status_code.rs
  10. +43
    -0
      testdata/simple_response.txt

+ 1
- 0
.envrc View File

@ -0,0 +1 @@
# Evaluate, in this direnv session, the shell code printed by `lorri direnv`.
eval "$(lorri direnv)"

+ 2
- 0
.gitignore View File

@ -0,0 +1,2 @@
/target
Cargo.lock

+ 20
- 0
Cargo.toml View File

@ -0,0 +1,20 @@
[package]
name = "maj"
version = "0.1.0"
authors = ["Christine Dodrill <[email protected]>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
num = "0.2"
num-derive = "0.3"
num-traits = "0.2"
rustls = "0.18"
webpki = "0.21.0"
log = "0.4"
url = "2"
thiserror = "1"
[dev-dependencies]
pretty_env_logger = "0.4"

+ 12
- 0
LICENSE View File

@ -0,0 +1,12 @@
Copyright (c) 2020 Christine Dodrill <[email protected]>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.

+ 3
- 0
README.md View File

@ -0,0 +1,3 @@
# maj
A gemini ecosystem for Rust.

+ 7
- 0
shell.nix View File

@ -0,0 +1,7 @@
# Development shell for this project.
# `pkgs` defaults to importing <nixpkgs> when the caller does not supply it.
{ pkgs ? import <nixpkgs> {} }:
pkgs.mkShell {
# Packages made available inside the shell.
buildInputs = with pkgs; [
rustc cargo rls rustfmt cargo-watch
];
}

+ 16
- 0
src/lib.rs View File

@ -0,0 +1,16 @@
#[macro_use]
extern crate num_derive;
mod status_code;
mod response;
pub use status_code::StatusCode;
pub use response::Response;
#[cfg(test)]
mod tests {
    /// Placeholder smoke test: confirms the test harness builds and runs.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}

+ 116
- 0
src/response.rs View File

@ -0,0 +1,116 @@
use crate::StatusCode;
use num::FromPrimitive;
use std::io::prelude::*;
/// A Gemini response as specified in [the spec](https://gemini.circumlunar.space/docs/specification.html).
///
/// Built by `Response::parse`; `Default` yields the default status code, an
/// empty meta string and an empty body.
#[derive(Default)]
pub struct Response {
/// Status code parsed from the digits at the start of the header line.
status: StatusCode,
/// Header text that follows the status code, up to the end of the header line.
meta: String,
/// Raw bytes that follow the header line.
body: Vec<u8>,
}
/// The parser state.
///
/// `Response::parse` starts in `ReadStatusCode` and moves through the
/// variants in the order they are listed here.
#[derive(Debug)]
enum State {
/// Collecting the ASCII digits of the status code in `data`.
ReadStatusCode { data: Vec<u8> },
/// Skipping the spaces and tabs between the status code and the meta text.
ReadWhitespace,
/// Collecting the bytes of the meta text in `data` until a `\n` is read.
ReadMeta { data: Vec<u8> },
/// Collecting every remaining byte in `data` as the response body.
ReadBody { data: Vec<u8> },
}
/// Errors that can be reported while parsing a [`Response`].
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// Signals that the input ended before a complete response could be parsed.
#[error("unexpected end of file found while parsing response")]
EOF,
/// Reading from the underlying input failed; wraps the original I/O error.
#[error("I/O error")]
IO(#[from] std::io::Error),
/// A byte that is neither an ASCII digit, a space nor a tab was found
/// while reading the status code. Carries the offending byte.
#[error("invalid status code character {0}")]
InvalidStatusCode(u8),
/// Header bytes were not valid UTF-8.
#[error("UTF-8 error: {0}")]
Utf8(#[from] std::str::Utf8Error),
/// The status code digits could not be parsed as a `u8`.
#[error("Number parsing error: {0}")]
NumParse(#[from] std::num::ParseIntError),
/// The parsed number does not correspond to any known `StatusCode`.
#[error("None found when none should not be found")]
NoneFound,
}
impl Response {
pub fn parse(inp: &mut impl Read) -> Result<Response, Error> {
let mut state = State::ReadStatusCode { data: vec![] };
let mut buf = [0; 1];
let mut result = Response::default();
loop {
match inp.read(&mut buf) {
Ok(n) => {
if n == 0 {
if let State::ReadBody { data } = state {
result.body = data;
return Ok(result);
}
}
log::trace!("buf: {:?}: {:?}", buf, buf[0] as char);
}
Err(why) => return Err(Error::IO(why)),
}
log::trace!("state: {:?}", state);
match &mut state {
State::ReadStatusCode { data } => match buf[0] as char {
'1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '0' => {
data.push(buf[0]);
}
' ' | '\t' => {
let status_code: &str = std::str::from_utf8(data)?;
let status_code: u8 = status_code.parse()?;
result.status = StatusCode::from_u8(status_code).ok_or(Error::NoneFound)?;
state = State::ReadWhitespace;
}
foo => return Err(Error::InvalidStatusCode(foo as u8)),
},
State::ReadWhitespace => match buf[0] as char {
' ' | '\t' => {}
_ => {
state = State::ReadMeta { data: vec![buf[0]] };
}
},
State::ReadMeta { data } => match buf[0] as char {
'\r' => {}
'\n' => {
result.meta = std::str::from_utf8(data)?.to_string();
state = State::ReadBody { data: vec![] };
}
_ => data.push(buf[0]),
},
State::ReadBody { data } => data.push(buf[0]),
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::StatusCode;

    /// Parses the bundled sample response and checks each parsed part.
    #[test]
    fn parse() -> Result<(), Error> {
        // `try_init` instead of `init`: a logger may already be installed in
        // this test process, and `init` panics in that case.
        let _ = pretty_env_logger::try_init();
        let mut fin = std::fs::File::open("./testdata/simple_response.txt")?;
        let resp = Response::parse(&mut fin)?;
        match resp.status {
            StatusCode::Success => {}
            _ => panic!("expected a Success status"),
        }
        assert_eq!(resp.meta, "text/gemini");
        assert!(!resp.body.is_empty());
        Ok(())
    }
}

+ 123
- 0
src/status_code.rs View File

@ -0,0 +1,123 @@
/// Status codes as specified in [the spec](https://gemini.circumlunar.space/docs/specification.html).
///
/// Each variant's discriminant is the two-digit code sent on the wire, so a
/// parsed number can be converted with `num::FromPrimitive`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, num_derive::FromPrimitive)]
pub enum StatusCode {
    /// The requested resource accepts a line of textual user input.
    /// The `<META>` line is a prompt which should be displayed to the
    /// user. The same resource should then be requested again with the
    /// user's input included as a query component. Queries are
    /// included in requests as per the usual generic URL definition in
    /// RFC3986, i.e. separated from the path by a ?. Reserved characters
    /// used in the user's input must be "percent-encoded" as per RFC3986,
    /// and space characters should also be percent-encoded.
    Input = 10,
    /// As per status code 10, but for use with sensitive input such as
    /// passwords. Clients should present the prompt as per status code
    /// 10, but the user's input should not be echoed to the screen to
    /// prevent it being read by "shoulder surfers".
    SensitiveInput = 11,
    /// The request was handled successfully and a response body will
    /// follow the response header. The `<META>` line is a MIME media type
    /// which applies to the response body.
    Success = 20,
    /// The server is redirecting the client to a new location for the
    /// requested resource. There is no response body. `<META>` is a new
    /// URL for the requested resource. The URL may be absolute or
    /// relative. The redirect should be considered temporary, i.e.
    /// clients should continue to request the resource at the original
    /// address and should not perform convenience actions like
    /// automatically updating bookmarks. There is no response body.
    TemporaryRedirect = 30,
    /// The requested resource should be consistently requested from the
    /// new URL provided in future. Tools like search engine indexers or
    /// content aggregators should update their configurations to avoid
    /// requesting the old URL, and end-user clients may automatically
    /// update bookmarks, etc. Note that clients which only pay attention
    /// to the initial digit of status codes will treat this as a temporary
    /// redirect. They will still end up at the right place, they just
    /// won't be able to make use of the knowledge that this redirect is
    /// permanent, so they'll pay a small performance penalty by having
    /// to follow the redirect each time.
    PermanentRedirect = 31,
    /// The request has failed. There is no response body. The nature of
    /// the failure is temporary, i.e. an identical request MAY succeed
    /// in the future. The contents of `<META>` may provide additional
    /// information on the failure, and should be displayed to human users.
    TemporaryFailure = 40,
    /// The server is unavailable due to overload or maintenance. (cf HTTP
    /// 503)
    ServerUnavailable = 41,
    /// A CGI process, or similar system for generating dynamic content,
    /// died unexpectedly or timed out.
    CGIError = 42,
    /// A proxy request failed because the server was unable to
    /// successfully complete a transaction with the remote host. (cf HTTP
    /// 502, 504)
    ProxyError = 43,
    /// Rate limiting is in effect. `<META>` is an integer number of seconds
    /// which the client must wait before another request is made to this
    /// server. (cf HTTP 429)
    SlowDown = 44,
    /// The request has failed. There is no response body. The nature of
    /// the failure is permanent, i.e. identical future requests will
    /// reliably fail for the same reason. The contents of `<META>` may
    /// provide additional information on the failure, and should be
    /// displayed to human users. Automatic clients such as aggregators
    /// or indexing crawlers should not repeat this request.
    PermanentFailure = 50,
    /// The requested resource could not be found but may be available in
    /// the future. (cf HTTP 404) (struggling to remember this important
    /// status code? Easy: you can't find things hidden at Area 51!)
    NotFound = 51,
    /// The resource requested is no longer available and will not be
    /// available again. Search engines and similar tools should remove this
    /// resource from their indices. Content aggregators should stop
    /// requesting the resource and convey to their human users that the
    /// subscribed resource is gone. (cf HTTP 410)
    Gone = 52,
    /// The request was for a resource at a domain not served by the server
    /// and the server does not accept proxy requests.
    ProxyRequestRefused = 53,
    /// The server was unable to parse the client's request, presumably due
    /// to a malformed request. (cf HTTP 400)
    BadRequest = 59,
    /// The requested resource requires a client certificate to access. If
    /// the request was made without a certificate, it should be repeated
    /// with one. If the request was made with a certificate, the server
    /// did not accept it and the request should be repeated with a
    /// different certificate. The contents of `<META>` (and/or the specific
    /// 6x code) may provide additional information on certificate
    /// requirements or the reason a certificate was rejected.
    ClientCertificateRequired = 60,
    /// The supplied client certificate is not authorised for accessing the
    /// particular requested resource. The problem is not with the
    /// certificate itself, which may be authorised for other resources.
    CertificateNotAuthorized = 61,
    /// The supplied client certificate was not accepted because it is not
    /// valid. This indicates a problem with the certificate in and of itself,
    /// with no consideration of the particular requested resource. The most
    /// likely cause is that the certificate's validity start date is in the
    /// future or its expiry date has passed, but this code may also
    /// indicate an invalid signature, or a violation of X509 standard
    /// requirements. The `<META>` should provide more information about the
    /// exact error.
    CertificateNotValid = 62,
}
impl Default for StatusCode {
fn default() -> Self { StatusCode::Success }
}

+ 43
- 0
testdata/simple_response.txt View File

@ -0,0 +1,43 @@
20 text/gemini
# Project Gemini
## Overview
Gemini is a new internet protocol which:
* Is heavier than gopher
* Is lighter than the web
* Will not replace either
* Strives for maximum power to weight ratio
* Takes user privacy very seriously
## Resources
=> docs/ Gemini documentation
=> software/ Gemini software
=> servers/ Known Gemini servers
=> https://lists.orbitalfox.eu/listinfo/gemini Gemini mailing list
=> gemini://gemini.conman.org/test/torture/ Gemini client torture test
## Web proxies
=> https://portal.mozz.us/?url=gemini%3A%2F%2Fgemini.circumlunar.space%2F&fmt=fixed Gemini-to-web proxy service
=> https://proxy.vulpes.one/gemini/gemini.circumlunar.space Another Gemini-to-web proxy service
## Search engines
=> gemini://gus.guru/ Gemini Universal Search engine
=> gemini://houston.coder.town Houston search engine
## Geminispace aggregators (experimental!)
=> capcom/ CAPCOM
=> gemini://rawtext.club:1965/~sloum/spacewalk.gmi Spacewalk
## Gemini mirrors of web resources
=> gemini://gempaper.strangled.net/mirrorlist/ A list of mirrored services
## Free Gemini hosting
=> users/ Users with Gemini content on this server

Loading…
Cancel
Save