add basic gemini response parsing
This commit is contained in:
commit
b7f2f8bf4a
|
@ -0,0 +1,2 @@
|
||||||
|
/target
|
||||||
|
Cargo.lock
|
|
@ -0,0 +1,20 @@
|
||||||
|
[package]
|
||||||
|
name = "maj"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Christine Dodrill <me@christine.website>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
num = "0.2"
|
||||||
|
num-derive = "0.3"
|
||||||
|
num-traits = "0.2"
|
||||||
|
rustls = "0.18"
|
||||||
|
webpki = "0.21.0"
|
||||||
|
log = "0.4"
|
||||||
|
url = "2"
|
||||||
|
thiserror = "1"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
pretty_env_logger = "0.4"
|
|
@ -0,0 +1,12 @@
|
||||||
|
Copyright (c) 2020 Christine Dodrill <me@christine.website>
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and/or distribute this software for any
|
||||||
|
purpose with or without fee is hereby granted.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
||||||
|
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||||
|
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
||||||
|
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
||||||
|
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
||||||
|
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||||
|
PERFORMANCE OF THIS SOFTWARE.
|
|
@ -0,0 +1,7 @@
|
||||||
|
{ pkgs ? import <nixpkgs> {} }:
|
||||||
|
|
||||||
|
pkgs.mkShell {
|
||||||
|
buildInputs = with pkgs; [
|
||||||
|
rustc cargo rls rustfmt cargo-watch
|
||||||
|
];
|
||||||
|
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
#[macro_use]
|
||||||
|
extern crate num_derive;
|
||||||
|
|
||||||
|
mod status_code;
|
||||||
|
mod response;
|
||||||
|
|
||||||
|
pub use status_code::StatusCode;
|
||||||
|
pub use response::Response;
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    /// Placeholder sanity check; proves the test harness itself runs.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}
|
|
@ -0,0 +1,116 @@
|
||||||
|
use crate::StatusCode;
|
||||||
|
use num::FromPrimitive;
|
||||||
|
use std::io::prelude::*;
|
||||||
|
|
||||||
|
/// A Gemini response as specified in [the spec](https://gemini.circumlunar.space/docs/specification.html).
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct Response {
|
||||||
|
status: StatusCode,
|
||||||
|
meta: String,
|
||||||
|
body: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The states of the response parser, in the order they are visited.
#[derive(Debug)]
enum State {
    /// Collecting the bytes of the status code.
    ReadStatusCode { data: Vec<u8> },

    /// Skipping the whitespace between the status code and the meta string.
    ReadWhitespace,

    /// Collecting the bytes of the meta string, up to the end of the line.
    ReadMeta { data: Vec<u8> },

    /// Collecting the bytes of the response body.
    ReadBody { data: Vec<u8> },
}
|
||||||
|
|
||||||
|
#[derive(thiserror::Error, Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
#[error("unexpected end of file found while parsing response")]
|
||||||
|
EOF,
|
||||||
|
|
||||||
|
#[error("I/O error")]
|
||||||
|
IO(#[from] std::io::Error),
|
||||||
|
|
||||||
|
#[error("invalid status code character {0}")]
|
||||||
|
InvalidStatusCode(u8),
|
||||||
|
|
||||||
|
#[error("UTF-8 error: {0}")]
|
||||||
|
Utf8(#[from] std::str::Utf8Error),
|
||||||
|
|
||||||
|
#[error("Number parsing error: {0}")]
|
||||||
|
NumParse(#[from] std::num::ParseIntError),
|
||||||
|
|
||||||
|
#[error("None found when none should not be found")]
|
||||||
|
NoneFound,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Response {
|
||||||
|
pub fn parse(inp: &mut impl Read) -> Result<Response, Error> {
|
||||||
|
let mut state = State::ReadStatusCode { data: vec![] };
|
||||||
|
let mut buf = [0; 1];
|
||||||
|
let mut result = Response::default();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match inp.read(&mut buf) {
|
||||||
|
Ok(n) => {
|
||||||
|
if n == 0 {
|
||||||
|
if let State::ReadBody { data } = state {
|
||||||
|
result.body = data;
|
||||||
|
return Ok(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log::trace!("buf: {:?}: {:?}", buf, buf[0] as char);
|
||||||
|
}
|
||||||
|
Err(why) => return Err(Error::IO(why)),
|
||||||
|
}
|
||||||
|
|
||||||
|
log::trace!("state: {:?}", state);
|
||||||
|
|
||||||
|
match &mut state {
|
||||||
|
State::ReadStatusCode { data } => match buf[0] as char {
|
||||||
|
'1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '0' => {
|
||||||
|
data.push(buf[0]);
|
||||||
|
}
|
||||||
|
' ' | '\t' => {
|
||||||
|
let status_code: &str = std::str::from_utf8(data)?;
|
||||||
|
let status_code: u8 = status_code.parse()?;
|
||||||
|
result.status = StatusCode::from_u8(status_code).ok_or(Error::NoneFound)?;
|
||||||
|
state = State::ReadWhitespace;
|
||||||
|
}
|
||||||
|
foo => return Err(Error::InvalidStatusCode(foo as u8)),
|
||||||
|
},
|
||||||
|
|
||||||
|
State::ReadWhitespace => match buf[0] as char {
|
||||||
|
' ' | '\t' => {}
|
||||||
|
_ => {
|
||||||
|
state = State::ReadMeta { data: vec![buf[0]] };
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
State::ReadMeta { data } => match buf[0] as char {
|
||||||
|
'\r' => {}
|
||||||
|
'\n' => {
|
||||||
|
result.meta = std::str::from_utf8(data)?.to_string();
|
||||||
|
state = State::ReadBody { data: vec![] };
|
||||||
|
}
|
||||||
|
_ => data.push(buf[0]),
|
||||||
|
},
|
||||||
|
|
||||||
|
State::ReadBody { data } => data.push(buf[0]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the canned response in `testdata/` and checks each part of the
    /// result, not merely that parsing succeeded.
    #[test]
    fn parse() -> Result<(), Error> {
        // `try_init` instead of `init`: the latter panics when a logger is
        // already installed, which would break as soon as a second test
        // initialises logging too.
        let _ = pretty_env_logger::try_init();
        let mut fin = std::fs::File::open("./testdata/simple_response.txt")?;

        let resp = Response::parse(&mut fin)?;

        match resp.status {
            crate::StatusCode::Success => {}
            _ => panic!("expected status code 20 (success)"),
        }
        assert_eq!(resp.meta, "text/gemini");
        assert!(resp.body.starts_with(b"# Project Gemini"));

        Ok(())
    }
}
|
|
@ -0,0 +1,123 @@
|
||||||
|
/// Status codes as specified in [the spec](https://gemini.circumlunar.space/docs/specification.html).
|
||||||
|
#[derive(Copy, Clone, num_derive::FromPrimitive)]
|
||||||
|
pub enum StatusCode {
|
||||||
|
/// The requested resource accepts a line of textual user input.
|
||||||
|
/// The <META> line is a prompt which should be displayed to the
|
||||||
|
/// user. The same resource should then be requested again with the
|
||||||
|
/// user's input included as a query component. Queries are
|
||||||
|
/// included in requests as per the usual generic URL definition in
|
||||||
|
/// RFC3986, i.e. separated from the path by a ?. Reserved characters
|
||||||
|
/// used in the user's input must be "percent-encoded" as per RFC3986,
|
||||||
|
/// and space characters should also be percent-encoded.
|
||||||
|
Input = 10,
|
||||||
|
|
||||||
|
/// As per status code 10, but for use with sensitive input such as
|
||||||
|
/// passwords. Clients should present the prompt as per status code
|
||||||
|
/// 10, but the user's input should not be echoed to the screen to
|
||||||
|
/// prevent it being read by "shoulder surfers".
|
||||||
|
SensitiveInput = 11,
|
||||||
|
|
||||||
|
/// The request was handled successfully and a response body will follow the response header. The <META> line is a MIME media type which applies to the response body.
|
||||||
|
Success = 20,
|
||||||
|
|
||||||
|
/// The server is redirecting the client to a new location for the
|
||||||
|
/// requested resource. There is no response body. <META> is a new
|
||||||
|
/// URL for the requested resource. The URL may be absolute or
|
||||||
|
/// relative. The redirect should be considered temporary, i.e.
|
||||||
|
/// clients should continue to request the resource at the original
|
||||||
|
/// address and should not performance convenience actions like
|
||||||
|
/// automatically updating bookmarks. There is no response body.
|
||||||
|
TemporaryRedirect = 30,
|
||||||
|
|
||||||
|
/// The requested resource should be consistently requested from the
|
||||||
|
/// new URL provided in future. Tools like search engine indexers or
|
||||||
|
/// content aggregators should update their configurations to avoid
|
||||||
|
/// requesting the old URL, and end-user clients may automatically
|
||||||
|
/// update bookmarks, etc. Note that clients which only pay attention
|
||||||
|
/// to the initial digit of status codes will treat this as a temporary
|
||||||
|
/// redirect. They will still end up at the right place, they just
|
||||||
|
/// won't be able to make use of the knowledge that this redirect is
|
||||||
|
/// permanent, so they'll pay a small performance penalty by having
|
||||||
|
///to follow the redirect each time.
|
||||||
|
PermanentRedirect = 31,
|
||||||
|
|
||||||
|
/// The request has failed. There is no response body. The nature of
|
||||||
|
/// the failure is temporary, i.e. an identical request MAY succeed
|
||||||
|
/// in the future. The contents of <META> may provide additional
|
||||||
|
/// information on the failure, and should be displayed to human users.
|
||||||
|
TemporaryFailure = 40,
|
||||||
|
|
||||||
|
/// The server is unavailable due to overload or maintenance. (cf HTTP
|
||||||
|
/// 503)
|
||||||
|
ServerUnavailable = 41,
|
||||||
|
|
||||||
|
/// A CGI process, or similar system for generating dynamic content,
|
||||||
|
/// died unexpectedly or timed out.
|
||||||
|
CGIError = 42,
|
||||||
|
|
||||||
|
/// A proxy request failed because the server was unable to
|
||||||
|
/// successfully complete a transaction with the remote host. (cf HTTP
|
||||||
|
/// 502, 504)
|
||||||
|
ProxyError = 43,
|
||||||
|
|
||||||
|
/// Rate limiting is in effect. <META> is an integer number of seconds
|
||||||
|
/// which the client must wait before another request is made to this
|
||||||
|
/// server. (cf HTTP 429)
|
||||||
|
SlowDown = 44,
|
||||||
|
|
||||||
|
/// The request has failed. There is no response body. The nature of
|
||||||
|
/// the failure is permanent, i.e. identical future requests will
|
||||||
|
/// reliably fail for the same reason. The contents of <META> may
|
||||||
|
/// provide additional information on the failure, and should be
|
||||||
|
/// displayed to human users. Automatic clients such as aggregators
|
||||||
|
/// or indexing crawlers should not repeat this request.
|
||||||
|
PermanentFailure = 50,
|
||||||
|
|
||||||
|
/// The requested resource could not be found but may be available in
|
||||||
|
/// the future. (cf HTTP 404) (struggling to remember this important
|
||||||
|
/// status code? Easy: you can't find things hidden at Area 51!)
|
||||||
|
NotFound = 51,
|
||||||
|
|
||||||
|
/// The resource requested is no longer available and will not be
|
||||||
|
/// available again. Search engines and similar tools should remove this
|
||||||
|
/// resource from their indices. Content aggregators should stop
|
||||||
|
/// requesting the resource and convey to their human users that the
|
||||||
|
/// subscribed resource is gone. (cf HTTP 410)
|
||||||
|
Gone = 52,
|
||||||
|
|
||||||
|
/// The request was for a resource at a domain not served by the server
|
||||||
|
/// and the server does not accept proxy requests.
|
||||||
|
ProxyRequestRefused = 53,
|
||||||
|
|
||||||
|
/// The server was unable to parse the client's request, presumably due
|
||||||
|
/// to a malformed request. (cf HTTP 400)
|
||||||
|
BadRequest = 59,
|
||||||
|
|
||||||
|
/// The requested resource requires a client certificate to access. If
|
||||||
|
/// the request was made without a certificate, it should be repeated
|
||||||
|
/// with one. If the request was made with a certificate, the server
|
||||||
|
/// did not accept it and the request should be repeated with a
|
||||||
|
/// different certificate. The contents of <META> (and/or the specific
|
||||||
|
/// 6x code) may provide additional information on certificate
|
||||||
|
/// requirements or the reason a certificate was rejected.
|
||||||
|
ClientCertificateRequired = 60,
|
||||||
|
|
||||||
|
/// The supplied client certificate is not authorised for accessing the
|
||||||
|
/// particular requested resource. The problem is not with the
|
||||||
|
/// certificate itself, which may be authorised for other resources.
|
||||||
|
CertificateNotAuthorized = 61,
|
||||||
|
|
||||||
|
/// The supplied client certificate was not accepted because it is not
|
||||||
|
/// valid. This indicates a problem with the certificate in and of itself,
|
||||||
|
/// with no consideration of the particular requested resource. The most
|
||||||
|
/// likely cause is that the certificate's validity start date is in the
|
||||||
|
/// future or its expiry date has passed, but this code may also
|
||||||
|
/// indicate an invalid signature, or a violation of a X509 standard
|
||||||
|
/// requirements. The <META> should provide more information about the
|
||||||
|
/// exact error.
|
||||||
|
CertificateNotValid = 62,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for StatusCode {
|
||||||
|
fn default() -> Self { StatusCode::Success }
|
||||||
|
}
|
|
@ -0,0 +1,43 @@
|
||||||
|
20 text/gemini
|
||||||
|
# Project Gemini
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Gemini is a new internet protocol which:
|
||||||
|
|
||||||
|
* Is heavier than gopher
|
||||||
|
* Is lighter than the web
|
||||||
|
* Will not replace either
|
||||||
|
* Strives for maximum power to weight ratio
|
||||||
|
* Takes user privacy very seriously
|
||||||
|
|
||||||
|
## Resources
|
||||||
|
|
||||||
|
=> docs/ Gemini documentation
|
||||||
|
=> software/ Gemini software
|
||||||
|
=> servers/ Known Gemini servers
|
||||||
|
=> https://lists.orbitalfox.eu/listinfo/gemini Gemini mailing list
|
||||||
|
=> gemini://gemini.conman.org/test/torture/ Gemini client torture test
|
||||||
|
|
||||||
|
## Web proxies
|
||||||
|
|
||||||
|
=> https://portal.mozz.us/?url=gemini%3A%2F%2Fgemini.circumlunar.space%2F&fmt=fixed Gemini-to-web proxy service
|
||||||
|
=> https://proxy.vulpes.one/gemini/gemini.circumlunar.space Another Gemini-to-web proxy service
|
||||||
|
|
||||||
|
## Search engines
|
||||||
|
|
||||||
|
=> gemini://gus.guru/ Gemini Universal Search engine
|
||||||
|
=> gemini://houston.coder.town Houston search engine
|
||||||
|
|
||||||
|
## Geminispace aggregators (experimental!)
|
||||||
|
|
||||||
|
=> capcom/ CAPCOM
|
||||||
|
=> gemini://rawtext.club:1965/~sloum/spacewalk.gmi Spacewalk
|
||||||
|
|
||||||
|
## Gemini mirrors of web resources
|
||||||
|
|
||||||
|
=> gemini://gempaper.strangled.net/mirrorlist/ A list of mirrored services
|
||||||
|
|
||||||
|
## Free Gemini hosting
|
||||||
|
|
||||||
|
=> users/ Users with Gemini content on this server
|
Loading…
Reference in New Issue