diff --git a/Cargo.toml b/Cargo.toml index 9fa6ad7..c4422fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,4 +16,7 @@ name = "chrono" [dependencies] time = "0.1.14" +regex = "*" +regex_macros = "*" + diff --git a/src/lib.rs b/src/lib.rs index af9dfcb..020a1bd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,6 +78,7 @@ Addition and subtraction is also supported. The following illustrates most supported operations to the date and time: ~~~~ {.rust} + # #![allow(unstable)] # /* we intentionally fake the datetime... use chrono::{UTC, Local, Datelike, Timelike, Weekday, Duration}; @@ -190,6 +191,12 @@ Advanced offset handling and date/time parsing is not yet supported (but is plan #![allow(unstable)] #![deny(missing_docs)] +// This is needed to allow compile-time regular expressions in this crate. +#![feature(plugin)] +#[plugin] #[no_link] +extern crate regex_macros; +extern crate regex; + extern crate "time" as stdtime; @@ -226,6 +233,9 @@ pub mod time; pub mod datetime; pub mod format; +/// Parsing functions for date/time strings. +pub mod parse; + /// The day of week (DOW). /// /// The order of the days of week depends on the context. diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..6dfabb4 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,346 @@ +#![allow(unstable)] +extern crate regex_macros; // for compile-time regular expression compilation +extern crate regex; +use std::num::Int; +use std::cmp; +use ::{Offset}; + +// +// parse.rs -- parsing for various standardized date and time string formats +// +// John Nagle +// January, 2015 +// +// +// RFC2822 time/date stamp parsing +// +// Example: "Tue, 20 Jan 2015 17:35:20 -0800". +// Common use case: email date/time. +// +// Date format specification, from RFC2822. 
+// +// date-time = [ day-of-week "," ] date FWS time [CFWS] +// +// day-of-week = ([FWS] day-name) / obs-day-of-week +// +// day-name = "Mon" / "Tue" / "Wed" / "Thu" / +// "Fri" / "Sat" / "Sun" +// +// date = day month year +// +// year = 4*DIGIT / obs-year +// +// month = (FWS month-name FWS) / obs-month +// +// month-name = "Jan" / "Feb" / "Mar" / "Apr" / +// "May" / "Jun" / "Jul" / "Aug" / +// "Sep" / "Oct" / "Nov" / "Dec" +// +// day = ([FWS] 1*2DIGIT) / obs-day +// +// time = time-of-day FWS zone +// +// time-of-day = hour ":" minute [ ":" second ] +// +// hour = 2DIGIT / obs-hour +// +// minute = 2DIGIT / obs-minute +// +// second = 2DIGIT / obs-second +// +// zone = (( "+" / "-" ) 4DIGIT) / obs-zone +// +// +// Obsolete forms +// +// obs-day-of-week = [CFWS] day-name [CFWS] +// +// obs-year = [CFWS] 2*DIGIT [CFWS] +// +// obs-month = CFWS month-name CFWS +// +// obs-day = [CFWS] 1*2DIGIT [CFWS] +// +// obs-hour = [CFWS] 2DIGIT [CFWS] +// +// obs-minute = [CFWS] 2DIGIT [CFWS] +// +// obs-second = [CFWS] 2DIGIT [CFWS] +// +// obs-zone = "UT" / "GMT" / ; Universal Time +// ; North American UT +// ; offsets +// "EST" / "EDT" / ; Eastern: - 5/ - 4 +// "CST" / "CDT" / ; Central: - 6/ - 5 +// "MST" / "MDT" / ; Mountain: - 7/ - 6 +// "PST" / "PDT" / ; Pacific: - 8/ - 7 +// +// %d65-73 / ; Military zones - "A" +// %d75-90 / ; through "I" and "K" +// %d97-105 / ; through "Z", both +// %d107-122 ; upper and lower case +// +// +// Per RFC2822, all the obsolete one-letter military time zones are interpreted as +// +0000. +// +// The only feature not supported is that an offset of "-0000" should return a +// naive date/time, not a time zone aware one. This returns a time zone aware +// date/time object in all cases. 
+// +// offsetmins -- time zone offset in minutes, from string +// +// Allowed input per RFC2822 above - numeric offset or named time zone +// +fn offsetmins(s: &str) -> Option { + let offsetre = regex!(r"^([+-])(\d\d)(\d\d)$"); // +0800 as 8 hour offset + let offsetmatches = offsetre.captures(s); // match time zone + match offsetmatches { + Some(caps) => { // It looks like a numeric offset + let sign = caps.at(1).unwrap(); // + or - + let hh = caps.at(2).unwrap().parse::().unwrap(); // hours + let mm = caps.at(3).unwrap().parse::().unwrap(); // minutes + let signval = match sign { + "+" => 1, + "-" => -1, + _ => return None // unreachable + }; + if hh < -12 || hh > 12 || mm < 0 || mm > 59 { return None } // check offsets + return Some(signval*(hh*60 + mm)) // return offset in minute + } + None => { // not numeric, try the named time zones + return match s { + "GMT"|"UT"|"Z"|"z" => Some(0), // prime meridian + "EDT" => Some(-4*60), // obsolete forms + "EST"|"CDT" => Some(-5*60), // rather US-centric in this old RFC. + "CST"|"MDT" => Some(-6*60), + "MST"|"PDT" => Some(-7*60), + "PST" => Some(-8*60), + _ => match s.len() { 1 => Some(0), _ => None } // obsolete single-letter miltary forms are treated as 0 per RFC + } + } + }; +} + +// +// parserfc2822datetime -- parse a date/time in RFC822 format. +// +/// Makes a new `DateTime` with offset given a valid RFC822 string. +/// Example: "Tue, 20 Jan 2015 17:35:20 -0800" +pub fn parserfc2822datetime(s: &str) -> Option<::DateTime<::FixedOffset>> { + + // Match the date format. Case-insensitive, compile-time regex. 
+ let datere = regex!(r"^(?i)(?:Mon,|Tue,|Wed,|Thu,|Fri,|Sat,|Sun,)??\s*(\d+)\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d\d\d\d)\s+(\d+):(\d+):(\d+)\s*([+-]\d\d\d\d|[A-Z]+)$"); + let matches = datere.captures(s.trim()); // Pattern match the date + let captures = match matches { + Some(caps) => caps, // succeed + None => return None // fail + }; + // Unwrapping numeric fields is safe because we've matched the regular expression. + let dd = captures.at(1).unwrap().parse::().unwrap(); // day of month + // Month names are case-sensitive in RFC 2822, but we allow the obvious other forms. + let mo = match captures.at(2).unwrap() { // month decode + "Jan"|"JAN"|"jan" => 1, + "Feb"|"FEB"|"feb" => 2, + "Mar"|"MAR"|"mar" => 3, + "Apr"|"APR"|"apr" => 4, + "May"|"MAY"|"may" => 5, + "Jun"|"JUN"|"jun" => 6, + "Jul"|"JUL"|"jul" => 7, + "Aug"|"AUG"|"aug" => 8, + "Sep"|"SEP"|"sep" => 9, + "Oct"|"OCT"|"oct" => 10, + "Nov"|"NOV"|"nov" => 11, + "Dec"|"DEC"|"dec" => 12, + _ => return None + }; + let yyyy = captures.at(3).unwrap().parse::().unwrap(); // chrono wants a signed year + let hh = captures.at(4).unwrap().parse::().unwrap(); + let mm = captures.at(5).unwrap().parse::().unwrap(); // minute + let ss = captures.at(6).unwrap().parse::().unwrap(); + let offsetstr = captures.at(7).unwrap(); // can be +0800 or a time zone name + let offsetmm = match offsetmins(offsetstr) { + Some(v) => v, + None => return None + }; + let tz = ::FixedOffset::east(offsetmm*60); // decode time zone offset + // Pack numeric values into DateTime object, returning None if fail. + let date = tz.ymd_opt(yyyy, mo, dd); // date or none + match date { // check for invalid date + ::LocalResult::Single(d) => d.and_hms_opt(hh, mm, ss), // encode into DateTime + _ => return None // date conversion failed + } +} +// +// fmtrfc2822datetime -- format DateTime as RFC 2822 date. +// +/// Formats a DateTime as an RF2822 string. +/// This is primarily for debugging. 
+pub fn fmtrfc2822datetime(dt: ::DateTime<::FixedOffset>) -> String { + dt.format("%a, %e %b %Y %H:%M:%S %z").to_string() // inverse of parsing +} + +// +// RFC3339 date parsing +// +// This is a subset of ISO 8601 date format. +// Example: "2012-09-09T18:00:00-07:00" +// Common use case: Atom feeds. +// +// +// From RFC3339, "Date and Time on the Internet: Timestamps", section 5.6: +// +// date-fullyear = 4DIGIT +// date-month = 2DIGIT ; 01-12 +// date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on +// ; month/year +// time-hour = 2DIGIT ; 00-23 +// time-minute = 2DIGIT ; 00-59 +// time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second +// ; rules +// time-secfrac = "." 1*DIGIT +// time-numoffset = ("+" / "-") time-hour ":" time-minute +// time-offset = "Z" / time-numoffset +// +// partial-time = time-hour ":" time-minute ":" time-second +// [time-secfrac] +// full-date = date-fullyear "-" date-month "-" date-mday +// full-time = partial-time time-offset +// +// date-time = full-date "T" full-time +// +// NOTE: Per [ABNF] and ISO8601, the "T" and "Z" characters in this +// syntax may alternatively be lower case "t" or "z" respectively. +// +// ISO 8601 defines date and time separated by "T". +// Applications using this syntax may choose, for the sake of +// readability, to specify a full-date and full-time separated by +// (say) a space character. +// +// +// parserfc3339datetime -- parse a date/time in RFC822 format. +// +/// Parse a string with a RFC3339 date, time, and offset into a DateTime. +/// This is the subset of ISO 8601 date and time strings most used on the Web. 
+pub fn parserfc3339datetime(s: &str) -> Option<::DateTime<::FixedOffset>> { + let datere = regex!(r"^(?i)(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)(\.\d+)??([+-]\d\d\d\d|[A-Z]+)$"); // format regex + let matches = datere.captures(s.trim()); // Pattern match the date + let captures = match matches { + Some(caps) => caps, // succeed + None => return None // fail + }; + // Unwrapping numeric fields is safe because we've matched the regular expression. + let yyyy = captures.at(1).unwrap().parse::().unwrap(); // chrono wants a signed year + let mo = captures.at(2).unwrap().parse::().unwrap(); // month of year + let dd = captures.at(3).unwrap().parse::().unwrap(); // day of month + let hh = captures.at(4).unwrap().parse::().unwrap(); // hour + let mm = captures.at(5).unwrap().parse::().unwrap(); // minute + let ss = captures.at(6).unwrap().parse::().unwrap(); // second + let ns = match captures.at(7) { // fractional seconds present? + Some(fractstr) => parsensfract(fractstr), // parse as nanoseconds + None => 0 // no fraction + }; + let offsetstr = captures.at(8).unwrap(); // time zone offset, numeric + let offsetmm = match offsetmins(offsetstr) { // also accepts named time zones, not required. + Some(v) => v, + None => return None + }; + let tz = ::FixedOffset::east(offsetmm*60); // decode time zone offset + // Pack numeric values into DateTime object, returning None if fail. + let date = tz.ymd_opt(yyyy, mo, dd); // date or none + match date { // check for invalid date + ::LocalResult::Single(d) => d.and_hms_nano_opt(hh, mm, ss, ns), // encode into DateTime + _ => return None // date conversion failed + } +} +// +// parsensfract -- parse ".NNN" into nanoseconds +// +// Assumes format is ".NNN" for any number of digits +// +fn parsensfract(s: &str) -> u32 { + let sdigits = &s[1..]; // trim off leading "." + let sdigits9 = &sdigits[0..(cmp::min(sdigits.len(),9))]; // truncate at 9 digits after "." 
+ let v = sdigits9.parse::().unwrap(); // digits as u32 (will fit) + let vl = 9-sdigits9.len(); // power of 10 for scaling + let scale = Int::pow(10,vl); // scale factor to get to + //panic!("parsens: s: {} sdigits9: {} v: {} scale: {} result: {}", s, sdigits9 , v, scale, v*scale); // ***TEMP*** + v*scale // as nanoseconds +} + +/// Formats a DateTime as an RFC 3339 date, with 9 digits of nanoseconds. +/// This is primarily for debugging use. +pub fn fmtrfc3339datetime(dt: ::DateTime<::FixedOffset>) -> String { + dt.format("%Y-%m-%dT%H:%M:%S.%f%z").to_string() // inverse of parsing +} + + +// +// Unit tests +// +#[test] +fn testrfc2822parser() { + // Test data - [input, expected result after parse and format] + let testdates = [ + ["Tue, 20 Jan 2015 17:35:20 -0800", "Tue, 20 Jan 2015 17:35:20 -0800"], // normal case + ["20 Jan 2015 17:35:20 -0800", "Tue, 20 Jan 2015 17:35:20 -0800"], // no day of week + ["20 JAN 2015 17:35:20 -0800", "Tue, 20 Jan 2015 17:35:20 -0800"], // upper case month allowed + ["6 Jun 1944 04:00:00Z","Tue, 6 Jun 1944 04:00:00 +0000"], // D-day + ["11 Sep 2001 9:45:00 EST", "Tue, 11 Sep 2001 09:45:00 -0500"], + ["30 Feb 2015 17:35:20 -0800", ""], // bad day of month + ["Tue, 20 Avr 2015 17:35:20 -0800", ""],// bad month name + ["Tue, 20 Jan 2015 25:35:20 -0800",""], // bad hour + ["Tue, 20 Jan 2015 17:65:20 -0800",""], // bad minute + ["Tue, 20 Jan 2015 17:35:90 -0800",""], // bad second + ["Tue, 20 Jan 2015 17:35:20 -1800",""], // bad offset + ["Tue, 20 Jan 2015 17:35:20 HAS",""] // bad named time zone + ]; + // Test against test data above + for testdate in testdates.iter() { + let date = testdate[0]; // input + let checkdate = testdate[1]; // expected result or "" + let d = parserfc2822datetime(date); // parse a date + let dt = match d { // did we get a value? 
+ Some(dt) => dt, // yes, go on + None => if checkdate != "" { panic!("Failed to convert date {}", date)} else { continue }, + }; + // let mut s = String::new(); + let s = fmtrfc2822datetime(dt); // convert date/time back to string + if s != checkdate { // check for expected result + panic!("Date conversion failed for {}\nReceived: {}\nExpected: {}",date, s, checkdate); + } + }; +} +#[test] +fn testrfc3339parser() { + // Test data - [input, expected result after parse and format] + let testdates = [ + ["2015-01-20T17:35:20-0800", "2015-01-20T17:35:20.000000000-0800"], // normal case + ["1944-06-06T04:04:00Z", "1944-06-06T04:04:00.000000000+0000"], // D-day + ["2001-09-11T09:45:00-0800", "2001-09-11T09:45:00.000000000-0800"], + ["2015-01-20T17:35:20.001-0800", "2015-01-20T17:35:20.001000000-0800"], // milliseconds + ["2015-01-20T17:35:20.000031-0800", "2015-01-20T17:35:20.000031000-0800"], // microseconds + ["2015-01-20T17:35:20.000000004-0800", "2015-01-20T17:35:20.000000004-0800"], // nanoseconds + ["2015-01-20T17:35:20.000000000452-0800", "2015-01-20T17:35:20.000000000-0800"], // picoseconds (too small) + ["2015-02-30T17:35:20-0800", ""], // bad day of month + ["2015-01-20T25:35:20-0800", ""], // bad hour + ["2015-01-20T17:65:20-0800", ""], // bad minute + ["2015-01-20T17:35:90-0800", ""], // bad second + ["2015-01-20T17:35:20-1800", ""], // bad offset + ]; + // Test against test data above + for testdate in testdates.iter() { + let date = testdate[0]; // input + let checkdate = testdate[1]; // expected result or "" + let d = parserfc3339datetime(date); // parse a date + let dt = match d { // did we get a value? 
+ Some(dt) => dt, // yes, go on + None => if checkdate != "" { panic!("Failed to convert date {}", date)} else { continue }, + }; + // let mut s = String::new(); + let s = fmtrfc3339datetime(dt); // convert date/time back to string + if s != checkdate { // check for expected result + panic!("Date conversion failed for {}\nReceived: {}\nExpected: {}",date, s, checkdate); + } + }; +}