From 693747040559c6c9b0cbb392938c017ec2671c0b Mon Sep 17 00:00:00 2001 From: Kang Seonghoon Date: Sun, 15 Feb 2015 21:01:36 +0900 Subject: [PATCH] created `format::parse` module. this new module encompasses John Nagle's original RFC 2822 and 3339 parsers, updated to be fully compatible with the actual standards. the contributed `parse` module has been merged into it. --- Cargo.toml | 2 - src/format/mod.rs | 529 ++++++++---------------------------- src/format/parse.rs | 625 +++++++++++++++++++++++++++++++++++++++++++ src/format/parsed.rs | 10 +- src/format/scan.rs | 63 ++++- src/lib.rs | 11 - src/parse.rs | 335 ----------------------- 7 files changed, 803 insertions(+), 772 deletions(-) create mode 100644 src/format/parse.rs delete mode 100644 src/parse.rs diff --git a/Cargo.toml b/Cargo.toml index e6a16f2..4401441 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,4 @@ name = "chrono" [dependencies] time = "0.1.15" -regex = "0.1.12" -regex_macros = "0.1.6" diff --git a/src/format/mod.rs b/src/format/mod.rs index c69b1a3..ba338ff 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -7,19 +7,18 @@ */ use std::fmt; -use std::usize; use std::error::Error; use {Datelike, Timelike}; -use Weekday; use div::{div_floor, mod_floor}; use duration::Duration; use offset::Offset; use naive::date::NaiveDate; use naive::time::NaiveTime; -pub use self::parsed::Parsed; pub use self::strftime::StrftimeItems; +pub use self::parsed::Parsed; +pub use self::parse::parse; /// Padding characters for numeric items. #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -134,6 +133,10 @@ pub enum Fixed { /// and `Z` can be either in upper case or in lower case. /// The offset is limited from `-24:00` to `+24:00`, which is same to `FixedOffset`'s range. TimezoneOffsetZ, + /// RFC 2822 date and time syntax. Commonly used for email and MIME date and time. + RFC2822, + /// RFC 3339 & ISO 8601 date and time syntax. + RFC3339, } /// A single formatting item. 
This is used for both formatting and parsing. @@ -159,6 +162,73 @@ macro_rules! num0 { ($x:ident) => (Item::Numeric(Numeric::$x, Pad::Zero)) } macro_rules! nums { ($x:ident) => (Item::Numeric(Numeric::$x, Pad::Space)) } macro_rules! fix { ($x:ident) => (Item::Fixed(Fixed::$x)) } +/// An error from the `parse` function. +#[derive(Debug, Clone, PartialEq, Copy)] +pub struct ParseError(ParseErrorKind); + +#[derive(Debug, Clone, PartialEq, Copy)] +enum ParseErrorKind { + /// Given field is out of permitted range. + OutOfRange, + + /// There is no possible date and time value with given set of fields. + /// + /// This does not include the out-of-range conditions, which are trivially invalid. + /// It includes the case that there are one or more fields that are inconsistent to each other. + Impossible, + + /// Given set of fields is not enough to make a requested date and time value. + /// + /// Note that there *may* be a case that given fields constrain the possible values so much + /// that there is a unique possible value. Chrono only tries to be correct for + /// most useful sets of fields however, as such constraint solving can be expensive. + NotEnough, + + /// The input string has some invalid character sequence for given formatting items. + Invalid, + + /// The input string has been prematurely ended. + TooShort, + + /// All formatting items have been read but there is a remaining input. + TooLong, + + /// There was an error on the formatting string, or there were non-supported formating items. + BadFormat, +} + +/// Same to `Result`. 
+pub type ParseResult = Result; + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.description().fmt(f) + } +} + +impl Error for ParseError { + fn description(&self) -> &str { + match self.0 { + ParseErrorKind::OutOfRange => "input is out of range", + ParseErrorKind::Impossible => "no possible date and time matching input", + ParseErrorKind::NotEnough => "input is not enough for unique date and time", + ParseErrorKind::Invalid => "input contains invalid characters", + ParseErrorKind::TooShort => "premature end of input", + ParseErrorKind::TooLong => "trailing input", + ParseErrorKind::BadFormat => "bad or unsupported format string", + } + } +} + +// to be used in this module and submodules +const OUT_OF_RANGE: ParseError = ParseError(ParseErrorKind::OutOfRange); +const IMPOSSIBLE: ParseError = ParseError(ParseErrorKind::Impossible); +const NOT_ENOUGH: ParseError = ParseError(ParseErrorKind::NotEnough); +const INVALID: ParseError = ParseError(ParseErrorKind::Invalid); +const TOO_SHORT: ParseError = ParseError(ParseErrorKind::TooShort); +const TOO_LONG: ParseError = ParseError(ParseErrorKind::TooLong); +const BAD_FORMAT: ParseError = ParseError(ParseErrorKind::BadFormat); + /// Tries to format given arguments with given formatting items. /// Internally used by `DelayedFormat`. pub fn format<'a, I>(w: &mut fmt::Formatter, date: Option<&NaiveDate>, time: Option<&NaiveTime>, @@ -228,18 +298,23 @@ pub fn format<'a, I>(w: &mut fmt::Formatter, date: Option<&NaiveDate>, time: Opt Item::Fixed(spec) => { use self::Fixed::*; + /// Prints an offset from UTC in the format of `+HHMM` or `+HH:MM`. + /// `Z` instead of `+00[:]00` is allowed when `allow_zulu` is true. 
fn write_local_minus_utc(w: &mut fmt::Formatter, off: Duration, - allow_zulu: bool) -> fmt::Result { + allow_zulu: bool, use_colon: bool) -> fmt::Result { let off = off.num_minutes(); if !allow_zulu || off != 0 { let (sign, off) = if off < 0 {('-', -off)} else {('+', off)}; - write!(w, "{}{:02}{:02}", sign, off / 60, off % 60) + if use_colon { + write!(w, "{}{:02}:{:02}", sign, off / 60, off % 60) + } else { + write!(w, "{}{:02}{:02}", sign, off / 60, off % 60) + } } else { write!(w, "Z") } } - let ret = match spec { ShortMonthName => date.map(|d| write!(w, "{}", SHORT_MONTHS[d.month0() as usize])), @@ -258,9 +333,28 @@ pub fn format<'a, I>(w: &mut fmt::Formatter, date: Option<&NaiveDate>, time: Opt TimezoneName => off.map(|&(ref name, _)| write!(w, "{}", *name)), TimezoneOffset => - off.map(|&(_, off)| write_local_minus_utc(w, off, false)), + off.map(|&(_, off)| write_local_minus_utc(w, off, false, false)), TimezoneOffsetZ => - off.map(|&(_, off)| write_local_minus_utc(w, off, true)), + off.map(|&(_, off)| write_local_minus_utc(w, off, true, false)), + RFC2822 => // same to `%a, %e %b %Y %H:%M:%S %z` + if let (Some(d), Some(t), Some(&(_, off))) = (date, time, off) { + try!(write!(w, "{}, {:2} {} {:04} {:02}:{:02}:{:02} ", + SHORT_WEEKDAYS[d.weekday().num_days_from_monday() as usize], + d.day(), SHORT_MONTHS[d.month0() as usize], d.year(), + t.hour(), t.minute(), t.second())); + Some(write_local_minus_utc(w, off, false, false)) + } else { + None + }, + RFC3339 => // (almost) same to `%Y-%m-%dT%H:%M:%S.%f%z` + if let (Some(d), Some(t), Some(&(_, off))) = (date, time, off) { + // reuse `Debug` impls which already prints ISO 8601 format. + // this is faster in this way. + try!(write!(w, "{:?}T{:?}", d, t)); + Some(write_local_minus_utc(w, off, false, true)) + } else { + None + }, }; match ret { @@ -276,215 +370,13 @@ pub fn format<'a, I>(w: &mut fmt::Formatter, date: Option<&NaiveDate>, time: Opt Ok(()) } -/// An error from the `parse` function. 
-#[derive(Debug, Clone, PartialEq, Copy)] -pub struct ParseError(ParseErrorKind); +pub mod parsed; -#[derive(Debug, Clone, PartialEq, Copy)] -enum ParseErrorKind { - /// Given field is out of permitted range. - OutOfRange, +// due to the size of parsing routines, they are in separate modules. +mod scan; +mod parse; - /// There is no possible date and time value with given set of fields. - /// - /// This does not include the out-of-range conditions, which are trivially invalid. - /// It includes the case that there are one or more fields that are inconsistent to each other. - Impossible, - - /// Given set of fields is not enough to make a requested date and time value. - /// - /// Note that there *may* be a case that given fields constrain the possible values so much - /// that there is a unique possible value. Chrono only tries to be correct for - /// most useful sets of fields however, as such constraint solving can be expensive. - NotEnough, - - /// The input string has some invalid character sequence for given formatting items. - Invalid, - - /// The input string has been prematurely ended. - TooShort, - - /// All formatting items have been read but there is a remaining input. - TooLong, - - /// There was an error on the formatting string, or there were non-supported formating items. - BadFormat, -} - -/// Same to `Result`. 
-pub type ParseResult = Result; - -impl fmt::Display for ParseError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.description().fmt(f) - } -} - -impl Error for ParseError { - fn description(&self) -> &str { - match self.0 { - ParseErrorKind::OutOfRange => "input is out of range", - ParseErrorKind::Impossible => "no possible date and time matching input", - ParseErrorKind::NotEnough => "input is not enough for unique date and time", - ParseErrorKind::Invalid => "input contains invalid characters", - ParseErrorKind::TooShort => "premature end of input", - ParseErrorKind::TooLong => "trailing input", - ParseErrorKind::BadFormat => "bad or unsupported format string", - } - } -} - -// to be used in this module and submodules -const OUT_OF_RANGE: ParseError = ParseError(ParseErrorKind::OutOfRange); -const IMPOSSIBLE: ParseError = ParseError(ParseErrorKind::Impossible); -const NOT_ENOUGH: ParseError = ParseError(ParseErrorKind::NotEnough); -const INVALID: ParseError = ParseError(ParseErrorKind::Invalid); -const TOO_SHORT: ParseError = ParseError(ParseErrorKind::TooShort); -const TOO_LONG: ParseError = ParseError(ParseErrorKind::TooLong); -const BAD_FORMAT: ParseError = ParseError(ParseErrorKind::BadFormat); - -/// Tries to parse given string into `parsed` with given formatting items. -/// Returns `Ok` when the entire string has been parsed (otherwise `parsed` should not be used). -/// There should be no trailing string after parsing; use a stray `Item::Space` to trim whitespaces. -/// -/// This particular date and time parser is: -/// -/// - Greedy. It will consume the longest possible prefix. -/// For example, `April` is always consumed entirely when the long month name is requested; -/// it equally accepts `Apr`, but prefers the longer prefix in this case. -/// - Padding-agnostic (for numeric items). The `Pad` field is completely ignored, -/// so one can prepend any number of whitespace then any number of zeroes before numbers. 
-/// - (Still) obeying the intrinsic parsing width. This allows, for example, parsing `HHMMSS`. -pub fn parse<'a, I>(parsed: &mut Parsed, mut s: &str, items: I) -> ParseResult<()> - where I: Iterator> { - macro_rules! try_consume { - ($e:expr) => ({ let (s_, v) = try!($e); s = s_; v }) - } - - for item in items { - match item { - Item::Literal(prefix) => { - if s.len() < prefix.len() { return Err(TOO_SHORT); } - if !s.starts_with(prefix) { return Err(INVALID); } - s = &s[prefix.len()..]; - } - - Item::Space(_) => { - s = s.trim_left(); - } - - Item::Numeric(spec, _pad) => { - use self::Numeric::*; - - fn set_weekday_with_num_days_from_sunday(p: &mut Parsed, - v: i64) -> ParseResult<()> { - p.set_weekday(match v { - 0 => Weekday::Sun, 1 => Weekday::Mon, 2 => Weekday::Tue, - 3 => Weekday::Wed, 4 => Weekday::Thu, 5 => Weekday::Fri, - 6 => Weekday::Sat, _ => return Err(OUT_OF_RANGE) - }) - } - - fn set_weekday_with_number_from_monday(p: &mut Parsed, v: i64) -> ParseResult<()> { - p.set_weekday(match v { - 1 => Weekday::Mon, 2 => Weekday::Tue, 3 => Weekday::Wed, - 4 => Weekday::Thu, 5 => Weekday::Fri, 6 => Weekday::Sat, - 7 => Weekday::Sun, _ => return Err(OUT_OF_RANGE) - }) - } - - let (width, frac, set): (usize, bool, - fn(&mut Parsed, i64) -> ParseResult<()>) = match spec { - Year => (4, false, Parsed::set_year), - YearDiv100 => (2, false, Parsed::set_year_div_100), - YearMod100 => (2, false, Parsed::set_year_mod_100), - IsoYear => (4, false, Parsed::set_isoyear), - IsoYearDiv100 => (2, false, Parsed::set_isoyear_div_100), - IsoYearMod100 => (2, false, Parsed::set_isoyear_mod_100), - Month => (2, false, Parsed::set_month), - Day => (2, false, Parsed::set_day), - WeekFromSun => (2, false, Parsed::set_week_from_sun), - WeekFromMon => (2, false, Parsed::set_week_from_mon), - IsoWeek => (2, false, Parsed::set_isoweek), - NumDaysFromSun => (1, false, set_weekday_with_num_days_from_sunday), - WeekdayFromMon => (1, false, set_weekday_with_number_from_monday), - Ordinal => 
(3, false, Parsed::set_ordinal), - Hour => (2, false, Parsed::set_hour), - Hour12 => (2, false, Parsed::set_hour12), - Minute => (2, false, Parsed::set_minute), - Second => (2, false, Parsed::set_second), - Nanosecond => (9, true, Parsed::set_nanosecond), - Timestamp => (usize::MAX, false, Parsed::set_timestamp), - }; - - let v = try_consume!(scan::number(s.trim_left(), 1, width, frac)); - try!(set(parsed, v)); - } - - Item::Fixed(spec) => { - use self::Fixed::*; - - match spec { - ShortMonthName => { - let month0 = try_consume!(scan::short_month0(s)); - try!(parsed.set_month(month0 as i64 + 1)); - } - - LongMonthName => { - let month0 = try_consume!(scan::short_or_long_month0(s)); - try!(parsed.set_month(month0 as i64 + 1)); - } - - ShortWeekdayName => { - let weekday = try_consume!(scan::short_weekday(s)); - try!(parsed.set_weekday(weekday)); - } - - LongWeekdayName => { - let weekday = try_consume!(scan::short_or_long_weekday(s)); - try!(parsed.set_weekday(weekday)); - } - - LowerAmPm | UpperAmPm => { - if s.len() < 2 { return Err(TOO_SHORT); } - let ampm = match [s.as_bytes()[0] | 32, s.as_bytes()[1] | 32] { - [b'a',b'm'] => false, - [b'p',b'm'] => true, - _ => return Err(INVALID) - }; - try!(parsed.set_ampm(ampm)); - s = &s[2..]; - } - - TimezoneName => return Err(BAD_FORMAT), - - TimezoneOffset => { - let offset = try_consume!(scan::timezone_offset(s.trim_left(), - scan::colon_or_space)); - try!(parsed.set_offset(offset as i64)); - } - - TimezoneOffsetZ => { - let offset = try_consume!(scan::timezone_offset_zulu(s.trim_left(), - scan::colon_or_space)); - try!(parsed.set_offset(offset as i64)); - } - } - } - - Item::Error => { - return Err(BAD_FORMAT); - } - } - } - - // if there are trailling chars, it is an error - if !s.is_empty() { - Err(TOO_LONG) - } else { - Ok(()) - } -} +pub mod strftime; /// A *temporary* object which can be used as an argument to `format!` or others. /// This is normally constructed via `format` methods of each date and time type. 
@@ -521,210 +413,3 @@ impl<'a, I: Iterator> + Clone> fmt::Display for DelayedFormat<'a, } } -mod scan; -pub mod parsed; - -pub mod strftime; - -#[cfg(test)] -#[test] -fn test_parse() { - // workaround for Rust issue #22255 - fn parse_all(s: &str, items: &[Item]) -> ParseResult { - let mut parsed = Parsed::new(); - try!(parse(&mut parsed, s, items.iter().cloned())); - Ok(parsed) - } - - macro_rules! check { - ($fmt:expr, $items:expr; $err:expr) => ( - assert_eq!(parse_all($fmt, &$items), Err($err)) - ); - ($fmt:expr, $items:expr; $($k:ident: $v:expr),*) => ( - assert_eq!(parse_all($fmt, &$items), Ok(Parsed { $($k: Some($v),)* ..Parsed::new() })) - ); - } - - // empty string - check!("", []; ); - check!(" ", []; TOO_LONG); - check!("a", []; TOO_LONG); - - // whitespaces - check!("", [sp!("")]; ); - check!(" ", [sp!("")]; ); - check!("\t", [sp!("")]; ); - check!(" \n\r \n", [sp!("")]; ); - check!("a", [sp!("")]; TOO_LONG); - - // literal - check!("", [lit!("a")]; TOO_SHORT); - check!(" ", [lit!("a")]; INVALID); - check!("a", [lit!("a")]; ); - check!("aa", [lit!("a")]; TOO_LONG); - check!("A", [lit!("a")]; INVALID); - check!("xy", [lit!("xy")]; ); - check!("xy", [lit!("x"), lit!("y")]; ); - check!("x y", [lit!("x"), lit!("y")]; INVALID); - check!("xy", [lit!("x"), sp!(""), lit!("y")]; ); - check!("x y", [lit!("x"), sp!(""), lit!("y")]; ); - - // numeric - check!("1987", [num!(Year)]; year_div_100: 19, year_mod_100: 87); - check!("1987 ", [num!(Year)]; TOO_LONG); - check!("0x12", [num!(Year)]; TOO_LONG); // `0` is parsed - check!("x123", [num!(Year)]; INVALID); - check!("2015", [num!(Year)]; year_div_100: 20, year_mod_100: 15); - check!("0000", [num!(Year)]; year_div_100: 0, year_mod_100: 0); - check!("9999", [num!(Year)]; year_div_100: 99, year_mod_100: 99); - check!(" \t987", [num!(Year)]; year_div_100: 9, year_mod_100: 87); - check!("5", [num!(Year)]; year_div_100: 0, year_mod_100: 5); - check!("-42", [num!(Year)]; INVALID); - check!("+42", [num!(Year)]; INVALID); - 
check!("5\0", [num!(Year)]; TOO_LONG); - check!("\05", [num!(Year)]; INVALID); - check!("", [num!(Year)]; TOO_SHORT); - check!("12345", [num!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34); - check!("12345", [nums!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34); - check!("12345", [num0!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34); - check!("12341234", [num!(Year), num!(Year)]; year_div_100: 12, year_mod_100: 34); - check!("1234 1234", [num!(Year), num!(Year)]; year_div_100: 12, year_mod_100: 34); - check!("1234 1235", [num!(Year), num!(Year)]; IMPOSSIBLE); - check!("1234 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); - check!("1234x1234", [num!(Year), lit!("x"), num!(Year)]; year_div_100: 12, year_mod_100: 34); - check!("1234xx1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); - check!("1234 x 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); - - // various numeric fields - check!("1234 5678", - [num!(Year), num!(IsoYear)]; - year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78); - check!("12 34 56 78", - [num!(YearDiv100), num!(YearMod100), num!(IsoYearDiv100), num!(IsoYearMod100)]; - year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78); - check!("1 2 3 4 5 6", - [num!(Month), num!(Day), num!(WeekFromSun), num!(WeekFromMon), num!(IsoWeek), - num!(NumDaysFromSun)]; - month: 1, day: 2, week_from_sun: 3, week_from_mon: 4, isoweek: 5, weekday: Weekday::Sat); - check!("7 89 01", - [num!(WeekdayFromMon), num!(Ordinal), num!(Hour12)]; - weekday: Weekday::Sun, ordinal: 89, hour_mod_12: 1); - check!("23 45 6 78901234 567890123", - [num!(Hour), num!(Minute), num!(Second), num!(Nanosecond), num!(Timestamp)]; - hour_div_12: 1, hour_mod_12: 11, minute: 45, second: 6, nanosecond: 789_012_340, - timestamp: 567_890_123); - - // fixed: month and weekday names - check!("apr", [fix!(ShortMonthName)]; month: 4); - check!("Apr", [fix!(ShortMonthName)]; month: 4); - check!("APR", 
[fix!(ShortMonthName)]; month: 4); - check!("ApR", [fix!(ShortMonthName)]; month: 4); - check!("April", [fix!(ShortMonthName)]; TOO_LONG); // `Apr` is parsed - check!("A", [fix!(ShortMonthName)]; TOO_SHORT); - check!("Sol", [fix!(ShortMonthName)]; INVALID); - check!("Apr", [fix!(LongMonthName)]; month: 4); - check!("Apri", [fix!(LongMonthName)]; TOO_LONG); // `Apr` is parsed - check!("April", [fix!(LongMonthName)]; month: 4); - check!("Aprill", [fix!(LongMonthName)]; TOO_LONG); - check!("Aprill", [fix!(LongMonthName), lit!("l")]; month: 4); - check!("Aprl", [fix!(LongMonthName), lit!("l")]; month: 4); - check!("April", [fix!(LongMonthName), lit!("il")]; TOO_SHORT); // do not backtrack - check!("thu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu); - check!("Thu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu); - check!("THU", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu); - check!("tHu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu); - check!("Thursday", [fix!(ShortWeekdayName)]; TOO_LONG); // `Thu` is parsed - check!("T", [fix!(ShortWeekdayName)]; TOO_SHORT); - check!("The", [fix!(ShortWeekdayName)]; INVALID); - check!("Nop", [fix!(ShortWeekdayName)]; INVALID); - check!("Thu", [fix!(LongWeekdayName)]; weekday: Weekday::Thu); - check!("Thur", [fix!(LongWeekdayName)]; TOO_LONG); // `Thu` is parsed - check!("Thurs", [fix!(LongWeekdayName)]; TOO_LONG); // ditto - check!("Thursday", [fix!(LongWeekdayName)]; weekday: Weekday::Thu); - check!("Thursdays", [fix!(LongWeekdayName)]; TOO_LONG); - check!("Thursdays", [fix!(LongWeekdayName), lit!("s")]; weekday: Weekday::Thu); - check!("Thus", [fix!(LongWeekdayName), lit!("s")]; weekday: Weekday::Thu); - check!("Thursday", [fix!(LongWeekdayName), lit!("rsday")]; TOO_SHORT); // do not backtrack - - // fixed: am/pm - check!("am", [fix!(LowerAmPm)]; hour_div_12: 0); - check!("pm", [fix!(LowerAmPm)]; hour_div_12: 1); - check!("AM", [fix!(LowerAmPm)]; hour_div_12: 0); - check!("PM", [fix!(LowerAmPm)]; hour_div_12: 1); - 
check!("am", [fix!(UpperAmPm)]; hour_div_12: 0); - check!("pm", [fix!(UpperAmPm)]; hour_div_12: 1); - check!("AM", [fix!(UpperAmPm)]; hour_div_12: 0); - check!("PM", [fix!(UpperAmPm)]; hour_div_12: 1); - check!("Am", [fix!(LowerAmPm)]; hour_div_12: 0); - check!(" Am", [fix!(LowerAmPm)]; INVALID); - check!("ame", [fix!(LowerAmPm)]; TOO_LONG); // `am` is parsed - check!("a", [fix!(LowerAmPm)]; TOO_SHORT); - check!("p", [fix!(LowerAmPm)]; TOO_SHORT); - check!("x", [fix!(LowerAmPm)]; TOO_SHORT); - check!("xx", [fix!(LowerAmPm)]; INVALID); - check!("", [fix!(LowerAmPm)]; TOO_SHORT); - - // fixed: timezone offsets - check!("+00:00", [fix!(TimezoneOffset)]; offset: 0); - check!("-00:00", [fix!(TimezoneOffset)]; offset: 0); - check!("+00:01", [fix!(TimezoneOffset)]; offset: 60); - check!("-00:01", [fix!(TimezoneOffset)]; offset: -60); - check!("+00:30", [fix!(TimezoneOffset)]; offset: 30 * 60); - check!("-00:30", [fix!(TimezoneOffset)]; offset: -30 * 60); - check!("+04:56", [fix!(TimezoneOffset)]; offset: 296 * 60); - check!("-04:56", [fix!(TimezoneOffset)]; offset: -296 * 60); - check!("+24:00", [fix!(TimezoneOffset)]; offset: 24 * 60 * 60); - check!("-24:00", [fix!(TimezoneOffset)]; offset: -24 * 60 * 60); - check!("+99:59", [fix!(TimezoneOffset)]; offset: (100 * 60 - 1) * 60); - check!("-99:59", [fix!(TimezoneOffset)]; offset: -(100 * 60 - 1) * 60); - check!("+00:59", [fix!(TimezoneOffset)]; offset: 59 * 60); - check!("+00:60", [fix!(TimezoneOffset)]; OUT_OF_RANGE); - check!("+00:99", [fix!(TimezoneOffset)]; OUT_OF_RANGE); - check!("#12:34", [fix!(TimezoneOffset)]; INVALID); - check!("12:34", [fix!(TimezoneOffset)]; INVALID); - check!("+12:34 ", [fix!(TimezoneOffset)]; TOO_LONG); - check!(" +12:34", [fix!(TimezoneOffset)]; offset: 754 * 60); - check!("\t -12:34", [fix!(TimezoneOffset)]; offset: -754 * 60); - check!("", [fix!(TimezoneOffset)]; TOO_SHORT); - check!("+", [fix!(TimezoneOffset)]; TOO_SHORT); - check!("+1", [fix!(TimezoneOffset)]; TOO_SHORT); - check!("+12", 
[fix!(TimezoneOffset)]; TOO_SHORT); - check!("+123", [fix!(TimezoneOffset)]; TOO_SHORT); - check!("+1234", [fix!(TimezoneOffset)]; offset: 754 * 60); - check!("+12345", [fix!(TimezoneOffset)]; TOO_LONG); - check!("+12345", [fix!(TimezoneOffset), num!(Day)]; offset: 754 * 60, day: 5); - check!("Z", [fix!(TimezoneOffset)]; INVALID); - check!("z", [fix!(TimezoneOffset)]; INVALID); - check!("Z", [fix!(TimezoneOffsetZ)]; offset: 0); - check!("z", [fix!(TimezoneOffsetZ)]; offset: 0); - check!("Y", [fix!(TimezoneOffsetZ)]; INVALID); - check!("Zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0); - check!("zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0); - check!("+1234ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60); - check!("+12:34ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60); - check!("???", [fix!(TimezoneName)]; BAD_FORMAT); // not allowed - - // some practical examples - check!("2015-02-04T14:37:05+09:00", - [num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"), - num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)]; - year_div_100: 20, year_mod_100: 15, month: 2, day: 4, - hour_div_12: 1, hour_mod_12: 2, minute: 37, second: 5, offset: 32400); - check!("Mon, 10 Jun 2013 09:32:37 GMT", - [fix!(ShortWeekdayName), lit!(","), sp!(" "), num!(Day), sp!(" "), - fix!(ShortMonthName), sp!(" "), num!(Year), sp!(" "), num!(Hour), lit!(":"), - num!(Minute), lit!(":"), num!(Second), sp!(" "), lit!("GMT")]; - year_div_100: 20, year_mod_100: 13, month: 6, day: 10, weekday: Weekday::Mon, - hour_div_12: 0, hour_mod_12: 9, minute: 32, second: 37); - check!("20060102150405", - [num!(Year), num!(Month), num!(Day), num!(Hour), num!(Minute), num!(Second)]; - year_div_100: 20, year_mod_100: 6, month: 1, day: 2, - hour_div_12: 1, hour_mod_12: 3, minute: 4, second: 5); - check!("3:14PM", - [num!(Hour12), lit!(":"), num!(Minute), fix!(LowerAmPm)]; - hour_div_12: 1, hour_mod_12: 3, minute: 14); - 
check!("12345678901234.56789", - [num!(Timestamp), lit!("."), num!(Nanosecond)]; - nanosecond: 567_890_000, timestamp: 12_345_678_901_234); -} - diff --git a/src/format/parse.rs b/src/format/parse.rs new file mode 100644 index 0000000..36f72a9 --- /dev/null +++ b/src/format/parse.rs @@ -0,0 +1,625 @@ +// This is a part of rust-chrono. +// Copyright (c) 2015, Kang Seonghoon. +// Portions copyright (c) 2015, John Nagle. +// See README.md and LICENSE.txt for details. + +/*! + * Date and time parsing routines. + */ + +use std::usize; + +use Weekday; + +use super::scan; +use super::{Parsed, ParseResult, Item}; +use super::{OUT_OF_RANGE, INVALID, TOO_SHORT, TOO_LONG, BAD_FORMAT}; + +fn set_weekday_with_num_days_from_sunday(p: &mut Parsed, v: i64) -> ParseResult<()> { + p.set_weekday(match v { + 0 => Weekday::Sun, 1 => Weekday::Mon, 2 => Weekday::Tue, + 3 => Weekday::Wed, 4 => Weekday::Thu, 5 => Weekday::Fri, + 6 => Weekday::Sat, _ => return Err(OUT_OF_RANGE) + }) +} + +fn set_weekday_with_number_from_monday(p: &mut Parsed, v: i64) -> ParseResult<()> { + p.set_weekday(match v { + 1 => Weekday::Mon, 2 => Weekday::Tue, 3 => Weekday::Wed, + 4 => Weekday::Thu, 5 => Weekday::Fri, 6 => Weekday::Sat, + 7 => Weekday::Sun, _ => return Err(OUT_OF_RANGE) + }) +} + +fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a str, ())> { + macro_rules! 
try_consume { + ($e:expr) => ({ let (s_, v) = try!($e); s = s_; v }) + } + + // an adapted RFC 2822 syntax from Section 3.3 and 4.3: + // + // date-time = [ day-of-week "," ] date 1*S time *S + // day-of-week = *S day-name *S + // day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" + // date = day month year + // day = *S 1*2DIGIT *S + // month = 1*S month-name 1*S + // month-name = "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / + // "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" + // year = *S 2*DIGIT *S + // time = time-of-day 1*S zone + // time-of-day = hour ":" minute [ ":" second ] + // hour = *S 2DIGIT *S + // minute = *S 2DIGIT *S + // second = *S 2DIGIT *S + // zone = ( "+" / "-" ) 4DIGIT / + // "UT" / "GMT" / ; same to +0000 + // "EST" / "CST" / "MST" / "PST" / ; same to -0500 to -0800 + // "EDT" / "CDT" / "MDT" / "PDT" / ; same to -0400 to -0700 + // 1*(%d65-90 / %d97-122) ; same to -0000 + // + // some notes: + // + // - quoted characters can be in any mixture of lower and upper cases. + // + // - we do not recognize a folding white space (FWS) or comment (CFWS). + // for our purposes, instead, we accept any sequence of Unicode + // white space characters (denoted here to `S`). any actual RFC 2822 + // parser is expected to parse FWS and/or CFWS themselves and replace + // it with a single SP (`%x20`); this is legitimate. + // + // - two-digit year < 50 should be interpreted by adding 2000. + // two-digit year >= 50 or three-digit year should be interpreted + // by adding 1900. note that four-or-more-digit years less than 1000 + // are *never* affected by this rule. + // + // - zone of `-0000` and any unrecognized legacy time zones (including + // *every* one-letter military time zones) are considered "missing", + // in such that we don't actually know what time zone is being used. + // + // - mismatching day-of-week is always an error, which is consistent to + // Chrono's own rules. 
+ // + // - zones can range from `-9959` to `+9959`, but `FixedOffset` does not + // support offsets larger than 24 hours. this is not *that* problematic + // since we do not directly go to a `DateTime` so one can recover + // the offset information from `Parsed` anyway. + + s = s.trim_left(); + + if let Ok((s_, weekday)) = scan::short_weekday(s) { + if !s_.starts_with(",") { return Err(INVALID); } + s = &s_[1..]; + try!(parsed.set_weekday(weekday)); + } + + s = s.trim_left(); + try!(parsed.set_day(try_consume!(scan::number(s, 1, 2, false)))); + s = try!(scan::space(s)); // mandatory + try!(parsed.set_month(1 + try_consume!(scan::short_month0(s)) as i64)); + s = try!(scan::space(s)); // mandatory + + // distinguish two- and three-digit years from four-digit years + let prevlen = s.len(); + let mut year = try_consume!(scan::number(s, 2, usize::MAX, false)); + let yearlen = prevlen - s.len(); + match (yearlen, year) { + (2, 0...49) => { year += 2000; } // 47 -> 2047, 05 -> 2005 + (2, 50...99) => { year += 1900; } // 79 -> 1979 + (3, _) => { year += 1900; } // 112 -> 2012, 009 -> 1909 + (_, _) => {} // 1987 -> 1987, 0654 -> 0654 + } + try!(parsed.set_year(year)); + + s = try!(scan::space(s)); // mandatory + try!(parsed.set_hour(try_consume!(scan::number(s, 2, 2, false)))); + s = try!(scan::char(s.trim_left(), b':')).trim_left(); // *S ":" *S + try!(parsed.set_minute(try_consume!(scan::number(s, 2, 2, false)))); + s = s.trim_left(); + if !s.is_empty() { // [ ":" *S 2DIGIT ] + s = try!(scan::char(s, b':')).trim_left(); + try!(parsed.set_second(try_consume!(scan::number(s, 2, 2, false)))); + } + + s = try!(scan::space(s)); // mandatory + if let Some(offset) = try_consume!(scan::timezone_offset_2822(s)) { + // only set the offset when it is definitely known (i.e. not `-0000`) + try!(parsed.set_offset(offset as i64)); + } + + Ok((s, ())) +} + +fn parse_rfc3339<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a str, ())> { + macro_rules! 
try_consume { + ($e:expr) => ({ let (s_, v) = try!($e); s = s_; v }) + } + + // an adapted RFC 3339 syntax from Section 5.6: + // + // date-fullyear = 4DIGIT + // date-month = 2DIGIT ; 01-12 + // date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year + // time-hour = 2DIGIT ; 00-23 + // time-minute = 2DIGIT ; 00-59 + // time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules + // time-secfrac = "." 1*DIGIT + // time-numoffset = ("+" / "-") time-hour ":" time-minute + // time-offset = "Z" / time-numoffset + // partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] + // full-date = date-fullyear "-" date-month "-" date-mday + // full-time = partial-time time-offset + // date-time = full-date "T" full-time + // + // some notes: + // + // - quoted characters can be in any mixture of lower and upper cases. + // + // - it may accept any number of fractional digits for seconds. + // for Chrono, this means that we should skip digits past first 9 digits. + // + // - unlike RFC 2822, the valid offset ranges from -23:59 to +23:59. + // note that this restriction is unique to RFC 3339 and not ISO 8601. + // since this is not a typical Chrono behavior, we check it earlier. 
+ + try!(parsed.set_year(try_consume!(scan::number(s, 4, 4, false)))); + s = try!(scan::char(s, b'-')); + try!(parsed.set_month(try_consume!(scan::number(s, 2, 2, false)))); + s = try!(scan::char(s, b'-')); + try!(parsed.set_day(try_consume!(scan::number(s, 2, 2, false)))); + + s = match s.as_bytes().first() { + Some(&b't') | Some(&b'T') => &s[1..], + Some(_) => return Err(INVALID), + None => return Err(TOO_SHORT), + }; + + try!(parsed.set_hour(try_consume!(scan::number(s, 2, 2, false)))); + s = try!(scan::char(s, b':')); + try!(parsed.set_minute(try_consume!(scan::number(s, 2, 2, false)))); + s = try!(scan::char(s, b':')); + try!(parsed.set_second(try_consume!(scan::number(s, 2, 2, false)))); + if s.starts_with(".") { + let nanosecond = try_consume!(scan::number(&s[1..], 1, 9, true)); + s = s.trim_left_matches(|c: char| '0' <= c && c <= '9'); + try!(parsed.set_nanosecond(nanosecond)); + } + + let offset = try_consume!(scan::timezone_offset_zulu(s, |s| scan::char(s, b':'))); + if offset <= -86400 || offset >= 86400 { return Err(OUT_OF_RANGE); } + try!(parsed.set_offset(offset as i64)); + + Ok((s, ())) +} + +/// Tries to parse given string into `parsed` with given formatting items. +/// Returns `Ok` when the entire string has been parsed (otherwise `parsed` should not be used). +/// There should be no trailing string after parsing; use a stray `Item::Space` to trim whitespaces. +/// +/// This particular date and time parser is: +/// +/// - Greedy. It will consume the longest possible prefix. +/// For example, `April` is always consumed entirely when the long month name is requested; +/// it equally accepts `Apr`, but prefers the longer prefix in this case. +/// - Padding-agnostic (for numeric items). The `Pad` field is completely ignored, +/// so one can prepend any number of whitespace then any number of zeroes before numbers. +/// - (Still) obeying the intrinsic parsing width. This allows, for example, parsing `HHMMSS`. 
+pub fn parse<'a, I>(parsed: &mut Parsed, mut s: &str, items: I) -> ParseResult<()> + where I: Iterator> { + macro_rules! try_consume { + ($e:expr) => ({ let (s_, v) = try!($e); s = s_; v }) + } + + for item in items { + match item { + Item::Literal(prefix) => { + if s.len() < prefix.len() { return Err(TOO_SHORT); } + if !s.starts_with(prefix) { return Err(INVALID); } + s = &s[prefix.len()..]; + } + + Item::Space(_) => { + s = s.trim_left(); + } + + Item::Numeric(spec, _pad) => { + use super::Numeric::*; + + let (width, frac, set): (usize, bool, + fn(&mut Parsed, i64) -> ParseResult<()>) = match spec { + Year => (4, false, Parsed::set_year), + YearDiv100 => (2, false, Parsed::set_year_div_100), + YearMod100 => (2, false, Parsed::set_year_mod_100), + IsoYear => (4, false, Parsed::set_isoyear), + IsoYearDiv100 => (2, false, Parsed::set_isoyear_div_100), + IsoYearMod100 => (2, false, Parsed::set_isoyear_mod_100), + Month => (2, false, Parsed::set_month), + Day => (2, false, Parsed::set_day), + WeekFromSun => (2, false, Parsed::set_week_from_sun), + WeekFromMon => (2, false, Parsed::set_week_from_mon), + IsoWeek => (2, false, Parsed::set_isoweek), + NumDaysFromSun => (1, false, set_weekday_with_num_days_from_sunday), + WeekdayFromMon => (1, false, set_weekday_with_number_from_monday), + Ordinal => (3, false, Parsed::set_ordinal), + Hour => (2, false, Parsed::set_hour), + Hour12 => (2, false, Parsed::set_hour12), + Minute => (2, false, Parsed::set_minute), + Second => (2, false, Parsed::set_second), + Nanosecond => (9, true, Parsed::set_nanosecond), + Timestamp => (usize::MAX, false, Parsed::set_timestamp), + }; + + let v = try_consume!(scan::number(s.trim_left(), 1, width, frac)); + try!(set(parsed, v)); + } + + Item::Fixed(spec) => { + use super::Fixed::*; + + match spec { + ShortMonthName => { + let month0 = try_consume!(scan::short_month0(s)); + try!(parsed.set_month(month0 as i64 + 1)); + } + + LongMonthName => { + let month0 = 
try_consume!(scan::short_or_long_month0(s)); + try!(parsed.set_month(month0 as i64 + 1)); + } + + ShortWeekdayName => { + let weekday = try_consume!(scan::short_weekday(s)); + try!(parsed.set_weekday(weekday)); + } + + LongWeekdayName => { + let weekday = try_consume!(scan::short_or_long_weekday(s)); + try!(parsed.set_weekday(weekday)); + } + + LowerAmPm | UpperAmPm => { + if s.len() < 2 { return Err(TOO_SHORT); } + let ampm = match [s.as_bytes()[0] | 32, s.as_bytes()[1] | 32] { + [b'a',b'm'] => false, + [b'p',b'm'] => true, + _ => return Err(INVALID) + }; + try!(parsed.set_ampm(ampm)); + s = &s[2..]; + } + + TimezoneName => return Err(BAD_FORMAT), + + TimezoneOffset => { + let offset = try_consume!(scan::timezone_offset(s.trim_left(), + scan::colon_or_space)); + try!(parsed.set_offset(offset as i64)); + } + + TimezoneOffsetZ => { + let offset = try_consume!(scan::timezone_offset_zulu(s.trim_left(), + scan::colon_or_space)); + try!(parsed.set_offset(offset as i64)); + } + + RFC2822 => try_consume!(parse_rfc2822(parsed, s)), + RFC3339 => try_consume!(parse_rfc3339(parsed, s)), + } + } + + Item::Error => { + return Err(BAD_FORMAT); + } + } + } + + // if there are trailling chars, it is an error + if !s.is_empty() { + Err(TOO_LONG) + } else { + Ok(()) + } +} + +#[cfg(test)] +#[test] +fn test_parse() { + use super::*; + use super::IMPOSSIBLE; + + // workaround for Rust issue #22255 + fn parse_all(s: &str, items: &[Item]) -> ParseResult { + let mut parsed = Parsed::new(); + try!(parse(&mut parsed, s, items.iter().cloned())); + Ok(parsed) + } + + macro_rules! 
check { + ($fmt:expr, $items:expr; $err:expr) => ( + assert_eq!(parse_all($fmt, &$items), Err($err)) + ); + ($fmt:expr, $items:expr; $($k:ident: $v:expr),*) => ( + assert_eq!(parse_all($fmt, &$items), Ok(Parsed { $($k: Some($v),)* ..Parsed::new() })) + ); + } + + // empty string + check!("", []; ); + check!(" ", []; TOO_LONG); + check!("a", []; TOO_LONG); + + // whitespaces + check!("", [sp!("")]; ); + check!(" ", [sp!("")]; ); + check!("\t", [sp!("")]; ); + check!(" \n\r \n", [sp!("")]; ); + check!("a", [sp!("")]; TOO_LONG); + + // literal + check!("", [lit!("a")]; TOO_SHORT); + check!(" ", [lit!("a")]; INVALID); + check!("a", [lit!("a")]; ); + check!("aa", [lit!("a")]; TOO_LONG); + check!("A", [lit!("a")]; INVALID); + check!("xy", [lit!("xy")]; ); + check!("xy", [lit!("x"), lit!("y")]; ); + check!("x y", [lit!("x"), lit!("y")]; INVALID); + check!("xy", [lit!("x"), sp!(""), lit!("y")]; ); + check!("x y", [lit!("x"), sp!(""), lit!("y")]; ); + + // numeric + check!("1987", [num!(Year)]; year_div_100: 19, year_mod_100: 87); + check!("1987 ", [num!(Year)]; TOO_LONG); + check!("0x12", [num!(Year)]; TOO_LONG); // `0` is parsed + check!("x123", [num!(Year)]; INVALID); + check!("2015", [num!(Year)]; year_div_100: 20, year_mod_100: 15); + check!("0000", [num!(Year)]; year_div_100: 0, year_mod_100: 0); + check!("9999", [num!(Year)]; year_div_100: 99, year_mod_100: 99); + check!(" \t987", [num!(Year)]; year_div_100: 9, year_mod_100: 87); + check!("5", [num!(Year)]; year_div_100: 0, year_mod_100: 5); + check!("-42", [num!(Year)]; INVALID); + check!("+42", [num!(Year)]; INVALID); + check!("5\0", [num!(Year)]; TOO_LONG); + check!("\05", [num!(Year)]; INVALID); + check!("", [num!(Year)]; TOO_SHORT); + check!("12345", [num!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34); + check!("12345", [nums!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34); + check!("12345", [num0!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34); + check!("12341234", [num!(Year), 
num!(Year)]; year_div_100: 12, year_mod_100: 34); + check!("1234 1234", [num!(Year), num!(Year)]; year_div_100: 12, year_mod_100: 34); + check!("1234 1235", [num!(Year), num!(Year)]; IMPOSSIBLE); + check!("1234 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); + check!("1234x1234", [num!(Year), lit!("x"), num!(Year)]; year_div_100: 12, year_mod_100: 34); + check!("1234xx1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); + check!("1234 x 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID); + + // various numeric fields + check!("1234 5678", + [num!(Year), num!(IsoYear)]; + year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78); + check!("12 34 56 78", + [num!(YearDiv100), num!(YearMod100), num!(IsoYearDiv100), num!(IsoYearMod100)]; + year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78); + check!("1 2 3 4 5 6", + [num!(Month), num!(Day), num!(WeekFromSun), num!(WeekFromMon), num!(IsoWeek), + num!(NumDaysFromSun)]; + month: 1, day: 2, week_from_sun: 3, week_from_mon: 4, isoweek: 5, weekday: Weekday::Sat); + check!("7 89 01", + [num!(WeekdayFromMon), num!(Ordinal), num!(Hour12)]; + weekday: Weekday::Sun, ordinal: 89, hour_mod_12: 1); + check!("23 45 6 78901234 567890123", + [num!(Hour), num!(Minute), num!(Second), num!(Nanosecond), num!(Timestamp)]; + hour_div_12: 1, hour_mod_12: 11, minute: 45, second: 6, nanosecond: 789_012_340, + timestamp: 567_890_123); + + // fixed: month and weekday names + check!("apr", [fix!(ShortMonthName)]; month: 4); + check!("Apr", [fix!(ShortMonthName)]; month: 4); + check!("APR", [fix!(ShortMonthName)]; month: 4); + check!("ApR", [fix!(ShortMonthName)]; month: 4); + check!("April", [fix!(ShortMonthName)]; TOO_LONG); // `Apr` is parsed + check!("A", [fix!(ShortMonthName)]; TOO_SHORT); + check!("Sol", [fix!(ShortMonthName)]; INVALID); + check!("Apr", [fix!(LongMonthName)]; month: 4); + check!("Apri", [fix!(LongMonthName)]; TOO_LONG); // `Apr` is parsed + check!("April", 
[fix!(LongMonthName)]; month: 4); + check!("Aprill", [fix!(LongMonthName)]; TOO_LONG); + check!("Aprill", [fix!(LongMonthName), lit!("l")]; month: 4); + check!("Aprl", [fix!(LongMonthName), lit!("l")]; month: 4); + check!("April", [fix!(LongMonthName), lit!("il")]; TOO_SHORT); // do not backtrack + check!("thu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu); + check!("Thu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu); + check!("THU", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu); + check!("tHu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu); + check!("Thursday", [fix!(ShortWeekdayName)]; TOO_LONG); // `Thu` is parsed + check!("T", [fix!(ShortWeekdayName)]; TOO_SHORT); + check!("The", [fix!(ShortWeekdayName)]; INVALID); + check!("Nop", [fix!(ShortWeekdayName)]; INVALID); + check!("Thu", [fix!(LongWeekdayName)]; weekday: Weekday::Thu); + check!("Thur", [fix!(LongWeekdayName)]; TOO_LONG); // `Thu` is parsed + check!("Thurs", [fix!(LongWeekdayName)]; TOO_LONG); // ditto + check!("Thursday", [fix!(LongWeekdayName)]; weekday: Weekday::Thu); + check!("Thursdays", [fix!(LongWeekdayName)]; TOO_LONG); + check!("Thursdays", [fix!(LongWeekdayName), lit!("s")]; weekday: Weekday::Thu); + check!("Thus", [fix!(LongWeekdayName), lit!("s")]; weekday: Weekday::Thu); + check!("Thursday", [fix!(LongWeekdayName), lit!("rsday")]; TOO_SHORT); // do not backtrack + + // fixed: am/pm + check!("am", [fix!(LowerAmPm)]; hour_div_12: 0); + check!("pm", [fix!(LowerAmPm)]; hour_div_12: 1); + check!("AM", [fix!(LowerAmPm)]; hour_div_12: 0); + check!("PM", [fix!(LowerAmPm)]; hour_div_12: 1); + check!("am", [fix!(UpperAmPm)]; hour_div_12: 0); + check!("pm", [fix!(UpperAmPm)]; hour_div_12: 1); + check!("AM", [fix!(UpperAmPm)]; hour_div_12: 0); + check!("PM", [fix!(UpperAmPm)]; hour_div_12: 1); + check!("Am", [fix!(LowerAmPm)]; hour_div_12: 0); + check!(" Am", [fix!(LowerAmPm)]; INVALID); + check!("ame", [fix!(LowerAmPm)]; TOO_LONG); // `am` is parsed + check!("a", [fix!(LowerAmPm)]; 
TOO_SHORT); + check!("p", [fix!(LowerAmPm)]; TOO_SHORT); + check!("x", [fix!(LowerAmPm)]; TOO_SHORT); + check!("xx", [fix!(LowerAmPm)]; INVALID); + check!("", [fix!(LowerAmPm)]; TOO_SHORT); + + // fixed: timezone offsets + check!("+00:00", [fix!(TimezoneOffset)]; offset: 0); + check!("-00:00", [fix!(TimezoneOffset)]; offset: 0); + check!("+00:01", [fix!(TimezoneOffset)]; offset: 60); + check!("-00:01", [fix!(TimezoneOffset)]; offset: -60); + check!("+00:30", [fix!(TimezoneOffset)]; offset: 30 * 60); + check!("-00:30", [fix!(TimezoneOffset)]; offset: -30 * 60); + check!("+04:56", [fix!(TimezoneOffset)]; offset: 296 * 60); + check!("-04:56", [fix!(TimezoneOffset)]; offset: -296 * 60); + check!("+24:00", [fix!(TimezoneOffset)]; offset: 24 * 60 * 60); + check!("-24:00", [fix!(TimezoneOffset)]; offset: -24 * 60 * 60); + check!("+99:59", [fix!(TimezoneOffset)]; offset: (100 * 60 - 1) * 60); + check!("-99:59", [fix!(TimezoneOffset)]; offset: -(100 * 60 - 1) * 60); + check!("+00:59", [fix!(TimezoneOffset)]; offset: 59 * 60); + check!("+00:60", [fix!(TimezoneOffset)]; OUT_OF_RANGE); + check!("+00:99", [fix!(TimezoneOffset)]; OUT_OF_RANGE); + check!("#12:34", [fix!(TimezoneOffset)]; INVALID); + check!("12:34", [fix!(TimezoneOffset)]; INVALID); + check!("+12:34 ", [fix!(TimezoneOffset)]; TOO_LONG); + check!(" +12:34", [fix!(TimezoneOffset)]; offset: 754 * 60); + check!("\t -12:34", [fix!(TimezoneOffset)]; offset: -754 * 60); + check!("", [fix!(TimezoneOffset)]; TOO_SHORT); + check!("+", [fix!(TimezoneOffset)]; TOO_SHORT); + check!("+1", [fix!(TimezoneOffset)]; TOO_SHORT); + check!("+12", [fix!(TimezoneOffset)]; TOO_SHORT); + check!("+123", [fix!(TimezoneOffset)]; TOO_SHORT); + check!("+1234", [fix!(TimezoneOffset)]; offset: 754 * 60); + check!("+12345", [fix!(TimezoneOffset)]; TOO_LONG); + check!("+12345", [fix!(TimezoneOffset), num!(Day)]; offset: 754 * 60, day: 5); + check!("Z", [fix!(TimezoneOffset)]; INVALID); + check!("z", [fix!(TimezoneOffset)]; INVALID); + check!("Z", 
[fix!(TimezoneOffsetZ)]; offset: 0); + check!("z", [fix!(TimezoneOffsetZ)]; offset: 0); + check!("Y", [fix!(TimezoneOffsetZ)]; INVALID); + check!("Zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0); + check!("zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0); + check!("+1234ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60); + check!("+12:34ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60); + check!("???", [fix!(TimezoneName)]; BAD_FORMAT); // not allowed + + // some practical examples + check!("2015-02-04T14:37:05+09:00", + [num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"), + num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)]; + year_div_100: 20, year_mod_100: 15, month: 2, day: 4, + hour_div_12: 1, hour_mod_12: 2, minute: 37, second: 5, offset: 32400); + check!("Mon, 10 Jun 2013 09:32:37 GMT", + [fix!(ShortWeekdayName), lit!(","), sp!(" "), num!(Day), sp!(" "), + fix!(ShortMonthName), sp!(" "), num!(Year), sp!(" "), num!(Hour), lit!(":"), + num!(Minute), lit!(":"), num!(Second), sp!(" "), lit!("GMT")]; + year_div_100: 20, year_mod_100: 13, month: 6, day: 10, weekday: Weekday::Mon, + hour_div_12: 0, hour_mod_12: 9, minute: 32, second: 37); + check!("20060102150405", + [num!(Year), num!(Month), num!(Day), num!(Hour), num!(Minute), num!(Second)]; + year_div_100: 20, year_mod_100: 6, month: 1, day: 2, + hour_div_12: 1, hour_mod_12: 3, minute: 4, second: 5); + check!("3:14PM", + [num!(Hour12), lit!(":"), num!(Minute), fix!(LowerAmPm)]; + hour_div_12: 1, hour_mod_12: 3, minute: 14); + check!("12345678901234.56789", + [num!(Timestamp), lit!("."), num!(Nanosecond)]; + nanosecond: 567_890_000, timestamp: 12_345_678_901_234); +} + +#[cfg(test)] +#[test] +fn test_rfc2822() { + use datetime::DateTime; + use offset::FixedOffset; + use super::*; + use super::NOT_ENOUGH; + + // Test data - (input, Ok(expected result after parse and format) or Err(error code)) + let testdates = [ + ("Tue, 20 
Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // normal case + ("20 Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // no day of week + ("20 JAN 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // upper case month + ("11 Sep 2001 09:45:00 EST", Ok("Tue, 11 Sep 2001 09:45:00 -0500")), + ("30 Feb 2015 17:35:20 -0800", Err(OUT_OF_RANGE)), // bad day of month + ("Tue, 20 Jan 2015", Err(TOO_SHORT)), // omitted fields + ("Tue, 20 Avr 2015 17:35:20 -0800", Err(INVALID)), // bad month name + ("Tue, 20 Jan 2015 25:35:20 -0800", Err(OUT_OF_RANGE)), // bad hour + ("Tue, 20 Jan 2015 7:35:20 -0800", Err(INVALID)), // bad # of digits in hour + ("Tue, 20 Jan 2015 17:65:20 -0800", Err(OUT_OF_RANGE)), // bad minute + ("Tue, 20 Jan 2015 17:35:90 -0800", Err(OUT_OF_RANGE)), // bad second + ("Tue, 20 Jan 2015 17:35:20 -0890", Err(OUT_OF_RANGE)), // bad offset + ("6 Jun 1944 04:00:00Z", Err(INVALID)), // bad offset (zulu not allowed) + ("Tue, 20 Jan 2015 17:35:20 HAS", Err(NOT_ENOUGH)) // bad named time zone + ]; + + fn rfc2822_to_datetime(date: &str) -> ParseResult> { + let mut parsed = Parsed::new(); + try!(parse(&mut parsed, date, [Item::Fixed(Fixed::RFC2822)].iter().cloned())); + parsed.to_datetime() + } + + fn fmt_rfc2822_datetime(dt: DateTime) -> String { + dt.format_with_items([Item::Fixed(Fixed::RFC2822)].iter().cloned()).to_string() + } + + // Test against test data above + for &(date, checkdate) in testdates.iter() { + let d = rfc2822_to_datetime(date); // parse a date + let dt = match d { // did we get a value? 
+ Ok(dt) => Ok(fmt_rfc2822_datetime(dt)), // yes, go on + Err(e) => Err(e), // otherwise keep an error for the comparison + }; + if dt != checkdate.map(|s| s.to_string()) { // check for expected result + panic!("Date conversion failed for {}\nReceived: {:?}\nExpected: {:?}", + date, dt, checkdate); + } + }; +} + +#[cfg(test)] +#[test] +fn test_rfc3339() { + use datetime::DateTime; + use offset::FixedOffset; + use super::*; + + // Test data - (input, Ok(expected result after parse and format) or Err(error code)) + let testdates = [ + ("2015-01-20T17:35:20-08:00", Ok("2015-01-20T17:35:20-08:00")), // normal case + ("1944-06-06T04:04:00Z", Ok("1944-06-06T04:04:00+00:00")), // D-day + ("2001-09-11T09:45:00-08:00", Ok("2001-09-11T09:45:00-08:00")), + ("2015-01-20T17:35:20.001-08:00", Ok("2015-01-20T17:35:20.001-08:00")), + ("2015-01-20T17:35:20.000031-08:00", Ok("2015-01-20T17:35:20.000031-08:00")), + ("2015-01-20T17:35:20.000000004-08:00", Ok("2015-01-20T17:35:20.000000004-08:00")), + ("2015-01-20T17:35:20.000000000452-08:00", Ok("2015-01-20T17:35:20-08:00")), // too small + ("2015-02-30T17:35:20-08:00", Err(OUT_OF_RANGE)), // bad day of month + ("2015-01-20T25:35:20-08:00", Err(OUT_OF_RANGE)), // bad hour + ("2015-01-20T17:65:20-08:00", Err(OUT_OF_RANGE)), // bad minute + ("2015-01-20T17:35:90-08:00", Err(OUT_OF_RANGE)), // bad second + ("2015-01-20T17:35:20-24:00", Err(OUT_OF_RANGE)), // bad offset + ]; + + fn rfc3339_to_datetime(date: &str) -> ParseResult> { + let mut parsed = Parsed::new(); + try!(parse(&mut parsed, date, [Item::Fixed(Fixed::RFC3339)].iter().cloned())); + parsed.to_datetime() + } + + fn fmt_rfc3339_datetime(dt: DateTime) -> String { + dt.format_with_items([Item::Fixed(Fixed::RFC3339)].iter().cloned()).to_string() + } + + // Test against test data above + for &(date, checkdate) in testdates.iter() { + let d = rfc3339_to_datetime(date); // parse a date + let dt = match d { // did we get a value? 
+ Ok(dt) => Ok(fmt_rfc3339_datetime(dt)), // yes, go on + Err(e) => Err(e), // otherwise keep an error for the comparison + }; + if dt != checkdate.map(|s| s.to_string()) { // check for expected result + panic!("Date conversion failed for {}\nReceived: {:?}\nExpected: {:?}", + date, dt, checkdate); + } + }; +} + diff --git a/src/format/parsed.rs b/src/format/parsed.rs index b7e8dab..6810879 100644 --- a/src/format/parsed.rs +++ b/src/format/parsed.rs @@ -467,7 +467,10 @@ impl Parsed { let time = try!(parsed.to_naive_time()); Ok(date.and_time(time)) } else { - Err(NOT_ENOUGH) + // reproduce the previous error(s) + try!(date); + try!(time); + unreachable!() } } @@ -867,6 +870,11 @@ mod tests { assert_eq!(parse!(year_mod_100: 12, ordinal: 182, hour_div_12: 1, hour_mod_12: 11, minute: 59, second: 60, timestamp: 1_341_100_801), Err(IMPOSSIBLE)); + + // error codes + assert_eq!(parse!(year_div_100: 20, year_mod_100: 15, month: 1, day: 20, weekday: Tue, + hour_div_12: 2, hour_mod_12: 1, minute: 35, second: 20), + Err(OUT_OF_RANGE)); // `hour_div_12` is out of range } #[test] diff --git a/src/format/scan.rs b/src/format/scan.rs index 27a295b..923e648 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -130,6 +130,27 @@ pub fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> { Ok((s, weekday)) } +/// Tries to consume exactly one given character. +pub fn char(s: &str, c1: u8) -> ParseResult<&str> { + match s.as_bytes().first() { + Some(&c) if c == c1 => Ok(&s[1..]), + Some(_) => Err(INVALID), + None => Err(TOO_SHORT), + } +} + +/// Tries to consume one or more whitespace. +pub fn space(s: &str) -> ParseResult<&str> { + let s_ = s.trim_left(); + if s_.len() < s.len() { + Ok(s_) + } else if s.is_empty() { + Err(TOO_SHORT) + } else { + Err(INVALID) + } +} + /// Consumes any number (including zero) of colon or spaces. 
pub fn colon_or_space(s: &str) -> ParseResult<&str> { Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace())) @@ -138,7 +159,7 @@ pub fn colon_or_space(s: &str) -> ParseResult<&str> { /// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible. /// /// The additional `colon` may be used to parse a mandatory or optional `:` -/// between hours and minutes, and should return either a new suffix or `None` when parsing fails. +/// between hours and minutes, and should return either a new suffix or `Err` when parsing fails. pub fn timezone_offset(mut s: &str, mut colon: F) -> ParseResult<(&str, i32)> where F: FnMut(&str) -> ParseResult<&str> { let negative = match s.as_bytes().first() { @@ -182,3 +203,43 @@ pub fn timezone_offset_zulu(s: &str, colon: F) -> ParseResult<(&str, i32)> } } +/// Same to `timezone_offset` but also allows for RFC 2822 legacy timezones. +/// May return `None` which indicates an insufficient offset data (i.e. `-0000`). +pub fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option)> { + // tries to parse legacy time zone names + let upto = s.as_bytes().iter().position(|&c| match c { b'a'...b'z' | b'A'...b'Z' => false, + _ => true }).unwrap_or(s.len()); + if upto > 0 { + let name = &s[..upto]; + let s = &s[upto..]; + if equals(name, "gmt") || equals(name, "ut") { + Ok((s, Some(0))) + } else if equals(name, "est") { + Ok((s, Some(-5 * 3600))) + } else if equals(name, "edt") { + Ok((s, Some(-4 * 3600))) + } else if equals(name, "cst") { + Ok((s, Some(-6 * 3600))) + } else if equals(name, "cdt") { + Ok((s, Some(-5 * 3600))) + } else if equals(name, "mst") { + Ok((s, Some(-7 * 3600))) + } else if equals(name, "mdt") { + Ok((s, Some(-6 * 3600))) + } else if equals(name, "pst") { + Ok((s, Some(-8 * 3600))) + } else if equals(name, "pdt") { + Ok((s, Some(-7 * 3600))) + } else { + Ok((s, None)) // recommended by RFC 2822: consume but treat it as -0000 + } + } else { + let (s_, offset) = try!(timezone_offset(s, 
|s| Ok(s))); + if offset == 0 && s.starts_with("-") { // -0000 is not same to +0000 + Ok((s_, None)) + } else { + Ok((s_, Some(offset))) + } + } +} + diff --git a/src/lib.rs b/src/lib.rs index 4f9d859..cee1e14 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -218,11 +218,6 @@ Advanced offset handling is not yet supported (but is planned in 0.3). #![feature(core, collections, hash, std_misc)] // lib stability features as per RFC #507 #![cfg_attr(test, feature(test))] // ditto #![deny(missing_docs)] -// This is needed to allow compile-time regular expressions in this crate. -#![feature(plugin)] -#![plugin(regex_macros)] -extern crate regex; - extern crate "time" as stdtime; @@ -265,12 +260,6 @@ pub mod time; pub mod datetime; pub mod format; -/// Parsing functions for date/time strings. -/// -/// Parsing functions are provided for RFC 2822 ("Tue, 20 Jan 2015 17:35:20 -0800") -/// and RFC3339/ISO8601 ("2015-01-20T17:35:20.001-0800") date/time strings. -pub mod parse; - /// The day of week (DOW). /// /// The order of the days of week depends on the context. diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index 57dc156..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,335 +0,0 @@ -use std::num::Int; -use std::cmp; -use ::{Offset}; - -// -// parse.rs -- parsing for various standardized date and time string formats -// -// John Nagle -// January, 2015 -// -// -// RFC2822 time/date stamp parsing -// -// Example: "Tue, 20 Jan 2015 17:35:20 -0800". -// Common use case: email date/time. -// -// Date format specification, from RFC2822. 
-// -// date-time = [ day-of-week "," ] date FWS time [CFWS] -// -// day-of-week = ([FWS] day-name) / obs-day-of-week -// -// day-name = "Mon" / "Tue" / "Wed" / "Thu" / -// "Fri" / "Sat" / "Sun" -// -// date = day month year -// -// year = 4*DIGIT / obs-year -// -// month = (FWS month-name FWS) / obs-month -// -// month-name = "Jan" / "Feb" / "Mar" / "Apr" / -// "May" / "Jun" / "Jul" / "Aug" / -// "Sep" / "Oct" / "Nov" / "Dec" -// -// day = ([FWS] 1*2DIGIT) / obs-day -// -// time = time-of-day FWS zone -// -// time-of-day = hour ":" minute [ ":" second ] -// -// hour = 2DIGIT / obs-hour -// -// minute = 2DIGIT / obs-minute -// -// second = 2DIGIT / obs-second -// -// zone = (( "+" / "-" ) 4DIGIT) / obs-zone -// -// -// Obsolete forms -// -// obs-day-of-week = [CFWS] day-name [CFWS] -// -// obs-year = [CFWS] 2*DIGIT [CFWS] -// -// obs-month = CFWS month-name CFWS -// -// obs-day = [CFWS] 1*2DIGIT [CFWS] -// -// obs-hour = [CFWS] 2DIGIT [CFWS] -// -// obs-minute = [CFWS] 2DIGIT [CFWS] -// -// obs-second = [CFWS] 2DIGIT [CFWS] -// -// obs-zone = "UT" / "GMT" / ; Universal Time -// ; North American UT -// ; offsets -// "EST" / "EDT" / ; Eastern: - 5/ - 4 -// "CST" / "CDT" / ; Central: - 6/ - 5 -// "MST" / "MDT" / ; Mountain: - 7/ - 6 -// "PST" / "PDT" / ; Pacific: - 8/ - 7 -// -// %d65-73 / ; Military zones - "A" -// %d75-90 / ; through "I" and "K" -// %d97-105 / ; through "Z", both -// %d107-122 ; upper and lower case -// -// -// Per RFC2882, all the obsolete one-letter military time zones are interpreted as -// +0000. -// -// The only feature not supported is that an offset of "-0000" should return a -// naive date/time, not a time zone aware one. This returns a time zone aware -// date/time object in all cases. -// -// -/// Time zone offset in minutes, from string. 
-/// Allowed input per RFC2822 above - numeric offset or named time zone -fn offsetmins(s: &str) -> Option { - let offsetre = regex!(r"^([+-])(\d\d)(\d\d)$"); // +0800 as 8 hour offset - let offsetmatches = offsetre.captures(s); // match time zone - match offsetmatches { - Some(caps) => { // It looks like a numeric offset - let sign = caps.at(1).unwrap(); // + or - - let hh = caps.at(2).unwrap().parse::().unwrap(); // hours - let mm = caps.at(3).unwrap().parse::().unwrap(); // minutes - let signval = match sign { - "+" => 1, - "-" => -1, - _ => return None // unreachable - }; - if hh < -12 || hh > 12 || mm < 0 || mm > 59 { return None } // check offsets - return Some(signval*(hh*60 + mm)) // return offset in minute - } - None => { // not numeric, try the named time zones - return match s { - "GMT"|"UT"|"Z"|"z" => Some(0), // prime meridian - "EDT" => Some(-4*60), // obsolete forms - "EST"|"CDT" => Some(-5*60), // rather US-centric in this old RFC. - "CST"|"MDT" => Some(-6*60), - "MST"|"PDT" => Some(-7*60), - "PST" => Some(-8*60), - _ => match s.len() { 1 => Some(0), _ => None } // obsolete single-letter miltary forms are treated as 0 per RFC2822 - } - } - }; -} - -/// Makes a new `DateTime` with offset given a valid RFC2822 string. -/// Example: "Tue, 20 Jan 2015 17:35:20 -0800" -pub fn rfc2822_to_datetime(s: &str) -> Option<::DateTime<::FixedOffset>> { - - // Match the date format. Case-insensitive, compile-time regex. - let datere = regex!(r"^(?i)(?:Mon,|Tue,|Wed,|Thu,|Fri,|Sat,|Sun,)??\s*(\d+)\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d\d\d\d)\s+(\d+):(\d+):(\d+)\s*([+-]\d\d\d\d|[A-Z]+)$"); - let matches = datere.captures(s.trim()); // Pattern match the date - let captures = match matches { - Some(caps) => caps, // succeed - None => return None // fail - }; - // Unwrapping numeric fields is safe because we've matched the regular expression. 
- let dd = captures.at(1).unwrap().parse::().unwrap(); // day of month - // Month names are case-sensitive in RFC 2822, but we allow the obvious other forms. - let mo = match captures.at(2).unwrap() { // month decode - "Jan"|"JAN"|"jan" => 1, - "Feb"|"FEB"|"feb" => 2, - "Mar"|"MAR"|"mar" => 3, - "Apr"|"APR"|"apr" => 4, - "May"|"MAY"|"may" => 5, - "Jun"|"JUN"|"jun" => 6, - "Jul"|"JUL"|"jul" => 7, - "Aug"|"AUG"|"aug" => 8, - "Sep"|"SEP"|"sep" => 9, - "Oct"|"OCT"|"oct" => 10, - "Nov"|"NOV"|"nov" => 11, - "Dec"|"DEC"|"dec" => 12, - _ => return None - }; - let yyyy = captures.at(3).unwrap().parse::().unwrap(); // chrono wants a signed year - let hh = captures.at(4).unwrap().parse::().unwrap(); - let mm = captures.at(5).unwrap().parse::().unwrap(); // minute - let ss = captures.at(6).unwrap().parse::().unwrap(); - let offsetstr = captures.at(7).unwrap(); // can be +0800 or a time zone name - let offsetmm = match offsetmins(offsetstr) { - Some(v) => v, - None => return None - }; - let tz = ::FixedOffset::east(offsetmm*60); // decode time zone offset - // Pack numeric values into DateTime object, returning None if fail. - let date = tz.ymd_opt(yyyy, mo, dd); // date or none - match date { // check for invalid date - ::LocalResult::Single(d) => d.and_hms_opt(hh, mm, ss), // encode into DateTime - _ => return None // date conversion failed - } -} - -/// Formats a DateTime as an RF2822 string. -/// This is primarily for debugging. -pub fn fmt_rfc2822_datetime(dt: ::DateTime<::FixedOffset>) -> String { - dt.format("%a, %e %b %Y %H:%M:%S %z").to_string() // inverse of parsing -} - -// -// RFC3339 date parsing -// -// This is a subset of ISO 8601 date format. -// Example: "2012-09-09T18:00:00-07:00" -// Common use case: Atom feeds. 
-// -// -// From RFC3339, "Date and Time on the Internet: Timestamps", section 5.6: -// -// date-fullyear = 4DIGIT -// date-month = 2DIGIT ; 01-12 -// date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on -// ; month/year -// time-hour = 2DIGIT ; 00-23 -// time-minute = 2DIGIT ; 00-59 -// time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second -// ; rules -// time-secfrac = "." 1*DIGIT -// time-numoffset = ("+" / "-") time-hour ":" time-minute -// time-offset = "Z" / time-numoffset -// -// partial-time = time-hour ":" time-minute ":" time-second -// [time-secfrac] -// full-date = date-fullyear "-" date-month "-" date-mday -// full-time = partial-time time-offset -// -// date-time = full-date "T" full-time -// -// NOTE: Per [ABNF] and ISO8601, the "T" and "Z" characters in this -// syntax may alternatively be lower case "t" or "z" respectively. -// -// ISO 8601 defines date and time separated by "T". -// Applications using this syntax may choose, for the sake of -// readability, to specify a full-date and full-time separated by -// (say) a space character. -// - -/// Parse a string with a RFC3339 date, time, and offset into a DateTime. -/// This is the subset of ISO 8601 date and time strings most used on the Web. -pub fn rfc3339_to_datetime(s: &str) -> Option<::DateTime<::FixedOffset>> { - let datere = regex!(r"^(?i)(\d\d\d\d)-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)(\.\d+)??([+-]\d\d\d\d|[A-Z]+)$"); // format regex - let matches = datere.captures(s.trim()); // Pattern match the date - let captures = match matches { - Some(caps) => caps, // succeed - None => return None // fail - }; - // Unwrapping numeric fields is safe because we've matched the regular expression. 
- let yyyy = captures.at(1).unwrap().parse::().unwrap(); // chrono wants a signed year - let mo = captures.at(2).unwrap().parse::().unwrap(); // month of year - let dd = captures.at(3).unwrap().parse::().unwrap(); // day of month - let hh = captures.at(4).unwrap().parse::().unwrap(); // hour - let mm = captures.at(5).unwrap().parse::().unwrap(); // minute - let ss = captures.at(6).unwrap().parse::().unwrap(); // second - let ns = match captures.at(7) { // fractional seconds present? - Some(fractstr) => parsensfract(fractstr), // parse as nanoseconds - None => 0 // no fraction - }; - let offsetstr = captures.at(8).unwrap(); // time zone offset, numeric - let offsetmm = match offsetmins(offsetstr) { // also accepts named time zones, not required. - Some(v) => v, - None => return None - }; - let tz = ::FixedOffset::east(offsetmm*60); // decode time zone offset - // Pack numeric values into DateTime object, returning None if fail. - let date = tz.ymd_opt(yyyy, mo, dd); // date or none - match date { // check for invalid date - ::LocalResult::Single(d) => d.and_hms_nano_opt(hh, mm, ss, ns), // encode into DateTime - _ => return None // date conversion failed - } -} - -/// Parse ".NNN" into nanoseconds. -/// Assumes input has already been checked for ".NNN" format. -fn parsensfract(s: &str) -> u32 { - let sdigits = &s[1..]; // trim off leading "." - let sdigits9 = &sdigits[0..(cmp::min(sdigits.len(),9))]; // truncate at 9 digits after "." - let v = sdigits9.parse::().unwrap(); // digits as u32 (will fit) - let vl = 9-sdigits9.len(); // power of 10 for scaling - let scale = Int::pow(10,vl); // scale factor to get to - //panic!("parsens: s: {} sdigits9: {} v: {} scale: {} result: {}", s, sdigits9 , v, scale, v*scale); // ***TEMP*** - v*scale // as nanoseconds -} - -/// Formats a DateTime as an RFC 3339/ISO8601 date, with 9 digits of nanoseconds. -/// This is the inverse operation of rfc3339 parsing. 
-pub fn fmt_rfc3339_datetime(dt: ::DateTime<::FixedOffset>) -> String { - dt.format("%Y-%m-%dT%H:%M:%S.%f%z").to_string() // inverse of parsing -} - - -// -// Unit tests -// -#[test] -/// Test RFC2822 parser. -fn testrfc2822parser() { - // Test data - [input, expected result after parse and format] - let testdates = [ - ["Tue, 20 Jan 2015 17:35:20 -0800", "Tue, 20 Jan 2015 17:35:20 -0800"], // normal case - ["20 Jan 2015 17:35:20 -0800", "Tue, 20 Jan 2015 17:35:20 -0800"], // no day of week - ["20 JAN 2015 17:35:20 -0800", "Tue, 20 Jan 2015 17:35:20 -0800"], // upper case month allowed - ["6 Jun 1944 04:00:00Z","Tue, 6 Jun 1944 04:00:00 +0000"], // D-day - ["11 Sep 2001 9:45:00 EST", "Tue, 11 Sep 2001 09:45:00 -0500"], - ["30 Feb 2015 17:35:20 -0800", ""], // bad day of month - ["Tue, 20 Avr 2015 17:35:20 -0800", ""],// bad month name - ["Tue, 20 Jan 2015 25:35:20 -0800",""], // bad hour - ["Tue, 20 Jan 2015 17:65:20 -0800",""], // bad minute - ["Tue, 20 Jan 2015 17:35:90 -0800",""], // bad second - ["Tue, 20 Jan 2015 17:35:20 -1800",""], // bad offset - ["Tue, 20 Jan 2015 17:35:20 HAS",""] // bad named time zone - ]; - // Test against test data above - for testdate in testdates.iter() { - let date = testdate[0]; // input - let checkdate = testdate[1]; // expected result or "" - let d = rfc2822_to_datetime(date); // parse a date - let dt = match d { // did we get a value? - Some(dt) => dt, // yes, go on - None => if checkdate != "" { panic!("Failed to convert date {}", date)} else { continue }, - }; - // let mut s = String::new(); - let s = fmt_rfc2822_datetime(dt); // convert date/time back to string - if s != checkdate { // check for expected result - panic!("Date conversion failed for {}\nReceived: {}\nExpected: {}",date, s, checkdate); - } - }; -} -#[test] -/// Test RFC3339/ISO8601 parser. 
-fn testrfc3339parser() { - // Test data - [input, expected result after parse and format] - let testdates = [ - ["2015-01-20T17:35:20-0800", "2015-01-20T17:35:20.000000000-0800"], // normal case - ["1944-06-06T04:04:00Z", "1944-06-06T04:04:00.000000000+0000"], // D-day - ["2001-09-11T09:45:00-0800", "2001-09-11T09:45:00.000000000-0800"], - ["2015-01-20T17:35:20.001-0800", "2015-01-20T17:35:20.001000000-0800"], // milliseconds - ["2015-01-20T17:35:20.000031-0800", "2015-01-20T17:35:20.000031000-0800"], // microseconds - ["2015-01-20T17:35:20.000000004-0800", "2015-01-20T17:35:20.000000004-0800"], // nanoseconds - ["2015-01-20T17:35:20.000000000452-0800", "2015-01-20T17:35:20.000000000-0800"], // picoseconds (too small) - ["2015-02-30T17:35:20-0800", ""], // bad day of month - ["2015-01-20T25:35:20-0800", ""], // bad hour - ["2015-01-20T17:65:20-0800", ""], // bad minute - ["2015-01-20T17:35:90-0800", ""], // bad second - ["2015-01-20T17:35:20-1800", ""], // bad offset - ]; - // Test against test data above - for testdate in testdates.iter() { - let date = testdate[0]; // input - let checkdate = testdate[1]; // expected result or "" - let d = rfc3339_to_datetime(date); // parse a date - let dt = match d { // did we get a value? - Some(dt) => dt, // yes, go on - None => if checkdate != "" { panic!("Failed to convert date {}", date)} else { continue }, - }; - // let mut s = String::new(); - let s = fmt_rfc3339_datetime(dt); // convert date/time back to string - if s != checkdate { // check for expected result - panic!("Date conversion failed for {}\nReceived: {}\nExpected: {}",date, s, checkdate); - } - }; -}