created `format::parse` module.

this new module encompasses John Nagle's original RFC 2822 and 3339
parsers, updated to be fully compatible with the actual standards.
the contributed `parse` module has been merged into it.
This commit is contained in:
Kang Seonghoon 2015-02-15 21:01:36 +09:00
parent c7f132cca2
commit 6937470405
7 changed files with 803 additions and 772 deletions

View File

@ -16,6 +16,4 @@ name = "chrono"
[dependencies]
time = "0.1.15"
regex = "0.1.12"
regex_macros = "0.1.6"

View File

@ -7,19 +7,18 @@
*/
use std::fmt;
use std::usize;
use std::error::Error;
use {Datelike, Timelike};
use Weekday;
use div::{div_floor, mod_floor};
use duration::Duration;
use offset::Offset;
use naive::date::NaiveDate;
use naive::time::NaiveTime;
pub use self::parsed::Parsed;
pub use self::strftime::StrftimeItems;
pub use self::parsed::Parsed;
pub use self::parse::parse;
/// Padding characters for numeric items.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
@ -134,6 +133,10 @@ pub enum Fixed {
/// and `Z` can be either in upper case or in lower case.
/// The offset is limited from `-24:00` to `+24:00`, which is same to `FixedOffset`'s range.
TimezoneOffsetZ,
/// RFC 2822 date and time syntax. Commonly used for email and MIME date and time.
RFC2822,
/// RFC 3339 & ISO 8601 date and time syntax.
RFC3339,
}
/// A single formatting item. This is used for both formatting and parsing.
@ -159,6 +162,73 @@ macro_rules! num0 { ($x:ident) => (Item::Numeric(Numeric::$x, Pad::Zero)) }
// `nums!(X)` expands to the numeric item `Numeric::X` with `Pad::Space` padding.
macro_rules! nums { ($x:ident) => (Item::Numeric(Numeric::$x, Pad::Space)) }
// `fix!(X)` expands to the fixed-format item `Fixed::X`.
macro_rules! fix { ($x:ident) => (Item::Fixed(Fixed::$x)) }
/// An error from the `parse` function.
///
/// The concrete reason is kept in a private `ParseErrorKind`, so the set of
/// reasons can grow without changing the public type.
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub struct ParseError(ParseErrorKind);

/// The reason behind a `ParseError`.
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
enum ParseErrorKind {
    /// Given field is out of permitted range.
    OutOfRange,

    /// There is no possible date and time value with given set of fields.
    ///
    /// This does not include the out-of-range conditions, which are trivially invalid.
    /// It includes the case that there are one or more fields that are inconsistent
    /// with each other.
    Impossible,

    /// Given set of fields is not enough to make a requested date and time value.
    ///
    /// Note that there *may* be a case that given fields constrain the possible values so much
    /// that there is a unique possible value. Chrono only tries to be correct for
    /// most useful sets of fields however, as such constraint solving can be expensive.
    NotEnough,

    /// The input string has some invalid character sequence for given formatting items.
    Invalid,

    /// The input string has been prematurely ended.
    TooShort,

    /// All formatting items have been read but there is a remaining input.
    TooLong,

    /// There was an error on the formatting string, or there were non-supported formatting items.
    BadFormat,
}
/// Shorthand for `Result<T, ParseError>`, the result type of the parsing routines.
pub type ParseResult<T> = Result<T, ParseError>;
/// Formats a `ParseError` using the message returned by its `description()`.
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// delegate to the formatting of the description string itself,
// passing the caller's formatter through unchanged.
self.description().fmt(f)
}
}
impl Error for ParseError {
fn description(&self) -> &str {
match self.0 {
ParseErrorKind::OutOfRange => "input is out of range",
ParseErrorKind::Impossible => "no possible date and time matching input",
ParseErrorKind::NotEnough => "input is not enough for unique date and time",
ParseErrorKind::Invalid => "input contains invalid characters",
ParseErrorKind::TooShort => "premature end of input",
ParseErrorKind::TooLong => "trailing input",
ParseErrorKind::BadFormat => "bad or unsupported format string",
}
}
}
// Ready-made `ParseError` values, one per `ParseErrorKind` variant.
// They are private, to be used in this module and its submodules.
const OUT_OF_RANGE: ParseError = ParseError(ParseErrorKind::OutOfRange);
const IMPOSSIBLE: ParseError = ParseError(ParseErrorKind::Impossible);
const NOT_ENOUGH: ParseError = ParseError(ParseErrorKind::NotEnough);
const INVALID: ParseError = ParseError(ParseErrorKind::Invalid);
const TOO_SHORT: ParseError = ParseError(ParseErrorKind::TooShort);
const TOO_LONG: ParseError = ParseError(ParseErrorKind::TooLong);
const BAD_FORMAT: ParseError = ParseError(ParseErrorKind::BadFormat);
/// Tries to format given arguments with given formatting items.
/// Internally used by `DelayedFormat`.
pub fn format<'a, I>(w: &mut fmt::Formatter, date: Option<&NaiveDate>, time: Option<&NaiveTime>,
@ -228,18 +298,23 @@ pub fn format<'a, I>(w: &mut fmt::Formatter, date: Option<&NaiveDate>, time: Opt
Item::Fixed(spec) => {
use self::Fixed::*;
/// Prints an offset from UTC in the format of `+HHMM` or `+HH:MM`.
/// `Z` instead of `+00[:]00` is allowed when `allow_zulu` is true.
fn write_local_minus_utc(w: &mut fmt::Formatter, off: Duration,
allow_zulu: bool) -> fmt::Result {
allow_zulu: bool, use_colon: bool) -> fmt::Result {
let off = off.num_minutes();
if !allow_zulu || off != 0 {
let (sign, off) = if off < 0 {('-', -off)} else {('+', off)};
if use_colon {
write!(w, "{}{:02}:{:02}", sign, off / 60, off % 60)
} else {
write!(w, "{}{:02}{:02}", sign, off / 60, off % 60)
}
} else {
write!(w, "Z")
}
}
let ret = match spec {
ShortMonthName =>
date.map(|d| write!(w, "{}", SHORT_MONTHS[d.month0() as usize])),
@ -258,9 +333,28 @@ pub fn format<'a, I>(w: &mut fmt::Formatter, date: Option<&NaiveDate>, time: Opt
TimezoneName =>
off.map(|&(ref name, _)| write!(w, "{}", *name)),
TimezoneOffset =>
off.map(|&(_, off)| write_local_minus_utc(w, off, false)),
off.map(|&(_, off)| write_local_minus_utc(w, off, false, false)),
TimezoneOffsetZ =>
off.map(|&(_, off)| write_local_minus_utc(w, off, true)),
off.map(|&(_, off)| write_local_minus_utc(w, off, true, false)),
RFC2822 => // same to `%a, %e %b %Y %H:%M:%S %z`
if let (Some(d), Some(t), Some(&(_, off))) = (date, time, off) {
try!(write!(w, "{}, {:2} {} {:04} {:02}:{:02}:{:02} ",
SHORT_WEEKDAYS[d.weekday().num_days_from_monday() as usize],
d.day(), SHORT_MONTHS[d.month0() as usize], d.year(),
t.hour(), t.minute(), t.second()));
Some(write_local_minus_utc(w, off, false, false))
} else {
None
},
RFC3339 => // (almost) same to `%Y-%m-%dT%H:%M:%S.%f%z`
if let (Some(d), Some(t), Some(&(_, off))) = (date, time, off) {
// reuse `Debug` impls which already prints ISO 8601 format.
// this is faster in this way.
try!(write!(w, "{:?}T{:?}", d, t));
Some(write_local_minus_utc(w, off, false, true))
} else {
None
},
};
match ret {
@ -276,215 +370,13 @@ pub fn format<'a, I>(w: &mut fmt::Formatter, date: Option<&NaiveDate>, time: Opt
Ok(())
}
/// An error from the `parse` function.
#[derive(Debug, Clone, PartialEq, Copy)]
pub struct ParseError(ParseErrorKind);
pub mod parsed;
#[derive(Debug, Clone, PartialEq, Copy)]
enum ParseErrorKind {
/// Given field is out of permitted range.
OutOfRange,
// due to the size of parsing routines, they are in separate modules.
mod scan;
mod parse;
/// There is no possible date and time value with given set of fields.
///
/// This does not include the out-of-range conditions, which are trivially invalid.
/// It includes the case that there are one or more fields that are inconsistent to each other.
Impossible,
/// Given set of fields is not enough to make a requested date and time value.
///
/// Note that there *may* be a case that given fields constrain the possible values so much
/// that there is a unique possible value. Chrono only tries to be correct for
/// most useful sets of fields however, as such constraint solving can be expensive.
NotEnough,
/// The input string has some invalid character sequence for given formatting items.
Invalid,
/// The input string has been prematurely ended.
TooShort,
/// All formatting items have been read but there is a remaining input.
TooLong,
/// There was an error on the formatting string, or there were non-supported formating items.
BadFormat,
}
/// Same to `Result<T, ParseError>`.
pub type ParseResult<T> = Result<T, ParseError>;
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.description().fmt(f)
}
}
impl Error for ParseError {
fn description(&self) -> &str {
match self.0 {
ParseErrorKind::OutOfRange => "input is out of range",
ParseErrorKind::Impossible => "no possible date and time matching input",
ParseErrorKind::NotEnough => "input is not enough for unique date and time",
ParseErrorKind::Invalid => "input contains invalid characters",
ParseErrorKind::TooShort => "premature end of input",
ParseErrorKind::TooLong => "trailing input",
ParseErrorKind::BadFormat => "bad or unsupported format string",
}
}
}
// to be used in this module and submodules
const OUT_OF_RANGE: ParseError = ParseError(ParseErrorKind::OutOfRange);
const IMPOSSIBLE: ParseError = ParseError(ParseErrorKind::Impossible);
const NOT_ENOUGH: ParseError = ParseError(ParseErrorKind::NotEnough);
const INVALID: ParseError = ParseError(ParseErrorKind::Invalid);
const TOO_SHORT: ParseError = ParseError(ParseErrorKind::TooShort);
const TOO_LONG: ParseError = ParseError(ParseErrorKind::TooLong);
const BAD_FORMAT: ParseError = ParseError(ParseErrorKind::BadFormat);
/// Tries to parse given string into `parsed` with given formatting items.
/// Returns `Ok` when the entire string has been parsed (otherwise `parsed` should not be used).
/// There should be no trailing string after parsing; use a stray `Item::Space` to trim whitespaces.
///
/// This particular date and time parser is:
///
/// - Greedy. It will consume the longest possible prefix.
/// For example, `April` is always consumed entirely when the long month name is requested;
/// it equally accepts `Apr`, but prefers the longer prefix in this case.
/// - Padding-agnostic (for numeric items). The `Pad` field is completely ignored,
/// so one can prepend any number of whitespace then any number of zeroes before numbers.
/// - (Still) obeying the intrinsic parsing width. This allows, for example, parsing `HHMMSS`.
pub fn parse<'a, I>(parsed: &mut Parsed, mut s: &str, items: I) -> ParseResult<()>
where I: Iterator<Item=Item<'a>> {
macro_rules! try_consume {
($e:expr) => ({ let (s_, v) = try!($e); s = s_; v })
}
for item in items {
match item {
Item::Literal(prefix) => {
if s.len() < prefix.len() { return Err(TOO_SHORT); }
if !s.starts_with(prefix) { return Err(INVALID); }
s = &s[prefix.len()..];
}
Item::Space(_) => {
s = s.trim_left();
}
Item::Numeric(spec, _pad) => {
use self::Numeric::*;
fn set_weekday_with_num_days_from_sunday(p: &mut Parsed,
v: i64) -> ParseResult<()> {
p.set_weekday(match v {
0 => Weekday::Sun, 1 => Weekday::Mon, 2 => Weekday::Tue,
3 => Weekday::Wed, 4 => Weekday::Thu, 5 => Weekday::Fri,
6 => Weekday::Sat, _ => return Err(OUT_OF_RANGE)
})
}
fn set_weekday_with_number_from_monday(p: &mut Parsed, v: i64) -> ParseResult<()> {
p.set_weekday(match v {
1 => Weekday::Mon, 2 => Weekday::Tue, 3 => Weekday::Wed,
4 => Weekday::Thu, 5 => Weekday::Fri, 6 => Weekday::Sat,
7 => Weekday::Sun, _ => return Err(OUT_OF_RANGE)
})
}
let (width, frac, set): (usize, bool,
fn(&mut Parsed, i64) -> ParseResult<()>) = match spec {
Year => (4, false, Parsed::set_year),
YearDiv100 => (2, false, Parsed::set_year_div_100),
YearMod100 => (2, false, Parsed::set_year_mod_100),
IsoYear => (4, false, Parsed::set_isoyear),
IsoYearDiv100 => (2, false, Parsed::set_isoyear_div_100),
IsoYearMod100 => (2, false, Parsed::set_isoyear_mod_100),
Month => (2, false, Parsed::set_month),
Day => (2, false, Parsed::set_day),
WeekFromSun => (2, false, Parsed::set_week_from_sun),
WeekFromMon => (2, false, Parsed::set_week_from_mon),
IsoWeek => (2, false, Parsed::set_isoweek),
NumDaysFromSun => (1, false, set_weekday_with_num_days_from_sunday),
WeekdayFromMon => (1, false, set_weekday_with_number_from_monday),
Ordinal => (3, false, Parsed::set_ordinal),
Hour => (2, false, Parsed::set_hour),
Hour12 => (2, false, Parsed::set_hour12),
Minute => (2, false, Parsed::set_minute),
Second => (2, false, Parsed::set_second),
Nanosecond => (9, true, Parsed::set_nanosecond),
Timestamp => (usize::MAX, false, Parsed::set_timestamp),
};
let v = try_consume!(scan::number(s.trim_left(), 1, width, frac));
try!(set(parsed, v));
}
Item::Fixed(spec) => {
use self::Fixed::*;
match spec {
ShortMonthName => {
let month0 = try_consume!(scan::short_month0(s));
try!(parsed.set_month(month0 as i64 + 1));
}
LongMonthName => {
let month0 = try_consume!(scan::short_or_long_month0(s));
try!(parsed.set_month(month0 as i64 + 1));
}
ShortWeekdayName => {
let weekday = try_consume!(scan::short_weekday(s));
try!(parsed.set_weekday(weekday));
}
LongWeekdayName => {
let weekday = try_consume!(scan::short_or_long_weekday(s));
try!(parsed.set_weekday(weekday));
}
LowerAmPm | UpperAmPm => {
if s.len() < 2 { return Err(TOO_SHORT); }
let ampm = match [s.as_bytes()[0] | 32, s.as_bytes()[1] | 32] {
[b'a',b'm'] => false,
[b'p',b'm'] => true,
_ => return Err(INVALID)
};
try!(parsed.set_ampm(ampm));
s = &s[2..];
}
TimezoneName => return Err(BAD_FORMAT),
TimezoneOffset => {
let offset = try_consume!(scan::timezone_offset(s.trim_left(),
scan::colon_or_space));
try!(parsed.set_offset(offset as i64));
}
TimezoneOffsetZ => {
let offset = try_consume!(scan::timezone_offset_zulu(s.trim_left(),
scan::colon_or_space));
try!(parsed.set_offset(offset as i64));
}
}
}
Item::Error => {
return Err(BAD_FORMAT);
}
}
}
// if there are trailling chars, it is an error
if !s.is_empty() {
Err(TOO_LONG)
} else {
Ok(())
}
}
pub mod strftime;
/// A *temporary* object which can be used as an argument to `format!` or others.
/// This is normally constructed via `format` methods of each date and time type.
@ -521,210 +413,3 @@ impl<'a, I: Iterator<Item=Item<'a>> + Clone> fmt::Display for DelayedFormat<'a,
}
}
mod scan;
pub mod parsed;
pub mod strftime;
#[cfg(test)]
#[test]
fn test_parse() {
// workaround for Rust issue #22255
fn parse_all(s: &str, items: &[Item]) -> ParseResult<Parsed> {
let mut parsed = Parsed::new();
try!(parse(&mut parsed, s, items.iter().cloned()));
Ok(parsed)
}
macro_rules! check {
($fmt:expr, $items:expr; $err:expr) => (
assert_eq!(parse_all($fmt, &$items), Err($err))
);
($fmt:expr, $items:expr; $($k:ident: $v:expr),*) => (
assert_eq!(parse_all($fmt, &$items), Ok(Parsed { $($k: Some($v),)* ..Parsed::new() }))
);
}
// empty string
check!("", []; );
check!(" ", []; TOO_LONG);
check!("a", []; TOO_LONG);
// whitespaces
check!("", [sp!("")]; );
check!(" ", [sp!("")]; );
check!("\t", [sp!("")]; );
check!(" \n\r \n", [sp!("")]; );
check!("a", [sp!("")]; TOO_LONG);
// literal
check!("", [lit!("a")]; TOO_SHORT);
check!(" ", [lit!("a")]; INVALID);
check!("a", [lit!("a")]; );
check!("aa", [lit!("a")]; TOO_LONG);
check!("A", [lit!("a")]; INVALID);
check!("xy", [lit!("xy")]; );
check!("xy", [lit!("x"), lit!("y")]; );
check!("x y", [lit!("x"), lit!("y")]; INVALID);
check!("xy", [lit!("x"), sp!(""), lit!("y")]; );
check!("x y", [lit!("x"), sp!(""), lit!("y")]; );
// numeric
check!("1987", [num!(Year)]; year_div_100: 19, year_mod_100: 87);
check!("1987 ", [num!(Year)]; TOO_LONG);
check!("0x12", [num!(Year)]; TOO_LONG); // `0` is parsed
check!("x123", [num!(Year)]; INVALID);
check!("2015", [num!(Year)]; year_div_100: 20, year_mod_100: 15);
check!("0000", [num!(Year)]; year_div_100: 0, year_mod_100: 0);
check!("9999", [num!(Year)]; year_div_100: 99, year_mod_100: 99);
check!(" \t987", [num!(Year)]; year_div_100: 9, year_mod_100: 87);
check!("5", [num!(Year)]; year_div_100: 0, year_mod_100: 5);
check!("-42", [num!(Year)]; INVALID);
check!("+42", [num!(Year)]; INVALID);
check!("5\0", [num!(Year)]; TOO_LONG);
check!("\05", [num!(Year)]; INVALID);
check!("", [num!(Year)]; TOO_SHORT);
check!("12345", [num!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34);
check!("12345", [nums!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34);
check!("12345", [num0!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34);
check!("12341234", [num!(Year), num!(Year)]; year_div_100: 12, year_mod_100: 34);
check!("1234 1234", [num!(Year), num!(Year)]; year_div_100: 12, year_mod_100: 34);
check!("1234 1235", [num!(Year), num!(Year)]; IMPOSSIBLE);
check!("1234 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID);
check!("1234x1234", [num!(Year), lit!("x"), num!(Year)]; year_div_100: 12, year_mod_100: 34);
check!("1234xx1234", [num!(Year), lit!("x"), num!(Year)]; INVALID);
check!("1234 x 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID);
// various numeric fields
check!("1234 5678",
[num!(Year), num!(IsoYear)];
year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78);
check!("12 34 56 78",
[num!(YearDiv100), num!(YearMod100), num!(IsoYearDiv100), num!(IsoYearMod100)];
year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78);
check!("1 2 3 4 5 6",
[num!(Month), num!(Day), num!(WeekFromSun), num!(WeekFromMon), num!(IsoWeek),
num!(NumDaysFromSun)];
month: 1, day: 2, week_from_sun: 3, week_from_mon: 4, isoweek: 5, weekday: Weekday::Sat);
check!("7 89 01",
[num!(WeekdayFromMon), num!(Ordinal), num!(Hour12)];
weekday: Weekday::Sun, ordinal: 89, hour_mod_12: 1);
check!("23 45 6 78901234 567890123",
[num!(Hour), num!(Minute), num!(Second), num!(Nanosecond), num!(Timestamp)];
hour_div_12: 1, hour_mod_12: 11, minute: 45, second: 6, nanosecond: 789_012_340,
timestamp: 567_890_123);
// fixed: month and weekday names
check!("apr", [fix!(ShortMonthName)]; month: 4);
check!("Apr", [fix!(ShortMonthName)]; month: 4);
check!("APR", [fix!(ShortMonthName)]; month: 4);
check!("ApR", [fix!(ShortMonthName)]; month: 4);
check!("April", [fix!(ShortMonthName)]; TOO_LONG); // `Apr` is parsed
check!("A", [fix!(ShortMonthName)]; TOO_SHORT);
check!("Sol", [fix!(ShortMonthName)]; INVALID);
check!("Apr", [fix!(LongMonthName)]; month: 4);
check!("Apri", [fix!(LongMonthName)]; TOO_LONG); // `Apr` is parsed
check!("April", [fix!(LongMonthName)]; month: 4);
check!("Aprill", [fix!(LongMonthName)]; TOO_LONG);
check!("Aprill", [fix!(LongMonthName), lit!("l")]; month: 4);
check!("Aprl", [fix!(LongMonthName), lit!("l")]; month: 4);
check!("April", [fix!(LongMonthName), lit!("il")]; TOO_SHORT); // do not backtrack
check!("thu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu);
check!("Thu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu);
check!("THU", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu);
check!("tHu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu);
check!("Thursday", [fix!(ShortWeekdayName)]; TOO_LONG); // `Thu` is parsed
check!("T", [fix!(ShortWeekdayName)]; TOO_SHORT);
check!("The", [fix!(ShortWeekdayName)]; INVALID);
check!("Nop", [fix!(ShortWeekdayName)]; INVALID);
check!("Thu", [fix!(LongWeekdayName)]; weekday: Weekday::Thu);
check!("Thur", [fix!(LongWeekdayName)]; TOO_LONG); // `Thu` is parsed
check!("Thurs", [fix!(LongWeekdayName)]; TOO_LONG); // ditto
check!("Thursday", [fix!(LongWeekdayName)]; weekday: Weekday::Thu);
check!("Thursdays", [fix!(LongWeekdayName)]; TOO_LONG);
check!("Thursdays", [fix!(LongWeekdayName), lit!("s")]; weekday: Weekday::Thu);
check!("Thus", [fix!(LongWeekdayName), lit!("s")]; weekday: Weekday::Thu);
check!("Thursday", [fix!(LongWeekdayName), lit!("rsday")]; TOO_SHORT); // do not backtrack
// fixed: am/pm
check!("am", [fix!(LowerAmPm)]; hour_div_12: 0);
check!("pm", [fix!(LowerAmPm)]; hour_div_12: 1);
check!("AM", [fix!(LowerAmPm)]; hour_div_12: 0);
check!("PM", [fix!(LowerAmPm)]; hour_div_12: 1);
check!("am", [fix!(UpperAmPm)]; hour_div_12: 0);
check!("pm", [fix!(UpperAmPm)]; hour_div_12: 1);
check!("AM", [fix!(UpperAmPm)]; hour_div_12: 0);
check!("PM", [fix!(UpperAmPm)]; hour_div_12: 1);
check!("Am", [fix!(LowerAmPm)]; hour_div_12: 0);
check!(" Am", [fix!(LowerAmPm)]; INVALID);
check!("ame", [fix!(LowerAmPm)]; TOO_LONG); // `am` is parsed
check!("a", [fix!(LowerAmPm)]; TOO_SHORT);
check!("p", [fix!(LowerAmPm)]; TOO_SHORT);
check!("x", [fix!(LowerAmPm)]; TOO_SHORT);
check!("xx", [fix!(LowerAmPm)]; INVALID);
check!("", [fix!(LowerAmPm)]; TOO_SHORT);
// fixed: timezone offsets
check!("+00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("-00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("+00:01", [fix!(TimezoneOffset)]; offset: 60);
check!("-00:01", [fix!(TimezoneOffset)]; offset: -60);
check!("+00:30", [fix!(TimezoneOffset)]; offset: 30 * 60);
check!("-00:30", [fix!(TimezoneOffset)]; offset: -30 * 60);
check!("+04:56", [fix!(TimezoneOffset)]; offset: 296 * 60);
check!("-04:56", [fix!(TimezoneOffset)]; offset: -296 * 60);
check!("+24:00", [fix!(TimezoneOffset)]; offset: 24 * 60 * 60);
check!("-24:00", [fix!(TimezoneOffset)]; offset: -24 * 60 * 60);
check!("+99:59", [fix!(TimezoneOffset)]; offset: (100 * 60 - 1) * 60);
check!("-99:59", [fix!(TimezoneOffset)]; offset: -(100 * 60 - 1) * 60);
check!("+00:59", [fix!(TimezoneOffset)]; offset: 59 * 60);
check!("+00:60", [fix!(TimezoneOffset)]; OUT_OF_RANGE);
check!("+00:99", [fix!(TimezoneOffset)]; OUT_OF_RANGE);
check!("#12:34", [fix!(TimezoneOffset)]; INVALID);
check!("12:34", [fix!(TimezoneOffset)]; INVALID);
check!("+12:34 ", [fix!(TimezoneOffset)]; TOO_LONG);
check!(" +12:34", [fix!(TimezoneOffset)]; offset: 754 * 60);
check!("\t -12:34", [fix!(TimezoneOffset)]; offset: -754 * 60);
check!("", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+1", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+12", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+123", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+1234", [fix!(TimezoneOffset)]; offset: 754 * 60);
check!("+12345", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12345", [fix!(TimezoneOffset), num!(Day)]; offset: 754 * 60, day: 5);
check!("Z", [fix!(TimezoneOffset)]; INVALID);
check!("z", [fix!(TimezoneOffset)]; INVALID);
check!("Z", [fix!(TimezoneOffsetZ)]; offset: 0);
check!("z", [fix!(TimezoneOffsetZ)]; offset: 0);
check!("Y", [fix!(TimezoneOffsetZ)]; INVALID);
check!("Zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0);
check!("zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0);
check!("+1234ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60);
check!("+12:34ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60);
check!("???", [fix!(TimezoneName)]; BAD_FORMAT); // not allowed
// some practical examples
check!("2015-02-04T14:37:05+09:00",
[num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"),
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year_div_100: 20, year_mod_100: 15, month: 2, day: 4,
hour_div_12: 1, hour_mod_12: 2, minute: 37, second: 5, offset: 32400);
check!("Mon, 10 Jun 2013 09:32:37 GMT",
[fix!(ShortWeekdayName), lit!(","), sp!(" "), num!(Day), sp!(" "),
fix!(ShortMonthName), sp!(" "), num!(Year), sp!(" "), num!(Hour), lit!(":"),
num!(Minute), lit!(":"), num!(Second), sp!(" "), lit!("GMT")];
year_div_100: 20, year_mod_100: 13, month: 6, day: 10, weekday: Weekday::Mon,
hour_div_12: 0, hour_mod_12: 9, minute: 32, second: 37);
check!("20060102150405",
[num!(Year), num!(Month), num!(Day), num!(Hour), num!(Minute), num!(Second)];
year_div_100: 20, year_mod_100: 6, month: 1, day: 2,
hour_div_12: 1, hour_mod_12: 3, minute: 4, second: 5);
check!("3:14PM",
[num!(Hour12), lit!(":"), num!(Minute), fix!(LowerAmPm)];
hour_div_12: 1, hour_mod_12: 3, minute: 14);
check!("12345678901234.56789",
[num!(Timestamp), lit!("."), num!(Nanosecond)];
nanosecond: 567_890_000, timestamp: 12_345_678_901_234);
}

625
src/format/parse.rs Normal file
View File

@ -0,0 +1,625 @@
// This is a part of rust-chrono.
// Copyright (c) 2015, Kang Seonghoon.
// Portions copyright (c) 2015, John Nagle.
// See README.md and LICENSE.txt for details.
/*!
* Date and time parsing routines.
*/
use std::usize;
use Weekday;
use super::scan;
use super::{Parsed, ParseResult, Item};
use super::{OUT_OF_RANGE, INVALID, TOO_SHORT, TOO_LONG, BAD_FORMAT};
/// Stores the weekday given as the number of days since Sunday (`0` = Sunday, `6` = Saturday).
///
/// Any other number yields `OUT_OF_RANGE`; otherwise the result of
/// `Parsed::set_weekday` is returned as is.
fn set_weekday_with_num_days_from_sunday(p: &mut Parsed, v: i64) -> ParseResult<()> {
    let weekday = match v {
        0 => Weekday::Sun,
        1 => Weekday::Mon,
        2 => Weekday::Tue,
        3 => Weekday::Wed,
        4 => Weekday::Thu,
        5 => Weekday::Fri,
        6 => Weekday::Sat,
        _ => return Err(OUT_OF_RANGE),
    };
    p.set_weekday(weekday)
}
/// Stores the weekday given as a one-based number counted from Monday (`1` = Monday, `7` = Sunday).
///
/// Any other number yields `OUT_OF_RANGE`; otherwise the result of
/// `Parsed::set_weekday` is returned as is.
fn set_weekday_with_number_from_monday(p: &mut Parsed, v: i64) -> ParseResult<()> {
    let weekday = match v {
        1 => Weekday::Mon,
        2 => Weekday::Tue,
        3 => Weekday::Wed,
        4 => Weekday::Thu,
        5 => Weekday::Fri,
        6 => Weekday::Sat,
        7 => Weekday::Sun,
        _ => return Err(OUT_OF_RANGE),
    };
    p.set_weekday(weekday)
}
/// Parses an RFC 2822 date and time from the start of `s` into `parsed`.
///
/// On success returns the unparsed remainder of `s` (paired with `()` so that
/// the result can be fed to `try_consume!` by the caller). On failure the error
/// of the first failing scanner or `Parsed` setter is returned, and `parsed`
/// may have been partially updated.
fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a str, ())> {
    // evaluates a scanner returning `(remaining input, value)`,
    // advances `s` to the remaining input and yields the value.
    macro_rules! try_consume {
        ($e:expr) => ({ let (s_, v) = try!($e); s = s_; v })
    }

    // an adapted RFC 2822 syntax from Section 3.3 and 4.3:
    //
    // date-time   = [ day-of-week "," ] date 1*S time *S
    // day-of-week = *S day-name *S
    // day-name    = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
    // date        = day month year
    // day         = *S 1*2DIGIT *S
    // month       = 1*S month-name 1*S
    // month-name  = "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
    //               "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
    // year        = *S 2*DIGIT *S
    // time        = time-of-day 1*S zone
    // time-of-day = hour ":" minute [ ":" second ]
    // hour        = *S 2DIGIT *S
    // minute      = *S 2DIGIT *S
    // second      = *S 2DIGIT *S
    // zone        = ( "+" / "-" ) 4DIGIT /
    //               "UT" / "GMT" /                  ; same to +0000
    //               "EST" / "CST" / "MST" / "PST" / ; same to -0500 to -0800
    //               "EDT" / "CDT" / "MDT" / "PDT" / ; same to -0400 to -0700
    //               1*(%d65-90 / %d97-122)          ; same to -0000
    //
    // some notes:
    //
    // - quoted characters can be in any mixture of lower and upper cases.
    //
    // - we do not recognize a folding white space (FWS) or comment (CFWS).
    //   for our purposes, instead, we accept any sequence of Unicode
    //   white space characters (denoted here to `S`). any actual RFC 2822
    //   parser is expected to parse FWS and/or CFWS themselves and replace
    //   it with a single SP (`%x20`); this is legitimate.
    //
    // - two-digit year < 50 should be interpreted by adding 2000.
    //   two-digit year >= 50 or three-digit year should be interpreted
    //   by adding 1900. note that four-or-more-digit years less than 1000
    //   are *never* affected by this rule.
    //
    // - zone of `-0000` and any unrecognized legacy time zones (including
    //   *every* one-letter military time zones) are considered "missing",
    //   in such that we don't actually know what time zone is being used.
    //
    // - mismatching day-of-week is always an error, which is consistent to
    //   Chrono's own rules.
    //
    // - zones can range from `-9959` to `+9959`, but `FixedOffset` does not
    //   support offsets larger than 24 hours. this is not *that* problematic
    //   since we do not directly go to a `DateTime` so one can recover
    //   the offset information from `Parsed` anyway.

    s = s.trim_left();

    // optional day-of-week; once a day name is seen the comma is required.
    if let Ok((s_, weekday)) = scan::short_weekday(s) {
        if !s_.starts_with(",") { return Err(INVALID); }
        s = &s_[1..];
        try!(parsed.set_weekday(weekday));
    }

    s = s.trim_left();
    try!(parsed.set_day(try_consume!(scan::number(s, 1, 2, false))));
    s = try!(scan::space(s)); // mandatory
    try!(parsed.set_month(1 + try_consume!(scan::short_month0(s)) as i64));
    s = try!(scan::space(s)); // mandatory

    // distinguish two- and three-digit years from four-digit years
    let prevlen = s.len();
    let mut year = try_consume!(scan::number(s, 2, usize::MAX, false));
    let yearlen = prevlen - s.len();
    match (yearlen, year) {
        (2,  0...49) => { year += 2000; } //   47 -> 2047,   05 -> 2005
        (2, 50...99) => { year += 1900; } //   79 -> 1979
        (3,       _) => { year += 1900; } //  112 -> 2012,  009 -> 1909
        (_,       _) => {}                // 1987 -> 1987, 0654 -> 0654
    }
    try!(parsed.set_year(year));

    s = try!(scan::space(s)); // mandatory
    try!(parsed.set_hour(try_consume!(scan::number(s, 2, 2, false))));
    s = try!(scan::char(s.trim_left(), b':')).trim_left(); // *S ":" *S
    try!(parsed.set_minute(try_consume!(scan::number(s, 2, 2, false))));

    // [ *S ":" *S 2DIGIT ]
    //
    // the seconds are optional, so only *probe* for the colon here. `s` must
    // stay untouched when the colon is absent: the white space following
    // the minute is then the mandatory separator before the zone, and eagerly
    // trimming it (or demanding a colon) would reject `HH:MM +ZZZZ`.
    if let Ok(s_) = scan::char(s.trim_left(), b':') {
        try!(parsed.set_second(try_consume!(scan::number(s_.trim_left(), 2, 2, false))));
    }

    s = try!(scan::space(s)); // mandatory
    if let Some(offset) = try_consume!(scan::timezone_offset_2822(s)) {
        // only set the offset when it is definitely known (i.e. not `-0000`)
        try!(parsed.set_offset(offset as i64));
    }

    Ok((s, ()))
}
fn parse_rfc3339<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a str, ())> {
macro_rules! try_consume {
($e:expr) => ({ let (s_, v) = try!($e); s = s_; v })
}
// an adapted RFC 3339 syntax from Section 5.6:
//
// date-fullyear = 4DIGIT
// date-month = 2DIGIT ; 01-12
// date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year
// time-hour = 2DIGIT ; 00-23
// time-minute = 2DIGIT ; 00-59
// time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules
// time-secfrac = "." 1*DIGIT
// time-numoffset = ("+" / "-") time-hour ":" time-minute
// time-offset = "Z" / time-numoffset
// partial-time = time-hour ":" time-minute ":" time-second [time-secfrac]
// full-date = date-fullyear "-" date-month "-" date-mday
// full-time = partial-time time-offset
// date-time = full-date "T" full-time
//
// some notes:
//
// - quoted characters can be in any mixture of lower and upper cases.
//
// - it may accept any number of fractional digits for seconds.
// for Chrono, this means that we should skip digits past first 9 digits.
//
// - unlike RFC 2822, the valid offset ranges from -23:59 to +23:59.
// note that this restriction is unique to RFC 3339 and not ISO 8601.
// since this is not a typical Chrono behavior, we check it earlier.
try!(parsed.set_year(try_consume!(scan::number(s, 4, 4, false))));
s = try!(scan::char(s, b'-'));
try!(parsed.set_month(try_consume!(scan::number(s, 2, 2, false))));
s = try!(scan::char(s, b'-'));
try!(parsed.set_day(try_consume!(scan::number(s, 2, 2, false))));
s = match s.as_bytes().first() {
Some(&b't') | Some(&b'T') => &s[1..],
Some(_) => return Err(INVALID),
None => return Err(TOO_SHORT),
};
try!(parsed.set_hour(try_consume!(scan::number(s, 2, 2, false))));
s = try!(scan::char(s, b':'));
try!(parsed.set_minute(try_consume!(scan::number(s, 2, 2, false))));
s = try!(scan::char(s, b':'));
try!(parsed.set_second(try_consume!(scan::number(s, 2, 2, false))));
if s.starts_with(".") {
let nanosecond = try_consume!(scan::number(&s[1..], 1, 9, true));
s = s.trim_left_matches(|c: char| '0' <= c && c <= '9');
try!(parsed.set_nanosecond(nanosecond));
}
let offset = try_consume!(scan::timezone_offset_zulu(s, |s| scan::char(s, b':')));
if offset <= -86400 || offset >= 86400 { return Err(OUT_OF_RANGE); }
try!(parsed.set_offset(offset as i64));
Ok((s, ()))
}
/// Tries to parse given string into `parsed` with given formatting items.
/// Returns `Ok` when the entire string has been parsed (otherwise `parsed` should not be used).
/// There should be no trailing string after parsing; use a stray `Item::Space` to trim whitespace.
///
/// Errors are reported as soon as an item fails: `TOO_SHORT` or `INVALID` for
/// mismatching input, `BAD_FORMAT` for `Item::Error` and for `TimezoneName`
/// (which cannot be parsed), `TOO_LONG` when input remains after the last item,
/// or whatever error a scanner or `Parsed` setter returns.
///
/// This particular date and time parser is:
///
/// - Greedy. It will consume the longest possible prefix.
/// For example, `April` is always consumed entirely when the long month name is requested;
/// it equally accepts `Apr`, but prefers the longer prefix in this case.
/// - Padding-agnostic (for numeric items). The `Pad` field is completely ignored,
/// so one can prepend any number of whitespace then any number of zeroes before numbers.
/// - (Still) obeying the intrinsic parsing width. This allows, for example, parsing `HHMMSS`.
pub fn parse<'a, I>(parsed: &mut Parsed, mut s: &str, items: I) -> ParseResult<()>
where I: Iterator<Item=Item<'a>> {
// evaluates a scanner returning `(remaining input, value)`,
// advances `s` to the remaining input and yields the value.
macro_rules! try_consume {
($e:expr) => ({ let (s_, v) = try!($e); s = s_; v })
}
for item in items {
match item {
// a literal has to match the input exactly (case-sensitively).
Item::Literal(prefix) => {
if s.len() < prefix.len() { return Err(TOO_SHORT); }
if !s.starts_with(prefix) { return Err(INVALID); }
s = &s[prefix.len()..];
}
// a space item matches any amount of whitespace, including none;
// the text carried by the item is not consulted.
Item::Space(_) => {
s = s.trim_left();
}
Item::Numeric(spec, _pad) => {
use super::Numeric::*;
// for each numeric item: the maximum width handed to `scan::number`,
// the `frac` flag handed to `scan::number` (set only for `Nanosecond`;
// presumably marks a fractional field -- confirm in `scan`),
// and the setter that stores the scanned value into `Parsed`.
let (width, frac, set): (usize, bool,
fn(&mut Parsed, i64) -> ParseResult<()>) = match spec {
Year => (4, false, Parsed::set_year),
YearDiv100 => (2, false, Parsed::set_year_div_100),
YearMod100 => (2, false, Parsed::set_year_mod_100),
IsoYear => (4, false, Parsed::set_isoyear),
IsoYearDiv100 => (2, false, Parsed::set_isoyear_div_100),
IsoYearMod100 => (2, false, Parsed::set_isoyear_mod_100),
Month => (2, false, Parsed::set_month),
Day => (2, false, Parsed::set_day),
WeekFromSun => (2, false, Parsed::set_week_from_sun),
WeekFromMon => (2, false, Parsed::set_week_from_mon),
IsoWeek => (2, false, Parsed::set_isoweek),
NumDaysFromSun => (1, false, set_weekday_with_num_days_from_sunday),
WeekdayFromMon => (1, false, set_weekday_with_number_from_monday),
Ordinal => (3, false, Parsed::set_ordinal),
Hour => (2, false, Parsed::set_hour),
Hour12 => (2, false, Parsed::set_hour12),
Minute => (2, false, Parsed::set_minute),
Second => (2, false, Parsed::set_second),
Nanosecond => (9, true, Parsed::set_nanosecond),
Timestamp => (usize::MAX, false, Parsed::set_timestamp),
};
// leading whitespace is always skipped, whatever the requested padding is.
let v = try_consume!(scan::number(s.trim_left(), 1, width, frac));
try!(set(parsed, v));
}
Item::Fixed(spec) => {
use super::Fixed::*;
match spec {
ShortMonthName => {
let month0 = try_consume!(scan::short_month0(s));
try!(parsed.set_month(month0 as i64 + 1));
}
LongMonthName => {
let month0 = try_consume!(scan::short_or_long_month0(s));
try!(parsed.set_month(month0 as i64 + 1));
}
ShortWeekdayName => {
let weekday = try_consume!(scan::short_weekday(s));
try!(parsed.set_weekday(weekday));
}
LongWeekdayName => {
let weekday = try_consume!(scan::short_or_long_weekday(s));
try!(parsed.set_weekday(weekday));
}
// both items accept `am`/`pm` in any mixture of cases:
// `| 32` folds an ASCII uppercase letter to its lowercase form.
LowerAmPm | UpperAmPm => {
if s.len() < 2 { return Err(TOO_SHORT); }
let ampm = match [s.as_bytes()[0] | 32, s.as_bytes()[1] | 32] {
[b'a',b'm'] => false,
[b'p',b'm'] => true,
_ => return Err(INVALID)
};
try!(parsed.set_ampm(ampm));
s = &s[2..];
}
// time zone names are not parsed at all.
TimezoneName => return Err(BAD_FORMAT),
TimezoneOffset => {
let offset = try_consume!(scan::timezone_offset(s.trim_left(),
scan::colon_or_space));
try!(parsed.set_offset(offset as i64));
}
TimezoneOffsetZ => {
let offset = try_consume!(scan::timezone_offset_zulu(s.trim_left(),
scan::colon_or_space));
try!(parsed.set_offset(offset as i64));
}
// the RFC formats are handled by the dedicated parsers in this module.
RFC2822 => try_consume!(parse_rfc2822(parsed, s)),
RFC3339 => try_consume!(parse_rfc3339(parsed, s)),
}
}
Item::Error => {
return Err(BAD_FORMAT);
}
}
}
// if there are trailing chars, it is an error
if !s.is_empty() {
Err(TOO_LONG)
} else {
Ok(())
}
}
// Table-driven test for `parse`: every `check!` line feeds one input string and one
// list of items to `parse` and compares the outcome with either an expected error
// or the expected set of `Parsed` fields.
#[cfg(test)]
#[test]
fn test_parse() {
use super::*;
use super::IMPOSSIBLE;
// workaround for Rust issue #22255
// (runs `parse` over a fresh `Parsed` and hands the filled value back)
fn parse_all(s: &str, items: &[Item]) -> ParseResult<Parsed> {
let mut parsed = Parsed::new();
try!(parse(&mut parsed, s, items.iter().cloned()));
Ok(parsed)
}
// `check!(input, items; ERROR)` expects `parse_all` to fail with `ERROR`;
// `check!(input, items; field: value, ...)` expects success where exactly the listed
// fields are `Some(value)` and the rest equal those of `Parsed::new()`.
macro_rules! check {
($fmt:expr, $items:expr; $err:expr) => (
assert_eq!(parse_all($fmt, &$items), Err($err))
);
($fmt:expr, $items:expr; $($k:ident: $v:expr),*) => (
assert_eq!(parse_all($fmt, &$items), Ok(Parsed { $($k: Some($v),)* ..Parsed::new() }))
);
}
// empty string
check!("", []; );
check!(" ", []; TOO_LONG);
check!("a", []; TOO_LONG);
// whitespaces
check!("", [sp!("")]; );
check!(" ", [sp!("")]; );
check!("\t", [sp!("")]; );
check!(" \n\r \n", [sp!("")]; );
check!("a", [sp!("")]; TOO_LONG);
// literal
check!("", [lit!("a")]; TOO_SHORT);
check!(" ", [lit!("a")]; INVALID);
check!("a", [lit!("a")]; );
check!("aa", [lit!("a")]; TOO_LONG);
check!("A", [lit!("a")]; INVALID);
check!("xy", [lit!("xy")]; );
check!("xy", [lit!("x"), lit!("y")]; );
check!("x y", [lit!("x"), lit!("y")]; INVALID);
check!("xy", [lit!("x"), sp!(""), lit!("y")]; );
check!("x y", [lit!("x"), sp!(""), lit!("y")]; );
// numeric
check!("1987", [num!(Year)]; year_div_100: 19, year_mod_100: 87);
check!("1987 ", [num!(Year)]; TOO_LONG);
check!("0x12", [num!(Year)]; TOO_LONG); // `0` is parsed
check!("x123", [num!(Year)]; INVALID);
check!("2015", [num!(Year)]; year_div_100: 20, year_mod_100: 15);
check!("0000", [num!(Year)]; year_div_100: 0, year_mod_100: 0);
check!("9999", [num!(Year)]; year_div_100: 99, year_mod_100: 99);
check!(" \t987", [num!(Year)]; year_div_100: 9, year_mod_100: 87);
check!("5", [num!(Year)]; year_div_100: 0, year_mod_100: 5);
check!("-42", [num!(Year)]; INVALID);
check!("+42", [num!(Year)]; INVALID);
check!("5\0", [num!(Year)]; TOO_LONG);
check!("\05", [num!(Year)]; INVALID);
check!("", [num!(Year)]; TOO_SHORT);
// a year takes at most four digits, whichever padding the item asks for
check!("12345", [num!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34);
check!("12345", [nums!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34);
check!("12345", [num0!(Year), lit!("5")]; year_div_100: 12, year_mod_100: 34);
// the same field may appear twice as long as both occurrences agree
check!("12341234", [num!(Year), num!(Year)]; year_div_100: 12, year_mod_100: 34);
check!("1234 1234", [num!(Year), num!(Year)]; year_div_100: 12, year_mod_100: 34);
check!("1234 1235", [num!(Year), num!(Year)]; IMPOSSIBLE);
check!("1234 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID);
check!("1234x1234", [num!(Year), lit!("x"), num!(Year)]; year_div_100: 12, year_mod_100: 34);
check!("1234xx1234", [num!(Year), lit!("x"), num!(Year)]; INVALID);
check!("1234 x 1234", [num!(Year), lit!("x"), num!(Year)]; INVALID);
// various numeric fields
check!("1234 5678",
[num!(Year), num!(IsoYear)];
year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78);
check!("12 34 56 78",
[num!(YearDiv100), num!(YearMod100), num!(IsoYearDiv100), num!(IsoYearMod100)];
year_div_100: 12, year_mod_100: 34, isoyear_div_100: 56, isoyear_mod_100: 78);
check!("1 2 3 4 5 6",
[num!(Month), num!(Day), num!(WeekFromSun), num!(WeekFromMon), num!(IsoWeek),
num!(NumDaysFromSun)];
month: 1, day: 2, week_from_sun: 3, week_from_mon: 4, isoweek: 5, weekday: Weekday::Sat);
check!("7 89 01",
[num!(WeekdayFromMon), num!(Ordinal), num!(Hour12)];
weekday: Weekday::Sun, ordinal: 89, hour_mod_12: 1);
check!("23 45 6 78901234 567890123",
[num!(Hour), num!(Minute), num!(Second), num!(Nanosecond), num!(Timestamp)];
hour_div_12: 1, hour_mod_12: 11, minute: 45, second: 6, nanosecond: 789_012_340,
timestamp: 567_890_123);
// fixed: month and weekday names
check!("apr", [fix!(ShortMonthName)]; month: 4);
check!("Apr", [fix!(ShortMonthName)]; month: 4);
check!("APR", [fix!(ShortMonthName)]; month: 4);
check!("ApR", [fix!(ShortMonthName)]; month: 4);
check!("April", [fix!(ShortMonthName)]; TOO_LONG); // `Apr` is parsed
check!("A", [fix!(ShortMonthName)]; TOO_SHORT);
check!("Sol", [fix!(ShortMonthName)]; INVALID);
check!("Apr", [fix!(LongMonthName)]; month: 4);
check!("Apri", [fix!(LongMonthName)]; TOO_LONG); // `Apr` is parsed
check!("April", [fix!(LongMonthName)]; month: 4);
check!("Aprill", [fix!(LongMonthName)]; TOO_LONG);
check!("Aprill", [fix!(LongMonthName), lit!("l")]; month: 4);
check!("Aprl", [fix!(LongMonthName), lit!("l")]; month: 4);
check!("April", [fix!(LongMonthName), lit!("il")]; TOO_SHORT); // do not backtrack
check!("thu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu);
check!("Thu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu);
check!("THU", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu);
check!("tHu", [fix!(ShortWeekdayName)]; weekday: Weekday::Thu);
check!("Thursday", [fix!(ShortWeekdayName)]; TOO_LONG); // `Thu` is parsed
check!("T", [fix!(ShortWeekdayName)]; TOO_SHORT);
check!("The", [fix!(ShortWeekdayName)]; INVALID);
check!("Nop", [fix!(ShortWeekdayName)]; INVALID);
check!("Thu", [fix!(LongWeekdayName)]; weekday: Weekday::Thu);
check!("Thur", [fix!(LongWeekdayName)]; TOO_LONG); // `Thu` is parsed
check!("Thurs", [fix!(LongWeekdayName)]; TOO_LONG); // ditto
check!("Thursday", [fix!(LongWeekdayName)]; weekday: Weekday::Thu);
check!("Thursdays", [fix!(LongWeekdayName)]; TOO_LONG);
check!("Thursdays", [fix!(LongWeekdayName), lit!("s")]; weekday: Weekday::Thu);
check!("Thus", [fix!(LongWeekdayName), lit!("s")]; weekday: Weekday::Thu);
check!("Thursday", [fix!(LongWeekdayName), lit!("rsday")]; TOO_SHORT); // do not backtrack
// fixed: am/pm
// (both items accept either letter case; leading whitespace is not skipped)
check!("am", [fix!(LowerAmPm)]; hour_div_12: 0);
check!("pm", [fix!(LowerAmPm)]; hour_div_12: 1);
check!("AM", [fix!(LowerAmPm)]; hour_div_12: 0);
check!("PM", [fix!(LowerAmPm)]; hour_div_12: 1);
check!("am", [fix!(UpperAmPm)]; hour_div_12: 0);
check!("pm", [fix!(UpperAmPm)]; hour_div_12: 1);
check!("AM", [fix!(UpperAmPm)]; hour_div_12: 0);
check!("PM", [fix!(UpperAmPm)]; hour_div_12: 1);
check!("Am", [fix!(LowerAmPm)]; hour_div_12: 0);
check!(" Am", [fix!(LowerAmPm)]; INVALID);
check!("ame", [fix!(LowerAmPm)]; TOO_LONG); // `am` is parsed
check!("a", [fix!(LowerAmPm)]; TOO_SHORT);
check!("p", [fix!(LowerAmPm)]; TOO_SHORT);
check!("x", [fix!(LowerAmPm)]; TOO_SHORT);
check!("xx", [fix!(LowerAmPm)]; INVALID);
check!("", [fix!(LowerAmPm)]; TOO_SHORT);
// fixed: timezone offsets
// (expected offsets are given in seconds)
check!("+00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("-00:00", [fix!(TimezoneOffset)]; offset: 0);
check!("+00:01", [fix!(TimezoneOffset)]; offset: 60);
check!("-00:01", [fix!(TimezoneOffset)]; offset: -60);
check!("+00:30", [fix!(TimezoneOffset)]; offset: 30 * 60);
check!("-00:30", [fix!(TimezoneOffset)]; offset: -30 * 60);
check!("+04:56", [fix!(TimezoneOffset)]; offset: 296 * 60);
check!("-04:56", [fix!(TimezoneOffset)]; offset: -296 * 60);
check!("+24:00", [fix!(TimezoneOffset)]; offset: 24 * 60 * 60);
check!("-24:00", [fix!(TimezoneOffset)]; offset: -24 * 60 * 60);
check!("+99:59", [fix!(TimezoneOffset)]; offset: (100 * 60 - 1) * 60);
check!("-99:59", [fix!(TimezoneOffset)]; offset: -(100 * 60 - 1) * 60);
check!("+00:59", [fix!(TimezoneOffset)]; offset: 59 * 60);
check!("+00:60", [fix!(TimezoneOffset)]; OUT_OF_RANGE);
check!("+00:99", [fix!(TimezoneOffset)]; OUT_OF_RANGE);
check!("#12:34", [fix!(TimezoneOffset)]; INVALID);
check!("12:34", [fix!(TimezoneOffset)]; INVALID);
check!("+12:34 ", [fix!(TimezoneOffset)]; TOO_LONG);
check!(" +12:34", [fix!(TimezoneOffset)]; offset: 754 * 60);
check!("\t -12:34", [fix!(TimezoneOffset)]; offset: -754 * 60);
check!("", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+1", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+12", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+123", [fix!(TimezoneOffset)]; TOO_SHORT);
check!("+1234", [fix!(TimezoneOffset)]; offset: 754 * 60);
check!("+12345", [fix!(TimezoneOffset)]; TOO_LONG);
check!("+12345", [fix!(TimezoneOffset), num!(Day)]; offset: 754 * 60, day: 5);
check!("Z", [fix!(TimezoneOffset)]; INVALID);
check!("z", [fix!(TimezoneOffset)]; INVALID);
check!("Z", [fix!(TimezoneOffsetZ)]; offset: 0);
check!("z", [fix!(TimezoneOffsetZ)]; offset: 0);
check!("Y", [fix!(TimezoneOffsetZ)]; INVALID);
check!("Zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0);
check!("zulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 0);
check!("+1234ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60);
check!("+12:34ulu", [fix!(TimezoneOffsetZ), lit!("ulu")]; offset: 754 * 60);
check!("???", [fix!(TimezoneName)]; BAD_FORMAT); // not allowed
// some practical examples
check!("2015-02-04T14:37:05+09:00",
[num!(Year), lit!("-"), num!(Month), lit!("-"), num!(Day), lit!("T"),
num!(Hour), lit!(":"), num!(Minute), lit!(":"), num!(Second), fix!(TimezoneOffset)];
year_div_100: 20, year_mod_100: 15, month: 2, day: 4,
hour_div_12: 1, hour_mod_12: 2, minute: 37, second: 5, offset: 32400);
check!("Mon, 10 Jun 2013 09:32:37 GMT",
[fix!(ShortWeekdayName), lit!(","), sp!(" "), num!(Day), sp!(" "),
fix!(ShortMonthName), sp!(" "), num!(Year), sp!(" "), num!(Hour), lit!(":"),
num!(Minute), lit!(":"), num!(Second), sp!(" "), lit!("GMT")];
year_div_100: 20, year_mod_100: 13, month: 6, day: 10, weekday: Weekday::Mon,
hour_div_12: 0, hour_mod_12: 9, minute: 32, second: 37);
check!("20060102150405",
[num!(Year), num!(Month), num!(Day), num!(Hour), num!(Minute), num!(Second)];
year_div_100: 20, year_mod_100: 6, month: 1, day: 2,
hour_div_12: 1, hour_mod_12: 3, minute: 4, second: 5);
check!("3:14PM",
[num!(Hour12), lit!(":"), num!(Minute), fix!(LowerAmPm)];
hour_div_12: 1, hour_mod_12: 3, minute: 14);
check!("12345678901234.56789",
[num!(Timestamp), lit!("."), num!(Nanosecond)];
nanosecond: 567_890_000, timestamp: 12_345_678_901_234);
}
#[cfg(test)]
#[test]
fn test_rfc2822() {
    use datetime::DateTime;
    use offset::FixedOffset;
    use super::*;
    use super::NOT_ENOUGH;

    // parses `input` as a lone RFC 2822 item and resolves the fields to a date and time
    fn parse_2822(input: &str) -> ParseResult<DateTime<FixedOffset>> {
        let mut fields = Parsed::new();
        try!(parse(&mut fields, input, [Item::Fixed(Fixed::RFC2822)].iter().cloned()));
        fields.to_datetime()
    }

    // renders a date and time back through the very same RFC 2822 item
    fn format_2822(dt: DateTime<FixedOffset>) -> String {
        dt.format_with_items([Item::Fixed(Fixed::RFC2822)].iter().cloned()).to_string()
    }

    // (input, Ok(text expected after a parse-then-format round trip) or Err(expected error))
    let cases = [
        // well-formed inputs
        ("Tue, 20 Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")),
        ("20 Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // no weekday
        ("20 JAN 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // upper case
        ("11 Sep 2001 09:45:00 EST", Ok("Tue, 11 Sep 2001 09:45:00 -0500")), // legacy zone
        // malformed inputs
        ("30 Feb 2015 17:35:20 -0800", Err(OUT_OF_RANGE)), // no such day in the month
        ("Tue, 20 Jan 2015", Err(TOO_SHORT)), // time and zone left out
        ("Tue, 20 Avr 2015 17:35:20 -0800", Err(INVALID)), // unknown month name
        ("Tue, 20 Jan 2015 25:35:20 -0800", Err(OUT_OF_RANGE)), // hour too large
        ("Tue, 20 Jan 2015 7:35:20 -0800", Err(INVALID)), // hour needs two digits
        ("Tue, 20 Jan 2015 17:65:20 -0800", Err(OUT_OF_RANGE)), // minute too large
        ("Tue, 20 Jan 2015 17:35:90 -0800", Err(OUT_OF_RANGE)), // second too large
        ("Tue, 20 Jan 2015 17:35:20 -0890", Err(OUT_OF_RANGE)), // offset minutes too large
        ("6 Jun 1944 04:00:00Z", Err(INVALID)), // zulu is not an RFC 2822 zone
        ("Tue, 20 Jan 2015 17:35:20 HAS", Err(NOT_ENOUGH)) // unknown zone name
    ];

    for &(input, expected) in cases.iter() {
        // a successful parse is formatted back; an error is kept as is for the comparison
        let actual = parse_2822(input).map(format_2822);
        let wanted = expected.map(|text| text.to_string());
        if actual != wanted {
            panic!("Date conversion failed for {}\nReceived: {:?}\nExpected: {:?}",
                   input, actual, expected);
        }
    }
}
#[cfg(test)]
#[test]
fn test_rfc3339() {
    use datetime::DateTime;
    use offset::FixedOffset;
    use super::*;

    // parses `input` as a lone RFC 3339 item and resolves the fields to a date and time
    fn parse_3339(input: &str) -> ParseResult<DateTime<FixedOffset>> {
        let mut fields = Parsed::new();
        try!(parse(&mut fields, input, [Item::Fixed(Fixed::RFC3339)].iter().cloned()));
        fields.to_datetime()
    }

    // renders a date and time back through the very same RFC 3339 item
    fn format_3339(dt: DateTime<FixedOffset>) -> String {
        dt.format_with_items([Item::Fixed(Fixed::RFC3339)].iter().cloned()).to_string()
    }

    // (input, Ok(text expected after a parse-then-format round trip) or Err(expected error))
    let cases = [
        // well-formed inputs
        ("2015-01-20T17:35:20-08:00", Ok("2015-01-20T17:35:20-08:00")),
        ("1944-06-06T04:04:00Z", Ok("1944-06-06T04:04:00+00:00")), // zulu
        ("2001-09-11T09:45:00-08:00", Ok("2001-09-11T09:45:00-08:00")),
        ("2015-01-20T17:35:20.001-08:00", Ok("2015-01-20T17:35:20.001-08:00")),
        ("2015-01-20T17:35:20.000031-08:00", Ok("2015-01-20T17:35:20.000031-08:00")),
        ("2015-01-20T17:35:20.000000004-08:00", Ok("2015-01-20T17:35:20.000000004-08:00")),
        // digits past the ninth fractional place do not survive the round trip
        ("2015-01-20T17:35:20.000000000452-08:00", Ok("2015-01-20T17:35:20-08:00")),
        // malformed inputs
        ("2015-02-30T17:35:20-08:00", Err(OUT_OF_RANGE)), // no such day in the month
        ("2015-01-20T25:35:20-08:00", Err(OUT_OF_RANGE)), // hour too large
        ("2015-01-20T17:65:20-08:00", Err(OUT_OF_RANGE)), // minute too large
        ("2015-01-20T17:35:90-08:00", Err(OUT_OF_RANGE)), // second too large
        ("2015-01-20T17:35:20-24:00", Err(OUT_OF_RANGE)), // offset rejected
    ];

    for &(input, expected) in cases.iter() {
        // a successful parse is formatted back; an error is kept as is for the comparison
        let actual = parse_3339(input).map(format_3339);
        let wanted = expected.map(|text| text.to_string());
        if actual != wanted {
            panic!("Date conversion failed for {}\nReceived: {:?}\nExpected: {:?}",
                   input, actual, expected);
        }
    }
}

View File

@ -467,7 +467,10 @@ impl Parsed {
let time = try!(parsed.to_naive_time());
Ok(date.and_time(time))
} else {
Err(NOT_ENOUGH)
// reproduce the previous error(s)
try!(date);
try!(time);
unreachable!()
}
}
@ -867,6 +870,11 @@ mod tests {
assert_eq!(parse!(year_mod_100: 12, ordinal: 182, hour_div_12: 1, hour_mod_12: 11,
minute: 59, second: 60, timestamp: 1_341_100_801),
Err(IMPOSSIBLE));
// error codes
assert_eq!(parse!(year_div_100: 20, year_mod_100: 15, month: 1, day: 20, weekday: Tue,
hour_div_12: 2, hour_mod_12: 1, minute: 35, second: 20),
Err(OUT_OF_RANGE)); // `hour_div_12` is out of range
}
#[test]

View File

@ -130,6 +130,27 @@ pub fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
Ok((s, weekday))
}
/// Tries to consume the single byte `c1` from the front of `s`.
///
/// Returns the remaining input on success, `TOO_SHORT` when `s` is empty
/// and `INVALID` when `s` begins with any other byte.
pub fn char(s: &str, c1: u8) -> ParseResult<&str> {
    let bytes = s.as_bytes();
    if bytes.is_empty() {
        return Err(TOO_SHORT);
    }
    if bytes[0] != c1 {
        return Err(INVALID);
    }
    Ok(&s[1..])
}
/// Tries to consume a non-empty run of leading whitespace.
///
/// Returns the input after the whitespace on success, `TOO_SHORT` when `s` is empty
/// and `INVALID` when `s` does not begin with whitespace.
pub fn space(s: &str) -> ParseResult<&str> {
    if s.is_empty() {
        return Err(TOO_SHORT);
    }
    let rest = s.trim_left();
    if rest.len() == s.len() {
        // nothing was trimmed, so there was no leading whitespace
        Err(INVALID)
    } else {
        Ok(rest)
    }
}
/// Consumes any number (including zero) of colon or spaces.
pub fn colon_or_space(s: &str) -> ParseResult<&str> {
Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace()))
@ -138,7 +159,7 @@ pub fn colon_or_space(s: &str) -> ParseResult<&str> {
/// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible.
///
/// The additional `colon` may be used to parse a mandatory or optional `:`
/// between hours and minutes, and should return either a new suffix or `None` when parsing fails.
/// between hours and minutes, and should return either a new suffix or `Err` when parsing fails.
pub fn timezone_offset<F>(mut s: &str, mut colon: F) -> ParseResult<(&str, i32)>
where F: FnMut(&str) -> ParseResult<&str> {
let negative = match s.as_bytes().first() {
@ -182,3 +203,43 @@ pub fn timezone_offset_zulu<F>(s: &str, colon: F) -> ParseResult<(&str, i32)>
}
}
/// Same to `timezone_offset` but also allows for RFC 2822 legacy timezones.
/// May return `None` which indicates an insufficient offset data (i.e. `-0000`).
pub fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)> {
// tries to parse legacy time zone names
let upto = s.as_bytes().iter().position(|&c| match c { b'a'...b'z' | b'A'...b'Z' => false,
_ => true }).unwrap_or(s.len());
if upto > 0 {
let name = &s[..upto];
let s = &s[upto..];
if equals(name, "gmt") || equals(name, "ut") {
Ok((s, Some(0)))
} else if equals(name, "est") {
Ok((s, Some(-5 * 3600)))
} else if equals(name, "edt") {
Ok((s, Some(-4 * 3600)))
} else if equals(name, "cst") {
Ok((s, Some(-6 * 3600)))
} else if equals(name, "cdt") {
Ok((s, Some(-5 * 3600)))
} else if equals(name, "mst") {
Ok((s, Some(-7 * 3600)))
} else if equals(name, "mdt") {
Ok((s, Some(-6 * 3600)))
} else if equals(name, "pst") {
Ok((s, Some(-8 * 3600)))
} else if equals(name, "pdt") {
Ok((s, Some(-7 * 3600)))
} else {
Ok((s, None)) // recommended by RFC 2822: consume but treat it as -0000
}
} else {
let (s_, offset) = try!(timezone_offset(s, |s| Ok(s)));
if offset == 0 && s.starts_with("-") { // -0000 is not same to +0000
Ok((s_, None))
} else {
Ok((s_, Some(offset)))
}
}
}

View File

@ -218,11 +218,6 @@ Advanced offset handling is not yet supported (but is planned in 0.3).
#![feature(core, collections, hash, std_misc)] // lib stability features as per RFC #507
#![cfg_attr(test, feature(test))] // ditto
#![deny(missing_docs)]
// This is needed to allow compile-time regular expressions in this crate.
#![feature(plugin)]
#![plugin(regex_macros)]
extern crate regex;
extern crate "time" as stdtime;
@ -265,12 +260,6 @@ pub mod time;
pub mod datetime;
pub mod format;
/// Parsing functions for date/time strings.
///
/// Parsing functions are provided for RFC 2822 ("Tue, 20 Jan 2015 17:35:20 -0800")
/// and RFC3339/ISO8601 ("2015-01-20T17:35:20.001-0800") date/time strings.
pub mod parse;
/// The day of week (DOW).
///
/// The order of the days of week depends on the context.

View File

@ -1,335 +0,0 @@
use std::num::Int;
use std::cmp;
use ::{Offset};
//
// parse.rs -- parsing for various standardized date and time string formats
//
// John Nagle
// January, 2015
//
//
// RFC2822 time/date stamp parsing
//
// Example: "Tue, 20 Jan 2015 17:35:20 -0800".
// Common use case: email date/time.
//
// Date format specification, from RFC2822.
//
// date-time = [ day-of-week "," ] date FWS time [CFWS]
//
// day-of-week = ([FWS] day-name) / obs-day-of-week
//
// day-name = "Mon" / "Tue" / "Wed" / "Thu" /
// "Fri" / "Sat" / "Sun"
//
// date = day month year
//
// year = 4*DIGIT / obs-year
//
// month = (FWS month-name FWS) / obs-month
//
// month-name = "Jan" / "Feb" / "Mar" / "Apr" /
// "May" / "Jun" / "Jul" / "Aug" /
// "Sep" / "Oct" / "Nov" / "Dec"
//
// day = ([FWS] 1*2DIGIT) / obs-day
//
// time = time-of-day FWS zone
//
// time-of-day = hour ":" minute [ ":" second ]
//
// hour = 2DIGIT / obs-hour
//
// minute = 2DIGIT / obs-minute
//
// second = 2DIGIT / obs-second
//
// zone = (( "+" / "-" ) 4DIGIT) / obs-zone
//
//
// Obsolete forms
//
// obs-day-of-week = [CFWS] day-name [CFWS]
//
// obs-year = [CFWS] 2*DIGIT [CFWS]
//
// obs-month = CFWS month-name CFWS
//
// obs-day = [CFWS] 1*2DIGIT [CFWS]
//
// obs-hour = [CFWS] 2DIGIT [CFWS]
//
// obs-minute = [CFWS] 2DIGIT [CFWS]
//
// obs-second = [CFWS] 2DIGIT [CFWS]
//
// obs-zone = "UT" / "GMT" / ; Universal Time
// ; North American UT
// ; offsets
// "EST" / "EDT" / ; Eastern: - 5/ - 4
// "CST" / "CDT" / ; Central: - 6/ - 5
// "MST" / "MDT" / ; Mountain: - 7/ - 6
// "PST" / "PDT" / ; Pacific: - 8/ - 7
//
// %d65-73 / ; Military zones - "A"
// %d75-90 / ; through "I" and "K"
// %d97-105 / ; through "Z", both
// %d107-122 ; upper and lower case
//
//
// RFC2822 considers the obsolete one-letter military time zones unreliable (it
// recommends treating them as "-0000"); this parser interprets all of them as
// +0000.
//
// The only feature not supported is that an offset of "-0000" should return a
// naive date/time, not a time zone aware one. This returns a time zone aware
// date/time object in all cases.
//
//
/// Time zone offset in minutes, from string.
/// Allowed input per RFC2822 above - numeric offset or named time zone.
///
/// A numeric offset is a sign followed by exactly four ASCII digits (`+0800` is an
/// 8 hour offset); its hours must not exceed 12 and its minutes must not exceed 59.
/// Anything else is looked up as a zone name, and any remaining one-byte input is
/// treated as an obsolete single-letter military zone, i.e. as offset 0.
/// Returns `None` for everything else. Never panics.
fn offsetmins(s: &str) -> Option<i32> {
    // value of an ASCII decimal digit, if the byte is one
    fn digit(b: u8) -> Option<i32> {
        if b >= b'0' && b <= b'9' { Some((b - b'0') as i32) } else { None }
    }

    // The numeric form is scanned byte by byte: only ASCII digits are accepted, so
    // there is no integer parsing that could fail on other Unicode digits.
    let bytes = s.as_bytes();
    if bytes.len() == 5 && (bytes[0] == b'+' || bytes[0] == b'-') {
        if let (Some(h1), Some(h0), Some(m1), Some(m0)) =
                (digit(bytes[1]), digit(bytes[2]), digit(bytes[3]), digit(bytes[4])) {
            let hh = h1 * 10 + h0; // hours
            let mm = m1 * 10 + m0; // minutes
            if hh > 12 || mm > 59 { return None; } // check offsets
            let signval = if bytes[0] == b'-' { -1 } else { 1 };
            return Some(signval * (hh * 60 + mm)); // offset in minutes
        }
    }

    // not numeric, try the named time zones
    match s {
        "GMT" | "UT" | "Z" | "z" => Some(0), // prime meridian
        "EDT" => Some(-4 * 60), // obsolete forms
        "EST" | "CDT" => Some(-5 * 60), // rather US-centric in this old RFC.
        "CST" | "MDT" => Some(-6 * 60),
        "MST" | "PDT" => Some(-7 * 60),
        "PST" => Some(-8 * 60),
        // obsolete single-letter military forms are all treated as 0
        _ => if s.len() == 1 { Some(0) } else { None },
    }
}
/// Makes a new `DateTime` with offset given a valid RFC2822 string.
/// Example: "Tue, 20 Jan 2015 17:35:20 -0800"
///
/// Returns `None` when the string does not have the expected shape, when a numeric
/// field does not fit its integer type, when the time zone is not recognized
/// or when the fields do not form a valid date and time.
pub fn rfc2822_to_datetime(s: &str) -> Option<::DateTime<::FixedOffset>> {
    // Match the date format. Case-insensitive, compile-time regex.
    // Digits are spelled `[0-9]` instead of `\d`: the latter also matches non-ASCII
    // Unicode digits, which the integer parsers below do not understand.
    let datere = regex!(r"^(?i)(?:Mon,|Tue,|Wed,|Thu,|Fri,|Sat,|Sun,)??\s*([0-9]+)\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+([0-9]{4})\s+([0-9]+):([0-9]+):([0-9]+)\s*([+-][0-9]{4}|[A-Z]+)$");
    let captures = match datere.captures(s.trim()) { // Pattern match the date
        Some(caps) => caps, // succeed
        None => return None // fail
    };

    // Reads capture group `$idx` as a `$ty`. The digit runs are unbounded, so the
    // conversion can overflow; that is reported as `None` instead of panicking.
    macro_rules! field {
        ($idx:expr, $ty:ty) => (
            match captures.at($idx).and_then(|text| text.parse::<$ty>().ok()) {
                Some(value) => value,
                None => return None
            }
        )
    }

    let dd = field!(1, u32); // day of month
    // Month names are case-sensitive in RFC 2822, but we allow the obvious other forms.
    let mo = match captures.at(2).unwrap_or("") { // month decode
        "Jan"|"JAN"|"jan" => 1,
        "Feb"|"FEB"|"feb" => 2,
        "Mar"|"MAR"|"mar" => 3,
        "Apr"|"APR"|"apr" => 4,
        "May"|"MAY"|"may" => 5,
        "Jun"|"JUN"|"jun" => 6,
        "Jul"|"JUL"|"jul" => 7,
        "Aug"|"AUG"|"aug" => 8,
        "Sep"|"SEP"|"sep" => 9,
        "Oct"|"OCT"|"oct" => 10,
        "Nov"|"NOV"|"nov" => 11,
        "Dec"|"DEC"|"dec" => 12,
        _ => return None
    };
    let yyyy = field!(3, i32); // chrono wants a signed year
    let hh = field!(4, u32); // hour
    let mm = field!(5, u32); // minute
    let ss = field!(6, u32); // second

    // can be +0800 or a time zone name
    let offsetmm = match captures.at(7).and_then(|zone| offsetmins(zone)) {
        Some(v) => v,
        None => return None
    };
    let tz = ::FixedOffset::east(offsetmm*60); // decode time zone offset

    // Pack numeric values into DateTime object, returning None if fail.
    match tz.ymd_opt(yyyy, mo, dd) { // check for invalid date
        ::LocalResult::Single(d) => d.and_hms_opt(hh, mm, ss), // encode into DateTime
        _ => None // date conversion failed
    }
}
/// Renders a `DateTime` in the RFC2822 layout (weekday, day, month name, year,
/// time of day and numeric offset). Mostly useful for debugging.
pub fn fmt_rfc2822_datetime(dt: ::DateTime<::FixedOffset>) -> String {
    // the pattern mirrors what `rfc2822_to_datetime` accepts
    let rendered = dt.format("%a, %e %b %Y %H:%M:%S %z");
    rendered.to_string()
}
//
// RFC3339 date parsing
//
// This is a subset of ISO 8601 date format.
// Example: "2012-09-09T18:00:00-07:00"
// Common use case: Atom feeds.
//
//
// From RFC3339, "Date and Time on the Internet: Timestamps", section 5.6:
//
// date-fullyear = 4DIGIT
// date-month = 2DIGIT ; 01-12
// date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on
// ; month/year
// time-hour = 2DIGIT ; 00-23
// time-minute = 2DIGIT ; 00-59
// time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second
// ; rules
// time-secfrac = "." 1*DIGIT
// time-numoffset = ("+" / "-") time-hour ":" time-minute
// time-offset = "Z" / time-numoffset
//
// partial-time = time-hour ":" time-minute ":" time-second
// [time-secfrac]
// full-date = date-fullyear "-" date-month "-" date-mday
// full-time = partial-time time-offset
//
// date-time = full-date "T" full-time
//
// NOTE: Per [ABNF] and ISO8601, the "T" and "Z" characters in this
// syntax may alternatively be lower case "t" or "z" respectively.
//
// ISO 8601 defines date and time separated by "T".
// Applications using this syntax may choose, for the sake of
// readability, to specify a full-date and full-time separated by
// (say) a space character.
//
/// Parse a string with a RFC3339 date, time, and offset into a DateTime.
/// This is the subset of ISO 8601 date and time strings most used on the Web.
///
/// The numeric offset may be written with a colon (`-07:00`, as RFC3339 specifies)
/// or without one (`-0700`). Returns `None` when the string does not have the
/// expected shape, when the time zone is not recognized or when the fields
/// do not form a valid date and time.
pub fn rfc3339_to_datetime(s: &str) -> Option<::DateTime<::FixedOffset>> {
    // Format regex. Digits are spelled `[0-9]` instead of `\d`: the latter also matches
    // non-ASCII Unicode digits, which the integer parsers below do not understand.
    let datere = regex!(r"^(?i)([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(\.[0-9]+)??([+-][0-9]{2}:?[0-9]{2}|[A-Z]+)$");
    let captures = match datere.captures(s.trim()) { // Pattern match the date
        Some(caps) => caps, // succeed
        None => return None // fail
    };

    // Reads capture group `$idx` as a `$ty`, giving up with `None` instead of panicking
    // should the conversion ever fail.
    macro_rules! field {
        ($idx:expr, $ty:ty) => (
            match captures.at($idx).and_then(|text| text.parse::<$ty>().ok()) {
                Some(value) => value,
                None => return None
            }
        )
    }

    let yyyy = field!(1, i32); // chrono wants a signed year
    let mo = field!(2, u32); // month of year
    let dd = field!(3, u32); // day of month
    let hh = field!(4, u32); // hour
    let mm = field!(5, u32); // minute
    let ss = field!(6, u32); // second
    let ns = match captures.at(7) { // fractional seconds present?
        Some(fractstr) => parsensfract(fractstr), // parse as nanoseconds
        None => 0 // no fraction
    };

    // time zone offset; `offsetmins` wants the colon-less form and also accepts
    // named time zones, which are not required here.
    let offsetstr = match captures.at(8) {
        Some(v) => v,
        None => return None
    };
    let compact = offsetstr.replace(":", "");
    let offsetmm = match offsetmins(&compact[..]) {
        Some(v) => v,
        None => return None
    };
    let tz = ::FixedOffset::east(offsetmm*60); // decode time zone offset

    // Pack numeric values into DateTime object, returning None if fail.
    match tz.ymd_opt(yyyy, mo, dd) { // check for invalid date
        ::LocalResult::Single(d) => d.and_hms_nano_opt(hh, mm, ss, ns), // encode into DateTime
        _ => None // date conversion failed
    }
}
/// Parse ".NNN" into nanoseconds.
///
/// The first character (expected to be the ".") is skipped, then up to nine ASCII
/// digits are read; reading stops at the first other byte and digits beyond the
/// ninth are dropped. Missing digits count as zeroes, so ".5" is 500_000_000.
/// Never panics; input without any digit yields 0.
fn parsensfract(s: &str) -> u32 {
    let mut nanos = 0u32;
    let mut missing = 9; // decimal places not filled in yet
    for &b in s.as_bytes().iter().skip(1) { // skip the leading "."
        if missing == 0 || b < b'0' || b > b'9' { break; }
        nanos = nanos * 10 + (b - b'0') as u32; // stays below 10^9, fits in u32
        missing -= 1;
    }
    // scale up to nanoseconds for the places that had no digit
    for _ in 0..missing {
        nanos *= 10;
    }
    nanos
}
/// Renders a `DateTime` in the RFC 3339/ISO8601 layout, fractional seconds included.
/// Counterpart of `rfc3339_to_datetime`.
pub fn fmt_rfc3339_datetime(dt: ::DateTime<::FixedOffset>) -> String {
    // the pattern mirrors what `rfc3339_to_datetime` accepts
    let rendered = dt.format("%Y-%m-%dT%H:%M:%S.%f%z");
    rendered.to_string()
}
//
// Unit tests
//
#[test]
/// Round-trips RFC2822 strings through the parser and the formatter.
fn testrfc2822parser() {
    // [input, text expected after parse and format]; an empty expectation means
    // that the input has to be rejected
    let cases = [
        ["Tue, 20 Jan 2015 17:35:20 -0800", "Tue, 20 Jan 2015 17:35:20 -0800"], // plain
        ["20 Jan 2015 17:35:20 -0800", "Tue, 20 Jan 2015 17:35:20 -0800"], // weekday omitted
        ["20 JAN 2015 17:35:20 -0800", "Tue, 20 Jan 2015 17:35:20 -0800"], // upper case month
        ["6 Jun 1944 04:00:00Z","Tue, 6 Jun 1944 04:00:00 +0000"], // zulu
        ["11 Sep 2001 9:45:00 EST", "Tue, 11 Sep 2001 09:45:00 -0500"], // named zone
        ["30 Feb 2015 17:35:20 -0800", ""], // no such day in the month
        ["Tue, 20 Avr 2015 17:35:20 -0800", ""], // unknown month name
        ["Tue, 20 Jan 2015 25:35:20 -0800",""], // hour too large
        ["Tue, 20 Jan 2015 17:65:20 -0800",""], // minute too large
        ["Tue, 20 Jan 2015 17:35:90 -0800",""], // second too large
        ["Tue, 20 Jan 2015 17:35:20 -1800",""], // offset too large
        ["Tue, 20 Jan 2015 17:35:20 HAS",""] // unknown zone name
    ];

    for case in cases.iter() {
        let (input, expected) = (case[0], case[1]);
        match rfc2822_to_datetime(input) {
            // a rejection is only fine when it was expected
            None => {
                if expected != "" {
                    panic!("Failed to convert date {}", input)
                }
            }
            // anything accepted has to format back to the expected text
            Some(dt) => {
                let formatted = fmt_rfc2822_datetime(dt);
                if formatted != expected {
                    panic!("Date conversion failed for {}\nReceived: {}\nExpected: {}",input, formatted, expected);
                }
            }
        }
    }
}
#[test]
/// Round-trips RFC3339/ISO8601 strings through the parser and the formatter.
fn testrfc3339parser() {
    // [input, text expected after parse and format]; an empty expectation means
    // that the input has to be rejected
    let cases = [
        ["2015-01-20T17:35:20-0800", "2015-01-20T17:35:20.000000000-0800"], // plain
        ["1944-06-06T04:04:00Z", "1944-06-06T04:04:00.000000000+0000"], // zulu
        ["2001-09-11T09:45:00-0800", "2001-09-11T09:45:00.000000000-0800"],
        ["2015-01-20T17:35:20.001-0800", "2015-01-20T17:35:20.001000000-0800"], // milliseconds
        ["2015-01-20T17:35:20.000031-0800", "2015-01-20T17:35:20.000031000-0800"], // microseconds
        ["2015-01-20T17:35:20.000000004-0800", "2015-01-20T17:35:20.000000004-0800"], // nanoseconds
        ["2015-01-20T17:35:20.000000000452-0800", "2015-01-20T17:35:20.000000000-0800"], // picoseconds are dropped
        ["2015-02-30T17:35:20-0800", ""], // no such day in the month
        ["2015-01-20T25:35:20-0800", ""], // hour too large
        ["2015-01-20T17:65:20-0800", ""], // minute too large
        ["2015-01-20T17:35:90-0800", ""], // second too large
        ["2015-01-20T17:35:20-1800", ""], // offset too large
    ];

    for case in cases.iter() {
        let (input, expected) = (case[0], case[1]);
        match rfc3339_to_datetime(input) {
            // a rejection is only fine when it was expected
            None => {
                if expected != "" {
                    panic!("Failed to convert date {}", input)
                }
            }
            // anything accepted has to format back to the expected text
            Some(dt) => {
                let formatted = fmt_rfc3339_datetime(dt);
                if formatted != expected {
                    panic!("Date conversion failed for {}\nReceived: {}\nExpected: {}",input, formatted, expected);
                }
            }
        }
    }
}