2017-02-05 21:15:57 +00:00
|
|
|
// This is a part of Chrono.
|
2015-02-13 09:46:02 +00:00
|
|
|
// See README.md and LICENSE.txt for details.
|
|
|
|
|
|
|
|
/*!
|
|
|
|
* Various scanning routines for the parser.
|
|
|
|
*/
|
|
|
|
|
2019-03-24 00:49:05 +00:00
|
|
|
#![allow(deprecated)]
|
|
|
|
|
2015-02-13 09:46:02 +00:00
|
|
|
use Weekday;
|
|
|
|
use super::{ParseResult, TOO_SHORT, INVALID, OUT_OF_RANGE};
|
|
|
|
|
|
|
|
/// Compares `s` against `pattern` byte by byte, ignoring ASCII case in `s`.
///
/// Only the bytes of `s` are folded to lower case, so `pattern` has to be
/// lower-cased already for a match to be possible.
fn equals(s: &str, pattern: &str) -> bool {
    let (lhs, rhs) = (s.as_bytes(), pattern.as_bytes());
    if lhs.len() != rhs.len() {
        return false;
    }
    lhs.iter().zip(rhs.iter()).all(|(&x, &y)| {
        // fold ASCII upper-case letters only; every other byte is compared verbatim
        let folded = if b'A' <= x && x <= b'Z' { x + 32 } else { x };
        folded == y
    })
}
|
|
|
|
|
|
|
|
/// Tries to parse the non-negative number from `min` to `max` digits.
|
|
|
|
///
|
|
|
|
/// The absence of digits at all is an unconditional error.
|
|
|
|
/// More than `max` digits are consumed up to the first `max` digits.
|
|
|
|
/// Any number that does not fit in `i64` is an error.
|
2019-11-22 13:42:55 +00:00
|
|
|
#[inline]
|
2015-02-18 14:27:12 +00:00
|
|
|
pub fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
|
2015-02-13 09:46:02 +00:00
|
|
|
assert!(min <= max);
|
|
|
|
|
2019-11-22 13:34:30 +00:00
|
|
|
// We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on
|
|
|
|
// the first non-numeric byte, which may be another ascii character or beginning of multi-byte
|
|
|
|
// UTF-8 character.
|
|
|
|
let bytes = s.as_bytes();
|
|
|
|
if bytes.len() < min {
|
|
|
|
return Err(TOO_SHORT);
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut n = 0i64;
|
2019-11-22 18:32:05 +00:00
|
|
|
for (i, c) in bytes.iter().take(max).cloned().enumerate() { // cloned() = copied()
|
2019-11-22 13:34:30 +00:00
|
|
|
if c < b'0' || b'9' < c {
|
|
|
|
if i < min {
|
|
|
|
return Err(INVALID);
|
|
|
|
} else {
|
|
|
|
return Ok((&s[i..], n));
|
|
|
|
}
|
|
|
|
}
|
2015-02-13 09:46:02 +00:00
|
|
|
|
2019-11-22 13:34:30 +00:00
|
|
|
n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) {
|
|
|
|
Some(n) => n,
|
|
|
|
None => return Err(OUT_OF_RANGE),
|
|
|
|
};
|
2015-02-13 09:46:02 +00:00
|
|
|
}
|
|
|
|
|
2019-11-22 18:32:05 +00:00
|
|
|
Ok((&s[::core::cmp::min(max, bytes.len())..], n))
|
2015-02-18 14:27:12 +00:00
|
|
|
}
|
2015-02-13 09:46:02 +00:00
|
|
|
|
2015-02-18 14:27:12 +00:00
|
|
|
/// Tries to consume at least one digits as a fractional second.
|
|
|
|
/// Returns the number of whole nanoseconds (0--999,999,999).
|
|
|
|
pub fn nanosecond(s: &str) -> ParseResult<(&str, i64)> {
|
|
|
|
// record the number of digits consumed for later scaling.
|
|
|
|
let origlen = s.len();
|
2019-11-24 00:06:27 +00:00
|
|
|
let (s, v) = number(s, 1, 9)?;
|
2015-02-18 14:27:12 +00:00
|
|
|
let consumed = origlen - s.len();
|
2015-02-13 09:46:02 +00:00
|
|
|
|
2015-02-18 14:27:12 +00:00
|
|
|
// scale the number accordingly.
|
|
|
|
static SCALE: [i64; 10] = [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000,
|
|
|
|
1_000, 100, 10, 1];
|
2019-11-24 00:06:27 +00:00
|
|
|
let v = v.checked_mul(SCALE[consumed]).ok_or(OUT_OF_RANGE)?;
|
2015-02-18 14:27:12 +00:00
|
|
|
|
|
|
|
// if there are more than 9 digits, skip next digits.
|
|
|
|
let s = s.trim_left_matches(|c: char| '0' <= c && c <= '9');
|
|
|
|
|
|
|
|
Ok((s, v))
|
2015-02-13 09:46:02 +00:00
|
|
|
}
|
|
|
|
|
2018-06-12 14:49:13 +00:00
|
|
|
/// Tries to consume a fixed number of digits as a fractional second.
|
|
|
|
/// Returns the number of whole nanoseconds (0--999,999,999).
|
|
|
|
pub fn nanosecond_fixed(s: &str, digits: usize) -> ParseResult<(&str, i64)> {
|
|
|
|
// record the number of digits consumed for later scaling.
|
2019-11-24 00:06:27 +00:00
|
|
|
let (s, v) = number(s, digits, digits)?;
|
2018-06-12 14:49:13 +00:00
|
|
|
|
|
|
|
// scale the number accordingly.
|
|
|
|
static SCALE: [i64; 10] = [0, 100_000_000, 10_000_000, 1_000_000, 100_000, 10_000,
|
|
|
|
1_000, 100, 10, 1];
|
2019-11-24 00:06:27 +00:00
|
|
|
let v = v.checked_mul(SCALE[digits]).ok_or(OUT_OF_RANGE)?;
|
2018-06-12 14:49:13 +00:00
|
|
|
|
|
|
|
Ok((s, v))
|
|
|
|
}
|
|
|
|
|
2015-02-13 09:46:02 +00:00
|
|
|
/// Tries to parse the month index (0 through 11) with the first three ASCII letters.
|
|
|
|
pub fn short_month0(s: &str) -> ParseResult<(&str, u8)> {
|
|
|
|
if s.len() < 3 { return Err(TOO_SHORT); }
|
|
|
|
let buf = s.as_bytes();
|
2015-04-26 06:10:28 +00:00
|
|
|
let month0 = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
|
|
|
|
(b'j',b'a',b'n') => 0,
|
|
|
|
(b'f',b'e',b'b') => 1,
|
|
|
|
(b'm',b'a',b'r') => 2,
|
|
|
|
(b'a',b'p',b'r') => 3,
|
|
|
|
(b'm',b'a',b'y') => 4,
|
|
|
|
(b'j',b'u',b'n') => 5,
|
|
|
|
(b'j',b'u',b'l') => 6,
|
|
|
|
(b'a',b'u',b'g') => 7,
|
|
|
|
(b's',b'e',b'p') => 8,
|
|
|
|
(b'o',b'c',b't') => 9,
|
|
|
|
(b'n',b'o',b'v') => 10,
|
|
|
|
(b'd',b'e',b'c') => 11,
|
2015-02-13 09:46:02 +00:00
|
|
|
_ => return Err(INVALID)
|
|
|
|
};
|
|
|
|
Ok((&s[3..], month0))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tries to parse the weekday with the first three ASCII letters.
|
|
|
|
pub fn short_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
|
|
|
|
if s.len() < 3 { return Err(TOO_SHORT); }
|
|
|
|
let buf = s.as_bytes();
|
2015-04-26 06:10:28 +00:00
|
|
|
let weekday = match (buf[0] | 32, buf[1] | 32, buf[2] | 32) {
|
|
|
|
(b'm',b'o',b'n') => Weekday::Mon,
|
|
|
|
(b't',b'u',b'e') => Weekday::Tue,
|
|
|
|
(b'w',b'e',b'd') => Weekday::Wed,
|
|
|
|
(b't',b'h',b'u') => Weekday::Thu,
|
|
|
|
(b'f',b'r',b'i') => Weekday::Fri,
|
|
|
|
(b's',b'a',b't') => Weekday::Sat,
|
|
|
|
(b's',b'u',b'n') => Weekday::Sun,
|
2015-02-13 09:46:02 +00:00
|
|
|
_ => return Err(INVALID)
|
|
|
|
};
|
|
|
|
Ok((&s[3..], weekday))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tries to parse the month index (0 through 11) with short or long month names.
|
|
|
|
/// It prefers long month names to short month names when both are possible.
|
|
|
|
pub fn short_or_long_month0(s: &str) -> ParseResult<(&str, u8)> {
|
|
|
|
// lowercased month names, minus first three chars
|
|
|
|
static LONG_MONTH_SUFFIXES: [&'static str; 12] =
|
|
|
|
["uary", "ruary", "ch", "il", "", "e", "y", "ust", "tember", "ober", "ember", "ember"];
|
|
|
|
|
2019-11-24 00:06:27 +00:00
|
|
|
let (mut s, month0) = short_month0(s)?;
|
2015-02-13 09:46:02 +00:00
|
|
|
|
|
|
|
// tries to consume the suffix if possible
|
|
|
|
let suffix = LONG_MONTH_SUFFIXES[month0 as usize];
|
|
|
|
if s.len() >= suffix.len() && equals(&s[..suffix.len()], suffix) {
|
|
|
|
s = &s[suffix.len()..];
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok((s, month0))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tries to parse the weekday with short or long weekday names.
|
|
|
|
/// It prefers long weekday names to short weekday names when both are possible.
|
|
|
|
pub fn short_or_long_weekday(s: &str) -> ParseResult<(&str, Weekday)> {
|
|
|
|
// lowercased weekday names, minus first three chars
|
|
|
|
static LONG_WEEKDAY_SUFFIXES: [&'static str; 7] =
|
2016-03-28 16:46:25 +00:00
|
|
|
["day", "sday", "nesday", "rsday", "day", "urday", "day"];
|
2015-02-13 09:46:02 +00:00
|
|
|
|
2019-11-24 00:06:27 +00:00
|
|
|
let (mut s, weekday) = short_weekday(s)?;
|
2015-02-13 09:46:02 +00:00
|
|
|
|
|
|
|
// tries to consume the suffix if possible
|
|
|
|
let suffix = LONG_WEEKDAY_SUFFIXES[weekday.num_days_from_monday() as usize];
|
|
|
|
if s.len() >= suffix.len() && equals(&s[..suffix.len()], suffix) {
|
|
|
|
s = &s[suffix.len()..];
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok((s, weekday))
|
|
|
|
}
|
|
|
|
|
2015-02-15 12:01:36 +00:00
|
|
|
/// Tries to consume exactly one given character.
|
|
|
|
pub fn char(s: &str, c1: u8) -> ParseResult<&str> {
|
|
|
|
match s.as_bytes().first() {
|
|
|
|
Some(&c) if c == c1 => Ok(&s[1..]),
|
|
|
|
Some(_) => Err(INVALID),
|
|
|
|
None => Err(TOO_SHORT),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tries to consume one or more whitespace.
|
|
|
|
pub fn space(s: &str) -> ParseResult<&str> {
|
|
|
|
let s_ = s.trim_left();
|
|
|
|
if s_.len() < s.len() {
|
|
|
|
Ok(s_)
|
|
|
|
} else if s.is_empty() {
|
|
|
|
Err(TOO_SHORT)
|
|
|
|
} else {
|
|
|
|
Err(INVALID)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-13 09:46:02 +00:00
|
|
|
/// Consumes any number (including zero) of colon or spaces.
|
|
|
|
pub fn colon_or_space(s: &str) -> ParseResult<&str> {
|
|
|
|
Ok(s.trim_left_matches(|c: char| c == ':' || c.is_whitespace()))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tries to parse `[-+]\d\d` continued by `\d\d`. Return an offset in seconds if possible.
|
|
|
|
///
|
|
|
|
/// The additional `colon` may be used to parse a mandatory or optional `:`
|
2015-02-15 12:01:36 +00:00
|
|
|
/// between hours and minutes, and should return either a new suffix or `Err` when parsing fails.
|
2018-04-25 02:11:37 +00:00
|
|
|
pub fn timezone_offset<F>(s: &str, consume_colon: F) -> ParseResult<(&str, i32)>
|
2015-02-13 09:46:02 +00:00
|
|
|
where F: FnMut(&str) -> ParseResult<&str> {
|
2018-04-25 02:11:37 +00:00
|
|
|
timezone_offset_internal(s, consume_colon, false)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn timezone_offset_internal<F>(mut s: &str, mut consume_colon: F, allow_missing_minutes: bool)
|
|
|
|
-> ParseResult<(&str, i32)>
|
|
|
|
where F: FnMut(&str) -> ParseResult<&str>
|
|
|
|
{
|
2015-04-26 06:10:28 +00:00
|
|
|
fn digits(s: &str) -> ParseResult<(u8, u8)> {
|
|
|
|
let b = s.as_bytes();
|
|
|
|
if b.len() < 2 {
|
|
|
|
Err(TOO_SHORT)
|
|
|
|
} else {
|
|
|
|
Ok((b[0], b[1]))
|
|
|
|
}
|
|
|
|
}
|
2015-02-13 09:46:02 +00:00
|
|
|
let negative = match s.as_bytes().first() {
|
|
|
|
Some(&b'+') => false,
|
|
|
|
Some(&b'-') => true,
|
|
|
|
Some(_) => return Err(INVALID),
|
|
|
|
None => return Err(TOO_SHORT),
|
|
|
|
};
|
|
|
|
s = &s[1..];
|
|
|
|
|
|
|
|
// hours (00--99)
|
2019-11-24 00:06:27 +00:00
|
|
|
let hours = match digits(s)? {
|
2017-10-08 22:08:40 +00:00
|
|
|
(h1 @ b'0'...b'9', h2 @ b'0'...b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')),
|
2015-02-13 09:46:02 +00:00
|
|
|
_ => return Err(INVALID),
|
|
|
|
};
|
|
|
|
s = &s[2..];
|
|
|
|
|
|
|
|
// colons (and possibly other separators)
|
2019-11-24 00:06:27 +00:00
|
|
|
s = consume_colon(s)?;
|
2015-02-13 09:46:02 +00:00
|
|
|
|
|
|
|
// minutes (00--59)
|
2018-04-25 02:11:37 +00:00
|
|
|
// if the next two items are digits then we have to add minutes
|
|
|
|
let minutes = if let Ok(ds) = digits(s) {
|
|
|
|
match ds {
|
|
|
|
(m1 @ b'0'...b'5', m2 @ b'0'...b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')),
|
|
|
|
(b'6'...b'9', b'0'...b'9') => return Err(OUT_OF_RANGE),
|
|
|
|
_ => return Err(INVALID),
|
|
|
|
}
|
|
|
|
} else if allow_missing_minutes {
|
|
|
|
0
|
|
|
|
} else {
|
|
|
|
return Err(TOO_SHORT);
|
|
|
|
};
|
|
|
|
s = match s.len() {
|
|
|
|
len if len >= 2 => &s[2..],
|
|
|
|
len if len == 0 => s,
|
|
|
|
_ => return Err(TOO_SHORT),
|
2015-02-13 09:46:02 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
let seconds = hours * 3600 + minutes * 60;
|
|
|
|
Ok((s, if negative {-seconds} else {seconds}))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Same to `timezone_offset` but also allows for `z`/`Z` which is same to `+00:00`.
|
2018-04-25 02:11:37 +00:00
|
|
|
pub fn timezone_offset_zulu<F>(s: &str, colon: F)
|
|
|
|
-> ParseResult<(&str, i32)>
|
|
|
|
where F: FnMut(&str) -> ParseResult<&str>
|
|
|
|
{
|
2015-02-13 09:46:02 +00:00
|
|
|
match s.as_bytes().first() {
|
|
|
|
Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
|
|
|
|
_ => timezone_offset(s, colon),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-25 02:11:37 +00:00
|
|
|
/// Same to `timezone_offset` but also allows for `z`/`Z` which is same to
|
|
|
|
/// `+00:00`, and allows missing minutes entirely.
|
|
|
|
pub fn timezone_offset_permissive<F>(s: &str, colon: F)
|
|
|
|
-> ParseResult<(&str, i32)>
|
|
|
|
where F: FnMut(&str) -> ParseResult<&str>
|
|
|
|
{
|
|
|
|
match s.as_bytes().first() {
|
|
|
|
Some(&b'z') | Some(&b'Z') => Ok((&s[1..], 0)),
|
|
|
|
_ => timezone_offset_internal(s, colon, true),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-15 12:01:36 +00:00
|
|
|
/// Same to `timezone_offset` but also allows for RFC 2822 legacy timezones.
|
|
|
|
/// May return `None` which indicates an insufficient offset data (i.e. `-0000`).
|
|
|
|
pub fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)> {
|
|
|
|
// tries to parse legacy time zone names
|
|
|
|
let upto = s.as_bytes().iter().position(|&c| match c { b'a'...b'z' | b'A'...b'Z' => false,
|
2017-06-23 20:02:58 +00:00
|
|
|
_ => true })
|
|
|
|
.unwrap_or_else(|| s.len());
|
2015-02-15 12:01:36 +00:00
|
|
|
if upto > 0 {
|
|
|
|
let name = &s[..upto];
|
|
|
|
let s = &s[upto..];
|
2017-06-23 20:14:41 +00:00
|
|
|
let offset_hours = |o| Ok((s, Some(o * 3600)));
|
2015-02-15 12:01:36 +00:00
|
|
|
if equals(name, "gmt") || equals(name, "ut") {
|
2017-06-23 20:14:41 +00:00
|
|
|
offset_hours(0)
|
2015-02-15 12:01:36 +00:00
|
|
|
} else if equals(name, "edt") {
|
2017-06-23 20:14:41 +00:00
|
|
|
offset_hours(-4)
|
|
|
|
} else if equals(name, "est") || equals(name, "cdt") {
|
|
|
|
offset_hours(-5)
|
|
|
|
} else if equals(name, "cst") || equals(name, "mdt") {
|
|
|
|
offset_hours(-6)
|
|
|
|
} else if equals(name, "mst") || equals(name, "pdt") {
|
|
|
|
offset_hours(-7)
|
2015-02-15 12:01:36 +00:00
|
|
|
} else if equals(name, "pst") {
|
2017-06-23 20:14:41 +00:00
|
|
|
offset_hours(-8)
|
2015-02-15 12:01:36 +00:00
|
|
|
} else {
|
|
|
|
Ok((s, None)) // recommended by RFC 2822: consume but treat it as -0000
|
|
|
|
}
|
|
|
|
} else {
|
2019-11-24 00:06:27 +00:00
|
|
|
let (s_, offset) = timezone_offset(s, |s| Ok(s))?;
|
2019-11-30 21:38:42 +00:00
|
|
|
Ok((s_, Some(offset)))
|
2015-02-15 12:01:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|