Reimplement scan::number

The original would first check that there is the right amount of numeric
characters and then parse them using std::str::parse, which internally
checks the characters again and also checks for a -/+ prefix, which is
not necessary in this case.

Since we are already going over the characters, we may as well do the
parsing ourselves. The length of the function is roughly the same and
it is faster:

 name                                                 simplify-from-str ns/iter  reimplement-number ns/iter  diff ns/iter   diff %  speedup
 datetime::tests::bench_datetime_from_str             448                        365                                  -83  -18.53%   x 1.23
 datetime::tests::bench_datetime_parse_from_rfc2822   242                        195                                  -47  -19.42%   x 1.24
 datetime::tests::bench_datetime_parse_from_rfc3339   234                        166                                  -68  -29.06%   x 1.41
This commit is contained in:
Michal Srb 2019-11-22 14:34:30 +01:00
parent f7318277e2
commit 6da5359d39
1 changed files with 23 additions and 12 deletions

View File

@ -33,20 +33,31 @@ fn equals(s: &str, pattern: &str) -> bool {
pub fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> {
assert!(min <= max);
// limit `s` to given number of digits
let mut window = s.as_bytes();
if window.len() > max { window = &window[..max]; }
// scan digits
let upto = window.iter().position(|&c| c < b'0' || b'9' < c)
.unwrap_or_else(|| window.len());
if upto < min {
return Err(if window.is_empty() {TOO_SHORT} else {INVALID});
// We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on
// the first non-numeric byte, which may be another ascii character or beginning of multi-byte
// UTF-8 character.
let bytes = s.as_bytes();
if bytes.len() < min {
return Err(TOO_SHORT);
}
// we can overflow here, which is the only possible cause of error from `parse`.
let v: i64 = try!(s[..upto].parse().map_err(|_| OUT_OF_RANGE));
Ok((&s[upto..], v))
let mut n = 0i64;
for (i, c) in bytes.iter().take(max).copied().enumerate() {
if c < b'0' || b'9' < c {
if i < min {
return Err(INVALID);
} else {
return Ok((&s[i..], n));
}
}
n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) {
Some(n) => n,
None => return Err(OUT_OF_RANGE),
};
}
Ok((&s[max.min(bytes.len())..], n))
}
/// Tries to consume at least one digits as a fractional second.