diff --git a/src/items/combined.rs b/src/items/combined.rs index d452196..d83baf5 100644 --- a/src/items/combined.rs +++ b/src/items/combined.rs @@ -12,7 +12,8 @@ //! > In this format, the time of day should use 24-hour notation. Fractional //! > seconds are allowed, with either comma or period preceding the fraction. //! > ISO 8601 fractional minutes and hours are not supported. Typically, hosts -//! > support nanosecond timestamp resolution; excess precision is silently discarded. +//! > support nanosecond timestamp resolution; excess precision is silently +//! > discarded. use winnow::{ combinator::{alt, trace}, seq, ModalResult, Parser, @@ -27,17 +28,17 @@ use super::{ }; #[derive(PartialEq, Debug, Clone, Default)] -pub struct DateTime { +pub(crate) struct DateTime { pub(crate) date: Date, pub(crate) time: Time, } -pub fn parse(input: &mut &str) -> ModalResult { +pub(crate) fn parse(input: &mut &str) -> ModalResult { seq!(DateTime { - date: trace("date iso", alt((date::iso1, date::iso2))), + date: trace("iso_date", alt((date::iso1, date::iso2))), // Note: the `T` is lowercased by the main parse function _: alt((s('t').void(), (' ', space).void())), - time: trace("time iso", time::iso), + time: trace("iso_time", time::iso), }) .parse_next(input) } diff --git a/src/items/date.rs b/src/items/date.rs index b99dc12..8e6e5ec 100644 --- a/src/items/date.rs +++ b/src/items/date.rs @@ -29,13 +29,14 @@ use winnow::{ ascii::alpha1, combinator::{alt, opt, preceded, trace}, + error::ErrMode, seq, stream::AsChar, - token::{take, take_while}, + token::take_while, ModalResult, Parser, }; -use super::primitive::{dec_uint, s}; +use super::primitive::{ctx_err, dec_uint, s}; use crate::ParseDateTimeError; #[derive(PartialEq, Eq, Clone, Debug, Default)] @@ -45,39 +46,112 @@ pub struct Date { pub year: Option, } +impl TryFrom<(&str, u32, u32)> for Date { + type Error = &'static str; + + /// Create a `Date` from a tuple of `(year, month, day)`. 
+ /// + /// Note: The `year` is represented as a `&str` to handle a specific GNU + /// compatibility quirk. According to the GNU documentation: "if the year is + /// 68 or smaller, then 2000 is added to it; otherwise, if year is less than + /// 100, then 1900 is added to it." This adjustment only applies to + /// two-digit year strings. For example, `"00"` is interpreted as `2000`, + /// whereas `"0"`, `"000"`, or `"0000"` are interpreted as `0`. + fn try_from(value: (&str, u32, u32)) -> Result { + let (year_str, month, day) = value; + + let mut year = year_str + .parse::() + .map_err(|_| "year must be a valid number")?; + + // If year is 68 or smaller, then 2000 is added to it; otherwise, if year + // is less than 100, then 1900 is added to it. + // + // GNU quirk: this only applies to two-digit years. For example, + // "98-01-01" will be parsed as "1998-01-01", while "098-01-01" will be + // parsed as "0098-01-01". + if year_str.len() == 2 { + if year <= 68 { + year += 2000 + } else { + year += 1900 + } + } + + // 2147485547 is the maximum value accepted by GNU, but chrono only + // behaves like GNU for years in the range: [0, 9999], so we keep the + // year in the range [0, 9999]. + // + // See discussion in https://github.com/uutils/parse_datetime/issues/160. 
+ if year > 9999 { + return Err("year must be no greater than 9999"); + } + + if !(1..=12).contains(&month) { + return Err("month must be between 1 and 12"); + } + + let is_leap_year = (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0); + + if !(1..=31).contains(&day) + || (month == 2 && day > (if is_leap_year { 29 } else { 28 })) + || ((month == 4 || month == 6 || month == 9 || month == 11) && day > 30) + { + return Err("day is not valid for the given month"); + } + + Ok(Date { + day, + month, + year: Some(year), + }) + } +} + pub fn parse(input: &mut &str) -> ModalResult { alt((iso1, iso2, us, literal1, literal2)).parse_next(input) } -/// Parse `YYYY-MM-DD` or `YY-MM-DD` +/// Parse `[year]-[month]-[day]` /// /// This is also used by [`combined`](super::combined). pub fn iso1(input: &mut &str) -> ModalResult { - seq!(Date { - year: year.map(Some), - _: s('-'), - month: month, - _: s('-'), - day: day, - }) - .parse_next(input) + let (year, _, month, _, day) = ( + // `year` must be a `&str`, see comment in `TryFrom` impl for `Date`. + s(take_while(1.., AsChar::is_dec_digit)), + s('-'), + s(dec_uint), + s('-'), + s(dec_uint), + ) + .parse_next(input)?; + + (year, month, day) + .try_into() + .map_err(|e| ErrMode::Cut(ctx_err(e))) } -/// Parse `YYYYMMDD` +/// Parse `[year][month][day]` /// /// This is also used by [`combined`](super::combined). pub fn iso2(input: &mut &str) -> ModalResult { - s(( - take(4usize).try_map(|s: &str| s.parse::()), - take(2usize).try_map(|s: &str| s.parse::()), - take(2usize).try_map(|s: &str| s.parse::()), - )) - .map(|(year, month, day): (u32, u32, u32)| Date { - day, - month, - year: Some(year), - }) - .parse_next(input) + let date_str = take_while(5.., AsChar::is_dec_digit).parse_next(input)?; + let len = date_str.len(); + + // `year` must be a `&str`, see comment in `TryFrom` impl for `Date`. 
+ let year = &date_str[..len - 4]; + + let month = date_str[len - 4..len - 2] + .parse::() + .map_err(|_| ErrMode::Cut(ctx_err("month must be a valid number")))?; + + let day = date_str[len - 2..] + .parse::() + .map_err(|_| ErrMode::Cut(ctx_err("day must be a valid number")))?; + + (year, month, day) + .try_into() + .map_err(|e| ErrMode::Cut(ctx_err(e))) } /// Parse `MM/DD/YYYY`, `MM/DD/YY` or `MM/DD` @@ -202,6 +276,94 @@ mod tests { // 14nov2022 // ``` + #[test] + fn iso1() { + let reference = Date { + year: Some(1), + month: 1, + day: 1, + }; + + for mut s in ["1-1-1", "1 - 1 - 1", "1-01-01", "1-001-001", "001-01-01"] { + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + } + + // GNU quirk: when year string is 2 characters long and year is 68 or + // smaller, 2000 is added to it. + let reference = Date { + year: Some(2001), + month: 1, + day: 1, + }; + + for mut s in ["01-1-1", "01-01-01"] { + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + } + + // GNU quirk: when year string is 2 characters long and year is 69 or + // greater, 1900 is added to it. + let reference = Date { + year: Some(1970), + month: 1, + day: 1, + }; + + for mut s in ["70-1-1", "70-01-01"] { + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + } + + for mut s in ["01-00-01", "01-13-01", "01-01-32", "01-02-29", "01-04-31"] { + let old_s = s.to_owned(); + assert!(parse(&mut s).is_err(), "Format string: {old_s}"); + } + } + + #[test] + fn iso2() { + let reference = Date { + year: Some(1), + month: 1, + day: 1, + }; + + for mut s in ["10101", "0010101", "00010101", "000010101"] { + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + } + + // GNU quirk: when year string is 2 characters long and year is 68 or + // smaller, 2000 is added to it. 
+ let reference = Date { + year: Some(2001), + month: 1, + day: 1, + }; + + let mut s = "010101"; + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + + // GNU quirk: when year string is 2 characters long and year is 69 or + // greater, 1900 is added to it. + let reference = Date { + year: Some(1970), + month: 1, + day: 1, + }; + + let mut s = "700101"; + let old_s = s.to_owned(); + assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}"); + + for mut s in ["010001", "011301", "010132", "010229", "010431"] { + let old_s = s.to_owned(); + assert!(parse(&mut s).is_err(), "Format string: {old_s}"); + } + } + #[test] fn with_year() { let reference = Date { diff --git a/src/items/epoch.rs b/src/items/epoch.rs index 338de8a..67dd1a7 100644 --- a/src/items/epoch.rs +++ b/src/items/epoch.rs @@ -6,12 +6,6 @@ use winnow::{combinator::preceded, ModalResult, Parser}; use super::primitive::{dec_int, s}; /// Parse a timestamp in the form of `@1234567890`. 
-/// -/// Grammar: -/// -/// ```ebnf -/// timestamp = "@" dec_int ; -/// ``` pub fn parse(input: &mut &str) -> ModalResult { s(preceded("@", dec_int)).parse_next(input) } diff --git a/src/items/mod.rs b/src/items/mod.rs index 1cd36d0..64b47ba 100644 --- a/src/items/mod.rs +++ b/src/items/mod.rs @@ -54,7 +54,7 @@ use winnow::{ use crate::ParseDateTimeError; #[derive(PartialEq, Debug)] -pub enum Item { +pub(crate) enum Item { Timestamp(i32), Year(u32), DateTime(combined::DateTime), @@ -95,6 +95,14 @@ pub(crate) fn at_local( /// /// items = item , { item } ; /// item = datetime | date | time | relative | weekday | timezone | year ; +/// +/// datetime = iso_date , ( "T" | "t" | whitespace ) , iso_time ; +/// +/// iso_date = year , [ delim ] , month , [ delim ] , day ; +/// year = dec_int ; +/// month = dec_int ; +/// day = dec_int ; +/// delim = [ { whitespace } ] , "-" , [ { whitespace } ] ; /// ``` pub(crate) fn parse(input: &mut &str) -> ModalResult { trace("parse", alt((parse_timestamp, parse_items))).parse_next(input) diff --git a/src/lib.rs b/src/lib.rs index 7661e8f..c5cbf4f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -442,7 +442,6 @@ mod tests { for s in [ "1997-01-01 00:00:00 +0000", "1997-01-01 00:00:00 +00", - "199701010000 +0000", "1997-01-01 00:00 +0000", "1997-01-01 00:00:00 +0000", "1997-01-01T00:00:00+0000", @@ -508,7 +507,6 @@ mod tests { for s in [ "1997-01-01 00:00:00 +0000 +1 year", "1997-01-01 00:00:00 +00 +1 year", - "199701010000 +0000 +1 year", "1997-01-01T00:00:00Z +1 year", "1997-01-01 00:00 +0000 +1 year", "1997-01-01 00:00:00 +0000 +1 year",