Skip to content

Improve the iso1() and iso2() date parsers #183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions src/items/combined.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
//! > In this format, the time of day should use 24-hour notation. Fractional
//! > seconds are allowed, with either comma or period preceding the fraction.
//! > ISO 8601 fractional minutes and hours are not supported. Typically, hosts
//! > support nanosecond timestamp resolution; excess precision is silently discarded.
//! > support nanosecond timestamp resolution; excess precision is silently
//! > discarded.
use winnow::{
combinator::{alt, trace},
seq, ModalResult, Parser,
Expand All @@ -27,17 +28,17 @@ use super::{
};

#[derive(PartialEq, Debug, Clone, Default)]
pub struct DateTime {
pub(crate) struct DateTime {
pub(crate) date: Date,
pub(crate) time: Time,
}

pub fn parse(input: &mut &str) -> ModalResult<DateTime> {
pub(crate) fn parse(input: &mut &str) -> ModalResult<DateTime> {
seq!(DateTime {
date: trace("date iso", alt((date::iso1, date::iso2))),
date: trace("iso_date", alt((date::iso1, date::iso2))),
// Note: the `T` is lowercased by the main parse function
_: alt((s('t').void(), (' ', space).void())),
time: trace("time iso", time::iso),
time: trace("iso_time", time::iso),
})
.parse_next(input)
}
Expand Down
208 changes: 185 additions & 23 deletions src/items/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,14 @@
use winnow::{
ascii::alpha1,
combinator::{alt, opt, preceded, trace},
error::ErrMode,
seq,
stream::AsChar,
token::{take, take_while},
token::take_while,
ModalResult, Parser,
};

use super::primitive::{dec_uint, s};
use super::primitive::{ctx_err, dec_uint, s};
use crate::ParseDateTimeError;

#[derive(PartialEq, Eq, Clone, Debug, Default)]
Expand All @@ -45,39 +46,112 @@ pub struct Date {
pub year: Option<u32>,
}

impl TryFrom<(&str, u32, u32)> for Date {
    type Error = &'static str;

    /// Create a `Date` from a tuple of `(year, month, day)`.
    ///
    /// Note: The `year` is represented as a `&str` to handle a specific GNU
    /// compatibility quirk. According to the GNU documentation: "if the year is
    /// 68 or smaller, then 2000 is added to it; otherwise, if year is less than
    /// 100, then 1900 is added to it." This adjustment only applies to
    /// two-digit year strings. For example, `"00"` is interpreted as `2000`,
    /// whereas `"0"`, `"000"`, or `"0000"` are interpreted as `0`.
    ///
    /// # Errors
    ///
    /// Returns a static message when:
    /// - the year string is empty, contains anything other than ASCII
    ///   digits, or does not fit in a `u32`;
    /// - the (adjusted) year is greater than 9999;
    /// - the month is outside `1..=12`;
    /// - the day does not exist in the given month (leap years are taken
    ///   into account for February).
    fn try_from(value: (&str, u32, u32)) -> Result<Self, Self::Error> {
        let (year_str, month, day) = value;

        // `str::parse::<u32>` accepts a leading `+`, which would let a string
        // such as "+5" through and, being two bytes long, make it count as a
        // "two-digit" year. Only plain decimal digits are a valid year.
        if !year_str.bytes().all(|b| b.is_ascii_digit()) {
            return Err("year must be a valid number");
        }

        let parsed_year = year_str
            .parse::<u32>()
            .map_err(|_| "year must be a valid number")?;

        // If year is 68 or smaller, then 2000 is added to it; otherwise, if year
        // is less than 100, then 1900 is added to it.
        //
        // GNU quirk: this only applies to two-digit years. For example,
        // "98-01-01" will be parsed as "1998-01-01", while "098-01-01" will be
        // parsed as "0098-01-01".
        let year = match (year_str.len(), parsed_year) {
            (2, 0..=68) => parsed_year + 2000,
            (2, _) => parsed_year + 1900,
            _ => parsed_year,
        };

        // 2147485547 is the maximum value accepted by GNU, but chrono only
        // behaves like GNU for years in the range: [0, 9999], so we keep in the
        // range [0, 9999].
        //
        // See discussion in https://github.com/uutils/parse_datetime/issues/160.
        if year > 9999 {
            return Err("year must be no greater than 9999");
        }

        let is_leap_year = (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0);

        // Number of days in `month`; any month outside 1..=12 is rejected here,
        // before the day is looked at, so the month error takes precedence.
        let days_in_month = match month {
            1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
            4 | 6 | 9 | 11 => 30,
            2 if is_leap_year => 29,
            2 => 28,
            _ => return Err("month must be between 1 and 12"),
        };

        if !(1..=days_in_month).contains(&day) {
            return Err("day is not valid for the given month");
        }

        Ok(Date {
            day,
            month,
            year: Some(year),
        })
    }
}

/// Parse a calendar date in any of the supported notations.
///
/// The candidate parsers are handed to `alt` in the order `iso1`, `iso2`,
/// `us`, `literal1`, `literal2`, and the combined parser is run on `input`.
pub fn parse(input: &mut &str) -> ModalResult<Date> {
    let mut any_date_format = alt((iso1, iso2, us, literal1, literal2));
    any_date_format.parse_next(input)
}

/// Parse `YYYY-MM-DD` or `YY-MM-DD`
/// Parse `[year]-[month]-[day]`
///
/// This is also used by [`combined`](super::combined).
pub fn iso1(input: &mut &str) -> ModalResult<Date> {
seq!(Date {
year: year.map(Some),
_: s('-'),
month: month,
_: s('-'),
day: day,
})
.parse_next(input)
let (year, _, month, _, day) = (
// `year` must be a `&str`, see comment in `TryFrom` impl for `Date`.
s(take_while(1.., AsChar::is_dec_digit)),
s('-'),
s(dec_uint),
s('-'),
s(dec_uint),
)
.parse_next(input)?;

(year, month, day)
.try_into()
.map_err(|e| ErrMode::Cut(ctx_err(e)))
}

/// Parse `YYYYMMDD`
/// Parse `[year][month][day]`
///
/// This is also used by [`combined`](super::combined).
pub fn iso2(input: &mut &str) -> ModalResult<Date> {
s((
take(4usize).try_map(|s: &str| s.parse::<u32>()),
take(2usize).try_map(|s: &str| s.parse::<u32>()),
take(2usize).try_map(|s: &str| s.parse::<u32>()),
))
.map(|(year, month, day): (u32, u32, u32)| Date {
day,
month,
year: Some(year),
})
.parse_next(input)
let date_str = take_while(5.., AsChar::is_dec_digit).parse_next(input)?;
let len = date_str.len();

// `year` must be a `&str`, see comment in `TryFrom` impl for `Date`.
let year = &date_str[..len - 4];

let month = date_str[len - 4..len - 2]
.parse::<u32>()
.map_err(|_| ErrMode::Cut(ctx_err("month must be a valid number")))?;

let day = date_str[len - 2..]
.parse::<u32>()
.map_err(|_| ErrMode::Cut(ctx_err("day must be a valid number")))?;

(year, month, day)
.try_into()
.map_err(|e| ErrMode::Cut(ctx_err(e)))
}

/// Parse `MM/DD/YYYY`, `MM/DD/YY` or `MM/DD`
Expand Down Expand Up @@ -202,6 +276,94 @@ mod tests {
// 14nov2022
// ```

#[test]
fn iso1() {
    // Asserts that every input parses to `expected`. `parse` receives the
    // input by `&mut`, so a separate cursor is handed to it and the pristine
    // string is kept for the failure message.
    let assert_all_parse_to = |inputs: &[&str], expected: Date| {
        for &old_s in inputs {
            let mut remaining = old_s;
            assert_eq!(
                parse(&mut remaining).unwrap(),
                expected,
                "Format string: {old_s}"
            );
        }
    };

    // Years that are not exactly two characters long are taken literally,
    // and whitespace around the dashes as well as leading zeros are accepted.
    assert_all_parse_to(
        &["1-1-1", "1 - 1 - 1", "1-01-01", "1-001-001", "001-01-01"],
        Date {
            year: Some(1),
            month: 1,
            day: 1,
        },
    );

    // GNU quirk: when year string is 2 characters long and year is 68 or
    // smaller, 2000 is added to it.
    assert_all_parse_to(
        &["01-1-1", "01-01-01"],
        Date {
            year: Some(2001),
            month: 1,
            day: 1,
        },
    );

    // GNU quirk: when year string is 2 characters long and year is less
    // than 100, 1900 is added to it.
    assert_all_parse_to(
        &["70-1-1", "70-01-01"],
        Date {
            year: Some(1970),
            month: 1,
            day: 1,
        },
    );

    // Month 0, month 13, day 32, Feb 29 in a non-leap year, and April 31
    // must all be rejected.
    let invalid_dates = ["01-00-01", "01-13-01", "01-01-32", "01-02-29", "01-04-31"];
    for &old_s in &invalid_dates {
        let mut remaining = old_s;
        assert!(parse(&mut remaining).is_err(), "Format string: {old_s}");
    }
}

#[test]
fn iso2() {
    // Asserts that every input parses to `expected`. `parse` receives the
    // input by `&mut`, so a separate cursor is handed to it and the pristine
    // string is kept for the failure message.
    let assert_all_parse_to = |inputs: &[&str], expected: Date| {
        for &old_s in inputs {
            let mut remaining = old_s;
            assert_eq!(
                parse(&mut remaining).unwrap(),
                expected,
                "Format string: {old_s}"
            );
        }
    };

    // The last four digits are month and day; whatever precedes them is the
    // year, which is taken literally unless it is exactly two digits long.
    assert_all_parse_to(
        &["10101", "0010101", "00010101", "000010101"],
        Date {
            year: Some(1),
            month: 1,
            day: 1,
        },
    );

    // GNU quirk: when year string is 2 characters long and year is 68 or
    // smaller, 2000 is added to it.
    assert_all_parse_to(
        &["010101"],
        Date {
            year: Some(2001),
            month: 1,
            day: 1,
        },
    );

    // GNU quirk: when year string is 2 characters long and year is less
    // than 100, 1900 is added to it.
    assert_all_parse_to(
        &["700101"],
        Date {
            year: Some(1970),
            month: 1,
            day: 1,
        },
    );

    // Month 0, month 13, day 32, Feb 29 in a non-leap year, and April 31
    // must all be rejected.
    let invalid_dates = ["010001", "011301", "010132", "010229", "010431"];
    for &old_s in &invalid_dates {
        let mut remaining = old_s;
        assert!(parse(&mut remaining).is_err(), "Format string: {old_s}");
    }
}

#[test]
fn with_year() {
let reference = Date {
Expand Down
6 changes: 0 additions & 6 deletions src/items/epoch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,6 @@ use winnow::{combinator::preceded, ModalResult, Parser};
use super::primitive::{dec_int, s};

/// Parse a timestamp in the form of `@1234567890`.
///
/// Grammar:
///
/// ```ebnf
/// timestamp = "@" dec_int ;
/// ```
pub fn parse(input: &mut &str) -> ModalResult<i32> {
    // A literal `@` immediately followed by a decimal integer, with the whole
    // thing passed through the shared `s` wrapper like the other item parsers.
    let mut timestamp = s(preceded("@", dec_int));
    timestamp.parse_next(input)
}
10 changes: 9 additions & 1 deletion src/items/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ use winnow::{
use crate::ParseDateTimeError;

#[derive(PartialEq, Debug)]
pub enum Item {
pub(crate) enum Item {
Timestamp(i32),
Year(u32),
DateTime(combined::DateTime),
Expand Down Expand Up @@ -95,6 +95,14 @@ pub(crate) fn at_local(
///
/// items = item , { item } ;
/// item = datetime | date | time | relative | weekday | timezone | year ;
///
/// datetime = iso_date , ( "T" | "t" | whitespace ) , iso_time ;
///
/// iso_date = year , [ delim ] , month , [ delim ] , day ;
/// year = dec_int ;
/// month = dec_int ;
/// day = dec_int ;
/// delim = [ { whitespace } ] , "-" , [ { whitespace } ] ;
/// ```
pub(crate) fn parse(input: &mut &str) -> ModalResult<DateTimeBuilder> {
trace("parse", alt((parse_timestamp, parse_items))).parse_next(input)
Expand Down
2 changes: 0 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,6 @@ mod tests {
for s in [
"1997-01-01 00:00:00 +0000",
"1997-01-01 00:00:00 +00",
"199701010000 +0000",
"1997-01-01 00:00 +0000",
"1997-01-01 00:00:00 +0000",
"1997-01-01T00:00:00+0000",
Expand Down Expand Up @@ -508,7 +507,6 @@ mod tests {
for s in [
"1997-01-01 00:00:00 +0000 +1 year",
"1997-01-01 00:00:00 +00 +1 year",
"199701010000 +0000 +1 year",
"1997-01-01T00:00:00Z +1 year",
"1997-01-01 00:00 +0000 +1 year",
"1997-01-01 00:00:00 +0000 +1 year",
Expand Down
Loading