Skip to content

Commit 7478659

Browse files
committed
feat: improve date parsing with GNU compatibility and validation
- Add `TryFrom<(&str, u32, u32)>` implementation for `Date` struct with validation.
- Add comprehensive date validation for months and days, taking leap years into account.
- Refactor `iso1()` and `iso2()` date parsers to use the new validation logic.
1 parent 5d869b5 commit 7478659

File tree

5 files changed

+200
-37
lines changed

5 files changed

+200
-37
lines changed

src/items/combined.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
//! > In this format, the time of day should use 24-hour notation. Fractional
1313
//! > seconds are allowed, with either comma or period preceding the fraction.
1414
//! > ISO 8601 fractional minutes and hours are not supported. Typically, hosts
15-
//! > support nanosecond timestamp resolution; excess precision is silently discarded.
15+
//! > support nanosecond timestamp resolution; excess precision is silently
16+
//! > discarded.
1617
use winnow::{
1718
combinator::{alt, trace},
1819
seq, ModalResult, Parser,
@@ -27,17 +28,17 @@ use super::{
2728
};
2829

2930
#[derive(PartialEq, Debug, Clone, Default)]
30-
pub struct DateTime {
31+
pub(crate) struct DateTime {
3132
pub(crate) date: Date,
3233
pub(crate) time: Time,
3334
}
3435

35-
pub fn parse(input: &mut &str) -> ModalResult<DateTime> {
36+
pub(crate) fn parse(input: &mut &str) -> ModalResult<DateTime> {
3637
seq!(DateTime {
37-
date: trace("date iso", alt((date::iso1, date::iso2))),
38+
date: trace("iso_date", alt((date::iso1, date::iso2))),
3839
// Note: the `T` is lowercased by the main parse function
3940
_: alt((s('t').void(), (' ', space).void())),
40-
time: trace("time iso", time::iso),
41+
time: trace("iso_time", time::iso),
4142
})
4243
.parse_next(input)
4344
}

src/items/date.rs

Lines changed: 185 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,14 @@
2929
use winnow::{
3030
ascii::alpha1,
3131
combinator::{alt, opt, preceded, trace},
32+
error::ErrMode,
3233
seq,
3334
stream::AsChar,
34-
token::{take, take_while},
35+
token::take_while,
3536
ModalResult, Parser,
3637
};
3738

38-
use super::primitive::{dec_uint, s};
39+
use super::primitive::{ctx_err, dec_uint, s};
3940
use crate::ParseDateTimeError;
4041

4142
#[derive(PartialEq, Eq, Clone, Debug, Default)]
@@ -45,39 +46,112 @@ pub struct Date {
4546
pub year: Option<u32>,
4647
}
4748

49+
impl TryFrom<(&str, u32, u32)> for Date {
50+
type Error = &'static str;
51+
52+
/// Create a `Date` from a tuple of `(year, month, day)`.
53+
///
54+
/// Note: The `year` is represented as a `&str` to handle a specific GNU
55+
/// compatibility quirk. According to the GNU documentation: "if the year is
56+
/// 68 or smaller, then 2000 is added to it; otherwise, if year is less than
57+
/// 100, then 1900 is added to it." This adjustment only applies to
58+
/// two-digit year strings. For example, `"00"` is interpreted as `2000`,
59+
/// whereas `"0"`, `"000"`, or `"0000"` are interpreted as `0`.
60+
fn try_from(value: (&str, u32, u32)) -> Result<Self, Self::Error> {
61+
let (year_str, month, day) = value;
62+
63+
let mut year = year_str
64+
.parse::<u32>()
65+
.map_err(|_| "year must be a valid number")?;
66+
67+
// If year is 68 or smaller, then 2000 is added to it; otherwise, if year
68+
// is less than 100, then 1900 is added to it.
69+
//
70+
// GNU quirk: this only applies to two-digit years. For example,
71+
// "98-01-01" will be parsed as "1998-01-01", while "098-01-01" will be
72+
// parsed as "0098-01-01".
73+
if year_str.len() == 2 {
74+
if year <= 68 {
75+
year += 2000
76+
} else if year < 100 {
77+
year += 1900
78+
}
79+
}
80+
81+
// 2147485547 is the maximum value accepted by GNU, but chrono only
82+
// behaves like GNU for years in the range: [0, 9999], so we keep in the
83+
// range [0, 9999].
84+
//
85+
// See discussion in https://github.com/uutils/parse_datetime/issues/160.
86+
if year > 9999 {
87+
return Err("year must be no greater than 9999");
88+
}
89+
90+
if !(1..=12).contains(&month) {
91+
return Err("month must be between 1 and 12");
92+
}
93+
94+
let is_leap_year = (year % 4 == 0 && year % 100 != 0) || (year % 400 == 0);
95+
96+
if !(1..=31).contains(&day)
97+
|| (month == 2 && day > (if is_leap_year { 29 } else { 28 }))
98+
|| ((month == 4 || month == 6 || month == 9 || month == 11) && day > 30)
99+
{
100+
return Err("day is not valid for the given month");
101+
}
102+
103+
Ok(Date {
104+
day,
105+
month,
106+
year: Some(year),
107+
})
108+
}
109+
}
110+
48111
pub fn parse(input: &mut &str) -> ModalResult<Date> {
49112
alt((iso1, iso2, us, literal1, literal2)).parse_next(input)
50113
}
51114

52-
/// Parse `YYYY-MM-DD` or `YY-MM-DD`
115+
/// Parse `[year]-[month]-[day]`
53116
///
54117
/// This is also used by [`combined`](super::combined).
55118
pub fn iso1(input: &mut &str) -> ModalResult<Date> {
56-
seq!(Date {
57-
year: year.map(Some),
58-
_: s('-'),
59-
month: month,
60-
_: s('-'),
61-
day: day,
62-
})
63-
.parse_next(input)
119+
let (year, _, month, _, day) = (
120+
// `year` must be a `&str`, see comment in `TryFrom` impl for `Date`.
121+
s(take_while(1.., AsChar::is_dec_digit)),
122+
s('-'),
123+
s(dec_uint),
124+
s('-'),
125+
s(dec_uint),
126+
)
127+
.parse_next(input)?;
128+
129+
(year, month, day)
130+
.try_into()
131+
.map_err(|e| ErrMode::Cut(ctx_err(e)))
64132
}
65133

66-
/// Parse `YYYYMMDD`
134+
/// Parse `[year][month][day]`
67135
///
68136
/// This is also used by [`combined`](super::combined).
69137
pub fn iso2(input: &mut &str) -> ModalResult<Date> {
70-
s((
71-
take(4usize).try_map(|s: &str| s.parse::<u32>()),
72-
take(2usize).try_map(|s: &str| s.parse::<u32>()),
73-
take(2usize).try_map(|s: &str| s.parse::<u32>()),
74-
))
75-
.map(|(year, month, day): (u32, u32, u32)| Date {
76-
day,
77-
month,
78-
year: Some(year),
79-
})
80-
.parse_next(input)
138+
let date_str = take_while(5.., AsChar::is_dec_digit).parse_next(input)?;
139+
let len = date_str.len();
140+
141+
// `year` must be a `&str`, see comment in `TryFrom` impl for `Date`.
142+
let year = &date_str[..len - 4];
143+
144+
let month = date_str[len - 4..len - 2]
145+
.parse::<u32>()
146+
.map_err(|_| ErrMode::Cut(ctx_err("month must be a valid number")))?;
147+
148+
let day = date_str[len - 2..]
149+
.parse::<u32>()
150+
.map_err(|_| ErrMode::Cut(ctx_err("day must be a valid number")))?;
151+
152+
(year, month, day)
153+
.try_into()
154+
.map_err(|e| ErrMode::Cut(ctx_err(e)))
81155
}
82156

83157
/// Parse `MM/DD/YYYY`, `MM/DD/YY` or `MM/DD`
@@ -202,6 +276,94 @@ mod tests {
202276
// 14nov2022
203277
// ```
204278

279+
#[test]
280+
fn iso1() {
281+
let reference = Date {
282+
year: Some(1),
283+
month: 1,
284+
day: 1,
285+
};
286+
287+
for mut s in ["1-1-1", "1 - 1 - 1", "1-01-01", "1-001-001", "001-01-01"] {
288+
let old_s = s.to_owned();
289+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
290+
}
291+
292+
// GNU quirk: when year string is 2 characters long and year is 68 or
293+
// smaller, 2000 is added to it.
294+
let reference = Date {
295+
year: Some(2001),
296+
month: 1,
297+
day: 1,
298+
};
299+
300+
for mut s in ["01-1-1", "01-01-01"] {
301+
let old_s = s.to_owned();
302+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
303+
}
304+
305+
// GNU quirk: when year string is 2 characters long and year is greater
305+
// than 68, 1900 is added to it.
307+
let reference = Date {
308+
year: Some(1970),
309+
month: 1,
310+
day: 1,
311+
};
312+
313+
for mut s in ["70-1-1", "70-01-01"] {
314+
let old_s = s.to_owned();
315+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
316+
}
317+
318+
for mut s in ["01-00-01", "01-13-01", "01-01-32", "01-02-29", "01-04-31"] {
319+
let old_s = s.to_owned();
320+
assert!(parse(&mut s).is_err(), "Format string: {old_s}");
321+
}
322+
}
323+
324+
#[test]
325+
fn iso2() {
326+
let reference = Date {
327+
year: Some(1),
328+
month: 1,
329+
day: 1,
330+
};
331+
332+
for mut s in ["10101", "0010101", "00010101", "000010101"] {
333+
let old_s = s.to_owned();
334+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
335+
}
336+
337+
// GNU quirk: when year string is 2 characters long and year is 68 or
338+
// smaller, 2000 is added to it.
339+
let reference = Date {
340+
year: Some(2001),
341+
month: 1,
342+
day: 1,
343+
};
344+
345+
let mut s = "010101";
346+
let old_s = s.to_owned();
347+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
348+
349+
// GNU quirk: when year string is 2 characters long and year is greater
349+
// than 68, 1900 is added to it.
351+
let reference = Date {
352+
year: Some(1970),
353+
month: 1,
354+
day: 1,
355+
};
356+
357+
let mut s = "700101";
358+
let old_s = s.to_owned();
359+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
360+
361+
for mut s in ["010001", "011301", "010132", "010229", "010431"] {
362+
let old_s = s.to_owned();
363+
assert!(parse(&mut s).is_err(), "Format string: {old_s}");
364+
}
365+
}
366+
205367
#[test]
206368
fn with_year() {
207369
let reference = Date {

src/items/epoch.rs

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,6 @@ use winnow::{combinator::preceded, ModalResult, Parser};
66
use super::primitive::{dec_int, s};
77

88
/// Parse a timestamp in the form of `@1234567890`.
9-
///
10-
/// Grammar:
11-
///
12-
/// ```ebnf
13-
/// timestamp = "@" dec_int ;
14-
/// ```
159
pub fn parse(input: &mut &str) -> ModalResult<i32> {
1610
s(preceded("@", dec_int)).parse_next(input)
1711
}

src/items/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ use winnow::{
5454
use crate::ParseDateTimeError;
5555

5656
#[derive(PartialEq, Debug)]
57-
pub enum Item {
57+
pub(crate) enum Item {
5858
Timestamp(i32),
5959
Year(u32),
6060
DateTime(combined::DateTime),
@@ -95,6 +95,14 @@ pub(crate) fn at_local(
9595
///
9696
/// items = item , { item } ;
9797
/// item = datetime | date | time | relative | weekday | timezone | year ;
98+
///
99+
/// datetime = iso_date , ( "T" | "t" | whitespace ) , iso_time ;
100+
///
101+
/// iso_date = year , [ delim ] , month , [ delim ] , day ;
102+
/// year = dec_int ;
103+
/// month = dec_int ;
104+
/// day = dec_int ;
105+
/// delim = [ { whitespace } ] , "-" , [ { whitespace } ] ;
98106
/// ```
99107
pub(crate) fn parse(input: &mut &str) -> ModalResult<DateTimeBuilder> {
100108
trace("parse", alt((parse_timestamp, parse_items))).parse_next(input)

src/lib.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,6 @@ mod tests {
442442
for s in [
443443
"1997-01-01 00:00:00 +0000",
444444
"1997-01-01 00:00:00 +00",
445-
"199701010000 +0000",
446445
"1997-01-01 00:00 +0000",
447446
"1997-01-01 00:00:00 +0000",
448447
"1997-01-01T00:00:00+0000",
@@ -508,7 +507,6 @@ mod tests {
508507
for s in [
509508
"1997-01-01 00:00:00 +0000 +1 year",
510509
"1997-01-01 00:00:00 +00 +1 year",
511-
"199701010000 +0000 +1 year",
512510
"1997-01-01T00:00:00Z +1 year",
513511
"1997-01-01 00:00 +0000 +1 year",
514512
"1997-01-01 00:00:00 +0000 +1 year",

0 commit comments

Comments
 (0)