Skip to content

Commit 324d0bc

Browse files
authored
Merge pull request #184 from yuankunzhang/improve-us-date-parser
Improve the `us()` date parser
2 parents 5ba4b18 + 9f75f89 commit 324d0bc

File tree

2 files changed

+143
-40
lines changed

2 files changed

+143
-40
lines changed

src/items/date.rs

Lines changed: 135 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,30 @@ impl TryFrom<(&str, u32, u32)> for Date {
108108
}
109109
}
110110

111+
impl TryFrom<(u32, u32)> for Date {
112+
type Error = &'static str;
113+
114+
/// Create a `Date` from a tuple of `(month, day)`.
115+
fn try_from((month, day): (u32, u32)) -> Result<Self, Self::Error> {
116+
if !(1..=12).contains(&month) {
117+
return Err("month must be between 1 and 12");
118+
}
119+
120+
if !(1..=31).contains(&day)
121+
|| (month == 2 && day > 29)
122+
|| ((month == 4 || month == 6 || month == 9 || month == 11) && day > 30)
123+
{
124+
return Err("day is not valid for the given month");
125+
}
126+
127+
Ok(Date {
128+
day,
129+
month,
130+
year: None,
131+
})
132+
}
133+
}
134+
111135
pub fn parse(input: &mut &str) -> ModalResult<Date> {
112136
alt((iso1, iso2, us, literal1, literal2)).parse_next(input)
113137
}
@@ -154,15 +178,46 @@ pub fn iso2(input: &mut &str) -> ModalResult<Date> {
154178
.map_err(|e| ErrMode::Cut(ctx_err(e)))
155179
}
156180

157-
/// Parse `MM/DD/YYYY`, `MM/DD/YY` or `MM/DD`
181+
/// Parse `[year]/[month]/[day]` or `[month]/[day]/[year]` or `[month]/[day]`.
158182
fn us(input: &mut &str) -> ModalResult<Date> {
159-
seq!(Date {
160-
month: month,
161-
_: s('/'),
162-
day: day,
163-
year: opt(preceded(s('/'), year)),
164-
})
165-
.parse_next(input)
183+
let (s1, _, n, s2) = (
184+
s(take_while(1.., AsChar::is_dec_digit)),
185+
s('/'),
186+
s(dec_uint),
187+
opt(preceded(s('/'), s(take_while(1.., AsChar::is_dec_digit)))),
188+
)
189+
.parse_next(input)?;
190+
191+
match s2 {
192+
Some(s2) if s1.len() >= 4 => {
193+
// [year]/[month]/[day]
194+
//
195+
// GNU quirk: interpret as [year]/[month]/[day] if the first part is at
196+
// least 4 characters long.
197+
let day = s2
198+
.parse::<u32>()
199+
.map_err(|_| ErrMode::Cut(ctx_err("day must be a valid number")))?;
200+
(s1, n, day)
201+
.try_into()
202+
.map_err(|e| ErrMode::Cut(ctx_err(e)))
203+
}
204+
Some(s2) => {
205+
// [month]/[day]/[year]
206+
let month = s1
207+
.parse::<u32>()
208+
.map_err(|_| ErrMode::Cut(ctx_err("month must be a valid number")))?;
209+
(s2, month, n)
210+
.try_into()
211+
.map_err(|e| ErrMode::Cut(ctx_err(e)))
212+
}
213+
None => {
214+
// [month]/[day]
215+
let month = s1
216+
.parse::<u32>()
217+
.map_err(|_| ErrMode::Cut(ctx_err("month must be a valid number")))?;
218+
(month, n).try_into().map_err(|e| ErrMode::Cut(ctx_err(e)))
219+
}
220+
}
166221
}
167222

168223
/// Parse `14 November 2022`, `14 Nov 2022`, "14nov2022", "14-nov-2022", "14-nov2022", "14nov-2022"
@@ -213,17 +268,6 @@ pub fn year(input: &mut &str) -> ModalResult<u32> {
213268
.parse_next(input)
214269
}
215270

216-
fn month(input: &mut &str) -> ModalResult<u32> {
217-
s(dec_uint)
218-
.try_map(|x| {
219-
(1..=12)
220-
.contains(&x)
221-
.then_some(x)
222-
.ok_or(ParseDateTimeError::InvalidInput)
223-
})
224-
.parse_next(input)
225-
}
226-
227271
fn day(input: &mut &str) -> ModalResult<u32> {
228272
s(dec_uint)
229273
.try_map(|x| {
@@ -280,11 +324,11 @@ mod tests {
280324
fn iso1() {
281325
let reference = Date {
282326
year: Some(1),
283-
month: 1,
284-
day: 1,
327+
month: 2,
328+
day: 3,
285329
};
286330

287-
for mut s in ["1-1-1", "1 - 1 - 1", "1-01-01", "1-001-001", "001-01-01"] {
331+
for mut s in ["1-2-3", "1 - 2 - 3", "1-02-03", "1-002-003", "001-02-03"] {
288332
let old_s = s.to_owned();
289333
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
290334
}
@@ -293,11 +337,11 @@ mod tests {
293337
// smaller, 2000 is added to it.
294338
let reference = Date {
295339
year: Some(2001),
296-
month: 1,
297-
day: 1,
340+
month: 2,
341+
day: 3,
298342
};
299343

300-
for mut s in ["01-1-1", "01-01-01"] {
344+
for mut s in ["01-2-3", "01-02-03"] {
301345
let old_s = s.to_owned();
302346
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
303347
}
@@ -306,11 +350,11 @@ mod tests {
306350
// than 100, 1900 is added to it.
307351
let reference = Date {
308352
year: Some(1970),
309-
month: 1,
310-
day: 1,
353+
month: 2,
354+
day: 3,
311355
};
312356

313-
for mut s in ["70-1-1", "70-01-01"] {
357+
for mut s in ["70-2-3", "70-02-03"] {
314358
let old_s = s.to_owned();
315359
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
316360
}
@@ -325,11 +369,11 @@ mod tests {
325369
fn iso2() {
326370
let reference = Date {
327371
year: Some(1),
328-
month: 1,
329-
day: 1,
372+
month: 2,
373+
day: 3,
330374
};
331375

332-
for mut s in ["10101", "0010101", "00010101", "000010101"] {
376+
for mut s in ["10203", "0010203", "00010203", "000010203"] {
333377
let old_s = s.to_owned();
334378
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
335379
}
@@ -338,23 +382,23 @@ mod tests {
338382
// smaller, 2000 is added to it.
339383
let reference = Date {
340384
year: Some(2001),
341-
month: 1,
342-
day: 1,
385+
month: 2,
386+
day: 3,
343387
};
344388

345-
let mut s = "010101";
389+
let mut s = "010203";
346390
let old_s = s.to_owned();
347391
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
348392

349393
// GNU quirk: when year string is 2 characters long and year is 69 or
350394
// greater (i.e. 69-99), 1900 is added to it.
351395
let reference = Date {
352396
year: Some(1970),
353-
month: 1,
354-
day: 1,
397+
month: 2,
398+
day: 3,
355399
};
356400

357-
let mut s = "700101";
401+
let mut s = "700203";
358402
let old_s = s.to_owned();
359403
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
360404

@@ -364,6 +408,60 @@ mod tests {
364408
}
365409
}
366410

411+
#[test]
412+
fn us() {
413+
let reference = Date {
414+
year: Some(1),
415+
month: 2,
416+
day: 3,
417+
};
418+
419+
for mut s in ["2/3/1", "2 / 3 / 1", "02/03/ 001", "0001/2/3"] {
420+
let old_s = s.to_owned();
421+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
422+
}
423+
424+
let reference = Date {
425+
year: None,
426+
month: 2,
427+
day: 3,
428+
};
429+
430+
for mut s in ["2/3", "2 / 3"] {
431+
let old_s = s.to_owned();
432+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
433+
}
434+
435+
// GNU quirk: when year string is 2 characters long and year is 68 or
436+
// smaller, 2000 is added to it.
437+
let reference = Date {
438+
year: Some(2001),
439+
month: 2,
440+
day: 3,
441+
};
442+
443+
let mut s = "2/3/01";
444+
let old_s = s.to_owned();
445+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
446+
447+
// GNU quirk: when year string is 2 characters long and year is 69 or
448+
// greater (i.e. 69-99), 1900 is added to it.
449+
let reference = Date {
450+
year: Some(1970),
451+
month: 2,
452+
day: 3,
453+
};
454+
455+
let mut s = "2/3/70";
456+
let old_s = s.to_owned();
457+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
458+
459+
for mut s in ["00/01/01", "13/01/01", "01/32/01", "02/30/01", "04/31/01"] {
460+
let old_s = s.to_owned();
461+
assert!(parse(&mut s).is_err(), "Format string: {old_s}");
462+
}
463+
}
464+
367465
#[test]
368466
fn with_year() {
369467
let reference = Date {

src/items/mod.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,18 @@ pub(crate) fn at_local(
9696
/// items = item , { item } ;
9797
/// item = datetime | date | time | relative | weekday | timezone | year ;
9898
///
99-
/// datetime = iso_date , [ "T" | "t" | whitespace ] , iso_time ;
99+
/// datetime = date , [ "T" | "t" | whitespace ] , iso_time ;
100+
/// date = iso_date | us_date ;
101+
///
102+
/// iso_date = year , [ iso_date_delim ] , month , [ iso_date_delim ] , day ;
103+
/// iso_date_delim = [ { whitespace } ] , "-" , [ { whitespace } ] ;
104+
///
105+
/// us_date = month , us_date_delim , day , [ us_date_delim , year ] | year , us_date_delim , month , us_date_delim , day ;
106+
/// us_date_delim = [ { whitespace } ] , "/" , [ { whitespace } ] ;
100107
///
101-
/// iso_date = year , [ delim ] , month , [ delim ] , day ;
102108
/// year = dec_int ;
103109
/// month = dec_int ;
104110
/// day = dec_int ;
105-
/// delim = [ { whitespace } ] , "-" , [ { whitespace } ] ;
106111
/// ```
107112
pub(crate) fn parse(input: &mut &str) -> ModalResult<DateTimeBuilder> {
108113
trace("parse", alt((parse_timestamp, parse_items))).parse_next(input)

0 commit comments

Comments
 (0)