Skip to content

Commit 5b52929

Browse files
authored
Merge pull request #186 from yuankunzhang/improve-literal-date-parser
feat: improve the literal date parsers
2 parents 324d0bc + 3d6f79d commit 5b52929

File tree

2 files changed

+174
-33
lines changed

2 files changed

+174
-33
lines changed

src/items/date.rs

Lines changed: 154 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,15 @@
2727
//! > ‘September’.
2828
2929
use winnow::{
30-
ascii::alpha1,
31-
combinator::{alt, opt, preceded, trace},
30+
ascii::{alpha1, multispace1},
31+
combinator::{alt, eof, opt, preceded, terminated, trace},
3232
error::ErrMode,
33-
seq,
3433
stream::AsChar,
3534
token::take_while,
3635
ModalResult, Parser,
3736
};
3837

3938
use super::primitive::{ctx_err, dec_uint, s};
40-
use crate::ParseDateTimeError;
4139

4240
#[derive(PartialEq, Eq, Clone, Debug, Default)]
4341
pub struct Date {
@@ -220,27 +218,58 @@ fn us(input: &mut &str) -> ModalResult<Date> {
220218
}
221219
}
222220

223-
/// Parse `14 November 2022`, `14 Nov 2022`, "14nov2022", "14-nov-2022", "14-nov2022", "14nov-2022"
221+
/// Parse `14 November 2022`, `14 Nov 2022`, `14nov2022`, `14-nov-2022`,
222+
/// `14-nov2022`, `14nov-2022`.
224223
fn literal1(input: &mut &str) -> ModalResult<Date> {
225-
seq!(Date {
226-
day: day,
227-
_: opt(s('-')),
228-
month: literal_month,
229-
year: opt(preceded(opt(s('-')), year)),
230-
})
231-
.parse_next(input)
224+
let (day, _, month, year) = (
225+
s(dec_uint),
226+
opt(s('-')),
227+
s(literal_month),
228+
opt(terminated(
229+
preceded(opt(s('-')), s(take_while(1.., AsChar::is_dec_digit))),
230+
// The year must be followed by a space or end of input.
231+
alt((multispace1, eof)),
232+
)),
233+
)
234+
.parse_next(input)?;
235+
236+
match year {
237+
Some(year) => (year, month, day)
238+
.try_into()
239+
.map_err(|e| ErrMode::Cut(ctx_err(e))),
240+
None => (month, day)
241+
.try_into()
242+
.map_err(|e| ErrMode::Cut(ctx_err(e))),
243+
}
232244
}
233245

234-
/// Parse `November 14, 2022` and `Nov 14, 2022`
246+
/// Parse `November 14, 2022`, `Nov 14, 2022`, and `Nov 14 2022`.
235247
fn literal2(input: &mut &str) -> ModalResult<Date> {
236-
seq!(Date {
237-
month: literal_month,
238-
day: day,
239-
// FIXME: GNU requires _some_ space between the day and the year,
240-
// probably to distinguish with floats.
241-
year: opt(preceded(s(","), year)),
242-
})
243-
.parse_next(input)
248+
let (month, day, year) = (
249+
s(literal_month),
250+
s(dec_uint),
251+
opt(terminated(
252+
preceded(
253+
// GNU quirk: for formats like `Nov 14, 2022`, there must be some
254+
// space between the comma and the year. This is probably to
255+
// distinguish it from floats.
256+
opt(s(terminated(',', multispace1))),
257+
s(take_while(1.., AsChar::is_dec_digit)),
258+
),
259+
// The year must be followed by a space or end of input.
260+
alt((multispace1, eof)),
261+
)),
262+
)
263+
.parse_next(input)?;
264+
265+
match year {
266+
Some(year) => (year, month, day)
267+
.try_into()
268+
.map_err(|e| ErrMode::Cut(ctx_err(e))),
269+
None => (month, day)
270+
.try_into()
271+
.map_err(|e| ErrMode::Cut(ctx_err(e))),
272+
}
244273
}
245274

246275
pub fn year(input: &mut &str) -> ModalResult<u32> {
@@ -268,17 +297,6 @@ pub fn year(input: &mut &str) -> ModalResult<u32> {
268297
.parse_next(input)
269298
}
270299

271-
fn day(input: &mut &str) -> ModalResult<u32> {
272-
s(dec_uint)
273-
.try_map(|x| {
274-
(1..=31)
275-
.contains(&x)
276-
.then_some(x)
277-
.ok_or(ParseDateTimeError::InvalidInput)
278-
})
279-
.parse_next(input)
280-
}
281-
282300
/// Parse the name of a month (case-insensitive)
283301
fn literal_month(input: &mut &str) -> ModalResult<u32> {
284302
s(alpha1)
@@ -462,6 +480,110 @@ mod tests {
462480
}
463481
}
464482

483+
#[test]
484+
fn literal1() {
485+
let reference = Date {
486+
year: Some(2022),
487+
month: 11,
488+
day: 14,
489+
};
490+
491+
for mut s in [
492+
"14 november 2022",
493+
"14 nov 2022",
494+
"14-nov-2022",
495+
"14-nov2022",
496+
"14nov2022",
497+
"14nov 2022",
498+
] {
499+
let old_s = s.to_owned();
500+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
501+
}
502+
503+
let reference = Date {
504+
year: None,
505+
month: 11,
506+
day: 14,
507+
};
508+
509+
for mut s in ["14 november", "14 nov", "14-nov", "14nov"] {
510+
let old_s = s.to_owned();
511+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
512+
}
513+
514+
let reference = Date {
515+
year: None,
516+
month: 11,
517+
day: 14,
518+
};
519+
520+
// Year must be followed by a space or end of input.
521+
let mut s = "14 nov 2022a";
522+
let old_s = s.to_owned();
523+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
524+
assert_eq!(s, " 2022a");
525+
526+
let mut s = "14 nov-2022a";
527+
let old_s = s.to_owned();
528+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
529+
assert_eq!(s, "-2022a");
530+
}
531+
532+
#[test]
533+
fn literal2() {
534+
let reference = Date {
535+
year: Some(2022),
536+
month: 11,
537+
day: 14,
538+
};
539+
540+
for mut s in [
541+
"november 14 2022",
542+
"november 14, 2022",
543+
"november 14 , 2022",
544+
"nov 14 2022",
545+
"nov14 2022",
546+
"nov14, 2022",
547+
] {
548+
let old_s = s.to_owned();
549+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
550+
}
551+
552+
let reference = Date {
553+
year: None,
554+
month: 11,
555+
day: 14,
556+
};
557+
558+
for mut s in ["november 14", "nov 14", "nov14"] {
559+
let old_s = s.to_owned();
560+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
561+
}
562+
563+
let reference = Date {
564+
year: None,
565+
month: 11,
566+
day: 14,
567+
};
568+
569+
// There must be some space between the comma and the year.
570+
let mut s = "november 14,2022";
571+
let old_s = s.to_owned();
572+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
573+
assert_eq!(s, ",2022");
574+
575+
// Year must be followed by a space or end of input.
576+
let mut s = "november 14 2022a";
577+
let old_s = s.to_owned();
578+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
579+
assert_eq!(s, " 2022a");
580+
581+
let mut s = "november 14, 2022a";
582+
let old_s = s.to_owned();
583+
assert_eq!(parse(&mut s).unwrap(), reference, "Format string: {old_s}");
584+
assert_eq!(s, ", 2022a");
585+
}
586+
465587
#[test]
466588
fn with_year() {
467589
let reference = Date {

src/items/mod.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,17 +97,36 @@ pub(crate) fn at_local(
9797
/// item = datetime | date | time | relative | weekday | timezone | year ;
9898
///
9999
/// datetime = date , [ "T" | "t" | whitespace ] , iso_time ;
100-
/// date = iso_date | us_date ;
100+
/// date = iso_date | us_date | literal1_date | literal2_date ;
101101
///
102102
/// iso_date = year , [ iso_date_delim ] , month , [ iso_date_delim ] , day ;
103103
/// iso_date_delim = [ { whitespace } ] , "-" , [ { whitespace } ] ;
104104
///
105105
/// us_date = month , [ us_date_delim ] , day , [ [ us_date_delim ] , year ];
106106
/// us_date_delim = [ { whitespace } ] , "/" , [ { whitespace } ] ;
107107
///
108+
/// literal1_date = day , [ literal1_date_delim ] , literal_month , [ [ literal1_date_delim ] , year ] ;
109+
/// literal1_date_delim = { whitespace } | [ { whitespace } ] , "-" , [ { whitespace } ] ;
110+
///
111+
/// literal2_date = literal_month , [ { whitespace } ] , day , [ [ literal2_date_delim ] , year ] ;
112+
/// literal2_date_delim = { whitespace } | [ { whitespace } ] , "," , whitespace , [ { whitespace } ] ;
113+
///
108114
/// year = dec_int ;
109115
/// month = dec_int ;
110116
/// day = dec_int ;
117+
///
118+
/// literal_month = "january" | "jan"
119+
/// | "february" | "feb"
120+
/// | "march" | "mar"
121+
/// | "april" | "apr"
122+
/// | "may"
123+
/// | "june" | "jun"
124+
/// | "july" | "jul"
125+
/// | "august" | "aug"
126+
/// | "september" | "sept" | "sep"
127+
/// | "october" | "oct"
128+
/// | "november" | "nov"
129+
/// | "december" | "dec" ;
111130
/// ```
112131
pub(crate) fn parse(input: &mut &str) -> ModalResult<DateTimeBuilder> {
113132
trace("parse", alt((parse_timestamp, parse_items))).parse_next(input)

0 commit comments

Comments
 (0)