Skip to content

Commit cbec464

Browse files
committed
feat: improve automatic format detection
Add more patterns to the automatic format detector and pick the format that produces the latest date. Fixes #103
1 parent 6f3f929 commit cbec464

File tree

3 files changed

+198
-28
lines changed

3 files changed

+198
-28
lines changed

src/main/java/net/atomique/ksar/AllParser.java

Lines changed: 69 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package net.atomique.ksar;
77

88
import net.atomique.ksar.xml.OSConfig;
9+
910
import org.slf4j.Logger;
1011
import org.slf4j.LoggerFactory;
1112

@@ -14,26 +15,49 @@
1415
import java.time.LocalTime;
1516
import java.time.format.DateTimeFormatter;
1617
import java.time.format.DateTimeParseException;
17-
import java.util.HashMap;
18-
import java.util.Map;
18+
import java.util.List;
19+
import java.util.Locale;
1920
import java.util.TreeSet;
21+
import java.util.stream.Collectors;
22+
import java.util.stream.Stream;
2023

2124
public abstract class AllParser {
2225

2326
private static final Logger log = LoggerFactory.getLogger(AllParser.class);
24-
private static final Map<String, String> DATE_FORMAT_REGEXPS = new HashMap<String, String>() {
25-
{
26-
put("^\\d{8}$", "yyyyMMdd");
27-
put("^\\d{1,2}-\\d{1,2}-\\d{4}$", "dd-MM-yyyy");
28-
put("^\\d{4}-\\d{1,2}-\\d{1,2}$", "yyyy-MM-dd");
29-
put("^\\d{1,2}/\\d{1,2}/\\d{4}$", "MM/dd/yyyy");
30-
put("^\\d{4}/\\d{1,2}/\\d{1,2}$", "yyyy/MM/dd");
31-
put("^\\d{1,2}\\s[a-z]{3}\\s\\d{4}$", "dd MMM yyyy");
32-
put("^\\d{1,2}\\s[a-z]{4,}\\s\\d{4}$", "dd MMMM yyyy");
33-
put("^\\d{1,2}-\\d{1,2}-\\d{2}$", "dd-MM-yy");
34-
put("^\\d{1,2}/\\d{1,2}/\\d{2}$", "MM/dd/yy");
35-
}
36-
};
27+
28+
private static final List<DateTimeFormatter> DATE_FORMATS = Stream.of(
29+
"MM dd, yy",
30+
"MM-dd-yy",
31+
"MM/dd/yy",
32+
"MM-dd-yyyy",
33+
"MM/dd/yyyy",
34+
"dd-MM-yy",
35+
"dd.MM.yy",
36+
"dd/MM/yy",
37+
"dd.MM.yy.",
38+
"dd-MM-yyyy",
39+
"dd.MM.yyyy",
40+
"dd/MM/yyyy",
41+
"dd.MM.yyyy.",
42+
"yy. MM. dd",
43+
"yy-MM-dd",
44+
"yy.MM.dd",
45+
"yy/MM/dd",
46+
"yy年MM月dd日",
47+
"yy.dd.MM",
48+
"yyyy. MM. dd",
49+
"yyyy-MM-dd",
50+
"yyyy.MM.dd",
51+
"yyyy/MM/dd",
52+
"yyyy.MM.dd.",
53+
"yyyy年MM月dd日",
54+
"yyyy.dd.MM",
55+
"yyyyMMdd",
56+
"dd MMM yyyy",
57+
"dd MMMM yyyy",
58+
"MMM dd yyyy",
59+
"MMMM dd yyyy"
60+
).map(p -> DateTimeFormatter.ofPattern(p, Locale.US)).collect(Collectors.toList());
3761

3862
public AllParser() {
3963

@@ -80,14 +104,7 @@ public boolean setDate(String s) {
80104
}
81105

82106
try {
83-
DateTimeFormatter formatter;
84-
if ("Automatic Detection".equals(dateFormat)) {
85-
formatter = DateTimeFormatter.ofPattern(determineDateFormat(s));
86-
87-
} else {
88-
formatter = DateTimeFormatter.ofPattern(dateFormat);
89-
}
90-
107+
DateTimeFormatter formatter = getDateFormatter(s);
91108
currentDate = LocalDate.parse(s, formatter);
92109

93110
parsedate = currentDate;
@@ -109,6 +126,20 @@ public boolean setDate(String s) {
109126
return true;
110127
}
111128

129+
private DateTimeFormatter getDateFormatter(String s) {
130+
if (dateFormatter != null) {
131+
return dateFormatter;
132+
}
133+
DateTimeFormatter format = null;
134+
if ("Automatic Detection".equals(dateFormat)) {
135+
format = determineDateFormat(s);
136+
} else {
137+
format = DateTimeFormatter.ofPattern(dateFormat);
138+
}
139+
dateFormatter = format;
140+
return dateFormatter;
141+
}
142+
112143
public String getDate() {
113144
if (sarStartDate.equals(sarEndDate)) {
114145
return sarStartDate;
@@ -125,13 +156,21 @@ public String getCurrentStat() {
125156
return currentStat;
126157
}
127158

128-
public static String determineDateFormat(String dateString) {
129-
for (String regexp : DATE_FORMAT_REGEXPS.keySet()) {
130-
if (dateString.toLowerCase().matches(regexp)) {
131-
return DATE_FORMAT_REGEXPS.get(regexp);
159+
public static DateTimeFormatter determineDateFormat(String dateString) {
160+
DateTimeFormatter best = null;
161+
LocalDate bestDate = null;
162+
for (DateTimeFormatter format : DATE_FORMATS) {
163+
try {
164+
LocalDate nextDate = LocalDate.parse(dateString, format);
165+
if (bestDate == null || nextDate.compareTo(bestDate) >= 0) {
166+
bestDate = nextDate;
167+
best = format;
168+
}
169+
} catch (DateTimeParseException e) {
170+
/* ignore */
132171
}
133172
}
134-
return null; // Unknown format.
173+
return best;
135174
}
136175

137176
protected String sarStartDate = null;
@@ -159,4 +198,6 @@ public static String determineDateFormat(String dateString) {
159198
protected String dateFormat = "MM/dd/yy";
160199
protected String timeFormat = "HH:mm:ss";
161200
protected int timeColumn = 1;
201+
202+
private DateTimeFormatter dateFormatter;
162203
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright 2017 The kSAR Project. All rights reserved.
3+
* See the LICENSE file in the project root for more information.
4+
*/
5+
6+
package net.atomique.ksar.parser;
7+
8+
import org.junit.Ignore;
9+
import org.junit.Test;
10+
11+
import java.time.LocalDate;
12+
import java.time.format.DateTimeFormatter;
13+
import java.time.format.FormatStyle;
14+
import java.util.ArrayList;
15+
import java.util.Comparator;
16+
import java.util.EnumSet;
17+
import java.util.HashSet;
18+
import java.util.List;
19+
import java.util.Locale;
20+
import java.util.Set;
21+
import java.util.function.Function;
22+
import java.util.function.Predicate;
23+
import java.util.regex.Pattern;
24+
25+
public class DateFormatHelperTest {
26+
@Test
27+
@Ignore
28+
public void generateTests() throws Exception {
29+
Set<String> allFormats = new HashSet<>();
30+
LocalDate date = LocalDate.of(2017, 10, 18);
31+
Predicate<String> nonPunctuation = Pattern.compile("[^ ./-\\:0-9]{3,}").asPredicate();
32+
for (Locale locale : Locale.getAvailableLocales()) {
33+
for (FormatStyle style : EnumSet.of(FormatStyle.SHORT, FormatStyle.MEDIUM)) {
34+
DateTimeFormatter f = DateTimeFormatter.ofLocalizedDate(style).withLocale(locale);
35+
String str = f.format(date);
36+
if (nonPunctuation.test(str)) {
37+
continue;
38+
}
39+
String v = str.replaceAll("2017", "yyyy").replaceAll("17", "yy")
40+
.replaceAll("18", "dd").replaceAll("10", "MM");
41+
allFormats.add(v);
42+
}
43+
}
44+
List<String> formats = new ArrayList<>(allFormats);
45+
formats.sort(Comparator.<String, String>comparing(v -> v.replaceAll("[^\\w]", "-"))
46+
.thenComparing(Function.identity()));
47+
48+
for (String format : formats) {
49+
System.out.println('"' + format + "\",");
50+
}
51+
}
52+
53+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* Copyright 2017 The kSAR Project. All rights reserved.
3+
* See the LICENSE file in the project root for more information.
4+
*/
5+
6+
package net.atomique.ksar.parser;
7+
8+
import net.atomique.ksar.AllParser;
9+
10+
import org.junit.Assert;
11+
import org.junit.Test;
12+
import org.junit.runner.RunWith;
13+
import org.junit.runners.Parameterized;
14+
15+
import java.time.LocalDate;
16+
import java.time.format.DateTimeFormatter;
17+
import java.util.ArrayList;
18+
import java.util.Arrays;
19+
import java.util.Collection;
20+
21+
@RunWith(Parameterized.class)
22+
public class DateFormatTest {
23+
private final String text;
24+
private final LocalDate date;
25+
private final String expected;
26+
27+
public DateFormatTest(LocalDate date, String text, String expected) {
28+
this.text = text;
29+
this.date = date;
30+
this.expected = expected;
31+
}
32+
33+
@Parameterized.Parameters(name = "{1} -> {2}")
34+
public static Iterable<Object[]> params() {
35+
Collection<Object[]> res = new ArrayList<>();
36+
37+
// See DateTest.generateFormats
38+
LocalDate date = LocalDate.of(2017, 5, 16);
39+
for (String format : Arrays.asList(
40+
"MM-dd-yy",
41+
"MM/dd/yy",
42+
"dd-MM-yy",
43+
"dd.MM.yy",
44+
"dd/MM/yy",
45+
"dd.MM.yy.",
46+
"dd-MM-yyyy",
47+
"dd.MM.yyyy",
48+
"dd/MM/yyyy",
49+
"yy. MM. dd",
50+
"yy-MM-dd",
51+
"yy.MM.dd",
52+
"yy/MM/dd",
53+
"yy年MM月dd日",
54+
"yy.dd.MM",
55+
"yyyy-MM-dd",
56+
"yyyy.MM.dd",
57+
"yyyy/MM/dd",
58+
"yyyy.MM.dd."
59+
)) {
60+
DateTimeFormatter df = DateTimeFormatter.ofPattern(format);
61+
res.add(new Object[]{date, df.format(date), format});
62+
}
63+
// See https://github.com/vlsi/ksar/issues/103
64+
LocalDate aug_04_2017 = LocalDate.of(2017, 8, 4);
65+
res.add(new Object[]{aug_04_2017, "04/08/17", "dd/MM/yy"});
66+
return res;
67+
}
68+
69+
@Test
70+
public void run() {
71+
DateTimeFormatter format = AllParser.determineDateFormat(text);
72+
LocalDate date = LocalDate.parse(text, format);
73+
Assert.assertEquals(text, this.date, date);
74+
}
75+
76+
}

0 commit comments

Comments
 (0)