-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathdate_regex.py
46 lines (33 loc) · 1.91 KB
/
date_regex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import re
# Regular-expression source strings used to spot dates in free text.
#
# The patterns are assembled from shared fragments so that a fix to one piece
# (separator, day, month name, year) applies to every pattern.  The capture
# groups are the same ones, in the same order, as in the original hand-written
# patterns, so group numbering is unchanged.

# One or two separator characters: whitespace, comma, hyphen, slash or a
# literal dot.  The dot is escaped on purpose: a bare "." matches any
# character, which let times such as "10:30:05" be reported as dates.
_SEP = r"(\s|,|-|\/|\.)(\s|,|-|\/|\.)?"
# 1-31 with an optional leading zero.  date1 uses this for both leading
# numeric fields, so either of them accepts any value from 1 to 31.
_DAY = r"(0?[1-9]|[12][0-9]|3[01])"
# Optional English ordinal suffix; "rd" is included so 3rd / 23rd match.
_ORDINAL = r"(st|nd|rd|th)?"
# English month names, abbreviated or in full (case-sensitive).
_MONTH = (
    r"(Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|Jun(e)?|Jul(y)?"
    r"|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)"
)
# Four-digit years 2000-2029 or two-digit years 00-29.
_YEAR = r"((20(0|1|2)\d)|((0|1|2)\d))"

# 04-20-2009; 04/20/2009; 4/20/09; 4.20.2009
date1 = r"\b" + _DAY + r"\b" + _SEP + r"\b" + _DAY + r"\b" + _SEP + r"\b" + _YEAR + r"\b"
# Monthname Date, Year; Mar 20, 2009; Mar. 20, 2009; Mar 23rd, 2009
date2 = r"\b" + _MONTH + r"\b" + _SEP + r"\b" + _DAY + _ORDINAL + r"\b" + _SEP + r"\b" + _YEAR + r"\b"
# Date Monthname, Year; 20 March, 2009; 3rd March 2009
date3 = r"\b" + _DAY + _ORDINAL + r"\b" + _SEP + r"\b" + _MONTH + r"\b" + _SEP + r"\b" + _YEAR + r"\b"
# Monthname Year; Feb 2009; Sep. 2009
date4 = r"\b" + _MONTH + r"\b" + _SEP + r"\b" + _YEAR + r"\b"

# Formats matched by the patterns above:
#   04-20-2009; 04/20/09; 4/20/09; 4/3/2009; Mar 20, 2009; March 20, 2009;
#   Mar. 20, 2009; Mar 20 2009; 20 Mar 2009; 20 March 2009; 2 Mar. 2009;
#   20 March, 2009; Mar 20th, 2009; Mar 21st, 2009; Mar 22nd, 2009;
#   Mar 23rd, 2009; Feb 2009; Sep 2009; Oct 2010
# NOT matched by any pattern: numeric month/year pairs ("6/2008", "12/2009")
# and bare years ("2009", "2010").

# Placeholder result string; kept for backward compatibility with importers.
ans = "Not Found"
def get_date(text):
    """Return the first date-like substring found in *text*.

    The text is split on "\n" and scanned line by line.  Lines containing
    "w.e.f" (compared case-insensitively) are skipped entirely.  On every
    other line the module-level patterns date1, date2, date3 and date4 are
    applied in that order and every match is collected, so one date may be
    collected more than once when several patterns match it.

    All collected candidates are printed to stdout.  The return value is
    the first candidate, or the string "Not Found" when nothing matched.
    """
    candidates = []
    for line in text.split("\n"):
        if "w.e.f" in line.lower():
            continue
        # Pattern order matters: it decides which candidate comes first
        # when several patterns match on the same line.
        for pattern in (date1, date2, date3, date4):
            candidates.extend(hit.group() for hit in re.finditer(pattern, line))
    print("\nSuspected values for Date: ", candidates)
    if not candidates:
        return "Not Found"
    return candidates[0]