-
Notifications
You must be signed in to change notification settings - Fork 0
/
hwo.py
134 lines (113 loc) · 4.25 KB
/
hwo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
hwo.py: download and parse the NOAA / All Hazards Hazardous Weather Outlook (HWO).
The parsed sections are stored in a dictionary on the HWO object.
"""
import os
import re
import logging
import requests
from bs4 import BeautifulSoup
class HWO(object):
    """
    Hazardous weather outlook (HWO) object class.

    Downloads the outlook text with get_hwo() and splits it into sections
    with split_hwo(). Parsed results are stored in self.hwodict under the
    keys 'dayone', 'daystwothroughseven', 'spotter', 'today_text' and
    'has_spotter'.
    """

    def __init__(self, data, outputfile='hwo.txt'):
        """
        Create an object and empty dictionary.

        data is a mapping; get_hwo() reads data['hwo_site'],
        data['nws_abbr'], data['defaults']['hwo_url'] and
        data['output_dir'] from it. outputfile is the file name (joined to
        data['output_dir']) that the raw outlook text is written to.
        """
        self.data = data
        self.outputfile = outputfile
        self.hwo_text = ''
        self.hwodict = dict(spotter=[],
                            dayone=[],
                            daystwothroughseven=[],
                            today_text='',
                            has_spotter=False
                            )

    def get_hwo(self):
        """
        Get the HTML-only Hazardous Weather Outlook.

        The raw text of this statement is expected inside a "pre" element
        (class "glossaryProduct") nested in the "localcontent" div of the
        page body. The first "pre" element whose text is longer than 200
        characters is treated as the outlook: it is written to the output
        file and returned. Returns None if no such element is found.
        """
        params_dict = {'site': self.data['hwo_site'],
                       'issuedby': self.data['nws_abbr'],
                       'product': 'HWO',
                       'format': 'txt',
                       'version': 1,
                       'glossary': 0
                       }
        # NOTE(review): verify=False disables TLS certificate verification.
        # Kept as-is for backward compatibility; confirm whether it is still
        # required for this endpoint and remove it if not.
        response = requests.get(self.data['defaults']['hwo_url'],
                                params=params_dict,
                                verify=False,
                                timeout=10)
        soup = BeautifulSoup(response.text, 'html.parser')
        # A response without a <body> (e.g. an empty or truncated page)
        # leaves soup.body as None; treat that as "no outlook found".
        body = soup.body
        pres = body.find_all('pre') if body is not None else []
        for pretag in pres:
            self.hwo_text = pretag.get_text()
            if len(self.hwo_text) > 200:
                outpath = os.path.join(self.data['output_dir'], self.outputfile)
                # Context manager closes the file even if write() raises.
                with open(outpath, 'w') as cur:
                    cur.write(self.hwo_text)
                return self.hwo_text
        return None

    def split_hwo(self):
        """
        Pull out today's hazardous weather outlook and spotter activation notice.

        Parses self.hwo_text and fills self.hwodict:
          'dayone'              -> [heading, body] of the .DAY ONE section
          'daystwothroughseven' -> [heading, body] of the following section
          'spotter'             -> ['Spotter Information Statement', body]
          'today_text'          -> day-one body followed by the spotter text
          'has_spotter'         -> True unless the spotter text says
                                   activation is not expected
        Sections that are not present in the text are left unchanged.
        Always returns True.
        """
        bodytext = self.hwo_text
        logging.debug('Raw body text of HWO: \n%s', bodytext)

        dayone = re.search(r'(\.DAY ONE.*?)(\.DAYS TWO THROUGH SEVEN.*?)',
                           bodytext, re.DOTALL)
        if dayone:
            hwotext = re.sub(r'\n\n$', '', dayone.group(1))
            hwotext = re.sub(r'\.{1,}DAY ONE[\.]{1,}', '', hwotext)
            # The heading is everything on the first line up to its last
            # period; fall back to an empty heading if there is no period.
            heading = re.search(r'^(.*)\.', hwotext)
            first_sentence = heading.group(1) if heading else ''
            logging.debug('First sentence: %s', first_sentence)
            hwotext = re.sub('\n', ' ', hwotext)
            hwotext = nice_plumbing(hwotext)
            # Literal removal: the heading is plain text, not a regex, so
            # characters such as '(' or '+' must not be interpreted.
            first_info = hwotext.replace(first_sentence, '')
            first_info = re.sub(r'^\s*\.*', '', first_info)
            self.hwodict['dayone'] = [first_sentence.strip(), first_info.strip()]

        daytwo = re.search('DAYS TWO THROUGH SEVEN(.*)SPOTTER', bodytext, re.DOTALL)
        daytwo_text = daytwo.group(1) if daytwo else ''
        if daytwo_text:
            logging.debug('DayTwo: %s', daytwo_text)
            daytwo_text = re.sub(r'\n{1,}', ' ', daytwo_text)
            daytwo_text = re.sub(r'\.{3,}\s*', ' ', daytwo_text)
            heading = re.search(r'^(.*?)\.', daytwo_text)
            first_sentence = heading.group(1) if heading else ''
            logging.debug('First sentence: %s', first_sentence)
            second_info = daytwo_text.replace(first_sentence, '')
            second_info = nice_plumbing(second_info)
            self.hwodict['daystwothroughseven'] = [first_sentence.strip(),
                                                   second_info.strip()]

        # Initialised up front so the check below is safe when the text has
        # no spotter section.
        spottext = ''
        spotter = re.search(r'(\.*SPOTTER INFORMATION STATEMENT.*?)(\s*\$\$)',
                            bodytext, re.DOTALL)
        if spotter:
            spottext = nice_plumbing(spotter.group(1))
            spottext = re.sub(r'SPOTTER INFORMATION STATEMENT[\.]{1,}',
                              '', spottext)
            spottext = re.sub('\n', ' ', spottext)
            self.hwodict['spotter'] = ['Spotter Information Statement',
                                       spottext.strip()]

        if spottext:
            # 'dayone' stays an empty list when no DAY ONE section was found.
            dayone_parts = self.hwodict['dayone']
            dayone_info = dayone_parts[1] if len(dayone_parts) > 1 else ''
            self.hwodict['today_text'] = '{0}{1}\n\n'.format(dayone_info, spottext)
            if not re.search('Spotter activation is not expected at this time',
                             spottext):
                self.hwodict['has_spotter'] = True
        return True
def nice_plumbing(text):
    """
    Tidy a block of outlook text with a fixed sequence of regex clean-ups.

    Applied in order: drop leading whitespace and any dots that follow it,
    collapse a trailing period-whitespace-period into a single period, and
    remove trailing newlines. Returns the tidied string.
    """
    cleanups = (
        (r'^\s*\.*', ''),
        (r'\.\s+\.$', '.'),
        (r'\n+$', ''),
    )
    tidied = text
    for pattern, replacement in cleanups:
        tidied = re.sub(pattern, replacement, tidied)
    return tidied