-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathgdassoca_obsstats.py
157 lines (130 loc) · 5.96 KB
/
gdassoca_obsstats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/usr/bin/env python3
# creates figures of timeseries from the csv outputs computed by gdassoca_obsstats.x
import argparse
from itertools import product
import os
import glob
import pandas as pd
from jinja2 import Template
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
colors = [
"lightsteelblue",
"lightgreen",
"peachpuff",
"lightpink",
"lightgoldenrodyellow",
"paleturquoise",
"lightcoral",
"palegreen",
"palegoldenrod",
"mistyrose",
"lavender"
"lightsalmon",
]
def get_inst(csv_file_name):
"""Extract the instrument name from the csv file name. gdas.t00z.ocn.sst_ahi_h08_l3c.stats.csv -> sst_ahi_h08_l3c"""
return csv_file_name.split('.')[-3]
class ObsStats:
def __init__(self):
self.data = pd.DataFrame()
def read_csv(self, filepaths):
# Iterate through the list of file paths and append their data
for filepath in filepaths:
new_data = pd.read_csv(filepath)
# Convert date to datetime for easier plotting
new_data['date'] = pd.to_datetime(new_data['date'], format='%Y%m%d%H')
self.data = pd.concat([self.data, new_data], ignore_index=True)
self.data.sort_values('date', inplace=True)
def plot_timeseries(self, ocean, variable, inst="", dirout=""):
# Filter data for the given ocean and variable
filtered_data = self.data[(self.data['Ocean'] == ocean) & (self.data['Variable'] == variable)]
if filtered_data.empty:
print("No data available for the given ocean and variable combination.")
return []
# Get unique experiments
experiments = filtered_data['Exp'].unique()
experiments.sort()
print(experiments)
# Plot settings
fig, axs = plt.subplots(3, 1, figsize=(10, 15), sharex=True)
fig.suptitle(f'{inst} {variable} statistics, {ocean} ocean', fontsize=18, fontweight='bold')
exp_counter = 0
for exp in experiments:
exp_data = self.data[(self.data['Ocean'] == ocean) &
(self.data['Variable'] == variable) &
(self.data['Exp'] == exp)]
# Plot RMSE
axs[0].plot(exp_data['date'], exp_data['RMSE'], marker='o', linestyle='-', color=colors[exp_counter], label=exp)
axs[0].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H'))
axs[0].xaxis.set_major_locator(mdates.DayLocator())
axs[0].tick_params(labelbottom=False)
axs[0].set_ylabel('RMSE', fontsize=18, fontweight='bold')
axs[0].legend()
axs[0].grid(True)
# Plot Bias
axs[1].plot(exp_data['date'], exp_data['Bias'], marker='o', linestyle='-', color=colors[exp_counter], label=exp)
axs[1].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H'))
axs[1].xaxis.set_major_locator(mdates.DayLocator())
axs[1].tick_params(labelbottom=False)
axs[1].set_ylabel('Bias', fontsize=18, fontweight='bold')
axs[1].grid(True)
# Plot Count
axs[2].plot(exp_data['date'], exp_data['Count'], marker='o', linestyle='-', color=colors[exp_counter], label=exp)
axs[2].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H'))
axs[2].xaxis.set_major_locator(mdates.DayLocator())
axs[2].set_ylabel('Count', fontsize=18, fontweight='bold')
axs[2].grid(True)
exp_counter += 1
# Improve layout and show plot
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.savefig(f'{dirout}/{inst}_{variable}_{ocean}.png')
# close the figure
plt.close(fig)
return experiments
if __name__ == "__main__":
epilog = ["Usage examples: ./gdassoca_obsstats.py --exps cp1/COMROOT/cp1 cp2/COMROOT/cp2 --inst sst_abi_g16_l3c --dirout cp1vscp2"]
parser = argparse.ArgumentParser(description="Observation space RMSE's and BIAS's",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=os.linesep.join(epilog))
parser.add_argument("--exps", nargs='+', required=True,
help="Path to the experiment's COMROOT")
parser.add_argument("--inst", required=True, help="The name of the instrument/platform (ex: sst_abi_g16_l3c) or a wild card (eg sst*)")
parser.add_argument("--dirout", required=True, help="Output directory")
args = parser.parse_args()
insts = []
inst = args.inst
os.makedirs(args.dirout, exist_ok=True)
# Get all instruments/obs spaces
for exp in args.exps:
wc = exp + f'/*.*/??/analysis/ocean/*{inst}*.stats.csv'
flist = glob.glob(wc)
for fname in flist:
insts.append(get_inst(fname))
insts = list(set(insts))
insts.sort()
print(insts)
experiments = []
for inst in insts:
print(f"Processing {inst}")
flist = []
for exp in args.exps:
wc = exp + f'/*.*/??/analysis/ocean/*{inst}*.stats.csv'
flist.append(glob.glob(wc))
flist = sum(flist, [])
obsStats = ObsStats()
obsStats.read_csv(flist)
for var, ocean in product(['ombg_noqc', 'ombg_qc'],
['Global', 'Atlantic', 'Pacific', 'Indian', 'Arctic', 'Southern']):
experiments.extend(obsStats.plot_timeseries(ocean, var, inst=inst, dirout=args.dirout))
# Select unique elements of experiments
experiments = list(set(experiments))
experiments.sort()
# Create the html document from the jinja2 template
template_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'gdassoca_obsstats_template.html')
template = Template(open(template_path).read())
context = {'insts': insts, 'experiments': experiments}
indexhtml = template.render(context)
# Write the rendered HTML to a file
with open(f'{args.dirout}/index.html', 'w') as f:
f.write(indexhtml)