Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Canadian Meteorological Center Model Support #76

Merged
merged 18 commits into from
Dec 15, 2023
Merged
87 changes: 0 additions & 87 deletions src/nwp_consumer/__init__.py

This file was deleted.

22 changes: 14 additions & 8 deletions src/nwp_consumer/internal/inputs/cmc/client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Implements a client to fetch ICON data from DWD."""
"""Implements a client to fetch GDPS/GEPS data from CMC."""
import bz2
import datetime as dt
import pathlib
Expand Down Expand Up @@ -33,9 +33,9 @@


class Client(internal.FetcherInterface):
"""Implements a client to fetch ICON data from DWD."""
"""Implements a client to fetch GDPS/GEPS data from CMC."""

baseurl: str # The base URL for the ICON model
baseurl: str # The base URL for the GDPS/GEPS model
model: str # The model to fetch data for
parameters: list[str] # The parameters to fetch
conform: bool # Whether to rename parameters to OCF names and clear unwanted coordinates
Expand All @@ -59,7 +59,7 @@ def __init__(self, model: str, hours: int = 48, param_group: str = "default") ->
self.baseurl += "/ensemble/geps/grib2/raw/"
case _:
raise ValueError(
f"unknown icon model {model}. Valid models are 'gdps' and 'geps'",
f"unknown GDPS/GEPS model {model}. Valid models are 'gdps' and 'geps'",
)

match (param_group, model):
Expand All @@ -72,6 +72,12 @@ def __init__(self, model: str, hours: int = 48, param_group: str = "default") ->
case ("full", "gdps"):
self.parameters = GDPS_VARIABLES
self.conform = False
case ("basic", "geps"):
self.parameters = GEPS_VARIABLES[:2]
self.conform = False
case ("basic", "gdps"):
self.parameters = GDPS_VARIABLES[:2]
self.conform = False
case (_, _):
raise ValueError(
f"unknown parameter group {param_group}."
Expand All @@ -85,10 +91,10 @@ def listRawFilesForInitTime(self, *, it: dt.datetime) -> list[internal.FileInfoM
# GDPS data is only available for today's and yesterday's date. If data hasn't been uploaded for that init
# time yet, then yesterday's data will still be present on the server.
if it.date() != dt.datetime.now(dt.timezone.utc).date():
raise ValueError("ICON data is only available on today's date")
raise ValueError("GDPS/GEPS data is only available on today's date")
return []

# The ICON model only runs on the hours [00, 12]
# The GDPS/GEPS model only runs on the hours [00, 12]
if it.hour not in [0, 12]:
return []

Expand All @@ -101,7 +107,7 @@ def listRawFilesForInitTime(self, *, it: dt.datetime) -> list[internal.FileInfoM
# The list of files for the parameter
parameterFiles: list[internal.FileInfoModel] = []

# Fetch DWD webpage detailing the available files for the timestep
# Fetch CMC webpage detailing the available files for the timestep
response = requests.get(f"{self.baseurl}/{it.strftime('%H')}/000/", timeout=3)

if response.status_code != 200:
Expand Down Expand Up @@ -184,7 +190,7 @@ def mapTemp(self, *, p: pathlib.Path) -> xr.Dataset: # noqa: D102
return xr.Dataset()
# Rename variable to the value, as some have unknown as the name
if list(ds.data_vars.keys())[0] == "unknown":
ds = ds.rename({"unknown": str(p.name).split("_")[3].lower()})
ds = ds.rename({"unknown": str(p.name).split("_")[2].lower()})

# Rename variables that are both pressure level and surface
if "surface" in list(ds.coords):
Expand Down
6 changes: 5 additions & 1 deletion src/nwp_consumer/internal/inputs/cmc/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,15 @@ def test_mapTemp(self) -> None:
# Check latitude and longitude are injected
self.assertTrue("latitude" in out.coords)
self.assertTrue("longitude" in out.coords)
self.assertEqual(len(out["latitude"].values), 1201)
self.assertEqual(len(out["longitude"].values), 2400)
# Check that the dimensions are correctly ordered and renamed
self.assertEqual(
out[next(iter(out.data_vars.keys()))].dims,
("variable", "init_time", "step", "latitude", "longitude"),
)
# Check that the parameter is renamed
self.assertEqual(out["variable"].values[0], "v")
self.assertEqual(out["variable"].values[0], "v10")

# Test with europe file
testFilePath: pathlib.Path = (
Expand All @@ -39,6 +41,8 @@ def test_mapTemp(self) -> None:
# Check latitude and longitude are present
self.assertTrue("latitude" in out.coords)
self.assertTrue("longitude" in out.coords)
self.assertEqual(len(out["latitude"].values), 1201)
self.assertEqual(len(out["longitude"].values), 2400)
# Check that the dimensions are correctly ordered and renamed
self.assertEqual(
out[next(iter(out.data_vars.keys()))].dims,
Expand Down
39 changes: 39 additions & 0 deletions src/test_integration/test_inputs_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from nwp_consumer.internal.inputs.ecmwf._models import ECMWFMarsFileInfo
from nwp_consumer.internal.inputs.icon._models import IconFileInfo
from nwp_consumer.internal.inputs.metoffice._models import MetOfficeFileInfo
from nwp_consumer.internal.inputs.cmc._models import CMCFileInfo

storageClient = outputs.localfs.Client()

Expand Down Expand Up @@ -92,6 +93,24 @@ def test_downloadsRawGribFileFromICON(self) -> None:
_, tmpPath = iconClient.downloadToTemp(fi=fileInfo)
self.assertGreater(tmpPath.stat().st_size, 40000)

def test_downloadsRawGribFileFromCMC(self) -> None:
    """Download a single GDPS GRIB2 file from CMC and sanity-check the local copy.

    The init time is midnight (UTC) of the current day. The downloaded file is
    expected to keep its ".grib2" suffix and to be larger than 40000 bytes.
    """
    # Truncate "now" to the 00Z run of the current UTC day
    midnightUTC: dt.datetime = dt.datetime.now(tz=dt.UTC).replace(
        hour=0, minute=0, second=0, microsecond=0,
    )
    # Timestamp portion embedded in the remote file name
    initStamp: str = midnightUTC.strftime('%Y%m%d%H')

    gdpsClient = inputs.cmc.Client(model="gdps")

    gribInfo = CMCFileInfo(
        it=midnightUTC,
        filename=f"CMC_glb_VGRD_ISBL_200_latlon.15x.15_{initStamp}_P120.grib2",
        currentURL="https://dd.weather.gc.ca/model_gem_global/15km/grib2/lat_lon/00/120",
        step=1,
    )

    # downloadToTemp returns a pair; only the temporary path is inspected here
    _, localPath = gdpsClient.downloadToTemp(fi=gribInfo)

    self.assertTrue(localPath.name.endswith(".grib2"))
    self.assertGreater(localPath.stat().st_size, 40000)


class TestListRawFilesForInitTime(unittest.TestCase):
def test_getsFileInfosFromCEDA(self) -> None:
Expand Down Expand Up @@ -152,6 +171,26 @@ def test_getsFileInfosFromICON(self) -> None:
self.assertTrue(len(euFileInfos) > 0)
self.assertNotEqual(fileInfos, euFileInfos)

def test_getsFileInfosFromCMC(self) -> None:
    """List the raw files for today's 00Z run from both CMC models.

    Each of the "gdps" and "geps" listings must be non-empty, and the two
    listings must not compare equal.
    """
    # Truncate "now" to the 00Z run of the current UTC day
    midnightUTC: dt.datetime = dt.datetime.now(tz=dt.UTC).replace(
        hour=0, minute=0, second=0, microsecond=0,
    )

    # Deterministic model listing
    gdpsClient = inputs.cmc.Client(model="gdps", hours=4, param_group="basic")
    gdpsFileInfos = gdpsClient.listRawFilesForInitTime(it=midnightUTC)
    self.assertTrue(len(gdpsFileInfos) > 0)

    # Ensemble model listing, requested with the same settings
    gepsClient = inputs.cmc.Client(model="geps", hours=4, param_group="basic")
    gepsFileInfos = gepsClient.listRawFilesForInitTime(it=midnightUTC)
    self.assertTrue(len(gepsFileInfos) > 0)

    self.assertNotEqual(gdpsFileInfos, gepsFileInfos)

if __name__ == "__main__":
unittest.main()