From 7af07eda4422a9514ed0ea437a6fa5fc718708ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mah=C3=A9=20Perrette?= Date: Wed, 8 Jan 2025 11:46:37 +0000 Subject: [PATCH] rime-pre-quantilemap added #30 --- pyproject.toml | 5 +- rimeX/datasets/download_isimip.py | 10 +- rimeX/preproc/digitize.py | 9 +- rimeX/preproc/quantilemaps.py | 152 ++++++++++++++++++++++++++++++ rimeX/scripts/share.py | 4 +- rimeX/stats.py | 5 +- 6 files changed, 172 insertions(+), 13 deletions(-) create mode 100644 rimeX/preproc/quantilemaps.py diff --git a/pyproject.toml b/pyproject.toml index bff9c85..a2f605a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "numpy", "pandas", "xarray", - "tqdm", + "tqdm", "flatdict", ] @@ -36,7 +36,7 @@ all = [ "pyam-iamc", # below for rimeX legacy code and postprocessing to work as expected "dask[distributed]", - "holoviews", + "holoviews", "hvplot", # rimeX.legacy : plot dashboard: "cartopy", @@ -56,6 +56,7 @@ rime-pre-gmt = "rimeX.preproc.global_average_tas:main" rime-pre-region = "rimeX.preproc.regional_average:main" rime-pre-wl = "rimeX.preproc.warminglevels:main" rime-pre-digitize = "rimeX.preproc.digitize:main" +rime-pre-quantilemap = "rimeX.preproc.quantilemaps:main" rime-run-timeseries = "rimeX.scripts.timeseries:main" rime-run-map = "rimeX.scripts.map:main" rime-run-table = "rimeX.scripts.table:main" diff --git a/rimeX/datasets/download_isimip.py b/rimeX/datasets/download_isimip.py index 0953b9a..86763b0 100644 --- a/rimeX/datasets/download_isimip.py +++ b/rimeX/datasets/download_isimip.py @@ -273,7 +273,7 @@ class Indicator: def __init__(self, name, frequency="monthly", folder=None, spatial_aggregation=None, depends_on=None, expr=None, time_aggregation=None, isimip_meta=None, shell=None, custom=None, - db=None, isimip_folder=None, comment=None, transform=None, year_min=None, units="", projection_baseline=None, + db=None, isimip_folder=None, comment=None, transform=None, year_min=None, units="", title="", projection_baseline=None, depends_on_climatology=False, climatology_quantile=False, **kwargs): @@ -311,7 +311,8 @@ def __init__(self, name, frequency="monthly", folder=None, self.comment = comment self.transform = transform # this refers to the baseline period and is only accounted for later on in the emulator (misnamed...) self.units = units - self.projection_baseline = projection_baseline + self.title = title + self.projection_baseline = projection_baseline or CONFIG["preprocessing.projection_baseline"] @classmethod def from_config(cls, name, **kw): @@ -641,8 +642,11 @@ def download_all(self, **kwargs): for simu in self.simulations: yield self.download(**simu, **kwargs) - def get_all_paths(self, **kwargs): + def get_all_paths(self, filter=None, **kwargs): for simu in self.simulations: + if filter is not None: + if not filter(simu): + continue yield self.get_path(**simu, **kwargs) diff --git a/rimeX/preproc/digitize.py b/rimeX/preproc/digitize.py index 525184f..ed1ba69 100644 --- a/rimeX/preproc/digitize.py +++ b/rimeX/preproc/digitize.py @@ -46,18 +46,21 @@ def load_seasonal_means_per_region(variable, model, experiment, region, subregio return pd.DataFrame(seasonal_means, index=years) -def transform_indicator(data, indicator, meta=None): +def transform_indicator(data, indicator, meta=None, dataref=None): if meta is None: meta = CONFIG.get(f"indicator.{indicator}", {}) y1, y2 = meta.get("projection_baseline", CONFIG["preprocessing.projection_baseline"]) + if dataref is None: + dataref = data.loc[y1:y2].mean() + if meta.get("transform") == "baseline_change": - data -= data.loc[y1:y2].mean() + data = data - dataref elif meta.get("transform") == "baseline_change_percent": - data = (data / data.loc[y1:y2].mean() - 1) * 100 + data = (data / dataref - 1) * 100 elif meta.get("transform"): raise NotImplementedError(meta["transform"]) diff --git a/rimeX/preproc/quantilemaps.py b/rimeX/preproc/quantilemaps.py new file mode 100644 index 0000000..028330e --- /dev/null +++ b/rimeX/preproc/quantilemaps.py @@ -0,0 +1,152 @@ +"""Quantile maps for impacts that do not stem from a regional average +""" + +from itertools import groupby +import tqdm +from pathlib import Path + +import numpy as np +import pandas as pd +import xarray as xa + +from rimeX.config import CONFIG, config_parser +from rimeX.logs import logger, log_parser +from rimeX.datasets.download_isimip import Indicator, _matches +from rimeX.preproc.warminglevels import get_warming_level_file +from rimeX.preproc.digitize import transform_indicator + + +def make_quantile_map_array(indicator:Indicator, warming_levels:pd.DataFrame, + quantile_bins=10, season="annual", running_mean_window=21, + projection_baseline=None): + + simulations = indicator.simulations + w = running_mean_window // 2 + quant_bins = quantile_bins + quant_step = 1/quant_bins + quants = np.linspace(quant_step/2, 1-quant_step/2, quant_bins) + + all_warming_levels = [] + + keywl = lambda r: r["warming_level"] + wl_records = sorted(warming_levels.to_dict(orient="records"), key=keywl) + + logger.info(f"Process quantile maps for {indicator.name} | {season}. Warming levels {wl_records[0]['warming_level']} to {wl_records[-1]['warming_level']}") + + for wl, group in groupby(wl_records, key=keywl): + + logger.info(f"==== {wl} ====") + files_to_concat = [] + group = list(group) + + for r in tqdm.tqdm(group): + simus_historical = [s for s in simulations if _matches(s["climate_scenario"], "historical") and _matches(s["climate_forcing"], r["model"])] + simus = [s for s in simulations if _matches(s["climate_scenario"], r["experiment"]) and _matches(s["climate_forcing"], r["model"])] + if len(simus_historical) == 0 or len(simus) == 0: + logger.warning(f"No simulation for {indicator.name} {r['model']} {r['experiment']}: Skip") + continue + assert len(simus_historical) == 1 + assert len(simus) == 1 + simu_historical = simus_historical[0] + simu = simus[0] + # warming_level year + filepath_hist = indicator.get_path(**simu_historical) + filepath = indicator.get_path(**simu) + + with xa.open_mfdataset([filepath_hist, filepath], combine='nested', concat_dim="time") as ds: + + # only select relevant months + if season is not None: + season_mask = ds["time.month"].isin(CONFIG["preprocessing.seasons"][season]) + else: + season_mask = slice(None) + + seasonal_sel = ds[indicator.name].isel(time=season_mask) + + # mean over the ref period + if projection_baseline is not None: + y1, y2 = projection_baseline + dataref = seasonal_sel.sel(time=slice(str(y1),str(y2))).mean("time") + + # mean over required time-slice + data = seasonal_sel.sel(time=slice(str(r['year']-w),str(r['year']+w))).mean("time") + + # subtract the reference period or express as relative change + data = transform_indicator(data, indicator.name, dataref=dataref).load() + + # assign metadata + data = data.assign_coords({ + "warming_level": r["warming_level"], + "model": r["model"], + "experiment": r["experiment"], + "midyear": r["year"], + }) + + files_to_concat.append(data) + + samples = xa.concat(files_to_concat, dim="sample") + quantiles = samples.quantile(quants, dim="sample") + all_warming_levels.append(quantiles) + + warming_level_coords = [dat["warming_level"] for dat in all_warming_levels] + all_warming_levels = xa.concat(all_warming_levels, dim="warming_level") + all_warming_levels = all_warming_levels.assign_coords({"warming_level": warming_level_coords}) # otherwise it's dropped apparently + all_warming_levels.name = indicator.name + return all_warming_levels + + +def main(): + import argparse + parser = argparse.ArgumentParser(description=__doc__, epilog="""""", formatter_class=argparse.RawDescriptionHelpFormatter, parents=[config_parser, log_parser]) + + group = parser.add_argument_group('Warming level matching') + group.add_argument("--running-mean-window", default=CONFIG["preprocessing.running_mean_window"], help="default: %(default)s years") + group.add_argument("--warming-level-file", default=None) + group.add_argument("--quantile-bins", default=10, type=int) + + group = parser.add_argument_group('Indicator variable') + group.add_argument("-v", "--variable", nargs='+', default=[], choices=CONFIG["isimip.variables"]) + group.add_argument("-i", "--indicator", nargs='+', default=[], choices=CONFIG["indicator"], help="includes additional, secondary indicator with specific monthly statistics") + group.add_argument("--season", nargs="+", default=list(CONFIG["preprocessing.seasons"]), choices=list(CONFIG["preprocessing.seasons"])) + group.add_argument("--simulation-round", nargs="+", default=CONFIG["isimip.simulation_round"], help="default: %(default)s") + group.add_argument("--projection-baseline", default=CONFIG["preprocessing.projection_baseline"], type=int, nargs=2, help="default: %(default)s") + + parser.add_argument("-O", "--overwrite", action='store_true') + + # group = parser.add_argument_group('Result') + # group.add_argument("--backend", nargs="+", default=CONFIG["preprocessing.isimip_binned_backend"], choices=["csv", "feather"]) + # group.add_argument("-O", "--overwrite", action='store_true') + # group.add_argument("--cpus", type=int) + + o = parser.parse_args() + + CONFIG["isimip.simulation_round"] = o.simulation_round + CONFIG["preprocessing.projection_baseline"] = o.projection_baseline + + if o.warming_level_file is None: + o.warming_level_file = get_warming_level_file(**{**CONFIG, **vars(o)}) + + warming_levels = pd.read_csv(o.warming_level_file) + + + root_dir = Path(o.warming_level_file).parent + + for name in o.variable + o.indicator: + indicator = Indicator.from_config(name) + + for season in o.season: + filepath = root_dir / "quantilemaps" / indicator.name / season / f"{indicator.name}_{season}_quantilemaps.nc" + if filepath.exists() and not o.overwrite: + logger.info(f"{filepath} already exists. Use -O or --overwrite to reprocess.") + continue + filepath.parent.mkdir(parents=True, exist_ok=True) + array = make_quantile_map_array(indicator, + warming_levels, + season=season, + quantile_bins=o.quantile_bins, + running_mean_window=o.running_mean_window, + projection_baseline=o.projection_baseline, + ) + logger.info(f"Write to {filepath}") + encoding = {array.name: {'zlib': True}} + array.to_netcdf(filepath, encoding=encoding) \ No newline at end of file diff --git a/rimeX/scripts/share.py b/rimeX/scripts/share.py index ba80b05..d1eba89 100644 --- a/rimeX/scripts/share.py +++ b/rimeX/scripts/share.py @@ -164,8 +164,8 @@ def _get_gmt_ensemble(o, parser): ens = np.empty((nt, o.gsat_samples)) for i in range(nt): # fit - quants = [50, 5, 95] - dist = fit_dist(gmt_q.iloc[i][quants], quants, o.gsat_dist) + quants = np.array([50, 5, 95]) + dist = fit_dist(gmt_q.iloc[i][quants], quants/100, o.gsat_dist) logger.debug(f"{i}: {repr_dist(dist)}") # resample (equally spaced percentiles) diff --git a/rimeX/stats.py b/rimeX/stats.py index 968cf74..5aac93d 100644 --- a/rimeX/stats.py +++ b/rimeX/stats.py @@ -4,7 +4,7 @@ def fit_dist(values, quantiles=None, dist_name=None, threshold=0.55): if quantiles is not None: - assert quantiles == [.5, .05, .95] + assert np.array(quantiles).tolist() == [.5, .05, .95], str(np.array(quantiles).tolist()) mid, lo, hi = values if dist_name == "auto": @@ -19,7 +19,6 @@ def fit_dist(values, quantiles=None, dist_name=None, threshold=0.55): return stat.norm(mid, ((hi-mid)+(mid-lo))/2 / stat.norm.ppf(.95)) if dist_name == "lognorm": - import numpy as np reverse = hi - mid < mid - lo @@ -34,7 +33,7 @@ def fit_dist(values, quantiles=None, dist_name=None, threshold=0.55): # the equality lo - loc > 0 becomes lo * (2*mid - lo - hi) - mid **2 - hi*lo <= 0 # and suffices to note that lo * (2*mid - lo - hi) - mid **2 - hi*lo = - (mid - lo)**2 which is always < 0 - normdist = fit_dist([np.log(mid - loc), np.log(lo - loc), np.log(hi - loc)], [50, 5, 95], "norm") + normdist = fit_dist([np.log(mid - loc), np.log(lo - loc), np.log(hi - loc)], [.5, .05, .95], "norm") mu, sigma = normdist.args dist = stat.lognorm(sigma, loc, np.exp(mu))