4 changes: 2 additions & 2 deletions src/analogues/encode_05red.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH --job-name=encode_05red
#SBATCH --partition=gpu
#SBATCH --partition=gpu
#SBATCH --nodes=2
#SBATCH --exclusive
#SBATCH --mem=0
@@ -16,7 +16,7 @@ export XLA_FLAGS=--xla_gpu_cuda_data_dir=/sw/spack-levante/nvhpc-24.7-py26uc/Lin
export TF_FORCE_GPU_ALLOW_GROWTH=true
module load texlive/live2021-gcc-11.2.0
source activate
conda activate ${ENV2}
conda activate "${ENV2}"
#python encode_search.py -m "encode" -f "config/identify_hw_france2003-mv-05-red.json" > out_enc_05red.log
#python encode_search.py -m "encode" -f "config/train/train_france2003_era5.json" > out_enc_05red.log
#python encode_search.py -m "search-ecmwf" -f "config/search/train_france2003_ecmwf.json" > out_enc_05red.log
740 changes: 560 additions & 180 deletions src/analogues/encode_search.py

Large diffs are not rendered by default.

106 changes: 71 additions & 35 deletions src/analogues/make_analog_dataset.py
@@ -5,75 +5,111 @@
from tqdm import tqdm
import numpy as np


def parse_arguments():
parser = argparse.ArgumentParser(description='Build Xarray from CSV index and ensemble NetCDF files.')
parser.add_argument('--csv_path', type=str, required=True, help='Path to the CSV file')
parser.add_argument('--variable', type=str, required=True, help='Variable name (e.g., gz500, msl)')
parser.add_argument('--input_dir', type=str, required=True, help='Input directory for ensemble data (e.g., ./data/ecmwf/)')
parser.add_argument('--output_path', type=str, required=True, help='Output path for the resulting NetCDF file')
parser = argparse.ArgumentParser(
description="Build Xarray from CSV index and ensemble NetCDF files."
)
parser.add_argument(
"--csv_path", type=str, required=True, help="Path to the CSV file"
)
parser.add_argument(
"--variable", type=str, required=True, help="Variable name (e.g., gz500, msl)"
)
parser.add_argument(
"--input_dir",
type=str,
required=True,
help="Input directory for ensemble data (e.g., ./data/ecmwf/)",
)
parser.add_argument(
"--output_path",
type=str,
required=True,
help="Output path for the resulting NetCDF file",
)
return parser.parse_args()


def main():
args = parse_arguments()

# Read the CSV and sort by 'emo-day' and 'rank'
df = pd.read_csv(args.csv_path, parse_dates=['time', 'emo-day'])
df = df.sort_values(['emo-day', 'rank']).reset_index(drop=True)
df = pd.read_csv(args.csv_path, parse_dates=["time", "emo-day"])
df = df.sort_values(["emo-day", "rank"]).reset_index(drop=True)

# Get base dimensions and coordinates from the first file (assuming all variables share the same structure)
example_ens = 'ens_01' # Example used to obtain coordinates
example_path = os.path.join(args.input_dir, example_ens, args.variable, f'ecmwf_{example_ens.split("_")[1]}_{args.variable}_1993-2016_.nc')
example_ens = "ens_01" # Ejemplo para obtener coordenadas
example_path = os.path.join(
args.input_dir,
example_ens,
args.variable,
f'ecmwf_{example_ens.split("_")[1]}_{args.variable}_1993-2016_.nc',
)
with xr.open_dataset(example_path) as ds:
lats = ds.lat.values
lons = ds.lon.values
time_coords = df['emo-day'].unique()
time_coords = df["emo-day"].unique()

# Initialize an empty, NaN-filled Dataset for memory handling
data_shape = (len(time_coords), 25, len(lats), len(lons)) # (time, analog, lat, lon)
data_shape = (
len(time_coords),
25,
len(lats),
len(lons),
) # (time, analog, lat, lon)
data = xr.DataArray(
np.full(data_shape, np.nan, dtype=np.float32),
dims=['time', 'analog', 'lat', 'lon'],
dims=["time", "analog", "lat", "lon"],
coords={
'time': pd.to_datetime(time_coords),
'analog': np.arange(25),
'lat': lats,
'lon': lons
"time": pd.to_datetime(time_coords),
"analog": np.arange(25),
"lat": lats,
"lon": lons,
},
name=args.variable
name=args.variable,
)
ds_out = xr.Dataset({args.variable: data})

# Process each row of the CSV
for idx, row in tqdm(df.iterrows(), total=len(df), desc='Processing'):
for idx, row in tqdm(df.iterrows(), total=len(df), desc="Processing"):
# Build the path to the NetCDF file
ens = row['ens']
nc_path = os.path.join(args.input_dir, ens, args.variable, f'ecmwf_{ens.split("_")[1]}_{args.variable}_1993-2016_.nc')

ens = row["ens"]
nc_path = os.path.join(
args.input_dir,
ens,
args.variable,
f'ecmwf_{ens.split("_")[1]}_{args.variable}_1993-2016_.nc',
)

# Compute the target time (time + leadtime hours)
target_time = row['time'] + pd.to_timedelta(row['leadtime'], unit='h')
target_time = row["time"] + pd.to_timedelta(row["leadtime"], unit="h")

try:
# Load the specific field
with xr.open_dataset(nc_path) as ds:
# Select the nearest time (assuming the lead time is in hours)
ds_target = ds.sel(time=target_time, method='nearest')
ds_target = ds.sel(time=target_time, method="nearest")
value = ds_target[args.variable].load()

# Assign into the output Dataset
time_idx = np.where(ds_out.time == row['emo-day'])[0][0]
analog_idx = int(row['rank'].replace("Analog", ""))
time_idx = np.where(ds_out.time == row["emo-day"])[0][0]
analog_idx = int(row["rank"].replace("Analog", ""))
ds_out[args.variable][time_idx, analog_idx, :, :] = value
except FileNotFoundError:
print(f"Archivo no encontrado: {nc_path}", flush=True)
continue
except KeyError:
print(f"Variable {args.variable} no encontrada en {nc_path}", flush=True)
continue

# Save the resulting Dataset
out_path = os.path.join(args.output_path, args.variable, f'ecmwf_{args.variable}_1993-2016_.nc')
out_path = os.path.join(
args.output_path, args.variable, f"ecmwf_{args.variable}_1993-2016_.nc"
)
ds_out.to_netcdf(out_path)
print(f"Dataset guardado en {out_path}")

if __name__ == '__main__':
main()

if __name__ == "__main__":
main()
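
The core of the new assembly loop above maps each CSV row into the (time, analog, lat, lon) array: the target time is the forecast start plus the lead time in hours, the nearest model timestamp is selected, and the field is written at the matching emo-day/rank position. A minimal, self-contained sketch of that indexing logic, using a hypothetical tiny grid and synthetic values in place of the real NetCDF files:

import numpy as np
import pandas as pd
import xarray as xr

# Hypothetical tiny grid and 25-member analog pool, just to exercise the indexing.
times = pd.date_range("1993-06-01", periods=3)  # the 'emo-day' axis
out = xr.DataArray(
    np.full((3, 25, 2, 2), np.nan, dtype=np.float32),
    dims=["time", "analog", "lat", "lon"],
    coords={"time": times, "analog": np.arange(25), "lat": [45.0, 46.0], "lon": [2.0, 3.0]},
)

# One CSV row: the forecast starts at 'time'; the analogue is valid 'leadtime' hours later.
row = {"time": pd.Timestamp("1993-05-30"), "leadtime": 48,
       "emo-day": pd.Timestamp("1993-06-01"), "rank": "Analog03"}
target_time = row["time"] + pd.to_timedelta(row["leadtime"], unit="h")

# In the real script the field comes from ds.sel(time=target_time, method="nearest");
# a synthetic 2x2 field stands in for it here.
field = np.ones((2, 2), dtype=np.float32)

time_idx = np.where(out.time == row["emo-day"])[0][0]
analog_idx = int(row["rank"].replace("Analog", ""))
out[time_idx, analog_idx, :, :] = field
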
2 changes: 1 addition & 1 deletion src/analogues/make_analog_dataset.sh
@@ -11,5 +11,5 @@

module load python3
source activate
conda activate ${ENV2}
conda activate "${ENV2}"
python make_analog_dataset.py --csv_path "anal_dict/post_processed_analogues_1993-2016.csv" --variable gz300 --input_dir "./data/ecmwf/" --output_path "./data/analogues/"
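
The wrapper script feeds a post-processed analogue index CSV into make_analog_dataset.py. Based on the columns the Python script reads (time, emo-day, ens, rank, leadtime), a minimal synthetic index for a quick dry run could be written like this; the file name and values are placeholders, not part of the actual pipeline:

import pandas as pd

# Columns mirror what make_analog_dataset.py expects; values are made up for a dry run.
index = pd.DataFrame({
    "time": [pd.Timestamp("1993-05-30")],
    "emo-day": [pd.Timestamp("1993-06-01")],
    "ens": ["ens_01"],
    "rank": ["Analog00"],
    "leadtime": [48],  # hours
})
index.to_csv("test_analogues.csv", index=False)
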
139 changes: 93 additions & 46 deletions src/cmip6_data_acq/0_data_acq_main_ECROPS.py
@@ -7,28 +7,29 @@

import logging
import sys

# from FREVA import freva_search
import data_acq_freva_search_ECROPS
import os

# projects = ['cmip6', 'reanalysis']
projects = ['cmip6']
projects = ["cmip6"]
# models = ['cesm2',
# 'cnrm-cm6-1-HR',
# 'gfdl-esm4',
# 'ec-earth3',
# 'mpi-esm1-2-hr',
# 'noresm2-mm',
# 'hadgem3-gc31-mm']
models = ['mpi-esm1-2-lr']
models = ["mpi-esm1-2-lr"]

# models = [] # DO NOT DO ANYTHING FOR CMIP6
variables_cmip = ['tdps', 'ua', 'va', 'tasmax', 'lai']
variables_cmip = ["tdps", "ua", "va", "tasmax", "lai"]

# variables_era5_daily_monthly = ['tasmax', 'tasmin', 'tas', 'pr', 'rsds', 'tdps', 'sfcwind', 'hurs']
variables_era5_daily_monthly = ['tdps', 'ua', 'va', 'tasmax', 'lai']
variables_era5_daily_monthly = ["tdps", "ua", "va", "tasmax", "lai"]
# variables_era5_hourly = ['uas', 'vas']
variables_era5_hourly = []
variables_era5_hourly: list[str] = []

# variables_era5_hourly = ['uas', 'vas', 'rsds', 'tdps']
# 10m wind speed vas and uas are calculated with ECROPS function in wofost_util/util.py wind10to2(wind10) function
@@ -38,64 +38,110 @@
vorticity_height = 20000 # 200hPa

# frequency = ['hour', 'day', 'mon']
frequency = ['day']
frequency = ["day"]
# frequency = ['mon']
# exp_cmip6 = ['ssp370', 'ssp585', 'historical']
exp_cmip6 = ['historical', 'past2k']
exp_reanalysis = 'era5'
exp_cmip6 = ["historical", "past2k"]
exp_reanalysis = "era5"


def main():
def main():
# First initialize a logger instance
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
force=True,
handlers=[
logging.FileHandler("LOG_Data_Acquisition_FREVA_output.log"),
logging.StreamHandler(sys.stdout)
]
logging.StreamHandler(sys.stdout),
],
)
logging.info('Started Freva files main programme \n')
logging.info("Started Freva files main programme \n")

for project in projects:
if project == 'cmip6':
if project == "cmip6":
for i in range(len(models)):
for exp in exp_cmip6:
for var in variables_cmip:
logging.info("\n \n" + "MODEL: " + str(models[i]) +
", EXPERIMENT: " + str(exp) +
", VARIABLE: " + str(var) +
", FREQUENCY: " + str(frequency) + "\n")
if not exp == 'historical':
data_acq_freva_search_ECROPS.freva_search_ssp(project, models[i], var, frequency, exp)
logging.info('\n\n **** Finished with SSP files **** \n \n')
if exp == 'historical':
data_acq_freva_search_ECROPS.freva_search_historical(project, models[i], var, frequency)
logging.info('\n\n **** Finished with Historical files **** \n\n')

if project == 'reanalysis':
logging.info(
"\n \n"
+ "MODEL: "
+ str(models[i])
+ ", EXPERIMENT: "
+ str(exp)
+ ", VARIABLE: "
+ str(var)
+ ", FREQUENCY: "
+ str(frequency)
+ "\n"
)
if not exp == "historical":
data_acq_freva_search_ECROPS.freva_search_ssp(
project, models[i], var, frequency, exp
)
logging.info(
"\n\n **** Finished with SSP files **** \n \n"
)
if exp == "historical":
data_acq_freva_search_ECROPS.freva_search_historical(
project, models[i], var, frequency
)
logging.info(
"\n\n **** Finished with Historical files **** \n\n"
)

if project == "reanalysis":
for var in variables_era5_daily_monthly:
logging.info("\n \n" + "PROJECT: " + str(project) +
", EXPERIMENT: " + str(exp_reanalysis) +
", VARIABLE: " + str(var) +
", FREQUENCY: " + str(frequency[2]) + "\n")
data_acq_freva_search_ECROPS.freva_search_reanalysis(project, exp_reanalysis, var, frequency[2])

logging.info(
"\n \n"
+ "PROJECT: "
+ str(project)
+ ", EXPERIMENT: "
+ str(exp_reanalysis)
+ ", VARIABLE: "
+ str(var)
+ ", FREQUENCY: "
+ str(frequency[2])
+ "\n"
)
data_acq_freva_search_ECROPS.freva_search_reanalysis(
project, exp_reanalysis, var, frequency[2]
)

for var in variables_era5_daily_monthly:
logging.info("\n \n" + "PROJECT: " + str(project) +
", EXPERIMENT: " + str(exp_reanalysis) +
", VARIABLE: " + str(var) +
", FREQUENCY: " + str(frequency[1]) + "\n")
data_acq_freva_search_ECROPS.freva_search_reanalysis(project, exp_reanalysis, var, frequency[1])

logging.info(
"\n \n"
+ "PROJECT: "
+ str(project)
+ ", EXPERIMENT: "
+ str(exp_reanalysis)
+ ", VARIABLE: "
+ str(var)
+ ", FREQUENCY: "
+ str(frequency[1])
+ "\n"
)
data_acq_freva_search_ECROPS.freva_search_reanalysis(
project, exp_reanalysis, var, frequency[1]
)

for var in variables_era5_hourly:
logging.info("\n \n" + "PROJECT: " + str(project) +
", EXPERIMENT: " + str(exp_reanalysis) +
", VARIABLE: " + str(var) +
", FREQUENCY: " + str(frequency[0]) + "\n")
data_acq_freva_search_ECROPS.freva_search_reanalysis(project, exp_reanalysis, var, frequency[0])


if __name__ == '__main__':
logging.info(
"\n \n"
+ "PROJECT: "
+ str(project)
+ ", EXPERIMENT: "
+ str(exp_reanalysis)
+ ", VARIABLE: "
+ str(var)
+ ", FREQUENCY: "
+ str(frequency[0])
+ "\n"
)
data_acq_freva_search_ECROPS.freva_search_reanalysis(
project, exp_reanalysis, var, frequency[0]
)


if __name__ == "__main__":
main()
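
The reformatted logging calls above keep the original string concatenation, only split across lines by the formatter. The same messages could be built more compactly with an f-string; a sketch under the same variable names, with placeholder values:

import logging

logging.basicConfig(level=logging.INFO)
model, exp, var, frequency = "mpi-esm1-2-lr", "historical", "tasmax", ["day"]
# Equivalent message to the concatenated version, built with an f-string.
logging.info(
    f"\n \nMODEL: {model}, EXPERIMENT: {exp}, "
    f"VARIABLE: {var}, FREQUENCY: {frequency}\n"
)
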