Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ for notes on deploying the project on a live system.

### Prerequisites

- Need to have acces to Levante.
- Need to have access to Levante.
- Need your own [conda environment](https://docs.dkrz.de/doc/levante/code-development/python.html#set-up-conda-for-individual-environments).

### Installing
Expand All @@ -35,6 +35,8 @@ of conduct, and the process for submitting pull requests to us.

- **Cosmin M. Marina** - *Provided Initial Scripts* -
[cosminmarina](https://github.com/cosminmarina)
- **Eugenio Lorente-Ramos** - *Enhanced data acquisition scripts* -
[eugenioLR](https://github.com/eugenioLR)

See also the list of
[contributors](https://github.com/cosminmarina/dkrz_utils/contributors)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ module load python3/2022.01-gcc-11.2.0
module load clint
module load xces

python 0_data_acq_main_ECROPS.py
python data_acquisition_main.py -p reanalysis --era5_vars_hour "10u,10v,msl,tp,q,2t" -f hour --exp_reanalysis ERA5 --dir ./data_acq


Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import sys
import argparse


def copy_files_from_csv(csv_file_path, destination_folder, variable, experiment):
"""
Copies files listed in a CSV file to a structured destination folder.
Expand All @@ -15,35 +16,39 @@ def copy_files_from_csv(csv_file_path, destination_folder, variable, experiment)
:param experiment: Experiment name (e.g., 'historical', 'past2k').
"""
# Open the CSV file and read the file paths
with open(csv_file_path, mode='r') as csv_file:
with open(csv_file_path, mode="r") as csv_file:
csv_reader = csv.reader(csv_file)

for row in csv_reader:
original_file_path = row[0].strip()

# Extract ensemble name from the file path
path_components = original_file_path.split('/')
path_components = original_file_path.split("/")
try:
# Find the position of the experiment in the path
exp_index = path_components.index(experiment)
ensemble = path_components[exp_index + 1] # Ensemble is next component
except (ValueError, IndexError):
print(f"Could not extract ensemble from: {original_file_path}")
continue

# Build destination path based on experiment type
if experiment.startswith('ssp'):
if experiment.startswith("ssp"):
# Projections: destination_folder/<variable>/projections/<experiment>/<ensemble>/
dest_dir = os.path.join(destination_folder, variable, 'projections', experiment, ensemble)
dest_dir = os.path.join(
destination_folder, variable, "projections", experiment, ensemble
)
else:
# Historical/Past2K: destination_folder/<variable>/<experiment>/<ensemble>/
dest_dir = os.path.join(destination_folder, variable, experiment, ensemble)
dest_dir = os.path.join(
destination_folder, variable, experiment, ensemble
)
os.makedirs(dest_dir, exist_ok=True)

# Copy file to destination
file_name = os.path.basename(original_file_path)
dest_file_path = os.path.join(dest_dir, file_name)

try:
shutil.copy2(original_file_path, dest_file_path)
print(f"Copied: {original_file_path} -> {dest_file_path}")
Expand All @@ -55,23 +60,33 @@ def copy_files_from_csv(csv_file_path, destination_folder, variable, experiment)
print(f"Error copying {original_file_path}: {e}")
sys.stdout.flush()


def main():
# Set up command line arguments
parser = argparse.ArgumentParser(
description='Copy CMIP6 files to structured directories based on CSV lists.'
description="Copy CMIP6 files to structured directories based on CSV lists."
)
parser.add_argument(
"-s",
"--source",
default="./data_acq/",
help="Folder containing CSV files (default: ./data_acq/)",
)
parser.add_argument(
"-d",
"--dest",
default="./data_raw/",
help="Destination base folder (default: ./data_raw/)",
)
parser.add_argument(
"-p",
"--pattern",
default="*.csv",
help="Glob pattern to select specific CSV files (default: *.csv)",
)
parser.add_argument('-s', '--source',
default='./data_acq/',
help='Folder containing CSV files (default: ./data_acq/)')
parser.add_argument('-d', '--dest',
default='./data_raw/',
help='Destination base folder (default: ./data_raw/)')
parser.add_argument('-p', '--pattern',
default='*.csv',
help='Glob pattern to select specific CSV files (default: *.csv)')


args = parser.parse_args()

# Use the paths from arguments (or defaults if not provided)
data_acq_folder = args.source
destination_folder = args.dest
Expand All @@ -82,44 +97,56 @@ def main():
data_acq_folder += os.path.sep
if not destination_folder.endswith(os.path.sep):
destination_folder += os.path.sep

# Find matching CSV files using pattern
search_pattern = os.path.join(data_acq_folder, file_pattern)
csv_files = sorted(glob.glob(search_pattern))

print(f"Source folder: {data_acq_folder}")
print(f"Destination folder: {destination_folder}")
print(f"Search pattern: {file_pattern}")
print(f"Found CSV files: {csv_files}")
sys.stdout.flush()

if not csv_files:
print(f"No CSV files found matching pattern: '{file_pattern}' in {data_acq_folder}")
print(
f"No CSV files found matching pattern: '{file_pattern}' in {data_acq_folder}"
)
sys.stdout.flush()
return

# Process each CSV file
for csv_file_path in csv_files:
print(f"Processing CSV: {csv_file_path}")
sys.stdout.flush()

# Extract variable and experiment from filename
filename = os.path.basename(csv_file_path)
parts = filename.split('__cmip6_')[-1].split('_[')[0].split('_')

# Determine experiment and variable
if parts[0] == 'past2k':
experiment = 'past2k'
variable = parts[1]
elif parts[0].startswith('ssp'):
if "cmip6" in filename:
parts = filename.split("__cmip6_")[-1].split("_[")[0].split("_")

# Determine experiment and variable
match parts[0]:
case "past2k":
experiment = "past2k"
variable = parts[1]
case "historical":
experiment = "historical"
variable = parts[0]
case ["ssp", *_]:
experiment = parts[0]
variable = parts[1]
elif "reanalisys" in filename:
parts = filename.split("__reanalisys_")[-1].split("_[")[0].split("_")
experiment = parts[0]
variable = parts[1]
else:
experiment = 'historical'
variable = parts[0]
print(f"File {csv_file_path} could not be processed.")
continue

# Copy files with structured paths
copy_files_from_csv(csv_file_path, destination_folder, variable, experiment)


if __name__ == "__main__":
main()
File renamed without changes.
Loading