GheodeAI · cosminmarina · Sep 15, 2025 · Sep 9, 2025 · Sep 9, 2025 · Sep 9, 2025
diff --git a/README.md b/README.md
@@ -10,7 +10,7 @@ for notes on deploying the project on a live system.
 
 ### Prerequisites
 
-- Need to have acces to Levante.
+- Need to have access to Levante.
 - Need your own [conda environment](https://docs.dkrz.de/doc/levante/code-development/python.html#set-up-conda-for-individual-environments).
 
 ### Installing
@@ -35,6 +35,8 @@ of conduct, and the process for submitting pull requests to us.
 
   - **Cosmin M. Marina** - *Provided Initial Scripts* -
     [cosminmarina](https://github.com/cosminmarina)
+  - **Eugenio Lorente-Ramos** - *Enhanced data acquisition scripts* -
+    [eugenioLR](https://github.com/eugenioLR)
 
 See also the list of
 [contributors](https://github.com/cosminmarina/dkrz_utils/contributors)

diff --git a/src/cmip6_data_acq/SLURM_data_acquisition.sh → ...limate_data_acq/SLURM_data_acquisition.sh b/src/cmip6_data_acq/SLURM_data_acquisition.sh → ...limate_data_acq/SLURM_data_acquisition.sh
@@ -19,6 +19,6 @@ module load python3/2022.01-gcc-11.2.0
 module load clint
 module load xces
 
-python 0_data_acq_main_ECROPS.py
+python data_acquisition_main.py -p reanalysis --era5_vars_hour "10u,10v,msl,tp,q,2t" -f hour --exp_reanalysis ERA5 --dir ./data_acq
 
 
diff --git a/src/cmip6_data_acq/copy_files.py → src/climate_data_acq/copy_files.py b/src/cmip6_data_acq/copy_files.py → src/climate_data_acq/copy_files.py
@@ -5,6 +5,7 @@
 import sys
 import argparse
 
+
 def copy_files_from_csv(csv_file_path, destination_folder, variable, experiment):
     """
     Copies files listed in a CSV file to a structured destination folder.
@@ -15,35 +16,39 @@ def copy_files_from_csv(csv_file_path, destination_folder, variable, experiment)
     :param experiment: Experiment name (e.g., 'historical', 'past2k').
     """
     # Open the CSV file and read the file paths
-    with open(csv_file_path, mode='r') as csv_file:
+    with open(csv_file_path, mode="r") as csv_file:
         csv_reader = csv.reader(csv_file)
-        
+
         for row in csv_reader:
             original_file_path = row[0].strip()
-            
+
             # Extract ensemble name from the file path
-            path_components = original_file_path.split('/')
+            path_components = original_file_path.split("/")
             try:
                 # Find the position of the experiment in the path
                 exp_index = path_components.index(experiment)
                 ensemble = path_components[exp_index + 1]  # Ensemble is next component
             except (ValueError, IndexError):
                 print(f"Could not extract ensemble from: {original_file_path}")
                 continue
-            
+
             # Build destination path based on experiment type
-            if experiment.startswith('ssp'):
+            if experiment.startswith("ssp"):
                 # Projections: destination_folder/<variable>/projections/<experiment>/<ensemble>/
-                dest_dir = os.path.join(destination_folder, variable, 'projections', experiment, ensemble)
+                dest_dir = os.path.join(
+                    destination_folder, variable, "projections", experiment, ensemble
+                )
             else:
                 # Historical/Past2K: destination_folder/<variable>/<experiment>/<ensemble>/
-                dest_dir = os.path.join(destination_folder, variable, experiment, ensemble)
+                dest_dir = os.path.join(
+                    destination_folder, variable, experiment, ensemble
+                )
             os.makedirs(dest_dir, exist_ok=True)
-            
+
             # Copy file to destination
             file_name = os.path.basename(original_file_path)
             dest_file_path = os.path.join(dest_dir, file_name)
-            
+
             try:
                 shutil.copy2(original_file_path, dest_file_path)
                 print(f"Copied: {original_file_path} -> {dest_file_path}")
@@ -55,23 +60,33 @@ def copy_files_from_csv(csv_file_path, destination_folder, variable, experiment)
                 print(f"Error copying {original_file_path}: {e}")
                 sys.stdout.flush()
 
+
 def main():
     # Set up command line arguments
     parser = argparse.ArgumentParser(
-        description='Copy CMIP6 files to structured directories based on CSV lists.'
+        description="Copy CMIP6 files to structured directories based on CSV lists."
+    )
+    parser.add_argument(
+        "-s",
+        "--source",
+        default="./data_acq/",
+        help="Folder containing CSV files (default: ./data_acq/)",
+    )
+    parser.add_argument(
+        "-d",
+        "--dest",
+        default="./data_raw/",
+        help="Destination base folder (default: ./data_raw/)",
+    )
+    parser.add_argument(
+        "-p",
+        "--pattern",
+        default="*.csv",
+        help="Glob pattern to select specific CSV files (default: *.csv)",
     )
-    parser.add_argument('-s', '--source', 
-                        default='./data_acq/',
-                        help='Folder containing CSV files (default: ./data_acq/)')
-    parser.add_argument('-d', '--dest', 
-                        default='./data_raw/',
-                        help='Destination base folder (default: ./data_raw/)')
-    parser.add_argument('-p', '--pattern', 
-                        default='*.csv',
-                        help='Glob pattern to select specific CSV files (default: *.csv)')
-
+
     args = parser.parse_args()
-    
+
     # Use the paths from arguments (or defaults if not provided)
     data_acq_folder = args.source
     destination_folder = args.dest
@@ -82,44 +97,56 @@ def main():
         data_acq_folder += os.path.sep
     if not destination_folder.endswith(os.path.sep):
         destination_folder += os.path.sep
-    
+
     # Find matching CSV files using pattern
     search_pattern = os.path.join(data_acq_folder, file_pattern)
     csv_files = sorted(glob.glob(search_pattern))
-    
+
     print(f"Source folder: {data_acq_folder}")
     print(f"Destination folder: {destination_folder}")
     print(f"Search pattern: {file_pattern}")
     print(f"Found CSV files: {csv_files}")
     sys.stdout.flush()
 
     if not csv_files:
-        print(f"No CSV files found matching pattern: '{file_pattern}' in {data_acq_folder}")
+        print(
+            f"No CSV files found matching pattern: '{file_pattern}' in {data_acq_folder}"
+        )
         sys.stdout.flush()
         return
 
     # Process each CSV file
     for csv_file_path in csv_files:
         print(f"Processing CSV: {csv_file_path}")
         sys.stdout.flush()
-        
+
         # Extract variable and experiment from filename
         filename = os.path.basename(csv_file_path)
-        parts = filename.split('__cmip6_')[-1].split('_[')[0].split('_')
-
-        # Determine experiment and variable
-        if parts[0] == 'past2k':
-            experiment = 'past2k'
-            variable = parts[1]
-        elif parts[0].startswith('ssp'):
+        if "cmip6" in filename:
+            parts = filename.split("__cmip6_")[-1].split("_[")[0].split("_")
+
+            # Determine experiment and variable
+            match parts[0]:
+                case "past2k":
+                    experiment = "past2k"
+                    variable = parts[1]
+                case "historical":
+                    experiment = "historical"
+                    variable = parts[0]
+                case ["ssp", *_]:
+                    experiment = parts[0]
+                    variable = parts[1]
+        elif "reanalisys" in filename:
+            parts = filename.split("__reanalisys_")[-1].split("_[")[0].split("_")
             experiment = parts[0]
             variable = parts[1]
         else:
-            experiment = 'historical'
-            variable = parts[0]
-        
+            print(f"File {csv_file_path} could not be processed.")
+            continue
+
         # Copy files with structured paths
         copy_files_from_csv(csv_file_path, destination_folder, variable, experiment)
 
+
 if __name__ == "__main__":
     main()
diff --git a/src/cmip6_data_acq/copy_files.sh → src/climate_data_acq/copy_files.sh b/src/cmip6_data_acq/copy_files.sh → src/climate_data_acq/copy_files.sh