MariusWiggert · MariusWiggert · Oct 19, 2022 · Oct 19, 2022 · Oct 19, 2022 · Oct 20, 2022
diff --git a/config/reinforcement_learning/gulf_of_mexico_Copernicus_forecast_HYCOM_hindcast.yaml b/config/reinforcement_learning/gulf_of_mexico_Copernicus_forecast_HYCOM_hindcast.yaml
@@ -1,6 +1,6 @@
 casadi_cache_dict:
-    deg_around_x_t: 0.5
-    time_around_x_t: 172800. # 3600 * 24 * 2
+    deg_around_x_t: 2.0
+    time_around_x_t: 432000 # 3600 * 24 * 5
 
 platform_dict:
     battery_cap_in_wh: 400.0
@@ -20,23 +20,17 @@ ocean_dict:
         field: 'OceanCurrents'
         source: 'hindcast_files'
         source_settings:
-            folder: "/tmp/hycom_hindcast/"
+            folder: "/home/ubuntu/hycom_hindcast/"
+            local: True
             source: "HYCOM"
             type: "hindcast"
             currents: 'total'
     forecast:
         field: 'OceanCurrents'
         source: 'forecast_files'
         source_settings:
-            folder: "/tmp/copernicus_forecast/"
+            folder: "/home/ubuntu/copernicus_forecast/"
+            local: True
             source: "Copernicus"
             type: "forecast"
-            currents: 'total'
-
-solar_dict:
-    hindcast: null
-    forecast: null
-
-seaweed_dict:
-    hindcast: null
-    forecast: null
+            currents: 'total'
diff --git a/config/reinforcement_learning/gulf_of_mexico_HYCOM_hindcast.yaml b/config/reinforcement_learning/gulf_of_mexico_HYCOM_hindcast.yaml
@@ -20,17 +20,7 @@ ocean_dict:
         field: 'OceanCurrents'
         source: 'hindcast_files'
         source_settings:
-            folder: "/tmp/hycom_hindcast/"
+            folder: "/home/ubuntu/hycom_hindcast/"
             source: "HYCOM"
             type: "hindcast"
-            currents: 'total'
-    forecast:
-        null
-
-solar_dict:
-    hindcast: null
-    forecast: null
-
-seaweed_dict:
-    hindcast: null
-    forecast: null
+            currents: 'total'
diff --git a/config/reinforcement_learning/training.yaml b/config/reinforcement_learning/training.yaml
@@ -20,66 +20,63 @@ algorithm:
     double_q: True
     ##### Model #####
     model:
-        # _use_default_native_models: True
-        # Filter config: List of [out_channels, kernel, stride] for each filter.
-#        dim: 21
-#        conv_filters: [
-#            [4, [5, 5], 1],
-#            [16, [5, 5], 1],
-#            [441, [21, 21], 1]
-#        ]
-#        conv_activation: 'relu'
         custom_model: 'OceanTorchModel'
         custom_model_config:
             map:
-                normalize: True
+                normalize: False
 
-                units: [ 512, 512 ]
-                activation: [ 'tanh', 'tanh' ] # Supported values: "tanh", "relu", "swish", "linear".
-                initializer_std: [ 1, 1 ]
+                units: [ 512, 512, 256 ]
+                activation: [ 'tanh', 'tanh', 'tanh' ] # Supported values: "tanh", "relu", "swish", "linear".
+                initializer: [ 1, 1, 0.01 ] # Supported values: 'xavier_uniform' or float (std of normc_initializer)
 
 #                channels: [ 4, 16 ]
 #                kernel: [ [5,5], [5,5] ]
 #                stride: []
 #                padding: []
-#                activation: []
+#                activation: [ 'relu' ]
 
             meta:
-                input_activation: 'tanh'
+                input_activation: False #'tanh'
                 units: []
                 activation: [] # Supported values: "tanh", "relu", "swish", "linear".
-                initializer_std: []
+                initializer: [] # Supported values: 'xavier_uniform' or float (std of normc_initializer)
 
             joined:
-                units: [ 256, 256 ]
+                input_activation: False #'tanh'
+                units: []
                 activation: [ 'tanh', 'relu' ] # Supported values: "tanh", "relu", "swish", "linear".
-                initializer_std: [ 0.01, False ]
+                initializer: [ 0.01, 'xavier_uniform' ] # Supported values: 'xavier_uniform' or float (std of normc_initializer)
 
             dueling_heads:
+                residual: False
                 units: [ 128, 128 ]
                 activation: [ 'relu', 'relu', 'linear' ] # Supported values: "tanh", "relu", "swish", "linear".
-                initializer_std: [ False, False, False ] # Supported values: False (), or float (std of normc_initializer)
+                initializer: [ 'xavier_uniform', 'xavier_uniform', 'xavier_uniform' ] # Supported values: 'xavier_uniform' or float (std of normc_initializer)
 
     _disable_preprocessor_api: True
     ##### Episodes #####
 #    batch_mode: 'truncate_episodes'
 #    soft_horizon: False
 #    rollout_fragment_length: 50
     ##### Training #####
-#    replay_buffer_config:
-#        capacity: 2000000
-#        learning_starts: 10000
-#        no_local_replay_buffer: True
+    replay_buffer_config:
+        capacity: 2000000
+#        no_local_replay_buffer: False
+#        store_buffer_in_checkpoints: True
 #        prioritized_replay_alpha: 0.6
 #        prioritized_replay_beta: 0.4
 #        prioritized_replay_eps: 0.000001
 #        replay_batch_size: 32
 #        replay_sequence_length: 1
 #        type: 'MultiAgentReplayBuffer'
 #    train_batch_size: 512
-#    timesteps_per_iteration: 10000
-#    training_intensity: 1
-#    target_network_update_freq: 10.0e3,
+#    store_buffer_in_checkpoints: True
+
+    min_sample_timesteps_per_iteration: 100000 # 300 steps / epoch * 200 machines = 60000
+    target_network_update_freq: 100000
+    num_steps_sampled_before_learning_starts: 100000 #500000
+    training_intensity: 1
+
     keep_per_episode_custom_metrics: True
     optimizer:
         num_replay_buffer_shards: 1
@@ -88,51 +85,71 @@ algorithm:
 #    exploration_config: {"type": "SoftQ"}
         # worker_side_prioritization: True,
     ##### Evaluation #####
-#    evaluation_config:
-#        evaluation_interval: 1
-#        evaluation_duration: 1000
-#        evaluation_duration_unit: "episodes"
-#        env_config:
-#            evaluation: True
+    evaluation_interval: 10
+    evaluation_duration: 204
+    evaluation_duration_unit: "episodes"
+    evaluation_num_workers: 102
+    evaluation_sample_timeout_s: 2400
+    evaluation_config:
+        explore: False
+        env_config:
+            evaluation: True
     ##### Workers #####
     num_gpus: 1
     num_workers: 102
-    num_cpus_per_worker: 1
+    num_cpus_per_worker: 0.5
     num_gpus_per_worker: 0
     placement_strategy: 'SPREAD'
     ignore_worker_failures: True
     recreate_failed_workers: True
 
+#    output: 'logdir'
+
 experiments_folder: '/seaweed-storage/experiments/gulf_of_mexico_Copernicus_forecast_HYCOM_hindcast/'
 
 environment:
     scenario_file: 'config/reinforcement_learning/gulf_of_mexico_Copernicus_forecast_HYCOM_hindcast.yaml'
     scenario_config: {}
 
-    problem_folder: '/seaweed-storage/generation/gulf_of_mexico_Copernicus_forecast_HYCOM_hindcast/training_40000_problems/'
-    validation_length: 2000
+    train_missions:
+        folder: '/seaweed-storage/generation/gulf_of_mexico_Copernicus_forecast_HYCOM_hindcast/divers_training_improved_2022_10_23_05_10_12/'
+
+        filter:
+            no_random: True
+            stop: 70000
 
-    arena_steps_per_env_step: 1
+    eval_missions:
+        folder: '/seaweed-storage/generation/gulf_of_mexico_Copernicus_forecast_HYCOM_hindcast/divers_training_improved_2022_10_23_05_10_12/'
+
+        filter:
+            no_random: True
+            start: 70000
+            stop: 204 # NOTE(review): stop < start (70000) - confirm whether 'stop' is an absolute index or a count
+
+    arena_steps_per_env_step: 3
     actions: 8
-    render: False
     fake: False #one of: False, 'random', 'naive, 'hj_planner_forecast', 'hj_planner_hindcast', 'residual'
+    render: False
 
 feature_constructor:
     flatten: False
     measurements: False
     local_map:
-        xy_width_degree: 0.2
-        xy_width_points: 5
+        embedding_n: [8, 8, 8, 8]
+        embedding_radius: [0.1, 0.2, 0.4, 0.8]
+#        xy_width_degree: 0.2
+#        xy_width_points: 5
         flatten: False
         features:
             ttr_forecast: True
             ttr_hindcast: False
             observer:
-                variables: ['error_u', 'error_v'] #['error_u', 'error_v'] #['water_u', 'water_v'], # list from: 'error_u', 'error_v', 'std_error_u', 'std_error_v', 'initial_forecast_u', 'initial_forecast_v', 'water_u', 'water_v'
-                time: [0] # offsets in h
-            currents_hindcast: [] # offsets in h
-            currents_forecast: [] # offsets in h
-            true_error: [] # offsets in h
+                # list from: 'mag', 'dir', 'error_u', 'error_v', 'std_error_u', 'std_error_v', 'initial_forecast_u', 'initial_forecast_v', 'water_u', 'water_v'
+                variables: ['error_u', 'error_v', 'std_error_u', 'std_error_v']
+                time: [0]           # offsets in h
+            currents_hindcast: []   # offsets in h
+            currents_forecast: []   # offsets in h
+            true_error: []          # offsets in h
     global_map: False
     meta: False #['lon', 'lat'] #, 'time', 'target_distance', 'target_direction', 'episode_time_in_h']
 

diff --git a/ocean_navigation_simulator/controllers/Controller.py b/ocean_navigation_simulator/controllers/Controller.py
@@ -1,6 +1,5 @@
 import abc
 import logging
-import os
 
 from ocean_navigation_simulator.environment.Arena import ArenaObservation
 from ocean_navigation_simulator.environment.NavigationProblem import (
@@ -25,7 +24,6 @@ def __init__(self, problem: NavigationProblem):
         self.problem = problem
         # initialize logger
         self.logger = logging.getLogger("arena.controller")
-        self.logger.setLevel(os.environ.get("LOGLEVEL", "INFO").upper())
 
     @abc.abstractmethod
     def get_action(self, observation: ArenaObservation) -> PlatformAction: