From ed2747c63655bc95acba69122e8915920881822a Mon Sep 17 00:00:00 2001
From: Patrick Virie <p.virie@gmail.com>
Date: Fri, 3 Jan 2025 22:48:27 +0700
Subject: [PATCH] Round randomly sampled actions; clip model actions to [-1, 1]

---
 tasks/rl_hopper.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tasks/rl_hopper.py b/tasks/rl_hopper.py
index d719c93..fbdc4c9 100644
--- a/tasks/rl_hopper.py
+++ b/tasks/rl_hopper.py
@@ -261,10 +261,11 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps):
                 for j in range(1000):
                     if random.random() <= epsilon or course == 0:
                         selected_action = env.action_space.sample()
+                        # quantize each component of the sampled action to -1, 0, or 1
+                        selected_action = np.round(selected_action)
                     else:
                         a = model.react(alg.State(observation.data), stable_state)
-                        # selected_action = np.clip(np.asarray(a.data), -1, 1)
-                        selected_action = np.where(np.asarray(a.data) > 0, 1, -1)
+                        selected_action = np.clip(np.asarray(a.data), -1, 1)
 
                     next_observation, reward, terminated, truncated, info = env.step(selected_action)
                     # check for nan