From ed2747c63655bc95acba69122e8915920881822a Mon Sep 17 00:00:00 2001
From: Patrick Virie
Date: Fri, 3 Jan 2025 22:48:27 +0700
Subject: [PATCH] round action
---
tasks/rl_hopper.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/tasks/rl_hopper.py b/tasks/rl_hopper.py
index d719c93..fbdc4c9 100644
--- a/tasks/rl_hopper.py
+++ b/tasks/rl_hopper.py
@@ -261,10 +261,11 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps):
for j in range(1000):
if random.random() <= epsilon or course == 0:
selected_action = env.action_space.sample()
+ # quantize to -1 0 1
+ selected_action = np.round(selected_action)
else:
a = model.react(alg.State(observation.data), stable_state)
- # selected_action = np.clip(np.asarray(a.data), -1, 1)
- selected_action = np.where(np.asarray(a.data) > 0, 1, -1)
+ selected_action = np.clip(np.asarray(a.data), -1, 1)
next_observation, reward, terminated, truncated, info = env.step(selected_action)
# check for nan