From ed2747c63655bc95acba69122e8915920881822a Mon Sep 17 00:00:00 2001 From: Patrick Virie Date: Fri, 3 Jan 2025 22:48:27 +0700 Subject: [PATCH] round action --- tasks/rl_hopper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tasks/rl_hopper.py b/tasks/rl_hopper.py index d719c93..fbdc4c9 100644 --- a/tasks/rl_hopper.py +++ b/tasks/rl_hopper.py @@ -261,10 +261,11 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps): for j in range(1000): if random.random() <= epsilon or course == 0: selected_action = env.action_space.sample() + # quantize to -1 0 1 + selected_action = np.round(selected_action) else: a = model.react(alg.State(observation.data), stable_state) - # selected_action = np.clip(np.asarray(a.data), -1, 1) - selected_action = np.where(np.asarray(a.data) > 0, 1, -1) + selected_action = np.clip(np.asarray(a.data), -1, 1) next_observation, reward, terminated, truncated, info = env.step(selected_action) # check for nan