From 5e383556e3f8064c4c58bc4cf31b83cc31c98c67 Mon Sep 17 00:00:00 2001
From: Jongwook Choi
Date: Tue, 15 Mar 2022 04:05:09 -0400
Subject: [PATCH] Log rewards statistics in SAC agents (similar to PPO agents)

---
 acme/agents/jax/sac/learning.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/acme/agents/jax/sac/learning.py b/acme/agents/jax/sac/learning.py
index ab786d9512..6b77df1c14 100644
--- a/acme/agents/jax/sac/learning.py
+++ b/acme/agents/jax/sac/learning.py
@@ -222,6 +222,10 @@ def update_step(
               jax.tree_map(lambda x: jnp.std(x, axis=0),
                            transitions.next_observation)))
 
+      metrics['rewards_mean'] = jnp.mean(
+          jnp.abs(jnp.mean(transitions.reward, axis=0)))
+      metrics['rewards_std'] = jnp.std(transitions.reward, axis=0)
+
       return new_state, metrics
 
     # General learner book-keeping and loggers.