diff --git a/tasks/rl_hopper.py b/tasks/rl_hopper.py index 69d85cb..7ab2380 100644 --- a/tasks/rl_hopper.py +++ b/tasks/rl_hopper.py @@ -237,7 +237,7 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps): for course in range(num_courses): logging.info(f"Course {course}") total_steps = 0 - num_trials = 2000 + num_trials = 20000 print_steps = max(1, num_trials // 100) epsilon = 1 - 0.5 * (course + 1) / num_courses @@ -287,7 +287,7 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps): for trainer in trainers: trainer.prepare_batch(max_mini_batch_size=16, max_learning_sequence=32) - loop_train(trainers, 20000) + loop_train(trainers, 100000) for trainer in trainers: trainer.clear_batch()