Skip to content

Commit

Permalink
reduce experiment time but increase curriculum
Browse files Browse the repository at this point in the history
  • Loading branch information
PVirie committed Dec 29, 2024
1 parent e515a85 commit da5775d
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions tasks/rl_hopper.py
Original file line number Diff line number Diff line change
@@ -233,13 +233,13 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps):
observation, info = env.reset(seed=random_seed)

num_layers = len(cortex_models)
- num_courses = 20
+ num_courses = 60
for course in range(num_courses):
logging.info(f"Course {course}")
total_steps = 0
- num_trials = 20000
+ num_trials = 2000
print_steps = max(1, num_trials // 100)
- epsilon = 1 - 0.5 * (course + 1) / num_courses
+ epsilon = 1 - 0.75 * (course + 1) / num_courses

next_best_targets = np.zeros((len(goals), len(goals[0][0])), dtype=np.float32)
next_best_target_diffs = np.ones((len(goals), 1), dtype=np.float32) * 1e4
@@ -287,7 +287,7 @@ def prepare_data_tuples(states, actions, rewards, num_layers, skip_steps):
for trainer in trainers:
trainer.prepare_batch(max_mini_batch_size=16, max_learning_sequence=32)

- loop_train(trainers, 100000)
+ loop_train(trainers, 20000)

for trainer in trainers:
trainer.clear_batch()
Expand Down

0 comments on commit da5775d

Please sign in to comment.