diff --git a/hw2/README.md b/hw2/README.md
index 8ade4c512..543c0a4e9 100644
--- a/hw2/README.md
+++ b/hw2/README.md
@@ -14,3 +14,17 @@ Before doing anything, first replace `gym/envs/box2d/lunar_lander.py` with the p
 The only file that you need to look at is `train_pg_f18.py`, which you will implement.
 
 See the [HW2 PDF](http://rail.eecs.berkeley.edu/deeprlcourse/static/homeworks/hw2.pdf) for further instructions.
+
+
+## Running trained agent
+After running `train_pg_f18.py` with a specific setting (gym environment, hyperparameters), a new directory will
+be created under `data`, named according to the following pattern:
+```
+args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
+```
+This directory contains multiple trained agents (the exact number is set by the `n_experiments` parameter).
+To visualize (render) these agents' behavior, run the `run_agent.py` script and specify the number of episodes to render (the `-n` option). For example:
+```bash
+# Render 3 episodes of agent number 1 of this experiment
+python run_agent.py "data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/1" -n 3
+```
\ No newline at end of file
diff --git a/hw2/run_agent.py b/hw2/run_agent.py
new file mode 100755
index 000000000..f914c65f3
--- /dev/null
+++ b/hw2/run_agent.py
@@ -0,0 +1,152 @@
+"""
+## Running trained agent
+After running `train_pg_f18.py` with a specific setting (gym environment, hyperparameters), a new directory will
+be created under `data`, named according to the following pattern:
+
+    args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
+
+This directory contains multiple trained agents (the exact number is set by the `n_experiments` parameter).
+To visualize (render) these agents' behavior, run the `run_agent.py` script and specify the number of episodes to render (the `-n` option). For example:
+
+> python run_agent.py "data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/1" -n 3
+
+"""
+import os
+import json
+import pickle
+import gym
+import numpy as np
+import tensorflow as tf
+from train_pg_f18 import Agent
+
+
+PARAMS_FILE = "params.json"
+VARS_FILE = "vars.pkl"
+
+
+def load_params(filename):
+    """
+    Load the 'params.json' file.
+
+    A plain json.loads() call does not work here because the file was saved with non-default separators, so the text is normalized back to standard JSON first.
+
+    :param filename: str
+    :return: dict
+    """
+    with open(filename, 'r') as file:
+        data = file.read().replace(',\n', ',').replace('\t:\t', ':').replace('\n', '')
+
+    return json.loads(data)
+
+
+def load_pickle(filename, mode='rb'):
+    with open(filename, mode=mode) as f:
+        return pickle.load(f)
+
+
+def load_agent_and_env(model_dir):
+    """
+    Load an agent with its pre-trained model and the relevant environment.
+
+    Most of the code here is taken from the train_PG() function in train_pg_f18.py.
+
+    :param model_dir: str (full directory path to the 'params.json' and 'vars.pkl' files)
+    :return: tuple of length 2 (the Agent instance and the gym env object)
+    """
+    # Load the params json
+    params_file = os.path.join(model_dir, PARAMS_FILE)
+    params = load_params(filename=params_file)
+    print(params)
+
+    # Load the model variables
+    vars_filename = os.path.join(model_dir, VARS_FILE)
+    model_vars = load_pickle(filename=vars_filename)
+    # print(model_vars)
+
+    # Make the gym environment
+    env = gym.make(params['env_name'])
+
+    # Set random seeds
+    seed = params['seed']
+    tf.set_random_seed(seed)
+    np.random.seed(seed)
+    #env.seed(seed)
+
+    # Is this env continuous, or discrete?
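+    # (A Discrete action space is a finite set of actions; continuous envs,
+    # like the Roboschool examples above, have a Box action space instead.)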
+    discrete = isinstance(env.action_space, gym.spaces.Discrete)
+
+    # Observation and action sizes
+    ob_dim = env.observation_space.shape[0]
+    ac_dim = env.action_space.n if discrete else env.action_space.shape[0]
+
+    # ========================================================================================#
+    # Initialize Agent
+    # ========================================================================================#
+    computation_graph_args = {
+        'n_layers': params['n_layers'],
+        'ob_dim': ob_dim,
+        'ac_dim': ac_dim,
+        'discrete': discrete,
+        'size': params['size'],
+        'learning_rate': params['learning_rate'],
+    }
+
+    sample_trajectory_args = {
+        'animate': params['animate'],
+        'max_path_length': params['max_path_length'],
+        'min_timesteps_per_batch': params['min_timesteps_per_batch'],
+    }
+
+    estimate_return_args = {
+        'gamma': params['gamma'],
+        'reward_to_go': params['reward_to_go'],
+        'nn_baseline': params['nn_baseline'],
+        'normalize_advantages': params['normalize_advantages'],
+    }
+
+    agent = Agent(computation_graph_args, sample_trajectory_args, estimate_return_args)
+
+    # build computation graph
+    agent.build_computation_graph()
+
+    # tensorflow: config, session, variable initialization
+    agent.init_tf_sess()
+
+    # Override the graph variables with the pre-trained values
+    for g_var in tf.global_variables(scope=None):
+        # Get the saved value and assign it to the tensor
+        value = model_vars[g_var.name]
+        set_variable_op = g_var.assign(value)
+        agent.sess.run(set_variable_op)
+
+    # # Validate that the assignment was successful
+    # for g_var in tf.global_variables(scope=None):
+    #     assert np.array_equal(g_var.eval(), model_vars[g_var.name])
+
+    return agent, env
+
+
+if __name__ == "__main__":
+    """
+    Example usage (after running train_pg_f18.py and creating agent 'data' dirs):
+    - python run_agent.py "data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/1" -n 3
+    - python run_agent.py "data/ll_b40000_r0.005_LunarLanderContinuous-v2_21-07-2019_09-59-05/1" -n 3
+    - python run_agent.py "data/hc_b50000_r0.005_RoboschoolHalfCheetah-v1_22-07-2019_20-04-48/1" -n 3
+    """
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('model_dir', type=str, help='A relative path to the data dir of a specific experiment. For example: "data/ll_b40000_r0.005_LunarLanderContinuous-v2_21-07-2019_09-59-05/1"')
+    parser.add_argument('--n_iter', '-n', type=int, default=3, help='Number of episodes to render')
+    args = parser.parse_args()
+
+    # Load an agent with its pre-trained model and the relevant environment
+    model_dir = args.model_dir
+    agent, env = load_agent_and_env(model_dir)
+
+    # Render n_iter episodes with the loaded agent
+    for i in range(args.n_iter):
+        agent.sample_trajectory(env, animate_this_episode=True)
+    print("done")
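A note on `load_params()` in the patch above: `params.json` is written by the training script with non-default JSON separators (apparently `(',\n', '\t:\t')`, judging from the replace chain), which is why the function normalizes the text before calling `json.loads()`. A minimal sketch of that normalization on a hypothetical raw file (the key/value pairs below are made up for illustration, not dumped from a real run):

```python
import json

# Hypothetical raw contents of params.json, assumed to have been written
# with separators=(',\n', '\t:\t') instead of the JSON defaults:
raw = '{"env_name"\t:\t"RoboschoolInvertedPendulum-v1",\n"seed"\t:\t1,\n"n_layers"\t:\t2}'

# The same normalization chain as load_params():
clean = raw.replace(',\n', ',').replace('\t:\t', ':').replace('\n', '')
print(json.loads(clean))
# {'env_name': 'RoboschoolInvertedPendulum-v1', 'seed': 1, 'n_layers': 2}
```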
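Similarly, the variable-restore loop in `load_agent_and_env()` issues one `sess.run()` per variable. A sketch of an equivalent alternative using the same TF1 API, assuming the `agent` and `model_vars` objects from that function are in scope, batches all the assign ops into a single session call:

```python
import tensorflow as tf

# Assumes: agent.sess is a live tf.Session, and model_vars is the dict
# loaded from vars.pkl mapping each variable's name to its saved value.
assign_ops = [g_var.assign(model_vars[g_var.name])
              for g_var in tf.global_variables()]
agent.sess.run(assign_ops)  # one session call instead of one per variable
```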