
Commit fc25f72

add EnviromentTest under v2.0
1 parent 2d5efb7 commit fc25f72

File tree

3 files changed: 390 additions & 1 deletion

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
1-
{"count":1,"self":22.1428832,"total":22.1705197,"children":{"InitializeActuators":{"count":1,"self":0.0029919,"total":0.0029919,"children":null},"InitializeSensors":{"count":1,"self":0.0029919,"total":0.0029919,"children":null},"AgentSendState":{"count":1063,"self":0.0031287,"total":0.011118099999999999,"children":{"CollectObservations":{"count":591,"self":0.0013204999999999998,"total":0.0013204999999999998,"children":null},"WriteActionMask":{"count":591,"self":5.9699999999999994E-05,"total":5.9699999999999994E-05,"children":null},"RequestDecision":{"count":591,"self":0.0066092,"total":0.0066092,"children":null}}},"DecideAction":{"count":1063,"self":0.0042235,"total":0.0042235,"children":null},"AgentAct":{"count":1063,"self":0.004234,"total":0.004234,"children":null}},"gauges":{"Player.CumulativeReward":{"count":5,"max":-1,"min":-1,"runningAverage":-1,"value":-1,"weightedAverage":-1}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1668951973","unity_version":"2020.3.25f1","command_line_arguments":"D:\\Unity Hub\\Project\\CrossyRoadEX\\Executable\\v2.0\\CRML.exe","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.2.1-exp.1","scene_name":"MainGame","end_time_seconds":"1668951995"}}
1+
{"count":1,"self":22.6099696,"total":526.3322687,"children":{"InitializeActuators":{"count":1,"self":0.0041334,"total":0.0041334,"children":null},"InitializeSensors":{"count":1,"self":0.0050142,"total":0.0050142,"children":null},"AgentSendState":{"count":1082,"self":0.0089643999999999991,"total":0.0291306,"children":{"CollectObservations":{"count":94,"self":0.0021885999999999997,"total":0.0021885999999999997,"children":null},"WriteActionMask":{"count":94,"self":0.0030547,"total":0.0030547,"children":null},"RequestDecision":{"count":94,"self":0.0019535999999999998,"total":0.0149229,"children":{"AgentInfo.ToProto":{"count":94,"self":0.0009737,"total":0.0129693,"children":{"GenerateSensorData":{"count":94,"self":0.011995599999999999,"total":0.011995599999999999,"children":null}}}}}}},"DecideAction":{"count":1082,"self":503.66648319999996,"total":503.6665059,"children":null},"AgentAct":{"count":1082,"self":0.009542,"total":0.009542,"children":null},"AgentInfo.ToProto":{"count":4,"self":0.0009911,"total":0.0019906,"children":{"GenerateSensorData":{"count":4,"self":0.0009995,"total":0.0009995,"children":null}}}},"gauges":{"Player.CumulativeReward":{"count":4,"max":-1,"min":-1,"runningAverage":-1,"value":-1,"weightedAverage":-1}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1669376099","unity_version":"2020.3.25f1","command_line_arguments":"d:\\Unity Hub\\Project\\CrossyRoadEX\\Executable\\v2.0\\CRML.exe --mlagents-port 5005","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.2.1-exp.1","scene_name":"MainGame","end_time_seconds":"1669376625"}}
Lines changed: 364 additions & 0 deletions
@@ -0,0 +1,364 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Usage (Command Line)\n",
8+
"Run the MLAgent Default Model(PPO/SAC) by Anaconda command prompt under the folder with exe\n",
9+
"```\n",
10+
"mlagents-learn <config path> --env=<exe name> --run-id=<run_name>\n",
11+
"```\n",
12+
"It should be like\n",
13+
"```\n",
14+
"mlagents-learn config\\player_config.yaml --env=\"CRML\" --run-id=test\n",
15+
"```"
16+
]
17+
},
18+
{
19+
"cell_type": "markdown",
20+
"metadata": {},
21+
"source": [
22+
"# Enviroment\n",
23+
"Get the enviroment by calling `UnityEnviroment()` to get the corresponding file name env.<br />\n",
24+
"- Close: `env.close()` close the enviroment. Release the port of commutator. <br />\n",
25+
"- Reset: `env.reset()` resets the enviroment. <br />\n",
26+
"- Set Action: `env.set_actions(behavior_name: str, action: ActionTuple)` set up the actions for next step.More Info down below <br />\n",
27+
"- Step: `env.step()` move the simulation forward. Pass the action to Unity. <br />\n",
28+
"- Get Step: `env.get_steps(behavior_name: str)` get the decision step from Unity.More Info down below <br />\n",
29+
" "
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": 35,
35+
"metadata": {},
36+
"outputs": [],
37+
"source": [
38+
"from mlagents_envs.environment import UnityEnvironment\n",
39+
"import numpy as np\n",
40+
"# This is a non-blocking call that only loads the environment.\n",
41+
"env = UnityEnvironment(file_name=\"CRML\", seed=1, side_channels=[])\n",
42+
"# Start interacting with the environment.\n",
43+
"env.reset()"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"# Behavior Spec\n",
51+
"Contains the specs of the behavior. Including `ActionSpec` and `Observation Spec`<br />\n",
52+
"`env.behavior_specs` is a dictionary of (Name: str, Spec: Behavior_Spec) <br />\n",
53+
"Get the names of Behavior Spec by `list(env.behavior_specs)` or `list(env.behavior_specs.keys())` <br />\n",
54+
"Get the corresonding Behavior Spec by `env.behavior_specs[behavior_name]` <br />"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": 36,
60+
"metadata": {},
61+
"outputs": [
62+
{
63+
"name": "stdout",
64+
"output_type": "stream",
65+
"text": [
66+
"Name of the behavior : Player?team=0\n",
67+
"Behavior spec of Player?team=0 : BehaviorSpec(observation_specs=[ObservationSpec(shape=(60,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='StackingSensor_size2_VectorSensor_size30')], action_spec=ActionSpec(continuous_size=0, discrete_branches=(5,)))\n"
68+
]
69+
}
70+
],
71+
"source": [
72+
"# Since there's only one agent, get the first on the list\n",
73+
"behavior_name = list(env.behavior_specs)[0]\n",
74+
"print(f\"Name of the behavior : {behavior_name}\")\n",
75+
"spec = env.behavior_specs[behavior_name]\n",
76+
"print(f\"Behavior spec of {behavior_name} : {spec}\")"
77+
]
78+
},
79+
{
80+
"cell_type": "markdown",
81+
"metadata": {},
82+
"source": [
83+
"# Observation Spec\n",
84+
"Note: this is NOT the Observation Space that observed for agent, but an info spec of it <br />\n",
85+
"Get the Observation Spec by `spec.observation_specs`\n",
86+
"- Shape: numbers of observation\n",
87+
"- Dimension Property:\n",
88+
"- Observation Type:\n",
89+
"- Name:"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": 58,
95+
"metadata": {},
96+
"outputs": [
97+
{
98+
"name": "stdout",
99+
"output_type": "stream",
100+
"text": [
101+
"Number of observations : 1\n",
102+
"Spec Info : ObservationSpec(shape=(60,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='StackingSensor_size2_VectorSensor_size30')\n",
103+
"Is there a visual observation ? False\n"
104+
]
105+
}
106+
],
107+
"source": [
108+
"# Examine the number of observations per Agent\n",
109+
"print(\"Number of observations : \", len(spec.observation_specs))\n",
110+
"print(f\"Spec Info : {spec.observation_specs[0]}\")\n",
111+
"# Is there a visual observation ?\n",
112+
"# Visual observation have 3 dimensions: Height, Width and number of channels\n",
113+
"vis_obs = any(len(spec.shape) == 3 for spec in spec.observation_specs)\n",
114+
"print(\"Is there a visual observation ?\", vis_obs)"
115+
]
116+
},
117+
{
118+
"cell_type": "markdown",
119+
"metadata": {},
120+
"source": [
121+
"# Action Spec\n",
122+
"Note: this is NOT the Action Space that set for agent, but an info spec of it <br />\n",
123+
"Get the Action Spec by `spec.action_specs` <br />\n",
124+
"Random Action: `action_spec.random_action(n_agent: int)` create an random action for n agents <br />\n",
125+
"Empty Action: `action_spec.empty_action(n_agent: int)` create an empty action for n agents <br />"
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": 10,
131+
"metadata": {},
132+
"outputs": [
133+
{
134+
"name": "stdout",
135+
"output_type": "stream",
136+
"text": [
137+
"There are 1 discrete actions\n",
138+
"Action number 0 has 5 different options\n",
139+
"Continuous: 0, Discrete: (5,)\n"
140+
]
141+
}
142+
],
143+
"source": [
144+
"# Is the Action continuous or multi-discrete ?\n",
145+
"if spec.action_spec.continuous_size > 0:\n",
146+
" print(f\"There are {spec.action_spec.continuous_size} continuous actions\")\n",
147+
"if spec.action_spec.is_discrete():\n",
148+
" print(f\"There are {spec.action_spec.discrete_size} discrete actions\")\n",
149+
"\n",
150+
"# For discrete actions only : How many different options does each action has ?\n",
151+
"if spec.action_spec.discrete_size > 0:\n",
152+
" for action, branch_size in enumerate(spec.action_spec.discrete_branches):\n",
153+
" print(f\"Action number {action} has {branch_size} different options\")"
154+
]
155+
},
156+
{
157+
"cell_type": "markdown",
158+
"metadata": {},
159+
"source": [
160+
"# Action Tuple\n",
161+
"Class that's pack NamedTuple as Action\n",
162+
"- `action.discrete`: get the discrete actions\n",
163+
"- `action.continuous`: get the continuous actions\n",
164+
"- `action.add_discrete`: add the discrete actions\n",
165+
"- `action.add_continous`: add the continuous actions\n",
166+
"\n",
167+
"Axis 0(Rows): Different Agents actions value <br />\n",
168+
"Axis 1(Columns): Different Actions on Same agent<br />"
169+
]
170+
},
171+
{
172+
"cell_type": "code",
173+
"execution_count": 32,
174+
"metadata": {},
175+
"outputs": [
176+
{
177+
"name": "stdout",
178+
"output_type": "stream",
179+
"text": [
180+
"[[1 2]\n",
181+
" [3 4]]\n",
182+
"[[0.5]]\n",
183+
"[[0]\n",
184+
" [3]]\n"
185+
]
186+
}
187+
],
188+
"source": [
189+
"from mlagents_envs.environment import ActionTuple\n",
190+
"action = ActionTuple()\n",
191+
"action.add_discrete(np.array([[1,2],[3,4]])) # [1,2] actions on Agent 1, [3,4] actions on Agent 2\n",
192+
"print(action.discrete)\n",
193+
"action.add_continuous(np.array([[0.5]]))\n",
194+
"print(action.continuous)\n",
195+
"print(spec.action_spec.random_action(2).discrete) # Get 1 random action under Action Spec for 2 agents"
196+
]
197+
},
198+
{
199+
"cell_type": "markdown",
200+
"metadata": {},
201+
"source": [
202+
"# Decision Steps / Terminal Steps\n",
203+
"Decision Steps and Terminal Steps are the list that agents called for the need of decision.<br />\n",
204+
"Difference between Decision Steps and Terminal Steps is that terminal step only calls on episode end, while \n",
205+
"decision step can be called at anytime.\n",
206+
"- Decision Steps: `env.get_steps()` get the steps from agents calling <br />\n",
207+
"- Agent ID: `steps.agent_id()` get the agents id corresponding to the step <br />"
208+
]
209+
},
210+
{
211+
"cell_type": "code",
212+
"execution_count": 37,
213+
"metadata": {},
214+
"outputs": [
215+
{
216+
"name": "stdout",
217+
"output_type": "stream",
218+
"text": [
219+
"[0]\n"
220+
]
221+
}
222+
],
223+
"source": [
224+
"decision_steps, terminal_steps = env.get_steps(behavior_name) \n",
225+
"print(decision_steps.agent_id)"
226+
]
227+
},
228+
{
229+
"cell_type": "markdown",
230+
"metadata": {},
231+
"source": [
232+
"# Observation and Reward of Steps\n",
233+
"Observation of an agent: `steps[agent_id].obs` <br />\n",
234+
"Reward of an agent: `steps[agent_id].reward` <br /> <br/>\n",
235+
"Observation of all agent: `steps.obs` <br />\n",
236+
"Reward of all agent: `steps.reward` <br />"
237+
]
238+
},
239+
{
240+
"cell_type": "code",
241+
"execution_count": 56,
242+
"metadata": {},
243+
"outputs": [
244+
{
245+
"name": "stdout",
246+
"output_type": "stream",
247+
"text": [
248+
"Observation of Agent 0: [array([ 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
249+
" 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
250+
" 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
251+
" 0., 0., 0., 0., 0., 0., -100., -100., -100.,\n",
252+
" -100., -100., -100., 0., -100., -100., -100., -100., -100.,\n",
253+
" -100., 0., -100., -100., -100., -100., -100., -100., 0.,\n",
254+
" -100., -100., -100., -100., -100., -100.], dtype=float32)]\n",
255+
"Reward of Agent 0: 0.0\n",
256+
"0.0\n"
257+
]
258+
}
259+
],
260+
"source": [
261+
"agent_id = decision_steps.agent_id[0]\n",
262+
"print(f\"Observation of Agent {agent_id}: {decision_steps[agent_id].obs}\")\n",
263+
"print(f\"Reward of Agent {agent_id}: {decision_steps[agent_id].reward}\")"
264+
]
265+
},
266+
{
267+
"cell_type": "markdown",
268+
"metadata": {},
269+
"source": [
270+
"# Set the Action and Run\n",
271+
"- `env.set_actions(behavior_name: str, action: ActionTuple)` : Setup Action for next step\n",
272+
"- `env.step()` : Foward to next step"
273+
]
274+
},
275+
{
276+
"cell_type": "code",
277+
"execution_count": 57,
278+
"metadata": {},
279+
"outputs": [],
280+
"source": [
281+
"action = spec.action_spec.random_action(len(decision_steps))\n",
282+
"env.set_actions(behavior_name, action)\n",
283+
"env.step()"
284+
]
285+
},
286+
{
287+
"cell_type": "markdown",
288+
"metadata": {},
289+
"source": [
290+
"# Run the enviroment for serval episode"
291+
]
292+
},
293+
{
294+
"cell_type": "code",
295+
"execution_count": null,
296+
"metadata": {},
297+
"outputs": [],
298+
"source": [
299+
"ep = 2\n",
300+
"for episode in range(ep):\n",
301+
" env.reset()\n",
302+
" decision_steps, terminal_steps = env.get_steps(behavior_name) # Get the first step\n",
303+
" tracked_agent = decision_steps.agent_id[0] # Track the agent (Since there's only one)\n",
304+
" done = False # For the tracked_agent\n",
305+
" episode_rewards = 0 # For the tracked_agent\n",
306+
" while not done:\n",
307+
" # Generate an action for all agents\n",
308+
" action = spec.action_spec.random_action(len(decision_steps))\n",
309+
" # Set the actions\n",
310+
" env.set_actions(behavior_name, action)\n",
311+
"\n",
312+
" # Move the simulation forward\n",
313+
" env.step()\n",
314+
"\n",
315+
" # Get the new simulation results\n",
316+
" decision_steps, terminal_steps = env.get_steps(behavior_name)\n",
317+
" if tracked_agent in decision_steps: # The agent requested a decision\n",
318+
" episode_rewards += decision_steps[tracked_agent].reward\n",
319+
" print(f\"step reward:{decision_steps[tracked_agent].reward}\")\n",
320+
" if tracked_agent in terminal_steps: # The agent terminated its episode\n",
321+
" episode_rewards += terminal_steps[tracked_agent].reward\n",
322+
" print(f\"step reward:{terminal_steps[tracked_agent].reward}\")\n",
323+
" done = True\n",
324+
" print(f\"Total rewards for episode {episode} is {episode_rewards}\")"
325+
]
326+
},
327+
{
328+
"cell_type": "code",
329+
"execution_count": 15,
330+
"metadata": {},
331+
"outputs": [],
332+
"source": [
333+
"env.close()"
334+
]
335+
}
336+
],
337+
"metadata": {
338+
"kernelspec": {
339+
"display_name": "Python 3.7.10 ('mlagent')",
340+
"language": "python",
341+
"name": "python3"
342+
},
343+
"language_info": {
344+
"codemirror_mode": {
345+
"name": "ipython",
346+
"version": 3
347+
},
348+
"file_extension": ".py",
349+
"mimetype": "text/x-python",
350+
"name": "python",
351+
"nbconvert_exporter": "python",
352+
"pygments_lexer": "ipython3",
353+
"version": "3.7.15"
354+
},
355+
"orig_nbformat": 4,
356+
"vscode": {
357+
"interpreter": {
358+
"hash": "add02d4cf8c8a1f4086ab0b95639014411458b216e0b1138ae392c4218dbd2e2"
359+
}
360+
}
361+
},
362+
"nbformat": 4,
363+
"nbformat_minor": 2
364+
}
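The notebook drives the agent with `spec.action_spec.random_action(...)`. As a complement, here is a minimal sketch of sending an explicit action instead, built as an `ActionTuple` for the single discrete branch of size 5 shown in the behavior spec. Assumptions: the `CRML` executable sits next to the script, and option index `1` is an arbitrary choice, since the mapping from option index to in-game move is not documented here.

```python
import numpy as np
from mlagents_envs.environment import UnityEnvironment, ActionTuple

env = UnityEnvironment(file_name="CRML", seed=1, side_channels=[])
env.reset()

behavior_name = list(env.behavior_specs)[0]
spec = env.behavior_specs[behavior_name]
decision_steps, terminal_steps = env.get_steps(behavior_name)

# One row per agent awaiting a decision, one column per discrete branch.
# Option index 1 is arbitrary among the 5 available options.
discrete = np.full((len(decision_steps), spec.action_spec.discrete_size), 1, dtype=np.int32)
action = ActionTuple(discrete=discrete)

env.set_actions(behavior_name, action)
env.step()
env.close()
```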
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
1+
behaviors:
2+
Player:
3+
trainer_type: ppo
4+
hyperparameters:
5+
batch_size: 10
6+
buffer_size: 100
7+
learning_rate: 3.0e-4
8+
beta: 5.0e-4
9+
epsilon: 0.2
10+
lambd: 0.99
11+
num_epoch: 3
12+
learning_rate_schedule: linear
13+
beta_schedule: constant
14+
epsilon_schedule: linear
15+
network_settings:
16+
normalize: false
17+
hidden_units: 128
18+
num_layers: 2
19+
reward_signals:
20+
extrinsic:
21+
gamma: 0.99
22+
strength: 1.0
23+
max_steps: 5000
24+
time_horizon: 64
25+
summary_freq: 10000
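For a quick sanity check before training, a small sketch (assuming the config above is saved as `config/player_config.yaml`, the path used in the notebook's `mlagents-learn` example, and that PyYAML is installed) that loads the file and prints the PPO hyperparameters:

```python
import yaml  # PyYAML

# Path assumed from the notebook's example command:
#   mlagents-learn config\player_config.yaml --env="CRML" --run-id=test
with open("config/player_config.yaml") as f:
    config = yaml.safe_load(f)

player = config["behaviors"]["Player"]
print("trainer_type:", player["trainer_type"])
print("max_steps:", player["max_steps"])
for key, value in player["hyperparameters"].items():
    print(f"  {key}: {value}")
```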
