
Commit fc25f72

add EnviromentTest under v2.0
1 parent 2d5efb7 commit fc25f72

File tree

3 files changed: 390 additions & 1 deletion

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
1-
{"count":1,"self":22.1428832,"total":22.1705197,"children":{"InitializeActuators":{"count":1,"self":0.0029919,"total":0.0029919,"children":null},"InitializeSensors":{"count":1,"self":0.0029919,"total":0.0029919,"children":null},"AgentSendState":{"count":1063,"self":0.0031287,"total":0.011118099999999999,"children":{"CollectObservations":{"count":591,"self":0.0013204999999999998,"total":0.0013204999999999998,"children":null},"WriteActionMask":{"count":591,"self":5.9699999999999994E-05,"total":5.9699999999999994E-05,"children":null},"RequestDecision":{"count":591,"self":0.0066092,"total":0.0066092,"children":null}}},"DecideAction":{"count":1063,"self":0.0042235,"total":0.0042235,"children":null},"AgentAct":{"count":1063,"self":0.004234,"total":0.004234,"children":null}},"gauges":{"Player.CumulativeReward":{"count":5,"max":-1,"min":-1,"runningAverage":-1,"value":-1,"weightedAverage":-1}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1668951973","unity_version":"2020.3.25f1","command_line_arguments":"D:\\Unity Hub\\Project\\CrossyRoadEX\\Executable\\v2.0\\CRML.exe","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.2.1-exp.1","scene_name":"MainGame","end_time_seconds":"1668951995"}}
1+
{"count":1,"self":22.6099696,"total":526.3322687,"children":{"InitializeActuators":{"count":1,"self":0.0041334,"total":0.0041334,"children":null},"InitializeSensors":{"count":1,"self":0.0050142,"total":0.0050142,"children":null},"AgentSendState":{"count":1082,"self":0.0089643999999999991,"total":0.0291306,"children":{"CollectObservations":{"count":94,"self":0.0021885999999999997,"total":0.0021885999999999997,"children":null},"WriteActionMask":{"count":94,"self":0.0030547,"total":0.0030547,"children":null},"RequestDecision":{"count":94,"self":0.0019535999999999998,"total":0.0149229,"children":{"AgentInfo.ToProto":{"count":94,"self":0.0009737,"total":0.0129693,"children":{"GenerateSensorData":{"count":94,"self":0.011995599999999999,"total":0.011995599999999999,"children":null}}}}}}},"DecideAction":{"count":1082,"self":503.66648319999996,"total":503.6665059,"children":null},"AgentAct":{"count":1082,"self":0.009542,"total":0.009542,"children":null},"AgentInfo.ToProto":{"count":4,"self":0.0009911,"total":0.0019906,"children":{"GenerateSensorData":{"count":4,"self":0.0009995,"total":0.0009995,"children":null}}}},"gauges":{"Player.CumulativeReward":{"count":4,"max":-1,"min":-1,"runningAverage":-1,"value":-1,"weightedAverage":-1}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1669376099","unity_version":"2020.3.25f1","command_line_arguments":"d:\\Unity Hub\\Project\\CrossyRoadEX\\Executable\\v2.0\\CRML.exe --mlagents-port 5005","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.2.1-exp.1","scene_name":"MainGame","end_time_seconds":"1669376625"}}
Lines changed: 364 additions & 0 deletions
@@ -0,0 +1,364 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"## Usage (Command Line)\n",
8+
"Run the MLAgent Default Model(PPO/SAC) by Anaconda command prompt under the folder with exe\n",
9+
"```\n",
10+
"mlagents-learn <config path> --env=<exe name> --run-id=<run_name>\n",
11+
"```\n",
12+
"It should be like\n",
13+
"```\n",
14+
"mlagents-learn config\\player_config.yaml --env=\"CRML\" --run-id=test\n",
15+
"```"
16+
]
17+
},
18+
{
19+
"cell_type": "markdown",
20+
"metadata": {},
21+
"source": [
22+
"# Enviroment\n",
23+
"Get the enviroment by calling `UnityEnviroment()` to get the corresponding file name env.<br />\n",
24+
"- Close: `env.close()` close the enviroment. Release the port of commutator. <br />\n",
25+
"- Reset: `env.reset()` resets the enviroment. <br />\n",
26+
"- Set Action: `env.set_actions(behavior_name: str, action: ActionTuple)` set up the actions for next step.More Info down below <br />\n",
27+
"- Step: `env.step()` move the simulation forward. Pass the action to Unity. <br />\n",
28+
"- Get Step: `env.get_steps(behavior_name: str)` get the decision step from Unity.More Info down below <br />\n",
29+
" "
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": 35,
35+
"metadata": {},
36+
"outputs": [],
37+
"source": [
38+
"from mlagents_envs.environment import UnityEnvironment\n",
39+
"import numpy as np\n",
40+
"# This is a non-blocking call that only loads the environment.\n",
41+
"env = UnityEnvironment(file_name=\"CRML\", seed=1, side_channels=[])\n",
42+
"# Start interacting with the environment.\n",
43+
"env.reset()"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"# Behavior Spec\n",
51+
"Contains the specs of the behavior. Including `ActionSpec` and `Observation Spec`<br />\n",
52+
"`env.behavior_specs` is a dictionary of (Name: str, Spec: Behavior_Spec) <br />\n",
53+
"Get the names of Behavior Spec by `list(env.behavior_specs)` or `list(env.behavior_specs.keys())` <br />\n",
54+
"Get the corresonding Behavior Spec by `env.behavior_specs[behavior_name]` <br />"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": 36,
60+
"metadata": {},
61+
"outputs": [
62+
{
63+
"name": "stdout",
64+
"output_type": "stream",
65+
"text": [
66+
"Name of the behavior : Player?team=0\n",
67+
"Behavior spec of Player?team=0 : BehaviorSpec(observation_specs=[ObservationSpec(shape=(60,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='StackingSensor_size2_VectorSensor_size30')], action_spec=ActionSpec(continuous_size=0, discrete_branches=(5,)))\n"
68+
]
69+
}
70+
],
71+
"source": [
72+
"# Since there's only one agent, get the first on the list\n",
73+
"behavior_name = list(env.behavior_specs)[0]\n",
74+
"print(f\"Name of the behavior : {behavior_name}\")\n",
75+
"spec = env.behavior_specs[behavior_name]\n",
76+
"print(f\"Behavior spec of {behavior_name} : {spec}\")"
77+
]
78+
},
79+
{
80+
"cell_type": "markdown",
81+
"metadata": {},
82+
"source": [
83+
"# Observation Spec\n",
84+
"Note: this is NOT the Observation Space that observed for agent, but an info spec of it <br />\n",
85+
"Get the Observation Spec by `spec.observation_specs`\n",
86+
"- Shape: numbers of observation\n",
87+
"- Dimension Property:\n",
88+
"- Observation Type:\n",
89+
"- Name:"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": 58,
95+
"metadata": {},
96+
"outputs": [
97+
{
98+
"name": "stdout",
99+
"output_type": "stream",
100+
"text": [
101+
"Number of observations : 1\n",
102+
"Spec Info : ObservationSpec(shape=(60,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='StackingSensor_size2_VectorSensor_size30')\n",
103+
"Is there a visual observation ? False\n"
104+
]
105+
}
106+
],
107+
"source": [
108+
"# Examine the number of observations per Agent\n",
109+
"print(\"Number of observations : \", len(spec.observation_specs))\n",
110+
"print(f\"Spec Info : {spec.observation_specs[0]}\")\n",
111+
"# Is there a visual observation ?\n",
112+
"# Visual observation have 3 dimensions: Height, Width and number of channels\n",
113+
"vis_obs = any(len(spec.shape) == 3 for spec in spec.observation_specs)\n",
114+
"print(\"Is there a visual observation ?\", vis_obs)"
115+
]
116+
},
117+
{
118+
"cell_type": "markdown",
119+
"metadata": {},
120+
"source": [
121+
"# Action Spec\n",
122+
"Note: this is NOT the Action Space that set for agent, but an info spec of it <br />\n",
123+
"Get the Action Spec by `spec.action_specs` <br />\n",
124+
"Random Action: `action_spec.random_action(n_agent: int)` create an random action for n agents <br />\n",
125+
"Empty Action: `action_spec.empty_action(n_agent: int)` create an empty action for n agents <br />"
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": 10,
131+
"metadata": {},
132+
"outputs": [
133+
{
134+
"name": "stdout",
135+
"output_type": "stream",
136+
"text": [
137+
"There are 1 discrete actions\n",
138+
"Action number 0 has 5 different options\n",
139+
"Continuous: 0, Discrete: (5,)\n"
140+
]
141+
}
142+
],
143+
"source": [
144+
"# Is the Action continuous or multi-discrete ?\n",
145+
"if spec.action_spec.continuous_size > 0:\n",
146+
" print(f\"There are {spec.action_spec.continuous_size} continuous actions\")\n",
147+
"if spec.action_spec.is_discrete():\n",
148+
" print(f\"There are {spec.action_spec.discrete_size} discrete actions\")\n",
149+
"\n",
150+
"# For discrete actions only : How many different options does each action has ?\n",
151+
"if spec.action_spec.discrete_size > 0:\n",
152+
" for action, branch_size in enumerate(spec.action_spec.discrete_branches):\n",
153+
" print(f\"Action number {action} has {branch_size} different options\")"
154+
]
155+
},
156+
{
157+
"cell_type": "markdown",
158+
"metadata": {},
159+
"source": [
160+
"# Action Tuple\n",
161+
"Class that's pack NamedTuple as Action\n",
162+
"- `action.discrete`: get the discrete actions\n",
163+
"- `action.continuous`: get the continuous actions\n",
164+
"- `action.add_discrete`: add the discrete actions\n",
165+
"- `action.add_continous`: add the continuous actions\n",
166+
"\n",
167+
"Axis 0(Rows): Different Agents actions value <br />\n",
168+
"Axis 1(Columns): Different Actions on Same agent<br />"
169+
]
170+
},
171+
{
172+
"cell_type": "code",
173+
"execution_count": 32,
174+
"metadata": {},
175+
"outputs": [
176+
{
177+
"name": "stdout",
178+
"output_type": "stream",
179+
"text": [
180+
"[[1 2]\n",
181+
" [3 4]]\n",
182+
"[[0.5]]\n",
183+
"[[0]\n",
184+
" [3]]\n"
185+
]
186+
}
187+
],
188+
"source": [
189+
"from mlagents_envs.environment import ActionTuple\n",
190+
"action = ActionTuple()\n",
191+
"action.add_discrete(np.array([[1,2],[3,4]])) # [1,2] actions on Agent 1, [3,4] actions on Agent 2\n",
192+
"print(action.discrete)\n",
193+
"action.add_continuous(np.array([[0.5]]))\n",
194+
"print(action.continuous)\n",
195+
"print(spec.action_spec.random_action(2).discrete) # Get 1 random action under Action Spec for 2 agents"
196+
]
197+
},
198+
{
199+
"cell_type": "markdown",
200+
"metadata": {},
201+
"source": [
202+
"# Decision Steps / Terminal Steps\n",
203+
"Decision Steps and Terminal Steps are the list that agents called for the need of decision.<br />\n",
204+
"Difference between Decision Steps and Terminal Steps is that terminal step only calls on episode end, while \n",
205+
"decision step can be called at anytime.\n",
206+
"- Decision Steps: `env.get_steps()` get the steps from agents calling <br />\n",
207+
"- Agent ID: `steps.agent_id()` get the agents id corresponding to the step <br />"
208+
]
209+
},
210+
{
211+
"cell_type": "code",
212+
"execution_count": 37,
213+
"metadata": {},
214+
"outputs": [
215+
{
216+
"name": "stdout",
217+
"output_type": "stream",
218+
"text": [
219+
"[0]\n"
220+
]
221+
}
222+
],
223+
"source": [
224+
"decision_steps, terminal_steps = env.get_steps(behavior_name) \n",
225+
"print(decision_steps.agent_id)"
226+
]
227+
},
228+
{
229+
"cell_type": "markdown",
230+
"metadata": {},
231+
"source": [
232+
"# Observation and Reward of Steps\n",
233+
"Observation of an agent: `steps[agent_id].obs` <br />\n",
234+
"Reward of an agent: `steps[agent_id].reward` <br /> <br/>\n",
235+
"Observation of all agent: `steps.obs` <br />\n",
236+
"Reward of all agent: `steps.reward` <br />"
237+
]
238+
},
239+
{
240+
"cell_type": "code",
241+
"execution_count": 56,
242+
"metadata": {},
243+
"outputs": [
244+
{
245+
"name": "stdout",
246+
"output_type": "stream",
247+
"text": [
248+
"Observation of Agent 0: [array([ 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
249+
" 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
250+
" 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
251+
" 0., 0., 0., 0., 0., 0., -100., -100., -100.,\n",
252+
" -100., -100., -100., 0., -100., -100., -100., -100., -100.,\n",
253+
" -100., 0., -100., -100., -100., -100., -100., -100., 0.,\n",
254+
" -100., -100., -100., -100., -100., -100.], dtype=float32)]\n",
255+
"Reward of Agent 0: 0.0\n",
256+
"0.0\n"
257+
]
258+
}
259+
],
260+
"source": [
261+
"agent_id = decision_steps.agent_id[0]\n",
262+
"print(f\"Observation of Agent {agent_id}: {decision_steps[agent_id].obs}\")\n",
263+
"print(f\"Reward of Agent {agent_id}: {decision_steps[agent_id].reward}\")"
264+
]
265+
},
266+
{
267+
"cell_type": "markdown",
268+
"metadata": {},
269+
"source": [
270+
"# Set the Action and Run\n",
271+
"- `env.set_actions(behavior_name: str, action: ActionTuple)` : Setup Action for next step\n",
272+
"- `env.step()` : Foward to next step"
273+
]
274+
},
275+
{
276+
"cell_type": "code",
277+
"execution_count": 57,
278+
"metadata": {},
279+
"outputs": [],
280+
"source": [
281+
"action = spec.action_spec.random_action(len(decision_steps))\n",
282+
"env.set_actions(behavior_name, action)\n",
283+
"env.step()"
284+
]
285+
},
286+
{
287+
"cell_type": "markdown",
288+
"metadata": {},
289+
"source": [
290+
"# Run the enviroment for serval episode"
291+
]
292+
},
293+
{
294+
"cell_type": "code",
295+
"execution_count": null,
296+
"metadata": {},
297+
"outputs": [],
298+
"source": [
299+
"ep = 2\n",
300+
"for episode in range(ep):\n",
301+
" env.reset()\n",
302+
" decision_steps, terminal_steps = env.get_steps(behavior_name) # Get the first step\n",
303+
" tracked_agent = decision_steps.agent_id[0] # Track the agent (Since there's only one)\n",
304+
" done = False # For the tracked_agent\n",
305+
" episode_rewards = 0 # For the tracked_agent\n",
306+
" while not done:\n",
307+
" # Generate an action for all agents\n",
308+
" action = spec.action_spec.random_action(len(decision_steps))\n",
309+
" # Set the actions\n",
310+
" env.set_actions(behavior_name, action)\n",
311+
"\n",
312+
" # Move the simulation forward\n",
313+
" env.step()\n",
314+
"\n",
315+
" # Get the new simulation results\n",
316+
" decision_steps, terminal_steps = env.get_steps(behavior_name)\n",
317+
" if tracked_agent in decision_steps: # The agent requested a decision\n",
318+
" episode_rewards += decision_steps[tracked_agent].reward\n",
319+
" print(f\"step reward:{decision_steps[tracked_agent].reward}\")\n",
320+
" if tracked_agent in terminal_steps: # The agent terminated its episode\n",
321+
" episode_rewards += terminal_steps[tracked_agent].reward\n",
322+
" print(f\"step reward:{terminal_steps[tracked_agent].reward}\")\n",
323+
" done = True\n",
324+
" print(f\"Total rewards for episode {episode} is {episode_rewards}\")"
325+
]
326+
},
327+
{
328+
"cell_type": "code",
329+
"execution_count": 15,
330+
"metadata": {},
331+
"outputs": [],
332+
"source": [
333+
"env.close()"
334+
]
335+
}
336+
],
337+
"metadata": {
338+
"kernelspec": {
339+
"display_name": "Python 3.7.10 ('mlagent')",
340+
"language": "python",
341+
"name": "python3"
342+
},
343+
"language_info": {
344+
"codemirror_mode": {
345+
"name": "ipython",
346+
"version": 3
347+
},
348+
"file_extension": ".py",
349+
"mimetype": "text/x-python",
350+
"name": "python",
351+
"nbconvert_exporter": "python",
352+
"pygments_lexer": "ipython3",
353+
"version": "3.7.15"
354+
},
355+
"orig_nbformat": 4,
356+
"vscode": {
357+
"interpreter": {
358+
"hash": "add02d4cf8c8a1f4086ab0b95639014411458b216e0b1138ae392c4218dbd2e2"
359+
}
360+
}
361+
},
362+
"nbformat": 4,
363+
"nbformat_minor": 2
364+
}
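The notebook drives the agent with `spec.action_spec.random_action(...)`. As a complement, here is a minimal sketch of sending an explicit action instead, built as an `ActionTuple` for the single discrete branch of size 5 shown in the behavior spec. Assumptions: the `CRML` executable sits next to the script, and option index `1` is an arbitrary choice, since the mapping from option index to in-game move is not documented here.

```python
import numpy as np
from mlagents_envs.environment import UnityEnvironment, ActionTuple

env = UnityEnvironment(file_name="CRML", seed=1, side_channels=[])
env.reset()

behavior_name = list(env.behavior_specs)[0]
spec = env.behavior_specs[behavior_name]
decision_steps, terminal_steps = env.get_steps(behavior_name)

# One row per agent awaiting a decision, one column per discrete branch.
# Option index 1 is arbitrary among the 5 available options.
discrete = np.full((len(decision_steps), spec.action_spec.discrete_size), 1, dtype=np.int32)
action = ActionTuple(discrete=discrete)

env.set_actions(behavior_name, action)
env.step()
env.close()
```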
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
1+
behaviors:
2+
Player:
3+
trainer_type: ppo
4+
hyperparameters:
5+
batch_size: 10
6+
buffer_size: 100
7+
learning_rate: 3.0e-4
8+
beta: 5.0e-4
9+
epsilon: 0.2
10+
lambd: 0.99
11+
num_epoch: 3
12+
learning_rate_schedule: linear
13+
beta_schedule: constant
14+
epsilon_schedule: linear
15+
network_settings:
16+
normalize: false
17+
hidden_units: 128
18+
num_layers: 2
19+
reward_signals:
20+
extrinsic:
21+
gamma: 0.99
22+
strength: 1.0
23+
max_steps: 5000
24+
time_horizon: 64
25+
summary_freq: 10000
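For a quick sanity check before training, a small sketch (assuming the config above is saved as `config/player_config.yaml`, the path used in the notebook's `mlagents-learn` example, and that PyYAML is installed) that loads the file and prints the PPO hyperparameters:

```python
import yaml  # PyYAML

# Path assumed from the notebook's example command:
#   mlagents-learn config\player_config.yaml --env="CRML" --run-id=test
with open("config/player_config.yaml") as f:
    config = yaml.safe_load(f)

player = config["behaviors"]["Player"]
print("trainer_type:", player["trainer_type"])
print("max_steps:", player["max_steps"])
for key, value in player["hyperparameters"].items():
    print(f"  {key}: {value}")
```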
