
Commit 5bd45f8

fix the reward wont add correctly
1 parent fc843c6 commit 5bd45f8

172 files changed (+303, -9 lines changed)

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
1-
{"count":1,"self":37.4832064,"total":37.9379595,"children":{"InitializeActuators":{"count":1,"self":0.0009954,"total":0.0009954,"children":null},"InitializeSensors":{"count":1,"self":0.0019997,"total":0.0019997,"children":null},"AgentSendState":{"count":1657,"self":0.0103012,"total":0.0253481,"children":{"CollectObservations":{"count":310,"self":0.0039689999999999994,"total":0.0039689999999999994,"children":null},"WriteActionMask":{"count":310,"self":0,"total":0,"children":null},"RequestDecision":{"count":310,"self":0.0110779,"total":0.0110779,"children":null}}},"DecideAction":{"count":1657,"self":0.4179885,"total":0.4179885,"children":null},"AgentAct":{"count":1657,"self":0.0068666,"total":0.0068666,"children":null}},"gauges":{"Player.CumulativeReward":{"count":18,"max":-1,"min":-1,"runningAverage":-1,"value":-1,"weightedAverage":-1}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1668950585","unity_version":"2020.3.25f1","command_line_arguments":"D:\\Unity3D\\2020.3.25f1\\Editor\\Unity.exe -projectpath D:\/Unity Hub\/Project\/CrossyRoadEX -useHub -hubIPC -cloudEnvironment production -hubSessionId ede81690-68d0-11ed-8bd6-190a1d4b9e9f -accessToken XWH1YHOH09f5fs1Iahj5ELeeE_bmQEEUm5FZ_yenRl001ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.2.1-exp.1","scene_name":"MainGame","end_time_seconds":"1668950622"}}
1+
{"count":1,"self":34.9703776,"total":35.2413671,"children":{"InitializeActuators":{"count":1,"self":0.002992,"total":0.002992,"children":null},"InitializeSensors":{"count":1,"self":0.0041308,"total":0.0041308,"children":null},"AgentSendState":{"count":1508,"self":0.0060555999999999995,"total":0.0209852,"children":{"CollectObservations":{"count":893,"self":0.00881,"total":0.00881,"children":null},"WriteActionMask":{"count":893,"self":0.0019311999999999999,"total":0.0019311999999999999,"children":null},"RequestDecision":{"count":893,"self":0.0041884,"total":0.0041884,"children":null}}},"DecideAction":{"count":1508,"self":0.0066473,"total":0.0066473,"children":null},"AgentAct":{"count":1508,"self":0.23395559999999999,"total":0.23395559999999999,"children":null}},"gauges":{"Player.CumulativeReward":{"count":4,"max":14,"min":-1,"runningAverage":2.75,"value":-1,"weightedAverage":1.8125}},"metadata":{"timer_format_version":"0.1.0","start_time_seconds":"1669445190","unity_version":"2020.3.25f1","command_line_arguments":"D:\\Unity3D\\2020.3.25f1\\Editor\\Unity.exe -projectpath D:\\Unity Hub\\Project\\CrossyRoadEX -useHub -hubIPC -cloudEnvironment production -licensingIpc LicenseClient-seanc -hubSessionId bae391b0-6d54-11ed-acd1-1b4f3f0a746b -accessToken ojuXIW3TZExkvNSRgMkty-jXpRnJfQkVKVmaGvKDHno01ef","communication_protocol_version":"1.5.0","com.unity.ml-agents_version":"2.2.1-exp.1","scene_name":"MainGame","end_time_seconds":"1669445225"}}

Assets/Scripts/Controller/PlayerMovementScript.cs

Lines changed: 2 additions & 2 deletions
@@ -105,8 +105,7 @@ public float ActionHandle(int actionIndex)
105105
float reward = 0f;
106106
if (IsMoving || !canMove || Time.time - st < input_delay) return 0f;
107107
reward = InputTransfer(actionIndex);
108-
if ((int)current.z > score) return 1f;
109-
return 0f;
108+
return reward;
110109
}
111110

112111
/// <summary>
@@ -122,6 +121,7 @@ private float InputTransfer(int code)
122121
{
123122
case 1:
124123
successCheck = Move(new Vector3(0, 0, 3));
124+
if (successCheck == 1 && (int)current.z + 3 > score) reward = 1.0f;
125125
break;
126126
case 2:
127127
successCheck = Move(new Vector3(0, 0, -3));

Assets/Scripts/TrainingAgent/PlayerAgent.cs

Lines changed: 1 addition & 2 deletions
@@ -70,7 +70,7 @@ private void LevelObservation(VectorSensor sensor)
7070
}
7171
else
7272
{
73-
sensor.AddObservation(0);
73+
sensor.AddObservation(-1);
7474
ObjectsObservation(sensor, emptyList);
7575
}
7676
}
@@ -103,7 +103,6 @@ private void ObjectsObservation(VectorSensor sensor,List<GameObject> olist)
103103

104104
public override void OnActionReceived(ActionBuffers actions)
105105
{
106-
//Debug.Log(actions.DiscreteActions[0]);
107106
var reward = PMScript.ActionHandle(actions.DiscreteActions[0]);
108107
SetReward(reward);
109108
}
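With these two changes, `ActionHandle` now returns the reward computed inside `InputTransfer` (+1 only when a forward move actually lands on a new row), and `OnActionReceived` hands that value to `SetReward`. A quick way to confirm the fix from the Python side is to keep sending the forward action and watch the per-step rewards. This is only a sketch, assuming the `CRML` build and the single-agent setup used in the notebook below; the mapping of action index 1 to the forward move comes from the switch in PlayerMovementScript.cs.

```python
import numpy as np
from mlagents_envs.environment import UnityEnvironment, ActionTuple

env = UnityEnvironment(file_name="CRML", seed=1, side_channels=[])
env.reset()
behavior_name = list(env.behavior_specs)[0]

decision_steps, terminal_steps = env.get_steps(behavior_name)
for _ in range(20):
    if len(decision_steps) > 0:
        # Action index 1 maps to the forward move (+3 on z) in PlayerMovementScript.cs.
        forward = ActionTuple()
        forward.add_discrete(np.ones((len(decision_steps), 1), dtype=np.int32))
        env.set_actions(behavior_name, forward)
    env.step()
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    for agent_id in decision_steps.agent_id:
        # Expect 1.0 when the agent advanced to a new row, 0.0 otherwise.
        print("step reward:", decision_steps[agent_id].reward)
    for agent_id in terminal_steps.agent_id:
        print("terminal reward:", terminal_steps[agent_id].reward)

env.close()
```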

Executable/v1.0/EnviromentTest.ipynb

Lines changed: 299 additions & 3 deletions
@@ -15,25 +15,321 @@
1515
"```"
1616
]
1717
},
18+
{
19+
"cell_type": "markdown",
20+
"metadata": {},
21+
"source": [
22+
"# Enviroment\n",
23+
"Get the enviroment by calling `UnityEnviroment()` to get the corresponding file name env.<br />\n",
24+
"- Close: `env.close()` close the enviroment. Release the port of commutator. <br />\n",
25+
"- Reset: `env.reset()` resets the enviroment. <br />\n",
26+
"- Set Action: `env.set_actions(behavior_name: str, action: ActionTuple)` set up the actions for next step.More Info down below <br />\n",
27+
"- Step: `env.step()` move the simulation forward. Pass the action to Unity. <br />\n",
28+
"- Get Step: `env.get_steps(behavior_name: str)` get the decision step from Unity.More Info down below <br />\n",
29+
" "
30+
]
31+
},
1832
{
1933
"cell_type": "code",
20-
"execution_count": 2,
34+
"execution_count": 35,
2135
"metadata": {},
2236
"outputs": [],
2337
"source": [
2438
"from mlagents_envs.environment import UnityEnvironment\n",
39+
"import numpy as np\n",
2540
"# This is a non-blocking call that only loads the environment.\n",
2641
"env = UnityEnvironment(file_name=\"CRML\", seed=1, side_channels=[])\n",
2742
"# Start interacting with the environment.\n",
28-
"env.reset()\n",
29-
"behavior_names = env.behavior_specs.keys()"
43+
"env.reset()"
44+
]
45+
},
46+
{
47+
"cell_type": "markdown",
48+
"metadata": {},
49+
"source": [
50+
"# Behavior Spec\n",
51+
"Contains the specs of the behavior. Including `ActionSpec` and `Observation Spec`<br />\n",
52+
"`env.behavior_specs` is a dictionary of (Name: str, Spec: Behavior_Spec) <br />\n",
53+
"Get the names of Behavior Spec by `list(env.behavior_specs)` or `list(env.behavior_specs.keys())` <br />\n",
54+
"Get the corresonding Behavior Spec by `env.behavior_specs[behavior_name]` <br />"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": 36,
60+
"metadata": {},
61+
"outputs": [
62+
{
63+
"name": "stdout",
64+
"output_type": "stream",
65+
"text": [
66+
"Name of the behavior : Player?team=0\n",
67+
"Behavior spec of Player?team=0 : BehaviorSpec(observation_specs=[ObservationSpec(shape=(60,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='StackingSensor_size2_VectorSensor_size30')], action_spec=ActionSpec(continuous_size=0, discrete_branches=(5,)))\n"
68+
]
69+
}
70+
],
71+
"source": [
72+
"# Since there's only one behavior, get the first on the list\n",
73+
"behavior_name = list(env.behavior_specs)[0]\n",
74+
"print(f\"Name of the behavior : {behavior_name}\")\n",
75+
"spec = env.behavior_specs[behavior_name]\n",
76+
"print(f\"Behavior spec of {behavior_name} : {spec}\")"
77+
]
78+
},
79+
{
80+
"cell_type": "markdown",
81+
"metadata": {},
82+
"source": [
83+
"# Observation Spec\n",
84+
"Note: this is NOT the Observation Space that observed for agent, but an info spec of it <br />\n",
85+
"Get the Observation Spec by `spec.observation_specs`\n",
86+
"- Shape: numbers of observation\n",
87+
"- Dimension Property:\n",
88+
"- Observation Type:\n",
89+
"- Name:"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": 58,
95+
"metadata": {},
96+
"outputs": [
97+
{
98+
"name": "stdout",
99+
"output_type": "stream",
100+
"text": [
101+
"Number of observations : 1\n",
102+
"Spec Info : ObservationSpec(shape=(60,), dimension_property=(<DimensionProperty.NONE: 1>,), observation_type=<ObservationType.DEFAULT: 0>, name='StackingSensor_size2_VectorSensor_size30')\n",
103+
"Is there a visual observation ? False\n"
104+
]
105+
}
106+
],
107+
"source": [
108+
"# Examine the number of observations per Agent\n",
109+
"print(\"Number of observations : \", len(spec.observation_specs))\n",
110+
"print(f\"Spec Info : {spec.observation_specs[0]}\")\n",
111+
"# Is there a visual observation ?\n",
112+
"# Visual observation have 3 dimensions: Height, Width and number of channels\n",
113+
"vis_obs = any(len(spec.shape) == 3 for spec in spec.observation_specs)\n",
114+
"print(\"Is there a visual observation ?\", vis_obs)"
115+
]
116+
},
117+
{
118+
"cell_type": "markdown",
119+
"metadata": {},
120+
"source": [
121+
"# Action Spec\n",
122+
"Note: this is NOT the Action Space that set for agent, but an info spec of it <br />\n",
123+
"Get the Action Spec by `spec.action_specs` <br />\n",
124+
"Random Action: `action_spec.random_action(n_agent: int)` create an random action for n agents <br />\n",
125+
"Empty Action: `action_spec.empty_action(n_agent: int)` create an empty action for n agents <br />"
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": 10,
131+
"metadata": {},
132+
"outputs": [
133+
{
134+
"name": "stdout",
135+
"output_type": "stream",
136+
"text": [
137+
"There are 1 discrete actions\n",
138+
"Action number 0 has 5 different options\n",
139+
"Continuous: 0, Discrete: (5,)\n"
140+
]
141+
}
142+
],
143+
"source": [
144+
"# Is the Action continuous or multi-discrete ?\n",
145+
"if spec.action_spec.continuous_size > 0:\n",
146+
" print(f\"There are {spec.action_spec.continuous_size} continuous actions\")\n",
147+
"if spec.action_spec.is_discrete():\n",
148+
" print(f\"There are {spec.action_spec.discrete_size} discrete actions\")\n",
149+
"\n",
150+
"# For discrete actions only : How many different options does each action has ?\n",
151+
"if spec.action_spec.discrete_size > 0:\n",
152+
" for action, branch_size in enumerate(spec.action_spec.discrete_branches):\n",
153+
" print(f\"Action number {action} has {branch_size} different options\")"
154+
]
155+
},
156+
{
157+
"cell_type": "markdown",
158+
"metadata": {},
159+
"source": [
160+
"# Action Tuple\n",
161+
"Class that's pack NamedTuple as Action\n",
162+
"- `action.discrete`: get the discrete actions\n",
163+
"- `action.continuous`: get the continuous actions\n",
164+
"- `action.add_discrete`: add the discrete actions\n",
165+
"- `action.add_continous`: add the continuous actions\n",
166+
"\n",
167+
"Axis 0(Rows): Different Agents actions value <br />\n",
168+
"Axis 1(Columns): Different Actions on Same agent<br />"
169+
]
170+
},
171+
{
172+
"cell_type": "code",
173+
"execution_count": 32,
174+
"metadata": {},
175+
"outputs": [
176+
{
177+
"name": "stdout",
178+
"output_type": "stream",
179+
"text": [
180+
"[[1 2]\n",
181+
" [3 4]]\n",
182+
"[[0.5]]\n",
183+
"[[0]\n",
184+
" [3]]\n"
185+
]
186+
}
187+
],
188+
"source": [
189+
"from mlagents_envs.environment import ActionTuple\n",
190+
"action = ActionTuple()\n",
191+
"action.add_discrete(np.array([[1,2],[3,4]])) # [1,2] actions on Agent 1, [3,4] actions on Agent 2\n",
192+
"print(action.discrete)\n",
193+
"action.add_continuous(np.array([[0.5]]))\n",
194+
"print(action.continuous)\n",
195+
"print(spec.action_spec.random_action(2).discrete) # Get 1 random action under Action Spec for 2 agents"
196+
]
197+
},
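Note that the 2x2 array in that cell only illustrates the axes. For this project the spec is a single discrete branch with 5 options, so an `ActionTuple` handed to `env.set_actions` needs a discrete array of shape `(number_of_agents, 1)` with values 0-4. A small sketch for the single-agent case, reusing `env` and `behavior_name` from the cells above:

```python
import numpy as np
from mlagents_envs.environment import ActionTuple

one_agent_action = ActionTuple()
one_agent_action.add_discrete(np.array([[1]], dtype=np.int32))  # shape (1 agent, 1 branch)
env.set_actions(behavior_name, one_agent_action)
env.step()
```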
198+
{
199+
"cell_type": "markdown",
200+
"metadata": {},
201+
"source": [
202+
"# Decision Steps / Terminal Steps\n",
203+
"Decision Steps and Terminal Steps are the list that agents called for the need of decision.<br />\n",
204+
"Difference between Decision Steps and Terminal Steps is that terminal step only calls on episode end, while \n",
205+
"decision step can be called at anytime.\n",
206+
"- Decision Steps: `env.get_steps(Behavior_Name:str)` get the steps from agents requested of the behavior<br />\n",
207+
"- Agent ID: `steps.agent_id()` get the agents id corresponding to the step <br />\n",
208+
"- `len(DecisionSteps)`: Returns the number of agents requesting a decision since the last call to env.step()"
209+
]
210+
},
211+
{
212+
"cell_type": "code",
213+
"execution_count": 37,
214+
"metadata": {},
215+
"outputs": [
216+
{
217+
"name": "stdout",
218+
"output_type": "stream",
219+
"text": [
220+
"[0]\n"
221+
]
222+
}
223+
],
224+
"source": [
225+
"decision_steps, terminal_steps = env.get_steps(behavior_name) \n",
226+
"print(decision_steps.agent_id)"
227+
]
228+
},
229+
{
230+
"cell_type": "markdown",
231+
"metadata": {},
232+
"source": [
233+
"# Observation and Reward of Steps\n",
234+
"Observation of an agent: `steps[agent_id].obs` <br />\n",
235+
"Reward of an agent: `steps[agent_id].reward` <br /> <br/>\n",
236+
"Observation of all agent: `steps.obs` <br />\n",
237+
"Reward of all agent: `steps.reward` <br />"
238+
]
239+
},
240+
{
241+
"cell_type": "code",
242+
"execution_count": 56,
243+
"metadata": {},
244+
"outputs": [
245+
{
246+
"name": "stdout",
247+
"output_type": "stream",
248+
"text": [
249+
"Observation of Agent 0: [array([ 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
250+
" 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
251+
" 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
252+
" 0., 0., 0., 0., 0., 0., -100., -100., -100.,\n",
253+
" -100., -100., -100., 0., -100., -100., -100., -100., -100.,\n",
254+
" -100., 0., -100., -100., -100., -100., -100., -100., 0.,\n",
255+
" -100., -100., -100., -100., -100., -100.], dtype=float32)]\n",
256+
"Reward of Agent 0: 0.0\n",
257+
"0.0\n"
258+
]
259+
}
260+
],
261+
"source": [
262+
"agent_id = decision_steps.agent_id[0]\n",
263+
"print(f\"Observation of Agent {agent_id}: {decision_steps[agent_id].obs}\")\n",
264+
"print(f\"Reward of Agent {agent_id}: {decision_steps[agent_id].reward}\")"
265+
]
266+
},
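Per the spec printed earlier, these 60 values come from `StackingSensor_size2_VectorSensor_size30`, i.e. two stacked copies of a 30-value vector observation, so the flat vector can be split back into its two frames. A sketch, reusing `decision_steps` and `agent_id` from the cell above; the ordering of the stacked frames is an assumption:

```python
import numpy as np

obs = decision_steps[agent_id].obs[0]   # flat vector with 60 values
frames = obs.reshape(2, 30)             # 2 stacked frames of 30 values each
print(frames[0])
print(frames[1])
```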
267+
{
268+
"cell_type": "markdown",
269+
"metadata": {},
270+
"source": [
271+
"# Set the Action and Run\n",
272+
"- `env.set_actions(behavior_name: str, action: ActionTuple)` : Setup Action for next step\n",
273+
"- `env.step()` : Foward to next step"
274+
]
275+
},
276+
{
277+
"cell_type": "code",
278+
"execution_count": 57,
279+
"metadata": {},
280+
"outputs": [],
281+
"source": [
282+
"action = spec.action_spec.random_action(len(decision_steps))\n",
283+
"env.set_actions(behavior_name, action)\n",
284+
"env.step()"
285+
]
286+
},
287+
{
288+
"cell_type": "markdown",
289+
"metadata": {},
290+
"source": [
291+
"# Run the enviroment for serval episode"
30292
]
31293
},
32294
{
33295
"cell_type": "code",
34296
"execution_count": null,
35297
"metadata": {},
36298
"outputs": [],
299+
"source": [
300+
"ep = 2\n",
301+
"for episode in range(ep):\n",
302+
" env.reset()\n",
303+
" decision_steps, terminal_steps = env.get_steps(behavior_name) # Get the first step\n",
304+
" tracked_agent = decision_steps.agent_id[0] # Track the agent (Since there's only one)\n",
305+
" done = False # For the tracked_agent\n",
306+
" episode_rewards = 0 # For the tracked_agent\n",
307+
" while not done:\n",
308+
" # Generate an action for all agents\n",
309+
" action = spec.action_spec.random_action(len(decision_steps))\n",
310+
" # Set the actions\n",
311+
" env.set_actions(behavior_name, action)\n",
312+
"\n",
313+
" # Move the simulation forward\n",
314+
" env.step()\n",
315+
"\n",
316+
" # Get the new simulation results\n",
317+
" decision_steps, terminal_steps = env.get_steps(behavior_name)\n",
318+
" if tracked_agent in decision_steps: # The agent requested a decision\n",
319+
" episode_rewards += decision_steps[tracked_agent].reward\n",
320+
" print(f\"step reward:{decision_steps[tracked_agent].reward}\")\n",
321+
" if tracked_agent in terminal_steps: # The agent terminated its episode\n",
322+
" episode_rewards += terminal_steps[tracked_agent].reward\n",
323+
" print(f\"step reward:{terminal_steps[tracked_agent].reward}\")\n",
324+
" done = True\n",
325+
" print(f\"Total rewards for episode {episode} is {episode_rewards}\")"
326+
]
327+
},
328+
{
329+
"cell_type": "code",
330+
"execution_count": 15,
331+
"metadata": {},
332+
"outputs": [],
37333
"source": [
38334
"env.close()"
39335
]

Executable/v2.0/CRML_Data/ML-Agents/Timers/MainGame_timers.json

Lines changed: 0 additions & 1 deletion
This file was deleted.
File renamed without changes.
