#19 more work in progress on MctsExample etc

INFLUENCEorg · Jun 4, 2020 · 0b1e7f7 · 0b1e7f7
1 parent 29c8885
commit 0b1e7f7
Show file tree

Hide file tree

Showing 5 changed files with 40 additions and 23 deletions.
diff --git a/aiagents/multi/FactoryFloorIterativeGreedy.py b/aiagents/multi/FactoryFloorIterativeGreedy.py
@@ -1,45 +1,52 @@
-from aiagents.multi.ComplexAgent import ComplexAgent
-from aiagents.single.FactoryFloorAgent import getPath, getDistance, evaluateAllPositions
-from aienvs.FactoryFloor.FactoryFloorState import FactoryFloorState
 import logging
 import operator
+from typing import List
+from gym.spaces import Dict
+
+from aienvs.FactoryFloor.FactoryFloorState import FactoryFloorState
+
+from aiagents.multi.ComplexAgent import ComplexAgent
+from aiagents.single.FactoryFloorAgent import FactoryFloorAgent, getPath, getDistance, evaluateAllPositions
+
 
 class FactoryFloorIterativeGreedy(ComplexAgent):
     """
     Iterative Greedy as in Claes'17 paper
     """
-    def __init__(self, ffAgentList, environment, parameters=None):
-        self.pathDict=ffAgentList[0].pathDict
+
+    def __init__(self, ffAgentList:List[FactoryFloorAgent], actionspace:Dict, observationspace, parameters=None):
+        super().__init__(ffAgentList, actionspace, observationspace, parameters)
         self._ffAgentDict = {}
         for ffAgent in ffAgentList:
-            self._ffAgentDict[ffAgent.agentId]=ffAgent
+            self._ffAgentDict[ffAgent.agentId] = ffAgent
 
     def step(self, state:FactoryFloorState, reward=None, done=None):
         """
         """
-        #create a dictionary evaluating robot-action pairs
+        # create a dictionary evaluating robot-action pairs
+        pathDict = self._agentSubcomponents[0].pathDict
         evaluationDict = {}
         for robotId in self._ffAgentDict.keys():
             robotpos = state.robots[robotId].getPosition()
-            robotEvaluation = evaluateAllPositions(state,robotpos,self.pathDict)
+            robotEvaluation = evaluateAllPositions(state, robotpos, pathDict)
             for taskpos in robotEvaluation.keys():
-                evaluationDict.update({(robotId,taskpos): robotEvaluation[taskpos]})
+                evaluationDict.update({(robotId, taskpos): robotEvaluation[taskpos]})
 
-        #create a list 
+        # create a list 
         sortedRobotPosEval = sorted(evaluationDict.items(), key=operator.itemgetter(1), reverse=True)
 
-        actions={}
+        actions = {}
         # all agents by default assumed to follow no path, unless later specified
         for ffAgentId, ffAgent in self._ffAgentDict.items():
-            stayInPlacePath=[str(state.robots[ffAgentId].getPosition())]
-            action=ffAgent.getAction(stayInPlacePath)
+            stayInPlacePath = [str(state.robots[ffAgentId].getPosition())]
+            action = ffAgent.getAction(stayInPlacePath)
             actions.update(action)
 
         while len(sortedRobotPosEval) > 0:
-            bestRobot=sortedRobotPosEval[0][0][0]
-            correspondingPosition=sortedRobotPosEval[0][0][1]
-            robotPath=getPath(state.robots[bestRobot].getPosition(), correspondingPosition, self.pathDict)
-            action=self._ffAgentDict[bestRobot].getAction(robotPath)
+            bestRobot = sortedRobotPosEval[0][0][0]
+            correspondingPosition = sortedRobotPosEval[0][0][1]
+            robotPath = getPath(state.robots[bestRobot].getPosition(), correspondingPosition, pathDict)
+            action = self._ffAgentDict[bestRobot].getAction(robotPath)
             actions.update(action)
 
             newPosEval = [] 

diff --git a/aiagents/single/mcts/MctsAgent.py b/aiagents/single/mcts/MctsAgent.py
@@ -53,7 +53,7 @@ def __init__(self, agentId, actionspace: Dict, observationspace, parameters: dic
             self._limitType = 'iterations'
 
         # start the simulator environment
-        envparams = parameters['simulator']
+        envparams = self._parameters['simulator']
         e = EnvironmentFactory.createEnvironment(envparams['fullname'], envparams)
         self._simulator = ModifiedGymEnv(e, DecoratedSpace.create(copy.deepcopy(e.action_space)))
 

diff --git a/examples/MctsAggrExample.py b/examples/MctsAggrExample.py
@@ -42,8 +42,6 @@ def main():
 
     # whoa, you need to know the exact contents of all files here..
     recursive_update(agent_parameters['subAgentList'][0]['parameters']['simulator'], env_parameters['environment'])
-    recursive_update(agent_parameters['subAgentList'][0]['parameters']['treeAgent']['parameters']['simulator'], env_parameters['environment'])
-    recursive_update(agent_parameters['subAgentList'][0]['parameters']['rolloutAgent']['parameters']['simulator'], env_parameters['environment'])
 
     print(env_parameters)
     print(agent_parameters)

diff --git a/examples/MctsExample.py b/examples/MctsExample.py
@@ -12,6 +12,7 @@
 import sys
 import pickle
 from scipy import stats
+from dict_recursive_update import recursive_update
 
 logger = logging.getLogger()
 logger.setLevel(logging.ERROR)
@@ -37,24 +38,28 @@ def main():
     env_parameters = getParameters(env_filename)
     agent_parameters = getParameters(agent_filename)
 
+    # add env info to RobotAgent.
+    for i in range(3):
+        recursive_update(agent_parameters['subAgentList'][i]['parameters']['simulator'], env_parameters['environment'])
+
     print(env_parameters)
     print(agent_parameters)
 
     random.seed(env_parameters['seed'])
-    maxSteps=env_parameters['max_steps']
+    maxSteps = env_parameters['max_steps']
     env = FactoryFloor(env_parameters['environment'])
 
     logging.info("Starting example MCTS agent")
     logoutput = io.StringIO("episode output log")
 
     try:
-        logoutputpickle = open('./'+os.environ["SLURM_JOB_ID"] +'.pickle', 'wb')
+        logoutputpickle = open('./' + os.environ["SLURM_JOB_ID"] + '.pickle', 'wb')
     except KeyError:
         print("No SLURM_JOB_ID found")
         logoutputpickle = io.BytesIO()
 
     obs = env.reset()
-    complexAgent = createAgent(env, agent_parameters)
+    complexAgent = createAgent(env.action_space, env.observation_space, agent_parameters)
 
     experiment = Experiment(complexAgent, env, maxSteps, render=True)
     experiment.addListener(JsonLogger(logoutput))

diff --git a/examples/configs/agent_config.yaml b/examples/configs/agent_config.yaml
@@ -4,6 +4,7 @@ aliases:
   - &RandomAgent aiagents.single.RandomAgent.RandomAgent
   - &FactoryFloorAgent aiagents.single.FactoryFloorAgent.FactoryFloorAgent
   - &FactoryFloorIterativeGreedy aiagents.multi.FactoryFloorIterativeGreedy.FactoryFloorIterativeGreedy
+  - &FactoryFloorEnv aienvs.FactoryFloor.FactoryFloor.FactoryFloor
 
 config:
   - &RobotAgent aiagents.single.mcts.MctsAgent.MctsAgent
@@ -23,6 +24,8 @@ parameters:
           - class: *RobotAgent
             id: "robot1"
             parameters:
+              simulator:
+                fullname: *FactoryFloorEnv
               treeAgent:
                 class: *RandomAgent
                 id: "robot1"
@@ -50,6 +53,8 @@ parameters:
           - class: *RobotAgent
             id: "robot2"
             parameters:
+              simulator:
+                fullname: *FactoryFloorEnv
               treeAgent:
                 class: *RandomAgent
                 id: "robot2"
@@ -77,6 +82,8 @@ parameters:
           - class: *RobotAgent
             id: "robot3"
             parameters:
+              simulator:
+                fullname: *FactoryFloorEnv
               treeAgent:
                 class: *RandomAgent
                 id: "robot3"