
Source Code for Module teamwork.multiagent.COMMTDP

###########################################################################
# 11/5/2001: David V. Pynadath (pynadath@isi.edu)
#
# Agent: generic class for defining an agent and its capabilities
#
# Team: generic class for defining a team of agents
#
# ComMTDP: the COM-MTDP class
#
###########################################################################
import copy
from types import StringType

##from teamwork.state.States import StateSpace
##from teamwork.state.States import EnumeratedSubspace

def strip(state):
    """Returns a copy of the given state dictionary, omitting the
    internal bookkeeping keys (those beginning with '_')"""
    s = {}
    for key in state.keys():
        if key[0] != '_':
            s[key] = state[key]
    return s

class ComMTDP:
    version = 1.0

    def __init__(self,states,team,name='Generic COM-MTDP'):
        self.states = states
        self.team = team
        self.agents = []
        for agent in self.team.agents.keys():
            self.agents.append(self.team.agents[agent])
        self.name = name

    # R: the total reward, summing the domain-level and
    # communication-level components
    def reward(self,state,actions,messages):
        return self.rewardAct(state,actions) + \
               self.rewardCom(state,messages)

    # R_A: the domain-level reward (to be overridden by subclasses)
    def rewardAct(self,state,actions):
        raise NotImplementedError

    # R_Sigma: the communication-level reward (to be overridden)
    def rewardCom(self,state,messages):
        raise NotImplementedError

    # P: the state transition probability (to be overridden)
    def probability(self,orig,dest,actions):
        raise NotImplementedError

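    # Illustrative sketch (hypothetical, not part of the original
    # module): a concrete domain would subclass ComMTDP and fill in the
    # three abstract methods above, e.g.:
    #
    #   class MinimalMTDP(ComMTDP):
    #       def rewardAct(self,state,actions):
    #           # Hypothetical: unit cost per acting agent
    #           return -float(len(actions))
    #       def rewardCom(self,state,messages):
    #           # Hypothetical: fixed cost per message sent
    #           return -0.1*float(len(strip(messages)))
    #       def probability(self,orig,dest,actions):
    #           # Hypothetical: deterministic dynamics
    #           return 1.0
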
    def evaluatePolicy(self,domPolicy,comPolicy,state,horizon=100,debug=0):
        """Computes the expected reward of following the specified
        domain- and communication-level policies over the specified
        finite horizon, starting from the specified initial state"""
        # Create the state of execution, which includes the state of the
        # world, as well as the belief states of the individual agents
        stateList = [{'_world': state,'_prob':1.0,'_value':0.0,
                      '_actions':{},'_epoch':0}]
        for s in stateList:
            for agent in self.agents:
                s[agent.name] = agent.initialStateEstimator()
        return self.__evaluatePolicy(stateList,domPolicy,comPolicy,
                                     horizon,debug)

    def __evaluatePolicy(self,stateList,domPolicy,comPolicy,horizon,debug):
        results = {'Reward':0.0,
                   'Messages':0.0}
        if len(stateList) == 0:
            return results
        for epoch in range(stateList[0]['_epoch'],horizon):
            if debug:
                print '========='
                print 'Epoch',epoch
                print '========='
            self.ProjectObservations(stateList,None,[],epoch,debug)
            results['Messages'] = results['Messages'] + \
                                  self.ExecuteCommunication(stateList,
                                                            comPolicy,debug)
            results['Reward'] = results['Reward'] \
                                + self.ExecuteActions(stateList,domPolicy,
                                                      debug)
            self.ProjectWorldDynamics(stateList)
        if debug:
            print 'EValue:',results['Reward']
            print '# Msgs:',results['Messages']
        return results

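    # Illustrative usage sketch (all names hypothetical): evaluatePolicy
    # expects domPolicy and comPolicy to map each agent's name to a
    # policy object whose execute() method maps that agent's belief
    # state to an action or message content:
    #
    #   result = mtdp.evaluatePolicy({'scout':scoutPolicy,'base':basePolicy},
    #                                {'scout':silence,'base':silence},
    #                                initialState,horizon=10)
    #   print result['Reward'],result['Messages']
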
    def __generateTeamMessages(self,state,agents,msgList):
        """Generates all possible messages over all agents"""
        if len(agents) == 0:
            return msgList
        else:
            # Generate the set of possible messages for the current agent
            agent = agents[0]
            for teamMsg in msgList[:]:
                for msg in agent.legalMessages(state[agent.name]):
                    newMsg = copy.copy(teamMsg)
                    if msg:
                        newMsg[agent.name] = msg
                    msgList.append(newMsg)
                msgList.remove(teamMsg)
            agents.remove(agent)
            return self.__generateTeamMessages(state,agents,msgList)

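    # For example (hypothetical values): with agents A and B, where
    # legalMessages returns [None,'hi'] for A and [None,'lo'] for B, the
    # recursion above expands [{}] into the cross product of choices,
    # with falsy entries leaving an agent silent:
    #   [{}, {'B':'lo'}, {'A':'hi'}, {'A':'hi','B':'lo'}]
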
    def __generateTeamObservations(self,state,actions,agents,obsList,debug=0):
        """Generates all possible observations over all agents"""
        if len(agents) == 0:
            return obsList
        else:
            # Find the current agent's observations
            agent = agents[0]
            agentObsList = [{'_type':'observation'}]
            self.__generateAgentObservations(state,actions,agent,
                                             self.states.features.keys()[:],
                                             agentObsList,debug)
            # Compose this list with the current list of possible
            # observations
            for obs in obsList[:]:
                obsList.remove(obs)
                for agentObs in agentObsList:
                    newObs = copy.copy(obs)
                    newObs[agent.name] = agentObs
                    newObs['_prob'] = newObs['_prob'] * \
                        self.team.individualObs(state,actions,agentObs,
                                                agent.name)
                    if newObs['_prob'] > 0.0:
                        obsList.append(newObs)
            agents.remove(agent)
            return self.__generateTeamObservations(state,actions,agents,
                                                   obsList,debug)

    def __generateAgentObservations(self,state,actions,agent,
                                    features,observations,debug=0):
        """Generates all possible observations for an individual agent"""
        if len(features) == 0:
            if debug:
                print 'Agent',agent.name,'observes:',observations
            return observations
        else:
            feature = features[0]
            if agent.observations.has_key(feature):
                if agent.observations[feature] == 'observable':
                    # For observable features, there is only one
                    # possible observation
                    for obs in observations:
                        obs[feature] = state[feature]
                else:
                    # For partially observable features, we must
                    # consider all of the possible observations and
                    # their probabilities
                    for obs in observations[:]:
                        observations.remove(obs)
                        for value in agent.observations[feature]:
                            newObs = copy.copy(obs)
                            newObs[feature] = value
                            observations.append(newObs)
            else:
                # The feature is unobservable
                pass
            features.remove(feature)
            return self.__generateAgentObservations(state,actions,
                                                    agent,features,
                                                    observations,debug)

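    # For example (hypothetical values): if an agent's observation model
    # is {'location':'observable','weather':['rain','sun']}, the
    # recursion above yields two candidate observations,
    #   {'location':state['location'],'weather':'rain'}
    #   {'location':state['location'],'weather':'sun'}
    # which __generateTeamObservations then weights by
    # team.individualObs; features absent from the model are omitted as
    # unobservable.
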
    def computeBeliefState(self,states,agent,policy,history):
        """Partially implemented"""
        stateList = []
        for state in states:
            stateList.append({'_world': state,'_prob':1.0,'_value':0.0,
                              '_actions':{},'_epoch':0})
        self.ProjectObservations(stateList,agent,history,0,1)

    def bestAction(self,agent,history,horizon,policyOthers):
        choice = self.team.agents[agent].actions[0]
        best = self.valueHistory(agent,history,horizon,choice,policyOthers)
        for action in self.team.agents[agent].actions[1:]:
            value = self.valueHistory(agent,history,horizon,action,
                                      policyOthers)
            if value > best:
                best = value
                choice = action
        return choice

    def valueHistory(self,agent,history,horizon,action,policyOthers):
        value = 0.0
        # Iterate through each state
        state = self.states.getFirstState()
        while state:
            # Compute the conditional probability of the state given the
            # specified history and the policy of the other agents
            belief = self.computeBelief(state,agent,history,policyOthers)
            # Consider all possible team actions consistent with the
            # specified action choice
            for actionStr in self.team.generateAllActions({agent:action}):
                actionTeam = self.team.decomposeActions(actionStr)
                # The other agents' portion of the team action
                actionOthers = copy.copy(actionTeam)
                if actionOthers.has_key(agent):
                    del actionOthers[agent]
                # Compute the probability of this state-action pair
                prob = belief * self.probAction(actionOthers,agent,history,
                                                state)
                # Increase the value by the expected domain-level reward
                value = value + prob * self.rewardAct(state,actionTeam)
                # If looking into the future, consider the possible
                # state transitions
                if horizon > 1:
                    pass
            state = self.states.getNextState(state)
        return value

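    # Note: computeBelief and probAction are not defined in this module;
    # like the abstract reward and probability methods above, they are
    # presumably supplied elsewhere.  Under that assumption, the loop
    # above accumulates
    #   sum over states s and consistent team actions a of
    #     Pr(s|history,policyOthers) * Pr(a|...) * R_A(s,a)
    # with the horizon > 1 branch (future value) left unimplemented.
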
    def ProjectObservations(self,states,agent=None,beliefs=[],
                            epoch=-1,debug=0):
        # Observation phase
        if debug:
            print
            print '--------'
            print 'Observation phase:'
        for s in states[:]:
            if debug:
                print '--------'
                print 'Examining state:',strip(s['_world'])
            # Draw observations of initial world state (under no actions)
            observations = [{'_prob':1.0}]
            self.__generateTeamObservations(s['_world'],s['_actions'],
                                            self.agents[:],observations,
                                            debug)
            if debug:
                print 'Possible observations:'
                for obs in observations:
                    print '\t',strip(obs)
            # Eliminate observations inconsistent with the state
            for belief in beliefs:
                if belief['_type'] == 'observation' and \
                   belief['_epoch'] == epoch:
                    totalProb = 0.0
                    break
            else:
                totalProb = 1.0
                belief = None
            if belief:
                for obs in observations[:]:
                    for feature in obs[agent].keys():
                        if feature[0] != '_' and \
                           obs[agent][feature] != belief[feature]:
                            break
                    else:
                        feature = None
                    if feature:
                        # The observation is inconsistent
                        observations.remove(obs)
                    else:
                        # The observation is consistent
                        totalProb = totalProb + obs['_prob']
            if len(observations) == 0:
                # The state itself is inconsistent with the observations
                for state in states:
                    if not state is s:
                        state['_prob'] = state['_prob'] / (1.0 - s['_prob'])
            else:
                # Update the belief state based on the new observations
                if s.has_key('_parent'):
                    s['_children'] = []
                for obs in observations:
                    newState = copy.copy(s)
                    for a in self.agents:
                        newState[a.name] = a.preComStateEstimator(s[a.name],
                                                                  obs[a.name],
                                                                  epoch)
                    # Use the normalized probability
                    newState['_prob'] = newState['_prob'] \
                                        * obs['_prob'] / totalProb
                    if s.has_key('_parent'):
                        newState['_parent'] = s
                        s['_children'].append(newState)
                    states.append(newState)
            states.remove(s)

    def ExecuteCommunication(self,states,comPolicy,debug=0):
        # Communication phase
        if debug:
            print
            print '--------'
            print 'Communication phase:'
        msgCount = 0.0
        for s in states[:]:
            if debug:
                print '--------'
                print 'Examining state:',strip(s)
            if comPolicy:
                # Execute each agent's communication policy to
                # generate the set of messages
                messages = [{'_type':'message'}]
                for agent in self.agents:
                    contents = comPolicy[agent.name].execute(s[agent.name])
                    if not contents or type(contents) is StringType:
                        contents = [contents]
                    for msg in messages[:]:
                        for content in contents:
                            newMsg = copy.copy(msg)
                            if content:
                                newMsg[agent.name] = content
                                msgCount = msgCount + s['_prob']
                            messages.append(newMsg)
                        messages.remove(msg)
            else:
                # Generate all possible messages
                messages = self.__generateTeamMessages(s,self.agents[:],[{}])
            if s.has_key('_parent'):
                s['_children'] = {}
            for msg in messages:
                newState = copy.copy(s)
                msg['_type'] = 'message'
                if debug:
                    print 'Messages:',strip(msg)
                # Update the agents' beliefs based on the messages exchanged
                for agent in self.agents:
                    newState[agent.name] = agent.postComStateEstimator(s[agent.name],
                                                                       msg,
                                                                       s['_epoch'])
                # Update the value based on the communication cost
                newState['_value'] = self.rewardCom(s['_world'],msg)
                newState['_messages'] = msg
                if s.has_key('_parent'):
                    newState['_parent'] = s
                    s['_children'][repr(msg)] = newState
                states.append(newState)
            states.remove(s)
        return msgCount

    def ExecuteActions(self,states,domPolicy,debug=0):
        # Action phase
        if debug:
            print
            print '--------'
            print 'Action phase:'
        value = 0.0
        for s in states[:]:
            if debug:
                print '--------'
                print 'Examining state:',strip(s['_world'])
                print 'Beliefs:',strip(s)
            # Execute each agent's domain-level policy to
            # generate the set of actions
            s['_actions'] = {}
            for agent in self.agents:
                act = domPolicy[agent.name].execute(s[agent.name])
                if act:
                    s['_actions'][agent.name] = act
            if debug:
                print 'Actions:',s['_actions']
            # Update the value based on the action cost
            s['_value'] = s['_value'] \
                          + self.rewardAct(s['_world'],s['_actions'])
            # Update the overall value
            value = value + s['_prob'] * s['_value']
            if debug:
                print 'Reward:',s['_value']
        return value

    def ProjectWorldDynamics(self,states,debug=0):
        # World dynamics phase
        if debug:
            print
            print '--------'
            print 'World dynamics phase:'
        for s in states[:]:
            if debug:
                print '--------'
                print 'Examining state:',strip(s['_world'])
                print 'Beliefs:',strip(s)
            orig = self.states.state2index(s['_world'])
            action = self.team.composeActions(s['_actions'])
            if s.has_key('_parent'):
                s['_children'] = {}
            for dest in self.states.children[orig][action]:
                destState = self.states.index2state(dest)
                newState = copy.copy(s)
                newState['_world'] = destState
                newState['_epoch'] = newState['_epoch'] + 1
                newState['_prob'] = s['_prob'] \
                                    * self.probability(s['_world'],
                                                       destState,action)
                if s.has_key('_parent'):
                    newState['_parent'] = s
                    s['_children'][action] = newState
                try:
                    del newState['_messages']
                except KeyError:
                    pass
                states.append(newState)
            states.remove(s)

    def updateBeliefs(self,world,beliefs,agent):
        """Generates all possible new belief states for an agent"""
        raise NotImplementedError

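# Illustrative sketch (hypothetical, not part of the original module):
# constructing a ComMTDP requires a state space exposing the methods
# used above (features, state2index, index2state, children,
# getFirstState, getNextState) and a team exposing agents,
# composeActions, decomposeActions, generateAllActions, and
# individualObs, presumably supplied elsewhere in the teamwork package:
#
#   mtdp = ComMTDP(states,team,name='Example COM-MTDP')
#   print mtdp.evaluatePolicy(domPolicy,comPolicy,initialState,horizon=5)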