Package teamwork :: Package policy :: Module JIPolicy
[hide private]
[frames] | [no frames]

Source Code for Module teamwork.policy.JIPolicy

  1   ########################################################################### 
  2   # 11/5/2001: David V. Pynadath, USC Information Sciences Institute 
  3   #            pynadath@isi.edu 
  4   # 
  5   # JIPolicy: generic class for a communication policy, where there is 
  6   #           a single possible message and it refers to the 
  7   #           achievement of some joint goal 
  8   # STEAMPolicy: a subclass of joint intentions policies, but following  
  9   #           the STEAM algorithm for decision-theoretic selectivity 
 10   # SilentPolicy: a generic policy class that never specifies communication 
 11   # BestAvailablePolicy: a generic communication policy class that 
 12   #           searches a space of candidate policies and finds the best  
 13   #           one for a given COM-MTDP. 
 14   # GloballyOptimalJIPolicy: a subclass of joint intentions policies 
 15   #           that performs a brute force search to determine the best 
 16   #           policy of communication, in reference to a single JPG 
 17   # SingleMsgPolicy: a subclass of joint intentions policies that 
 18   #           generates all possible communication behaviors for an 
 19   #           agent with a single joint commitment (this class is no 
 20   #           longer very useful) 
 21   # LocallyOptimalJIPolicy: a subclass of joint intentions policies 
 22   #           that generates locally optimal decisions with respect to 
 23   #           a single JPG 
 24   # RepeatMsgs: a policy subclass, useful as a helper to the 
 25   #           LocallyOptimalJIPolicy class 
 26   ########################################################################### 
 27   
 28  import copy 
 29  import string 
 30  from types import * 
 31  from teamwork.policy.generic import Policy 
 32   
def generatePolicies(policySpace, actions, observations, horizon, debug=None):
    """Recursively expand every partial policy in policySpace out to the
    given horizon.

    policySpace: list of partial policy dictionaries (modified in place)
    actions: list of possible action selections
    observations: list of possible observations (one branch point each)
    horizon: number of remaining decision epochs to expand
    debug: if true, print a trace of the expansion

    Returns the fully expanded policy space.
    """
    if horizon == 0:
        return policySpace
    for policy in policySpace[:]:
        # Pull each policy out of the space and replace it with its expansions
        policySpace.remove(policy)
        subspace = [policy]
        policyObj = GenericPolicy(policy)
        if debug:
            print('Expanding policy:', policyObj)
        leaves = policyObj.getNodes()
        while len(leaves) > 0:
            # Iterate through each leaf node
            entry = leaves.pop()
            if debug:
                print('\tExpanding leaf:', entry)
            # Expand table by considering possible observation branches
            table = [{'key': omega} for omega in observations]
            # Expand table by considering possible action selections
            newTables = [[]]
            generateActionCombos(table, actions, newTables)
            if debug:
                print('\t\tNew actions:')
            # Create partial policies with the current leaf node expanded
            for partialPolicy in subspace[:]:
                # Iterate through list of previously created partial policies
                subspace.remove(partialPolicy)
                for table in newTables:
                    # Iterate through table of possible branchpoints
                    if debug:
                        print('\t\t\tTable:', table)
                    newPolicy = copy.deepcopy(partialPolicy)
                    for currentEntry in GenericPolicy(newPolicy).getNodes():
                        if currentEntry == entry:
                            break
                    else:
                        # This should never happen
                        print('Unable to find entry!!!')
                    currentEntry['table'] = table
                    subspace.append(newPolicy)
        # Expansion now complete; partial policies are fully specified,
        # so add them back to the policy space
        for partialPolicy in subspace:
            policySpace.append(partialPolicy)
            if debug:
                print('\t\t\tNew Policy:', GenericPolicy(partialPolicy))
                print()
        del subspace
        if horizon == 1:
            # NOTE(review): returns after expanding only the first policy in
            # the space; preserved from the original, where the horizon-1
            # recursion below would terminate immediately anyway — confirm
            # this is intended when policySpace has multiple entries.
            return policySpace
    return generatePolicies(policySpace, actions, observations,
                            horizon - 1, debug)
95
def generateActionCombos(table, actions, result):
    """Expand ``result`` (in place) into the cross product of action
    assignments over the branch points in ``table``.

    table: list of branch-point dictionaries; consumed (emptied) by this call
    actions: candidate actions to assign at each branch point
    result: list of partial tables, extended combinatorially in place

    Returns ``result`` for convenience.
    """
    while table:
        branch = table.pop()
        # Replace every existing partial table with one copy per action
        for partial in result[:]:
            result.remove(partial)
            for act in actions:
                branch['action'] = act
                extended = copy.copy(partial)
                extended.append(copy.copy(branch))
                result.append(extended)
    return result
107
class JIPolicy(Policy):
    """Generic class for a communication policy, where there is a
    single possible message and it refers to the achievement of some
    joint goal."""

    def __init__(self, jpg, achievedMsg, type='joint intentions'):
        """jpg: a dictionary of conditions under which the JPG is
        achieved.  The keys are the relevant feature names; the
        values are a list of relevant feature values."""
        Policy.__init__(self, [achievedMsg, None], type)
        self.jpg = jpg
        self.trueMsg = achievedMsg

    def execute(self, state, choices=[], debug=0):
        """Return the achievement message if the JPG currently holds and
        no agent has announced it yet; otherwise return None."""
        for feature in self.jpg:
            if state[0][feature] not in self.jpg[feature]:
                # We have not achieved the JPG, so do not communicate
                return None
        # JPG achieved: check whether some agent has already communicated
        # the achievement
        for belief in state:
            if belief['_type'] != 'message':
                continue
            for sender in belief:
                if belief[sender] == self.trueMsg:
                    # Already announced, so do not communicate again
                    return None
        # No one has communicated achievement, so we must do so now
        return self.trueMsg
140
class STEAMPolicy(JIPolicy):
    """Subclass of joint intentions policies, but following the STEAM
    algorithm for decision-theoretic selectivity."""

    def __init__(self, jpg, achievedMsg, gamma, costMiscoord, costComm):
        """gamma: likelihood of miscoordination ('high' or 'low')
        costMiscoord: cost of miscoordination ('high', 'medium', or other)
        costComm: cost of using the communication channel

        Raises ValueError if gamma is neither 'high' nor 'low'."""
        JIPolicy.__init__(self, jpg, achievedMsg, 'STEAM')
        # Determine cost of non-communication from the STEAM decision table
        if gamma == 'high':
            if costMiscoord in ('high', 'medium'):
                costNonComm = 'high'
            else:
                costNonComm = 'medium'
        elif gamma == 'low':
            if costMiscoord == 'high':
                costNonComm = 'medium'
            else:
                costNonComm = 'low'
        else:
            # The original code left costNonComm unbound here, producing a
            # NameError below; fail fast with a meaningful exception instead
            raise ValueError('unknown gamma: %r' % (gamma,))
        # Weigh cost of non-communication against cost of the channel
        if costNonComm == 'high':
            self.communicate = 1
        elif costNonComm == 'medium':
            self.communicate = 1 if costComm == 'low' else None
        else:
            self.communicate = None

    def execute(self, state, choices=[], debug=0):
        """Communicate per the joint-intentions rule only if the STEAM
        cost-benefit analysis decided communication is worthwhile."""
        if self.communicate:
            return JIPolicy.execute(self, state, choices, debug)
        else:
            return None
178
class SilentPolicy(Policy):
    """Generic policy class that never specifies communication."""

    def __init__(self):
        # The only available choice is silence (None)
        Policy.__init__(self, [None], 'silent')

    def execute(self, state, choices=[], debug=0):
        """Always elect not to send any message."""
        return None
186
class BestAvailablePolicy(Policy):
    """A generic communication policy class that searches a space of
    candidate policies and finds the best one for a given COM-MTDP.
    It allows one to specify a space of possible policies, and have
    the constructor return the optimal policy from that space."""

    def __init__(self, com_mtdp, states, agent, policySpace, otherAgentsComPolicy,
                 domPolicy, horizon, debug=0):
        """com_mtdp: the COM-MTDP model used to evaluate candidates
        states: initial states to sum evaluation results over
        agent: name of the agent whose communication policy is optimized
        policySpace: candidate communication policies to search
        otherAgentsComPolicy: fixed communication policies of the others
        domPolicy: joint domain-level (action) policy
        horizon: evaluation horizon"""
        self.policy = None
        # Best evaluation found so far; stays None if policySpace is empty
        # (the original raised NameError on self.value in that case)
        bestValue = None
        comPolicy = otherAgentsComPolicy
        for policy in policySpace:
            if debug > 1:
                print('--------')
                print(repr(policy))
            comPolicy[agent] = policy
            # Total reward and message count over all initial states
            value = 0.0
            msgs = 0.0
            for state in states:
                result = com_mtdp.evaluatePolicy(domPolicy, comPolicy,
                                                 state, horizon)
                value = value + result['Reward']
                msgs = msgs + result['Messages']
            if debug > 1:
                print('Value:', value)
            if bestValue is None or value > bestValue['Reward']:
                self.policy = policy
                bestValue = {'Reward': value, 'Messages': msgs}
        if debug:
            print('Best Policy:')
            print(repr(self.policy))
        self.value = bestValue

    def execute(self, state, choices=[], debug=0):
        """Delegate to the selected best policy.

        NOTE(review): calls the policy object directly, as the original
        did; assumes candidate policies are callable — confirm against
        the Policy interface."""
        return self.policy(state)
220
class GloballyOptimalJIPolicy(JIPolicy):
    """Subclass of joint intentions policies that performs a brute-force
    search to determine the best policy of communication, in reference
    to a single JPG.

    The constructor projects all reachable execution states out to the
    horizon, then performs backward induction over the resulting tree,
    computing for each belief state of the target agent whether
    'Communicate' or 'No Communicate' yields higher expected value.
    The result is stored in self.policy as a dict mapping
    repr(belief state) -> decision string.

    NOTE(review): the original code contained several fatal errors —
    undefined names (`length`, `strip`, `hello`, bare `policy`), a
    hard-coded `debug = 1`, the backward-induction frontier appended to
    the wrong list (so the induction loop never ran), and a
    values-to-policy conversion that deleted from the list it was
    indexing and ran only after the loop that needed its results.  This
    rewrite repairs those while keeping the apparent intent; verify
    against the COM-MTDP evaluation code.
    """

    def __init__(self, com_mtdp, states, domPolicy, agent, horizon,
                 jpg, achievedMsg, debug=0):
        """com_mtdp: the COM-MTDP model
        states: possible initial world states (uniformly weighted)
        domPolicy: joint domain-level policy
        agent: name of the agent whose communication is optimized
        horizon: projection horizon"""
        JIPolicy.__init__(self, jpg, achievedMsg, 'globally optimal')
        self.agent = agent
        self.domPolicy = domPolicy
        self.mtdp = com_mtdp
        # The target agent explores all message choices; the others stay silent
        comPolicy = {self.agent: SingleMsgPolicy(self)}
        for agent in self.mtdp.agents:
            if agent.name != self.agent:
                comPolicy[agent.name] = SilentPolicy()
        # Create states of execution, which include the state of the
        # world as well as belief states of individual agents
        stateList = []
        for state in states:
            stateList.append({'_world': state, '_parent': None,
                              '_prob': 1.0 / float(len(states)), '_epoch': 0,
                              '_value': 0.0, '_actions': {}, '_messages': {}})
        for s in stateList:
            for agent in self.mtdp.agents:
                s[agent.name] = agent.initialStateEstimator()
        # Forward projection out to the horizon
        for epoch in range(horizon):
            if debug:
                print('=========')
                print('Epoch', epoch)
                print('=========')
                print('# States:', len(stateList))
            self.mtdp.ProjectObservations(stateList, None, [], epoch, debug)
            self.mtdp.ExecuteCommunication(stateList, comPolicy, debug)
            self.mtdp.ExecuteActions(stateList, domPolicy, debug)
            self.mtdp.ProjectWorldDynamics(stateList)
        # Backward induction: construct the optimal policy
        if debug:
            print()
            print('-------')
            print('Policy selection phase')
            print('-------')
            print('Examining Leaf Nodes:', len(stateList))
        self.policy = {}
        values = []
        leafNodes = stateList
        stateList = []
        for s in leafNodes:
            if debug:
                print('-------')
                print('Examining state:', s['_world'])
                print('Epoch:', s['_epoch'])
                print('Latest belief update:', s[self.agent][0])
            self._propagate(s, s['_value'] * s['_prob'], values,
                            stateList, debug)
        # Decide each belief state seen at this ply before moving up
        self._absorb(values)
        # Walk back up the tree, one ply per pass
        while len(stateList) > 0:
            if debug:
                print('-------')
                print('Starting a new ply...')
            for s in stateList[:]:
                if debug:
                    print('-------')
                    print('Examining state:', s['_world'])
                    print('Epoch:', s['_epoch'])
                    print('Latest belief update:', s[self.agent][0])
                    print('Values:', s['_msgValues'])
                # Local value plus the value of the preferred message
                # choice from here on, weighted by reachability
                key = self.policy[repr(s[self.agent])]
                if debug:
                    print('Preference:', key)
                value = (s['_value'] + s['_msgValues'][key]) * s['_prob']
                stateList.remove(s)
                # Move back to the pre-observation state
                s = s['_parent']
                if s['_parent']:
                    self._propagate(s, value, values, stateList, debug)
            # Convert this ply's totals before the next (earlier) ply
            # needs to look them up
            self._absorb(values)
        if debug:
            print()
            print('-------')
            print('Final Policy:')
            print('Communicate in the following belief states:')
            for key in self.policy:
                if self.policy[key] == 'Communicate':
                    print('-------')
                    print(key)

    def _propagate(self, s, value, values, frontier, debug):
        """Fold the value of (post-communication) state s into its
        grandparent's per-message-choice totals and into the per-belief
        accumulator list, and add the grandparent to the frontier."""
        # Determine whether a message was sent to arrive at this state
        key = 'Communicate'
        if self.agent not in s['_parent']['_messages']:
            key = 'No ' + key
        if debug:
            print('Message choice:', key)
            print('Value:', value)
        # The grandparent state has the pre-communication belief state
        parent = s['_parent']['_parent']
        belief = parent[self.agent]
        if '_msgValues' not in parent:
            parent['_msgValues'] = {'Communicate': 0.0,
                                    'No Communicate': 0.0}
        parent['_msgValues'][key] = parent['_msgValues'][key] + value
        # Update totals for this particular belief state
        for entry in values:
            if entry['Beliefs'] == belief:
                if debug:
                    print('Updating value...')
                break
        else:
            if debug:
                print('Creating value...')
            entry = {'Beliefs': belief,
                     'Communicate': 0.0,
                     'No Communicate': 0.0}
            values.append(entry)
        entry[key] = entry[key] + value
        if debug:
            print('New total value:', entry[key])
        if parent not in frontier:
            frontier.append(parent)

    def _absorb(self, values):
        """Convert accumulated per-belief totals into policy decisions,
        emptying the accumulator."""
        while values:
            entry = values.pop()
            if entry['Communicate'] > entry['No Communicate']:
                self.policy[repr(entry['Beliefs'])] = 'Communicate'
            else:
                self.policy[repr(entry['Beliefs'])] = 'No Communicate'

    def execute(self, state, choices=[], debug=0):
        """Look up the precomputed decision for this belief state and,
        if it says to communicate, apply the joint-intentions rule."""
        if self.policy.get(repr(state)) == 'Communicate':
            return JIPolicy.execute(self, state, choices, debug)
        return None
407
class SingleMsgPolicy(JIPolicy):
    """Subclass of joint intentions policies that generates all
    possible communication behaviors for an agent with a single joint
    commitment (this class is no longer very useful)."""

    def __init__(self, policy):
        """policy: a JIPolicy-like object supplying jpg, trueMsg, agent."""
        JIPolicy.__init__(self, policy.jpg, policy.trueMsg)
        self.agent = policy.agent

    def execute(self, state, choices=[], debug=0):
        """Generate the possible messages for this agent: only silence if
        the agent has already sent a message, otherwise either silence or
        the achievement message.

        NOTE(review): the original used has_key and a dangling else that
        made the intent ambiguous; this reading scans all message beliefs
        before offering the achievement message — confirm against callers."""
        for belief in state:
            if belief['_type'] == 'message' and self.agent in belief:
                # Agent already sent a message: no further choice
                return [None]
        return [None, self.trueMsg]
424 425
class LocallyOptimalJIPolicy(JIPolicy):
    """Subclass of joint intentions policies that generates locally
    optimal decisions with respect to a single JPG."""

    def __init__(self, com_mtdp, states, comPolicy, domPolicy, agent, horizon,
                 jpg, achievedMsg, debug=0):
        """com_mtdp: COM-MTDP model used for lookahead evaluation
        states: possible initial world states
        comPolicy: communication policies of the other agents
        domPolicy: joint domain-level policy
        agent: name of the agent making the local decision
        horizon: lookahead horizon"""
        JIPolicy.__init__(self, jpg, achievedMsg, 'locally optimal')
        self.horizon = horizon
        self.agent = agent
        self.comPolicy = comPolicy
        self.domPolicy = domPolicy
        self.mtdp = com_mtdp
        self.initial = states
        self.debug = debug

    def execute(self, state, choices=[], debug=-1):
        """Decide whether to announce JPG achievement by comparing the
        expected value of communicating vs. staying silent, evaluated
        over all execution states consistent with the current beliefs."""
        if debug < 0:
            debug = self.debug
        if not JIPolicy.execute(self, state, choices):
            # If we haven't achieved the JPG, then don't communicate
            return None
        # Determine what epoch we're in
        currentEpoch = state[0]['_epoch']
        if debug:
            print('Executing at time:', currentEpoch)
            print('Beliefs:', state)
        # Create states of execution, which include the state of the
        # world as well as belief states of individual agents
        stateList = []
        for s in self.initial:
            stateList.append({'_world': s, '_epoch': 0,
                              '_prob': 1.0 / float(len(self.initial)),
                              '_value': 0.0, '_actions': {}})
        for s in stateList:
            for agent in self.mtdp.agents:
                s[agent.name] = agent.initialStateEstimator()
        self.__generateConsistentStates(state, stateList, currentEpoch, debug)
        if debug:
            print('Consistent states:', len(stateList))
        # Communication phase
        if debug:
            print()
            print('--------')
            print('Communication phase:')
        value = {'Communicate': 0.0, 'No Communicate': 0.0}
        states = {'Communicate': [], 'No Communicate': []}
        # NOTE(review): the original wrote self.mtdp.__generateTeamMessages,
        # which Python mangles against *this* class's name and so never
        # resolves on the mtdp object; look up the mtdp's own mangled
        # private attribute explicitly instead.
        generateTeamMessages = getattr(
            self.mtdp,
            '_%s__generateTeamMessages' % self.mtdp.__class__.__name__)
        for s in stateList[:]:
            if debug:
                print('--------')
                print('Examining state:', s['_world'])
                print('Beliefs:', s[self.agent])
            # Generate all possible messages
            messages = generateTeamMessages(s, self.mtdp.agents[:], [{}])
            for msg in messages:
                newState = copy.copy(s)
                msg['_type'] = 'message'
                if debug:
                    print('Messages:', msg)
                # Update agents' beliefs based on messages exchanged
                for agent in self.mtdp.agents:
                    newState[agent.name] = agent.postComStateEstimator(
                        s[agent.name], msg)
                # Update value based on communication cost
                newState['_value'] = self.mtdp.rewardCom(s['_world'], msg)
                newState['_parent'] = s
                if self.agent in msg:
                    # NOTE(review): as in the original, a message from this
                    # agent other than trueMsg is silently dropped
                    if msg[self.agent] == self.trueMsg:
                        states['Communicate'].append(newState)
                else:
                    states['No Communicate'].append(newState)
            stateList.remove(s)
        for choice in ['Communicate', 'No Communicate']:
            stateList = states[choice]
            if debug:
                print('+++++++')
                print('Evaluating Policy:', choice)
                print('# States:', len(stateList))
                print('+++++++')
            # Action phase
            value[choice] = value[choice] + self.mtdp.ExecuteActions(
                stateList, self.domPolicy, debug)
            self.mtdp.ProjectWorldDynamics(stateList)
            result = self.mtdp._evaluatePolicy(stateList, self.domPolicy,
                                               self.comPolicy,
                                               self.horizon, debug)
            value[choice] = value[choice] + result['Reward']
        # Construct optimal decision
        if debug:
            print()
            print('-------')
            print('Policy selection phase')
            print('-------')
            for key in value:
                print(key + ':', value[key])
        if value['Communicate'] > value['No Communicate']:
            return JIPolicy.execute(self, state, choices)
        else:
            return None

    def __generateConsistentStates(self, beliefs, stateList, currentTime,
                                   debug=0):
        """Project the initial states forward to currentTime, keeping only
        execution states consistent with this agent's observed beliefs,
        replaying the messages recorded in the belief history."""
        for epoch in range(currentTime + 1):
            if debug:
                print('=========')
                print('Epoch', epoch)
                print('=========')
            self.mtdp.ProjectObservations(stateList, self.agent, beliefs,
                                          epoch, debug)
            if epoch == currentTime:
                break
            # Replay whatever messages each agent actually sent at this epoch
            comPolicy = {}
            for agent in self.mtdp.agents:
                comPolicy[agent.name] = RepeatMsgs(agent.name, beliefs, epoch)
            self.mtdp.ExecuteCommunication(stateList, comPolicy, debug)
            self.mtdp.ExecuteActions(stateList, self.domPolicy, debug)
            self.mtdp.ProjectWorldDynamics(stateList)
545
class RepeatMsgs(Policy):
    """Policy subclass, useful as a helper to the LocallyOptimalJIPolicy
    class: replays whatever message an agent sent at a given epoch of a
    recorded belief history."""

    def __init__(self, agent, beliefs, epoch):
        """agent: name of the agent whose message to replay
        beliefs: belief history to scan for past messages
        epoch: the epoch whose message should be repeated

        NOTE(review): Policy.__init__ is not invoked here, matching the
        original code."""
        self.msg = None
        # Find the message (if any) this agent sent at the given epoch
        for msg in beliefs:
            if msg['_type'] == 'message' and msg['_epoch'] == epoch:
                if agent in msg:
                    self.msg = msg[agent]
                    break

    def execute(self, state, choices=[], debug=0):
        """Re-send the recorded message (or None if none was found)."""
        return self.msg
if __name__ == '__main__':
    # Smoke test: enumerate policies for a small tiger-style problem
    initialSpace = [{}]
    space = generatePolicies(initialSpace, ['left', 'right'],
                             [{'Tiger': 'l'}], 1)
    space = generatePolicies(space, ['left', 'right'],
                             [{'Tiger': 'l'}, {'Tiger': 'r'}], 2)
    for policy in space:
        policy = GenericPolicy(policy)
        print(policy)
        print('--------------------------')
    print('# Policies:', len(space))