
Source Code for Module teamwork.examples.TigerScenario

"""PsychSim specification of the multiagent tiger problem
"""
import copy   # copy.copy/copy.deepcopy are used throughout this module
import sys
import time

from teamwork.agent.AgentClasses import classHierarchy
from teamwork.math.KeyedMatrix import *
from teamwork.math.probability import Distribution
from teamwork.math.ProbabilityTree import *
from teamwork.action.PsychActions import Action
from teamwork.policy.pwlTable import PWLTable,getProbRep
from teamwork.policy.pwlPolicy import PWLPolicy
from teamwork.policy.ObservationPolicy import ObservationPolicy,solve

RIGHT = 1.
LEFT = 0.

LISTENCOST = -1.
DAMAGE = -50.
PRIZE = 20.

def resetPosition():
    """Define the dynamics of how the location of the tiger is reset if either door is opened
    """
    leftTree = ProbabilityTree(SetToConstantMatrix(source='position',
                                                   value=LEFT))
    rightTree = ProbabilityTree(SetToConstantMatrix(source='position',
                                                    value=RIGHT))
    tree = ProbabilityTree()
    tree.branch(Distribution({leftTree:0.5,rightTree:0.5}))
    return tree

def stayPosition():
    """Define the dynamics of how the location of the tiger stays the same
    """
    return ProbabilityTree(IdentityMatrix('position'))

# Define the tiger agent
classHierarchy['Tiger'] = {
    'parent': ['Entity'],
    'state':{'position': Distribution({RIGHT:0.5,LEFT:0.5})
             },
    'depth':0,
    'actions':{'type':None},
    'dynamics':{'position':{'OpenRight':resetPosition(),
                            'OpenLeft':resetPosition(),
                            'Listen':stayPosition(),
                            },
                },
    }
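# Note that opening either door resets the tiger to a uniform 50/50 position,
# while Listen leaves the position unchanged (resetPosition/stayPosition above).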
def openSame(side):
    """Define the payoff when opening the same door
    """
    row = ThresholdRow(keys=[{'entity':'Tiger','feature':'position'}])
    goodTree = ProbabilityTree(PRIZE)
    badTree = ProbabilityTree(value=DAMAGE)
    if side > 0.5:
        return createBranchTree(KeyedPlane(row,0.5),
                                goodTree,badTree)
    else:
        return createBranchTree(KeyedPlane(row,0.5),
                                badTree,goodTree)

def openDifferent():
    """Define the payoff when opening different doors
    """
    return ProbabilityTree(2.*DAMAGE)

def listen():
    """Define the payoff when both listen
    """
    return ProbabilityTree(2.*LISTENCOST)

def openListen(side):
    """Define the payoff when one opens and the other listens
    """
    row = ThresholdRow(keys=[StateKey({'entity':'Tiger',
                                       'feature':'position'})])
    goodTree = ProbabilityTree(PRIZE/2.+LISTENCOST)
    badTree = ProbabilityTree(2.*DAMAGE+LISTENCOST)
    if side > 0.5:
        return createBranchTree(KeyedPlane(row,0.5),
                                goodTree,badTree)
    else:
        return createBranchTree(KeyedPlane(row,0.5),
                                badTree,goodTree)
rewardDict = {'OpenRightOpenRight':openSame(RIGHT),
              'OpenRightListen':openListen(RIGHT),
              'OpenRightOpenLeft':openDifferent(),
              'ListenOpenRight':openListen(RIGHT),
              'ListenListen':listen(),
              'ListenOpenLeft':openListen(LEFT),
              'OpenLeftOpenRight':openDifferent(),
              'OpenLeftListen':openListen(LEFT),
              'OpenLeftOpenLeft':openSame(LEFT),
              }
keyList = [ConstantKey(),StateKey({'entity':'Tiger','feature':'position'})]
for tree in rewardDict.values():
    tree.fill(keyList)
    tree.freeze()

Omega = {'left':ObservationKey({'type':'heard left'}),
         'right':ObservationKey({'type':'heard right'}),
         }

def openObs():
    """Define the observations when opening a door
    """
    tRow = KeyedVector({keyConstant:1.})
    fRow = KeyedVector({keyConstant:0.})
    leftmatrix = KeyedMatrix()
    leftmatrix[Omega['left']] = copy.copy(tRow)
    leftmatrix[Omega['right']] = copy.copy(fRow)
    leftmatrix[keyConstant] = UnchangedRow(sourceKey=keyConstant)
    rightmatrix = KeyedMatrix()
    rightmatrix[Omega['left']] = copy.copy(fRow)
    rightmatrix[Omega['right']] = copy.copy(tRow)
    rightmatrix[keyConstant] = UnchangedRow(sourceKey=keyConstant)
    dist = Distribution({ProbabilityTree(leftmatrix):0.5,
                         ProbabilityTree(rightmatrix):0.5})
    tree = ProbabilityTree()
    tree.branch(dist)
    return tree

def listenObs():
    """Define the observations when listening
    """
    tRow = KeyedVector({keyConstant:1.})
    fRow = KeyedVector({keyConstant:0.})
    leftmatrix = KeyedMatrix()
    leftmatrix[Omega['left']] = copy.copy(tRow)
    leftmatrix[Omega['right']] = copy.copy(fRow)
    leftmatrix[keyConstant] = UnchangedRow(sourceKey=keyConstant)
    rightmatrix = KeyedMatrix()
    rightmatrix[Omega['left']] = copy.copy(fRow)
    rightmatrix[Omega['right']] = copy.copy(tRow)
    rightmatrix[keyConstant] = UnchangedRow(sourceKey=keyConstant)
    leftDist = Distribution({ProbabilityTree(leftmatrix):0.85,
                             ProbabilityTree(rightmatrix):0.15})
    leftTree = ProbabilityTree()
    leftTree.branch(leftDist)
    rightDist = Distribution({ProbabilityTree(leftmatrix):0.15,
                              ProbabilityTree(rightmatrix):0.85})
    rightTree = ProbabilityTree()
    rightTree.branch(rightDist)
    row = ThresholdRow(keys=[{'entity':'Tiger','feature':'position'}])
    if LEFT > 0.5:
        return createBranchTree(KeyedPlane(row,0.5),
                                rightTree,leftTree)
    else:
        return createBranchTree(KeyedPlane(row,0.5),
                                leftTree,rightTree)
observationDict = {'OpenRightOpenRight':openObs(),
                   'OpenRightListen':openObs(),
                   'OpenRightOpenLeft':openObs(),
                   'ListenOpenRight':openObs(),
                   'ListenListen':listenObs(),
                   'ListenOpenLeft':openObs(),
                   'OpenLeftOpenRight':openObs(),
                   'OpenLeftListen':openObs(),
                   'OpenLeftOpenLeft':openObs(),
                   }

# Define the Opener/Listener agent
classHierarchy['Dude'] = {
    'parent': ['Entity'],
    'goals':[],
    'beliefs':{'Tiger':{'position':Distribution({RIGHT:0.75,LEFT:0.25}),
                        },
               },
    'actions':{'type':'XOR',
               'key':'type',
               'values':[{'type':'literal','value':'OpenRight'},
                         {'type':'literal','value':'OpenLeft'},
                         {'type':'literal','value':'Listen'},
                         ],
               },
    'observations':{Omega['left']:{},
                    Omega['right']:{},
                    },
    }

def formatState(distribution):
    content = ''
    key = StateKey({'entity':'Tiger',
                    'feature':'position'})
    marginal = distribution.getMarginal(key)
    for side,prob in marginal.items():
        if side == RIGHT:
            content += '\nRight: %5.3f' % (prob)
        else:
            content += '\nLeft: %5.3f' % (prob)
    return content[1:]
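# For example, a marginal of 0.75 on RIGHT is rendered (up to dictionary
# ordering) as:
#   Right: 0.750
#   Left: 0.250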

def exhaustive(scenario,transition,observations,reward,
               policyHorizon,lookaheadHorizon,timings):
    """Exhaustive enumeration of joint observation-based policies.
    @warning: pretty specific to tiger scenario
    """
    policies = []
    start = time.time()
    for t in range(policyHorizon+1):
        policies.append({})
        # Compute number of entries in the policy
        size = pow(2,t)
        # Construct initial beliefs
        beliefs = {}
        for agent in filter(lambda a: len(a.actions.getOptions())>1,
                            scenario.members()):
            beliefs[agent.name] = []
        best = {'policy':None,'value':None}
        worlds = scenario.state.keys()
        policies[-1]['Player 1'] = 0
        while policies[-1]['Player 1'] < pow(2,size):
            policies[-1]['Player 2'] = policies[-1]['Player 1']
            while policies[-1]['Player 2'] < pow(2,size):
                value = EV(scenario,transition,observations,reward,
                           lookaheadHorizon)
                if best['policy'] is None or value > best['value']:
                    best['policy'] = copy.copy(policies[-1])
                    best['value'] = value
                policies[-1]['Player 2'] += 1
            policies[-1]['Player 1'] += 1
        policies[-1] = best['policy']
        timings.append(time.time()-start)
    return policies

def executeObsPolicy(agent,beliefs):
    """
    @type beliefs: L{Key}[]
    """
    horizon = min(len(beliefs),len(agent.policy)+1)
    try:
        policy = agent.policy[len(beliefs)]
    except IndexError:
        policy = agent.policy[-1]
    relevant = beliefs
    # Compute which entry of policy is applicable
    index = 0
    for t in range(len(relevant)):
        if relevant[t] == Omega['left']:
            index += pow(2,t)
    # Access given entry of policy
    entry = (policy/pow(2,index))%2
    # Determine appropriate action
    for option in agent.actions.getOptions():
        if entry == 0:
            if option[0]['type'] == 'Listen':
                return option
        elif len(beliefs) == 0:
            # Open randomly
            if option[0]['type'] != 'Listen':
                return option
        elif beliefs[-1] == Omega['left']:
            if option[0]['type'] == 'OpenRight':
                return option
        elif beliefs[-1] == Omega['right']:
            if option[0]['type'] == 'OpenLeft':
                return option
    else:
        raise UserWarning

def printExhaustivePolicy(agent,policies,horizon,beliefs=None):
    if beliefs is None:
        beliefs = []
    if len(beliefs) == horizon:
        print executeObsPolicy(agent,beliefs),'\t',
        print ','.join(map(lambda o:o['type'].split()[-1],beliefs))
    else:
        for omega in Omega.values():
            printExhaustivePolicy(agent,policies,horizon,beliefs+[omega])

def tagActions(value,option):
    policy = KeyedTree()
    if value.isLeaf():
        policy.makeLeaf({'value':value.getValue(),
                         'option':option})
    else:
        fValue,tValue = value.getValue()
        fTree = tagActions(fValue,option)
        tTree = tagActions(tValue,option)
        policy.branch(value.split,fTree,tTree)
    return policy

def mergeAction(policy,value,option,comparisons=None,conditions=[]):
    if comparisons is None:
        comparisons = {}
    if not policy.isLeaf():
        fTree,tTree = policy.getValue()
        fTree = mergeAction(fTree,value,option,comparisons,
                            conditions+[(policy.split,False)])
        tTree = mergeAction(tTree,value,option,comparisons,
                            conditions+[(policy.split,True)])
        result = KeyedTree()
        result.branch(policy.split,fTree,tTree,pruneF=False,pruneT=False)
        return result
    elif value.isLeaf():
        # Identify conditions under which this new option is preferred
        old = policy.getValue()
        new = {'value':value.getValue(),'option':option}
        weights = new['value']-old['value']
##        weights *= 1./min(map(abs,weights.getArray())) # normalize weights
        split = [KeyedPlane(weights,0.)]
        for plane,truth in conditions:
            split = comparePlaneSets(split,plane,truth,comparisons,
                                     negative=False)
            if isinstance(split,bool):
                if split:
                    # Guaranteed True
                    return KeyedTree(new)
                else:
                    # Guaranteed False
                    return KeyedTree(old)
        # Either option can be preferred so we need a branch
        result = KeyedTree()
        result.branch(split,old,new,pruneF=False,pruneT=False)
        return result
    else:
        fValue,tValue = value.getValue()
        split = value.split
        for plane,truth in conditions:
            split = comparePlaneSets(split,plane,truth,comparisons,
                                     negative=False)
            if isinstance(split,bool):
                if split:
                    # Guaranteed True
                    result = mergeAction(policy,tValue,option,comparisons,conditions)
                    return result
                else:
                    # Guaranteed False
                    result = mergeAction(policy,fValue,option,comparisons,conditions)
                    return result
        fTree = mergeAction(policy,fValue,option,comparisons,
                            conditions+[(split,False)])
        tTree = mergeAction(policy,tValue,option,comparisons,
                            conditions+[(split,True)])
        result = KeyedTree()
        result.branch(split,fTree,tTree,pruneF=False,pruneT=False)
        return result

def mergePolicy(agent,values):
    comparisons = {}
    for index in range(len(agent.actions.getOptions())):
        option = agent.actions.getOptions()[index]
        value = values[str(option)]
        if index == 0:
            policy = tagActions(value,option)
        else:
            policy = mergeAction(policy,value,option,comparisons)
    for node in policy.leafNodes():
        node.makeLeaf(node.getValue()['option'])
    policy.prune(negative=False)
    policy.removeTautologies(negative=False)
    return policy
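# Sketch of the merge performed above: tagActions seeds the policy tree with
# the first option's value tree; each later mergeAction call splits on the
# value difference (new option minus incumbent) wherever the new option can be
# preferred; finally the leaves are collapsed to bare options and the tree is
# pruned of unreachable and tautological branches.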

def printObsPolicy(agent,horizon,beliefs=None,observations=None):
    if beliefs is None:
        beliefs = agent.beliefs
    if observations is None:
        observations = []
    action,exp = agent.policy.execute(beliefs,depth=horizon)
    history = ''.join(map(lambda o: str(o)[6].capitalize(),observations))
    print '%-8s' % (history),action[0]['type']
    if horizon > 0:
        for omega in Omega.values():
            new = agent.stateEstimator(beliefs=beliefs,
                                       actions=action,
                                       observation=omega)
            printObsPolicy(agent,horizon-1,new,observations+[omega])

def EV(scenario,transition,observations,reward,horizon,
       debug=False,behaviors=None):
    """Computes the expected value (EV) of a set of policies through exhaustive simulation"""
    worlds = scenario.state.keys()
    current = []
    node = {'probability':0.5,'state':copy.copy(scenario.state)}
    node['state'][worlds[0]] = 1.
    node['state'][worlds[1]] = 0.
    for agent in filter(lambda a: len(a.actions.getOptions()) > 1,
                        scenario.members()):
        node['observations %s' % (agent.name)] = []
        node[agent.name] = agent.beliefs
    current.append(node)
    node = {'probability':0.5,'state':copy.copy(scenario.state)}
    node['state'][worlds[0]] = 0.
    node['state'][worlds[1]] = 1.
    for agent in filter(lambda a: len(a.actions.getOptions()) > 1,
                        scenario.members()):
        node['observations %s' % (agent.name)] = []
        node[agent.name] = agent.beliefs
    current.append(node)
    return simulate(scenario,current,transition,observations,reward,
                    horizon,debug,behaviors)

def simulate(scenario,current,transition,observations,reward,horizon,
             debug=False,behaviors=None):
    """
    @param current: list of nodes to expand
    """
    if debug:
        print 'Horizon:',horizon
    start = time.time()
    sys.stderr.write('%d to go (%d nodes)\n' % (horizon,len(current)))
    value = 0.
    for node in current:
        if debug:
            print '%s %f' % (node['state'].getArray(),node['probability'])
        # Determine the behavior
        actions = {}
        for agent in filter(lambda a: len(a.actions.getOptions()) > 1,
                            scenario.members()):
            if isinstance(agent.policy,int):
                # Observation-based policy
                actions[agent.name] = executeObsPolicy(agent,node[agent.name])
            elif isinstance(agent.policy,PWLPolicy):
                # PWL policy
                actions[agent.name] = agent.policy.execute(node[agent.name],
                                                           depth=horizon)[0]
            else:
                # More general observation-based policy
                actions[agent.name] = agent.policy.execute(node['observations %s' % (agent.name)])
            if debug:
                print ','.join(map(str,node['observations %s' % (agent.name)])),
                if node.has_key(agent.name):
                    print node[agent.name].getArray(),
                print '-> %s' % (str(actions[agent.name]))
        node['action'] = actions
        if isinstance(behaviors,dict):
            for agent in behaviors.keys():
                behaviors[agent].append((node,actions[agent]))
        node['key'] = ' '.join(map(str,actions.values()))
        # Compute reward
        r = reward[node['key']]*node['state']
        if debug:
            print node['key']
            print node['state'].getArray()
            print 'R =',r
        value += node['probability']*r
    if horizon == 0:
        return value
    # Project next set of nodes
    projection = []
    for node in current:
        # Update state
        dist = transition[node['key']]*node['state']
        worlds = dist.keys()
        combinations = []
        for world in range(len(dist)):
            new = copy.copy(node['state'])
            new[worlds[world]] = 1.
            for other in range(len(dist)):
                if other != world:
                    new[worlds[other]] = 0.
            prob = node['probability']*dist[worlds[world]]
            if prob > 0.:
                combo = {'probability':prob,'state':new}
                for agent in filter(lambda a: len(a.actions.getOptions())>1,
                                    scenario.members()):
                    key = 'observations %s' % (agent.name)
                    combo[key] = node[key][:]
                combinations.append(combo)
        for agent in filter(lambda a: len(a.actions.getOptions())>1,
                            scenario.members()):
            next = []
            for old in combinations:
                for omega in Omega.values():
                    # Generate subsequent belief state possibilities
                    action = node['action'][agent.name]
                    prob = observations[node['key']][omega]*old['state']
                    new = copy.deepcopy(old)
                    new['observations %s' % (agent.name)].append(omega)
                    new['probability'] *= prob
                    if new['probability'] > 0.:
                        if isinstance(agent.policy,PWLPolicy):
                            belief = agent.stateEstimator(beliefs=node[agent.name],
                                                          actions=action,
                                                          observation=omega)
                            new[agent.name] = belief
                        next.append(new)
            combinations = next
        projection += combinations
    # Merge matching nodes
    index = 1
    while index < len(projection):
        node = projection[index]
        for other in projection[:index]:
            for key in filter(lambda k: k != 'probability',node.keys()):
                if key[:12] == 'observations':
                    if node[key] != other[key]:
                        # Mismatch
                        break
                elif sum(map(abs,(node[key] - other[key]).getArray())) > epsilon:
                    # Mismatch
                    break
            else:
                # Matching node
                other['probability'] += node['probability']
                del projection[index]
                break
        else:
            # Unique node
            index += 1
    total = 0.
    for node in projection:
        total += node['probability']
    assert abs(total-1.) < epsilon, 'Prob = %f' % (total)
    if debug:
        sys.stderr.write('\t%d sec\n%f\n' % (time.time()-start,value))
    return value + simulate(scenario,projection,transition,observations,
                            reward,horizon-1,debug,behaviors)

def setupTigers():
    import bz2
    import copy
    import random
    # Load generic models
    from teamwork.multiagent.GenericSociety import GenericSociety
    society = GenericSociety()
    try:
        f = bz2.BZ2File('tiger.soc','r')
        data = f.read()
        f.close()
        from xml.dom import minidom
        doc = minidom.parseString(data)
        society.parse(doc.documentElement)
        # print 'Read generic models'
    except IOError:
        society.importDict(classHierarchy)
        society.save('tiger.soc')
        print 'Saved generic models'
    # Instantiate scenario
    from teamwork.agent.Entities import PsychEntity
    from teamwork.multiagent.PsychAgents import PsychAgents
    # from teamwork.dynamics.pwlDynamics import *
    agents = []
    agents.append(society.instantiate('Tiger','Tiger',PsychEntity))
    agents.append(society.instantiate('Dude','Player 1',PsychEntity))
    agents.append(society.instantiate('Dude','Player 2',PsychEntity))
    full = PsychAgents(agents)
    full.applyDefaults()

    # Make into PWL agents
    from teamwork.multiagent.pwlSimulation import PWLSimulation

    scenario = PWLSimulation(full)
    # print 'Created scenario'
    # Fill in trees
    keyList = scenario.state.domain()[0].keys()
    keyList.sort()
    state = scenario.getState()
    for tree in rewardDict.values():
        tree.fill(keyList)
        tree.freeze()
    for key,tree in observationDict.items():
        for plane in tree.branches().values():
            if not isinstance(plane,Distribution):
                plane.weights.fill(keyList)
        for matrix in tree.leaves():
            matrix.addColumns(keyList)
        tree.freeze()
    # Initialize some shorthand notation
    actions = {}
    for act1 in scenario['Player 1'].actions.getOptions():
        if act1[0]['type'] == 'Listen':
            LISTEN1 = act1
        elif act1[0]['type'] == 'OpenRight':
            RIGHT1 = act1
        elif act1[0]['type'] == 'OpenLeft':
            LEFT1 = act1
        else:
            raise UserWarning,'Unknown action: %s' % (act1[0]['type'])
    for act2 in scenario['Player 2'].actions.getOptions():
        if act2[0]['type'] == 'Listen':
            LISTEN2 = act2
        elif act2[0]['type'] == 'OpenRight':
            RIGHT2 = act2
        elif act2[0]['type'] == 'OpenLeft':
            LEFT2 = act2
        else:
            raise UserWarning,'Unknown action: %s' % (act2[0]['type'])
    for act1 in scenario['Player 1'].actions.getOptions():
        actions['Player 1'] = act1
        for act2 in scenario['Player 2'].actions.getOptions():
            actions['Player 2'] = act2
            actionKey = ' '.join(map(str,actions.values()))
            if act1[0]['type'] == 'Listen' and act2[0]['type'] == 'Listen':
                dynamics = full.getDynamics({'Player 1':act1})
            elif act1[0]['type'] != 'Listen':
                dynamics = full.getDynamics({'Player 1':act1})
            else:
                dynamics = full.getDynamics({'Player 2':act2})
            tree = dynamics['state'].getTree()
            tree.unfreeze()
            tree.fill(keyList)
            tree.freeze()
            dynamics['state'].args['tree'] = tree
            full.dynamics[actionKey] = dynamics

    start = time.time()
    # Find reachable worlds at 0-level
    worlds,lookup = full.generateWorlds()
    for name in ['Player 1','Player 2']:
        scenario[name].beliefs = KeyedVector()
        for key,world in worlds.items():
            scenario[name].beliefs[key] = scenario.state[world]
    state = KeyedVector()
    for key,world in worlds.items():
        state[key] = scenario.state[world]
    scenario.state = state
    transition = full.getDynamicsMatrix(worlds,lookup)
    reward = {}
    observations = {}
    for actions in full.generateActions():
        actionKey = ' '.join(map(str,actions.values()))
        # Transform reward function into matrix representation
        tree = rewardDict[actions.values()[0][0]['type']+\
                          actions.values()[1][0]['type']]
        vector = KeyedVector()
        for key,world in worlds.items():
            vector[key] = tree[world]
        vector.freeze()
        reward[actionKey] = vector
        # Transform observation probability into matrix representation
        tree = observationDict[actions.values()[0][0]['type']+\
                               actions.values()[1][0]['type']]
        matrix = KeyedMatrix()
        for colKey,world in worlds.items():
            new = tree[world]*world
            for vector,prob in new.items():
                for omega in Omega.values():
                    if vector[omega] > 0.5:
                        matrix.set(omega,colKey,prob)
        matrix.freeze()
        observations[actionKey] = matrix
    return scenario,full,transition,reward,observations

if __name__ == '__main__':
    scenario,full,transition,reward,observations = setupTigers()
    state = scenario.state
    timings = []
    # Seed policies with a very naive one --- best joint action
    best = {'key':None,'value':None}
    for key,vector in reward.items():
        value = vector*state
        if best['key'] is None or value > best['value']:
            best['key'] = key
            best['value'] = value
    for actions in full.generateActions():
        if best['key'] == ' '.join(map(str,actions.values())):
            best['action'] = actions
            break
    else:
        raise UserWarning,'Unknown joint action: %s' % (best['key'])
    # Set up null policy
    for name,option in best['action'].items():
        scenario[name].policy = PWLPolicy(scenario[name])
        table = PWLTable()
        table.rules = {0:option}
        table.values = {0:{}}
        actions = copy.copy(best['action'])
        for alternative in scenario[name].actions.getOptions():
            actions[name] = alternative
            actionKey = ' '.join(map(str,actions.values()))
            value = reward[actionKey]
            table.values[0][str(alternative)] = value
        scenario[name].policy.tables.append([table])
    # Solve n-level policy for various horizons
    agent = scenario['Player 1']
    other = scenario['Player 2']
    depth = 0
    maxDepth = 1
    horizon = 5
    evalHorizon = 4 # max(5,horizon)
    debug = False
    while depth < maxDepth:
        # R: fill in the reward table, indexed by the other agent's policy rules
        R = other.policy.getTable()
        for index in range(len(R)):
            yrAction = other.policy.getTable().rules[index]
            R.values[index].clear()
            for myAction in agent.actions.getOptions():
                actions = {agent.name:myAction,
                           other.name:yrAction}
                actionKey = ' '.join(map(str,actions.values()))
                R.values[index][str(myAction)] = copy.copy(reward[actionKey])
        # SE: set up the state estimators for both agents
        agent.setEstimator(transition,observations)
        other.setEstimator(transition,observations)
        for t in range(horizon):
            evalHorizon = t
            if debug:
                print
                print 'Horizon:',t
            start = time.time()
            agent.policy.project(R,depth=depth+1,debug=False)
            timings.append(time.time()-start)
            # Evaluate new policy
            behaviors = None # {agent.name:[]}
            if True:
                # Print out policy nicely
                table = agent.policy.tables[depth+1][t]
                rules = table.rules.keys()
                rules.sort()
                last = 'OpenLeft'
                for rule in rules:
                    factors = table.index2factored(rule)
                    action = table.rules[rule][0]['type']
                    if action != last:
                        try:
                            attr = factors.index(0) - 1
                        except ValueError:
                            attr = len(factors) - 1
                        if action == 'Listen':
                            if attr < 0:
                                # Always Listen
                                print action
                                break
                            else:
                                print getProbRep(table.attributes[attr][0],0),last
                        elif action == 'OpenRight':
                            print getProbRep(table.attributes[attr][0],1),action
                            break
                    last = action
                printObsPolicy(agent,horizon)
            # Compute EV of this policy
            total = EV(scenario,transition,observations,reward,evalHorizon,
                       False,behaviors)
            valueAsym = total/float(evalHorizon+1)
            if debug:
                print 'EV =',total/float(evalHorizon+1)
            # Compute EV if policies are identical
            old = other.policy
            other.policy = PWLPolicy(other)
            for policy in agent.policy.tables:
                tableList = []
                for table in policy:
                    new = PWLTable()
                    new.attributes = table.attributes[:]
                    new.rules.clear()
                    new.rules.update(table.rules)
                    for rule,RHS in new.rules.items():
                        for option in other.actions.getOptions():
                            if option[0]['type'] == RHS[0]['type']:
                                break
                        else:
                            raise UserWarning
                        new.rules[rule] = option
                    tableList.append(new)
                other.policy.tables.append(tableList)
            total = EV(scenario,transition,observations,reward,evalHorizon,
                       False,behaviors)
            valueSym = total/float(evalHorizon+1)
            if debug:
                print 'EV =',valueSym
            other.policy = old
            print '%d,%f,%f,%f' % (t,valueAsym,valueSym,timings[-1])
            sys.stdout.flush()
            if behaviors:
                unique = []
                for node,action in behaviors[agent.name]:
                    for others in unique:
                        if node['state'] == others[0]['state'] and \
                           node[agent.name] == others[0][agent.name]:
                            others[0]['probability'] += node['probability']
                            break
                    else:
                        unique.append((node,action))
                total = 0.
                for node,action in unique:
                    prob = node['probability']/float(horizon+1)
                    total += prob
                    if action[0]['type'] != 'Listen':
                        print 'Real:',node['state']
                        print 'Belief:',node[agent.name]
                        print 'Probability:',prob
                        print action
                assert abs(total-1.) < epsilon
        depth += 1
    if debug:
        print '\nTimes:',timings