
Source Code for Module teamwork.policy.policyTable

   1  import copy 
   2  #import operator 
   3  import random 
   4  import sys 
   5  import time 
   6  from xml.dom.minidom import * 
   7  from LookaheadPolicy import LookaheadPolicy 
   8  from teamwork.action.PsychActions import Action 
   9  from teamwork.math.Keys import * 
  10  from teamwork.math.KeyedVector import * 
  11  from teamwork.math.KeyedMatrix import IdentityMatrix 
  12  from teamwork.math.KeyedTree import KeyedPlane 
  13  from teamwork.math.probability import Distribution 
  14  from teamwork.math.ProbabilityTree import ProbabilityTree 
  15  from teamwork.dynamics.pwlDynamics import PWLDynamics 
  16  from teamwork.math.matrices import epsilon 
  17  from pwlTable import PWLTable 
  18       
class PolicyTable(LookaheadPolicy,PWLTable):
    """Class for representing policies as tables and using policy iteration to optimize them.
    @cvar seedMethod: the method used to generate the initial RHS values that seed policy iteration. Two types are currently supported:
          - random: the RHS values are randomly generated
          - greedy: the initial RHS value provides the best immediate expected reward (i.e., over a one-step horizon)
       The default is I{greedy}.
    @type seedMethod: str
    @ivar rules: the list of RHS actions
    @type rules: L{Action}[][]
    """
    seedMethod = 'greedy'
    def __init__(self,entity,actions=[],horizon=1):
        """Same arguments used by constructor for L{LookaheadPolicy} superclass"""
        LookaheadPolicy.__init__(self,entity=entity,actions=actions,
                                 horizon=horizon)
        self.type = 'Table'
        PWLTable.__init__(self)
        self.transition = None
        self.lookahead = []
        if self.seedMethod == 'random':
            random.seed()
41
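    # A minimal construction sketch (the `entity` object here is a hypothetical
    # agent with an `actions` attribute; the arguments mirror the
    # LookaheadPolicy constructor, as documented above):
    #
    #   >>> policy = PolicyTable(entity, entity.actions.getOptions(), horizon=2)
    #   >>> PolicyTable.seedMethod
    #   'greedy'
    #
    # Setting seedMethod to 'random' makes default() draw a random RHS instead
    # of the one-step greedy choice.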
42 - def execute(self,state,observations={},history=None,choices=[], 43 index=None,debug=False,explain=False,entities={},cache={}):
44 """Applies this policy to the given state and observation history 45 @param state: the current state 46 @type state: L{KeyedVector} 47 @param observations: the current observation history 48 @type observations: dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[] 49 @param history: dictionary of actions that have been performed so far 50 @type history: dict 51 @param entities: a dictionary of entities to be used as a value tree cache 52 @param cache: values computed so far 53 @return: the corresponding action in the table 54 @rtype: L{Action<teamwork.action.PsychActions.Action>}[] 55 """ 56 if history is None: 57 history = {} 58 if index is None: 59 if len(self.rules) <= 1: 60 index = 0 61 elif isinstance(state,KeyedVector): 62 index = self.index(state,observations) 63 else: 64 index = self.index(state['state'].expectation(),observations) 65 if debug is True: 66 print '\t\t',index 67 if len(choices) == 0: 68 choices = self.entity.actions.getOptions() 69 try: 70 rhs = self.rules[index] 71 except KeyError: 72 # We have not filled out this table, so use fallback policy 73 return LookaheadPolicy.execute(self,state,choices, 74 explain=explain) 75 if rhs is None: 76 # This rule has not been seeded yet 77 if isinstance(state,KeyedVector): 78 self.rules[index] = [self.default(choices,state,observations)] 79 else: 80 self.rules[index] = [self.default(choices, 81 state['state'].expectation(), 82 observations)] 83 rhs = self.rules[index] 84 if not isinstance(rhs,dict) and not isinstance(rhs,list): 85 # This is an old-school policy with only one action on the RHS 86 rhs = [rhs] 87 if explain: 88 exp = Document() 89 root = exp.createElement('explanation') 90 exp.appendChild(root) 91 field = exp.createElement('alternatives') 92 root.appendChild(field) 93 else: 94 exp = None 95 if isinstance(rhs,list): 96 labels = map(str,choices) 97 for index in range(len(rhs)): 98 option = rhs[index] 99 if str(option) in labels: 100 # This option is one of our available choices 101 for action in option: 102 if not action['repeatable']: 103 # Check whether we've already done this action 104 if history.has_key(str(action)): 105 # Already performed, so ignore the rest 106 if explain: 107 node = exp.createElement('alternative') 108 node.setAttribute('reason','repeated') 109 node.setAttribute('rank',str(index)) 110 for action in option: 111 node.appendChild(action.__xml__().documentElement) 112 field.appendChild(node) 113 break 114 else: 115 # We have an eligible option 116 break 117 else: 118 # This option is not one of our available choices 119 if explain: 120 node = exp.createElement('alternative') 121 node.setAttribute('reason','ineligible') 122 node.setAttribute('rank',str(index)) 123 for action in option: 124 node.appendChild(action.__xml__().documentElement) 125 field.appendChild(node) 126 else: 127 # The policy does not have any of the available choices 128 return LookaheadPolicy.execute(self,state,choices,explain=explain) 129 if explain: 130 # Generate remaining alternatives 131 for index in range(index+1,len(rhs)): 132 alternative = rhs[index] 133 node = exp.createElement('alternative') 134 node.setAttribute('reason','worse') 135 node.setAttribute('rank',str(index)) 136 for action in alternative: 137 node.appendChild(action.__xml__().documentElement) 138 field.appendChild(node) 139 # Generate expected countermoves 140 policies = self.getPolicies() 141 if len(policies) > 0: 142 node = exp.createElement('expectations') 143 value,sequence = self.evaluate(policies,state,observations, 144 
history,details=True) 145 for step in sequence: 146 subNode = exp.createElement('turn') 147 node.appendChild(subNode) 148 subNode.setAttribute('agent',step['actor']) 149 for action in step['decision']: 150 subNode.appendChild(action.__xml__().documentElement) 151 root.appendChild(node) 152 elif isinstance(rhs,dict): 153 # Value function, not explicit rules 154 goals = self.entity.goals.expectation() 155 eState = state['state'].expectation() 156 best = {'option':None,'value':None} 157 for option in choices: 158 try: 159 value = rhs[option[0]] 160 except KeyError: 161 value = None 162 if explain: 163 node = exp.createElement('alternative') 164 node.appendChild(option[0].__xml__().documentElement) 165 if value: 166 total = 0. 167 for key,dynamics in value.items(): 168 delta = None 169 if isinstance(dynamics,dict): 170 # Find referenced value function 171 entry = dynamics['entity']+str(dynamics['action'])+str(dynamics['key']) 172 try: 173 delta = cache[entry] 174 except KeyError: 175 entity = entities[dynamics['entity']] 176 dynamics = entity.policy.rules[index][dynamics['action']][dynamics['key']] 177 else: 178 entry = self.entity.name+str(option[0])+str(key) 179 if delta is None: 180 # Not in cache, so compute new delta 181 matrix = dynamics.apply(state['state']).expectation() 182 delta = goals[key]*(matrix[key]*eState) 183 cache[entry] = delta 184 total += delta 185 else: 186 total = 0. 187 if best['option'] is None or total > best['value']: 188 best['option'] = option 189 best['value'] = total 190 option = best['option'] 191 return option,exp
192
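    # A hedged usage sketch for execute(): given a KeyedVector state and an
    # observation history (agent name -> action list), the rule matching the
    # current LHS index supplies the chosen option; with explain=True an XML
    # explanation document is returned as well.  The `state` value below is an
    # assumed KeyedVector, not defined in this module:
    #
    #   >>> option, explanation = policy.execute(state, observations={},
    #   ...                                      explain=True)
    #   >>> [act['type'] for act in option]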
    def default(self,choices,state,observations):
        """Generates a default RHS, presumably with minimal effort. The exact method is determined by the L{seedMethod} class attribute.
        @param state: the current state
        @type state: L{KeyedVector}
        @param observations: the current observation history
        @type observations: dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[]
        @return: the corresponding action in the table
        @rtype: L{Action<teamwork.action.PsychActions.Action>}[]
        """
        if self.seedMethod == 'greedy':
            return self._defaultGreedy(choices,state,observations)
        elif self.seedMethod == 'random':
            return self._defaultRandom(choices,state,observations)
        else:
            raise NotImplementedError,'Unable to use %s seeding' \
                  % (self.seedMethod)
209
    def _defaultRandom(self,choices,state,observations):
        """Default RHS is a random choice
        """
        return random.choice(choices)
214
    def _defaultGreedy(self,choices,state,observations):
        """Default RHS is the optimal action over a one-step time horizon
        """
        best = None
        for action in choices:
            value = self.expectedValue(state,action)
            if not best or value > best['value']:
                best = {'action':action,'value':value}
        return best['action']
224
    def generateObservations(self,remaining=None,result=None):
        if remaining is None:
            remaining = self.getLookahead()[1:]
        if result is None:
            result = [{}]
        try:
            name = remaining[0]
        except IndexError:
            return result
        agent = self.entity.getEntity(name)
        for history in result[:]:
            result.remove(history)
            for action in agent.actions.getOptions():
                newHistory = copy.copy(history)
                newHistory[name] = action
                result.append(newHistory)
        return self.generateObservations(remaining[1:],result)
242
    def getLookahead(self):
        """
        @return: the turn sequence used for the forward projection
        @rtype: str[]
        """
        try:
            others = self.entity.entities.getSequence()
        except AttributeError:
            # lightweight agents
            return []
        if len(others) > 0 and \
               len(self.lookahead) != self.horizon:
            self.lookahead = []
            while len(self.lookahead) < self.horizon:
                self.lookahead += others
            self.lookahead = self.lookahead[:self.horizon]
            while self.lookahead[0] != self.entity.name:
                last = self.lookahead.pop()
                self.lookahead.insert(0,last)
        return self.lookahead
263
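    # Worked example of the tiling and rotation above (names are illustrative):
    # with turn sequence ['Bully','Teacher','Onlooker'], horizon 5, and this
    # policy belonging to 'Teacher', tiling gives
    # ['Bully','Teacher','Onlooker','Bully','Teacher'], and rotating until this
    # entity leads yields ['Teacher','Bully','Teacher','Onlooker','Bully'].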
    def getPolicies(self):
        """
        @return: a dictionary of the policies of all of the agents in this entity's lookahead
        @rtype: strS{->}L{PolicyTable}
        """
        policies = {}
        order = self.getLookahead()[:]
        # Seed policies
        while len(order) > 0:
            name = order.pop()
            if name == self.entity.name:
                policies[name] = self
            else:
                agent = self.entity.getEntity(name)
                if not policies.has_key(name):
                    policies[name] = agent.policy
            # Make sure that the next level of entities is covered
            for other in policies[name].getLookahead():
                if not policies.has_key(other) and not other in order:
                    order.append(other)
        for policy in policies.values():
            policy.initialize()
        return policies
287
288 - def solve(self,horizon=None,choices=None,debug=False,policies=None, 289 interrupt=None,search='exhaustive',progress=None):
290 if policies is None: 291 policies = self.getPolicies() 292 for policy in policies.values(): 293 assert policy.rules 294 if search == 'exhaustive': 295 try: 296 value = self.iterate(choices,policies,state, 297 True,False,interrupt) 298 except OverflowError: 299 return False 300 if value is False: 301 return value 302 else: 303 return True 304 elif search == 'greedy': 305 changes = 1 306 candidates = [] 307 while changes > 0: 308 changes = 0 309 # Pick a random policy to perturb 310 if len(candidates) == 0: 311 candidates = policies.keys() 312 name = random.choice(candidates) 313 candidates.remove(name) 314 ## size = reduce(operator.mul,map(len,policies[name].rules)) 315 # Try to find a best-response strategy for the chosen agent 316 for index in range(1000): 317 if interrupt and interrupt.isSet(): 318 return False 319 if progress: progress() 320 if policies[name].perturb(policies,interrupt,debug): 321 changes += 1 322 return True 323 elif search == 'abstract': 324 return self.abstractSolve(policies,interrupt,progress) 325 elif search == 'parallel': 326 return self.parallelSolve(policies,interrupt,progress) 327 else: 328 raise NameError,'Unknown search type "%s"' % (search)
329
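    # The `search` argument above selects among 'exhaustive', 'greedy',
    # 'abstract', and 'parallel'.  A hedged invocation sketch, assuming a
    # policy whose rules have already been initialized:
    #
    #   >>> policies = policy.getPolicies()
    #   >>> policy.solve(choices=entity.actions.getOptions(),
    #   ...              policies=policies, search='greedy')
    #   True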
330 - def iterate(self,choices,policies,state,recurse=False,debug=False, 331 interrupt=None):
332 """Exhaustive policy search""" 333 if choices is None: 334 choices = self.entity.actions.getOptions() 335 assert policies[self.entity.name] is self 336 best = None 337 # Consider each possible policy 338 for rule in xrange(pow(len(choices),len(self.rules))): 339 if interrupt and interrupt.isSet(): 340 if best is None: 341 # Haven't made any progress; nothing to report 342 return None 343 else: 344 # Go ahead and return results so far 345 break 346 self.fromIndex(rule,choices) 347 assert self.toIndex(choices) == rule 348 if debug: 349 print self.entity.name,map(lambda a:a[0]['type'],self.rules) 350 current = {'policy':rule} 351 if recurse: 352 # Work our way backward from our turn 353 order = self.getLookahead()[:] 354 order.reverse() 355 for name in order: 356 if name != self.entity.name: 357 index = policies[name].iterate(None,policies,state, 358 False,debug,interrupt) 359 current[name] = index 360 if debug: 361 # very domain-specific info 362 print '\t',policies[name].rules[0] 363 # Evaluate the policy currently under consideration 364 if recurse: 365 current['value'] = self.evaluate(policies,state,{},start=1) 366 else: 367 current['value'] = self.evaluate(policies,state,{},start=0) 368 if best is None or current['value'] > best['value']: 369 best = current 370 if debug: 371 print 'ER =',current['value'] 372 sys.stdout.flush() 373 # Set rule to best found 374 self.fromIndex(best['policy'],choices) 375 if recurse: 376 # Set everyone else's policy to best responses 377 for name in self.getLookahead(): 378 if name != self.entity.name: 379 actions = self.entity.getEntity(name).actions.getOptions() 380 policies[name].fromIndex(best[name]) 381 return best['policy']
382
    def perturb(self,policies,interrupt=None,debug=False):
        """Consider a random perturbation of this policy
        @return: C{True} iff a better variation was found
        @rtype: bool
        """
        # Select random rule to perturb
        state,observations,index = self.chooseRule()
        # Compute expected reward of original RHS
        old = self.evaluate(policies,state,observations)
        # Swap randomly selected actions on RHS
        size = len(self.rules[index])
        i = random.randint(0,size-1)
        j = random.randint(0,size-2)
        if j >= i: j += 1
        if debug:
            print 'Perturbing %s Rule #%d, %s<->%s' % \
                  (self.entity.name,index,self.rules[index][i],
                   self.rules[index][j])
        original = self.rules[index][i]
        self.rules[index][i] = self.rules[index][j]
        self.rules[index][j] = original
        new = self.evaluate(policies,state,observations)
        if debug:
            print old,'->',new
        if old >= new:
            # Original ordering was better
            original = self.rules[index][i]
            self.rules[index][i] = self.rules[index][j]
            self.rules[index][j] = original
            return False
        else:
            # We've made a change
            return True
416
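    # perturb() is one step of stochastic hill-climbing: swap two RHS entries
    # of a randomly chosen rule and keep the swap only if the evaluated reward
    # improves.  A hedged sketch of repeated climbing (solve(search='greedy')
    # wraps a similar loop):
    #
    #   >>> while policy.perturb(policies):
    #   ...     pass   # keep perturbing while random swaps improve the policy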
    def parallelSolve(self,policies,interrupt=None,progress=None,debug=False):
        """
        Generates an abstract state space and does value iteration to generate a policy when agents all act in parallel, each with its own LHS
        @rtype: bool
        """
        savings = 0
        goals = self.entity.goals.expectation()
        keys = filter(lambda k:isinstance(k,StateKey) and \
                      goals[k] > epsilon,goals.keys())
        keys.sort()
        for key in keys:
            entity = self.entity.world[key['entity']]
            if entity.dynamics.has_key(key['feature']):
                if debug:
                    print 'Compiling %s\'s goal of %s' % (self.entity.name,
                                                          str(key))
                for option in self.entity.actions.getOptions():
                    assert len(option) == 1
                    dynamics = entity.getDynamics(option[0],key['feature'],
                                                  cache=True,debug=debug)
        self.entity.compiled = True
        return savings
439
440 - def abstractSolve(self,policies,interrupt=None,progress=None):
441 """ 442 Generates an abstract state space (defined by the LHS attributes) and does value iteration to generate a policy 443 @warning: assumes that all of the agents have the same LHS attribute breakdown! (not hard to overcome this assumption, but annoying to code) 444 @rtype: bool 445 """ 446 assert self.rules 447 # Compute abstract transition probability 448 if self.transition is None: 449 if not self.abstractTransition(policies,interrupt): 450 # Interrupted 451 self.transition = None 452 return None 453 for name in policies.keys(): 454 if name != self.entity.name: 455 policies[name].transition = self.transition 456 # Initialize abstract value function 457 for name in policies.keys(): 458 policies[name].values = {} 459 for current in self.rules.keys(): 460 policies[name].values[current] = {} 461 for other in policies.keys(): 462 if other == name: 463 policies[name].values[current][other] = {} 464 for option in policies[name].rules[current]: 465 policies[name].values[current][other][str(option)] = 0. 466 else: 467 policies[name].values[current][other] = 0. 468 # Compute immediate (abstract) reward and cache it 469 rewards = {} 470 goals = self.entity.getGoalVector()['state'].expectation() 471 for name in policies.keys(): 472 rewards[name] = {} 473 for current in self.rules.keys(): 474 intervals = self.abstract(current) 475 table = {} 476 rewards[name][current] = table 477 for other in policies.keys(): 478 for option in policies[other].rules[current]: 479 value = self.abstractReward(intervals,goals,option, 480 interrupt) 481 if value is None: 482 # Interrupted 483 return None 484 table[str(option)] = value 485 # Value iteration 486 change = 1. 487 iterations = 0 488 while iterations < self.horizon: 489 print iterations 490 iterations += 1 491 change = 0. 492 for name in policies.keys(): 493 if interrupt and interrupt.isSet(): return None 494 # Iterate through one whole policy at a time 495 change += policies[name].abstractIterate(policies,rewards, 496 interrupt) 497 if progress: progress() 498 if change > epsilon: 499 iterations = 0 500 print 'Result:',self.rules 501 return True
502
503 - def abstractTransition(self,policies,interrupt=None,progress=None):
504 """ 505 Generates a transition probability function over the abstract state state space (defined by the LHS attributes) 506 """ 507 self.transition = {} 508 for current in self.rules.keys(): 509 self.transition[current] = {} 510 for name in policies.keys(): 511 # Determine what ranges we fall in 512 intervals = self.abstract(current) 513 if policies[name].rules[current] is None: 514 entity = self.entity.getEntity(name) 515 policies[name].rules[current] = entity.actions.getOptions() 516 # WARNING: the following assumes that all of the agents' policies have the same LHS breakdown!!! 517 for option in policies[name].rules[current]: 518 # Compute dynamics for this action 519 event = {name:option} 520 dynamics = self.entity.entities.getDynamics(event) 521 tree = dynamics['state'].getTree() 522 # Compute possible transitions over intervals 523 table = Distribution() 524 self.transition[current][str(option)] = table 525 for matrix,prob in tree[intervals].items(): 526 new = [] 527 for index in range(len(self.attributes)): 528 if interrupt and interrupt.isSet(): return None 529 if progress: progress() 530 # Determine effect on individual attribute 531 obj,values = self.attributes[index] 532 assert isinstance(obj,KeyedVector) 533 if isinstance(obj,ThresholdRow): 534 key = obj.specialKeys[0] 535 print option 536 print key 537 print matrix[key].simpleText() 538 new.append(adjustInterval(intervals,index, 539 key,matrix[key])) 540 elif isinstance(obj,DifferenceRow): 541 key = obj.specialKeys[0] 542 hi = adjustInterval(intervals,index, 543 key,matrix[key]) 544 key = obj.specialKeys[1] 545 lo = adjustInterval(intervals,index, 546 key,matrix[key]) 547 new.append({'lo':hi['lo']-lo['hi'], 548 'hi':hi['hi']-lo['lo']}) 549 else: 550 raise NotImplementedError,'Unable to compute abstract transitions of %s attributes' % (obj.__class__.__name__) 551 # Compute the possible rules applicable in the new interval 552 destinations = [{'prob':prob,'factors':[]}] 553 for index in range(len(self.attributes)): 554 obj,values = self.attributes[index] 555 interval = new[index] 556 span = interval['hi'] - interval['lo'] 557 loIndex = self.subIndex(index,interval['lo']) 558 hiIndex = self.subIndex(index,interval['hi']) 559 # Compute probability of individual destinations 560 subProbs = {} 561 if hiIndex > loIndex: 562 # Multiple destinations 563 subProbs[loIndex] = (values[loIndex]-interval['lo'])/span 564 subProbs[hiIndex] = (interval['hi']-values[hiIndex-1])/span 565 for subIndex in range(loIndex+1,hiIndex): 566 subProbs[subIndex] = (values[subIndex]-values[subIndex-1])/span 567 else: 568 # Only one destination 569 subProbs[loIndex] = 1. 570 # Generate possible destinations 571 for dest in destinations[:]: 572 destinations.remove(dest) 573 for subIndex in range(loIndex,hiIndex+1): 574 newDest = copy.deepcopy(dest) 575 newDest['prob'] *= subProbs[subIndex] 576 newDest['factors'].append(subIndex) 577 destinations.append(newDest) 578 for dest in destinations: 579 index = self.factored2index(dest['factors']) 580 try: 581 table[index] += dest['prob'] 582 except KeyError: 583 table[index] = dest['prob'] 584 for dest,prob in table.items(): 585 assert prob > -epsilon 586 if prob < epsilon: 587 del table[dest] 588 assert abs(sum(table.values())-1.) < epsilon 589 return True
590
591 - def abstractIterate(self,policies,rewards,interrupt):
592 """ 593 One pass of value iteration over abstract state space. 594 @warning: Currently works for only two agents (easy, but messy, to generalize) 595 @param rewards: C{rewards[name][state][action]} = the reward that agent C{name} gets in state C{state} (index form) derived from the performance of C{action} 596 @type rewards: strS{->}(strS{->}float)[] 597 @return: the total change to the value function 598 @rtype: float 599 """ 600 values = [] 601 # Proceed backward from end of lookahead 602 sequence = self.getLookahead()[:] # entity.entities.getSequence()[:] 603 sequence.reverse() 604 delta = 0. 605 for index in range(len(sequence)): 606 name = sequence[index] 607 for current in self.rules.keys(): 608 if name != self.entity.name: 609 # Determine what action this agent will do 610 best = {'value':None} 611 for option in policies[name].rules[current]: 612 value = policies[name].values[current][name][str(option)] 613 if best['value'] is None or value > best['value']: 614 best['value'] = value 615 best['option'] = option 616 # Compute new value 617 value = rewards[self.entity.name][current][str(best['option'])] 618 table = self.transition[current][str(best['option'])] 619 next = sequence[index-1] 620 for dest,prob in table.items(): 621 if next != self.entity.name: 622 value += prob*self.values[dest][next] 623 else: 624 value += prob*max(self.values[dest][next].values()) 625 self.values[current][name] = value 626 else: 627 # Evaluate all of my possible actions 628 for option in self.rules[current]: 629 value = rewards[self.entity.name][current][str(option)] 630 table = self.transition[current][str(option)] 631 next = sequence[index-1] 632 for dest,prob in table.items(): 633 value += prob*self.values[dest][next] 634 self.values[current][name][str(option)] = value 635 order = self.rules[current][:] 636 order.sort(lambda o1,o2:cmp(-self.values[current][name][str(o1)],-self.values[current][name][str(o2)])) 637 if order != self.rules[current]: 638 delta += 1. 639 self.rules[current] = order 640 return delta
641
    def abstractReward(self,intervals,goals,tree,interrupt=None):
        reward = 0.
        # Apply state-dependent factors of reward
        for index in range(len(self.attributes)):
            if interrupt and interrupt.isSet(): return None
            obj,values = self.attributes[index]
            interval = intervals[index]
            if isinstance(obj,ThresholdRow):
                key = obj.specialKeys[0]
                if goals.has_key(key):
                    # We have a non-zero reward weight for a feature
                    # where we know the interval of possible values
                    mean = (interval['hi']+interval['lo'])/2.
                    reward += goals[key]*mean
            else:
                raise NotImplementedError,'Unable to compute abstract reward for %s attributes' % (obj.__class__.__name__)
        return reward
659
660 - def oldReachable(self,choices,policies,state,observations,debug=False):
661 # Determine what policy entries are reachable from here 662 reachable = {} 663 current = [(state,observations)] 664 # Move myself to end of lookahead 665 sequence = self.getLookahead()[:] 666 sequence.append(sequence.pop(0)) 667 horizon = len(sequence) 668 for t in range(horizon): 669 name = sequence[t] 670 if name == self.entity.name: 671 for state,observations in current: 672 reachable[self.index(state,observations)] = True 673 actions = choices 674 else: 675 agent = self.entity.getEntity(name) 676 actions = agent.actions.getOptions() 677 for oldState,oldHistory in current[:]: 678 current.remove((oldState,oldHistory)) 679 for action in actions: 680 newHistory = copy.copy(oldHistory) 681 newHistory[name] = action 682 assert name == action[0]['actor'] 683 event = {name:action} 684 dynamics = self.entity.entities.getDynamics(event) 685 delta = dynamics['state'].apply(oldState) 686 newState = delta*oldState 687 current.append((newState,newHistory)) 688 # Iterate over joint RHS entries for reachable rules 689 rules = reachable.keys() 690 rules.sort() 691 return rules
692
693 - def evaluate(self,policies,state,observations,history=None,debug=False, 694 fixed=True,start=0,details=False):
695 """Computes the expected value of this policy in response to the given 696 policies for the other agents 697 @param policies: the policies that the other agents are expected to follow 698 @type policies: dict:strS{->}L{PolicyTable} 699 @param state: the current state 700 @type state: L{KeyedVector} 701 @param observations: the current observation history 702 @type observations: dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[] 703 @param history: dictionary of actions that have been performed so far (not changed by this method) 704 @type history: dict 705 @param fixed: flag, if C{True}, then the other agents follow their given policies; otherwise, they can optimize 706 @type fixed: bool 707 @param start: the time offset to use in the lookahead (0 starts with this agent, and is the default) 708 @type start: int 709 @param details: flag, if C{True}, then the details of this evaluation are returned in addition to the expected value 710 @return: expected value of this policy (and sequence of actions if details is C{True}) 711 @rtype: float 712 """ 713 total = 0. 714 observations = copy.copy(observations) 715 if isinstance(state,KeyedVector): 716 stateVector = state 717 else: 718 stateVector = state['state'].expectation() 719 goals = self.entity.getGoalVector() 720 goals['state'].fill(stateVector.keys(),0.) 721 sequence = self.getLookahead()[:] 722 for t in range(start): 723 sequence.append(sequence.pop(0)) 724 if history is None: 725 history = {} 726 else: 727 history = copy.copy(history) 728 actions = [] 729 for name in sequence: 730 if fixed or name == self.entity.name: 731 action,exp = policies[name].execute(stateVector,observations, 732 history) 733 else: 734 action = policies[name].localSolve(policies,stateVector, 735 observations, 736 update=False,debug=debug) 737 actions.append({'actor':name,'decision':action}) 738 if debug: 739 print '\texpects',name,'to',action 740 # Update observation history 741 observations[name] = action 742 for act in action: 743 history[str(act)] = True 744 # Update state 745 dynamics = self.entity.entities.getDynamics({name:action}) 746 stateVector = dynamics['state'].apply(stateVector)*stateVector 747 # Compute expected value 748 total += self.expectedValue(stateVector,action,goals) 749 if debug: 750 print 'EV =',total 751 if details: 752 return total,actions 753 else: 754 return total
755
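    # A hedged sketch of evaluate(): it projects the scenario forward over the
    # lookahead turn sequence, applying each agent's policy and the world
    # dynamics, and accumulates this entity's expected reward.  With
    # details=True the projected decisions are returned as well:
    #
    #   >>> value, sequence = policy.evaluate(policies, state, {}, details=True)
    #   >>> [(step['actor'], map(str, step['decision'])) for step in sequence]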
    def localSolve(self,policies,state,observations,update=False,debug=False):
        """Determines the best action out of the available options, given the current state and observation history, and while holding fixed the expected policies of the other agents.
        @param update: if C{True}, as a side effect, this policy is modified to have this best action be the RHS of the applicable rule.
        @type update: bool
        @return: the best action
        @rtype: L{Action<teamwork.action.PsychActions.Action>}[]
        """
        index = self.index(state,observations)
        best = None
        original = self.execute(state,observations)
        for action in self.entity.actions.getOptions():
            self.rules[index] = action
            value = self.evaluate(policies,state,observations,debug=debug)
            if debug:
                print '\t\tEV of',action,'=',value
            if best is None or value > best['value']:
                best = {'action':action,'value':value}
        if update:
            self.rules[index] = best['action']
        else:
            self.rules[index] = original
        return best['action']
778
    def expectedValue(self,state,action,goals=None,debug=False):
        if goals is None:
            goals = self.entity.getGoalVector()
            goals['state'].fill(state.keys(),0.)
        total = (goals['state']*state).expectation()
        for goal in self.entity.getGoals():
            if goal.type == 'state':
                # Already covered by vector
                pass
            elif goal.type == 'act':
                # Action goals not yet handled by vector
                for subAct in action:
                    if subAct['type'] == goal.key:
                        total += goal.weight
            else:
                raise NotImplementedError,\
                      'Unable to handle goals of type %s' % (goal.type)
        return total
797
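    # expectedValue() is essentially a dot product of the entity's goal vector
    # with the state vector, plus the weight of any action-type goals matched
    # by the chosen option.  A hedged sketch (stateVector and option assumed):
    #
    #   >>> policy.expectedValue(stateVector, option)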
    def chooseRule(self):
        """Generates a random state and observation history and finds the rule corresponding to them
        @return: a tuple of the state, observation history, and rule index
        @rtype: (L{KeyedVector},dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[],int)
        """
        state = self.entity.entities.getState().expectation()
        observations = {}
        for obj,values in self.attributes:
            if isinstance(obj,KeyedVector):
                # Randomly generated state vector
                if isinstance(obj,ThresholdRow):
                    key = obj.specialKeys[0]
                    index = random.choice(range(len(values)+1))
                    if index == 0:
                        lo = -1.
                    else:
                        lo = values[index-1]
                    if index == len(values):
                        hi = 1.
                    else:
                        hi = values[index]
                    state[key] = (lo+hi)/2.
                else:
                    raise NotImplementedError,'Unable to generate random choices for attributes of type %s' % (obj.__class__.__name__)
            else:
                # Randomly generated observation history
                name = obj.name
                agent = self.entity.getEntity(name)
                observations[name] = random.choice(agent.actions.getOptions())
        return state,observations,self.index(state,observations)
829
    def abstract(self,index):
        """
        @param index: the rule index of interest
        @type index: int
        @return: the abstract state subspace where the given rule is applicable, in the form of a list of intervals, one for each attribute, where each interval is a dictionary with keys C{weights}, C{index}, C{lo}, and C{hi}
        @rtype: dict[]
        """
        abstract = []
        for attrIndex in range(len(self.attributes)):
            obj,values = self.attributes[-attrIndex-1]
            if isinstance(obj,KeyedVector):
                subIndex = index % (len(values)+1)
                index /= (len(values)+1)
                if subIndex > 0:
                    lo = values[subIndex-1]
                else:
                    lo = -1.
                try:
                    hi = values[subIndex]
                except IndexError:
                    hi = 1.
                abstract.append({'weights':obj,'index':subIndex,
                                 'lo':lo,'hi':hi})
            else:
                raise NotImplementedError,'Not yet able to abstract over rules on observations.'
        abstract.reverse()
        return abstract
857
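    # Worked decoding example (illustrative attribute set): suppose the table
    # has two threshold attributes, the first with thresholds [0.] (2 intervals)
    # and the second with [-0.5, 0.5] (3 intervals).  Rule indices then range
    # over 2*3 = 6 cells, with the last attribute as the least-significant
    # digit, so abstract(4) assigns the second attribute the interval
    # (-0.5, 0.5) (4 % 3 = 1) and the first attribute the interval (0., 1.)
    # (4 // 3 = 1).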
    def fromIndex(self,index,choices=None):
        """Fills in the rules using the given number as an I{n}-ary representation of the RHS values (where I{n} is the number of possible RHS values)
        """
        if choices is None:
            choices = self.entity.actions.getOptions()
        for rule in range(len(self.rules)):
            self.rules[rule] = choices[index % len(choices)]
            index /= len(choices)
866
    def toIndex(self,choices=None):
        """
        @return: the I{n}-ary representation of the RHS values (where I{n} is the number of possible RHS values)
        @rtype: int
        """
        if choices is None:
            choices = self.entity.actions.getOptions()
        index = 0
        for rule in range(len(self.rules)):
            index *= len(choices)
            index += choices.index(self.rules[-rule-1])
        return index
879
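    # Round-trip example for fromIndex()/toIndex() (hypothetical two-option
    # action set): with choices [a, b] and three rules, indices 0-7 enumerate
    # every possible policy, rule 0 being the least-significant digit.  Index 5
    # (binary 101) yields the rule table [b, a, b], and toIndex() maps it back:
    #
    #   >>> policy.fromIndex(5, choices=[a, b])
    #   >>> policy.toIndex(choices=[a, b])
    #   5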
880 - def updateAttributes(self,actor,attributes,diffs,leaves):
881 """ 882 @param actor: the name of the agent whose turn it is 883 @type actor: str 884 @param attributes: the attributes already found 885 @type attributes: KeyedPlane[] 886 """ 887 options = self.entity.getEntity(actor).actions.getOptions() 888 newDiffs = {} 889 newLeaves = {} 890 values = {} 891 for action in options: 892 actionDict = {self.entity.name:action} 893 dynamics = self.entity.entities.getDynamics(actionDict) 894 tree = dynamics['state'].getTree() 895 try: 896 myLeaves = values[str(action)] 897 except KeyError: 898 myLeaves = values[str(action)] = tree.leaves() 899 myBranches = tree.branches().values() 900 for branch in myBranches: 901 if len(leaves) > 0: 902 # Project this branch over the actions that have 903 # occurred so far 904 for leaf in leaves.values(): 905 newBranch = KeyedPlane(branch.weights*leaf, 906 branch.threshold) 907 insertBranch(newBranch,attributes) 908 else: 909 # No previous actions, so this branch is directly 910 # relevant 911 insertBranch(branch,attributes) 912 if len(diffs) == 0: 913 for myLeaf in myLeaves: 914 newLeaves[myLeaf.simpleText()] = myLeaf 915 for other in options: 916 if action != other: 917 try: 918 yrLeaves = values[str(other)] 919 except KeyError: 920 actionDict = {self.entity.name:other} 921 dynamics = self.entity.entities.getDynamics(actionDict) 922 tree = dynamics['state'].getTree() 923 yrLeaves = values[str(other)] = tree.leaves() 924 for myLeaf in myLeaves: 925 for yrLeaf in yrLeaves: 926 diff = myLeaf - yrLeaf 927 newDiffs[diff.simpleText()] = diff 928 else: 929 for diff in diffs.values(): 930 for myLeaf in myLeaves: 931 newDiff = myLeaf*diff 932 newDiffs[newDiff.simpleText()] = newDiff 933 for leaf in leaves.values(): 934 for myLeaf in myLeaves: 935 newLeaf = myLeaf*leaf 936 newLeaves[newLeaf.simpleText()] = newLeaf 937 diffs.clear() 938 for key,value in newDiffs.items(): 939 diffs[key] = value 940 leaves.clear() 941 for key,value in newLeaves.items(): 942 leaves[key] = value
943
944 - def importTable(self,table):
945 """Takes the given table and uses it to set the LHS and RHS of this policy (making sure that the RHS refers to my entity instead) 946 @param table: the PWL table that contains the relevant LHS and RHS 947 @type table: L{PWLTable} 948 @warning: Does not import value function, just policy itself 949 """ 950 self.reset() 951 self.attributes = table.attributes[:] 952 for rule,rhs in table.rules.items(): 953 for option in self.entity.actions.getOptions(): 954 if len(rhs) == len(option): 955 # Might be equal 956 for action in option: 957 for other in rhs: 958 if other['type'] == action['type'] and \ 959 other['object'] == action['object']: 960 break 961 else: 962 # Didn't find a matching action 963 break 964 else: 965 # Match 966 self.rules[rule] = option 967 break 968 else: 969 raise UserWarning,'Unable to find %s\'s equivalent for %s' % \ 970 (self.entity.name,str(rhs))
971
972 - def generateLHS(self,horizon=None,choices=None,debug=False):
973 if horizon is None: 974 horizon = self.horizon 975 if choices is None: 976 choices = self.entity.actions.getOptions() 977 attributes = {} 978 vectors = {} 979 keyList = filter(lambda k:isinstance(k,StateKey), 980 self.entity.entities.getStateKeys().keys()) 981 order = self.getLookahead() 982 for agent in order: 983 if agent == self.entity.name: 984 optionList = choices 985 else: 986 optionList = self.entity.getEntity(agent).actions.getOptions() 987 actions = {} 988 # Look for state features affected by this action 989 # (we could get dynamics off of PsychAgents object itself, 990 # but we sometimes want to use uncompiled dynamics) 991 for option in optionList: 992 for action in option: 993 actions[action] = True 994 for action in actions.keys(): 995 for key in keyList: 996 entity = self.entity.getEntity(key['entity']) 997 dynamics = entity.getDynamics(action,key['feature']) 998 tree = dynamics.getTree() 999 for plane in tree.branches().values(): 1000 if isinstance(plane.weights,ThresholdRow): 1001 # Needs to be a bit more permissive, but... 1002 label = plane.weights.simpleText() 1003 try: 1004 attributes[label][plane.threshold] = True 1005 except KeyError: 1006 attributes[label] = {plane.threshold: True} 1007 vectors[label] = plane.weights 1008 self.attributes = [] 1009 for key,values in attributes.items(): 1010 values = values.keys() 1011 values.sort() 1012 self.attributes.append((vectors[key],values))
1013
1014 - def OLDgenerateLHS(self,horizon=None,choices=None,debug=False):
1015 if horizon is None: 1016 horizon = self.horizon 1017 if choices is None: 1018 choices = self.entity.actions.getOptions() 1019 goals = self.entity.getGoalTree().leaves()[0] 1020 attributes = [] 1021 diffs = {} 1022 leaves = {} 1023 order = self.getLookahead() 1024 for agent in order: 1025 if debug: 1026 print agent 1027 self.updateAttributes(agent,attributes,diffs,leaves) 1028 if debug: 1029 for plane in attributes: 1030 print '\t',plane.weights.getArray(),plane.threshold 1031 print 1032 valuePlanes = {} 1033 for matrix in diffs.values(): 1034 weights = goals*matrix 1035 plane = KeyedPlane(copy.deepcopy(weights),0.) 1036 if plane.always() is None: 1037 for other in valuePlanes.values(): 1038 result = plane.compare(other) 1039 if result in ['equal','inverse']: 1040 del diffs[matrix.simpleText()] 1041 break 1042 else: 1043 if debug: 1044 print '\t',weights.getArray() 1045 valuePlanes[plane.simpleText()] = plane 1046 else: 1047 del diffs[matrix.simpleText()] 1048 if debug: 1049 print 1050 for matrix in diffs.values(): 1051 weights = goals*matrix 1052 plane = KeyedPlane(weights,0.) 1053 insertBranch(plane,attributes) 1054 if debug: 1055 print 'FINAL:' 1056 newAttributes = {} 1057 for plane in attributes: 1058 weights = plane.weights.getArray() 1059 if abs(weights[1]-1.) < epsilon: 1060 scaling = 1./weights[1] 1061 else: 1062 scaling = 1. 1063 plane.weights *= scaling 1064 threshold = plane.threshold * scaling 1065 plane.threshold = 0. 1066 for other in newAttributes.keys(): 1067 result = plane.compare(KeyedPlane(newAttributes[other][0],0.)) 1068 if result == 'equal': 1069 newAttributes[other][1].append(threshold) 1070 break 1071 elif result == 'inverse': 1072 newAttributes[other][1].append(-threshold) 1073 break 1074 else: 1075 newAttributes[plane.weights.simpleText()] = (plane.weights,[threshold]) 1076 for name in order[1:]: 1077 agent = self.entity.getEntity(name) 1078 newAttributes[name] = (agent,agent.actions.getOptions()) 1079 for weights,tList in newAttributes.values(): 1080 tList.sort() 1081 if debug: 1082 print self.attrString(weights) 1083 print tList 1084 self.attributes = newAttributes.values()
1085
    def __copy__(self):
        result = LookaheadPolicy.__copy__(self)
        return PWLTable.copy(self,result)
1089
1090 - def __xml__(self):
1091 doc = LookaheadPolicy.__xml__(self) 1092 # Save attributes 1093 root = doc.createElement('attributes') 1094 doc.documentElement.appendChild(root) 1095 for obj,values in self.attributes: 1096 node = doc.createElement('attribute') 1097 root.appendChild(node) 1098 node.appendChild(obj.__xml__().documentElement) 1099 for value in values: 1100 child = doc.createElement('value') 1101 child.setAttribute('threshold',str(value)) 1102 node.appendChild(child) 1103 if len(self.rules) > 0: 1104 # Save RHS 1105 root = doc.createElement('rules') 1106 flag = False # Set type attribute only once 1107 doc.documentElement.appendChild(root) 1108 for rhs in self.rules: 1109 if rhs is None: 1110 rhs = self.entity.actions.getOptions() 1111 node = doc.createElement('rhs') 1112 if isinstance(rhs,list): 1113 # RHS is an ordered list of actions 1114 node.setAttribute('type','list') 1115 for option in rhs: 1116 subNode = doc.createElement('option') 1117 node.appendChild(subNode) 1118 for action in option: 1119 subNode.appendChild(action.__xml__().documentElement) 1120 elif isinstance(rhs,dict): 1121 # RHS is a table of action-value tree pairs 1122 node.setAttribute('type','dict') 1123 for action,value in rhs.items(): 1124 for key,dynamics in value.items(): 1125 subNode = doc.createElement('entry') 1126 subNode.appendChild(action.__xml__().documentElement) 1127 subNode.appendChild(key.__xml__().documentElement) 1128 if isinstance(dynamics,dict): 1129 link = doc.createElement('link') 1130 link.setAttribute('entity',dynamics['entity']) 1131 link.appendChild(dynamics['key'].__xml__().documentElement) 1132 link.appendChild(dynamics['action'].__xml__().documentElement) 1133 subNode.appendChild(link) 1134 else: 1135 subNode.appendChild(dynamics.__xml__().documentElement) 1136 node.appendChild(subNode) 1137 else: 1138 raise UserWarning,'Unknown RHS type: %s' % \ 1139 (rhs.__class__.__name__) 1140 root.appendChild(node) 1141 return doc
1142
1143 - def parse(self,element):
1144 LookaheadPolicy.parse(self,element) 1145 node = element.firstChild 1146 while node: 1147 if node.nodeType == node.ELEMENT_NODE: 1148 if node.tagName == 'attributes': 1149 vector = KeyedVector() 1150 child = node.firstChild 1151 while child: 1152 if child.nodeType == child.ELEMENT_NODE: 1153 if child.tagName == 'attribute': 1154 values = [] 1155 obj = None 1156 grandchild = child.firstChild 1157 while grandchild: 1158 if grandchild.nodeType == child.ELEMENT_NODE: 1159 if grandchild.tagName == 'value': 1160 values.append(float(grandchild.getAttribute('threshold'))) 1161 else: 1162 obj = vector.parse(grandchild) 1163 grandchild = grandchild.nextSibling 1164 values.sort() 1165 if obj is not None: 1166 self.attributes.append((obj,values)) 1167 child = child.nextSibling 1168 elif node.tagName == 'rules': 1169 child = node.firstChild 1170 while child: 1171 if child.nodeType == child.ELEMENT_NODE: 1172 assert child.tagName == 'rhs' 1173 rhsType = str(child.getAttribute('type')) 1174 if rhsType == 'dict': 1175 rhs = {} 1176 entry = child.firstChild 1177 while entry: 1178 if entry.nodeType == child.ELEMENT_NODE: 1179 assert entry.tagName == 'entry' 1180 action = None 1181 dynamics = None 1182 key = None 1183 grandChild = entry.firstChild 1184 while grandChild: 1185 if grandChild.nodeType == grandChild.ELEMENT_NODE: 1186 if grandChild.tagName == 'action': 1187 action = Action() 1188 action.parse(grandChild) 1189 elif grandChild.tagName == 'dynamic': 1190 dynamics = PWLDynamics() 1191 dynamics.parse(grandChild) 1192 elif grandChild.tagName == 'link': 1193 dynamics = {'entity':str(grandChild.getAttribute('entity')),'action':Action(),'key':Key()} 1194 greatChild = grandChild.firstChild 1195 while greatChild: 1196 if greatChild.nodeType == greatChild.ELEMENT_NODE: 1197 if greatChild.tagName == 'action': 1198 dynamics['action'].parse(greatChild) 1199 elif greatChild.tagName == 'key': 1200 dynamics['key'] = dynamics['key'].parse(greatChild) 1201 greatChild = greatChild.nextSibling 1202 else: 1203 key = Key() 1204 key = key.parse(grandChild) 1205 grandChild = grandChild.nextSibling 1206 assert not action is None 1207 assert not dynamics is None 1208 assert not key is None 1209 try: 1210 rhs[action][key] = dynamics 1211 except KeyError: 1212 rhs[action] = {key: dynamics} 1213 entry = entry.nextSibling 1214 self.rules.append(rhs) 1215 else: 1216 rhs = [] 1217 grandChild = child.firstChild 1218 while grandChild: 1219 if grandChild.nodeType == child.ELEMENT_NODE: 1220 assert grandChild.tagName == 'option' 1221 option = [] 1222 elements = grandChild.getElementsByTagName('action') 1223 for element in elements: 1224 action = Action() 1225 action.parse(element) 1226 option.append(action) 1227 rhs.append(option) 1228 grandChild = grandChild.nextSibling 1229 self.rules.append(rhs) 1230 child = child.nextSibling 1231 node = node.nextSibling 1232 if self.entity.name == 'Police' and self.entity.__class__.__name__ == 'PWLAgent': 1233 if len(self.rules) == 0: 1234 raise UserWarning,'%s has no rules' % (self.entity.name)
1235
1236 -def adjustInterval(intervals,index,key,row):
1237 """Adjust a specific interval by the effect of the given row 1238 @param intervals: the intervals the state currently lies in 1239 @param index: the index of the current interval of relevance 1240 @param key: the state feature key being adjusted 1241 @param row: the effect row on the state feature 1242 @return: the adjusted interval 1243 """ 1244 interval = intervals[index] 1245 if isinstance(row,UnchangedRow): 1246 # No change to current interval 1247 return interval 1248 elif isinstance(row,SetToFeatureRow): 1249 # Interval set by % of other interval 1250 for other in intervals: 1251 otherRow = other['weights'] 1252 if isinstance(otherRow,ThresholdRow) and \ 1253 other is not interval and \ 1254 otherRow.specialKeys[0] == row.deltaKey: 1255 if row[row.deltaKey] > 0.: 1256 lo = row[row.deltaKey]*other['lo'] 1257 hi = row[row.deltaKey]*other['hi'] 1258 else: 1259 lo = row[row.deltaKey]*other['hi'] 1260 hi = row[row.deltaKey]*other['lo'] 1261 return {'lo':lo,'hi':hi} 1262 else: 1263 lo = max(-1.,interval['lo']-abs(row[row.deltaKey])) 1264 hi = min(1.,interval['hi']+abs(row[row.deltaKey])) 1265 return {'lo':lo,'hi':hi} 1266 elif isinstance(row,SetToConstantRow): 1267 # Interval set to fixed point 1268 lo = hi = row[row.deltaKey] 1269 return {'lo':lo,'hi':hi} 1270 elif isinstance(row,IncrementRow): 1271 # Interval offset by fixed amount 1272 lo = max(-1.,interval['lo']+row[row.deltaKey]) 1273 hi = min(1.,interval['hi']+row[row.deltaKey]) 1274 return {'lo':lo,'hi':hi} 1275 elif isinstance(row,ScaleRow): 1276 # Interval offset by % of other 1277 if row.deltaKey == key: 1278 lo = max(-1.,interval['lo']*row[key]) 1279 hi = min(1.,interval['hi']*row[key]) 1280 return {'lo':lo,'hi':hi} 1281 else: 1282 for other in intervals: 1283 otherRow = other['weights'] 1284 if isinstance(otherRow,ThresholdRow) and \ 1285 other is not interval and \ 1286 otherRow.specialKeys[0] == row.deltaKey: 1287 if row[row.deltaKey] > 0.: 1288 delta = {'lo':row[row.deltaKey]*other['lo'], 1289 'hi':row[row.deltaKey]*other['hi']} 1290 else: 1291 delta = {'lo':row[row.deltaKey]*other['hi'], 1292 'hi':row[row.deltaKey]*other['lo']} 1293 break 1294 else: 1295 # No constraint on this state featureb 1296 delta = {'lo':-abs(row[row.deltaKey]), 1297 'hi': abs(row[row.deltaKey])} 1298 lo = max(-1.,interval['lo']+delta['lo']) 1299 hi = min( 1.,interval['hi']+delta['hi']) 1300 return {'lo':lo,'hi':hi} 1301 else: 1302 raise NotImplementedError,'Unable to compute abstract effect for %s\n'\ 1303 % (row.__class__.__name__)
1304
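# Interval-arithmetic example for adjustInterval() (illustrative numbers): an
# IncrementRow with delta +0.3 maps {'lo': 0.2, 'hi': 0.6} to
# {'lo': 0.5, 'hi': 0.9}, clipped to [-1, 1]; a SetToConstantRow with value 0.4
# collapses any interval to {'lo': 0.4, 'hi': 0.4}; a ScaleRow on the same key
# scales both endpoints by its weight before clipping.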
def insertBranch(new,oldList):
    """Adds the new branch to the old list if neither it (nor its inverse) is already present
    @param new: branch to be added
    @type new: KeyedPlane
    @param oldList: the branches already found (modified directly)
    @type oldList: KeyedPlane[]
    """
    for branch in oldList:
        result = new.compare(branch)
        if result in ['equal','inverse']:
            break
    else:
        oldList.append(new)
1319 1320 if __name__ == '__main__': 1321 import sys 1322 from xml.dom.minidom import parse 1323 from teamwork.multiagent.sequential import SequentialAgents 1324 from teamwork.agent.Entities import PsychEntity 1325 from optparse import OptionParser 1326 1327 # Parse command-line options 1328 parser = OptionParser('usage: %prog [options] SCENARIO') 1329 parser.add_option('-b','--bully-resolution', 1330 dest='bullyResolution', 1331 metavar='BRES', 1332 help='resolution of goal space for bully', 1333 default=5) 1334 parser.add_option('-t','--teacher-resolution', 1335 dest='teacherResolution', 1336 metavar='TRES', 1337 help='resolution of goal space for teacher', 1338 default=5) 1339 parser.add_option('-o','--output-file', 1340 dest='output', 1341 metavar='FILE', 1342 help='file in which to store data [default is standard out]', 1343 default=None) 1344 parser.add_option('-c','--collapse', 1345 dest='collapse', 1346 action='store_true', 1347 help='collapse common rows into a single entry', 1348 default=False) 1349 1350 (options,args) = parser.parse_args() 1351 if len(args) != 1: 1352 parser.error("incorrect number of arguments") 1353 doc = parse(args[0]) 1354 scenario = SequentialAgents() 1355 scenario.parse(doc.documentElement,PsychEntity) 1356 policies = {} 1357 name = 'Teacher' 1358 for agent in scenario.activeMembers(): 1359 print agent.name 1360 for goal in agent.getGoals(): 1361 print '\t',goal,agent.getGoalWeight(goal) 1362 table = PolicyTable(agent,agent.actions.getOptions(), 1363 len(scenario),agent.horizon) 1364 if agent.name != name: 1365 table.rules = [None] 1366 policies[agent.name] = table 1367 1368 ## # Temporary output generation of policies from indices 1369 ## for index in [0,6,687,1030,2059,1,36,43]: 1370 ## policies[name].rules = range(4) 1371 ## policies[name].fromIndex(index) 1372 ## print index 1373 ## print policies[name].rules 1374 ## # End of temporary code 1375 agent = scenario[name] 1376 state = scenario.getState().domain()[0] 1377 granularity = int(options.teacherResolution) 1378 tSpace = agent.generateSpace(granularity) 1379 granularity = int(options.bullyResolution) 1380 bSpace = scenario['Bully'].generateSpace(granularity) 1381 # Print column headings 1382 columns = ['B Type','B Goals','T Type','T Goals','T Policy','T Count', 1383 'B Policy','O Policy','T Action','B ER'] 1384 ## errors = ['B MisAction','O MisAction','T MisAction','Error'] 1385 errors = ['Error'] 1386 if options.collapse: 1387 try: columns.remove('T Type') 1388 except ValueError: pass 1389 try: columns.remove('T Goals') 1390 except ValueError: pass 1391 else: 1392 try: columns.remove('T Count') 1393 except ValueError: pass 1394 header = '' 1395 for column in columns: 1396 if column == 'T Goals': 1397 goals = agent.getGoals() 1398 goals.sort() 1399 for goal in goals: 1400 header += '%s %s,' % (goal.direction,goal.entity[0]) 1401 elif column == 'B Goals': 1402 goals = scenario['Bully'].getGoals() 1403 goals.sort() 1404 for goal in goals: 1405 header += '%s %s,' % (goal.direction,goal.entity[0]) 1406 else: 1407 header += '%s,' % (column) 1408 if options.output: 1409 f = open(options.output,'w') 1410 else: 1411 f = sys.stdout 1412 f.write(header[:-1]+'\n') 1413 for bully in range(len(bSpace)): 1414 # Try each possible set of goals for the bully 1415 feasible = {} 1416 bWeights = bSpace[bully] 1417 for goal,weight in bWeights.items(): 1418 scenario['Bully'].setGoalWeight(goal,weight,normalize=False) 1419 agent.getEntity('Bully').setGoalWeight(goal,weight,normalize=False) 1420 for teacher in 
range(len(tSpace)): 1421 # Try each possible set of goals for the teacher 1422 tWeights = tSpace[teacher] 1423 for goal,weight in tWeights.items(): 1424 agent.setGoalWeight(goal,weight,normalize=False) 1425 policies[name].solve(choices=agent.actions.getOptions(), 1426 policies=policies) 1427 index = policies[name].toIndex() 1428 entry = {'Teacher goals':tWeights, 1429 'Teacher policy':index, 1430 'Bully goals':bWeights, 1431 'Bully RHS':policies['Bully'].rules[0], 1432 'Onlooker RHS':policies['Onlooker'].rules[0], 1433 'Bully ER':policies['Bully'].evaluate(policies,state,{}) 1434 } 1435 # Output data 1436 line = '' 1437 for column in columns: 1438 if column == 'B Goals': 1439 goals = bWeights.keys() 1440 goals.sort() 1441 for goal in goals: 1442 line += '%f,' % (scenario['Bully'].getGoalWeight(goal)) 1443 elif column == 'B Type': 1444 line += '%d,' % (bully) 1445 elif column == 'T Goals': 1446 goals = tWeights.keys() 1447 goals.sort() 1448 for goal in goals: 1449 line += '%f,' % (agent.getGoalWeight(goal)) 1450 elif column == 'T Type': 1451 line += '%d,' % (teacher) 1452 elif column == 'T Policy': 1453 line += '%d,' % (index) 1454 elif column == 'O Policy': 1455 line += '%s,' % (policies['Onlooker'].rules[0][0]['type']) 1456 elif column == 'B Policy': 1457 line += '%s,' % (policies['Bully'].rules[0][0]['type']) 1458 elif column == 'B ER': 1459 line += '%f,' % (entry['Bully ER']) 1460 elif column == 'T Action': 1461 observations = {'Bully':policies['Bully'].execute(state,None), 1462 'Onlooker':policies['Onlooker'].execute(state,None), 1463 } 1464 action = policies['Teacher'].execute(state,observations) 1465 line += '%s,' % (action[0]['type']) 1466 else: 1467 line += '%s,' % (column) 1468 entry['data'] = line 1469 try: 1470 feasible[index].append(entry) 1471 except KeyError: 1472 feasible[index] = [entry] 1473 if not options.collapse: 1474 f.write(line[:-1]+'\n') 1475 f.flush() 1476 if options.collapse: 1477 modelSpace = feasible.keys() 1478 modelSpace.sort() 1479 for model in modelSpace: 1480 # Loop through possible mental models to use 1481 entryList = feasible[model] 1482 entry = entryList[0] 1483 line = entry['data'] 1484 # Count number of teacher models that reduce to this policy 1485 line = line.replace('T Count','%d' % (len(entryList))) 1486 # Analyze lost ER when using this model 1487 policies['Bully'].rules[0] = entry['Bully RHS'] 1488 for real in modelSpace: 1489 realEntry = feasible[real][0] 1490 policies['Teacher'].fromIndex(real) 1491 policies['Onlooker'].rules[0] = realEntry['Onlooker RHS'] 1492 if model == real: 1493 error = 0. 1494 else: 1495 # Measure lost ER 1496 value = policies['Bully'].evaluate(policies,state,{}) 1497 error = entry['Bully ER']-value 1498 for column in errors: 1499 if column == 'Error': 1500 line += '%f,' % (error) 1501 elif column == 'B MisAction': 1502 line += '%s,' % (policies['Bully'].rules[0][0]['type']) 1503 elif column == 'O MisAction': 1504 line += '%s,' % (policies['Onlooker'].rules[0][0]['type']) 1505 elif column == 'T MisAction': 1506 observations = {'Bully':policies['Bully'].execute(state,None), 1507 'Onlooker':policies['Onlooker'].execute(state,None), 1508 } 1509 action = policies['Teacher'].execute(state,observations) 1510 line += '%s,' % (action[0]['type']) 1511 else: 1512 raise NotImplementedError,column 1513 f.write(line[:-1]+'\n') 1514 f.flush() 1515 1516 f.close() 1517