
Source Code for Module teamwork.policy.policyTable

   1  import copy 
   2  #import operator 
   3  import random 
   4  import sys 
   5  import time 
   6  from xml.dom.minidom import * 
   7  from LookaheadPolicy import LookaheadPolicy 
   8  from teamwork.action.PsychActions import Action 
   9  from teamwork.math.Keys import * 
  10  from teamwork.math.KeyedVector import * 
  11  from teamwork.math.KeyedMatrix import IdentityMatrix 
  12  from teamwork.math.KeyedTree import KeyedPlane 
  13  from teamwork.math.probability import Distribution 
  14  from teamwork.math.ProbabilityTree import ProbabilityTree 
  15  from teamwork.dynamics.pwlDynamics import PWLDynamics 
  16  from teamwork.math.matrices import epsilon 
  17  from pwlTable import PWLTable 
  18       
class PolicyTable(LookaheadPolicy,PWLTable):
    """Class for representing policies as tables and using policy iteration to optimize them.
    @cvar seedMethod: the method used to generate the initial RHS values that seed policy iteration. Two types are currently supported:
          - random: the RHS values are randomly generated
          - greedy: the initial RHS value provides the best immediate expected reward (i.e., over a one-step horizon)
       The default is I{greedy}.
    @type seedMethod: str
    @ivar rules: the list of RHS actions
    @type rules: L{Action}[][]
    """
    seedMethod = 'greedy'
    def __init__(self,entity,actions=[],horizon=1):
        """Same arguments used by constructor for L{LookaheadPolicy} superclass"""
        LookaheadPolicy.__init__(self,entity=entity,actions=actions,
                                 horizon=horizon)
        self.type = 'Table'
        PWLTable.__init__(self)
        self.transition = None
        self.lookahead = []
        if self.seedMethod == 'random':
            random.seed()
41
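    # A minimal construction sketch (the `entity` object here is a hypothetical
    # agent with an `actions` attribute; the arguments mirror the
    # LookaheadPolicy constructor, as documented above):
    #
    #   >>> policy = PolicyTable(entity, entity.actions.getOptions(), horizon=2)
    #   >>> PolicyTable.seedMethod
    #   'greedy'
    #
    # Setting seedMethod to 'random' makes default() draw a random RHS instead
    # of the one-step greedy choice.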
42 - def execute(self,state,observations={},history=None,choices=[], 43 index=None,debug=False,explain=False,entities={},cache={}):
44 """Applies this policy to the given state and observation history 45 @param state: the current state 46 @type state: L{KeyedVector} 47 @param observations: the current observation history 48 @type observations: dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[] 49 @param history: dictionary of actions that have been performed so far 50 @type history: dict 51 @param entities: a dictionary of entities to be used as a value tree cache 52 @param cache: values computed so far 53 @return: the corresponding action in the table 54 @rtype: L{Action<teamwork.action.PsychActions.Action>}[] 55 """ 56 if history is None: 57 history = {} 58 if index is None: 59 if len(self.rules) <= 1: 60 index = 0 61 elif isinstance(state,KeyedVector): 62 index = self.index(state,observations) 63 else: 64 index = self.index(state['state'].expectation(),observations) 65 if debug is True: 66 print '\t\t',index 67 if len(choices) == 0: 68 choices = self.entity.actions.getOptions() 69 try: 70 rhs = self.rules[index] 71 except KeyError: 72 # We have not filled out this table, so use fallback policy 73 return LookaheadPolicy.execute(self,state,choices, 74 explain=explain) 75 if rhs is None: 76 # This rule has not been seeded yet 77 if isinstance(state,KeyedVector): 78 self.rules[index] = [self.default(choices,state,observations)] 79 else: 80 self.rules[index] = [self.default(choices, 81 state['state'].expectation(), 82 observations)] 83 rhs = self.rules[index] 84 if not isinstance(rhs,dict) and not isinstance(rhs,list): 85 # This is an old-school policy with only one action on the RHS 86 rhs = [rhs] 87 if explain: 88 exp = Document() 89 root = exp.createElement('explanation') 90 exp.appendChild(root) 91 field = exp.createElement('alternatives') 92 root.appendChild(field) 93 else: 94 exp = None 95 if isinstance(rhs,list): 96 labels = map(str,choices) 97 for index in range(len(rhs)): 98 option = rhs[index] 99 if str(option) in labels: 100 # This option is one of our available choices 101 for action in option: 102 if not action['repeatable']: 103 # Check whether we've already done this action 104 if history.has_key(str(action)): 105 # Already performed, so ignore the rest 106 if explain: 107 node = exp.createElement('alternative') 108 node.setAttribute('reason','repeated') 109 node.setAttribute('rank',str(index)) 110 for action in option: 111 node.appendChild(action.__xml__().documentElement) 112 field.appendChild(node) 113 break 114 else: 115 # We have an eligible option 116 break 117 else: 118 # This option is not one of our available choices 119 if explain: 120 node = exp.createElement('alternative') 121 node.setAttribute('reason','ineligible') 122 node.setAttribute('rank',str(index)) 123 for action in option: 124 node.appendChild(action.__xml__().documentElement) 125 field.appendChild(node) 126 else: 127 # The policy does not have any of the available choices 128 return LookaheadPolicy.execute(self,state,choices,explain=explain) 129 if explain: 130 # Generate remaining alternatives 131 for index in range(index+1,len(rhs)): 132 alternative = rhs[index] 133 node = exp.createElement('alternative') 134 node.setAttribute('reason','worse') 135 node.setAttribute('rank',str(index)) 136 for action in alternative: 137 node.appendChild(action.__xml__().documentElement) 138 field.appendChild(node) 139 # Generate expected countermoves 140 policies = self.getPolicies() 141 if len(policies) > 0: 142 node = exp.createElement('expectations') 143 value,sequence = self.evaluate(policies,state,observations, 144 
history,details=True) 145 for step in sequence: 146 subNode = exp.createElement('turn') 147 node.appendChild(subNode) 148 subNode.setAttribute('agent',step['actor']) 149 for action in step['decision']: 150 subNode.appendChild(action.__xml__().documentElement) 151 root.appendChild(node) 152 elif isinstance(rhs,dict): 153 # Value function, not explicit rules 154 goals = self.entity.goals.expectation() 155 eState = state['state'].expectation() 156 best = {'option':None,'value':None} 157 for option in choices: 158 try: 159 value = rhs[option[0]] 160 except KeyError: 161 value = None 162 if explain: 163 node = exp.createElement('alternative') 164 node.appendChild(option[0].__xml__().documentElement) 165 if value: 166 total = 0. 167 for key,dynamics in value.items(): 168 delta = None 169 if isinstance(dynamics,dict): 170 # Find referenced value function 171 entry = dynamics['entity']+str(dynamics['action'])+str(dynamics['key']) 172 try: 173 delta = cache[entry] 174 except KeyError: 175 entity = entities[dynamics['entity']] 176 dynamics = entity.policy.rules[index][dynamics['action']][dynamics['key']] 177 else: 178 entry = self.entity.name+str(option[0])+str(key) 179 if delta is None: 180 # Not in cache, so compute new delta 181 matrix = dynamics.apply(state['state']).expectation() 182 delta = goals[key]*(matrix[key]*eState) 183 cache[entry] = delta 184 total += delta 185 else: 186 total = 0. 187 if best['option'] is None or total > best['value']: 188 best['option'] = option 189 best['value'] = total 190 option = best['option'] 191 return option,exp
192
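    # A hedged usage sketch for execute(): given a KeyedVector state and an
    # observation history (agent name -> action list), the rule matching the
    # current LHS index supplies the chosen option; with explain=True an XML
    # explanation document is returned as well.  The `state` value below is an
    # assumed KeyedVector, not defined in this module:
    #
    #   >>> option, explanation = policy.execute(state, observations={},
    #   ...                                      explain=True)
    #   >>> [act['type'] for act in option]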
    def default(self,choices,state,observations):
        """Generates a default RHS, presumably with minimal effort. The exact method is determined by the L{seedMethod} class attribute.
        @param state: the current state
        @type state: L{KeyedVector}
        @param observations: the current observation history
        @type observations: dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[]
        @return: the corresponding action in the table
        @rtype: L{Action<teamwork.action.PsychActions.Action>}[]
        """
        if self.seedMethod == 'greedy':
            return self._defaultGreedy(choices,state,observations)
        elif self.seedMethod == 'random':
            return self._defaultRandom(choices,state,observations)
        else:
            raise NotImplementedError,'Unable to use %s seeding' \
                  % (self.seedMethod)
209
    def _defaultRandom(self,choices,state,observations):
        """Default RHS is a random choice
        """
        return random.choice(choices)
214
    def _defaultGreedy(self,choices,state,observations):
        """Default RHS is the optimal action over a one-step time horizon
        """
        best = None
        for action in choices:
            value = self.expectedValue(state,action)
            if not best or value > best['value']:
                best = {'action':action,'value':value}
        return best['action']
224
    def generateObservations(self,remaining=None,result=None):
        if remaining is None:
            remaining = self.getLookahead()[1:]
        if result is None:
            result = [{}]
        try:
            name = remaining[0]
        except IndexError:
            return result
        agent = self.entity.getEntity(name)
        for history in result[:]:
            result.remove(history)
            for action in agent.actions.getOptions():
                newHistory = copy.copy(history)
                newHistory[name] = action
                result.append(newHistory)
        return self.generateObservations(remaining[1:],result)
242
    def getLookahead(self):
        """
        @return: the turn sequence used for the forward projection
        @rtype: str[]
        """
        try:
            others = self.entity.entities.getSequence()
        except AttributeError:
            # lightweight agents
            return []
        if len(others) > 0 and \
               len(self.lookahead) != self.horizon:
            self.lookahead = []
            while len(self.lookahead) < self.horizon:
                self.lookahead += others
            self.lookahead = self.lookahead[:self.horizon]
            while self.lookahead[0] != self.entity.name:
                last = self.lookahead.pop()
                self.lookahead.insert(0,last)
        return self.lookahead
263
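    # Worked example of the tiling and rotation above (names are illustrative):
    # with turn sequence ['Bully','Teacher','Onlooker'], horizon 5, and this
    # policy belonging to 'Teacher', tiling gives
    # ['Bully','Teacher','Onlooker','Bully','Teacher'], and rotating until this
    # entity leads yields ['Teacher','Bully','Teacher','Onlooker','Bully'].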
    def getPolicies(self):
        """
        @return: a dictionary of the policies of all of the agents in this entity's lookahead
        @rtype: strS{->}L{PolicyTable}
        """
        policies = {}
        order = self.getLookahead()[:]
        # Seed policies
        while len(order) > 0:
            name = order.pop()
            if name == self.entity.name:
                policies[name] = self
            else:
                agent = self.entity.getEntity(name)
                if not policies.has_key(name):
                    policies[name] = agent.policy
            # Make sure that the next level of entities is covered
            for other in policies[name].getLookahead():
                if not policies.has_key(other) and not other in order:
                    order.append(other)
        for policy in policies.values():
            policy.initialize()
        return policies
287
288 - def solve(self,horizon=None,choices=None,debug=False,policies=None, 289 interrupt=None,search='exhaustive',progress=None):
290 if policies is None: 291 policies = self.getPolicies() 292 for policy in policies.values(): 293 assert policy.rules 294 if search == 'exhaustive': 295 try: 296 value = self.iterate(choices,policies,state, 297 True,False,interrupt) 298 except OverflowError: 299 return False 300 if value is False: 301 return value 302 else: 303 return True 304 elif search == 'greedy': 305 changes = 1 306 candidates = [] 307 while changes > 0: 308 changes = 0 309 # Pick a random policy to perturb 310 if len(candidates) == 0: 311 candidates = policies.keys() 312 name = random.choice(candidates) 313 candidates.remove(name) 314 ## size = reduce(operator.mul,map(len,policies[name].rules)) 315 # Try to find a best-response strategy for the chosen agent 316 for index in range(1000): 317 if interrupt and interrupt.isSet(): 318 return False 319 if progress: progress() 320 if policies[name].perturb(policies,interrupt,debug): 321 changes += 1 322 return True 323 elif search == 'abstract': 324 return self.abstractSolve(policies,interrupt,progress) 325 elif search == 'parallel': 326 return self.parallelSolve(policies,interrupt,progress) 327 else: 328 raise NameError,'Unknown search type "%s"' % (search)
329
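    # The `search` argument above selects among 'exhaustive', 'greedy',
    # 'abstract', and 'parallel'.  A hedged invocation sketch, assuming a
    # policy whose rules have already been initialized:
    #
    #   >>> policies = policy.getPolicies()
    #   >>> policy.solve(choices=entity.actions.getOptions(),
    #   ...              policies=policies, search='greedy')
    #   True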
330 - def iterate(self,choices,policies,state,recurse=False,debug=False, 331 interrupt=None):
332 """Exhaustive policy search""" 333 if choices is None: 334 choices = self.entity.actions.getOptions() 335 assert policies[self.entity.name] is self 336 best = None 337 # Consider each possible policy 338 for rule in xrange(pow(len(choices),len(self.rules))): 339 if interrupt and interrupt.isSet(): 340 if best is None: 341 # Haven't made any progress; nothing to report 342 return None 343 else: 344 # Go ahead and return results so far 345 break 346 self.fromIndex(rule,choices) 347 assert self.toIndex(choices) == rule 348 if debug: 349 print self.entity.name,map(lambda a:a[0]['type'],self.rules) 350 current = {'policy':rule} 351 if recurse: 352 # Work our way backward from our turn 353 order = self.getLookahead()[:] 354 order.reverse() 355 for name in order: 356 if name != self.entity.name: 357 index = policies[name].iterate(None,policies,state, 358 False,debug,interrupt) 359 current[name] = index 360 if debug: 361 # very domain-specific info 362 print '\t',policies[name].rules[0] 363 # Evaluate the policy currently under consideration 364 if recurse: 365 current['value'] = self.evaluate(policies,state,{},start=1) 366 else: 367 current['value'] = self.evaluate(policies,state,{},start=0) 368 if best is None or current['value'] > best['value']: 369 best = current 370 if debug: 371 print 'ER =',current['value'] 372 sys.stdout.flush() 373 # Set rule to best found 374 self.fromIndex(best['policy'],choices) 375 if recurse: 376 # Set everyone else's policy to best responses 377 for name in self.getLookahead(): 378 if name != self.entity.name: 379 actions = self.entity.getEntity(name).actions.getOptions() 380 policies[name].fromIndex(best[name]) 381 return best['policy']
382
    def perturb(self,policies,interrupt=None,debug=False):
        """Consider a random perturbation of this policy
        @return: C{True} iff a better variation was found
        @rtype: bool
        """
        # Select random rule to perturb
        state,observations,index = self.chooseRule()
        # Compute expected reward of original RHS
        old = self.evaluate(policies,state,observations)
        # Swap randomly selected actions on RHS
        size = len(self.rules[index])
        i = random.randint(0,size-1)
        j = random.randint(0,size-2)
        if j >= i: j += 1
        if debug:
            print 'Perturbing %s Rule #%d, %s<->%s' % \
                  (self.entity.name,index,self.rules[index][i],
                   self.rules[index][j])
        original = self.rules[index][i]
        self.rules[index][i] = self.rules[index][j]
        self.rules[index][j] = original
        new = self.evaluate(policies,state,observations)
        if debug:
            print old,'->',new
        if old >= new:
            # Original ordering was better
            original = self.rules[index][i]
            self.rules[index][i] = self.rules[index][j]
            self.rules[index][j] = original
            return False
        else:
            # We've made a change
            return True
416
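    # perturb() is one step of stochastic hill-climbing: swap two RHS entries
    # of a randomly chosen rule and keep the swap only if the evaluated reward
    # improves.  A hedged sketch of repeated climbing (solve(search='greedy')
    # wraps a similar loop):
    #
    #   >>> while policy.perturb(policies):
    #   ...     pass   # keep perturbing while random swaps improve the policy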
    def parallelSolve(self,policies,interrupt=None,progress=None,debug=False):
        """
        Generates an abstract state space and does value iteration to generate a policy when agents all act in parallel, each with its own LHS
        @rtype: bool
        """
        savings = 0
        goals = self.entity.goals.expectation()
        keys = filter(lambda k:isinstance(k,StateKey) and \
                      goals[k] > epsilon,goals.keys())
        keys.sort()
        for key in keys:
            entity = self.entity.world[key['entity']]
            if entity.dynamics.has_key(key['feature']):
                if debug:
                    print 'Compiling %s\'s goal of %s' % (self.entity.name,
                                                          str(key))
                for option in self.entity.actions.getOptions():
                    assert len(option) == 1
                    dynamics = entity.getDynamics(option[0],key['feature'],
                                                  cache=True,debug=debug)
        self.entity.compiled = True
        return savings
439
440 - def abstractSolve(self,policies,interrupt=None,progress=None):
441 """ 442 Generates an abstract state space (defined by the LHS attributes) and does value iteration to generate a policy 443 @warning: assumes that all of the agents have the same LHS attribute breakdown! (not hard to overcome this assumption, but annoying to code) 444 @rtype: bool 445 """ 446 assert self.rules 447 # Compute abstract transition probability 448 if self.transition is None: 449 if not self.abstractTransition(policies,interrupt): 450 # Interrupted 451 self.transition = None 452 return None 453 for name in policies.keys(): 454 if name != self.entity.name: 455 policies[name].transition = self.transition 456 # Initialize abstract value function 457 for name in policies.keys(): 458 policies[name].values = {} 459 for current in self.rules.keys(): 460 policies[name].values[current] = {} 461 for other in policies.keys(): 462 if other == name: 463 policies[name].values[current][other] = {} 464 for option in policies[name].rules[current]: 465 policies[name].values[current][other][str(option)] = 0. 466 else: 467 policies[name].values[current][other] = 0. 468 # Compute immediate (abstract) reward and cache it 469 rewards = {} 470 goals = self.entity.getGoalVector()['state'].expectation() 471 for name in policies.keys(): 472 rewards[name] = {} 473 for current in self.rules.keys(): 474 intervals = self.abstract(current) 475 table = {} 476 rewards[name][current] = table 477 for other in policies.keys(): 478 for option in policies[other].rules[current]: 479 value = self.abstractReward(intervals,goals,option, 480 interrupt) 481 if value is None: 482 # Interrupted 483 return None 484 table[str(option)] = value 485 # Value iteration 486 change = 1. 487 iterations = 0 488 while iterations < self.horizon: 489 print iterations 490 iterations += 1 491 change = 0. 492 for name in policies.keys(): 493 if interrupt and interrupt.isSet(): return None 494 # Iterate through one whole policy at a time 495 change += policies[name].abstractIterate(policies,rewards, 496 interrupt) 497 if progress: progress() 498 if change > epsilon: 499 iterations = 0 500 print 'Result:',self.rules 501 return True
502
503 - def abstractTransition(self,policies,interrupt=None,progress=None):
504 """ 505 Generates a transition probability function over the abstract state state space (defined by the LHS attributes) 506 """ 507 self.transition = {} 508 for current in self.rules.keys(): 509 self.transition[current] = {} 510 for name in policies.keys(): 511 # Determine what ranges we fall in 512 intervals = self.abstract(current) 513 if policies[name].rules[current] is None: 514 entity = self.entity.getEntity(name) 515 policies[name].rules[current] = entity.actions.getOptions() 516 # WARNING: the following assumes that all of the agents' policies have the same LHS breakdown!!! 517 for option in policies[name].rules[current]: 518 # Compute dynamics for this action 519 event = {name:option} 520 dynamics = self.entity.entities.getDynamics(event) 521 tree = dynamics['state'].getTree() 522 # Compute possible transitions over intervals 523 table = Distribution() 524 self.transition[current][str(option)] = table 525 for matrix,prob in tree[intervals].items(): 526 new = [] 527 for index in range(len(self.attributes)): 528 if interrupt and interrupt.isSet(): return None 529 if progress: progress() 530 # Determine effect on individual attribute 531 obj,values = self.attributes[index] 532 assert isinstance(obj,KeyedVector) 533 if isinstance(obj,ThresholdRow): 534 key = obj.specialKeys[0] 535 print option 536 print key 537 print matrix[key].simpleText() 538 new.append(adjustInterval(intervals,index, 539 key,matrix[key])) 540 elif isinstance(obj,DifferenceRow): 541 key = obj.specialKeys[0] 542 hi = adjustInterval(intervals,index, 543 key,matrix[key]) 544 key = obj.specialKeys[1] 545 lo = adjustInterval(intervals,index, 546 key,matrix[key]) 547 new.append({'lo':hi['lo']-lo['hi'], 548 'hi':hi['hi']-lo['lo']}) 549 else: 550 raise NotImplementedError,'Unable to compute abstract transitions of %s attributes' % (obj.__class__.__name__) 551 # Compute the possible rules applicable in the new interval 552 destinations = [{'prob':prob,'factors':[]}] 553 for index in range(len(self.attributes)): 554 obj,values = self.attributes[index] 555 interval = new[index] 556 span = interval['hi'] - interval['lo'] 557 loIndex = self.subIndex(index,interval['lo']) 558 hiIndex = self.subIndex(index,interval['hi']) 559 # Compute probability of individual destinations 560 subProbs = {} 561 if hiIndex > loIndex: 562 # Multiple destinations 563 subProbs[loIndex] = (values[loIndex]-interval['lo'])/span 564 subProbs[hiIndex] = (interval['hi']-values[hiIndex-1])/span 565 for subIndex in range(loIndex+1,hiIndex): 566 subProbs[subIndex] = (values[subIndex]-values[subIndex-1])/span 567 else: 568 # Only one destination 569 subProbs[loIndex] = 1. 570 # Generate possible destinations 571 for dest in destinations[:]: 572 destinations.remove(dest) 573 for subIndex in range(loIndex,hiIndex+1): 574 newDest = copy.deepcopy(dest) 575 newDest['prob'] *= subProbs[subIndex] 576 newDest['factors'].append(subIndex) 577 destinations.append(newDest) 578 for dest in destinations: 579 index = self.factored2index(dest['factors']) 580 try: 581 table[index] += dest['prob'] 582 except KeyError: 583 table[index] = dest['prob'] 584 for dest,prob in table.items(): 585 assert prob > -epsilon 586 if prob < epsilon: 587 del table[dest] 588 assert abs(sum(table.values())-1.) < epsilon 589 return True
590
591 - def abstractIterate(self,policies,rewards,interrupt):
592 """ 593 One pass of value iteration over abstract state space. 594 @warning: Currently works for only two agents (easy, but messy, to generalize) 595 @param rewards: C{rewards[name][state][action]} = the reward that agent C{name} gets in state C{state} (index form) derived from the performance of C{action} 596 @type rewards: strS{->}(strS{->}float)[] 597 @return: the total change to the value function 598 @rtype: float 599 """ 600 values = [] 601 # Proceed backward from end of lookahead 602 sequence = self.getLookahead()[:] # entity.entities.getSequence()[:] 603 sequence.reverse() 604 delta = 0. 605 for index in range(len(sequence)): 606 name = sequence[index] 607 for current in self.rules.keys(): 608 if name != self.entity.name: 609 # Determine what action this agent will do 610 best = {'value':None} 611 for option in policies[name].rules[current]: 612 value = policies[name].values[current][name][str(option)] 613 if best['value'] is None or value > best['value']: 614 best['value'] = value 615 best['option'] = option 616 # Compute new value 617 value = rewards[self.entity.name][current][str(best['option'])] 618 table = self.transition[current][str(best['option'])] 619 next = sequence[index-1] 620 for dest,prob in table.items(): 621 if next != self.entity.name: 622 value += prob*self.values[dest][next] 623 else: 624 value += prob*max(self.values[dest][next].values()) 625 self.values[current][name] = value 626 else: 627 # Evaluate all of my possible actions 628 for option in self.rules[current]: 629 value = rewards[self.entity.name][current][str(option)] 630 table = self.transition[current][str(option)] 631 next = sequence[index-1] 632 for dest,prob in table.items(): 633 value += prob*self.values[dest][next] 634 self.values[current][name][str(option)] = value 635 order = self.rules[current][:] 636 order.sort(lambda o1,o2:cmp(-self.values[current][name][str(o1)],-self.values[current][name][str(o2)])) 637 if order != self.rules[current]: 638 delta += 1. 639 self.rules[current] = order 640 return delta
641
    def abstractReward(self,intervals,goals,tree,interrupt=None):
        reward = 0.
        # Apply state-dependent factors of reward
        for index in range(len(self.attributes)):
            if interrupt and interrupt.isSet(): return None
            obj,values = self.attributes[index]
            interval = intervals[index]
            if isinstance(obj,ThresholdRow):
                key = obj.specialKeys[0]
                if goals.has_key(key):
                    # We have a non-zero reward weight for a feature
                    # where we know the interval of possible values
                    mean = (interval['hi']+interval['lo'])/2.
                    reward += goals[key]*mean
            else:
                raise NotImplementedError,'Unable to compute abstract reward for %s attributes' % (obj.__class__.__name__)
        return reward
659
660 - def oldReachable(self,choices,policies,state,observations,debug=False):
661 # Determine what policy entries are reachable from here 662 reachable = {} 663 current = [(state,observations)] 664 # Move myself to end of lookahead 665 sequence = self.getLookahead()[:] 666 sequence.append(sequence.pop(0)) 667 horizon = len(sequence) 668 for t in range(horizon): 669 name = sequence[t] 670 if name == self.entity.name: 671 for state,observations in current: 672 reachable[self.index(state,observations)] = True 673 actions = choices 674 else: 675 agent = self.entity.getEntity(name) 676 actions = agent.actions.getOptions() 677 for oldState,oldHistory in current[:]: 678 current.remove((oldState,oldHistory)) 679 for action in actions: 680 newHistory = copy.copy(oldHistory) 681 newHistory[name] = action 682 assert name == action[0]['actor'] 683 event = {name:action} 684 dynamics = self.entity.entities.getDynamics(event) 685 delta = dynamics['state'].apply(oldState) 686 newState = delta*oldState 687 current.append((newState,newHistory)) 688 # Iterate over joint RHS entries for reachable rules 689 rules = reachable.keys() 690 rules.sort() 691 return rules
692
693 - def evaluate(self,policies,state,observations,history=None,debug=False, 694 fixed=True,start=0,details=False):
695 """Computes the expected value of this policy in response to the given 696 policies for the other agents 697 @param policies: the policies that the other agents are expected to follow 698 @type policies: dict:strS{->}L{PolicyTable} 699 @param state: the current state 700 @type state: L{KeyedVector} 701 @param observations: the current observation history 702 @type observations: dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[] 703 @param history: dictionary of actions that have been performed so far (not changed by this method) 704 @type history: dict 705 @param fixed: flag, if C{True}, then the other agents follow their given policies; otherwise, they can optimize 706 @type fixed: bool 707 @param start: the time offset to use in the lookahead (0 starts with this agent, and is the default) 708 @type start: int 709 @param details: flag, if C{True}, then the details of this evaluation are returned in addition to the expected value 710 @return: expected value of this policy (and sequence of actions if details is C{True}) 711 @rtype: float 712 """ 713 total = 0. 714 observations = copy.copy(observations) 715 if isinstance(state,KeyedVector): 716 stateVector = state 717 else: 718 stateVector = state['state'].expectation() 719 goals = self.entity.getGoalVector() 720 goals['state'].fill(stateVector.keys(),0.) 721 sequence = self.getLookahead()[:] 722 for t in range(start): 723 sequence.append(sequence.pop(0)) 724 if history is None: 725 history = {} 726 else: 727 history = copy.copy(history) 728 actions = [] 729 for name in sequence: 730 if fixed or name == self.entity.name: 731 action,exp = policies[name].execute(stateVector,observations, 732 history) 733 else: 734 action = policies[name].localSolve(policies,stateVector, 735 observations, 736 update=False,debug=debug) 737 actions.append({'actor':name,'decision':action}) 738 if debug: 739 print '\texpects',name,'to',action 740 # Update observation history 741 observations[name] = action 742 for act in action: 743 history[str(act)] = True 744 # Update state 745 dynamics = self.entity.entities.getDynamics({name:action}) 746 stateVector = dynamics['state'].apply(stateVector)*stateVector 747 # Compute expected value 748 total += self.expectedValue(stateVector,action,goals) 749 if debug: 750 print 'EV =',total 751 if details: 752 return total,actions 753 else: 754 return total
755
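    # A hedged sketch of evaluate(): it projects the scenario forward over the
    # lookahead turn sequence, applying each agent's policy and the world
    # dynamics, and accumulates this entity's expected reward.  With
    # details=True the projected decisions are returned as well:
    #
    #   >>> value, sequence = policy.evaluate(policies, state, {}, details=True)
    #   >>> [(step['actor'], map(str, step['decision'])) for step in sequence]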
    def localSolve(self,policies,state,observations,update=False,debug=False):
        """Determines the best action out of the available options, given the current state and observation history, and while holding fixed the expected policies of the other agents.
        @param update: if C{True}, as a side effect, this policy is modified to have this best action be the RHS of the applicable rule.
        @type update: bool
        @return: the best action
        @rtype: L{Action<teamwork.action.PsychActions.Action>}[]
        """
        index = self.index(state,observations)
        best = None
        original = self.execute(state,observations)
        for action in self.entity.actions.getOptions():
            self.rules[index] = action
            value = self.evaluate(policies,state,observations,debug=debug)
            if debug:
                print '\t\tEV of',action,'=',value
            if best is None or value > best['value']:
                best = {'action':action,'value':value}
        if update:
            self.rules[index] = best['action']
        else:
            self.rules[index] = original
        return best['action']
778
    def expectedValue(self,state,action,goals=None,debug=False):
        if goals is None:
            goals = self.entity.getGoalVector()
            goals['state'].fill(state.keys(),0.)
        total = (goals['state']*state).expectation()
        for goal in self.entity.getGoals():
            if goal.type == 'state':
                # Already covered by vector
                pass
            elif goal.type == 'act':
                # Action goals not yet handled by vector
                for subAct in action:
                    if subAct['type'] == goal.key:
                        total += goal.weight
            else:
                raise NotImplementedError,\
                      'Unable to handle goals of type %s' % (goal.type)
        return total
797
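    # expectedValue() is essentially a dot product of the entity's goal vector
    # with the state vector, plus the weight of any action-type goals matched
    # by the chosen option.  A hedged sketch (stateVector and option assumed):
    #
    #   >>> policy.expectedValue(stateVector, option)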
    def chooseRule(self):
        """Generates a random state and observation history and finds the rule corresponding to them
        @return: a tuple of the state, observation history, and rule index
        @rtype: (L{KeyedVector},dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[],int)
        """
        state = self.entity.entities.getState().expectation()
        observations = {}
        for obj,values in self.attributes:
            if isinstance(obj,KeyedVector):
                # Randomly generated state vector
                if isinstance(obj,ThresholdRow):
                    key = obj.specialKeys[0]
                    index = random.choice(range(len(values)+1))
                    if index == 0:
                        lo = -1.
                    else:
                        lo = values[index-1]
                    if index == len(values):
                        hi = 1.
                    else:
                        hi = values[index]
                    state[key] = (lo+hi)/2.
                else:
                    raise NotImplementedError,'Unable to generate random choices for attributes of type %s' % (obj.__class__.__name__)
            else:
                # Randomly generated observation history
                name = obj.name
                agent = self.entity.getEntity(name)
                observations[name] = random.choice(agent.actions.getOptions())
        return state,observations,self.index(state,observations)
829
    def abstract(self,index):
        """
        @param index: the rule index of interest
        @type index: int
        @return: the abstract state subspace where the given rule is applicable, in the form of a list of intervals, one for each attribute, where each interval is a dictionary with keys C{weights}, C{index}, C{lo}, and C{hi}
        @rtype: dict[]
        """
        abstract = []
        for attrIndex in range(len(self.attributes)):
            obj,values = self.attributes[-attrIndex-1]
            if isinstance(obj,KeyedVector):
                subIndex = index % (len(values)+1)
                index /= (len(values)+1)
                if subIndex > 0:
                    lo = values[subIndex-1]
                else:
                    lo = -1.
                try:
                    hi = values[subIndex]
                except IndexError:
                    hi = 1.
                abstract.append({'weights':obj,'index':subIndex,
                                 'lo':lo,'hi':hi})
            else:
                raise NotImplementedError,'Not yet able to abstract over rules on observations.'
        abstract.reverse()
        return abstract
857
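    # Worked decoding example (illustrative attribute set): suppose the table
    # has two threshold attributes, the first with thresholds [0.] (2 intervals)
    # and the second with [-0.5, 0.5] (3 intervals).  Rule indices then range
    # over 2*3 = 6 cells, with the last attribute as the least-significant
    # digit, so abstract(4) assigns the second attribute the interval
    # (-0.5, 0.5) (4 % 3 = 1) and the first attribute the interval (0., 1.)
    # (4 // 3 = 1).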
    def fromIndex(self,index,choices=None):
        """Fills in the rules using the given number as an I{n}-ary representation of the RHS values (where I{n} is the number of possible RHS values)
        """
        if choices is None:
            choices = self.entity.actions.getOptions()
        for rule in range(len(self.rules)):
            self.rules[rule] = choices[index % len(choices)]
            index /= len(choices)
866
    def toIndex(self,choices=None):
        """
        @return: the I{n}-ary representation of the RHS values (where I{n} is the number of possible RHS values)
        @rtype: int
        """
        if choices is None:
            choices = self.entity.actions.getOptions()
        index = 0
        for rule in range(len(self.rules)):
            index *= len(choices)
            index += choices.index(self.rules[-rule-1])
        return index
879
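    # Round-trip example for fromIndex()/toIndex() (hypothetical two-option
    # action set): with choices [a, b] and three rules, indices 0-7 enumerate
    # every possible policy, rule 0 being the least-significant digit.  Index 5
    # (binary 101) yields the rule table [b, a, b], and toIndex() maps it back:
    #
    #   >>> policy.fromIndex(5, choices=[a, b])
    #   >>> policy.toIndex(choices=[a, b])
    #   5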
880 - def updateAttributes(self,actor,attributes,diffs,leaves):
881 """ 882 @param actor: the name of the agent whose turn it is 883 @type actor: str 884 @param attributes: the attributes already found 885 @type attributes: KeyedPlane[] 886 """ 887 options = self.entity.getEntity(actor).actions.getOptions() 888 newDiffs = {} 889 newLeaves = {} 890 values = {} 891 for action in options: 892 actionDict = {self.entity.name:action} 893 dynamics = self.entity.entities.getDynamics(actionDict) 894 tree = dynamics['state'].getTree() 895 try: 896 myLeaves = values[str(action)] 897 except KeyError: 898 myLeaves = values[str(action)] = tree.leaves() 899 myBranches = tree.branches().values() 900 for branch in myBranches: 901 if len(leaves) > 0: 902 # Project this branch over the actions that have 903 # occurred so far 904 for leaf in leaves.values(): 905 newBranch = KeyedPlane(branch.weights*leaf, 906 branch.threshold) 907 insertBranch(newBranch,attributes) 908 else: 909 # No previous actions, so this branch is directly 910 # relevant 911 insertBranch(branch,attributes) 912 if len(diffs) == 0: 913 for myLeaf in myLeaves: 914 newLeaves[myLeaf.simpleText()] = myLeaf 915 for other in options: 916 if action != other: 917 try: 918 yrLeaves = values[str(other)] 919 except KeyError: 920 actionDict = {self.entity.name:other} 921 dynamics = self.entity.entities.getDynamics(actionDict) 922 tree = dynamics['state'].getTree() 923 yrLeaves = values[str(other)] = tree.leaves() 924 for myLeaf in myLeaves: 925 for yrLeaf in yrLeaves: 926 diff = myLeaf - yrLeaf 927 newDiffs[diff.simpleText()] = diff 928 else: 929 for diff in diffs.values(): 930 for myLeaf in myLeaves: 931 newDiff = myLeaf*diff 932 newDiffs[newDiff.simpleText()] = newDiff 933 for leaf in leaves.values(): 934 for myLeaf in myLeaves: 935 newLeaf = myLeaf*leaf 936 newLeaves[newLeaf.simpleText()] = newLeaf 937 diffs.clear() 938 for key,value in newDiffs.items(): 939 diffs[key] = value 940 leaves.clear() 941 for key,value in newLeaves.items(): 942 leaves[key] = value
943
944 - def importTable(self,table):
945 """Takes the given table and uses it to set the LHS and RHS of this policy (making sure that the RHS refers to my entity instead) 946 @param table: the PWL table that contains the relevant LHS and RHS 947 @type table: L{PWLTable} 948 @warning: Does not import value function, just policy itself 949 """ 950 self.reset() 951 self.attributes = table.attributes[:] 952 for rule,rhs in table.rules.items(): 953 for option in self.entity.actions.getOptions(): 954 if len(rhs) == len(option): 955 # Might be equal 956 for action in option: 957 for other in rhs: 958 if other['type'] == action['type'] and \ 959 other['object'] == action['object']: 960 break 961 else: 962 # Didn't find a matching action 963 break 964 else: 965 # Match 966 self.rules[rule] = option 967 break 968 else: 969 raise UserWarning,'Unable to find %s\'s equivalent for %s' % \ 970 (self.entity.name,str(rhs))
971
972 - def generateLHS(self,horizon=None,choices=None,debug=False):
973 if horizon is None: 974 horizon = self.horizon 975 if choices is None: 976 choices = self.entity.actions.getOptions() 977 attributes = {} 978 vectors = {} 979 keyList = filter(lambda k:isinstance(k,StateKey), 980 self.entity.entities.getStateKeys().keys()) 981 order = self.getLookahead() 982 for agent in order: 983 if agent == self.entity.name: 984 optionList = choices 985 else: 986 optionList = self.entity.getEntity(agent).actions.getOptions() 987 actions = {} 988 # Look for state features affected by this action 989 # (we could get dynamics off of PsychAgents object itself, 990 # but we sometimes want to use uncompiled dynamics) 991 for option in optionList: 992 for action in option: 993 actions[action] = True 994 for action in actions.keys(): 995 for key in keyList: 996 entity = self.entity.getEntity(key['entity']) 997 dynamics = entity.getDynamics(action,key['feature']) 998 tree = dynamics.getTree() 999 for plane in tree.branches().values(): 1000 if isinstance(plane.weights,ThresholdRow): 1001 # Needs to be a bit more permissive, but... 1002 label = plane.weights.simpleText() 1003 try: 1004 attributes[label][plane.threshold] = True 1005 except KeyError: 1006 attributes[label] = {plane.threshold: True} 1007 vectors[label] = plane.weights 1008 self.attributes = [] 1009 for key,values in attributes.items(): 1010 values = values.keys() 1011 values.sort() 1012 self.attributes.append((vectors[key],values))
1013
1014 - def OLDgenerateLHS(self,horizon=None,choices=None,debug=False):
1015 if horizon is None: 1016 horizon = self.horizon 1017 if choices is None: 1018 choices = self.entity.actions.getOptions() 1019 goals = self.entity.getGoalTree().leaves()[0] 1020 attributes = [] 1021 diffs = {} 1022 leaves = {} 1023 order = self.getLookahead() 1024 for agent in order: 1025 if debug: 1026 print agent 1027 self.updateAttributes(agent,attributes,diffs,leaves) 1028 if debug: 1029 for plane in attributes: 1030 print '\t',plane.weights.getArray(),plane.threshold 1031 print 1032 valuePlanes = {} 1033 for matrix in diffs.values(): 1034 weights = goals*matrix 1035 plane = KeyedPlane(copy.deepcopy(weights),0.) 1036 if plane.always() is None: 1037 for other in valuePlanes.values(): 1038 result = plane.compare(other) 1039 if result in ['equal','inverse']: 1040 del diffs[matrix.simpleText()] 1041 break 1042 else: 1043 if debug: 1044 print '\t',weights.getArray() 1045 valuePlanes[plane.simpleText()] = plane 1046 else: 1047 del diffs[matrix.simpleText()] 1048 if debug: 1049 print 1050 for matrix in diffs.values(): 1051 weights = goals*matrix 1052 plane = KeyedPlane(weights,0.) 1053 insertBranch(plane,attributes) 1054 if debug: 1055 print 'FINAL:' 1056 newAttributes = {} 1057 for plane in attributes: 1058 weights = plane.weights.getArray() 1059 if abs(weights[1]-1.) < epsilon: 1060 scaling = 1./weights[1] 1061 else: 1062 scaling = 1. 1063 plane.weights *= scaling 1064 threshold = plane.threshold * scaling 1065 plane.threshold = 0. 1066 for other in newAttributes.keys(): 1067 result = plane.compare(KeyedPlane(newAttributes[other][0],0.)) 1068 if result == 'equal': 1069 newAttributes[other][1].append(threshold) 1070 break 1071 elif result == 'inverse': 1072 newAttributes[other][1].append(-threshold) 1073 break 1074 else: 1075 newAttributes[plane.weights.simpleText()] = (plane.weights,[threshold]) 1076 for name in order[1:]: 1077 agent = self.entity.getEntity(name) 1078 newAttributes[name] = (agent,agent.actions.getOptions()) 1079 for weights,tList in newAttributes.values(): 1080 tList.sort() 1081 if debug: 1082 print self.attrString(weights) 1083 print tList 1084 self.attributes = newAttributes.values()
1085
    def __copy__(self):
        result = LookaheadPolicy.__copy__(self)
        return PWLTable.copy(self,result)
1089
1090 - def __xml__(self):
1091 doc = LookaheadPolicy.__xml__(self) 1092 # Save attributes 1093 root = doc.createElement('attributes') 1094 doc.documentElement.appendChild(root) 1095 for obj,values in self.attributes: 1096 node = doc.createElement('attribute') 1097 root.appendChild(node) 1098 node.appendChild(obj.__xml__().documentElement) 1099 for value in values: 1100 child = doc.createElement('value') 1101 child.setAttribute('threshold',str(value)) 1102 node.appendChild(child) 1103 if len(self.rules) > 0: 1104 # Save RHS 1105 root = doc.createElement('rules') 1106 flag = False # Set type attribute only once 1107 doc.documentElement.appendChild(root) 1108 for rhs in self.rules: 1109 if rhs is None: 1110 rhs = self.entity.actions.getOptions() 1111 node = doc.createElement('rhs') 1112 if isinstance(rhs,list): 1113 # RHS is an ordered list of actions 1114 node.setAttribute('type','list') 1115 for option in rhs: 1116 subNode = doc.createElement('option') 1117 node.appendChild(subNode) 1118 for action in option: 1119 subNode.appendChild(action.__xml__().documentElement) 1120 elif isinstance(rhs,dict): 1121 # RHS is a table of action-value tree pairs 1122 node.setAttribute('type','dict') 1123 for action,value in rhs.items(): 1124 for key,dynamics in value.items(): 1125 subNode = doc.createElement('entry') 1126 subNode.appendChild(action.__xml__().documentElement) 1127 subNode.appendChild(key.__xml__().documentElement) 1128 if isinstance(dynamics,dict): 1129 link = doc.createElement('link') 1130 link.setAttribute('entity',dynamics['entity']) 1131 link.appendChild(dynamics['key'].__xml__().documentElement) 1132 link.appendChild(dynamics['action'].__xml__().documentElement) 1133 subNode.appendChild(link) 1134 else: 1135 subNode.appendChild(dynamics.__xml__().documentElement) 1136 node.appendChild(subNode) 1137 else: 1138 raise UserWarning,'Unknown RHS type: %s' % \ 1139 (rhs.__class__.__name__) 1140 root.appendChild(node) 1141 return doc
1142
1143 - def parse(self,element):
1144 LookaheadPolicy.parse(self,element) 1145 node = element.firstChild 1146 while node: 1147 if node.nodeType == node.ELEMENT_NODE: 1148 if node.tagName == 'attributes': 1149 vector = KeyedVector() 1150 child = node.firstChild 1151 while child: 1152 if child.nodeType == child.ELEMENT_NODE: 1153 if child.tagName == 'attribute': 1154 values = [] 1155 obj = None 1156 grandchild = child.firstChild 1157 while grandchild: 1158 if grandchild.nodeType == child.ELEMENT_NODE: 1159 if grandchild.tagName == 'value': 1160 values.append(float(grandchild.getAttribute('threshold'))) 1161 else: 1162 obj = vector.parse(grandchild) 1163 grandchild = grandchild.nextSibling 1164 values.sort() 1165 if obj is not None: 1166 self.attributes.append((obj,values)) 1167 child = child.nextSibling 1168 elif node.tagName == 'rules': 1169 child = node.firstChild 1170 while child: 1171 if child.nodeType == child.ELEMENT_NODE: 1172 assert child.tagName == 'rhs' 1173 rhsType = str(child.getAttribute('type')) 1174 if rhsType == 'dict': 1175 rhs = {} 1176 entry = child.firstChild 1177 while entry: 1178 if entry.nodeType == child.ELEMENT_NODE: 1179 assert entry.tagName == 'entry' 1180 action = None 1181 dynamics = None 1182 key = None 1183 grandChild = entry.firstChild 1184 while grandChild: 1185 if grandChild.nodeType == grandChild.ELEMENT_NODE: 1186 if grandChild.tagName == 'action': 1187 action = Action() 1188 action.parse(grandChild) 1189 elif grandChild.tagName == 'dynamic': 1190 dynamics = PWLDynamics() 1191 dynamics.parse(grandChild) 1192 elif grandChild.tagName == 'link': 1193 dynamics = {'entity':str(grandChild.getAttribute('entity')),'action':Action(),'key':Key()} 1194 greatChild = grandChild.firstChild 1195 while greatChild: 1196 if greatChild.nodeType == greatChild.ELEMENT_NODE: 1197 if greatChild.tagName == 'action': 1198 dynamics['action'].parse(greatChild) 1199 elif greatChild.tagName == 'key': 1200 dynamics['key'] = dynamics['key'].parse(greatChild) 1201 greatChild = greatChild.nextSibling 1202 else: 1203 key = Key() 1204 key = key.parse(grandChild) 1205 grandChild = grandChild.nextSibling 1206 assert not action is None 1207 assert not dynamics is None 1208 assert not key is None 1209 try: 1210 rhs[action][key] = dynamics 1211 except KeyError: 1212 rhs[action] = {key: dynamics} 1213 entry = entry.nextSibling 1214 self.rules.append(rhs) 1215 else: 1216 rhs = [] 1217 grandChild = child.firstChild 1218 while grandChild: 1219 if grandChild.nodeType == child.ELEMENT_NODE: 1220 assert grandChild.tagName == 'option' 1221 option = [] 1222 elements = grandChild.getElementsByTagName('action') 1223 for element in elements: 1224 action = Action() 1225 action.parse(element) 1226 option.append(action) 1227 rhs.append(option) 1228 grandChild = grandChild.nextSibling 1229 self.rules.append(rhs) 1230 child = child.nextSibling 1231 node = node.nextSibling 1232 if self.entity.name == 'Police' and self.entity.__class__.__name__ == 'PWLAgent': 1233 if len(self.rules) == 0: 1234 raise UserWarning,'%s has no rules' % (self.entity.name)
1235
1236 -def adjustInterval(intervals,index,key,row):
1237 """Adjust a specific interval by the effect of the given row 1238 @param intervals: the intervals the state currently lies in 1239 @param index: the index of the current interval of relevance 1240 @param key: the state feature key being adjusted 1241 @param row: the effect row on the state feature 1242 @return: the adjusted interval 1243 """ 1244 interval = intervals[index] 1245 if isinstance(row,UnchangedRow): 1246 # No change to current interval 1247 return interval 1248 elif isinstance(row,SetToFeatureRow): 1249 # Interval set by % of other interval 1250 for other in intervals: 1251 otherRow = other['weights'] 1252 if isinstance(otherRow,ThresholdRow) and \ 1253 other is not interval and \ 1254 otherRow.specialKeys[0] == row.deltaKey: 1255 if row[row.deltaKey] > 0.: 1256 lo = row[row.deltaKey]*other['lo'] 1257 hi = row[row.deltaKey]*other['hi'] 1258 else: 1259 lo = row[row.deltaKey]*other['hi'] 1260 hi = row[row.deltaKey]*other['lo'] 1261 return {'lo':lo,'hi':hi} 1262 else: 1263 lo = max(-1.,interval['lo']-abs(row[row.deltaKey])) 1264 hi = min(1.,interval['hi']+abs(row[row.deltaKey])) 1265 return {'lo':lo,'hi':hi} 1266 elif isinstance(row,SetToConstantRow): 1267 # Interval set to fixed point 1268 lo = hi = row[row.deltaKey] 1269 return {'lo':lo,'hi':hi} 1270 elif isinstance(row,IncrementRow): 1271 # Interval offset by fixed amount 1272 lo = max(-1.,interval['lo']+row[row.deltaKey]) 1273 hi = min(1.,interval['hi']+row[row.deltaKey]) 1274 return {'lo':lo,'hi':hi} 1275 elif isinstance(row,ScaleRow): 1276 # Interval offset by % of other 1277 if row.deltaKey == key: 1278 lo = max(-1.,interval['lo']*row[key]) 1279 hi = min(1.,interval['hi']*row[key]) 1280 return {'lo':lo,'hi':hi} 1281 else: 1282 for other in intervals: 1283 otherRow = other['weights'] 1284 if isinstance(otherRow,ThresholdRow) and \ 1285 other is not interval and \ 1286 otherRow.specialKeys[0] == row.deltaKey: 1287 if row[row.deltaKey] > 0.: 1288 delta = {'lo':row[row.deltaKey]*other['lo'], 1289 'hi':row[row.deltaKey]*other['hi']} 1290 else: 1291 delta = {'lo':row[row.deltaKey]*other['hi'], 1292 'hi':row[row.deltaKey]*other['lo']} 1293 break 1294 else: 1295 # No constraint on this state featureb 1296 delta = {'lo':-abs(row[row.deltaKey]), 1297 'hi': abs(row[row.deltaKey])} 1298 lo = max(-1.,interval['lo']+delta['lo']) 1299 hi = min( 1.,interval['hi']+delta['hi']) 1300 return {'lo':lo,'hi':hi} 1301 else: 1302 raise NotImplementedError,'Unable to compute abstract effect for %s\n'\ 1303 % (row.__class__.__name__)
1304
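# Interval-arithmetic example for adjustInterval() (illustrative numbers): an
# IncrementRow with delta +0.3 maps {'lo': 0.2, 'hi': 0.6} to
# {'lo': 0.5, 'hi': 0.9}, clipped to [-1, 1]; a SetToConstantRow with value 0.4
# collapses any interval to {'lo': 0.4, 'hi': 0.4}; a ScaleRow on the same key
# scales both endpoints by its weight before clipping.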
def insertBranch(new,oldList):
    """Adds the new branch to the old list if neither it (nor its inverse) is already present
    @param new: branch to be added
    @type new: KeyedPlane
    @param oldList: the branches already found (modified directly)
    @type oldList: KeyedPlane[]
    """
    for branch in oldList:
        result = new.compare(branch)
        if result in ['equal','inverse']:
            break
    else:
        oldList.append(new)
1319 1320 if __name__ == '__main__': 1321 import sys 1322 from xml.dom.minidom import parse 1323 from teamwork.multiagent.sequential import SequentialAgents 1324 from teamwork.agent.Entities import PsychEntity 1325 from optparse import OptionParser 1326 1327 # Parse command-line options 1328 parser = OptionParser('usage: %prog [options] SCENARIO') 1329 parser.add_option('-b','--bully-resolution', 1330 dest='bullyResolution', 1331 metavar='BRES', 1332 help='resolution of goal space for bully', 1333 default=5) 1334 parser.add_option('-t','--teacher-resolution', 1335 dest='teacherResolution', 1336 metavar='TRES', 1337 help='resolution of goal space for teacher', 1338 default=5) 1339 parser.add_option('-o','--output-file', 1340 dest='output', 1341 metavar='FILE', 1342 help='file in which to store data [default is standard out]', 1343 default=None) 1344 parser.add_option('-c','--collapse', 1345 dest='collapse', 1346 action='store_true', 1347 help='collapse common rows into a single entry', 1348 default=False) 1349 1350 (options,args) = parser.parse_args() 1351 if len(args) != 1: 1352 parser.error("incorrect number of arguments") 1353 doc = parse(args[0]) 1354 scenario = SequentialAgents() 1355 scenario.parse(doc.documentElement,PsychEntity) 1356 policies = {} 1357 name = 'Teacher' 1358 for agent in scenario.activeMembers(): 1359 print agent.name 1360 for goal in agent.getGoals(): 1361 print '\t',goal,agent.getGoalWeight(goal) 1362 table = PolicyTable(agent,agent.actions.getOptions(), 1363 len(scenario),agent.horizon) 1364 if agent.name != name: 1365 table.rules = [None] 1366 policies[agent.name] = table 1367 1368 ## # Temporary output generation of policies from indices 1369 ## for index in [0,6,687,1030,2059,1,36,43]: 1370 ## policies[name].rules = range(4) 1371 ## policies[name].fromIndex(index) 1372 ## print index 1373 ## print policies[name].rules 1374 ## # End of temporary code 1375 agent = scenario[name] 1376 state = scenario.getState().domain()[0] 1377 granularity = int(options.teacherResolution) 1378 tSpace = agent.generateSpace(granularity) 1379 granularity = int(options.bullyResolution) 1380 bSpace = scenario['Bully'].generateSpace(granularity) 1381 # Print column headings 1382 columns = ['B Type','B Goals','T Type','T Goals','T Policy','T Count', 1383 'B Policy','O Policy','T Action','B ER'] 1384 ## errors = ['B MisAction','O MisAction','T MisAction','Error'] 1385 errors = ['Error'] 1386 if options.collapse: 1387 try: columns.remove('T Type') 1388 except ValueError: pass 1389 try: columns.remove('T Goals') 1390 except ValueError: pass 1391 else: 1392 try: columns.remove('T Count') 1393 except ValueError: pass 1394 header = '' 1395 for column in columns: 1396 if column == 'T Goals': 1397 goals = agent.getGoals() 1398 goals.sort() 1399 for goal in goals: 1400 header += '%s %s,' % (goal.direction,goal.entity[0]) 1401 elif column == 'B Goals': 1402 goals = scenario['Bully'].getGoals() 1403 goals.sort() 1404 for goal in goals: 1405 header += '%s %s,' % (goal.direction,goal.entity[0]) 1406 else: 1407 header += '%s,' % (column) 1408 if options.output: 1409 f = open(options.output,'w') 1410 else: 1411 f = sys.stdout 1412 f.write(header[:-1]+'\n') 1413 for bully in range(len(bSpace)): 1414 # Try each possible set of goals for the bully 1415 feasible = {} 1416 bWeights = bSpace[bully] 1417 for goal,weight in bWeights.items(): 1418 scenario['Bully'].setGoalWeight(goal,weight,normalize=False) 1419 agent.getEntity('Bully').setGoalWeight(goal,weight,normalize=False) 1420 for teacher in 
range(len(tSpace)): 1421 # Try each possible set of goals for the teacher 1422 tWeights = tSpace[teacher] 1423 for goal,weight in tWeights.items(): 1424 agent.setGoalWeight(goal,weight,normalize=False) 1425 policies[name].solve(choices=agent.actions.getOptions(), 1426 policies=policies) 1427 index = policies[name].toIndex() 1428 entry = {'Teacher goals':tWeights, 1429 'Teacher policy':index, 1430 'Bully goals':bWeights, 1431 'Bully RHS':policies['Bully'].rules[0], 1432 'Onlooker RHS':policies['Onlooker'].rules[0], 1433 'Bully ER':policies['Bully'].evaluate(policies,state,{}) 1434 } 1435 # Output data 1436 line = '' 1437 for column in columns: 1438 if column == 'B Goals': 1439 goals = bWeights.keys() 1440 goals.sort() 1441 for goal in goals: 1442 line += '%f,' % (scenario['Bully'].getGoalWeight(goal)) 1443 elif column == 'B Type': 1444 line += '%d,' % (bully) 1445 elif column == 'T Goals': 1446 goals = tWeights.keys() 1447 goals.sort() 1448 for goal in goals: 1449 line += '%f,' % (agent.getGoalWeight(goal)) 1450 elif column == 'T Type': 1451 line += '%d,' % (teacher) 1452 elif column == 'T Policy': 1453 line += '%d,' % (index) 1454 elif column == 'O Policy': 1455 line += '%s,' % (policies['Onlooker'].rules[0][0]['type']) 1456 elif column == 'B Policy': 1457 line += '%s,' % (policies['Bully'].rules[0][0]['type']) 1458 elif column == 'B ER': 1459 line += '%f,' % (entry['Bully ER']) 1460 elif column == 'T Action': 1461 observations = {'Bully':policies['Bully'].execute(state,None), 1462 'Onlooker':policies['Onlooker'].execute(state,None), 1463 } 1464 action = policies['Teacher'].execute(state,observations) 1465 line += '%s,' % (action[0]['type']) 1466 else: 1467 line += '%s,' % (column) 1468 entry['data'] = line 1469 try: 1470 feasible[index].append(entry) 1471 except KeyError: 1472 feasible[index] = [entry] 1473 if not options.collapse: 1474 f.write(line[:-1]+'\n') 1475 f.flush() 1476 if options.collapse: 1477 modelSpace = feasible.keys() 1478 modelSpace.sort() 1479 for model in modelSpace: 1480 # Loop through possible mental models to use 1481 entryList = feasible[model] 1482 entry = entryList[0] 1483 line = entry['data'] 1484 # Count number of teacher models that reduce to this policy 1485 line = line.replace('T Count','%d' % (len(entryList))) 1486 # Analyze lost ER when using this model 1487 policies['Bully'].rules[0] = entry['Bully RHS'] 1488 for real in modelSpace: 1489 realEntry = feasible[real][0] 1490 policies['Teacher'].fromIndex(real) 1491 policies['Onlooker'].rules[0] = realEntry['Onlooker RHS'] 1492 if model == real: 1493 error = 0. 1494 else: 1495 # Measure lost ER 1496 value = policies['Bully'].evaluate(policies,state,{}) 1497 error = entry['Bully ER']-value 1498 for column in errors: 1499 if column == 'Error': 1500 line += '%f,' % (error) 1501 elif column == 'B MisAction': 1502 line += '%s,' % (policies['Bully'].rules[0][0]['type']) 1503 elif column == 'O MisAction': 1504 line += '%s,' % (policies['Onlooker'].rules[0][0]['type']) 1505 elif column == 'T MisAction': 1506 observations = {'Bully':policies['Bully'].execute(state,None), 1507 'Onlooker':policies['Onlooker'].execute(state,None), 1508 } 1509 action = policies['Teacher'].execute(state,observations) 1510 line += '%s,' % (action[0]['type']) 1511 else: 1512 raise NotImplementedError,column 1513 f.write(line[:-1]+'\n') 1514 f.flush() 1515 1516 f.close() 1517