import copy
import string
import time
from types import *

import KeyedVector
from KeyedMatrix import *
from teamwork.math.KeyedTree import *
from ProbabilityTree import *

def expandPolicy(entity,name,interrupt=None,keys=[],debug=0):
    """Expands the policy that C{entity} uses to model the agent named
    C{name} into a decision tree with the relevant action dynamics
    applied"""
    # (implementation elided from this excerpt)

# NOTE: the def line below is a reconstruction; the original line is
# missing from this excerpt, and the helper's name and signature are
# inferred from the body, which uses only an entity and a turn sequence.
def getActionKeys(entity,sequence):
    """Returns the list of action keys relevant to the given entity's
    goals, plus any observation keys tested by the policies of the
    other agents appearing in the given turn sequence"""
    actionKeys = []
    for key in entity.getGoalVector()['action'].keys():
        actionKeys.append(key)
    for turns in sequence:
        for name in turns:
            # Include any observation keys that this agent's policy
            # tree branches on
            other = entity.getEntity(name)
            for key in other.policy.tree.getKeys():
                if key['class'] == 'observation':
                    if not key in actionKeys:
                        actionKeys.append(key)
    return actionKeys

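# Illustrative sketch of how getActionKeys might be called (the agent
# names and turn sequence here are hypothetical, not part of this
# module):
#
#     keys = getActionKeys(entities['Buyer'],[['Seller'],['Buyer']])
#
# Each returned key behaves like a dictionary whose 'class' entry
# identifies its type (e.g., 'observation'), as tested in the loop above.
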
def getLookaheadTree(entity,chosenAction,sequence,
                     local=False,choices=None,
                     goals=None,interrupt=None,debug=0):
60 """
61 @param chosenAction: the action being projected
62 @type chosenAction: L{teamwork.action.PsychActions.Action}[]
63 @param sequence: the turns anticipated by this agent in its lookahead, as a list of turns, where each turn is a list of names
64 @type sequence: str[][]
65 @param local: if C{True}, then the entity will compile a locally optimal policy, expecting itself to behave according to whatever mental model it has of itself; otherwise, it will plan over I{all} of its turns in the sequence (more strategic). Default is C{False}
66 @type local: boolean
67 @param choices: the possible actions to be considered in this policy (if C{None}, defaults to all available actions)
68 @type choices: L{Action}[][]
69 @param goals: if you want an expected value tree (as opposed to a reward-independent sum over state projections), then you can pass in a tree representing the reward function to use (default is to be reward-independent)
70 @type goals: L{ProbabilityTree}
71 @param interrupt: a thread Event, the compilation process will continually test whether the event is set; if it is, it will exit. In other words, this is a way to interrupt the compilation
72 @type interrupt: Event
73 @return: a decision tree representing the dynamics of the given actions followed by the given sequence of agent responses. The sequence is a list of lists of agent name strings. The agents named in list I{i} of the sequence apply their policy-driven actions at time I{i}, where time 0 occurs in parallel with the given entity's performance of the chosen action
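
    Example (an illustrative sketch only; the scenario and the agent
    names 'Buyer' and 'Seller' are hypothetical, not part of this
    module):

        >>> me = entities['Buyer']
        >>> option = me.actions.getOptions()[0]
        >>> result = getLookaheadTree(me,option,[['Seller'],['Buyer']])
        >>> projection = result['transition']
        >>> value = result['total']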
74 """
    if debug > 0 and chosenAction:
        print 'Lookahead for:',chosenAction
    if choices is None:
        choices = entity.actions.getOptions()
    # The accumulated (expected) value over all projected steps
    total = None
    # The product of the step dynamics so far (the overall transition)
    last = None
    # Policy trees expanded for the other agents in the sequence
    policies = {}
    actionKeys = []

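    # First pass: expand the policies (with their dynamics) of every
    # other agent who takes a turn in the sequence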
    flag = chosenAction
    for turns in sequence:
        for name in turns:
            if name == entity.name and flag:
                # The entity's own (first) turn is handled separately,
                # via the chosen action or a compiled subpolicy
                flag = not local
            elif not policies.has_key(name):
                if debug > 1:
                    print 'Expanding policy for:',entity.ancestry(),name
                policies[name] = expandPolicy(entity,name,interrupt,
                                              actionKeys,debug)
                if interrupt and interrupt.isSet():
                    return None
                if debug > 0:
                    print '\tPolicy tree w/dynamics has %d leaves' % \
                          (len(policies[name].leaves()))
                if debug > 1:
                    print 'Expanded policy:', policies[name]

    if len(sequence) == 0:
        sequence = [[entity.name]]
    recursed = False
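    # Second pass: walk the turn sequence, accumulating each step's
    # joint dynamics and (optionally) expected value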
    for t in range(len(sequence)):
        turns = sequence[t]
        if debug > 0:
            print '-----------'
            print 'Time: %d/%d' % (t+1,len(sequence))
            print '-----------'
        subtree = None
        stepValue = None
        for name in turns:
            if debug > 0:
                print 'Turn:',name
            if name == entity.name:
                if chosenAction:
                    if debug > 0:
                        print '\tFixed action:',chosenAction
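                    # Apply the dynamics of the fixed action on the
                    # entity's own first turn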
                    action = {entity.name:chosenAction}
                    newTree = entity.entities.getDynamics(action)['state'].getTree()
                    # The fixed action applies to the first turn only
                    chosenAction = False
                elif local:
                    # Locally optimal: reuse the policy expanded from
                    # the entity's mental model of itself
                    newTree = policies[name]
                    if debug > 0:
                        print '\tInserting policy of size',
                        print len(newTree.leaves())
                else:
                    horizon = len(sequence)-t
                    if debug > 0:
                        print '----------------------------------------------'
                        print '\tComputing subpolicy of horizon:',horizon
                    newTree = entity.policy.compileTree(horizon=horizon,
                                                        choices=choices,
                                                        key='weights',
                                                        interrupt=interrupt,
                                                        debug=debug)
                    if interrupt and interrupt.isSet():
                        return None
                    recursed = True
                    if debug > 0:
                        print '\tFinished with horizon %d (%d leaves)' % \
                              (len(sequence)-t,len(newTree.leaves()))
                        print '----------------------------------------------'
            else:
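                # Another agent's turn: insert its pre-expanded policy
                # dynamics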
                newTree = policies[name]
                if debug > 0:
                    print '\tInserting %s\'s policy dynamics [%d leaves]' % (name,len(newTree.leaves()))
            if newTree.getValue():
                # Fold this agent's tree into the joint dynamics for
                # this time step
                if subtree is None:
                    subtree = newTree
                else:
                    subtree += newTree
                if interrupt and interrupt.isSet():
                    return None
                if debug > 1:
                    print name,subtree.simpleText()
                    print
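                # Optionally weight this step by the reward function to
                # build an expected-value tree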
                if goals:
                    if recursed:
                        # A recursively compiled subpolicy is already a
                        # value tree, so use it as-is
                        subValue = newTree
                    else:
                        subValue = goals*newTree
                    if stepValue is None:
                        stepValue = subValue
                    else:
                        stepValue += subValue
                    if interrupt and interrupt.isSet():
                        return None
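        # Chain this step's joint dynamics onto the overall projection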
        if debug > 0:
            print 'Multiplying overall projection by step %d dynamics [%d leaves]' % \
                  (t+1,len(subtree.leaves()))
        if last:
            last = subtree * last
        else:
            last = subtree
        if debug > 0:
            if total:
                print 'Adding step %d projection [%d leaves] to total [%d leaves]' % \
                      (t+1,len(last.leaves()),len(total.leaves()))
        if interrupt and interrupt.isSet():
            return None
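        # Fold this step's value into the running total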
        if total is None:
            total = stepValue
        else:
            total += stepValue
        if debug > 0:
            print 'Total so far:',len(total.leaves()),'leaves'
        if recursed:
            # The recursively compiled subpolicy already covers all
            # remaining turns, so stop the explicit projection here
            break
    return {'transition':last,
            'total': total}

def getDiffTree(entity,action1,action2,sequence,debug=1):
    """Returns the pair of expected-value decision trees for the two
    specified actions, subject to the provided turn sequence (following
    the format for L{getLookaheadTree}); subtracting the second tree
    from the first yields the state difference S(action1)-S(action2)."""
    gValue = entity.policy.getActionTree(action1)
    if debug > 0:
        print 'EV[%s] = %s' % (action1,gValue.simpleText())
    bValue = entity.policy.getActionTree(action2)
    if debug > 0:
        print 'EV[%s] = %s' % (action2,bValue.simpleText())
    return gValue, bValue
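
# The pair returned by getDiffTree is typically subtracted to obtain a
# difference tree, as in the constraint computation further below.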

# NOTE: the def line below is a reconstruction; the original is missing
# from this excerpt, and the name 'sign' is inferred from the docstring.
def sign(value):
    """Returns 1 for any positive value, -1 for any negative value,
    and 0 for any zero value"""
    return value.__cmp__(0.)
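
# Illustrative behavior (Python 2, where float.__cmp__ is defined):
#     sign(-2.5) -> -1    sign(0.) -> 0    sign(3.7) -> 1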

# NOTE: the def line below is a reconstruction; the original is missing
# from this excerpt.  The function name and the parameters (including
# epsilon, used in the loop below, and its default) are inferred from
# the body.
def findConstraint(entity,goodAction,badAction,sequence,epsilon=0.,debug=1):
    """Returns a dictionary of possible singleton goal weight changes
    that satisfy the constraint that the specified entity prefer the
    goodAction over the badAction given the provided lookahead turn
    sequence. If the constraint is satisfied by the current goal
    weights, then the returned dictionary is empty"""
    goodV,badV = getDiffTree(entity,goodAction,badAction,sequence,
                             max(0,debug-1))
    diffTree = goodV - badV
    if debug > 0:
        print 'Tree:',diffTree.simpleText()
    # Build the current state vector over the keys used by the
    # difference tree
    state = getStateVector(diffTree.getKeys(),entity)
    if debug > 0:
        print 'State:',state
    goals = entity.getGoalVector()['state']
    if debug > 0:
        print 'Goals:',goals
    goodTotal = goodV[state]*state
    badTotal = badV[state]*state
    diffVector = diffTree[state]*state
    if debug > 0:
        print 'EV[%s] =' % (goodAction),goodTotal
        print 'EV[%s] = %5.3f' % (goodAction,float(goals*goodTotal))
        print 'EV[%s] =' % (badAction),badTotal
        print 'EV[%s] = %5.3f' % (badAction,float(goals*badTotal))
        print 'Delta EV =',goodTotal-badTotal
        print 'Delta EV = %5.3f' % \
              (float(goals*goodTotal)-float(goals*badTotal))
        print 'Difference Tree:',diffTree[state].simpleText()
        print 'Difference Vector:',diffVector
    diff = float(goals*diffVector)
    solutions = {}
    constraint = {'delta':diff,
                  'plane':diffVector,
                  'solution':solutions,
                  'slope':{},
                  }
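    # 'delta' is the current EV difference, 'plane' the difference
    # vector defining the constraint plane, 'solution' the (initially
    # empty) set of proposed weight changes, and 'slope' the per-goal
    # sensitivity filled in below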
    if diff > 0.:
        if debug > 0:
            print 'Correct: Chose %s over %s' % (goodAction,badAction)
    else:
        if debug > 0:
            print 'Incorrect: Chose %s over %s' % (badAction,goodAction)
            print 'Off by:',abs(diff)
    # For each goal weight, compute the slope of the constraint plane
    # and the minimal singleton change that would flip the preference
    for key in goals.keys():
        try:
            slope = diffVector[key]
        except KeyError:
            slope = 0.
        constraint['slope'][key] = slope
        try:
            delta = -diff/slope-epsilon
        except ZeroDivisionError:
            delta = 'NaN'
        if debug > 0:
            print key
            print '\tCurrent Value:',goals[key]
            print '\tSlope:',slope
            print '\tdelta:',delta
    return constraint
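
# Illustrative use of findConstraint (a hedged sketch; the agent,
# actions, and turn sequence are hypothetical, and the function name is
# reconstructed as noted above):
#
#     constraint = findConstraint(me,goodOption,badOption,
#                                 [['Seller'],['Buyer']])
#     if constraint['delta'] <= 0.:
#         # the current goal weights violate the preference constraint
#         slopes = constraint['slope']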