1 """Defines the layer for handling reward functions"""
2 import copy
3 from teamwork.math.Keys import StateKey,ObservationKey
4 from teamwork.math.KeyedVector import KeyedVector
5 from teamwork.math.KeyedTree import KeyedPlane,KeyedTree
6 from teamwork.math.probability import Distribution
7 from teamwork.math.ProbabilityTree import ProbabilityTree
8 from teamwork.reward.goal import PWLGoal,minGoal,maxGoal
9 from teamwork.reward.MinMaxGoal import MinMaxGoal
10 from RecursiveAgent import RecursiveAgent
11
13 """An entity mix-in class that has a reward function based on maximization/minimization of features/actions
14 @ivar goals: the goals of this agent
15 @type goals: L{MinMaxGoal}S{->}float
16 @ivar horizon: the horizon of this agent's lookahead
17 @type horizon: int
18 @ivar constraints: the constraints on the goal weights already imposed.
19 Each constraint is a dictionary, with the key element being the 'plane'
20 expressing the constraint
21 @type constraints: dict:str->L{KeyedPlane}
22 """
23
24 valueType = 'average'
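    # The constructor does not appear in this excerpt; the methods below
    # assume attributes along these lines (a minimal sketch, not the
    # original implementation):
    #
    #     def __init__(self,**kargs):
    #         RecursiveAgent.__init__(self,**kargs)
    #         self.goals = Distribution()   # L{MinMaxGoal}S{->}float
    #         self.constraints = {}         # label -> constraint list
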
35 """Sets the goals to the provided list, after normalizing weights
36 @type goals: L{MinMaxGoal}[]
37 @warning: Replaces any existing goals."""
38
39 self.goals.clear()
40
41 for goal in goals:
42 if isinstance(goal,MinMaxGoal):
43 goalObj = goal
44 else:
45 try:
46 key = goal['key']
47 except KeyError:
48 key = goal['feature']
49 print 'Warning: Use "key" instead of "feature" when specifying goals'
50 goalObj = MinMaxGoal(entity=goal['entity'],
51 direction=goal['direction'],
52 goalType=goal['type'],
53 key=key)
54 try:
55 goalObj.weight = goal['weight']
56 except KeyError:
57 raise KeyError,'%s has goal "%s" with no weight' % \
58 (self.ancestry(),`goalObj`)
59 self.setGoalWeight(goalObj,goalObj.weight,False)
60 self.normalizeGoals()

    def normalizeGoals(self):
        """Scales all goal weights so that they sum to 1."""
        self.goals.normalize()
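
    # A minimal usage sketch (hypothetical agent and feature names): goals can
    # be passed as MinMaxGoal instances or as dictionaries; the weights are
    # normalized afterwards, so 1. and 3. below become 0.25 and 0.75.
    #
    #     agent.setGoals([{'entity':['victim'],'direction':'min',
    #                      'type':'state','key':'pain','weight':1.},
    #                     {'entity':['self'],'direction':'max',
    #                      'type':'state','key':'power','weight':3.}])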
    def applyGoals(self,entity=None,world=None,debug=None):
        """
        @param entity: the entity whose goals are to be evaluated (defaults to C{self})
        @type entity: L{GoalBasedAgent}
        @param world: the context for evaluating the goals (defaults to the beliefs of I{entity})
        @type world: L{teamwork.multiagent.PsychAgents.PsychAgents}
        @return: expected reward of the I{entity} in the current I{world}
        @rtype: L{Distribution} over C{float}"""
        if entity is None:
            entity = self
        if world is None:
            world = self.entities
        state = world.getState()
        goals = entity.getGoalVector()
        # Fill in any missing state features with zero weight
        goals['state'].fill(state.domain()[0].keys(),0.)
        return goals['state']*state
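
    # For example, the expected reward of this agent under its own goals and
    # current beliefs (a sketch, assuming a hypothetical 'agent' instance):
    #
    #     reward = agent.applyGoals()
    #     print reward.expectation()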
122 """
123 @rtype: L{PWLGoal}[]"""
124 return self.goals.domain()

    def getGoalWeight(self,goal):
        """
        @type goal: L{PWLGoal}
        @return: the weight of the specified goal, or 0 if the agent does not have this goal
        @rtype: float
        """
        for existing in self.goals.domain():
            if existing.keys == goal.keys:
                return self.goals[existing]
        # No matching goal found
        return 0.0
139 """Returns a vector representing the goal weights
140 @rtype: L{KeyedVector} instance"""
141 stateGoals = KeyedVector()
142 actionGoals = KeyedVector()
143 totalGoals = KeyedVector()
144 for goal in self.getGoals():
145 key = goal.toKey()
146 weight = self.getGoalWeight(goal)
147 if not goal.isMax():
148 weight = -weight
149 if isinstance(key,StateKey):
150 stateGoals[key] = weight
151 elif isinstance(key,ActionKey):
152 actionGoals[key] = weight
153 else:
154 raise NotImplementedError,'Unable to make vector with %s instances' % (key.__class__.__name__)
155 totalGoals[key] = weight
156 return {'state':Distribution({stateGoals:1.}),
157 'action':Distribution({actionGoals:1.}),
158 'total':Distribution({totalGoals:1.}),
159 }
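
    # For an agent maximizing a single state feature, a sketch of the result
    # (hypothetical keys): the 'state' entry is a point Distribution over a
    # KeyedVector with weight 1. on that feature, and 'action' is empty.
    #
    #     vectors = agent.getGoalVector()
    #     weights = vectors['state'].domain()[0]   # a KeyedVector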
    def actionValue(self,actions,horizon=1,state=None,debug=None):
        """Computes the expected value of performing the given actions
        @param actions: the actions whose effect we want to evaluate
        @type actions: L{Action}[]
        @param horizon: the length of the forward projection
        @type horizon: int
        @param state: the world state to evaluate the actions in (defaults to current world state)
        @type state: L{teamwork.math.probability.Distribution}
        @return: the expected value and an explanation of its computation
        """
        if not self.hasBelief(self.name):
            # Without beliefs about itself, the agent has nothing to evaluate
            return Distribution({0.:1.}),{}
        if debug:
            debug.message(3,'Computing EV[%s]' % (`actions`))
        if state is None:
            state = self.getAllBeliefs()
        start = {self.name:actions}
        value,explanation = self.expectedValue(horizon=horizon,
                                               start=start,
                                               state=state,
                                               goals=[self],
                                               debug=debug)
        value = value[self.name]
        return value,explanation
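
    # A usage sketch (hypothetical agent): evaluate the agent's first
    # available option over a two-step lookahead.
    #
    #     option = agent.actions.getOptions()[0]
    #     value,explanation = agent.actionValue(option,horizon=2)
    #     print value.expectation()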
    def expectedValue(self,horizon=1,start={},goals=None,state=None,
                      debug=None):
        """
        @param horizon: the horizon for the lookahead when computing the expected value
        @type horizon: C{int}
        @param start: a dictionary of actions to be specified in the first time step
        @type start: C{dict:strS{->}L{Action}[]}
        @param goals: the agent(s) whose reward function should be used to compute the expectation (defaults to C{self})
        @type goals: C{L{GoalBasedAgent}[]}
        @param state: the world state to evaluate the actions in (defaults to current world state)
        @type state: L{teamwork.math.probability.Distribution}
        @type debug: L{Debugger}
        @return: the expected reward from the current state
        """
        if goals is None:
            goals = [self]
        if state is None:
            state = self.getAllBeliefs()
        # Project the world forward over the given horizon
        sequence = self.multistep(horizon=horizon,start=start,
                                  state=state,debug=debug)
        value = {}
        if self.valueType == 'average':
            # Average the expected state over the projected sequence
            expectation = None
            for t in range(len(sequence)):
                delta = sequence[t]
                if delta['state']:
                    if expectation is None:
                        expectation = delta['state'].expectation()
                    else:
                        expectation += delta['state'].expectation()
            if expectation is None:
                # Nothing changed over the projection
                expectation = {}
            else:
                scale = float(len(sequence))
                for key in expectation.keys():
                    expectation[key] /= scale
        elif self.valueType == 'final':
            # The projection updates state in place, so this is the final state
            expectation = state['state'].expectation()
        else:
            raise NotImplementedError,\
                  'I do not know how to compute "%s" expected value' \
                  % (self.valueType)
        for goalEntity in goals:
            name = goalEntity.name
            reward = goalEntity.getGoalVector()['state']
            if len(expectation) == 1:
                # Only the constant feature remains
                expectedReward = 0.
            elif len(expectation) == 0:
                # Nothing to evaluate
                expectedReward = 0.
            else:
                reward.fill(expectation.keys())
                expectedReward = reward*expectation
            try:
                value[name] += expectedReward
            except KeyError:
                value[name] = expectedReward
            if debug:
                debug.message(6,'R[%s] = %s' % (name,`value[name]`))
        if len(value) == 0:
            # Fall back on applying the goals to the current state
            for goal in goals:
                value[goal.name] = goal.applyGoals(None,debug=debug)
        return value,{'value':value,
                      'projection':expectation,
                      'breakdown':sequence,
                      }
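
    # Under valueType == 'average', the per-step state expectations are summed
    # and divided by the sequence length; e.g., if a feature's expected value
    # is 0.2, 0.4, and 0.6 over a three-step projection, the reward weights
    # are applied to (0.2+0.4+0.6)/3 = 0.4.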
279 """
280 @param constant: if C{True}, include a column for the constant factor (which will be 1)
281 @type constant: bool
282 @return: the vector expressing the constraint that the goal weights sum to 1
283 @rtype: L{KeyedVector}
284 """
285 weights = KeyedVector()
286 for goal in self.getGoals():
287 key = goal.toKey()
288 if goal.isMax():
289 weights[key] = 1.
290 else:
291 weights[key] = -1.
292 if constant:
293 weights[keyConstant] = 1.
294 weights.freeze()
295 return weights
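
    # For an agent maximizing 'power' and minimizing 'pain' (hypothetical
    # keys), this vector has +1 and -1 entries respectively, so its dot
    # product with the signed goal vector equals the sum of the unsigned
    # goal weights.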
298 """Computes a set of constraints on possible goal weights for this agent that, if satisfied, will cause the agent to prefer the desired action in the given state. Each constraint is dictionary with the following elements:
299 - delta: the total difference that must be made up
300 - slope: dictionary of coefficients for each goal weight in the sum that must make up that difference
301 - plane: the vector of weights, such that the product of this vector and the goal weight vector must exceed 0 for the desired action to be preferred
302 @param desired: the action that the agent should prefer
303 @type desired: L{Action}[]
304 @param horizon: the horizon of lookahead to use (if not provided, the agent's default horizon is used)
305 @type horizon: int
306 @param state: the current state of this agent's beliefs (if not provided, defaults to the result of L{getAllBeliefs}
307 @type state: dict
308 @return: a list of constraints
309 @rtype: dict[]
310 """
311 if horizon < 0:
312 horizon = self.horizon
313 if state is None:
314 state = self.getAllBeliefs()
315 goals = self.getGoalVector()['total']
316 if len(goals.domain()) != 1:
317 raise NotImplementedError,\
318 'Unable to handle uncertain goals when fitting'
319 goals = goals.domain()[0]
320
321 matrices = {}
322 for action in self.actions.getOptions():
323 sequence = self.multistep(horizon=horizon,
324 start={self.name:action},
325 state=copy.deepcopy(state))
326 value = None
327 if self.valueType == 'average':
328 for t in range(len(sequence)):
329 current = copy.deepcopy(sequence[t]['state'])
330
331 if value is None:
332 value = current.expectation()
333 else:
334 current.unfreeze()
335 current.fill(value.keys())
336 current.freeze()
337 value += current.expectation()
338
339 for key in filter(lambda k:isinstance(k,ObservationKey),
340 goals.keys()):
341 if not value.has_key(key):
342 value.unfreeze()
343 value[key] = 0.
344 value.freeze()
345 for act in sum(sequence[t]['action'].values(),[]):
346 if act['type'] == key['type']:
347 value[key] += 1.
348 elif self.valueType == 'final':
349
350 value = sequence[-1]['state']
351 else:
352 raise NotImplementedError,\
353 'I do not know how to fit "%s" expected value' \
354 % (self.valueType)
355 matrices[str(action)] = value
356
357 constraints = []
358 goals.fill(matrices[str(desired)].keys())
359 for action in self.actions.getOptions():
360 if action != desired:
361 projection = matrices[str(desired)] - matrices[str(action)]
362 diff = goals*projection
363 constraint = {'delta':diff,
364 'value':True,
365 'slope':KeyedVector(),
366 'option':action,
367 }
368 for goal in self.getGoals():
369 key = goal.toKey()
370 constraint['slope'][key] = projection[key]
371 constraint['plane'] = KeyedPlane(constraint['slope'],0.)
372 constraint['plane'].weights.freeze()
373 constraints.append(constraint)
374 return constraints
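
    # A goal-weight vector w prefers the desired option over the alternative
    # in a constraint exactly when w*constraint['slope'] > 0.  A sketch of
    # checking the current weights (hypothetical 'option'):
    #
    #     goals = agent.getGoalVector()['total'].domain()[0]
    #     for constraint in agent.generateConstraints(option):
    #         if not constraint['plane'].test(goals):
    #             print 'violated against',constraint['option']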
    def fit(self,desired,horizon=-1,state=None,granularity=0.01,label=None):
        """Computes a new set of goal weights for this agent that will cause the agent to prefer the desired action in the given state.
        @param desired: the action that the agent should prefer
        @type desired: L{Action}[]
        @param horizon: the horizon of lookahead to use (if not provided, the agent's default horizon is used)
        @type horizon: int
        @param state: the current state of this agent's beliefs (if not provided, defaults to the result of L{getAllBeliefs})
        @type state: dict
        @param granularity: the minimum movement of a goal weight (default is 0.01)
        @type granularity: float
        @param label: the label to store the generated constraints under,
        overwriting any previous constraints using the same label (by default,
        C{None})
        @type label: str
        @return: a goal vector (or an error message if no such vector exists)
        @rtype: L{KeyedVector} (str)
        """
        constraints = self.generateConstraints(desired,horizon,state)
        # Remove any duplicate constraints
        remove = {}
        for index in range(len(constraints)):
            constraint = constraints[index]
            for other in constraints[:index]:
                result = other['plane'].compare(constraint['plane'])
                if result == 'equal':
                    remove[index] = True
                    break
        remove = remove.keys()
        remove.sort()
        remove.reverse()
        for index in remove:
            del constraints[index]
        # Merge in constraints saved from previous fits under other labels
        cumulative = constraints[:]
        for key,values in self.constraints.items():
            if key != label:
                cumulative += values
        # Combine the state and action goal weights into a single vector
        vectors = self.getGoalVector()
        goals = vectors['state'].domain()[0]
        vector = vectors['action'].domain()[0]
        for key in vector.keys():
            goals[key] = vector[key]
        # Build a decision tree representing the binding constraint surface
        surface = None
        for index in range(len(cumulative)):
            constraint = cumulative[index]
            if constraint['plane'].isZero():
                # No goal weights can distinguish the two options
                return 'The selected action is equivalent to %s' % \
                       (', '.join(map(str,constraint['option'])))
            elif surface is None:
                surface = KeyedTree(constraint['plane'])
            else:
                split = []
                for other in cumulative[:index]:
                    result = other['plane'].compare(constraint['plane'])
                    if result == 'indeterminate':
                        # Find the region where this constraint dominates
                        weights = constraint['plane'].weights - \
                                  other['plane'].weights
                        threshold = constraint['plane'].threshold - \
                                    other['plane'].threshold
                        # Sum the magnitudes of the weight differences
                        total = 0.
                        for key in weights.keys():
                            if weights[key] > 0.:
                                total += weights[key]
                            else:
                                total -= weights[key]
                        if total > threshold:
                            split.append(KeyedPlane(weights,threshold))
                        else:
                            # This constraint can never dominate
                            break
                    elif result == 'equal':
                        # Already represented in the surface
                        break
                    elif result == 'less':
                        # The other constraint does not make this one redundant
                        pass
                    elif result == 'greater':
                        # This constraint is already subsumed, so skip it
                        break
                    elif result == 'inverse':
                        # The two constraints cannot both be satisfied
                        return 'There are contradictory constraints on the goals.'
                else:
                    subtree = KeyedTree(constraint['plane'])
                    if len(split) > 0:
                        # Branch between this constraint and the old surface
                        new = KeyedTree()
                        new.branch(split,subtree,surface)
                        surface = new
                    else:
                        # This constraint supersedes the old surface
                        surface = subtree
        # Check whether the current weights already satisfy the constraints
        if surface[goals].test(goals):
            return goals
        # Sort the goal weights into those to increase and those to decrease
        plane = surface[goals]
        increase = {}
        decrease = {}
        for key in goals.keys():
            if goals[key]*plane.weights[key] > 0.:
                increase[key] = True
            elif goals[key]*plane.weights[key] < 0.:
                decrease[key] = True
        if len(increase) == 0 or len(decrease) == 0:
            # No goal weights affect the binding constraint
            return 'No goals motivate the desired action!'
        # Hill-climb on the goal weights until the constraints are satisfied
        for index in range(100):
            plane = surface[goals]
            if plane.test(goals):
                # Success: save the constraints and return the new weights
                self.constraints[label] = constraints
                return goals
            # Drop weights that have already reached their bounds
            for key in increase.keys():
                if abs(goals[key]) > 1.- granularity:
                    del increase[key]
            for key in decrease.keys():
                if abs(goals[key]) < granularity:
                    del decrease[key]
            # Rebalance when the increase and decrease sets differ in size,
            # dropping weights too small to move by the scaled granularity
            change = True
            while change:
                if len(increase) == 0 or len(decrease) == 0:
                    # Nothing left to adjust
                    break
                change = False
                ratio = float(len(increase))/float(len(decrease))
                if ratio > 1.:
                    for key in decrease.keys():
                        if abs(goals[key]) < granularity*ratio:
                            del decrease[key]
                            change = True
                            break
                elif ratio < 1.:
                    for key in increase.keys():
                        if abs(goals[key]) < granularity/ratio:
                            del increase[key]
                            change = True
                            break
            if change:
                # Ran out of weights to adjust
                break
            else:
                # Shift weight in the direction favored by the constraint
                for key in increase.keys():
                    if plane.weights[key] > 0.:
                        delta = granularity
                    elif plane.weights[key] < 0.:
                        delta = -granularity
                    if ratio < 1.:
                        delta /= ratio
                    goals[key] += delta
                for key in decrease.keys():
                    if plane.weights[key] > 0.:
                        delta = granularity
                    elif plane.weights[key] < 0.:
                        delta = -granularity
                    if ratio > 1.:
                        delta *= ratio
                    goals[key] += delta
        return 'Unable to find a satisfying set of goal weights.'
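
    # A usage sketch (hypothetical 'option'): fit the goal weights so that
    # the agent prefers the given option, filing the constraints under a
    # label so a later fit can replace them.
    #
    #     result = agent.fit(desired=option,label='tutorial')
    #     if isinstance(result,str):
    #         print result   # fitting failed
    #     else:
    #         print result   # a KeyedVector of fitted goal weights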
    def __xml__(self):
        doc = RecursiveAgent.__xml__(self)
        # Store the goals and their weights
        root = doc.createElement('goals')
        doc.documentElement.appendChild(root)
        for goal,weight in self.goals.items():
            node = goal.__xml__().documentElement
            node.setAttribute('weight',str(weight))
            root.appendChild(node)
        # Store the lookahead horizon
        doc.documentElement.setAttribute('horizon',str(self.horizon))
        return doc
    def parse(self,element):
        """Extracts goal and horizon information from the given XML element,
        inverting L{__xml__} (implementation elided from this excerpt)"""

if __name__ == '__main__':
    from teamwork.test.agent.testRecursiveAgent import TestRecursiveAgentIraq
    from unittest import TestResult

    case = TestRecursiveAgentIraq('testValueAttack')
    result = TestResult()
    case(result)
    for failure in result.errors+result.failures:
        print failure[1]