1 """Defines the layer for handling reward functions"""
2 import copy
3 from teamwork.math.Keys import StateKey,ObservationKey
4 from teamwork.math.KeyedVector import KeyedVector
5 from teamwork.math.KeyedTree import KeyedPlane,KeyedTree
6 from teamwork.math.probability import Distribution
7 from teamwork.math.ProbabilityTree import ProbabilityTree
8 from teamwork.reward.goal import PWLGoal,minGoal,maxGoal
9 from teamwork.reward.MinMaxGoal import MinMaxGoal
10 from RecursiveAgent import RecursiveAgent
11
13 """An entity mix-in class that has a reward function based on maximization/minimization of features/actions
14 @ivar goals: the goals of this agent
15 @type goals: L{MinMaxGoal}S{->}float
16 @ivar horizon: the horizon of this agent's lookahead
17 @type horizon: int
18 @ivar constraints: the constraints on the goal weights already imposed.
19 Each constraint is a dictionary, with the key element being the 'plane'
20 expressing the constraint
21 @type constraints: dict:str->L{KeyedPlane}
22 """
23
24 valueType = 'average'
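    # The constructor does not appear in this excerpt; the methods below
    # assume attributes along these lines (a minimal sketch, not the
    # original implementation):
    #
    #     def __init__(self,**kargs):
    #         RecursiveAgent.__init__(self,**kargs)
    #         self.goals = Distribution()   # L{MinMaxGoal}S{->}float
    #         self.constraints = {}         # label -> constraint list
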
35 """Sets the goals to the provided list, after normalizing weights
36 @type goals: L{MinMaxGoal}[]
37 @warning: Replaces any existing goals."""
38
39 self.goals.clear()
40
41 for goal in goals:
42 if isinstance(goal,MinMaxGoal):
43 goalObj = goal
44 else:
45 try:
46 key = goal['key']
47 except KeyError:
48 key = goal['feature']
49 print 'Warning: Use "key" instead of "feature" when specifying goals'
50 goalObj = MinMaxGoal(entity=goal['entity'],
51 direction=goal['direction'],
52 goalType=goal['type'],
53 key=key)
54 try:
55 goalObj.weight = goal['weight']
56 except KeyError:
57 raise KeyError,'%s has goal "%s" with no weight' % \
58 (self.ancestry(),`goalObj`)
59 self.setGoalWeight(goalObj,goalObj.weight,False)
60 self.normalizeGoals()

    def normalizeGoals(self):
        """Scales all goal weights so that they sum to 1."""
        self.goals.normalize()
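
    # A minimal usage sketch (hypothetical agent and feature names): goals can
    # be passed as MinMaxGoal instances or as dictionaries; the weights are
    # normalized afterwards, so 1. and 3. below become 0.25 and 0.75.
    #
    #     agent.setGoals([{'entity':['victim'],'direction':'min',
    #                      'type':'state','key':'pain','weight':1.},
    #                     {'entity':['self'],'direction':'max',
    #                      'type':'state','key':'power','weight':3.}])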
    def applyGoals(self,entity=None,world=None,debug=None):
        """
        @param entity: the entity whose goals are to be evaluated (defaults to C{self})
        @type entity: L{GoalBasedAgent}
        @param world: the context for evaluating the goals (defaults to the beliefs of I{entity})
        @type world: L{teamwork.multiagent.PsychAgents.PsychAgents}
        @return: expected reward of the I{entity} in the current I{world}
        @rtype: L{Distribution} over C{float}"""
        if entity is None:
            entity = self
        if world is None:
            world = self.entities
        state = world.getState()
        goals = entity.getGoalVector()
        # Fill in any missing state features with zero weight
        goals['state'].fill(state.domain()[0].keys(),0.)
        return goals['state']*state
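
    # For example, the expected reward of this agent under its own goals and
    # current beliefs (a sketch, assuming a hypothetical 'agent' instance):
    #
    #     reward = agent.applyGoals()
    #     print reward.expectation()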
122 """
123 @rtype: L{PWLGoal}[]"""
124 return self.goals.domain()

    def getGoalWeight(self,goal):
        """
        @type goal: L{PWLGoal}
        @return: the weight of the specified goal, or 0 if the agent does not have this goal
        @rtype: float
        """
        for existing in self.goals.domain():
            if existing.keys == goal.keys:
                return self.goals[existing]
        # No matching goal found
        return 0.0
139 """Returns a vector representing the goal weights
140 @rtype: L{KeyedVector} instance"""
141 stateGoals = KeyedVector()
142 actionGoals = KeyedVector()
143 totalGoals = KeyedVector()
144 for goal in self.getGoals():
145 key = goal.toKey()
146 weight = self.getGoalWeight(goal)
147 if not goal.isMax():
148 weight = -weight
149 if isinstance(key,StateKey):
150 stateGoals[key] = weight
151 elif isinstance(key,ActionKey):
152 actionGoals[key] = weight
153 else:
154 raise NotImplementedError,'Unable to make vector with %s instances' % (key.__class__.__name__)
155 totalGoals[key] = weight
156 return {'state':Distribution({stateGoals:1.}),
157 'action':Distribution({actionGoals:1.}),
158 'total':Distribution({totalGoals:1.}),
159 }
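
    # For an agent maximizing a single state feature, a sketch of the result
    # (hypothetical keys): the 'state' entry is a point Distribution over a
    # KeyedVector with weight 1. on that feature, and 'action' is empty.
    #
    #     vectors = agent.getGoalVector()
    #     weights = vectors['state'].domain()[0]   # a KeyedVector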
    def actionValue(self,actions,horizon=1,state=None,debug=None):
        """Computes the expected value of performing the given actions
        @param actions: the actions whose effect we want to evaluate
        @type actions: L{Action}[]
        @param horizon: the length of the forward projection
        @type horizon: int
        @param state: the world state to evaluate the actions in (defaults to current world state)
        @type state: L{teamwork.math.probability.Distribution}
        @return: the expected value and an explanation of its computation
        """
        if not self.hasBelief(self.name):
            # Without beliefs about itself, the agent has nothing to evaluate
            return Distribution({0.:1.}),{}
        if debug:
            debug.message(3,'Computing EV[%s]' % (`actions`))
        if state is None:
            state = self.getAllBeliefs()
        start = {self.name:actions}
        value,explanation = self.expectedValue(horizon=horizon,
                                               start=start,
                                               state=state,
                                               goals=[self],
                                               debug=debug)
        value = value[self.name]
        return value,explanation
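
    # A usage sketch (hypothetical agent): evaluate the agent's first
    # available option over a two-step lookahead.
    #
    #     option = agent.actions.getOptions()[0]
    #     value,explanation = agent.actionValue(option,horizon=2)
    #     print value.expectation()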
    def expectedValue(self,horizon=1,start={},goals=None,state=None,
                      debug=None):
        """
        @param horizon: the horizon for the lookahead when computing the expected value
        @type horizon: C{int}
        @param start: a dictionary of actions to be specified in the first time step
        @type start: C{dict:strS{->}L{Action}[]}
        @param goals: the agent(s) whose reward function should be used to compute the expectation (defaults to C{self})
        @type goals: C{L{GoalBasedAgent}[]}
        @param state: the world state to evaluate the actions in (defaults to current world state)
        @type state: L{teamwork.math.probability.Distribution}
        @type debug: L{Debugger}
        @return: the expected reward from the current state
        """
        if goals is None:
            goals = [self]
        if state is None:
            state = self.getAllBeliefs()
        # Project the world forward over the given horizon
        sequence = self.multistep(horizon=horizon,start=start,
                                  state=state,debug=debug)
        value = {}
        if self.valueType == 'average':
            # Average the expected state over the projected sequence
            expectation = None
            for t in range(len(sequence)):
                delta = sequence[t]
                if delta['state']:
                    if expectation is None:
                        expectation = delta['state'].expectation()
                    else:
                        expectation += delta['state'].expectation()
            if expectation is None:
                # Nothing changed over the projection
                expectation = {}
            else:
                scale = float(len(sequence))
                for key in expectation.keys():
                    expectation[key] /= scale
        elif self.valueType == 'final':
            # The projection updates state in place, so this is the final state
            expectation = state['state'].expectation()
        else:
            raise NotImplementedError,\
                  'I do not know how to compute "%s" expected value' \
                  % (self.valueType)
        for goalEntity in goals:
            name = goalEntity.name
            reward = goalEntity.getGoalVector()['state']
            if len(expectation) == 1:
                # Only the constant feature remains
                expectedReward = 0.
            elif len(expectation) == 0:
                # Nothing to evaluate
                expectedReward = 0.
            else:
                reward.fill(expectation.keys())
                expectedReward = reward*expectation
            try:
                value[name] += expectedReward
            except KeyError:
                value[name] = expectedReward
            if debug:
                debug.message(6,'R[%s] = %s' % (name,`value[name]`))
        if len(value) == 0:
            # Fall back on applying the goals to the current state
            for goal in goals:
                value[goal.name] = goal.applyGoals(None,debug=debug)
        return value,{'value':value,
                      'projection':expectation,
                      'breakdown':sequence,
                      }
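
    # Under valueType == 'average', the per-step state expectations are summed
    # and divided by the sequence length; e.g., if a feature's expected value
    # is 0.2, 0.4, and 0.6 over a three-step projection, the reward weights
    # are applied to (0.2+0.4+0.6)/3 = 0.4.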
279 """
280 @param constant: if C{True}, include a column for the constant factor (which will be 1)
281 @type constant: bool
282 @return: the vector expressing the constraint that the goal weights sum to 1
283 @rtype: L{KeyedVector}
284 """
285 weights = KeyedVector()
286 for goal in self.getGoals():
287 key = goal.toKey()
288 if goal.isMax():
289 weights[key] = 1.
290 else:
291 weights[key] = -1.
292 if constant:
293 weights[keyConstant] = 1.
294 weights.freeze()
295 return weights
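
    # For an agent maximizing 'power' and minimizing 'pain' (hypothetical
    # keys), this vector has +1 and -1 entries respectively, so its dot
    # product with the signed goal vector equals the sum of the unsigned
    # goal weights.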
298 """Computes a set of constraints on possible goal weights for this agent that, if satisfied, will cause the agent to prefer the desired action in the given state. Each constraint is dictionary with the following elements:
299 - delta: the total difference that must be made up
300 - slope: dictionary of coefficients for each goal weight in the sum that must make up that difference
301 - plane: the vector of weights, such that the product of this vector and the goal weight vector must exceed 0 for the desired action to be preferred
302 @param desired: the action that the agent should prefer
303 @type desired: L{Action}[]
304 @param horizon: the horizon of lookahead to use (if not provided, the agent's default horizon is used)
305 @type horizon: int
306 @param state: the current state of this agent's beliefs (if not provided, defaults to the result of L{getAllBeliefs}
307 @type state: dict
308 @return: a list of constraints
309 @rtype: dict[]
310 """
311 if horizon < 0:
312 horizon = self.horizon
313 if state is None:
314 state = self.getAllBeliefs()
315 goals = self.getGoalVector()['total']
316 if len(goals.domain()) != 1:
317 raise NotImplementedError,\
318 'Unable to handle uncertain goals when fitting'
319 goals = goals.domain()[0]
320
321 matrices = {}
322 for action in self.actions.getOptions():
323 sequence = self.multistep(horizon=horizon,
324 start={self.name:action},
325 state=copy.deepcopy(state))
326 value = None
327 if self.valueType == 'average':
328 for t in range(len(sequence)):
329 current = copy.deepcopy(sequence[t]['state'])
330
331 if value is None:
332 value = current.expectation()
333 else:
334 current.unfreeze()
335 current.fill(value.keys())
336 current.freeze()
337 value += current.expectation()
338
339 for key in filter(lambda k:isinstance(k,ObservationKey),
340 goals.keys()):
341 if not value.has_key(key):
342 value.unfreeze()
343 value[key] = 0.
344 value.freeze()
345 for act in sum(sequence[t]['action'].values(),[]):
346 if act['type'] == key['type']:
347 value[key] += 1.
348 elif self.valueType == 'final':
349
350 value = sequence[-1]['state']
351 else:
352 raise NotImplementedError,\
353 'I do not know how to fit "%s" expected value' \
354 % (self.valueType)
355 matrices[str(action)] = value
356
357 constraints = []
358 goals.fill(matrices[str(desired)].keys())
359 for action in self.actions.getOptions():
360 if action != desired:
361 projection = matrices[str(desired)] - matrices[str(action)]
362 diff = goals*projection
363 constraint = {'delta':diff,
364 'value':True,
365 'slope':KeyedVector(),
366 'option':action,
367 }
368 for goal in self.getGoals():
369 key = goal.toKey()
370 constraint['slope'][key] = projection[key]
371 constraint['plane'] = KeyedPlane(constraint['slope'],0.)
372 constraint['plane'].weights.freeze()
373 constraints.append(constraint)
374 return constraints
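
    # A goal-weight vector w prefers the desired option over the alternative
    # in a constraint exactly when w*constraint['slope'] > 0.  A sketch of
    # checking the current weights (hypothetical 'option'):
    #
    #     goals = agent.getGoalVector()['total'].domain()[0]
    #     for constraint in agent.generateConstraints(option):
    #         if not constraint['plane'].test(goals):
    #             print 'violated against',constraint['option']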
    def fit(self,desired,horizon=-1,state=None,granularity=0.01,label=None):
        """Computes a new set of goal weights for this agent that will cause the agent to prefer the desired action in the given state.
        @param desired: the action that the agent should prefer
        @type desired: L{Action}[]
        @param horizon: the horizon of lookahead to use (if not provided, the agent's default horizon is used)
        @type horizon: int
        @param state: the current state of this agent's beliefs (if not provided, defaults to the result of L{getAllBeliefs})
        @type state: dict
        @param granularity: the minimum movement of a goal weight (default is 0.01)
        @type granularity: float
        @param label: the label to store the generated constraints under,
        overwriting any previous constraints using the same label (by default,
        C{None})
        @type label: str
        @return: a goal vector (or an error message if no such vector exists)
        @rtype: L{KeyedVector} (str)
        """
        constraints = self.generateConstraints(desired,horizon,state)
        # Remove any duplicate constraints
        remove = {}
        for index in range(len(constraints)):
            constraint = constraints[index]
            for other in constraints[:index]:
                result = other['plane'].compare(constraint['plane'])
                if result == 'equal':
                    remove[index] = True
                    break
        remove = remove.keys()
        remove.sort()
        remove.reverse()
        for index in remove:
            del constraints[index]
        # Merge in constraints saved from previous fits under other labels
        cumulative = constraints[:]
        for key,values in self.constraints.items():
            if key != label:
                cumulative += values
        # Combine the state and action goal weights into a single vector
        vectors = self.getGoalVector()
        goals = vectors['state'].domain()[0]
        vector = vectors['action'].domain()[0]
        for key in vector.keys():
            goals[key] = vector[key]
        # Build a decision tree representing the binding constraint surface
        surface = None
        for index in range(len(cumulative)):
            constraint = cumulative[index]
            if constraint['plane'].isZero():
                # No goal weights can distinguish the two options
                return 'The selected action is equivalent to %s' % \
                       (', '.join(map(str,constraint['option'])))
            elif surface is None:
                surface = KeyedTree(constraint['plane'])
            else:
                split = []
                for other in cumulative[:index]:
                    result = other['plane'].compare(constraint['plane'])
                    if result == 'indeterminate':
                        # Find the region where this constraint dominates
                        weights = constraint['plane'].weights - \
                                  other['plane'].weights
                        threshold = constraint['plane'].threshold - \
                                    other['plane'].threshold
                        # Sum the magnitudes of the weight differences
                        total = 0.
                        for key in weights.keys():
                            if weights[key] > 0.:
                                total += weights[key]
                            else:
                                total -= weights[key]
                        if total > threshold:
                            split.append(KeyedPlane(weights,threshold))
                        else:
                            # This constraint can never dominate
                            break
                    elif result == 'equal':
                        # Already represented in the surface
                        break
                    elif result == 'less':
                        # The other constraint does not make this one redundant
                        pass
                    elif result == 'greater':
                        # This constraint is already subsumed, so skip it
                        break
                    elif result == 'inverse':
                        # The two constraints cannot both be satisfied
                        return 'There are contradictory constraints on the goals.'
                else:
                    subtree = KeyedTree(constraint['plane'])
                    if len(split) > 0:
                        # Branch between this constraint and the old surface
                        new = KeyedTree()
                        new.branch(split,subtree,surface)
                        surface = new
                    else:
                        # This constraint supersedes the old surface
                        surface = subtree
        # Check whether the current weights already satisfy the constraints
        if surface[goals].test(goals):
            return goals
        # Sort the goal weights into those to increase and those to decrease
        plane = surface[goals]
        increase = {}
        decrease = {}
        for key in goals.keys():
            if goals[key]*plane.weights[key] > 0.:
                increase[key] = True
            elif goals[key]*plane.weights[key] < 0.:
                decrease[key] = True
        if len(increase) == 0 or len(decrease) == 0:
            # No goal weights affect the binding constraint
            return 'No goals motivate the desired action!'
        # Hill-climb on the goal weights until the constraints are satisfied
        for index in range(100):
            plane = surface[goals]
            if plane.test(goals):
                # Success: save the constraints and return the new weights
                self.constraints[label] = constraints
                return goals
            # Drop weights that have already reached their bounds
            for key in increase.keys():
                if abs(goals[key]) > 1.- granularity:
                    del increase[key]
            for key in decrease.keys():
                if abs(goals[key]) < granularity:
                    del decrease[key]
            # Rebalance when the increase and decrease sets differ in size,
            # dropping weights too small to move by the scaled granularity
            change = True
            while change:
                if len(increase) == 0 or len(decrease) == 0:
                    # Nothing left to adjust
                    break
                change = False
                ratio = float(len(increase))/float(len(decrease))
                if ratio > 1.:
                    for key in decrease.keys():
                        if abs(goals[key]) < granularity*ratio:
                            del decrease[key]
                            change = True
                            break
                elif ratio < 1.:
                    for key in increase.keys():
                        if abs(goals[key]) < granularity/ratio:
                            del increase[key]
                            change = True
                            break
            if change:
                # Ran out of weights to adjust
                break
            else:
                # Shift weight in the direction favored by the constraint
                for key in increase.keys():
                    if plane.weights[key] > 0.:
                        delta = granularity
                    elif plane.weights[key] < 0.:
                        delta = -granularity
                    if ratio < 1.:
                        delta /= ratio
                    goals[key] += delta
                for key in decrease.keys():
                    if plane.weights[key] > 0.:
                        delta = granularity
                    elif plane.weights[key] < 0.:
                        delta = -granularity
                    if ratio > 1.:
                        delta *= ratio
                    goals[key] += delta
        return 'Unable to find a satisfying set of goal weights.'
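
    # A usage sketch (hypothetical 'option'): fit the goal weights so that
    # the agent prefers the given option, filing the constraints under a
    # label so a later fit can replace them.
    #
    #     result = agent.fit(desired=option,label='tutorial')
    #     if isinstance(result,str):
    #         print result   # fitting failed
    #     else:
    #         print result   # a KeyedVector of fitted goal weights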
    def __xml__(self):
        doc = RecursiveAgent.__xml__(self)
        # Store the goals and their weights
        root = doc.createElement('goals')
        doc.documentElement.appendChild(root)
        for goal,weight in self.goals.items():
            node = goal.__xml__().documentElement
            node.setAttribute('weight',str(weight))
            root.appendChild(node)
        # Store the lookahead horizon
        doc.documentElement.setAttribute('horizon',str(self.horizon))
        return doc
    def parse(self,element):
        """Extracts goal and horizon information from the given XML element,
        inverting L{__xml__} (implementation elided from this excerpt)"""

if __name__ == '__main__':
    from teamwork.test.agent.testRecursiveAgent import TestRecursiveAgentIraq
    from unittest import TestResult

    case = TestRecursiveAgentIraq('testValueAttack')
    result = TestResult()
    case(result)
    for failure in result.errors+result.failures:
        print failure[1]