Package teamwork :: Package reward :: Module goal
[hide private]
[frames | no frames]

Source Code for Module teamwork.reward.goal

  1  """Goals as piecewise linear reward functions 
  2  @author: David V. Pynadath <pynadath@ict.usc.edu> 
  3  """ 
  4  from xml.dom.minidom import * 
  5  from teamwork.math.Keys import Key,StateKey,ActionKey 
  6  from teamwork.math.KeyedVector import KeyedVector 
  7  from teamwork.math.KeyedMatrix import ActionCountMatrix,IdentityMatrix 
  8  from teamwork.math.ProbabilityTree import ProbabilityTree 
  9  from teamwork.action.PsychActions import ActionCondition 
 10   
class PWLGoal:
    """A reward subfunction represented as a PWL function of some vector (typically the world state)
    @ivar dependency: list of C{(condition,tree)} pairs, each pairing an action trigger with the decision tree to evaluate when it fires (the tree may be C{None})
    @type dependency: list
    @ivar keys: set of L{Key} instances that this goal depends on
    @type keys: L{Key}[]
    @ivar max: C{True} iff this goal represents a positive incentive; otherwise, it is a disincentive (default is C{True})
    @type max: bool
    """

    def __init__(self):
        """Creates a positive-incentive goal with no keys and no dependencies"""
        self.max = True
        self.dependency = []
        self.keys = []

    def addDependency(self,condition,tree=None):
        """
        Adds a new dependency to this subfunction
        @param condition: the trigger condition for this dependency
        @type condition: L{ActionCondition}
        @param tree: the PWL function to compute when triggered (ignored if the condition is a counting one)
        @type tree: L{ProbabilityTree}
        """
        self.dependency.append((condition,tree))

    def toKey(self):
        """
        @return: the one key that this goal depends on
        @rtype: L{Key}
        @raise NotImplementedError: if this goal does not have exactly one key
        """
        if len(self.keys) == 1:
            return self.keys[0]
        # The call form of raise is valid under both Python 2 and Python 3
        raise NotImplementedError('Unable to manipulate goals over: %s' %
                                  (', '.join(map(str,self.keys))))

    def reward(self,context,actions=None):
        """Computes the reward generated by the first dependency that applies
        @param context: mapping whose C{'state'} entry is used both to index the triggered tree and as the vector multiplied by the result
        @param actions: the actions handed to each trigger condition's C{match} (default is an empty list)
        @type actions: list
        @return: the signed reward of the first applicable dependency, or 0. if none applies
        """
        if actions is None:
            # Fresh list per call rather than one default shared across calls
            actions = []
        for condition,tree in self.dependency:
            if condition.count:
                # We want an action count
                return self._sign(float(condition.match(actions)))
            elif condition.match(actions):
                # Apply this entry to given context
                state = context['state']
                return self._sign(tree[state]*state)
        # Null reward: reached only when no dependency applied (including
        # the case of no dependencies at all)
        return 0.

    def _sign(self,value):
        """Applies the appropriate sign to a goal magnitude
        @param value: the (presumably nonnegative) magnitude of the reward
        @type value: float
        @return: the appropriately signed reward value
        @rtype: float
        """
        if self.max:
            return value
        return -value

    def __str__(self):
        """
        @return: the comma-separated keys, prefixed by C{'Minimize '} when this goal is a disincentive
        @rtype: str
        """
        label = ','.join(map(str,self.keys))
        if self.max:
            return label
        return 'Minimize '+label

    def __hash__(self):
        """Hashes this goal by its string label"""
        return hash(str(self))

    def __xml__(self):
        """
        @return: a document whose C{goal} root carries the C{max} attribute, followed by the XML of each key and of each dependency (condition, then its tree if it has one)
        @rtype: Document
        """
        doc = Document()
        root = doc.createElement('goal')
        doc.appendChild(root)
        root.setAttribute('max',str(self.max))
        for key in self.keys:
            root.appendChild(key.__xml__().documentElement)
        for condition,tree in self.dependency:
            root.appendChild(condition.__xml__().documentElement)
            if tree is not None:
                # addDependency allows a dependency without a tree
                root.appendChild(tree.__xml__().documentElement)
        return doc

    def parse(self,element):
        """Fills in this goal from a C{goal} XML element
        @param element: the element to read; its C{key}, C{condition} and C{tree} child elements are appended to this goal's keys and dependencies
        @type element: Element
        """
        # Anything other than the string form of False counts as a max goal
        self.max = str(element.getAttribute('max')) != str(False)
        condition = None
        node = element.firstChild
        while node:
            if node.nodeType == node.ELEMENT_NODE:
                if node.tagName == 'key':
                    key = Key()
                    self.keys.append(key.parse(node))
                elif node.tagName == 'condition':
                    if condition is not None:
                        # The previous condition had no tree of its own
                        self.addDependency(condition)
                    condition = ActionCondition()
                    condition.parse(node)
                elif node.tagName == 'tree':
                    tree = ProbabilityTree()
                    tree.parse(node)
                    assert condition is not None,\
                        'tree element without a preceding condition'
                    self.addDependency(condition,tree)
                    condition = None
            node = node.nextSibling
        if condition is not None:
            # Trailing condition that was written without a tree
            self.addDependency(condition)
108
def maxGoal(key):
    """Creates a L{PWLGoal} corresponding to a maximization goal
    @param key: the L{Key} that points to the vector element to be maximized
    @type key: L{Key}
    @rtype: L{PWLGoal}
    @raise NotImplementedError: if the key is neither a L{StateKey} nor an L{ActionKey}
    """
    # Reject unsupported keys before building anything
    if isinstance(key,StateKey):
        matrix = IdentityMatrix(source=key)
    elif isinstance(key,ActionKey):
        matrix = ActionCountMatrix(key,key,0.)
    else:
        # The call form of raise is valid under both Python 2 and Python 3
        raise NotImplementedError('Unable to make simple goal on %s' % (str(key)))
    goal = PWLGoal()
    goal.max = True
    goal.keys.append(key)
    # The goal's only dependency is a single-leaf tree holding the matrix
    tree = ProbabilityTree()
    tree.makeLeaf(matrix)
    goal.addDependency(ActionCondition(),tree)
    return goal
128
def minGoal(key):
    """Builds the L{PWLGoal} that treats the given key as a disincentive
    @param key: the L{Key} that points to the vector element to be minimized
    @type key: L{Key}
    @return: the goal produced by L{maxGoal} for this key, with its C{max} flag set to C{False}
    @rtype: L{PWLGoal}
    """
    minimized = maxGoal(key)
    minimized.max = False
    return minimized
138