teamwork.reward.goal

"""A reward subfunction represented as a PWL function of some vector (typically the world state)
@ivar dependency: list of (condition, tree) pairs, each pairing an action trigger condition with the decision tree that represents the reward subfunction under that trigger
@type dependency: list
@ivar keys: list of L{Key} instances that this goal depends on
@type keys: L{Key}[]
@ivar max: C{True} iff this goal represents a positive incentive; otherwise, it is a disincentive (default is C{True})
@type max: bool
"""

def __init__(self):
    """Creates an empty goal that acts as a positive incentive

    The goal starts with C{max} set to C{True}, no dependencies and no keys.
    """
    self.max = True
    # (condition, tree) pairs, filled in through addDependency
    self.dependency = []
    # Keys that this goal depends on
    self.keys = []

25

def addDependency(self, condition, tree=None):
    """
    Adds a new dependency to this subfunction
    @param condition: the trigger condition for this dependency
    @type condition: L{ActionCondition}
    @param tree: the PWL function to compute when triggered (ignored if the condition is a counting one)
    @type tree: L{ProbabilityTree}
    """
    # Dependencies are kept as (condition, tree) pairs in insertion order
    self.dependency.append((condition, tree))

35

def toKey(self):
    """Returns the single key that this goal is defined over
    @return: the only element of C{keys}
    @raise NotImplementedError: if this goal does not have exactly one key
    """
    if len(self.keys) == 1:
        return self.keys[0]
    # Call-style raise is accepted by both Python 2 and Python 3
    raise NotImplementedError('Unable to manipulate goals over: %s' %
                              (', '.join(map(str, self.keys))))

42

def reward(self, context, actions=None):
    """Computes the reward generated by the first dependency that applies
    @param context: mapping whose C{'state'} entry is used both to index the dependency's tree and as the multiplicand of the result
    @type context: dict
    @param actions: the actions to test each trigger condition against (default is an empty list)
    @type actions: list
    @return: the signed reward; C{0.} if no dependency is triggered
    """
    if actions is None:
        # Fresh list per call instead of a shared mutable default
        actions = []
    # NOTE(review): the extracted source lost its indentation; the trailing
    # "else" is read here as the fall-through of the loop, so that later
    # dependencies are still examined -- confirm against the original file
    for condition, tree in self.dependency:
        if condition.count:
            # We want an action count
            return self._sign(float(condition.match(actions)))
        if condition.match(actions):
            # Apply this entry to given context
            state = context['state']
            return self._sign(tree[state] * state)
    # Null reward
    return 0.

54

def _sign(self, value):
    """Applies the appropriate sign to a goal magnitude
    @param value: the (presumably nonnegative) magnitude of the reward
    @type value: float
    @return: the appropriately signed reward value
    @rtype: float
    """
    if self.max:
        # Positive incentive: magnitude is used as is
        return value
    # Disincentive: magnitude counts against the agent
    return -value

66

def __str__(self):
    """
    @return: the comma-separated string forms of this goal's keys, prefixed by C{'Minimize '} when the goal is a disincentive
    @rtype: str
    """
    label = ','.join(map(str, self.keys))
    if self.max:
        return label
    return 'Minimize ' + label

73

def __hash__(self):
    """
    @return: the hash of this goal's string representation, so goals that print identically hash identically
    @rtype: int
    """
    return hash(str(self))

76

def __xml__(self):
    """Builds an XML document describing this goal
    @return: a document whose root C{goal} element carries the C{max} flag as an attribute, followed by one child per key and then a condition child and a tree child per dependency
    @rtype: Document
    """
    doc = Document()
    root = doc.createElement('goal')
    doc.appendChild(root)
    root.setAttribute('max', str(self.max))
    for key in self.keys:
        root.appendChild(key.__xml__().documentElement)
    for condition, tree in self.dependency:
        root.appendChild(condition.__xml__().documentElement)
        # NOTE(review): addDependency allows tree to be None, which would
        # raise AttributeError here -- confirm whether tree-less
        # dependencies are ever serialized
        root.appendChild(tree.__xml__().documentElement)
    return doc

88

def parse(self, element):
    """Reads this goal's sign, keys and dependencies from an XML element
    @param element: the element to read; presumably the root C{goal} element of a document built by L{__xml__}
    @type element: Element
    """
    # Only the literal string form of False turns this into a
    # disincentive; a missing or different attribute value means maximize
    self.max = str(element.getAttribute('max')) != str(False)
    # Most recently parsed condition that has not yet been paired with a tree
    condition = None
    node = element.firstChild
    while node:
        if node.nodeType == node.ELEMENT_NODE:
            if node.tagName == 'key':
                key = Key()
                self.keys.append(key.parse(node))
            elif node.tagName == 'condition':
                condition = ActionCondition()
                condition.parse(node)
            elif node.tagName == 'tree':
                tree = ProbabilityTree()
                tree.parse(node)
                assert condition is not None, \
                    'tree element is not preceded by a condition element'
                self.addDependency(condition, tree)
                # Each condition pairs with exactly one tree
                condition = None
        # Advance for every node, including text and comment nodes
        node = node.nextSibling

110 """Creates a L{PWLGoal} corresponding to a maximization goal 111 @param key: the L{Key} that points to the vector element to be maximized 112 @type key: L{Key} 113 @rtype: L{PWLGoal} 114 """ 115 goal = PWLGoal() 116 goal.max = True 117 goal.keys.append(key) 118 if isinstance(key,StateKey): 119 matrix = IdentityMatrix(source=key) 120 elif isinstance(key,ActionKey): 121 matrix = ActionCountMatrix(key,key,0.) 122 else: 123 raise NotImplementedError,'Unable to make simple goal on %s' % (str(key)) 124 tree = ProbabilityTree() 125 tree.makeLeaf(matrix) 126 goal.addDependency(ActionCondition(),tree) 127 return goal

Source Code for Module teamwork.reward.goal