
Source Code for Module teamwork.policy.LookaheadPolicy

import copy
from types import *
from xml.dom.minidom import *

from generic import *

from teamwork.action.PsychActions import *
from teamwork.action.DecisionSpace import DecisionSpace

class LookaheadPolicy(Policy):
    """Policy subclass that looks a fixed number of turns into the
    future and examines the expected reward received in response to
    the actions of other agents
    @ivar horizon: the lookahead horizon
    @type horizon: C{int}
    @ivar consistentTieBreaking: if C{True}, always breaks ties between
    equally valued actions in a consistent manner, i.e., its behavior is
    deterministic (default is C{True})
    @type consistentTieBreaking: C{bool}
    """
    def __init__(self,entity,actions=[],horizon=1):
        """
        @param entity: the entity whose policy this is (not sure whether this is necessary)
        @type entity: L{teamwork.agent.Agent.Agent}
        @param actions: the options considered by this policy (used by superclass)
        @type actions: C{L{Action}[]}
        @param horizon: the lookahead horizon
        @type horizon: C{int}
        """
        Policy.__init__(self,actions)
        self.entity = entity
        self.horizon = horizon
        self.threshold = 0.5
        self.consistentTieBreaking = True

    def setHorizon(self,horizon=1):
        """Sets the default horizon of lookahead (which can still be overridden by a method call argument)
        @param horizon: the desired horizon (default is 1)
        @type horizon: C{int}
        """
        self.horizon = horizon

    def execute(self,state,choices=[],debug=None,horizon=-1,explain=False):
        return self.findBest(state=state,choices=choices,debug=debug,
                             horizon=horizon,explain=explain)
    def evaluateChoices(self,state,choices=[],debug=None,horizon=-1):
        """Evaluates the expected reward of a set of possible actions
        @param state: the current world state
        @type state: L{Distribution<teamwork.math.probability.Distribution>}
        @param choices: the actions the agent has to choose from (default is all available actions)
        @type choices: C{L{Action}[]}
        @type debug: L{Debugger}
        @param horizon: the horizon of the lookahead (if omitted, the agent's default horizon is used)
        @type horizon: C{int}
        @return: a dictionary, indexed by action, of the projection of the reward of that action (as returned by L{actionValue<teamwork.agent.GoalBased.GoalBasedAgent.actionValue>}), with an additional I{action} field indicating the chosen actions
        @rtype: C{dict}
        """
        values = {}
        if len(choices) == 0:
            choices = self.entity.actions.getOptions()
        for action in choices:
            if debug:
                debug.message(9,'%s considering %s' % (self.entity.ancestry(),
                                                       action))
            value,exp = LookaheadPolicy.actionValue(self,
                                                    state=copy.copy(state),
                                                    actStruct=action,
                                                    debug=debug,horizon=horizon)
            if debug:
                debug.message(9,'Value of %s = %s' % (action,value))
            # Record the value and explanation of this option
            exp['action'] = action
            exp['value'] = value
            values[str(action)] = exp
        return values
    def findBest(self,state,choices=[],debug=None,
                 horizon=-1,explain=False):
        """Determines the option with the highest expected reward
        @param state: the current world state
        @type state: L{Distribution<teamwork.math.probability.Distribution>}
        @param choices: the actions the agent has to choose from (default is all available actions)
        @type choices: C{L{Action}[]}
        @type debug: L{Debugger}
        @param horizon: the horizon of the lookahead (if omitted, the agent's default horizon is used)
        @type horizon: C{int}
        @return: the optimal action and a log of the lookahead in dictionary form:
            - value: the expected reward of the optimal action
            - decision: the optimal action
            - options: a dictionary, indexed by action, of the projection of the reward of that action (as returned by L{evaluateChoices})
        @rtype: C{dict}
        """
        bestAction = []
        bestValue = None
        explanation = {'options':self.evaluateChoices(state,choices,
                                                      debug,horizon)}
        for result in explanation['options'].values():
            action = result['action']
            value = result['value']
            if not bestAction:
                better = True
            else:
                if float(value) > float(bestValue):
                    better = True
                elif float(value) < float(bestValue):
                    better = False
                elif self.consistentTieBreaking:
                    # Tie, so let's use a consistent tie-breaking mechanism
                    # (i.e., alphabetical order)
                    better = str(action) > str(bestAction)
                else:
                    better = False
            if better:
                bestAction = action
                bestValue = value
        if explain:
            # Generate XML document explaining the policy's output
            exp = Document()
            root = exp.createElement('explanation')
            exp.appendChild(root)
            bestResult = explanation['options'][str(bestAction)]
            if len(explanation['options']) > 1:
                # Add actions not chosen
                field = exp.createElement('alternatives')
                root.appendChild(field)
                # Add agent goals
                node = exp.createElement('goals')
                field.appendChild(node)
                goals = self.entity.getGoalVector()['state'].expectation()
                node.appendChild(goals.__xml__().documentElement)
                for result in explanation['options'].values():
                    if result['action'] != bestAction:
                        node = exp.createElement('alternative')
                        field.appendChild(node)
                        if result['projection']:
                            delta = result['projection'] - bestResult['projection']
                            node.appendChild(delta.__xml__().documentElement)
                        for action in result['action']:
                            node.appendChild(action.__xml__().documentElement)
            if bestResult.has_key('breakdown'):
                # Add expected actions
                field = exp.createElement('expectations')
                root.appendChild(field)
                for step in bestResult['breakdown']:
                    for name,option in step['action'].items():
                        node = exp.createElement('turn')
                        field.appendChild(node)
                        node.setAttribute('agent',name)
                        for action in option:
                            node.appendChild(action.__xml__().documentElement)
        else:
            exp = explanation
        if debug:
            debug.message(7,'Search complete')
        explanation['value'] = bestValue
        explanation['decision'] = bestAction
        if state is not None:
            try:
                explanation['beliefs'] = state[None]
            except KeyError:
                # Should not really use 'state' as a key
                explanation['beliefs'] = state['state']
        if debug:
            debug.message(9,'%s prefers %s' % (self.entity.name,`bestAction`))
        return bestAction,exp
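    # Illustrative note (not part of the original source): findBest returns a
    # (bestAction, explanation) pair.  With explain=False the second element is
    # the dictionary documented above, roughly of the form
    #   {'options': {...}, 'value': ..., 'decision': [Action, ...], 'beliefs': ...}
    # With explain=True it is instead an xml.dom.minidom Document rooted at an
    # <explanation> element containing <alternatives> and <expectations> nodes.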
    def __copy__(self):
        return self.__class__(self.entity,horizon=self.horizon)

    def actionValue(self,state,actStruct,debug=None,horizon=-1):
        """
        @return: the expected value of performing the given action
        """
        if horizon < 0:
            horizon = self.horizon
        return self.entity.actionValue(actStruct,horizon,state,debug)
    def __str__(self):
        return 'Lookahead to horizon '+`self.horizon`

    def __contains__(self,value):
        return False

    def __xml__(self):
        doc = Document()
        root = doc.createElement('policy')
        doc.appendChild(root)
        root.setAttribute('horizon',str(self.horizon))
        return doc
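    # Illustrative note (not part of the original source): __xml__ above yields
    # a document that serializes as, e.g., <policy horizon="2"/>, and parse()
    # below reads the 'horizon' attribute back (falling back to 'depth', then
    # to a default of 1).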
    def parse(self,element):
        try:
            self.horizon = int(element.getAttribute('horizon'))
        except ValueError:
            try:
                self.horizon = int(element.getAttribute('depth'))
            except ValueError:
                self.horizon = 1
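A minimal usage sketch (not part of the module source), assuming an already-constructed agent object from the surrounding teamwork framework, here called agent, that provides actions.getOptions() and the actionValue() method used above, together with its current belief state, here called state; how those objects are built is omitted.

from teamwork.policy.LookaheadPolicy import LookaheadPolicy

# Hypothetical setup: 'agent' is a GoalBasedAgent-style object supplied by the
# surrounding framework and 'state' is its current belief distribution.
policy = LookaheadPolicy(entity=agent,
                         actions=agent.actions.getOptions(),
                         horizon=2)

# Choose the best option over a two-step lookahead and inspect the log.
decision, explanation = policy.execute(state=state, explain=False)
print 'Chosen option:', decision
print 'Expected reward:', explanation['value']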