import copy
from types import *
from xml.dom.minidom import *

from generic import *

from teamwork.action.PsychActions import *
from teamwork.action.DecisionSpace import DecisionSpace

class LookaheadPolicy(Policy):
    """Policy subclass that looks a fixed number of turns into the
    future and examines the expected reward received in response to
    the actions of other agents
    @ivar horizon: the lookahead horizon
    @type horizon: C{int}
    @ivar consistentTieBreaking: if C{True}, always breaks ties between equally valued actions in a consistent manner, i.e., its behavior is deterministic (default is C{True})
    @type consistentTieBreaking: C{bool}
    """

    def __init__(self,entity,actions=[],horizon=1):
        """
        @param entity: the entity whose policy this is (not sure whether this is necessary)
        @type entity: L{teamwork.agent.Agent.Agent}
        @param actions: the options considered by this policy (used by superclass)
        @type actions: L{Action}[]
        @param horizon: the lookahead horizon
        @type horizon: C{int}
        """
        Policy.__init__(self,actions)
        self.entity = entity
        self.horizon = horizon
        self.threshold = 0.5
        self.consistentTieBreaking = True

    def setHorizon(self,horizon=1):
        """Sets the default horizon of lookahead (which can still be overridden by a method call argument)
        @param horizon: the desired horizon (default is 1)
        @type horizon: C{int}
        """
        self.horizon = horizon

    def execute(self,state,choices=[],debug=None,horizon=-1,explain=False):
        # Delegate to findBest, which returns the chosen option and an
        # explanation of the lookahead
        return self.findBest(state=state,choices=choices,debug=debug,
                             horizon=horizon,explain=explain)

    def evaluateChoices(self,state,choices=[],debug=None,horizon=-1):
        """Evaluates the expected reward of a set of possible actions
        @param state: the current world state
        @type state: L{Distribution<teamwork.math.probability.Distribution>}
        @param choices: the actions the agent has to choose from (default is all available actions)
        @type choices: C{L{Action}[]}
        @type debug: L{Debugger}
        @param horizon: the horizon of the lookahead (if omitted, the agent's default horizon is used)
        @type horizon: C{int}
        @return: a dictionary, indexed by action, of the projection of the reward of that action (as returned by L{actionValue<teamwork.agent.GoalBased.GoalBasedAgent.actionValue>}) with additional I{action} and I{value} fields indicating the chosen actions and their expected reward
        @rtype: C{dict}
        """
        values = {}
        if len(choices) == 0:
            choices = self.entity.actions.getOptions()
        for action in choices:
            if debug:
                debug.message(9,'%s considering %s' % (self.entity.ancestry(),
                                                       action))
            value,exp = LookaheadPolicy.actionValue(self,
                                                    state=copy.copy(state),
                                                    actStruct=action,
                                                    debug=debug,horizon=horizon)
            if debug:
                debug.message(9,'Value of %s = %s' % (action,value))
            exp['action'] = action
            exp['value'] = value
            values[str(action)] = exp
        return values

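    # Illustrative shape of the dictionary returned by evaluateChoices,
    # assuming two hypothetical options named "wait" and "explore":
    #   {'wait':    {'action': ..., 'value': ..., 'projection': ...},
    #    'explore': {'action': ..., 'value': ..., 'projection': ...}}
    # Each entry carries whatever fields actionValue returns (e.g., the
    # 'projection' and 'breakdown' keys consumed by findBest below).
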
    def findBest(self,state,choices=[],debug=None,
                 horizon=-1,explain=False):
        """Determines the option with the highest expected reward
        @param state: the current world state
        @type state: L{Distribution<teamwork.math.probability.Distribution>}
        @param choices: the actions the agent has to choose from (default is all available actions)
        @type choices: C{L{Action}[]}
        @type debug: L{Debugger}
        @param horizon: the horizon of the lookahead (if omitted, the agent's default horizon is used)
        @type horizon: C{int}
        @return: the optimal action and a log of the lookahead in dictionary form:
            - value: the expected reward of the optimal action
            - decision: the optimal action
            - options: a dictionary, indexed by action, of the projection of the reward of that action (as returned by L{evaluateChoices})
        @rtype: C{tuple}
        """
        bestAction = []
        bestValue = None
        explanation = {'options':self.evaluateChoices(state,choices,
                                                      debug,horizon)}
        for result in explanation['options'].values():
            action = result['action']
            value = result['value']
            if not bestAction:
                better = True
            else:
                if float(value) > float(bestValue):
                    better = True
                elif float(value) < float(bestValue):
                    better = False
                elif self.consistentTieBreaking:
                    # Break ties deterministically by comparing the string
                    # representations of the candidate actions
                    better = str(action) > str(bestAction)
                else:
                    better = False
            if better:
                bestAction = action
                bestValue = value
        if explain:
            # Generate an XML document explaining the decision
            exp = Document()
            root = exp.createElement('explanation')
            exp.appendChild(root)
            bestResult = explanation['options'][str(bestAction)]
            if len(explanation['options']) > 1:
                # Record the alternatives that were considered and rejected
                field = exp.createElement('alternatives')
                root.appendChild(field)
                # Record the goal weights used to evaluate the options
                node = exp.createElement('goals')
                field.appendChild(node)
                goals = self.entity.getGoalVector()['state'].expectation()
                node.appendChild(goals.__xml__().documentElement)
                for result in explanation['options'].values():
                    if result['action'] != bestAction:
                        node = exp.createElement('alternative')
                        field.appendChild(node)
                        if result['projection']:
                            delta = result['projection'] - bestResult['projection']
                            node.appendChild(delta.__xml__().documentElement)
                        for action in result['action']:
                            node.appendChild(action.__xml__().documentElement)
            if 'breakdown' in bestResult:
                # Record the expected sequence of actions by each agent
                field = exp.createElement('expectations')
                root.appendChild(field)
                for step in bestResult['breakdown']:
                    for name,option in step['action'].items():
                        node = exp.createElement('turn')
                        field.appendChild(node)
                        node.setAttribute('agent',name)
                        for action in option:
                            node.appendChild(action.__xml__().documentElement)
        else:
            exp = explanation
        if debug:
            debug.message(7,'Search complete')
        explanation['value'] = bestValue
        explanation['decision'] = bestAction
        if state is not None:
            try:
                explanation['beliefs'] = state[None]
            except KeyError:
                # Fall back on the 'state' key if the overall belief state
                # is not indexed under None
                explanation['beliefs'] = state['state']
        if debug:
            debug.message(9,'%s prefers %s' % (self.entity.name,str(bestAction)))
        return bestAction,exp

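    # Structure of the XML explanation generated when explain=True,
    # using the element and attribute names created above:
    #   <explanation>
    #     <alternatives>
    #       <goals>...goal weights...</goals>
    #       <alternative>...reward delta and actions...</alternative>
    #     </alternatives>
    #     <expectations>
    #       <turn agent="...">...expected actions...</turn>
    #     </expectations>
    #   </explanation>
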
    def actionValue(self,state,actStruct,debug=None,horizon=-1):
        """Evaluates the expected reward of performing the given action
        @return: the expected value of the action and an explanation of the lookahead
        @rtype: C{tuple}
        """
        if horizon < 0:
            horizon = self.horizon
        # Assumes the entity exposes the GoalBasedAgent actionValue method
        # referenced in the evaluateChoices docstring above
        return self.entity.actionValue(actStruct,horizon,state,debug)

    def __str__(self):
        return 'Lookahead to horizon '+str(self.horizon)

    def __xml__(self):
        doc = Document()
        root = doc.createElement('policy')
        doc.appendChild(root)
        root.setAttribute('horizon',str(self.horizon))
        return doc

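    # Serialization round-trip sketch (illustrative; `policy` is a
    # hypothetical existing instance):
    #     from xml.dom.minidom import parseString
    #     xml = policy.__xml__().toxml()
    #     restored = LookaheadPolicy(entity=None)
    #     restored.parse(parseString(xml).documentElement)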
    def parse(self,element):
        try:
            self.horizon = int(element.getAttribute('horizon'))
        except ValueError:
            try:
                self.horizon = int(element.getAttribute('depth'))
            except ValueError:
                self.horizon = 1
198