import copy
import string
import time
from types import *

import KeyedVector
from KeyedMatrix import *
from teamwork.math.KeyedTree import *
from ProbabilityTree import *

def expandPolicy(entity,name,interrupt=None,keys=[],debug=0):
    """Expands the policy that C{entity} uses to model the agent named
    C{name} into a decision tree with the relevant action dynamics
    applied"""
    # (implementation elided from this excerpt)

# NOTE: the def line below is a reconstruction; the original line is
# missing from this excerpt, and the helper's name and signature are
# inferred from the body, which uses only an entity and a turn sequence.
def getActionKeys(entity,sequence):
    """Returns the list of action keys relevant to the given entity's
    goals, plus any observation keys tested by the policies of the
    other agents appearing in the given turn sequence"""
    actionKeys = []
    for key in entity.getGoalVector()['action'].keys():
        actionKeys.append(key)
    for turns in sequence:
        for name in turns:
            # Include any observation keys that this agent's policy
            # tree branches on
            other = entity.getEntity(name)
            for key in other.policy.tree.getKeys():
                if key['class'] == 'observation':
                    if not key in actionKeys:
                        actionKeys.append(key)
    return actionKeys

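# Illustrative sketch of how getActionKeys might be called (the agent
# names and turn sequence here are hypothetical, not part of this
# module):
#
#     keys = getActionKeys(entities['Buyer'],[['Seller'],['Buyer']])
#
# Each returned key behaves like a dictionary whose 'class' entry
# identifies its type (e.g., 'observation'), as tested in the loop above.
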
def getLookaheadTree(entity,chosenAction,sequence,
                     local=False,choices=None,
                     goals=None,interrupt=None,debug=0):
60 """
61 @param chosenAction: the action being projected
62 @type chosenAction: L{teamwork.action.PsychActions.Action}[]
63 @param sequence: the turns anticipated by this agent in its lookahead, as a list of turns, where each turn is a list of names
64 @type sequence: str[][]
65 @param local: if C{True}, then the entity will compile a locally optimal policy, expecting itself to behave according to whatever mental model it has of itself; otherwise, it will plan over I{all} of its turns in the sequence (more strategic). Default is C{False}
66 @type local: boolean
67 @param choices: the possible actions to be considered in this policy (if C{None}, defaults to all available actions)
68 @type choices: L{Action}[][]
69 @param goals: if you want an expected value tree (as opposed to a reward-independent sum over state projections), then you can pass in a tree representing the reward function to use (default is to be reward-independent)
70 @type goals: L{ProbabilityTree}
71 @param interrupt: a thread Event, the compilation process will continually test whether the event is set; if it is, it will exit. In other words, this is a way to interrupt the compilation
72 @type interrupt: Event
73 @return: a decision tree representing the dynamics of the given actions followed by the given sequence of agent responses. The sequence is a list of lists of agent name strings. The agents named in list I{i} of the sequence apply their policy-driven actions at time I{i}, where time 0 occurs in parallel with the given entity's performance of the chosen action
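
    Example (an illustrative sketch only; the scenario and the agent
    names 'Buyer' and 'Seller' are hypothetical, not part of this
    module):

        >>> me = entities['Buyer']
        >>> option = me.actions.getOptions()[0]
        >>> result = getLookaheadTree(me,option,[['Seller'],['Buyer']])
        >>> projection = result['transition']
        >>> value = result['total']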
74 """
    if debug > 0 and chosenAction:
        print 'Lookahead for:',chosenAction
    if choices is None:
        choices = entity.actions.getOptions()
    # The accumulated (expected) value over all projected steps
    total = None
    # The product of the step dynamics so far (the overall transition)
    last = None
    # Policy trees expanded for the other agents in the sequence
    policies = {}
    actionKeys = []

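    # First pass: expand the policies (with their dynamics) of every
    # other agent who takes a turn in the sequence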
    flag = chosenAction
    for turns in sequence:
        for name in turns:
            if name == entity.name and flag:
                # The entity's own (first) turn is handled separately,
                # via the chosen action or a compiled subpolicy
                flag = not local
            elif not policies.has_key(name):
                if debug > 1:
                    print 'Expanding policy for:',entity.ancestry(),name
                policies[name] = expandPolicy(entity,name,interrupt,
                                              actionKeys,debug)
                if interrupt and interrupt.isSet():
                    return None
                if debug > 0:
                    print '\tPolicy tree w/dynamics has %d leaves' % \
                          (len(policies[name].leaves()))
                if debug > 1:
                    print 'Expanded policy:', policies[name]

    if len(sequence) == 0:
        sequence = [[entity.name]]
    recursed = False
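    # Second pass: walk the turn sequence, accumulating each step's
    # joint dynamics and (optionally) expected value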
    for t in range(len(sequence)):
        turns = sequence[t]
        if debug > 0:
            print '-----------'
            print 'Time: %d/%d' % (t+1,len(sequence))
            print '-----------'
        subtree = None
        stepValue = None
        for name in turns:
            if debug > 0:
                print 'Turn:',name
            if name == entity.name:
                if chosenAction:
                    if debug > 0:
                        print '\tFixed action:',chosenAction
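                    # Apply the dynamics of the fixed action on the
                    # entity's own first turn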
                    action = {entity.name:chosenAction}
                    newTree = entity.entities.getDynamics(action)['state'].getTree()
                    # The fixed action applies to the first turn only
                    chosenAction = False
                elif local:
                    # Locally optimal: reuse the policy expanded from
                    # the entity's mental model of itself
                    newTree = policies[name]
                    if debug > 0:
                        print '\tInserting policy of size',
                        print len(newTree.leaves())
                else:
                    horizon = len(sequence)-t
                    if debug > 0:
                        print '----------------------------------------------'
                        print '\tComputing subpolicy of horizon:',horizon
                    newTree = entity.policy.compileTree(horizon=horizon,
                                                        choices=choices,
                                                        key='weights',
                                                        interrupt=interrupt,
                                                        debug=debug)
                    if interrupt and interrupt.isSet():
                        return None
                    recursed = True
                    if debug > 0:
                        print '\tFinished with horizon %d (%d leaves)' % \
                              (len(sequence)-t,len(newTree.leaves()))
                        print '----------------------------------------------'
            else:
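                # Another agent's turn: insert its pre-expanded policy
                # dynamics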
                newTree = policies[name]
                if debug > 0:
                    print '\tInserting %s\'s policy dynamics [%d leaves]' % (name,len(newTree.leaves()))
            if newTree.getValue():
                # Fold this agent's tree into the joint dynamics for
                # this time step
                if subtree is None:
                    subtree = newTree
                else:
                    subtree += newTree
                if interrupt and interrupt.isSet():
                    return None
                if debug > 1:
                    print name,subtree.simpleText()
                    print
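                # Optionally weight this step by the reward function to
                # build an expected-value tree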
                if goals:
                    if recursed:
                        # A recursively compiled subpolicy is already a
                        # value tree, so use it as-is
                        subValue = newTree
                    else:
                        subValue = goals*newTree
                    if stepValue is None:
                        stepValue = subValue
                    else:
                        stepValue += subValue
                    if interrupt and interrupt.isSet():
                        return None
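        # Chain this step's joint dynamics onto the overall projection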
        if debug > 0:
            print 'Multiplying overall projection by step %d dynamics [%d leaves]' % \
                  (t+1,len(subtree.leaves()))
        if last:
            last = subtree * last
        else:
            last = subtree
        if debug > 0:
            if total:
                print 'Adding step %d projection [%d leaves] to total [%d leaves]' % \
                      (t+1,len(last.leaves()),len(total.leaves()))
        if interrupt and interrupt.isSet():
            return None
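        # Fold this step's value into the running total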
        if total is None:
            total = stepValue
        else:
            total += stepValue
        if debug > 0:
            print 'Total so far:',len(total.leaves()),'leaves'
        if recursed:
            # The recursively compiled subpolicy already covers all
            # remaining turns, so stop the explicit projection here
            break
    return {'transition':last,
            'total': total}

def getDiffTree(entity,action1,action2,sequence,debug=1):
    """Returns the pair of expected-value decision trees for the two
    specified actions, subject to the provided turn sequence (following
    the format for L{getLookaheadTree}); subtracting the second tree
    from the first yields the state difference S(action1)-S(action2)."""
    gValue = entity.policy.getActionTree(action1)
    if debug > 0:
        print 'EV[%s] = %s' % (action1,gValue.simpleText())
    bValue = entity.policy.getActionTree(action2)
    if debug > 0:
        print 'EV[%s] = %s' % (action2,bValue.simpleText())
    return gValue, bValue
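
# The pair returned by getDiffTree is typically subtracted to obtain a
# difference tree, as in the constraint computation further below.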

# NOTE: the def line below is a reconstruction; the original is missing
# from this excerpt, and the name 'sign' is inferred from the docstring.
def sign(value):
    """Returns 1 for any positive value, -1 for any negative value,
    and 0 for any zero value"""
    return value.__cmp__(0.)
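
# Illustrative behavior (Python 2, where float.__cmp__ is defined):
#     sign(-2.5) -> -1    sign(0.) -> 0    sign(3.7) -> 1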

# NOTE: the def line below is a reconstruction; the original is missing
# from this excerpt.  The function name and the parameters (including
# epsilon, used in the loop below, and its default) are inferred from
# the body.
def findConstraint(entity,goodAction,badAction,sequence,epsilon=0.,debug=1):
    """Returns a dictionary of possible singleton goal weight changes
    that satisfy the constraint that the specified entity prefer the
    goodAction over the badAction given the provided lookahead turn
    sequence. If the constraint is satisfied by the current goal
    weights, then the returned dictionary is empty"""
    goodV,badV = getDiffTree(entity,goodAction,badAction,sequence,
                             max(0,debug-1))
    diffTree = goodV - badV
    if debug > 0:
        print 'Tree:',diffTree.simpleText()
    # Build the current state vector over the keys used by the
    # difference tree
    state = getStateVector(diffTree.getKeys(),entity)
    if debug > 0:
        print 'State:',state
    goals = entity.getGoalVector()['state']
    if debug > 0:
        print 'Goals:',goals
    goodTotal = goodV[state]*state
    badTotal = badV[state]*state
    diffVector = diffTree[state]*state
    if debug > 0:
        print 'EV[%s] =' % (goodAction),goodTotal
        print 'EV[%s] = %5.3f' % (goodAction,float(goals*goodTotal))
        print 'EV[%s] =' % (badAction),badTotal
        print 'EV[%s] = %5.3f' % (badAction,float(goals*badTotal))
        print 'Delta EV =',goodTotal-badTotal
        print 'Delta EV = %5.3f' % \
              (float(goals*goodTotal)-float(goals*badTotal))
        print 'Difference Tree:',diffTree[state].simpleText()
        print 'Difference Vector:',diffVector
    diff = float(goals*diffVector)
    solutions = {}
    constraint = {'delta':diff,
                  'plane':diffVector,
                  'solution':solutions,
                  'slope':{},
                  }
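    # 'delta' is the current EV difference, 'plane' the difference
    # vector defining the constraint plane, 'solution' the (initially
    # empty) set of proposed weight changes, and 'slope' the per-goal
    # sensitivity filled in below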
    if diff > 0.:
        if debug > 0:
            print 'Correct: Chose %s over %s' % (goodAction,badAction)
    else:
        if debug > 0:
            print 'Incorrect: Chose %s over %s' % (badAction,goodAction)
            print 'Off by:',abs(diff)
    # For each goal weight, compute the slope of the constraint plane
    # and the minimal singleton change that would flip the preference
    for key in goals.keys():
        try:
            slope = diffVector[key]
        except KeyError:
            slope = 0.
        constraint['slope'][key] = slope
        try:
            delta = -diff/slope-epsilon
        except ZeroDivisionError:
            delta = 'NaN'
        if debug > 0:
            print key
            print '\tCurrent Value:',goals[key]
            print '\tSlope:',slope
            print '\tdelta:',delta
    return constraint
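
# Illustrative use of findConstraint (a hedged sketch; the agent,
# actions, and turn sequence are hypothetical, and the function name is
# reconstructed as noted above):
#
#     constraint = findConstraint(me,goodOption,badOption,
#                                 [['Seller'],['Buyer']])
#     if constraint['delta'] <= 0.:
#         # the current goal weights violate the preference constraint
#         slopes = constraint['slope']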