1
2
3
4
5
6
7
8
9
10
11 import copy
12 import string
13 from types import *
14
15
16
17
def strip(state):
    """Return a copy of a state dictionary containing only its public
    entries, i.e., every key that does not begin with an underscore.

    Bookkeeping fields such as '_prob', '_world', '_value' are omitted;
    used throughout the debug output to show only domain features.
    """
    s = {}
    for key in state.keys():
        # startswith() instead of key[0]: avoids IndexError on an
        # empty-string key.
        if not key.startswith('_'):
            s[key] = state[key]
    return s
24
26 version = 1.0
27 - def __init__(self,states,team,name='Generic COM-MTDP'):
34
35
36 - def reward(self,state,actions,messages):
39
40
42 raise NotImplementedError
43
44
47
48
50 raise NotImplementedError
51
def evaluatePolicy(self,domPolicy,comPolicy,state,horizon=100,debug=0):
    """Computes expected reward of following the domain- and
    communication-level policies over the finite horizon
    specified, from the initial state specified.

    domPolicy -- map from agent name to domain-level policy
    comPolicy -- communication-level policy structure
    state     -- initial world state
    horizon   -- number of epochs to simulate (default 100)
    debug     -- nonzero enables verbose tracing

    Returns the result dictionary produced by the private
    __evaluatePolicy driver (expected 'Reward' and 'Messages').
    """
    # Root node of the belief-state expansion: the true world state
    # plus bookkeeping fields (probability mass, accumulated value,
    # last joint action, current time index).
    stateList = [{'_world': state, '_prob': 1.0, '_value': 0.0,
                  '_actions': {}, '_epoch': 0}]
    # Seed each agent's subjective belief state.
    for s in stateList:
        for agent in self.agents:
            s[agent.name] = agent.initialStateEstimator()
    return self.__evaluatePolicy(stateList, domPolicy, comPolicy,
                                 horizon, debug)
65
67 results = {'Reward':0.0,
68 'Messages':0.0}
69 if len(stateList) == 0:
70 return results
71 for epoch in range(stateList[0]['_epoch'],horizon):
72 if debug:
73 print '========='
74 print 'Epoch',epoch
75 print '========='
76 self.ProjectObservations(stateList,None,[],epoch,debug)
77 results['Messages'] = results['Messages'] + \
78 self.ExecuteCommunication(stateList,
79 comPolicy,debug)
80 results['Reward'] = results['Reward'] \
81 + self.ExecuteActions(stateList,domPolicy,
82 debug)
83 self.ProjectWorldDynamics(stateList)
84 if debug:
85 print 'EValue:',results['Reward']
86 print '# Msgs:',results['Messages']
87 return results
88
105
def __generateTeamObservations(self,state,actions,agents,obsList,debug=0):
    """Generates all possible observations over all agents.

    Recursively extends each partial joint observation in obsList with
    every possible individual observation of the first agent in agents,
    weighting by the team's individual-observation probability and
    pruning zero-probability branches.  Consumes the agents list, so
    callers should pass a copy (as ProjectObservations does).

    NOTE(review): def header reconstructed from the recursive call and
    the call site in ProjectObservations.
    """
    if len(agents) == 0:
        return obsList
    else:
        # Enumerate the individual observations of the next agent.
        agent = agents[0]
        agentObsList = [{'_type':'observation'}]
        self.__generateAgentObservations(state,actions,agent,
                                         self.states.features.keys()[:],
                                         agentObsList,debug)
        # Cross each partial joint observation with this agent's
        # possibilities, rebuilding obsList in place.
        for obs in obsList[:]:
            obsList.remove(obs)
            for agentObs in agentObsList:
                newObs = copy.copy(obs)
                newObs[agent.name] = agentObs
                newObs['_prob'] = newObs['_prob'] * \
                                  self.team.individualObs(state,actions,
                                                          agentObs,agent.name)
                # Prune impossible joint observations.
                if newObs['_prob'] > 0.0:
                    obsList.append(newObs)
        agents.remove(agent)
        # BUG FIX: the original recursion dropped the debug flag.
        return self.__generateTeamObservations(state,actions,agents,
                                               obsList,debug)
127
130 """Generates all possible observations for an individual agent"""
131 if len(features) == 0:
132 if debug:
133 print 'Agent',agent.name,'observes:',observations
134 return observations
135 else:
136 feature = features[0]
137 if agent.observations.has_key(feature):
138 if agent.observations[feature] == 'observable':
139
140
141 for obs in observations:
142 obs[feature] = state[feature]
143 else:
144
145
146
147 for obs in observations[:]:
148 observations.remove(obs)
149 for value in agent.observations[feature]:
150 newObs = copy.copy(obs)
151 newObs[feature] = value
152 observations.append(newObs)
153 else:
154
155 pass
156 features.remove(feature)
157 return self.__generateAgentObservations(state,actions,
158 agent,features,
159 observations)
167
168 - def bestAction(self,agent,history,horizon,policyOthers):
177
def valueHistory(self,agent,history,horizon,action,policyOthers):
    """Compute the expected value to the given agent of performing the
    given action, given its observation history and fixed policies for
    the other agents.

    Sums, over all possible true world states and all joint actions
    consistent with the agent's choice, the belief-weighted probability
    of the joint action times its reward.

    NOTE(review): lookahead beyond one step (horizon > 1) is not yet
    implemented; only the immediate expected reward is returned.
    """
    value = 0.0
    # Sum over all possible true world states.
    state = self.states.getFirstState()
    while state:
        # Likelihood of this world state given the agent's history.
        belief = self.computeBelief(state,agent,history,policyOthers)
        # Sum over all joint actions consistent with agent's choice.
        for actionStr in self.team.generateAllActions({agent:action}):
            actionTeam = self.team.decomposeActions(actionStr)
            # BUG FIX: the original referenced an undefined
            # 'actionOthers'; derive it here as the joint action minus
            # the focal agent's own part -- TODO confirm against
            # probAction's expected argument.
            actionOthers = copy.copy(actionTeam)
            try:
                del actionOthers[agent]
            except KeyError:
                pass
            prob = belief * self.probAction(actionOthers,agent,history,
                                            state)
            value = value + prob * self.reward(state,actionTeam)
            if horizon > 1:
                # TODO: recurse over projected observation histories.
                pass
        state = self.states.getNextState(state)
    return value
200
203
204 if debug:
205 print
206 print '--------'
207 print 'Observation phase:'
208 for s in states[:]:
209 if debug:
210 print '--------'
211 print 'Examining state:',strip(s['_world'])
212
213 observations = [{'_prob':1.0}]
214 self.__generateTeamObservations(s['_world'],s['_actions'],
215 self.agents[:],observations,
216 debug)
217 if debug:
218 print 'Possible observations:'
219 for obs in observations:
220 print '\t',strip(obs)
221
222 for belief in beliefs:
223 if belief['_type'] == 'observation' and \
224 belief['_epoch'] == epoch:
225 totalProb = 0.0
226 break
227 else:
228 totalProb = 1.0
229 belief = None
230 if belief:
231 for obs in observations[:]:
232 for feature in obs[agent].keys():
233 if feature[0] != '_' and \
234 obs[agent][feature] != belief[feature]:
235 break
236 else:
237 feature = None
238 if feature:
239
240 observations.remove(obs)
241 else:
242
243 totalProb = totalProb + obs['_prob']
244 if len(observations) == 0:
245
246 for state in states:
247 if not state is s:
248 state['_prob'] = state['_prob'] / (1.0 - s['_prob'])
249 else:
250
251 if s.has_key('_parent'):
252 s['_children'] = []
253 for obs in observations:
254 newState = copy.copy(s)
255 for a in self.agents:
256 newState[a.name] = a.preComStateEstimator(s[a.name],obs[a.name],epoch)
257
258 newState['_prob'] = newState['_prob'] \
259 * obs['_prob'] / totalProb
260 if s.has_key('_parent'):
261 newState['_parent'] = s
262 s['_children'].append(newState)
263 states.append(newState)
264 states.remove(s)
265
315
317
318 if debug:
319 print
320 print '--------'
321 print 'Action phase:'
322 value = 0.0
323 for s in states[:]:
324 if debug:
325 print '--------'
326 print 'Examining state:',strip(s['_world'])
327 print 'Beliefs:',strip(s)
328
329
330 s['_actions'] = {}
331 for agent in self.agents:
332 act = domPolicy[agent.name].execute(s[agent.name])
333 if act:
334 s['_actions'][agent.name] = act
335 if debug:
336 print 'Actions:',s['_actions']
337
338 s['_value'] = s['_value'] \
339 + self.rewardAct(s['_world'],s['_actions'])
340
341 value = value + s['_prob'] * s['_value']
342 if debug:
343 print 'Reward:',s['_value']
344 return value
345
347
348 if debug:
349 print
350 print '--------'
351 print 'World dynamics phase:'
352 for s in states[:]:
353 if debug:
354 print '--------'
355 print 'Examining state:',strip(s['_world'])
356 print 'Beliefs:',strip(s)
357 orig = self.states.state2index(s['_world'])
358 action = self.team.composeActions(s['_actions'])
359 if s.has_key('_parent'):
360 s['_children'] = {}
361 for dest in self.states.children[orig][action]:
362 destState = self.states.index2state(dest)
363 newState = copy.copy(s)
364 newState['_world'] = destState
365 newState['_epoch'] = newState['_epoch'] + 1
366 newState['_prob'] = s['_prob'] \
367 * self.probability(s['_world'],
368 destState,action)
369 if s.has_key('_parent'):
370 newState['_parent'] = s
371 s['_children'][action] = newState
372 try:
373 del newState['_messages']
374 except KeyError:
375 pass
376 states.append(newState)
377 states.remove(s)
378
380 """Generates all possible new belief states for an agent"""
381 raise NotImplementedError
382