1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 import copy
29 import string
30 from types import *
31 from teamwork.policy.generic import Policy
32
34 if horizon == 0:
35 return policySpace
36 else:
37 for policy in policySpace[:]:
38
39 policySpace.remove(policy)
40 subspace = [policy]
41 policyObj = GenericPolicy(policy)
42 if debug:
43 print 'Expanding policy:',policyObj
44 leaves = policyObj.getNodes()
45 while len(leaves) > 0:
46
47 entry = leaves.pop()
48 if debug:
49 print '\tExpanding leaf:',entry
50
51 table = []
52 for omega in observations:
53 tableEntry = {'key':omega}
54 table.append(tableEntry)
55
56 newTables = [[]]
57 generateActionCombos(table,actions,newTables)
58 if debug:
59 print '\t\tNew actions:'
60
61 for partialPolicy in subspace[:]:
62
63
64 subspace.remove(partialPolicy)
65 for table in newTables:
66
67 if debug:
68 print '\t\t\tTable:',table
69 newPolicy = copy.deepcopy(partialPolicy)
70 for currentEntry in \
71 GenericPolicy(newPolicy).getNodes():
72 if currentEntry == entry:
73 break
74 else:
75
76 print 'Unable to find entry!!!'
77 currentEntry['table'] = table
78
79
80 subspace.append(newPolicy)
81
82
83 for partialPolicy in subspace:
84 policySpace.append(partialPolicy)
85 if debug:
86 print '\t\t\tNew Policy:',GenericPolicy(partialPolicy)
87 print
88 del subspace
89 if horizon == 1:
90 return policySpace
91
92
93 return generatePolicies(policySpace,actions,observations,
94 horizon-1,debug)
95
97 while len(table) > 0:
98 entry = table.pop()
99 for partialTable in result[:]:
100 result.remove(partialTable)
101 for action in actions:
102 entry['action'] = action
103 newTable = copy.copy(partialTable)
104 newTable.append(copy.copy(entry))
105 result.append(newTable)
106 return result
107
109 """generic class for a communication policy, where there is a
110 single possible message and it refers to the achievement of some
111 joint goal"""
112
def __init__(self, jpg, achievedMsg, type='joint intentions'):
    """Create a policy whose single possible message is achievedMsg.

    jpg: dictionary of the conditions under which the JPG counts as
    achieved.  Each key is a relevant feature name; each value is the
    list of feature values that satisfy the condition.
    achievedMsg: the one message this policy can send; kept as
    self.trueMsg.
    type: label handed through to Policy.__init__.
    """
    # The policy's choices are "send the message" or "stay silent" (None)
    messageChoices = [achievedMsg, None]
    Policy.__init__(self, messageChoices, type)
    self.trueMsg = achievedMsg
    self.jpg = jpg
120
def execute(self, state, choices=[], debug=0):
    """Decide whether to announce that the JPG has been achieved.

    state: sequence of belief dictionaries; state[0] is checked against
    the JPG conditions and every entry must carry a '_type' key.
    choices, debug: accepted for interface compatibility; unused here.

    Returns self.trueMsg when every JPG feature in state[0] has one of
    its accepted values and no belief of type 'message' already holds
    self.trueMsg; returns None otherwise.
    """
    # Any feature outside its accepted values means the JPG is not achieved
    for feature, accepted in self.jpg.items():
        if state[0][feature] not in accepted:
            return None
    # JPG achieved: stay silent if the message is already in the beliefs
    for belief in state:
        if belief['_type'] != 'message':
            continue
        for agent in belief.keys():
            if belief[agent] == self.trueMsg:
                return None
    # Achieved and not yet announced by anyone
    return self.trueMsg
140
142 """subclass of joint intentions policies, but following the STEAM
143 algorithm for decision-theoretic selectivity"""
def __init__(self, jpg, achievedMsg, gamma, costMiscoord, costComm):
    """Set up a joint-intentions policy with STEAM-style selectivity.

    jpg, achievedMsg: forwarded to JIPolicy.__init__ (policy type
    'STEAM').
    gamma: 'high' or 'low'.
    costMiscoord: 'high', 'medium', or anything else (treated as the
    lowest level).
    costComm: 'low' enables communication when the cost of not
    communicating is only 'medium'.

    Sets self.communicate to 1 when the policy should communicate and
    to None otherwise.

    Raises ValueError if gamma is neither 'high' nor 'low'; previously
    this case failed with an unbound local variable.
    """
    JIPolicy.__init__(self, jpg, achievedMsg, 'STEAM')
    # Qualitative cost of NOT communicating, from gamma and costMiscoord
    if gamma == 'high':
        if costMiscoord in ('high', 'medium'):
            costNonComm = 'high'
        else:
            costNonComm = 'medium'
    elif gamma == 'low':
        if costMiscoord == 'high':
            costNonComm = 'medium'
        else:
            costNonComm = 'low'
    else:
        raise ValueError("gamma must be 'high' or 'low', not %r" % (gamma,))
    # Communicate when silence is costly, or moderately costly while
    # communication itself is cheap
    if costNonComm == 'high' or \
       (costNonComm == 'medium' and costComm == 'low'):
        self.communicate = 1
    else:
        self.communicate = None
172
173 - def execute(self,state,choices=[],debug=0):
178
180 """generic policy class that never specifies communication"""
183
184 - def execute(self,state,choices=[],debug=0):
186
188 """a generic communication policy class that searches a space of
189 candidate policies and finds the best one for a given COM-MTDP.
190 It allows one to specify a space of possible policies, and have
191 the constructor return the optimal policy from that space."""
192 - def __init__(self,com_mtdp,states,agent,policySpace,otherAgentsComPolicy,
193 domPolicy,horizon,debug=0):
217
218 - def execute(self,state,choices=[],debug=0):
220
222 """subclass of joint intentions policies that performs a brute
223 force search to determine the best policy of communication, in
224 reference to a single JPG"""
225 - def __init__(self,com_mtdp,states,domPolicy,agent,horizon,
226 jpg,achievedMsg,debug=0):
# Constructor for the brute-force search over communication decisions.
#   com_mtdp  - model object; stored as self.mtdp and used for the
#               Project*/Execute* calls below
#   states    - initial world states; each is given probability
#               1/len(states)
#   domPolicy - handed to ExecuteActions; stored as self.domPolicy
#   agent     - compared against agent.name below, so presumably this
#               agent's name -- TODO confirm
#   horizon   - number of epochs iterated over
#   jpg, achievedMsg - forwarded to JIPolicy.__init__
#   debug     - truthy value enables the trace prints
# NOTE(review): block indentation is not recoverable from this listing, so
# any nesting implied by the comments below is a best reading -- confirm.
227 JIPolicy.__init__(self,jpg,achievedMsg,'globally optimal')
228 self.agent = agent
229 self.domPolicy = domPolicy
230 self.mtdp = com_mtdp
# This agent gets a SingleMsgPolicy wrapped around this object; every
# other agent gets a SilentPolicy.  The loop variable rebinds the `agent`
# parameter (self.agent was saved above).
231 comPolicy = {self.agent:SingleMsgPolicy(self)}
232 for agent in self.mtdp.agents:
233 if agent.name != self.agent:
234 comPolicy[agent.name] = SilentPolicy()
235
236
# One root node per initial world state, with uniform probability
237 stateList = []
238 for state in states:
239 stateList.append({'_world': state,'_parent':None,
240 '_prob':1.0/float(len(states)),'_epoch':0,
241 '_value':0.0,'_actions':{},'_messages':{}})
# Seed every node with each agent's initial belief state
242 for s in stateList:
243 for agent in self.mtdp.agents:
244 s[agent.name] = agent.initialStateEstimator()
# For each epoch: observations, communication, actions, world dynamics
# (appears to expand stateList in place -- TODO confirm against the MTDP
# class)
245 for epoch in range(horizon):
246 if debug:
247 print '========='
248 print 'Epoch',epoch
249 if debug:
250 print '========='
251 print '# States:',len(stateList)
252
253 self.mtdp.ProjectObservations(stateList,None,[],epoch,debug)
254 self.mtdp.ExecuteCommunication(stateList,comPolicy,debug)
255 self.mtdp.ExecuteActions(stateList,domPolicy,debug)
256 self.mtdp.ProjectWorldDynamics(stateList)
257
258
259 if debug:
260 print
261 print '-------'
262 print 'Policy selection phase'
263 if debug:
264 print '-------'
265 print 'Examining Leaf Nodes:',len(stateList)
# `values` collects one entry per distinct belief state with the summed
# value of communicating vs. not communicating
266 values = []
267 leafNodes = stateList
# NOTE(review): stateList is emptied here and nothing visible appends to
# it before the `while len(stateList) > 0` loop further down.
268 stateList = []
269 for index in range(len(leafNodes)):
# NOTE(review): this print is not guarded by `debug`
270 print index
271 s = leafNodes[index]
# Probability-weighted value of this leaf
272 value = s['_value'] * s['_prob']
273
274
# Whether this agent has an entry in the parent's '_messages' decides
# which of the two totals the value is added to
275 key = 'Communicate'
276 if not s['_parent']['_messages'].has_key(self.agent):
277 key = 'No ' + key
278 if debug:
279 print '-------'
# NOTE(review): `strip` is not bound by the visible imports (`import
# string` binds only the module name) -- confirm where it comes from.
280 print 'Examining state:',strip(s['_world'])
281 print 'Epoch:',s['_epoch']
282 print 'Latest belief update:',s[self.agent][0]
283 print 'Message choice:',key
284 print 'Value:',value
285
286
# Credit the value to the node two levels up, creating its '_msgValues'
# totals on first use
287 parent = s['_parent']['_parent']
288 belief = parent[self.agent]
289 if not parent.has_key('_msgValues'):
290 parent['_msgValues'] = {'Communicate':0.0,
291 'No Communicate':0.0}
292 parent['_msgValues'][key] = parent['_msgValues'][key] \
293 + value
294 if debug:
295 print 'Parent:',strip(s['_world'])
296
# Find the `values` entry whose beliefs equal this belief, or create one
# (the else presumably pairs with the for loop -- TODO confirm)
297 for entry in values:
298 if entry['Beliefs'] == belief:
299 if debug:
300 print 'Updating value...'
301 break
302 else:
303 if debug:
304 print 'Creating value...'
305 entry = {'Beliefs':belief,
306 'Communicate':0.0,
307 'No Communicate':0.0}
308 values.append(entry)
309 entry[key] = entry[key] + value
310 if debug:
311 print 'New total value:',entry[key]
312
313 if not parent in leafNodes:
314 leafNodes.append(parent)
315 del s
# NOTE(review): self.policy and `values` are the same list object from
# here on, so the loop below deletes from and appends to one list.
316 self.policy = values
# NOTE(review): `length` is not defined in the visible file; `len` was
# probably intended.
317 for index in range(length(values)):
318 entry = {'Beliefs':values[index]['Beliefs']}
319 entry['Policy'] = values[index]['Communicate'] \
320 > values[index]['No Communicate']
321 del values[index]
322 self.policy.append(entry)
# NOTE(review): debug output is forced on from this point regardless of
# the `debug` argument
323 debug = 1
324 while len(stateList) > 0:
325 if debug:
326 print '-------'
327 print 'Starting a new ply...'
328
329 for s in stateList[:]:
330 if debug:
331 print '-------'
332 print 'Examining state:',strip(s['_world'])
333 print 'Epoch:',s['_epoch']
334 print 'Latest belief update:',s[self.agent][0]
335 print 'Values:',s['_msgValues']
336
337 value = s['_value']
338
339
# NOTE(review): backticks are Python 2 repr(); self.policy was bound to a
# list above, so indexing it with a string looks wrong -- confirm intent.
340 key = self.policy[`s[self.agent]`]
341 if debug:
342 print 'Preference:',key
343 value = value + s['_msgValues'][key]
344 value = value * s['_prob']
345 stateList.remove(s)
346
347 s = s['_parent']
348 if s['_parent']:
349
350
351 key = 'Communicate'
352 if not s['_parent']['_messages'].has_key(self.agent):
353 key = 'No ' + key
354 if debug:
355 print 'Message choice:',key
356 print 'Value:',value
357
358
359 parent = s['_parent']['_parent']
360 belief = parent[self.agent]
361 if not parent.has_key('_msgValues'):
362 parent['_msgValues'] = {'Communicate':0.0,
363 'No Communicate':0.0}
364 parent['_msgValues'][key] = parent['_msgValues'][key] \
365 + value
366 if debug:
367 print 'Parent:',strip(s['_world'])
368
369 for entry in values:
370 if entry['Beliefs'] == belief:
371 if debug:
372 print 'Updating value...'
373 break
374 else:
375 if debug:
376 print 'Creating value...'
377 entry = {'Beliefs':belief,
378 'Communicate':0.0,
379 'No Communicate':0.0}
380 values.append(entry)
381 entry[key] = entry[key] + value
382 if debug:
383 print 'New total value:',entry[key]
384
385 if not parent in stateList[:]:
386 stateList.append(parent)
387 del s
# Same conversion loop as above, with the same `length` problem
388 for index in range(length(values)):
389 entry = {'Beliefs':values[index]['Beliefs']}
390 entry['Policy'] = values[index]['Communicate'] \
391 > values[index]['No Communicate']
392 del values[index]
393 self.policy.append(entry)
394 if debug:
395 print
396 print '-------'
397 print 'Final Policy:'
398 print 'Communicate in the following belief states:'
# NOTE(review): `policy` is not a local or visible global name here;
# self.policy may have been intended, but that is a list without keys().
399 for key in policy.keys():
400 if policy[key]:
401 print '-------'
402 print key
# NOTE(review): `hello` is undefined in the visible file -- looks like a
# leftover debugging trap that raises NameError when reached.
403 print hello
404
405 - def execute(self,state,choices=[],debug=0):
407
409 """subclass of joint intentions policies that generates all
410 possible communication behaviors for an agent with a single joint
411 commitment (this class is no longer very useful)"""
415
def execute(self, state, choices=[], debug=0):
    """Generates possible messages for this agent

    state: sequence of belief dictionaries, each carrying a '_type' key.
    choices, debug: accepted for interface compatibility; unused here.

    Returns [None] (silence is the only option) once any belief of type
    'message' has an entry keyed by self.agent; otherwise returns
    [None, self.trueMsg].
    """
    for belief in state:
        # `in` replaces the deprecated dict.has_key()
        if belief['_type'] == 'message' and self.agent in belief:
            return [None]
    # No message from this agent found: both options remain open
    return [None, self.trueMsg]
424
425
427 """subclass of joint intentions policies that generates locally
428 optimal decisions with respect to a single JPG"""
429 - def __init__(self,com_mtdp,states,comPolicy,domPolicy,agent,horizon,
430 jpg,achievedMsg,debug=0):
439
440 - def execute(self,state,choices=[],debug=-1):
# Decides whether to send the JPG-achieved message by comparing the
# evaluated value of communicating against not communicating.
#   state   - sequence of belief dictionaries; state[0]['_epoch'] is read
#             as the current epoch
#   choices - forwarded to JIPolicy.execute
#   debug   - negative value means "use self.debug"
# Returns the result of JIPolicy.execute when the 'Communicate' total is
# strictly greater than the 'No Communicate' total, otherwise None.
# Reads self.debug, self.initial, self.mtdp, self.agent, self.trueMsg,
# self.domPolicy, self.comPolicy and self.horizon.
# NOTE(review): block indentation is not recoverable from this listing, so
# any nesting implied by the comments below is a best reading -- confirm.
441 if debug < 0:
442 debug = self.debug
# If the base policy would not send anything, there is nothing to decide
443 if not JIPolicy.execute(self,state,choices):
444
445 return None
446
447 currentEpoch = state[0]['_epoch']
448 if debug:
449 print 'Executing at time:',currentEpoch
450 print 'Beliefs:',state
451
452
# Start from the initial world states with uniform probability
453 stateList = []
454 for s in self.initial:
455 stateList.append({'_world': s,'_epoch':0,
456 '_prob':1.0/float(len(self.initial)),
457 '_value':0.0,'_actions':{}})
458 for s in stateList:
459 for agent in self.mtdp.agents:
460 s[agent.name] = agent.initialStateEstimator()
461 self.__generateConsistentStates(state,stateList,currentEpoch,debug)
462 if debug:
463 print 'Consistent states:',len(stateList)
464
465 if debug:
466 print
467 print '--------'
468 print 'Communication phase:'
# Running totals and successor states for the two options
469 value = {'Communicate': 0.0, 'No Communicate':0.0}
470 states = {'Communicate':[],'No Communicate':[]}
471 for s in stateList[:]:
472 if debug:
473 print '--------'
# NOTE(review): `strip` is not bound by the visible imports -- confirm
# where it comes from.
474 print 'Examining state:',strip(s['_world'])
475 print 'Beliefs:',strip(s[self.agent])
476
# NOTE(review): inside a class body Python name-mangles
# `__generateTeamMessages` with THIS class's name, so the lookup on
# self.mtdp may not find the MTDP's own private method -- confirm.
477 messages = self.mtdp.__generateTeamMessages(s,self.mtdp.agents[:],
478 [{}])
479 for msg in messages:
# Shallow copy: nested objects are shared with s
480 newState = copy.copy(s)
481 msg['_type'] = 'message'
482 if debug:
483 print 'Messages:',msg
484
485 for agent in self.mtdp.agents:
486 newState[agent.name] = agent.postComStateEstimator(s[agent.name],msg)
487
488 newState['_value'] = self.mtdp.rewardCom(s['_world'],msg)
489 newState['_parent'] = s
# Sort the successor by whether this agent sent the true message
# NOTE(review): which `if` the else below pairs with cannot be determined
# from this listing -- confirm.
490 if msg.has_key(self.agent):
491 if msg[self.agent] == self.trueMsg:
492 states['Communicate'].append(newState)
493 else:
494 states['No Communicate'].append(newState)
495 stateList.remove(s)
496
# Evaluate each option: add what ExecuteActions returns and the 'Reward'
# entry of the _evaluatePolicy result
497 for choice in ['Communicate','No Communicate']:
498 stateList = states[choice]
499 if debug:
500 print '+++++++'
501 print 'Evaluating Policy:',choice
502 print '# States:',len(stateList)
503 print '+++++++'
504
505 value[choice] = value[choice] + \
506 self.mtdp.ExecuteActions(stateList,self.domPolicy,
507 debug)
508 self.mtdp.ProjectWorldDynamics(stateList)
509 result = self.mtdp._evaluatePolicy(stateList,self.domPolicy,
510 self.comPolicy,
511 self.horizon,debug)
512 value[choice] = value[choice] + result['Reward']
513
514
515 if debug:
516 print
517 print '-------'
518 print 'Policy selection phase'
519 print '-------'
520 for key in value.keys():
521 print key+':',value[key]
# Ties go to staying silent
522 if value['Communicate'] > value['No Communicate']:
523
524 return JIPolicy.execute(self,state,choices)
525 else:
526
527 return None
528
530 for epoch in range(currentTime+1):
531 if debug:
532 print '========='
533 print 'Epoch',epoch
534 print '========='
535 self.mtdp.ProjectObservations(stateList,self.agent,beliefs,
536 epoch,debug)
537 if epoch == currentTime:
538 break
539 comPolicy = {}
540 for agent in self.mtdp.agents:
541 comPolicy[agent.name] = RepeatMsgs(agent.name,beliefs,epoch)
542 self.mtdp.ExecuteCommunication(stateList,comPolicy,debug)
543 self.mtdp.ExecuteActions(stateList,self.domPolicy,debug)
544 self.mtdp.ProjectWorldDynamics(stateList)
545
547 """policy subclass, useful as a helper to the
548 LocallyOptimalJIPolicy class"""
549 - def __init__(self,agent,beliefs,epoch):
# Looks through `beliefs` for an entry of type 'message' recorded at the
# given epoch and stores that agent's entry from it as self.msg; self.msg
# stays None when nothing matches.
#   agent   - key looked up inside the message belief
#   beliefs - iterable of belief dictionaries with '_type' and '_epoch'
#   epoch   - epoch the message must belong to
550 self.msg = None
551
552 for msg in beliefs:
553 if msg['_type'] == 'message' and \
554 msg['_epoch'] == epoch:
555 if msg.has_key(agent):
556 self.msg = msg[agent]
# NOTE(review): indentation is lost in this listing, so it is unclear
# whether the break fires on the first message of the epoch or only once
# the agent's key was found -- confirm.
557 break
558
559 - def execute(self,state,choices=[],debug=0):
561
562
# Script entry: builds a policy space from a single empty policy using
# the actions 'left'/'right' and 'Tiger' observations, then prints the
# resulting policies and how many there are.
563 if __name__=='__main__':
564 initialSpace = [{}]
565
566
567
# First call uses one observation and horizon argument 1; the second
# extends the result with two observations and horizon argument 2
568 space = generatePolicies(initialSpace,['left','right'],
569 [{'Tiger':'l'}],1)
570 space = generatePolicies(space,['left','right'],
571 [{'Tiger':'l'},{'Tiger':'r'}],2)
# NOTE(review): GenericPolicy is not bound by the visible imports (only
# Policy is imported from teamwork.policy.generic) -- confirm.
572 for policy in space:
573 policy = GenericPolicy(policy)
574 print policy
575 print '--------------------------'
576 print '# Policies:',len(space)
577