1 """PsychSim specification of the multiagent tiger problem
2 """
3 import sys
4 import time
5
6 from teamwork.agent.AgentClasses import classHierarchy
7 from teamwork.math.KeyedMatrix import *
8 from teamwork.math.probability import Distribution
9 from teamwork.math.ProbabilityTree import *
10 from teamwork.action.PsychActions import Action
11 from teamwork.policy.pwlTable import PWLTable,getProbRep
12 from teamwork.policy.pwlPolicy import PWLPolicy
13 from teamwork.policy.ObservationPolicy import ObservationPolicy,solve
14
# Encoding of the tiger's position feature value.
RIGHT = 1.
LEFT = 0.

# Reward components of the tiger game.
LISTENCOST = -1.   # cost incurred by a Listen action
DAMAGE = -50.      # penalty for opening the door with the tiger behind it
PRIZE = 20.        # reward for opening the door with the prize behind it
21
32
37
38
# Generic model of the tiger: a passive entity whose only state feature is
# which door it is behind.  Opening either door resets its position;
# listening leaves it where it is (resetPosition/stayPosition build the
# corresponding dynamics trees — defined elsewhere in this file).
classHierarchy['Tiger'] = {
    'parent': ['Entity'],
    # Uniform prior over the tiger's position.
    'state':{'position': Distribution({RIGHT:0.5,LEFT:0.5})
             },
    'depth':0,
    # The tiger itself takes no actions.
    'actions':{'type':None},
    'dynamics':{'position':{'OpenRight':resetPosition(),
                            'OpenLeft':resetPosition(),
                            'Listen':stayPosition(),
                            },
                },
    }
51
64
69
74
88
# Joint reward trees, keyed by the concatenation of both players' action
# types (Player 1's first).  openSame/openListen/openDifferent/listen are
# the payoff-tree builders (defined elsewhere in this file).
rewardDict = {'OpenRightOpenRight':openSame(RIGHT),
              'OpenRightListen':openListen(RIGHT),
              'OpenRightOpenLeft':openDifferent(),
              'ListenOpenRight':openListen(RIGHT),
              'ListenListen':listen(),
              'ListenOpenLeft':openListen(LEFT),
              'OpenLeftOpenRight':openDifferent(),
              'OpenLeftListen':openListen(LEFT),
              'OpenLeftOpenLeft':openSame(LEFT),
              }
# Pad every reward tree to the same key set and lock it down.
keyList = [ConstantKey(),StateKey({'entity':'Tiger','feature':'position'})]
for tree in rewardDict.values():
    tree.fill(keyList)
    tree.freeze()

# The two observation symbols a player can receive after a joint action.
Omega = {'left':ObservationKey({'type':'heard left'}),
         'right':ObservationKey({'type':'heard right'}),
         }
125
154
# Observation-function trees, keyed like rewardDict by the concatenation of
# both players' action types.  Only joint listening (ListenListen) yields
# an informative observation (listenObs); any door opening yields the
# uninformative openObs distribution.  Builders defined elsewhere in file.
observationDict = {'OpenRightOpenRight':openObs(),
                   'OpenRightListen':openObs(),
                   'OpenRightOpenLeft':openObs(),
                   'ListenOpenRight':openObs(),
                   'ListenListen':listenObs(),
                   'ListenOpenLeft':openObs(),
                   'OpenLeftOpenRight':openObs(),
                   'OpenLeftListen':openObs(),
                   'OpenLeftOpenLeft':openObs(),
                   }
165
166
# Generic model of a player ("Dude"): holds a belief about the tiger's
# position and chooses exactly one of three actions per turn.
classHierarchy['Dude'] = {
    'parent': ['Entity'],
    'goals':[],
    # Initial belief: tiger more likely behind the right door.
    'beliefs':{'Tiger':{'position':Distribution({RIGHT:0.75,LEFT:0.25}),
                        },
               },
    # XOR action set: exactly one of the three literal action types.
    'actions':{'type':'XOR',
               'key':'type',
               'values':[{'type':'literal','value':'OpenRight'},
                         {'type':'literal','value':'OpenLeft'},
                         {'type':'literal','value':'Listen'},
                         ],
               },
    # Both observation symbols are possible for a player.
    'observations':{Omega['left']:{},
                    Omega['right']:{},
                    },
    }
184
196
def exhaustive(scenario,transition,observations,reward,
               policyHorizon,lookaheadHorizon,timings):
    """Exhaustive search over deterministic observation policies.

    For each horizon t up to policyHorizon, enumerates every pair of
    binary observation-history policies for the two players (indexed as
    integers in [0, 2^(2^t)) ), evaluates each pair with L{EV}, and keeps
    the best pair found.  Player 2's counter starts at Player 1's value,
    exploiting the symmetry of the game to halve the search.
    @param timings: list to which the total elapsed time (sec) is appended
    @return: list of policy dictionaries, one per horizon
    @warning: pretty specific to tiger scenario
    """
    policies = []
    start = time.time()
    for t in range(policyHorizon+1):
        policies.append({})
        # Number of distinct observation histories of length t.
        size = pow(2,t)
        best = {'policy':None,'value':None}
        policies[-1]['Player 1'] = 0
        while policies[-1]['Player 1'] < pow(2,size):
            # Symmetry: only consider Player 2 policies >= Player 1's.
            policies[-1]['Player 2'] = policies[-1]['Player 1']
            while policies[-1]['Player 2'] < pow(2,size):
                value = EV(scenario,transition,observations,reward,
                           lookaheadHorizon)
                if best['policy'] is None or value > best['value']:
                    best['policy'] = copy.copy(policies[-1])
                    best['value'] = value
                policies[-1]['Player 2'] += 1
            policies[-1]['Player 1'] += 1
        # Keep only the best pair found for this horizon.
        policies[-1] = best['policy']
    timings.append(time.time()-start)
    return policies
229
231 """
232 @type policies: dict
233 @type beliefs: L{Key}[]
234 """
235 horizon = min(len(beliefs),len(agent.policy)+1)
236 try:
237 policy = agent.policy[len(beliefs)]
238 except IndexError:
239 policy = agent.policy[-1]
240 relevant = beliefs
241
242 index = 0
243 for t in range(len(relevant)):
244 if relevant[t] == Omega['left']:
245 index += pow(2,t)
246
247 entry = (policy/pow(2,index))%2
248
249 for option in agent.actions.getOptions():
250 if entry == 0:
251 if option[0]['type'] == 'Listen':
252 return option
253 elif len(beliefs) == 0:
254
255 if option[0]['type'] != 'Listen':
256 return option
257 elif beliefs[-1] == Omega['left']:
258 if option[0]['type'] == 'OpenRight':
259 return option
260 elif beliefs[-1] == Omega['right']:
261 if option[0]['type'] == 'OpenLeft':
262 return option
263 else:
264 raise UserWarning
265
275
287
def mergeAction(policy,value,option,comparisons=None,conditions=[]):
    """Merges a candidate action's value tree into an existing policy tree.

    Recursively walks both trees; at matched leaves it inserts a branch
    that selects the new option wherever its value exceeds the value the
    policy already stores, pruning branches that the path conditions
    already decide.
    @param policy: current policy tree; leaves are {'value','option'} dicts
    @param value: value tree of the candidate option
    @param option: the candidate action
    @param comparisons: cache passed through to comparePlaneSets
        (created fresh when None)
    @param conditions: (plane,truth) pairs accumulated along the current
        path; never mutated in place, so the shared default list is safe
    @return: the merged L{KeyedTree}
    """
    if comparisons is None:
        comparisons = {}
    if not policy.isLeaf():
        # Recurse down both sides of the policy's own split.
        fTree,tTree = policy.getValue()
        fTree = mergeAction(fTree,value,option,comparisons,
                            conditions+[(policy.split,False)])
        tTree = mergeAction(tTree,value,option,comparisons,
                            conditions+[(policy.split,True)])
        result = KeyedTree()
        result.branch(policy.split,fTree,tTree,pruneF=False,pruneT=False)
        return result
    elif value.isLeaf():
        # Both trees are leaves: branch on whether new value beats old.
        old = policy.getValue()
        new = {'value':value.getValue(),'option':option}
        weights = new['value']-old['value']
        split = [KeyedPlane(weights,0.)]
        for plane,truth in conditions:
            split = comparePlaneSets(split,plane,truth,comparisons,
                                     negative=False)
        if isinstance(split,bool):
            if split:
                # New option dominates everywhere along this path.
                return KeyedTree(new)
            else:
                # Old option dominates everywhere along this path.
                return KeyedTree(old)
        result = KeyedTree()
        result.branch(split,old,new,pruneF=False,pruneT=False)
        return result
    else:
        # Policy is a leaf but the value tree still splits: push the value
        # tree's branches down, pruning those decided by path conditions.
        fValue,tValue = value.getValue()
        split = value.split
        for plane,truth in conditions:
            split = comparePlaneSets(split,plane,truth,comparisons,
                                     negative=False)
        if isinstance(split,bool):
            if split:
                # Split is always true here: only the true branch applies.
                result = mergeAction(policy,tValue,option,comparisons,conditions)
                return result
            else:
                # Split is always false here: only the false branch applies.
                # (bug fix: was misspelled 'conditivons' -> NameError)
                result = mergeAction(policy,fValue,option,comparisons,conditions)
                return result
        fTree = mergeAction(policy,fValue,option,comparisons,
                            conditions+[(split,False)])
        tTree = mergeAction(policy,tValue,option,comparisons,
                            conditions+[(split,True)])
        result = KeyedTree()
        result.branch(split,fTree,tTree,pruneF=False,pruneT=False)
        return result
343
358
373
def EV(scenario,transition,observations,reward,horizon,
       debug=False,behaviors=None):
    """Expected value of the agents' current policies, computed by
    exhaustive forward simulation from each possible starting world
    (tiger left / tiger right), each weighted 0.5."""
    worlds = scenario.state.keys()
    initial = []
    # Build one root node per candidate true world.
    for first,second in [(1.,0.),(0.,1.)]:
        root = {'probability':0.5,'state':copy.copy(scenario.state)}
        root['state'][worlds[0]] = first
        root['state'][worlds[1]] = second
        # Seed empty observation histories and prior beliefs for every
        # agent that actually has a choice to make.
        for agent in filter(lambda a: len(a.actions.getOptions()) > 1,
                            scenario.members()):
            root['observations %s' % (agent.name)] = []
            root[agent.name] = agent.beliefs
        initial.append(root)
    return simulate(scenario,initial,transition,observations,reward,
                    horizon,debug,behaviors)
397
def simulate(scenario,current,transition,observations,reward,horizon,
             debug=False,behaviors=None):
    """Recursively simulates one step of the joint game and returns the
    accumulated expected reward over the remaining horizon.
    @param current: list of nodes to expand; each node carries a
        'probability', a 'state' vector, per-agent observation histories
        ('observations <name>') and, for PWL agents, a belief vector
        keyed by the agent's name
    @param behaviors: optional dict of per-agent lists; when given, each
        (node,action) decision taken is appended for later inspection
    """
    if debug:
        print 'Horizon:',horizon
        start = time.time()
        sys.stderr.write('%d to go (%d nodes)\n' % (horizon,len(current)))
    value = 0.
    for node in current:
        if debug:
            print '%s %f' % (node['state'].getArray(),node['probability'])
        # Ask each deciding agent for its action, dispatching on the
        # representation of its policy.
        actions = {}
        for agent in filter(lambda a: len(a.actions.getOptions()) > 1,
                            scenario.members()):
            if isinstance(agent.policy,int):
                # Integer-coded observation policy (see executeObsPolicy).
                actions[agent.name] = executeObsPolicy(agent,node[agent.name])
            elif isinstance(agent.policy,PWLPolicy):
                # Piecewise-linear policy over the agent's belief vector.
                actions[agent.name] = agent.policy.execute(node[agent.name],
                                                           depth=horizon)[0]
            else:
                # Fallback: policy executed on the raw observation history.
                actions[agent.name] = agent.policy.execute(node['observations %s' % (agent.name)])
            if debug:
                print ','.join(map(str,node['observations %s' % (agent.name)])),
                if node.has_key(agent.name):
                    print node[agent.name].getArray(),
                print '-> %s' % (str(actions[agent.name]))
        node['action'] = actions
        if isinstance(behaviors,dict):
            # Record the decision made in this node for each tracked agent.
            for agent in behaviors.keys():
                behaviors[agent].append((node,actions[agent]))
        # Joint-action key used to index transition/observation/reward.
        node['key'] = ' '.join(map(str,actions.values()))
        # Immediate expected reward of this node.
        r = reward[node['key']]*node['state']
        if debug:
            print node['key']
            print node['state'].getArray()
            print 'R =',r
        value += node['probability']*r
    if horizon == 0:
        return value
    # Project each node one step forward.
    projection = []
    for node in current:
        # Distribution over successor worlds under the joint action.
        dist = transition[node['key']]*node['state']
        worlds = dist.keys()
        combinations = []
        for world in range(len(dist)):
            # Successor state: deterministic indicator for this world.
            new = copy.copy(node['state'])
            new[worlds[world]] = 1.
            for other in range(len(dist)):
                if other != world:
                    new[worlds[other]] = 0.
            prob = node['probability']*dist[worlds[world]]
            if prob > 0.:
                combo = {'probability':prob,'state':new}
                for agent in filter(lambda a: len(a.actions.getOptions())>1,
                                    scenario.members()):
                    key = 'observations %s' % (agent.name)
                    combo[key] = node[key][:]
                combinations.append(combo)
        # Branch each successor on every possible observation per agent.
        for agent in filter(lambda a: len(a.actions.getOptions())>1,
                            scenario.members()):
            next = []
            for old in combinations:
                for omega in Omega.values():
                    action = node['action'][agent.name]
                    prob = observations[node['key']][omega]*old['state']
                    new = copy.deepcopy(old)
                    new['observations %s' % (agent.name)].append(omega)
                    new['probability'] *= prob
                    if new['probability'] > 0.:
                        if isinstance(agent.policy,PWLPolicy):
                            # Belief update for PWL agents only.
                            belief = agent.stateEstimator(beliefs=node[agent.name],
                                                          actions=action,
                                                          observation=omega)
                            new[agent.name] = belief
                        next.append(new)
            combinations = next
        projection += combinations
    # Merge duplicate nodes (same histories, ~same vectors) by summing
    # their probabilities, to keep the node count from exploding.
    index = 1
    while index < len(projection):
        node = projection[index]
        for other in projection[:index]:
            for key in filter(lambda k: k != 'probability',node.keys()):
                if key[:12] == 'observations':
                    if node[key] != other[key]:
                        # Histories differ; not a duplicate.
                        break
                elif sum(map(abs,(node[key] - other[key]).getArray())) > epsilon:
                    # Vectors differ beyond tolerance; not a duplicate.
                    break
            else:
                # All fields matched: fold node into the earlier one.
                other['probability'] += node['probability']
                del projection[index]
                break
        else:
            # No match found; keep this node and move on.
            index += 1
    # Sanity check: probabilities must still sum to 1.
    # NOTE(review): 'epsilon' is defined elsewhere in this file.
    total = 0.
    for node in projection:
        total += node['probability']
    assert abs(total-1.) < epsilon, 'Prob = %f' % (total)
    if debug:
        sys.stderr.write('\t%d sec\n%f\n' % (time.time()-start,value))
    # Recurse over the remaining horizon.
    return value + simulate(scenario,projection,transition,observations,
                            reward,horizon-1,debug,behaviors)
514
515
517 import bz2
518 import copy
519 import random
520
521 from teamwork.multiagent.GenericSociety import GenericSociety
522 society = GenericSociety()
523 try:
524 f = bz2.BZ2File('tiger.soc','r')
525 data = f.read()
526 f.close()
527 from xml.dom import minidom
528 doc = minidom.parseString(data)
529 society.parse(doc.documentElement)
530
531 except IOError:
532 society.importDict(classHierarchy)
533 society.save('tiger.soc')
534 print 'Saved generic models'
535
536 from teamwork.agent.Entities import PsychEntity
537 from teamwork.multiagent.PsychAgents import PsychAgents
538
539 agents = []
540 agents.append(society.instantiate('Tiger','Tiger',PsychEntity))
541 agents.append(society.instantiate('Dude','Player 1',PsychEntity))
542 agents.append(society.instantiate('Dude','Player 2',PsychEntity))
543 full = PsychAgents(agents)
544 full.applyDefaults()
545
546
547 from teamwork.multiagent.pwlSimulation import PWLSimulation
548
549 scenario = PWLSimulation(full)
550
551
552 keyList = scenario.state.domain()[0].keys()
553 keyList.sort()
554 state = scenario.getState()
555 for tree in rewardDict.values():
556 tree.fill(keyList)
557 tree.freeze()
558 for key,tree in observationDict.items():
559 for plane in tree.branches().values():
560 if not isinstance(plane,Distribution):
561 plane.weights.fill(keyList)
562 for matrix in tree.leaves():
563 matrix.addColumns(keyList)
564 tree.freeze()
565
566 actions = {}
567 for act1 in scenario['Player 1'].actions.getOptions():
568 if act1[0]['type'] == 'Listen':
569 LISTEN1 = act1
570 elif act1[0]['type'] == 'OpenRight':
571 RIGHT1 = act1
572 elif act1[0]['type'] == 'OpenLeft':
573 LEFT1 = act1
574 else:
575 raise UserWarning,'Unknown action: %s' % (act1[0]['type'])
576 for act2 in scenario['Player 2'].actions.getOptions():
577 if act2[0]['type'] == 'Listen':
578 LISTEN2 = act2
579 elif act2[0]['type'] == 'OpenRight':
580 RIGHT2 = act2
581 elif act2[0]['type'] == 'OpenLeft':
582 LEFT2 = act2
583 else:
584 raise UserWarning,'Unknown action: %s' % (act2[0]['type'])
585 for act1 in scenario['Player 1'].actions.getOptions():
586 actions['Player 1'] = act1
587 for act2 in scenario['Player 2'].actions.getOptions():
588 actions['Player 2'] = act2
589 actionKey = ' '.join(map(str,actions.values()))
590 if act1[0]['type'] == 'Listen' and act2[0]['type'] == 'Listen':
591 dynamics = full.getDynamics({'Player 1':act1})
592 elif act1[0]['type'] != 'Listen':
593 dynamics = full.getDynamics({'Player 1':act1})
594 else:
595 dynamics = full.getDynamics({'Player 2':act2})
596 tree = dynamics['state'].getTree()
597 tree.unfreeze()
598 tree.fill(keyList)
599 tree.freeze()
600 dynamics['state'].args['tree'] = tree
601 full.dynamics[actionKey] = dynamics
602
603 start = time.time()
604
605 worlds,lookup = full.generateWorlds()
606 for name in ['Player 1','Player 2']:
607 scenario[name].beliefs = KeyedVector()
608 for key,world in worlds.items():
609 scenario[name].beliefs[key] = scenario.state[world]
610 state = KeyedVector()
611 for key,world in worlds.items():
612 state[key] = scenario.state[world]
613 scenario.state = state
614 transition = full.getDynamicsMatrix(worlds,lookup)
615 reward = {}
616 observations = {}
617 for actions in full.generateActions():
618 actionKey = ' '.join(map(str,actions.values()))
619
620 tree = rewardDict[actions.values()[0][0]['type']+\
621 actions.values()[1][0]['type']]
622 vector = KeyedVector()
623 for key,world in worlds.items():
624 vector[key] = tree[world]
625 vector.freeze()
626 reward[actionKey] = vector
627
628 tree = observationDict[actions.values()[0][0]['type']+\
629 actions.values()[1][0]['type']]
630 matrix = KeyedMatrix()
631 for colKey,world in worlds.items():
632 new = tree[world]*world
633 for vector,prob in new.items():
634 for omega in Omega.values():
635 if vector[omega] > 0.5:
636 matrix.set(omega,colKey,prob)
637 matrix.freeze()
638 observations[actionKey] = matrix
639 print observations
640 blah
641 return scenario,full,transition,reward,observations
642
if __name__ == '__main__':
    # Build the scenario plus its matrix representations (setupTigers is
    # defined earlier in this file).
    scenario,full,transition,reward,observations = setupTigers()
    state = scenario.state
    timings = []

    # Find the myopically best joint action under the initial state.
    best = {'key':None,'value':None}
    for key,vector in reward.items():
        value = vector*state
        if best['key'] is None or value > best['value']:
            best['key'] = key
            best['value'] = value
    for actions in full.generateActions():
        if best['key'] == ' '.join(map(str,actions.values())):
            best['action'] = actions
            break
    else:
        raise UserWarning,'Unknown joint action: %s' % (best['key'])

    # Seed each player with a level-0 policy: always take its half of the
    # myopically best joint action, with one-step values for alternatives.
    for name,option in best['action'].items():
        scenario[name].policy = PWLPolicy(scenario[name])
        table = PWLTable()
        table.rules = {0:option}
        table.values = {0:{}}
        actions = copy.copy(best['action'])
        for alternative in scenario[name].actions.getOptions():
            actions[name] = alternative
            actionKey = ' '.join(map(str,actions.values()))
            value = reward[actionKey]
            table.values[0][str(alternative)] = value
        scenario[name].policy.tables.append([table])

    agent = scenario['Player 1']
    other = scenario['Player 2']
    depth = 0
    maxDepth = 1
    horizon = 5
    evalHorizon = 4
    debug = False
    # Alternating best-response: project the agent's policy against the
    # other player's fixed policy, one nesting depth at a time.
    while depth < maxDepth:
        # Reward table for the agent, conditioned on the other's rules.
        R = other.policy.getTable()
        for index in range(len(R)):
            yrAction = other.policy.getTable().rules[index]
            R.values[index].clear()
            for myAction in agent.actions.getOptions():
                actions = {agent.name:myAction,
                           other.name:yrAction}
                actionKey = ' '.join(map(str,actions.values()))
                R.values[index][str(myAction)] = copy.copy(reward[actionKey])

        agent.setEstimator(transition,observations)
        other.setEstimator(transition,observations)
        for t in range(horizon):
            evalHorizon = t
            if debug:
                print
                print 'Horizon:',t
            start = time.time()
            # Value-iteration step for the agent's PWL policy.
            agent.policy.project(R,depth=depth+1,debug=False)
            timings.append(time.time()-start)

            behaviors = None
            # Diagnostic dump of the projected policy's belief thresholds
            # (always enabled; printObsPolicy defined earlier in file).
            if True:
                table = agent.policy.tables[depth+1][t]
                rules = table.rules.keys()
                rules.sort()
                last = 'OpenLeft'
                for rule in rules:
                    factors = table.index2factored(rule)
                    action = table.rules[rule][0]['type']
                    if action != last:
                        try:
                            attr = factors.index(0) - 1
                        except ValueError:
                            attr = len(factors) - 1
                        if action == 'Listen':
                            if attr < 0:
                                # Listen everywhere.
                                print action
                                break
                            else:
                                print getProbRep(table.attributes[attr][0],0),last
                        elif action == 'OpenRight':
                            print getProbRep(table.attributes[attr][0],1),action
                            break
                    last = action
                printObsPolicy(agent,horizon)

            # EV with asymmetric policies (agent updated, other fixed).
            total = EV(scenario,transition,observations,reward,evalHorizon,
                       False,behaviors)
            valueAsym = total/float(evalHorizon+1)
            if debug:
                print 'EV =',total/float(evalHorizon+1)

            # Temporarily mirror the agent's new policy onto the other
            # player (mapping rules onto the other's own action objects)
            # to measure the symmetric EV.
            old = other.policy
            other.policy = PWLPolicy(other)
            for policy in agent.policy.tables:
                tableList = []
                for table in policy:
                    new = PWLTable()
                    new.attributes = table.attributes[:]
                    new.rules.clear()
                    new.rules.update(table.rules)
                    for rule,RHS in new.rules.items():
                        for option in other.actions.getOptions():
                            if option[0]['type'] == RHS[0]['type']:
                                break
                        else:
                            raise UserWarning
                        new.rules[rule] = option
                    tableList.append(new)
                other.policy.tables.append(tableList)
            total = EV(scenario,transition,observations,reward,evalHorizon,
                       False,behaviors)
            valueSym = total/float(evalHorizon+1)
            if debug:
                print 'EV =',valueSym
            other.policy = old
            # CSV-style progress line: horizon, asym EV, sym EV, seconds.
            print '%d,%f,%f,%f' % (t,valueAsym,valueSym,timings[-1])
            sys.stdout.flush()
            # NOTE(review): behaviors is always None above, so this
            # analysis branch is currently dead code.
            if behaviors:
                unique = []
                for node,action in behaviors[agent.name]:
                    for others in unique:
                        if node['state'] == others[0]['state'] and \
                           node[agent.name] == others[0][agent.name]:
                            others[0]['probability'] += node['probability']
                            break
                    else:
                        unique.append((node,action))
                total = 0.
                for node,action in unique:
                    prob = node['probability']/float(horizon+1)
                    total += prob
                    if action[0]['type'] != 'Listen':
                        print 'Real:',node['state']
                        print 'Belief:',node[agent.name]
                        print 'Probability:',prob
                        print action
                assert abs(total-1.) < epsilon
        depth += 1
    if debug:
        print '\nTimes:',timings
787