1 import copy
2
3 import random
4 import sys
5 import time
6 from xml.dom.minidom import *
7 from LookaheadPolicy import LookaheadPolicy
8 from teamwork.action.PsychActions import Action
9 from teamwork.math.Keys import *
10 from teamwork.math.KeyedVector import *
11 from teamwork.math.KeyedMatrix import IdentityMatrix
12 from teamwork.math.KeyedTree import KeyedPlane
13 from teamwork.math.probability import Distribution
14 from teamwork.math.ProbabilityTree import ProbabilityTree
15 from teamwork.dynamics.pwlDynamics import PWLDynamics
16 from teamwork.math.matrices import epsilon
17 from pwlTable import PWLTable
18
20 """Super-nifty class for representing policies as tables and using policy iteration to optimize them.
21 @cvar seedMethod: the method to use to generate the initial RHS values to seed policy iteration. There are currently two types supported:
22 - random: the RHS values are randomly generated
23 - greedy: the initial RHS value provides the best immediate expected reward (i.e., over a one-step horizon)
24 The default is I{greedy}.
25 @type seedMethod: str
26 @ivar rules: the list of RHS actions
27 @type rules: L{Action}[][]
28 """
29 seedMethod = 'greedy'
30
31 - def __init__(self,entity,actions=[],horizon=1):
41
def execute(self,state,observations={},history=None,choices=[],
            index=None,debug=False,explain=False,entities={},cache={}):
    """Applies this policy to the given state and observation history
    @param state: the current state
    @type state: L{KeyedVector}
    @param observations: the current observation history
    @type observations: dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[]
    @param history: dictionary of actions that have been performed so far
    @type history: dict
    @param choices: the eligible action options (default is all of the entity's options)
    @param index: the rule index to apply; computed from C{state} and C{observations} if omitted
    @param explain: if C{True}, an XML explanation document is built and returned alongside the decision
    @param entities: a dictionary of entities to be used as a value tree cache
    @param cache: values computed so far
    @return: the corresponding action in the table
    @rtype: L{Action<teamwork.action.PsychActions.Action>}[]
    @warning: C{cache} is a mutable default argument that this method writes
        into, so computed deltas persist across calls that omit it —
        NOTE(review): looks like an intentional cross-call memo; confirm.
    """
    if history is None:
        history = {}
    if index is None:
        if len(self.rules) <= 1:
            # Degenerate table: only one rule (or none)
            index = 0
        elif isinstance(state,KeyedVector):
            index = self.index(state,observations)
        else:
            # assumes state is a dict with a 'state' Distribution — TODO confirm
            index = self.index(state['state'].expectation(),observations)
    if debug is True:
        print '\t\t',index
    if len(choices) == 0:
        choices = self.entity.actions.getOptions()
    try:
        rhs = self.rules[index]
    except KeyError:
        # No rule covers this index; fall back on full lookahead
        return LookaheadPolicy.execute(self,state,choices,
                                       explain=explain)
    if rhs is None:
        # Empty RHS: seed it with a default choice (see L{default})
        if isinstance(state,KeyedVector):
            self.rules[index] = [self.default(choices,state,observations)]
        else:
            self.rules[index] = [self.default(choices,
                                              state['state'].expectation(),
                                              observations)]
        rhs = self.rules[index]
    if not isinstance(rhs,dict) and not isinstance(rhs,list):
        # Normalize a bare option into a singleton ranking
        rhs = [rhs]
    if explain:
        # Build the explanation document skeleton
        exp = Document()
        root = exp.createElement('explanation')
        exp.appendChild(root)
        field = exp.createElement('alternatives')
        root.appendChild(field)
    else:
        exp = None
    if isinstance(rhs,list):
        # RHS is a ranked list of options; pick the first eligible,
        # non-repeated one
        labels = map(str,choices)
        for index in range(len(rhs)):
            option = rhs[index]
            if str(option) in labels:
                # Option is currently eligible; reject it if any of its
                # non-repeatable actions has already been performed
                for action in option:
                    if not action['repeatable']:
                        if history.has_key(str(action)):
                            # Already performed; record and move on
                            if explain:
                                node = exp.createElement('alternative')
                                node.setAttribute('reason','repeated')
                                node.setAttribute('rank',str(index))
                                for action in option:
                                    node.appendChild(action.__xml__().documentElement)
                                field.appendChild(node)
                            break
                else:
                    # All actions passed the repeat check; select this option
                    break
            else:
                # Option is not among the currently eligible choices
                if explain:
                    node = exp.createElement('alternative')
                    node.setAttribute('reason','ineligible')
                    node.setAttribute('rank',str(index))
                    for action in option:
                        node.appendChild(action.__xml__().documentElement)
                    field.appendChild(node)
        else:
            # Exhausted the ranking without selecting; fall back on lookahead
            return LookaheadPolicy.execute(self,state,choices,explain=explain)
        if explain:
            # Record the remaining (lower-ranked) options as worse alternatives
            for index in range(index+1,len(rhs)):
                alternative = rhs[index]
                node = exp.createElement('alternative')
                node.setAttribute('reason','worse')
                node.setAttribute('rank',str(index))
                for action in alternative:
                    node.appendChild(action.__xml__().documentElement)
                field.appendChild(node)
            # Project everyone's expected behavior for the explanation
            policies = self.getPolicies()
            if len(policies) > 0:
                node = exp.createElement('expectations')
                value,sequence = self.evaluate(policies,state,observations,
                                               history,details=True)
                for step in sequence:
                    subNode = exp.createElement('turn')
                    node.appendChild(subNode)
                    subNode.setAttribute('agent',step['actor'])
                    for action in step['decision']:
                        subNode.appendChild(action.__xml__().documentElement)
                root.appendChild(node)
    elif isinstance(rhs,dict):
        # RHS is a compiled value function: pick the option with the
        # highest expected reward delta
        goals = self.entity.goals.expectation()
        eState = state['state'].expectation()
        best = {'option':None,'value':None}
        for option in choices:
            try:
                value = rhs[option[0]]
            except KeyError:
                value = None
            if explain:
                node = exp.createElement('alternative')
                node.appendChild(option[0].__xml__().documentElement)
            if value:
                total = 0.
                for key,dynamics in value.items():
                    delta = None
                    if isinstance(dynamics,dict):
                        # Indirect entry: dynamics live on another entity's
                        # policy; try the cross-call cache first
                        entry = dynamics['entity']+str(dynamics['action'])+str(dynamics['key'])
                        try:
                            delta = cache[entry]
                        except KeyError:
                            entity = entities[dynamics['entity']]
                            dynamics = entity.policy.rules[index][dynamics['action']][dynamics['key']]
                    else:
                        entry = self.entity.name+str(option[0])+str(key)
                    if delta is None:
                        # Compute (and memoize) the goal-weighted state delta
                        matrix = dynamics.apply(state['state']).expectation()
                        delta = goals[key]*(matrix[key]*eState)
                        cache[entry] = delta
                    total += delta
            else:
                total = 0.
            if best['option'] is None or total > best['value']:
                best['option'] = option
                best['value'] = total
        option = best['option']
    return option,exp
192
193 - def default(self,choices,state,observations):
194 """Generates a default RHS, presumably with minimal effort. The exact method is determined by the L{seedMethod} class attribute.
195 @param state: the current state
196 @type state: L{KeyedVector}
197 @param observations: the current observation history
198 @type observations: dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[]
199 @return: the corresponding action in the table
200 @rtype: L{Action<teamwork.action.PsychActions.Action>}[]
201 """
202 if self.seedMethod == 'greedy':
203 return self._defaultGreedy(choices,state,observations)
204 elif self.seedMethod == 'random':
205 return self._defaultRandom(choices,state,observations)
206 else:
207 raise NotImplementedError,'Unable to use %s seeding' \
208 % (self.seedMethod)
209
211 """Default RHS is a random choice
212 """
213 return random.choice(choices)
214
216 """Default RHS is the optimal action over a one-step time horizon
217 """
218 best = None
219 for action in choices:
220 value = self.expectedValue(state,action)
221 if not best or value > best['value']:
222 best = {'action':action,'value':value}
223 return best['action']
224
242
244 """
245 @return: the turn sequence used for the forward projection
246 @rtype: str[]
247 """
248 try:
249 others = self.entity.entities.getSequence()
250 except AttributeError:
251
252 return []
253 if len(others) > 0 and \
254 len(self.lookahead) != self.horizon:
255 self.lookahead = []
256 while len(self.lookahead) < self.horizon:
257 self.lookahead += others
258 self.lookahead = self.lookahead[:self.horizon]
259 while self.lookahead[0] != self.entity.name:
260 last = self.lookahead.pop()
261 self.lookahead.insert(0,last)
262 return self.lookahead
263
265 """
266 @return: a dictionary of the policies of all of the agents in this entity's lookahed
267 @rtype: strS{->}L{PolicyTable}
268 """
269 policies = {}
270 order = self.getLookahead()[:]
271
272 while len(order) > 0:
273 name = order.pop()
274 if name == self.entity.name:
275 policies[name] = self
276 else:
277 agent = self.entity.getEntity(name)
278 if not policies.has_key(name):
279 policies[name] = agent.policy
280
281 for other in policies[name].getLookahead():
282 if not policies.has_key(other) and not other in order:
283 order.append(other)
284 for policy in policies.values():
285 policy.initialize()
286 return policies
287
288 - def solve(self,horizon=None,choices=None,debug=False,policies=None,
289 interrupt=None,search='exhaustive',progress=None):
290 if policies is None:
291 policies = self.getPolicies()
292 for policy in policies.values():
293 assert policy.rules
294 if search == 'exhaustive':
295 try:
296 value = self.iterate(choices,policies,state,
297 True,False,interrupt)
298 except OverflowError:
299 return False
300 if value is False:
301 return value
302 else:
303 return True
304 elif search == 'greedy':
305 changes = 1
306 candidates = []
307 while changes > 0:
308 changes = 0
309
310 if len(candidates) == 0:
311 candidates = policies.keys()
312 name = random.choice(candidates)
313 candidates.remove(name)
314
315
316 for index in range(1000):
317 if interrupt and interrupt.isSet():
318 return False
319 if progress: progress()
320 if policies[name].perturb(policies,interrupt,debug):
321 changes += 1
322 return True
323 elif search == 'abstract':
324 return self.abstractSolve(policies,interrupt,progress)
325 elif search == 'parallel':
326 return self.parallelSolve(policies,interrupt,progress)
327 else:
328 raise NameError,'Unknown search type "%s"' % (search)
329
330 - def iterate(self,choices,policies,state,recurse=False,debug=False,
331 interrupt=None):
332 """Exhaustive policy search"""
333 if choices is None:
334 choices = self.entity.actions.getOptions()
335 assert policies[self.entity.name] is self
336 best = None
337
338 for rule in xrange(pow(len(choices),len(self.rules))):
339 if interrupt and interrupt.isSet():
340 if best is None:
341
342 return None
343 else:
344
345 break
346 self.fromIndex(rule,choices)
347 assert self.toIndex(choices) == rule
348 if debug:
349 print self.entity.name,map(lambda a:a[0]['type'],self.rules)
350 current = {'policy':rule}
351 if recurse:
352
353 order = self.getLookahead()[:]
354 order.reverse()
355 for name in order:
356 if name != self.entity.name:
357 index = policies[name].iterate(None,policies,state,
358 False,debug,interrupt)
359 current[name] = index
360 if debug:
361
362 print '\t',policies[name].rules[0]
363
364 if recurse:
365 current['value'] = self.evaluate(policies,state,{},start=1)
366 else:
367 current['value'] = self.evaluate(policies,state,{},start=0)
368 if best is None or current['value'] > best['value']:
369 best = current
370 if debug:
371 print 'ER =',current['value']
372 sys.stdout.flush()
373
374 self.fromIndex(best['policy'],choices)
375 if recurse:
376
377 for name in self.getLookahead():
378 if name != self.entity.name:
379 actions = self.entity.getEntity(name).actions.getOptions()
380 policies[name].fromIndex(best[name])
381 return best['policy']
382
383 - def perturb(self,policies,interrupt=None,debug=False):
384 """Consider a random perturbation of this policy
385 @return: C{True} iff a better variation was found
386 @rtype: bool
387 """
388
389 state,observations,index = self.chooseRule()
390
391 old = self.evaluate(policies,state,observations)
392
393 size = len(self.rules[index])
394 i = random.randint(0,size-1)
395 j = random.randint(0,size-2)
396 if j >= i: j += 1
397 if debug:
398 print 'Perturbing %s Rule #%d, %s<->%s' % \
399 (self.entity.name,index,self.rules[index][i],
400 self.rules[index][j])
401 original = self.rules[index][i]
402 self.rules[index][i] = self.rules[index][j]
403 self.rules[index][j] = original
404 new = self.evaluate(policies,state,observations)
405 if debug:
406 print old,'->',new
407 if old >= new:
408
409 original = self.rules[index][i]
410 self.rules[index][i] = self.rules[index][j]
411 self.rules[index][j] = original
412 return False
413 else:
414
415 return True
416
def parallelSolve(self,policies,interrupt=None,progress=None,debug=False):
    """
    Generates an abstract state space and does value iteration to generate a policy when agents all act in parallel, each with its own LHS
    @rtype: bool
    @note: NOTE(review): the visible body only pre-compiles the dynamics
        trees for each goal-relevant state feature and marks the entity as
        compiled; no value iteration appears here, and C{policies},
        C{interrupt} and C{progress} are unused — confirm against the full
        original source.
    """
    savings = 0
    goals = self.entity.goals.expectation()
    # Restrict attention to state features with non-negligible goal weight
    keys = filter(lambda k:isinstance(k,StateKey) and \
                  goals[k] > epsilon,goals.keys())
    keys.sort()
    for key in keys:
        entity = self.entity.world[key['entity']]
        if entity.dynamics.has_key(key['feature']):
            if debug:
                print 'Compiling %s\'s goal of %s' % (self.entity.name,
                                                      str(key))
            # Cache the dynamics of this feature under every action option
            for option in self.entity.actions.getOptions():
                assert len(option) == 1
                dynamics = entity.getDynamics(option[0],key['feature'],
                                              cache=True,debug=debug)
    self.entity.compiled = True
    # Always 0 in the visible code — presumably a placeholder for compiled
    # savings accounting; verify against callers
    return savings
439
441 """
442 Generates an abstract state space (defined by the LHS attributes) and does value iteration to generate a policy
443 @warning: assumes that all of the agents have the same LHS attribute breakdown! (not hard to overcome this assumption, but annoying to code)
444 @rtype: bool
445 """
446 assert self.rules
447
448 if self.transition is None:
449 if not self.abstractTransition(policies,interrupt):
450
451 self.transition = None
452 return None
453 for name in policies.keys():
454 if name != self.entity.name:
455 policies[name].transition = self.transition
456
457 for name in policies.keys():
458 policies[name].values = {}
459 for current in self.rules.keys():
460 policies[name].values[current] = {}
461 for other in policies.keys():
462 if other == name:
463 policies[name].values[current][other] = {}
464 for option in policies[name].rules[current]:
465 policies[name].values[current][other][str(option)] = 0.
466 else:
467 policies[name].values[current][other] = 0.
468
469 rewards = {}
470 goals = self.entity.getGoalVector()['state'].expectation()
471 for name in policies.keys():
472 rewards[name] = {}
473 for current in self.rules.keys():
474 intervals = self.abstract(current)
475 table = {}
476 rewards[name][current] = table
477 for other in policies.keys():
478 for option in policies[other].rules[current]:
479 value = self.abstractReward(intervals,goals,option,
480 interrupt)
481 if value is None:
482
483 return None
484 table[str(option)] = value
485
486 change = 1.
487 iterations = 0
488 while iterations < self.horizon:
489 print iterations
490 iterations += 1
491 change = 0.
492 for name in policies.keys():
493 if interrupt and interrupt.isSet(): return None
494
495 change += policies[name].abstractIterate(policies,rewards,
496 interrupt)
497 if progress: progress()
498 if change > epsilon:
499 iterations = 0
500 print 'Result:',self.rules
501 return True
502
504 """
505 Generates a transition probability function over the abstract state state space (defined by the LHS attributes)
506 """
507 self.transition = {}
508 for current in self.rules.keys():
509 self.transition[current] = {}
510 for name in policies.keys():
511
512 intervals = self.abstract(current)
513 if policies[name].rules[current] is None:
514 entity = self.entity.getEntity(name)
515 policies[name].rules[current] = entity.actions.getOptions()
516
517 for option in policies[name].rules[current]:
518
519 event = {name:option}
520 dynamics = self.entity.entities.getDynamics(event)
521 tree = dynamics['state'].getTree()
522
523 table = Distribution()
524 self.transition[current][str(option)] = table
525 for matrix,prob in tree[intervals].items():
526 new = []
527 for index in range(len(self.attributes)):
528 if interrupt and interrupt.isSet(): return None
529 if progress: progress()
530
531 obj,values = self.attributes[index]
532 assert isinstance(obj,KeyedVector)
533 if isinstance(obj,ThresholdRow):
534 key = obj.specialKeys[0]
535 print option
536 print key
537 print matrix[key].simpleText()
538 new.append(adjustInterval(intervals,index,
539 key,matrix[key]))
540 elif isinstance(obj,DifferenceRow):
541 key = obj.specialKeys[0]
542 hi = adjustInterval(intervals,index,
543 key,matrix[key])
544 key = obj.specialKeys[1]
545 lo = adjustInterval(intervals,index,
546 key,matrix[key])
547 new.append({'lo':hi['lo']-lo['hi'],
548 'hi':hi['hi']-lo['lo']})
549 else:
550 raise NotImplementedError,'Unable to compute abstract transitions of %s attributes' % (obj.__class__.__name__)
551
552 destinations = [{'prob':prob,'factors':[]}]
553 for index in range(len(self.attributes)):
554 obj,values = self.attributes[index]
555 interval = new[index]
556 span = interval['hi'] - interval['lo']
557 loIndex = self.subIndex(index,interval['lo'])
558 hiIndex = self.subIndex(index,interval['hi'])
559
560 subProbs = {}
561 if hiIndex > loIndex:
562
563 subProbs[loIndex] = (values[loIndex]-interval['lo'])/span
564 subProbs[hiIndex] = (interval['hi']-values[hiIndex-1])/span
565 for subIndex in range(loIndex+1,hiIndex):
566 subProbs[subIndex] = (values[subIndex]-values[subIndex-1])/span
567 else:
568
569 subProbs[loIndex] = 1.
570
571 for dest in destinations[:]:
572 destinations.remove(dest)
573 for subIndex in range(loIndex,hiIndex+1):
574 newDest = copy.deepcopy(dest)
575 newDest['prob'] *= subProbs[subIndex]
576 newDest['factors'].append(subIndex)
577 destinations.append(newDest)
578 for dest in destinations:
579 index = self.factored2index(dest['factors'])
580 try:
581 table[index] += dest['prob']
582 except KeyError:
583 table[index] = dest['prob']
584 for dest,prob in table.items():
585 assert prob > -epsilon
586 if prob < epsilon:
587 del table[dest]
588 assert abs(sum(table.values())-1.) < epsilon
589 return True
590
592 """
593 One pass of value iteration over abstract state space.
594 @warning: Currently works for only two agents (easy, but messy, to generalize)
595 @param rewards: C{rewards[name][state][action]} = the reward that agent C{name} gets in state C{state} (index form) derived from the performance of C{action}
596 @type rewards: strS{->}(strS{->}float)[]
597 @return: the total change to the value function
598 @rtype: float
599 """
600 values = []
601
602 sequence = self.getLookahead()[:]
603 sequence.reverse()
604 delta = 0.
605 for index in range(len(sequence)):
606 name = sequence[index]
607 for current in self.rules.keys():
608 if name != self.entity.name:
609
610 best = {'value':None}
611 for option in policies[name].rules[current]:
612 value = policies[name].values[current][name][str(option)]
613 if best['value'] is None or value > best['value']:
614 best['value'] = value
615 best['option'] = option
616
617 value = rewards[self.entity.name][current][str(best['option'])]
618 table = self.transition[current][str(best['option'])]
619 next = sequence[index-1]
620 for dest,prob in table.items():
621 if next != self.entity.name:
622 value += prob*self.values[dest][next]
623 else:
624 value += prob*max(self.values[dest][next].values())
625 self.values[current][name] = value
626 else:
627
628 for option in self.rules[current]:
629 value = rewards[self.entity.name][current][str(option)]
630 table = self.transition[current][str(option)]
631 next = sequence[index-1]
632 for dest,prob in table.items():
633 value += prob*self.values[dest][next]
634 self.values[current][name][str(option)] = value
635 order = self.rules[current][:]
636 order.sort(lambda o1,o2:cmp(-self.values[current][name][str(o1)],-self.values[current][name][str(o2)]))
637 if order != self.rules[current]:
638 delta += 1.
639 self.rules[current] = order
640 return delta
641
643 reward = 0.
644
645 for index in range(len(self.attributes)):
646 if interrupt and interrupt.isSet(): return None
647 obj,values = self.attributes[index]
648 interval = intervals[index]
649 if isinstance(obj,ThresholdRow):
650 key = obj.specialKeys[0]
651 if goals.has_key(key):
652
653
654 mean = (interval['hi']+interval['lo'])/2.
655 reward += goals[key]*mean
656 else:
657 raise NotImplementedError,'Unable to compute abstract reward for %s attributes' % (obj.__class__.__name__)
658 return reward
659
660 - def oldReachable(self,choices,policies,state,observations,debug=False):
692
def evaluate(self,policies,state,observations,history=None,debug=False,
             fixed=True,start=0,details=False):
    """Computes the expected value of this policy in response to the given
    policies for the other agents
    @param policies: the policies that the other agents are expected to follow
    @type policies: dict:strS{->}L{PolicyTable}
    @param state: the current state
    @type state: L{KeyedVector}
    @param observations: the current observation history
    @type observations: dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[]
    @param history: dictionary of actions that have been performed so far (not changed by this method)
    @type history: dict
    @param fixed: flag, if C{True}, then the other agents follow their given policies; otherwise, they can optimize
    @type fixed: bool
    @param start: the time offset to use in the lookahead (0 starts with this agent, and is the default)
    @type start: int
    @param details: flag, if C{True}, then the details of this evaluation are returned in addition to the expected value
    @return: expected value of this policy (and sequence of actions if details is C{True})
    @rtype: float
    """
    total = 0.
    # Shallow-copy so the caller's observation history is not modified
    observations = copy.copy(observations)
    if isinstance(state,KeyedVector):
        stateVector = state
    else:
        # assumes state is a dict with a 'state' Distribution — TODO confirm
        stateVector = state['state'].expectation()
    goals = self.entity.getGoalVector()
    # Pad the goal vector to cover every state feature
    goals['state'].fill(stateVector.keys(),0.)
    # Rotate the turn sequence so it starts `start` steps in
    sequence = self.getLookahead()[:]
    for t in range(start):
        sequence.append(sequence.pop(0))
    if history is None:
        history = {}
    else:
        history = copy.copy(history)
    actions = []
    # Simulate one pass through the turn sequence, accumulating this
    # entity's goal-weighted reward after each agent acts
    for name in sequence:
        if fixed or name == self.entity.name:
            # Agent follows its given policy
            action,exp = policies[name].execute(stateVector,observations,
                                                history)
        else:
            # Agent is free to optimize against the others
            action = policies[name].localSolve(policies,stateVector,
                                               observations,
                                               update=False,debug=debug)
        actions.append({'actor':name,'decision':action})
        if debug:
            print '\texpects',name,'to',action
        # Record the decision in the observation and action histories
        observations[name] = action
        for act in action:
            history[str(act)] = True
        # Project the state forward through this action's dynamics
        dynamics = self.entity.entities.getDynamics({name:action})
        stateVector = dynamics['state'].apply(stateVector)*stateVector
        # Accumulate this entity's reward in the resulting state
        total += self.expectedValue(stateVector,action,goals)
    if debug:
        print 'EV =',total
    if details:
        return total,actions
    else:
        return total
755
756 - def localSolve(self,policies,state,observations,update=False,debug=False):
757 """Determines the best action out of the available options, given the current state and observation history, and while holding fixed the expected policies of the other agents.
758 @param update: if C{True}, as a side effect, this policy is modified to have this best action be the RHS of the applicable rule.
759 @type update: bool
760 @return: the best action
761 @rtype: L{Action<teamwork.action.PsychActions.Action>}[]
762 """
763 index = self.index(state,observations)
764 best = None
765 original = self.execute(state,observations)
766 for action in self.entity.actions.getOptions():
767 self.rules[index] = action
768 value = self.evaluate(policies,state,observations,debug=debug)
769 if debug:
770 print '\t\tEV of',action,'=',value
771 if best is None or value > best['value']:
772 best = {'action':action,'value':value}
773 if update:
774 self.rules[index] = best['action']
775 else:
776 self.rules[index] = original
777 return best['action']
778
797
799 """Generates a random state and observation history and finds the rule
800 corresponding to them
801 @return: a tuple of the state, observation history, and rule index
802 @rtype: (L{KeyedVector},dict:strS{->}L{Action<teamwork.action.PsychActions.Action>}[],int)
803 """
804 state = self.entity.entities.getState().expectation()
805 observations = {}
806 for obj,values in self.attributes:
807 if isinstance(obj,KeyedVector):
808
809 if isinstance(obj,ThresholdRow):
810 key = obj.specialKeys[0]
811 index = random.choice(range(len(values)+1))
812 if index == 0:
813 lo = -1.
814 else:
815 lo = values[index-1]
816 if index == len(values):
817 hi = 1.
818 else:
819 hi = values[index]
820 state[key] = (lo+hi)/2.
821 else:
822 raise NotImplementedError,'Unable to generate random choices for attributes of type %s' % (obj.__class__.__name__)
823 else:
824
825 name = obj.name
826 agent = self.entity.getEntity(name)
827 observations[name] = random.choice(agent.actions.getOptions())
828 return state,observations,self.index(state,observations)
829
831 """
832 @param index: the rule index of interest
833 @type index: int
834 @return: the abstract state subspace where the given rule is applicable, in the form of a list of intervals, one for each attribute, where each interval is a dictionary with keys C{weights}, C{index}, C{lo}, and C{hi}
835 @rtype: dict[]
836 """
837 abstract = []
838 for attrIndex in range(len(self.attributes)):
839 obj,values = self.attributes[-attrIndex-1]
840 if isinstance(obj,KeyedVector):
841 subIndex = index % (len(values)+1)
842 index /= (len(values)+1)
843 if subIndex > 0:
844 lo = values[subIndex-1]
845 else:
846 lo = -1.
847 try:
848 hi = values[subIndex]
849 except IndexError:
850 hi = 1.
851 abstract.append({'weights':obj,'index':subIndex,
852 'lo':lo,'hi':hi})
853 else:
854 raise NotImplementedError,'Not yet able to abstract over rules on observations.'
855 abstract.reverse()
856 return abstract
857
859 """Fills in the rules using the given number as an I{n}-ary representation of the RHS values (where I{n} is the number of possible RHS values)
860 """
861 if choices is None:
862 choices = self.entity.actions.getOptions()
863 for rule in range(len(self.rules)):
864 self.rules[rule] = choices[index % len(choices)]
865 index /= len(choices)
866
868 """
869 @return: the I{n}-ary representation of the RHS values (where I{n} is the number of possible RHS values)
870 @rtype: int
871 """
872 if choices is None:
873 choices = self.entity.actions.getOptions()
874 index = 0
875 for rule in range(len(self.rules)):
876 index *= len(choices)
877 index += choices.index(self.rules[-rule-1])
878 return index
879
881 """
882 @param actor: the name of the agent whose turn it is
883 @type actor: str
884 @param attributes: the attributes already found
885 @type attributes: KeyedPlane[]
886 """
887 options = self.entity.getEntity(actor).actions.getOptions()
888 newDiffs = {}
889 newLeaves = {}
890 values = {}
891 for action in options:
892 actionDict = {self.entity.name:action}
893 dynamics = self.entity.entities.getDynamics(actionDict)
894 tree = dynamics['state'].getTree()
895 try:
896 myLeaves = values[str(action)]
897 except KeyError:
898 myLeaves = values[str(action)] = tree.leaves()
899 myBranches = tree.branches().values()
900 for branch in myBranches:
901 if len(leaves) > 0:
902
903
904 for leaf in leaves.values():
905 newBranch = KeyedPlane(branch.weights*leaf,
906 branch.threshold)
907 insertBranch(newBranch,attributes)
908 else:
909
910
911 insertBranch(branch,attributes)
912 if len(diffs) == 0:
913 for myLeaf in myLeaves:
914 newLeaves[myLeaf.simpleText()] = myLeaf
915 for other in options:
916 if action != other:
917 try:
918 yrLeaves = values[str(other)]
919 except KeyError:
920 actionDict = {self.entity.name:other}
921 dynamics = self.entity.entities.getDynamics(actionDict)
922 tree = dynamics['state'].getTree()
923 yrLeaves = values[str(other)] = tree.leaves()
924 for myLeaf in myLeaves:
925 for yrLeaf in yrLeaves:
926 diff = myLeaf - yrLeaf
927 newDiffs[diff.simpleText()] = diff
928 else:
929 for diff in diffs.values():
930 for myLeaf in myLeaves:
931 newDiff = myLeaf*diff
932 newDiffs[newDiff.simpleText()] = newDiff
933 for leaf in leaves.values():
934 for myLeaf in myLeaves:
935 newLeaf = myLeaf*leaf
936 newLeaves[newLeaf.simpleText()] = newLeaf
937 diffs.clear()
938 for key,value in newDiffs.items():
939 diffs[key] = value
940 leaves.clear()
941 for key,value in newLeaves.items():
942 leaves[key] = value
943
945 """Takes the given table and uses it to set the LHS and RHS of this policy (making sure that the RHS refers to my entity instead)
946 @param table: the PWL table that contains the relevant LHS and RHS
947 @type table: L{PWLTable}
948 @warning: Does not import value function, just policy itself
949 """
950 self.reset()
951 self.attributes = table.attributes[:]
952 for rule,rhs in table.rules.items():
953 for option in self.entity.actions.getOptions():
954 if len(rhs) == len(option):
955
956 for action in option:
957 for other in rhs:
958 if other['type'] == action['type'] and \
959 other['object'] == action['object']:
960 break
961 else:
962
963 break
964 else:
965
966 self.rules[rule] = option
967 break
968 else:
969 raise UserWarning,'Unable to find %s\'s equivalent for %s' % \
970 (self.entity.name,str(rhs))
971
972 - def generateLHS(self,horizon=None,choices=None,debug=False):
1013
1015 if horizon is None:
1016 horizon = self.horizon
1017 if choices is None:
1018 choices = self.entity.actions.getOptions()
1019 goals = self.entity.getGoalTree().leaves()[0]
1020 attributes = []
1021 diffs = {}
1022 leaves = {}
1023 order = self.getLookahead()
1024 for agent in order:
1025 if debug:
1026 print agent
1027 self.updateAttributes(agent,attributes,diffs,leaves)
1028 if debug:
1029 for plane in attributes:
1030 print '\t',plane.weights.getArray(),plane.threshold
1031 print
1032 valuePlanes = {}
1033 for matrix in diffs.values():
1034 weights = goals*matrix
1035 plane = KeyedPlane(copy.deepcopy(weights),0.)
1036 if plane.always() is None:
1037 for other in valuePlanes.values():
1038 result = plane.compare(other)
1039 if result in ['equal','inverse']:
1040 del diffs[matrix.simpleText()]
1041 break
1042 else:
1043 if debug:
1044 print '\t',weights.getArray()
1045 valuePlanes[plane.simpleText()] = plane
1046 else:
1047 del diffs[matrix.simpleText()]
1048 if debug:
1049 print
1050 for matrix in diffs.values():
1051 weights = goals*matrix
1052 plane = KeyedPlane(weights,0.)
1053 insertBranch(plane,attributes)
1054 if debug:
1055 print 'FINAL:'
1056 newAttributes = {}
1057 for plane in attributes:
1058 weights = plane.weights.getArray()
1059 if abs(weights[1]-1.) < epsilon:
1060 scaling = 1./weights[1]
1061 else:
1062 scaling = 1.
1063 plane.weights *= scaling
1064 threshold = plane.threshold * scaling
1065 plane.threshold = 0.
1066 for other in newAttributes.keys():
1067 result = plane.compare(KeyedPlane(newAttributes[other][0],0.))
1068 if result == 'equal':
1069 newAttributes[other][1].append(threshold)
1070 break
1071 elif result == 'inverse':
1072 newAttributes[other][1].append(-threshold)
1073 break
1074 else:
1075 newAttributes[plane.weights.simpleText()] = (plane.weights,[threshold])
1076 for name in order[1:]:
1077 agent = self.entity.getEntity(name)
1078 newAttributes[name] = (agent,agent.actions.getOptions())
1079 for weights,tList in newAttributes.values():
1080 tList.sort()
1081 if debug:
1082 print self.attrString(weights)
1083 print tList
1084 self.attributes = newAttributes.values()
1085
1089
        # Serialize this policy table as an XML document: start from the
        # generic LookaheadPolicy document, then append <attributes> and
        # (if present) <rules> sections.
        doc = LookaheadPolicy.__xml__(self)
        # <attributes>: one <attribute> node per (object,values) pair; the
        # object serializes itself, and each threshold becomes a <value>.
        root = doc.createElement('attributes')
        doc.documentElement.appendChild(root)
        for obj,values in self.attributes:
            node = doc.createElement('attribute')
            root.appendChild(node)
            node.appendChild(obj.__xml__().documentElement)
            for value in values:
                child = doc.createElement('value')
                child.setAttribute('threshold',str(value))
                node.appendChild(child)
        if len(self.rules) > 0:
            # <rules>: one <rhs> node per rule entry
            root = doc.createElement('rules')
            # NOTE(review): `flag` is never used in this block
            flag = False
            doc.documentElement.appendChild(root)
            for rhs in self.rules:
                if rhs is None:
                    # A null RHS stands for all of the entity's options
                    rhs = self.entity.actions.getOptions()
                node = doc.createElement('rhs')
                if isinstance(rhs,list):
                    # List form: each option is a list of actions
                    node.setAttribute('type','list')
                    for option in rhs:
                        subNode = doc.createElement('option')
                        node.appendChild(subNode)
                        for action in option:
                            subNode.appendChild(action.__xml__().documentElement)
                elif isinstance(rhs,dict):
                    # Dictionary form: action -> key -> dynamics, where the
                    # dynamics may itself be a dict linking to another
                    # entity's (key,action) dynamics
                    node.setAttribute('type','dict')
                    for action,value in rhs.items():
                        for key,dynamics in value.items():
                            subNode = doc.createElement('entry')
                            subNode.appendChild(action.__xml__().documentElement)
                            subNode.appendChild(key.__xml__().documentElement)
                            if isinstance(dynamics,dict):
                                # Indirect dynamics: serialize as a <link>
                                link = doc.createElement('link')
                                link.setAttribute('entity',dynamics['entity'])
                                link.appendChild(dynamics['key'].__xml__().documentElement)
                                link.appendChild(dynamics['action'].__xml__().documentElement)
                                subNode.appendChild(link)
                            else:
                                # Direct dynamics object
                                subNode.appendChild(dynamics.__xml__().documentElement)
                            node.appendChild(subNode)
                else:
                    raise UserWarning,'Unknown RHS type: %s' % \
                          (rhs.__class__.__name__)
                root.appendChild(node)
        return doc
1142
    def parse(self,element):
        """Extract this policy table's attributes and rules from an XML
        element previously produced by L{__xml__}
        @param element: the XML root of the serialized policy
        """
        LookaheadPolicy.parse(self,element)
        node = element.firstChild
        while node:
            if node.nodeType == node.ELEMENT_NODE:
                if node.tagName == 'attributes':
                    # Parse the LHS attribute planes and their threshold lists
                    vector = KeyedVector()
                    child = node.firstChild
                    while child:
                        if child.nodeType == child.ELEMENT_NODE:
                            if child.tagName == 'attribute':
                                values = []
                                obj = None
                                grandchild = child.firstChild
                                while grandchild:
                                    if grandchild.nodeType == child.ELEMENT_NODE:
                                        if grandchild.tagName == 'value':
                                            values.append(float(grandchild.getAttribute('threshold')))
                                        else:
                                            # The weight vector for this attribute
                                            obj = vector.parse(grandchild)
                                    grandchild = grandchild.nextSibling
                                values.sort()
                                if obj is not None:
                                    self.attributes.append((obj,values))
                        child = child.nextSibling
                elif node.tagName == 'rules':
                    # Parse the RHS rules, in either dict or list form
                    child = node.firstChild
                    while child:
                        if child.nodeType == child.ELEMENT_NODE:
                            assert child.tagName == 'rhs'
                            rhsType = str(child.getAttribute('type'))
                            if rhsType == 'dict':
                                # Dictionary form: action -> key -> dynamics
                                rhs = {}
                                entry = child.firstChild
                                while entry:
                                    if entry.nodeType == child.ELEMENT_NODE:
                                        assert entry.tagName == 'entry'
                                        action = None
                                        dynamics = None
                                        key = None
                                        grandChild = entry.firstChild
                                        while grandChild:
                                            if grandChild.nodeType == grandChild.ELEMENT_NODE:
                                                if grandChild.tagName == 'action':
                                                    action = Action()
                                                    action.parse(grandChild)
                                                elif grandChild.tagName == 'dynamic':
                                                    dynamics = PWLDynamics()
                                                    dynamics.parse(grandChild)
                                                elif grandChild.tagName == 'link':
                                                    # Indirect dynamics: a reference to
                                                    # another entity's (key,action) dynamics
                                                    dynamics = {'entity':str(grandChild.getAttribute('entity')),'action':Action(),'key':Key()}
                                                    greatChild = grandChild.firstChild
                                                    while greatChild:
                                                        if greatChild.nodeType == greatChild.ELEMENT_NODE:
                                                            if greatChild.tagName == 'action':
                                                                dynamics['action'].parse(greatChild)
                                                            elif greatChild.tagName == 'key':
                                                                dynamics['key'] = dynamics['key'].parse(greatChild)
                                                        greatChild = greatChild.nextSibling
                                                else:
                                                    # Any other tag is the state key
                                                    key = Key()
                                                    key = key.parse(grandChild)
                                            grandChild = grandChild.nextSibling
                                        assert not action is None
                                        assert not dynamics is None
                                        assert not key is None
                                        try:
                                            rhs[action][key] = dynamics
                                        except KeyError:
                                            rhs[action] = {key: dynamics}
                                    entry = entry.nextSibling
                                self.rules.append(rhs)
                            else:
                                # List form: each <option> is a list of actions.
                                # NOTE(review): the loop variable `element` below
                                # shadows this method's parameter (harmless here,
                                # since the parameter is no longer read)
                                rhs = []
                                grandChild = child.firstChild
                                while grandChild:
                                    if grandChild.nodeType == child.ELEMENT_NODE:
                                        assert grandChild.tagName == 'option'
                                        option = []
                                        elements = grandChild.getElementsByTagName('action')
                                        for element in elements:
                                            action = Action()
                                            action.parse(element)
                                            option.append(action)
                                        rhs.append(option)
                                    grandChild = grandChild.nextSibling
                                self.rules.append(rhs)
                        child = child.nextSibling
            node = node.nextSibling
        # NOTE(review): hard-coded sanity check for one specific scenario
        # agent — looks like leftover debugging code; verify before removing
        if self.entity.name == 'Police' and self.entity.__class__.__name__ == 'PWLAgent':
            if len(self.rules) == 0:
                raise UserWarning,'%s has no rules' % (self.entity.name)
1235
        """Adjust a specific interval by the effect of the given row
        @param intervals: the intervals the state currently lies in
        @param index: the index of the current interval of relevance
        @param key: the state feature key being adjusted
        @param row: the effect row on the state feature
        @return: the adjusted interval, as a dictionary with 'lo'/'hi' bounds
        """
        interval = intervals[index]
        if isinstance(row,UnchangedRow):
            # The feature is untouched by this row
            return interval
        elif isinstance(row,SetToFeatureRow):
            # The feature is set to a scaled copy of another feature
            for other in intervals:
                otherRow = other['weights']
                if isinstance(otherRow,ThresholdRow) and \
                       other is not interval and \
                       otherRow.specialKeys[0] == row.deltaKey:
                    # Found the interval bounding the source feature
                    if row[row.deltaKey] > 0.:
                        lo = row[row.deltaKey]*other['lo']
                        hi = row[row.deltaKey]*other['hi']
                    else:
                        # Negative scale factor swaps the bounds
                        lo = row[row.deltaKey]*other['hi']
                        hi = row[row.deltaKey]*other['lo']
                    return {'lo':lo,'hi':hi}
            else:
                # No bound on the source feature: widen by the magnitude,
                # clipped to [-1,1]
                lo = max(-1.,interval['lo']-abs(row[row.deltaKey]))
                hi = min(1.,interval['hi']+abs(row[row.deltaKey]))
                return {'lo':lo,'hi':hi}
        elif isinstance(row,SetToConstantRow):
            # The feature collapses to a single constant value
            lo = hi = row[row.deltaKey]
            return {'lo':lo,'hi':hi}
        elif isinstance(row,IncrementRow):
            # The feature is shifted by a constant, clipped to [-1,1]
            lo = max(-1.,interval['lo']+row[row.deltaKey])
            hi = min(1.,interval['hi']+row[row.deltaKey])
            return {'lo':lo,'hi':hi}
        elif isinstance(row,ScaleRow):
            if row.deltaKey == key:
                # Scaling this feature by a constant, clipped to [-1,1]
                lo = max(-1.,interval['lo']*row[key])
                hi = min(1.,interval['hi']*row[key])
                return {'lo':lo,'hi':hi}
            else:
                # Adding a scaled copy of some *other* feature; look for an
                # interval that bounds that feature
                for other in intervals:
                    otherRow = other['weights']
                    if isinstance(otherRow,ThresholdRow) and \
                           other is not interval and \
                           otherRow.specialKeys[0] == row.deltaKey:
                        if row[row.deltaKey] > 0.:
                            delta = {'lo':row[row.deltaKey]*other['lo'],
                                     'hi':row[row.deltaKey]*other['hi']}
                        else:
                            # Negative scale factor swaps the bounds
                            delta = {'lo':row[row.deltaKey]*other['hi'],
                                     'hi':row[row.deltaKey]*other['lo']}
                        break
                else:
                    # No bound on the other feature: assume the worst case
                    delta = {'lo':-abs(row[row.deltaKey]),
                             'hi': abs(row[row.deltaKey])}
                lo = max(-1.,interval['lo']+delta['lo'])
                hi = min( 1.,interval['hi']+delta['hi'])
                return {'lo':lo,'hi':hi}
        else:
            raise NotImplementedError,'Unable to compute abstract effect for %s\n'\
                  % (row.__class__.__name__)
1304
1306 """Adds the new branch to the old list if neither it (nor its inverse) is
1307 already present
1308 @param new: branch to be added
1309 @type new: KeyedPlane
1310 @param oldList: the branches already found (modified directly)
1311 @type oldList: KeyedPlane[]
1312 """
1313 for branch in oldList:
1314 result = new.compare(branch)
1315 if result in ['equal','inverse']:
1316 break
1317 else:
1318 oldList.append(new)
1319
if __name__ == '__main__':
    # Command-line experiment: sweep the goal-weight spaces of the Bully and
    # Teacher agents in a school-violence scenario, solve the Teacher's
    # policy table for each combination, and dump a CSV table of results.
    import sys
    from xml.dom.minidom import parse
    from teamwork.multiagent.sequential import SequentialAgents
    from teamwork.agent.Entities import PsychEntity
    from optparse import OptionParser

    # Command-line options
    parser = OptionParser('usage: %prog [options] SCENARIO')
    parser.add_option('-b','--bully-resolution',
                      dest='bullyResolution',
                      metavar='BRES',
                      help='resolution of goal space for bully',
                      default=5)
    parser.add_option('-t','--teacher-resolution',
                      dest='teacherResolution',
                      metavar='TRES',
                      help='resolution of goal space for teacher',
                      default=5)
    parser.add_option('-o','--output-file',
                      dest='output',
                      metavar='FILE',
                      help='file in which to store data [default is standard out]',
                      default=None)
    parser.add_option('-c','--collapse',
                      dest='collapse',
                      action='store_true',
                      help='collapse common rows into a single entry',
                      default=False)

    (options,args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")
    # Load the scenario from the given XML file
    doc = parse(args[0])
    scenario = SequentialAgents()
    scenario.parse(doc.documentElement,PsychEntity)
    policies = {}
    name = 'Teacher'
    # Build a policy table for each active agent; only the Teacher's table
    # gets solved below, the others keep a single default (None) rule.
    # NOTE(review): PolicyTable.__init__ visibly takes (entity,actions,
    # horizon), but this call passes four positional arguments — verify.
    for agent in scenario.activeMembers():
        print agent.name
        for goal in agent.getGoals():
            print '\t',goal,agent.getGoalWeight(goal)
        table = PolicyTable(agent,agent.actions.getOptions(),
                            len(scenario),agent.horizon)
        if agent.name != name:
            table.rules = [None]
        policies[agent.name] = table

    # Generate the goal-weight spaces to sweep over
    agent = scenario[name]
    state = scenario.getState().domain()[0]
    granularity = int(options.teacherResolution)
    tSpace = agent.generateSpace(granularity)
    granularity = int(options.bullyResolution)
    bSpace = scenario['Bully'].generateSpace(granularity)
    # Columns of the output table
    columns = ['B Type','B Goals','T Type','T Goals','T Policy','T Count',
               'B Policy','O Policy','T Action','B ER']
    # Extra columns emitted only in collapsed mode
    errors = ['Error']
    if options.collapse:
        # Collapsed output drops per-teacher columns, adds a count
        try: columns.remove('T Type')
        except ValueError: pass
        try: columns.remove('T Goals')
        except ValueError: pass
    else:
        try: columns.remove('T Count')
        except ValueError: pass
    # Build the CSV header row (goal columns expand into one per goal)
    header = ''
    for column in columns:
        if column == 'T Goals':
            goals = agent.getGoals()
            goals.sort()
            for goal in goals:
                header += '%s %s,' % (goal.direction,goal.entity[0])
        elif column == 'B Goals':
            goals = scenario['Bully'].getGoals()
            goals.sort()
            for goal in goals:
                header += '%s %s,' % (goal.direction,goal.entity[0])
        else:
            header += '%s,' % (column)
    if options.output:
        f = open(options.output,'w')
    else:
        f = sys.stdout
    f.write(header[:-1]+'\n')
    for bully in range(len(bSpace)):
        # NOTE(review): `feasible` is reset for every bully type, yet it is
        # consumed after BOTH loops finish, so the collapsed report only
        # covers the final bully type — verify this is intended
        feasible = {}
        bWeights = bSpace[bully]
        # Configure the Bully's goal weights, both on the real agent and on
        # the Teacher's mental model of the Bully
        for goal,weight in bWeights.items():
            scenario['Bully'].setGoalWeight(goal,weight,normalize=False)
            agent.getEntity('Bully').setGoalWeight(goal,weight,normalize=False)
        for teacher in range(len(tSpace)):
            # Configure the Teacher's goal weights and re-solve its policy
            tWeights = tSpace[teacher]
            for goal,weight in tWeights.items():
                agent.setGoalWeight(goal,weight,normalize=False)
            policies[name].solve(choices=agent.actions.getOptions(),
                                 policies=policies)
            index = policies[name].toIndex()
            entry = {'Teacher goals':tWeights,
                     'Teacher policy':index,
                     'Bully goals':bWeights,
                     'Bully RHS':policies['Bully'].rules[0],
                     'Onlooker RHS':policies['Onlooker'].rules[0],
                     'Bully ER':policies['Bully'].evaluate(policies,state,{})
                     }
            # Format one CSV line for this (bully,teacher) combination
            line = ''
            for column in columns:
                if column == 'B Goals':
                    goals = bWeights.keys()
                    goals.sort()
                    for goal in goals:
                        line += '%f,' % (scenario['Bully'].getGoalWeight(goal))
                elif column == 'B Type':
                    line += '%d,' % (bully)
                elif column == 'T Goals':
                    goals = tWeights.keys()
                    goals.sort()
                    for goal in goals:
                        line += '%f,' % (agent.getGoalWeight(goal))
                elif column == 'T Type':
                    line += '%d,' % (teacher)
                elif column == 'T Policy':
                    line += '%d,' % (index)
                elif column == 'O Policy':
                    line += '%s,' % (policies['Onlooker'].rules[0][0]['type'])
                elif column == 'B Policy':
                    line += '%s,' % (policies['Bully'].rules[0][0]['type'])
                elif column == 'B ER':
                    line += '%f,' % (entry['Bully ER'])
                elif column == 'T Action':
                    # Teacher's chosen action given the others' behavior
                    observations = {'Bully':policies['Bully'].execute(state,None),
                                    'Onlooker':policies['Onlooker'].execute(state,None),
                                    }
                    action = policies['Teacher'].execute(state,observations)
                    line += '%s,' % (action[0]['type'])
                else:
                    # Placeholder (e.g. 'T Count', substituted later)
                    line += '%s,' % (column)
            entry['data'] = line
            # Group entries by the Teacher policy they induce
            try:
                feasible[index].append(entry)
            except KeyError:
                feasible[index] = [entry]
            if not options.collapse:
                f.write(line[:-1]+'\n')
                f.flush()
    if options.collapse:
        # Collapsed mode: one line per distinct Teacher policy, plus the
        # Bully's expected-reward error under each mistaken policy model
        modelSpace = feasible.keys()
        modelSpace.sort()
        for model in modelSpace:
            entryList = feasible[model]
            entry = entryList[0]
            line = entry['data']
            # Substitute the real count for the 'T Count' placeholder
            line = line.replace('T Count','%d' % (len(entryList)))
            policies['Bully'].rules[0] = entry['Bully RHS']
            for real in modelSpace:
                # Evaluate the Bully against each possible "real" policy
                realEntry = feasible[real][0]
                policies['Teacher'].fromIndex(real)
                policies['Onlooker'].rules[0] = realEntry['Onlooker RHS']
                if model == real:
                    error = 0.
                else:
                    # Loss from believing `model` when reality is `real`
                    value = policies['Bully'].evaluate(policies,state,{})
                    error = entry['Bully ER']-value
                for column in errors:
                    if column == 'Error':
                        line += '%f,' % (error)
                    elif column == 'B MisAction':
                        line += '%s,' % (policies['Bully'].rules[0][0]['type'])
                    elif column == 'O MisAction':
                        line += '%s,' % (policies['Onlooker'].rules[0][0]['type'])
                    elif column == 'T MisAction':
                        observations = {'Bully':policies['Bully'].execute(state,None),
                                        'Onlooker':policies['Onlooker'].execute(state,None),
                                        }
                        action = policies['Teacher'].execute(state,observations)
                        line += '%s,' % (action[0]['type'])
                    else:
                        raise NotImplementedError,column
            f.write(line[:-1]+'\n')
            f.flush()
    # NOTE(review): when no --output-file is given, this closes sys.stdout
    f.close()
1517