teamwork.policy.StochasticPolicy

"""A nondeterministic version of the lookahead-based policy, where
actions are selected with a probability that is a function of
their expected values."""
# This policy's "temperature" constant for controlling the
# probability distribution of actions.  Increasing values produce
# a behavior closer to deterministic lookahead; decreasing values
# produce a behavior closer to uniform randomness.
# computeDistribution multiplies this constant by the number of
# options before handing it to prob().
beta = 1.

def execute(self,state,choices=None,debug=Debugger(),depth=-1):
    """Returns a randomly selected action out of the available
    choices, with each action selected with a probability
    dependent on its relative expected value

    Arguments:
      state   -- passed through to evaluateChoices; its 'name'
                 attribute is reported as the acting entity
      choices -- candidate actions handed to evaluateChoices
                 (defaults to a fresh empty list on every call)
      debug   -- object whose message(level,text) method receives
                 diagnostic output (levels 7 and 9 are used here)
      depth   -- passed through unchanged to evaluateChoices

    Returns a tuple (action,explanation), where explanation is a
    dictionary with the keys 'options', 'value', 'decision',
    'actor', 'breakdown', 'effect' and 'differential'.

    Raises ValueError if evaluateChoices yields no options.
    """
    # Never share one default list object between separate calls
    if choices is None:
        choices = []
    # Compute the EV of each option
    values = self.evaluateChoices(state=state,choices=choices,
                                  debug=debug,depth=depth)
    if not values:
        # Without this check the method would fail further down
        # with an unhelpful unbound-variable error
        raise ValueError('No options available to choose from')
    # Compute the probability distribution.  The original value
    # object is kept under 'whole value'; 'value' is collapsed to
    # the single number that computeDistribution works from.
    for option in values.values():
        option['whole value'] = option['value']
        option['value'] = option['value'].total().mean()
    self.computeDistribution(values)
    for option in values.values():
        debug.message(7,'P(%s) = %4.3f' % (repr(option['decision']),
                                           option['probability']))
    # Choose an action according to this distribution: walk the
    # options, accumulating probability mass, and stop at the
    # first one that pushes the running total past the cutoff.
    # If the total never exceeds the cutoff, the last option
    # visited remains selected.
    cutoff = random.random()
    total = 0.
    for option in values.values():
        total += option['probability']
        if total > cutoff:
            break
    # Return the selected action
    action = option['decision']
    # NOTE: this removes the key from the decision object itself,
    # so the entry stored in 'values' loses its 'actor' as well
    del action['actor']
    explanation = {'options':values,
                   'value':option['whole value'],
                   'decision':action,
                   'actor':state.name,
                   'breakdown':option['breakdown'],
                   'effect':option['effect'],
                   'differential':0.}
    debug.message(9,'%s selects %s with Prob %4.3f' \
                  % (state.name,repr(action),option['probability']))
    return action,explanation

52

def computeDistribution(self,options):
    """Fills in a probability distribution over a dictionary of
    action choices.

    Every entry of 'options' is expected to carry a float under
    its 'value' key.  The work is delegated to prob(), which is
    described as computing a Boltzmann distribution over those
    values and recording the result under each entry's
    'probability' key.  The steepness handed to prob() is this
    object's 'beta' attribute multiplied by the number of options:
    a beta of 0 gives a uniform distribution, and larger values
    approach deterministic behavior.  Subclasses wanting another
    distribution can simply override this method.

    Returns whatever prob() returns.
    """
    optionCount = float(len(options))
    steepness = self.beta*optionCount
    return prob(options,steepness)

Source Code for Module teamwork.policy.StochasticPolicy