1 from teamwork.math.boltzmann import *
2
3 from LookaheadPolicy import *
4 from LookupAheadPolicy import *
5
6 import random
8 """A nondeterministic version of the lookahead-based policy, where
9 actions are selected with a probability that is a function of
10 their expected values."""
11
12
13
14
# Steepness of the Boltzmann distribution used for action selection. The
# distribution step multiplies it by the number of options before passing it
# to prob(); 0 yields a uniform distribution and larger values push the
# choice toward deterministic behavior.
15 beta = 1.
16
18 """Returns a randomly selected action out of the available
19 choices, with each action selected with a probability
20 dependent on its relative expected value"""
21
# NOTE(review): the `def` line for this body is not present in this listing
# (the embedded numbering jumps from 16 to 18). The statements below read
# `self`, `state`, `choices`, `debug` and `depth`, so the header must bind
# all five names.
#
# Score every candidate. The result is used below as a dict whose entries
# carry 'value', 'decision', 'breakdown' and 'effect' fields.
22 values = self.evaluateChoices(state=state,choices=choices,
23 debug=debug,depth=depth)
24
# Preserve the original value object under 'whole value', then overwrite
# 'value' with the result of .total().mean() -- presumably a plain float,
# which the distribution step requires; confirm against the value type.
25 for option in values.values():
26 option['whole value'] = option['value']
27 option['value'] = option['value'].total().mean()
# The return value is ignored here; the code relies on each entry having a
# 'probability' field afterwards.
28 self.computeDistribution(values)
# Log the probability assigned to each decision at debug level 7.
# NOTE(review): backtick repr is Python 2-only syntax.
29 for option in values.values():
30 debug.message(7,'P(%s) = %4.3f' % (`option['decision']`,
31 option['probability']))
32
# Roulette-wheel sampling: draw a uniform cutoff, accumulate probabilities in
# dict iteration order, and stop at the first entry whose running total
# exceeds the cutoff. If the loop never breaks, `option` stays bound to the
# last entry visited.
# NOTE(review): if `values` is empty, `option` is never bound and the lookup
# after the loop fails.
33 cutoff = random.random()
34 total = 0.
35 for option in values.values():
36 total += option['probability']
37 if total > cutoff:
38 break
39
# `action` is the same object as option['decision'], so removing 'actor'
# also changes the entry held in `values`, which is returned below under
# 'options'.
40 action = option['decision']
41 del action['actor']
# Explanation record: the full option table, the chosen option's
# pre-collapse value, the chosen action, the acting entity's name, and the
# chosen option's breakdown and effect. 'differential' is always 0 here.
42 explanation = {'options':values,
43 'value':option['whole value'],
44 'decision':action,
45 'actor':state.name,
46 'breakdown':option['breakdown'],
47 'effect':option['effect'],
48 'differential':0.}
# Log the final selection and its probability at debug level 9.
49 debug.message(9,'%s selects %s with Prob %4.3f' \
50 % (state.name,`action`,option['probability']))
# Returns a 2-tuple: (chosen action, explanation dict).
51 return action,explanation
52
54 """Computes a probability distribution over the provided
55 dictionary of action choices. Each value in the dictionary
56 must have a 'value' field containing a float. This method
57 computes a Boltzmann distribution based on these values and
58 stores it in the 'probability' field of each entry. Modify
59 the 'beta' attribute on this object to vary the steepness of
60 the distribution (0 is a uniform distribution, increasing
61 values lead to deterministic behavior). To use a different
62 distribution altogether, simply override this method."""
# NOTE(review): the `def` line for this body is not present in this listing
# (the embedded numbering jumps from 52 to 54); the body reads `self` and
# `options`.
#
# The steepness passed to prob() is beta multiplied by the number of options,
# so it grows with the size of the choice set (and is 0.0 when `options` is
# empty). `prob` arrives through a star import -- presumably
# teamwork.math.boltzmann; confirm. Whatever prob() returns is passed
# straight back to the caller.
63 return prob(options,self.beta*float(len(options)))
64