teamwork.reward.MinMaxGoal

17 """A reward subfunction that is linear in a single feature/action 18 19 0) Creating a new goal (i.e., __init__()): 20 goal = L{MinMaxGoal}(entity,direction,type,key) 21 22 1) Accessing elements of this goal 23 goal.L{isMax}() 24 """

def __init__(self,entity=None,direction=None,goalType=None,
             key=None,value=None):
    """Constructs a goal object with the specified field values

    The named arguments are stored as corresponding attributes:
    @param entity: list of names (for recursive belief access if a state goal, or for relevant actor/object if action goal)
    @type entity: C{str[]}
    @param direction: either 'min' or 'max' (stored in lower case); when omitted the direction attribute is C{None}
    @type direction,goalType,key: C{str}
    @param goalType: either 'state' (value of a state feature), 'act' (number of occurrences of action of given type), 'actActor' (number of occurrences of action of given type by given actor), 'actObject' (number of occurrences of action of given type to given object); defaults to 'state'
    @param key: for a 'state' goal, the state feature to be min/maximized; for an 'act' goal, the act type to be min/maximized; defaults to the empty string
    @param value: table of element values for an instantiated goal; a fresh empty dictionary is created for each instance when omitted
    @raise DeprecationWarning: if C{entity} is a single string instead of a list of names
    """
    if isinstance(entity, str):
        raise DeprecationWarning('Entity specification should be list of names')
    self.entity = entity
    if direction:
        self.direction = direction.lower()
    else:
        # No direction given: leave it unset (generateName treats this
        # as an empty goal, and isMax treats anything but 'min' as max)
        self.direction = None
    if goalType:
        self.type = goalType
    else:
        # By default, state
        self.type = 'state'
    if key:
        self.key = key
    else:
        self.key = ''
    self.name = self.generateName()
    if value is None:
        # Never share one default dictionary across instances
        value = {}
    self.value = value
    self.weight = 1.

56

def __copy__(self):
    """Builds a duplicate of this goal

    The duplicate is created through the goal's own class with the same
    entity, direction, type and key; the value table is deep-copied and
    the weight is carried over.
    @return: the new goal instance
    """
    valueCopy = copy.deepcopy(self.value)
    duplicate = self.__class__(self.entity,self.direction,self.type,
                               self.key,valueCopy)
    duplicate.weight = self.weight
    return duplicate

62

def generateName(self):
    """Returns a canonical string representation

    The name has the form C{<direction>imize <feature> of <entity>} for
    state goals and C{<direction>imize number of <feature>} (optionally
    followed by C{by <entity>} or C{to <entity>}) for action goals.  A
    leading underscore on the key is dropped from the feature name.
    @return: the generated name, or the empty string for a goal without a direction
    @rtype: C{str}
    """
    if not self.direction:
        # Empty goal
        return ''
    # Start with direction
    name = self.direction+'imize '
    # Generate string rep of key (an empty key is legal, so avoid
    # indexing its first character directly)
    if self.key[:1] == '_':
        feature = self.key[1:]
    else:
        feature = self.key
    # Generate entity name(s); a missing or empty entity list yields
    # an empty entity label instead of an unbound variable
    names = self.entity or []
    if len(names) == 1:
        entity = names[0]
    elif len(names) == 2:
        entity = '%s to %s' % (names[1],names[0])
    elif len(names) > 2:
        entity = '->'.join(['%s' % (other) for other in names])
    else:
        entity = ''
    # Generate type-specific usage of entity
    if self.type == 'state':
        name += '%s of %s' % (feature,entity)
    else:
        name += 'number of %s' % (feature)
        if self.type == 'actActor':
            name += ' by %s' % (entity)
        elif self.type == 'actObject':
            name += ' to %s' % (entity)
    return name

94

def isMax(self):
    """
    @return: false only when the direction is 'min'; true for any other direction (including an unset one)
    @rtype: C{boolean}
    """
    return self.direction != 'min'

104

def toKey(self):
    """
    @return: the vector key corresponding to this goal: a state key on the single named entity for a 'state' goal, otherwise an action key built from the goal's act type
    @rtype: L{Key}
    @warning: There is no L{Key} subclass for beliefs
    @raise NotImplementedError: for a 'state' goal whose entity list does not hold exactly one name
    """
    if self.type != 'state':
        # Need to handle action goals as well
        return makeActionKey(Action({'type':self.key}))
    if len(self.entity) != 1:
        raise NotImplementedError('Goals on beliefs cannot yet be converted into keys')
    return StateKey({'entity':self.entity[0],
                     'feature':self.key})

120

def reward(self,context):
    """Applies this goal in the specified context

    The relevant entity is found by following the names in C{self.entity},
    starting from C{context}.  The value is then computed by goal type:
      - 'state': the entity's value for the state feature C{self.key}
      - 'act*' with key 'obey': accumulated support for superiors whose
        command matches the entity's last observed action
      - other 'act*': time-decayed count of matching observed actions
    The value is negated for a 'min' goal.
    @param context: object indexed by entity name (or offering C{getEntity}) and offering C{findObservation}
    @return: the reward value (zero distribution if the entity cannot be found)
    @raise KeyError: if the entity has no state feature C{self.key}
    @raise NameError: if the goal type or direction is not recognized
    """
    # Grab the entity relevant to this goal
    entity = None
    if self.entity:
        entity = context
        for name in self.entity:
            try:
                entity = entity[name]
            except TypeError:
                # Probably shouldn't have this clunkiness
                # (context is not subscriptable, so ask it for the entity)
                entity = entity.getEntity(name)
            except KeyError:
                # No info about the relevant entity, so give up
                value = Distribution({0.0:1.})
                return value
    if self.type == 'state':
        try:
            value = entity.getState(self.key)
        except KeyError:
            # Dump the entity's goals as a debugging aid before failing
            print entity.getGoals()
            raise KeyError,'%s has no %s' % (entity.ancestry(),self.key)
    elif self.type[:3] == 'act':
        if self.key == 'obey':
            # Special action key indicating incentive for
            # obeying commands
            value = 0.
            try:
                superiors = entity.relationships['_commander']
            except KeyError:
                # No commander relationship recorded for this entity
                superiors = []
            # Determine what I did last
            myAct,myDepth = context.findObservation({'actor':entity.name})
            if myAct:
                for superior in superiors:
                    # Look for any commands
                    command = {'type':'command',
                               'object':entity.name,
                               'actor':superior}
                    act,depth = context.findObservation(command)
                    if act and depth > myDepth:
                        # There was a command before my last action
                        if myAct == act['command']:
                            try:
                                value += entity.getSupport(act['actor']).mean()
                            except KeyError:
                                # No support value for this superior;
                                # fall back on a fixed increment
                                value += 0.1
        else:
            # Determine target actions to look for
            # NOTE(review): the loops below rebind the local 'entity' to
            # each name in self.entity; it is not read again afterwards
            if self.type == 'actObject':
                targetList = []
                for entity in self.entity:
                    targetList.append({'type':self.key,
                                       'object':entity})
            elif self.type == 'actActor':
                targetList = []
                for entity in self.entity:
                    targetList.append({'type':self.key,
                                       'actor':entity})
            else:
                targetList = [{'type':self.key}]
            # Count up time-decayed occurrences of target actions
            value = 0.
            for target in targetList:
                act,depth = context.findObservation(target)
                if act:
                    value += pow(ActionKey.decayRate,float(depth))
        # Should be a distribution all the way through eventually
        # NOTE(review): assumed to apply to both action branches, since
        # each of them accumulates a plain float -- confirm against the
        # original layout
        value = Distribution({value:1.})
    else:
        raise NameError,'unknown goal type '+self.type
    if self.direction == 'max':
        pass
    elif self.direction == 'min':
        value = -value
    else:
        # NOTE(review): an unset (None) direction makes this string
        # concatenation itself fail with TypeError, not NameError
        raise NameError,'unknown goal direction '+ self.direction
    return value

199

def maxElement(self):
    """Finds the element that has the highest value

    Elements are scanned in the order given by L{keys}; an element only
    replaces the current best when it is strictly greater, starting from
    C{Interval.FLOOR}.
    @return: a tuple of the key and value (the key is C{None} if no element exceeds the floor)
    @rtype: (C{str},L{Distribution})"""
    bestKey,bestValue = None,Interval.FLOOR
    for candidate in self.keys():
        if self[candidate] > bestValue:
            bestKey,bestValue = candidate,self[candidate]
    return bestKey,bestValue

211

def __getitem__(self,index):
    """Accessor: supports access in the form `self[index]'

    @return: the stored element, or 0.0 when this goal has no (or an empty) value table
    """
    if not self.value:
        return 0.0
    return self.value[index]

218

def __setitem__(self,index,value):
    """Mutator: supports assignment in the form `self[index]=x'

    The element is written straight into this goal's value table.
    """
    table = self.value
    table[index] = value

222

def keys(self):
    """@return: all of the element names in this goal (an empty list when there is no value table)"""
    if not self.value:
        return []
    return self.value.keys()

229

def evaluate(self,context):
    """Instantiates this goal in the given context

    The reward is computed for the context and stored, under this goal's
    name, as the only element of the value table of a new goal that has
    the same entity, direction, type and key.
    @return: the new goal instance"""
    outcome = {self.name:self.reward(context)}
    return self.__class__(self.entity,self.direction,self.type,self.key,
                          outcome)

235

def __add__(self,goal):
    """Adds two instantiated goals element by element

    Elements present in both goals are summed; elements present in only
    one goal are carried over.  Neither operand is modified.
    @param goal: the goal to add to this one
    @return: a new goal instance holding only the combined value table
    """
    if not self.value:
        if goal.value:
            # Only the other goal has elements, so let it drive the sum
            return goal + self
        return self.__class__()
    combined = copy.deepcopy(self.value)
    if goal.value:
        for name in goal.value.keys():
            try:
                combined[name] = combined[name] + goal.value[name]
            except KeyError:
                combined[name] = goal.value[name]
    return self.__class__(None,None,None,None,combined)

250

def __neg__(self):
    """@return: this goal multiplied by -1.0"""
    negated = self * -1.0
    return negated

253

def __sub__(self,goal):
    """@return: the sum of this goal and the negation of C{goal}"""
    negated = -goal
    return self + negated

256

def __mul__(self,factor):
    """Multiplies the elements of this goal by a factor

    How the factor is applied depends on its type:
      - a C{float}: every element is multiplied by it
      - a goal of the same class: each element is multiplied by the
        same-named element of C{factor}; elements missing from C{factor}
        are left unchanged
      - a C{dict}: each element is multiplied by the same-named entry;
        elements missing from the dictionary are dropped (weight 0)
      - anything else: every element is scaled by it (factor on the
        left), or the resulting value is 0.0 if this goal has no elements
    This goal itself is not modified.
    @param factor: the multiplier
    @return: a new goal instance holding only the resulting value table
    """
    # Exact type tests (not isinstance) are kept on purpose so that
    # subclasses of float/dict still take the generic scaling branch
    if type(factor) is float:
        # Scale every element
        value = copy.deepcopy(self.value)
        try:
            for key in value.keys():
                value[key] = value[key] * factor
        except AttributeError:
            # Diagnostic aid: show the value that could not be scaled
            print(value)
            raise
    elif factor.__class__ == self.__class__:
        # Elementwise product with another goal
        value = copy.deepcopy(self.value)
        for key in value.keys():
            try:
                value[key] = value[key] * factor.value[key]
            except KeyError:
                pass
    elif type(factor) is dict:
        # Elementwise product with a table of weights
        value = copy.deepcopy(self.value)
        # Iterate over a snapshot, since entries are deleted on the way
        for key in list(value.keys()):
            try:
                value[key] = value[key] * factor[key]
            except KeyError:
                # Assume weight is 0
                del value[key]
    elif self.value:
        # Scale
        value = copy.deepcopy(self.value)
        for key in value.keys():
            value[key] = factor * value[key]
    else:
        value = 0.0
    return self.__class__(None,None,None,None,value)

292

def __div__(self,factor):
    """Divides the elements of this goal by a factor

    Division is carried out as multiplication by the reciprocal(s).
    @param factor: either a goal of the same class (each element is divided by the same-named element of C{factor}) or a number
    @return: the product of this goal and the reciprocal(s)
    """
    # Comparing classes alone identifies another goal, for old-style and
    # new-style classes alike
    if factor.__class__ == self.__class__:
        value = copy.deepcopy(factor.value)
        for key in value.keys():
            value[key] = 1.0 / value[key]
        return self * value
    else:
        return self * (1.0/factor)

# True division (Python 3, or `from __future__ import division`) looks
# up __truediv__ rather than __div__
__truediv__ = __div__

302

def total(self):
    """Returns sum over the individual elements in an instantiated goal

    The sum starts from a distribution that puts all its weight on 0.0;
    each element is added with the element as the left operand.
    """
    running = Distribution({0.0:1.})
    if not self.value:
        return running
    for name in self.value.keys():
        running = self.value[name] + running
    return running

310

def __str__(self):
    """Returns a printable form of this goal

    An instantiated goal is shown as a brace-delimited listing of its
    elements in sorted key order, one per line, with values formatted to
    four decimal places when they support it; otherwise the goal's name
    is used, or '<null>' if it has none.
    """
    if not self.value:
        if self.name:
            return self.name
        return '<null>'
    entries = []
    for key in sorted(self.value.keys()):
        entry = '\n\t' + key + ': '
        try:
            entry = entry + '%6.4f' % self.value[key]
        except (TypeError,AttributeError):
            # Not a plain number, so fall back on its representation
            entry = entry + repr(self.value[key])
        entries.append(entry)
    return '{' + ','.join(entries) + '\n}'

335

def __hash__(self):
    """@return: the hash of this goal's name"""
    label = self.name
    return hash(label)

338

def __xml__(self):
    """Builds an XML representation of this goal

    The document has a C{goal} root element with C{direction}, C{type},
    C{key} and C{weight} attributes and a single C{entity} child that
    holds one C{name} element per entity name.
    @return: the new XML document
    """
    doc = Document()
    root = doc.createElement('goal')
    doc.appendChild(root)
    # An unset direction is written as an empty attribute
    root.setAttribute('direction',self.direction or '')
    root.setAttribute('type',self.type)
    root.setAttribute('key',self.key)
    # Attribute values must be strings for the document to serialize;
    # repr keeps enough digits to read the weight back unchanged
    root.setAttribute('weight',repr(self.weight))
    node = doc.createElement('entity')
    root.appendChild(node)
    for name in self.entity or []:
        subNode = doc.createElement('name')
        node.appendChild(subNode)
        subNode.appendChild(doc.createTextNode(name))
    return doc

354

def parse(self,element):
    """Restores this goal's fields from an XML element

    The entity names are read from the C{name} elements found under each
    child element (which must be an C{entity} element); direction, type,
    key and weight are read from the attributes of C{element}.  The
    goal's name is regenerated afterwards.
    @param element: the XML element describing the goal
    @raise AssertionError: if C{element} has a child element other than C{entity}
    """
    entity = []
    child = element.firstChild
    while child:
        if child.nodeType == Node.ELEMENT_NODE:
            # Compare explicitly: asserting a (tagName, 'entity') tuple
            # would always succeed
            assert child.tagName == 'entity', \
                'unexpected goal child element: %s' % (child.tagName)
            subNodes = child.getElementsByTagName('name')
            for subNode in subNodes:
                entity.append(str(subNode.firstChild.data).strip())
        child = child.nextSibling
    self.entity = entity
    self.direction = str(element.getAttribute('direction'))
    self.type = str(element.getAttribute('type'))
    self.key = str(element.getAttribute('key'))
    self.weight = float(element.getAttribute('weight'))
    self.name = self.generateName()

Source Code for Module teamwork.reward.MinMaxGoal