Module pacai.bin.gridworld
Functions
def getUserAction(state, actionFunction)
-
Get an action from the user (rather than the agent).
Used for debugging and lecture demos.
def main(argv)
-
Entry point for the gridworld simulation The args are a blind pass of
sys.argv
with the executable stripped. def makeGrid(gridString)
def parseOptions(argv)
-
Processes the command used to run gridworld from the command line.
def runEpisode(agent, environment, discount, decision, display, message, pause, episode)
Classes
class Grid (width, height, initialValue=' ')
-
A 2-dimensional array of immutables backed by a list of lists. Data is accessed via grid[x][y] where (x, y) are cartesian coordinates with x horizontal, y vertical and the origin (0, 0) in the bottom left corner.
The str method constructs an output that is oriented appropriately.
Expand source code
class Grid(object): """ A 2-dimensional array of immutables backed by a list of lists. Data is accessed via grid[x][y] where (x, y) are cartesian coordinates with x horizontal, y vertical and the origin (0, 0) in the bottom left corner. The __str__ method constructs an output that is oriented appropriately. """ def __init__(self, width, height, initialValue=' '): self.width = width self.height = height self.data = [[initialValue for y in range(height)] for x in range(width)] self.terminalState = 'TERMINAL_STATE' def __getitem__(self, i): return self.data[i] def __setitem__(self, key, item): self.data[key] = item def __eq__(self, other): if (other is None): return False return self.data == other.data def __hash__(self): return hash(self.data) def copy(self): g = Grid(self.width, self.height) g.data = [x[:] for x in self.data] return g def deepCopy(self): return self.copy() def shallowCopy(self): g = Grid(self.width, self.height) g.data = self.data return g def _getLegacyText(self): t = [[self.data[x][y] for x in range(self.width)] for y in range(self.height)] t.reverse() return t def __str__(self): return str(self._getLegacyText())
Methods
def copy(self)
def deepCopy(self)
def shallowCopy(self)
class Gridworld (grid)
-
Helper class that provides a standard way to create an ABC using inheritance.
Expand source code
class Gridworld(MarkovDecisionProcess): def __init__(self, grid): # layout if (isinstance(grid, list)): grid = makeGrid(grid) self.grid = grid # parameters self.livingReward = 0.0 self.noise = 0.2 def setLivingReward(self, reward): """ The (negative) reward for exiting "normal" states. Note that in the R+N text, this reward is on entering a state and therefore is not clearly part of the state's future rewards. """ self.livingReward = reward def setNoise(self, noise): """ The probability of moving in an unintended direction. """ self.noise = noise def getPossibleActions(self, state): """ Returns list of valid actions for 'state'. Note that you can request moves into walls and that "exit" states transition to the terminal state under the special action "done". """ if state == self.grid.terminalState: return () x, y = state if isinstance(self.grid[x][y], int): return ('exit', ) return ('north', 'west', 'south', 'east') def getStates(self): """ Return list of all states. """ # The true terminal state. states = [self.grid.terminalState] for x in range(self.grid.width): for y in range(self.grid.height): if self.grid[x][y] != '#': state = (x, y) states.append(state) return states def getReward(self, state, action, nextState): """ Get reward for state, action, nextState transition. Note that the reward depends only on the state being departed (as in the R+N book examples, which more or less use this convention). """ if state == self.grid.terminalState: return 0.0 x, y = state cell = self.grid[x][y] if isinstance(cell, int) or isinstance(cell, float): return cell return self.livingReward def getStartState(self): for x in range(self.grid.width): for y in range(self.grid.height): if self.grid[x][y] == 'S': return (x, y) raise Exception('Grid has no start state') def isTerminal(self, state): """ Only the TERMINAL_STATE state is *actually* a terminal state. The other "exit" states are technically non-terminals with a single action "exit" which leads to the true terminal state. This convention is to make the grids line up with the examples in the R+N textbook. """ return state == self.grid.terminalState def getTransitionStatesAndProbs(self, state, action): """ Returns list of (nextState, prob) pairs representing the states reachable from 'state' by taking 'action' along with their transition probabilities. """ if action not in self.getPossibleActions(state): raise Exception('Illegal action!') if self.isTerminal(state): return [] x, y = state if isinstance(self.grid[x][y], int) or isinstance(self.grid[x][y], float): termState = self.grid.terminalState return [(termState, 1.0)] successors = [] northState = (self.__isAllowed(y + 1, x) and (x, y + 1)) or state westState = (self.__isAllowed(y, x - 1) and (x - 1, y)) or state southState = (self.__isAllowed(y - 1, x) and (x, y - 1)) or state eastState = (self.__isAllowed(y, x + 1) and (x + 1, y)) or state if action == 'north' or action == 'south': if action == 'north': successors.append((northState, 1 - self.noise)) else: successors.append((southState, 1 - self.noise)) massLeft = self.noise successors.append((westState, massLeft / 2.0)) successors.append((eastState, massLeft / 2.0)) if action == 'west' or action == 'east': if action == 'west': successors.append((westState, 1 - self.noise)) else: successors.append((eastState, 1 - self.noise)) massLeft = self.noise successors.append((northState, massLeft / 2.0)) successors.append((southState, massLeft / 2.0)) successors = self.__aggregate(successors) return successors def __aggregate(self, statesAndProbs): counter = {} for state, prob in statesAndProbs: if state not in counter: counter[state] = 0.0 counter[state] += prob newStatesAndProbs = [] for state, prob in counter.items(): newStatesAndProbs.append((state, prob)) return newStatesAndProbs def __isAllowed(self, y, x): if y < 0 or y >= self.grid.height: return False if x < 0 or x >= self.grid.width: return False return self.grid[x][y] != '#'
Ancestors
- MarkovDecisionProcess
- abc.ABC
Methods
def getPossibleActions(self, state)
-
Returns list of valid actions for 'state'.
Note that you can request moves into walls and that "exit" states transition to the terminal state under the special action "done".
def getReward(self, state, action, nextState)
-
Get reward for state, action, nextState transition.
Note that the reward depends only on the state being departed (as in the R+N book examples, which more or less use this convention).
def getStartState(self)
-
Inherited from:
MarkovDecisionProcess
.getStartState
Return the start state of the MDP.
def getStates(self)
-
Return list of all states.
def getTransitionStatesAndProbs(self, state, action)
-
Returns list of (nextState, prob) pairs representing the states reachable from 'state' by taking 'action' along with their transition probabilities.
def isTerminal(self, state)
-
Only the TERMINAL_STATE state is actually a terminal state. The other "exit" states are technically non-terminals with a single action "exit" which leads to the true terminal state. This convention is to make the grids line up with the examples in the R+N textbook.
def setLivingReward(self, reward)
-
The (negative) reward for exiting "normal" states.
Note that in the R+N text, this reward is on entering a state and therefore is not clearly part of the state's future rewards.
def setNoise(self, noise)
-
The probability of moving in an unintended direction.
class GridworldEnvironment (gridWorld)
-
Helper class that provides a standard way to create an ABC using inheritance.
Expand source code
class GridworldEnvironment(Environment): def __init__(self, gridWorld): self.gridWorld = gridWorld self.reset() def getCurrentState(self): return self.state def getPossibleActions(self, state): return self.gridWorld.getPossibleActions(state) def doAction(self, action): successors = self.gridWorld.getTransitionStatesAndProbs(self.state, action) sum = 0.0 rand = random.random() state = self.getCurrentState() for nextState, prob in successors: sum += prob if sum > 1.0: raise Exception('Total transition probability more than one; sample failure.') if rand < sum: reward = self.gridWorld.getReward(state, action, nextState) self.state = nextState return (nextState, reward) raise Exception('Total transition probability less than one; sample failure.') def reset(self): self.state = self.gridWorld.getStartState()
Ancestors
- Environment
- abc.ABC
Methods
def doAction(self, action)
-
Inherited from:
Environment
.doAction
Performs the given action in the current environment state and updates the enviornment …
def getCurrentState(self)
-
Inherited from:
Environment
.getCurrentState
Returns the current state of enviornment.
def getPossibleActions(self, state)
-
Inherited from:
Environment
.getPossibleActions
Returns possible actions the agent can take in the given state. Can return the empty list if we are in a terminal state.
def isTerminal(self)
-
Inherited from:
Environment
.isTerminal
Has the enviornment entered a terminal state? This means there are no successors.
def reset(self)
-
Inherited from:
Environment
.reset
Resets the current state to the start state.