Module pacai.bin.gridworld

Functions

def getUserAction(state, actionFunction)

Get an action from the user (rather than the agent).

Used for debugging and lecture demos.

def main(argv)

Entry point for the gridworld simulation. The args are a blind pass of sys.argv with the executable stripped.

def makeGrid(gridString)

Build a Grid from a list-based layout (used by Gridworld when it is given a list instead of a Grid).

def parseOptions(argv)

Parses the command-line options used to run gridworld.

def runEpisode(agent, environment, discount, decision, display, message, pause, episode)

Run a single episode of the agent in the environment, using the given decision function, display, and message callbacks.

Classes

class Grid (width, height, initialValue=' ')

A 2-dimensional array of immutables backed by a list of lists. Data is accessed via grid[x][y] where (x, y) are cartesian coordinates with x horizontal, y vertical and the origin (0, 0) in the bottom left corner.

The __str__ method constructs an output that is oriented appropriately.
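
A quick usage sketch (dimensions and cell values here are illustrative):

g = Grid(2, 3)   # 2 columns (x), 3 rows (y), all cells initialized to ' '
g[0][0] = 'S'    # bottom-left corner
g[1][2] = '+'    # top-right corner
print(g[0][0])   # 'S'
print(g)         # [[' ', '+'], [' ', ' '], ['S', ' ']] -- top row printed first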

Expand source code
class Grid(object):
    """
    A 2-dimensional array of immutables backed by a list of lists.
    Data is accessed via grid[x][y] where (x, y) are cartesian coordinates with x horizontal,
    y vertical and the origin (0, 0) in the bottom left corner.

    The __str__ method constructs an output that is oriented appropriately.
    """

    def __init__(self, width, height, initialValue=' '):
        self.width = width
        self.height = height
        self.data = [[initialValue for y in range(height)] for x in range(width)]
        self.terminalState = 'TERMINAL_STATE'

    def __getitem__(self, i):
        return self.data[i]

    def __setitem__(self, key, item):
        self.data[key] = item

    def __eq__(self, other):
        if (not isinstance(other, Grid)):
            return False
        return self.data == other.data

    def __hash__(self):
        # Lists are unhashable, so hash an immutable snapshot of the contents.
        return hash(tuple(tuple(column) for column in self.data))

    def copy(self):
        g = Grid(self.width, self.height)
        g.data = [x[:] for x in self.data]
        return g

    def deepCopy(self):
        return self.copy()

    def shallowCopy(self):
        g = Grid(self.width, self.height)
        g.data = self.data
        return g

    def _getLegacyText(self):
        t = [[self.data[x][y] for x in range(self.width)] for y in range(self.height)]
        t.reverse()
        return t

    def __str__(self):
        return str(self._getLegacyText())

Methods

def copy(self)

Return a new Grid with its own copy of the underlying data.

def deepCopy(self)

Alias for copy().

def shallowCopy(self)

Return a new Grid that shares this grid's underlying data.
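
The difference between the copy variants shows up once the original is mutated; a small sketch:

a = Grid(2, 2)
b = a.copy()
c = a.shallowCopy()
a[0][0] = 'X'
print(b[0][0])  # ' '  (copy duplicated the data)
print(c[0][0])  # 'X'  (shallowCopy shares the data)
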
class Gridworld (grid)

A grid-based Markov decision process with configurable motion noise and living reward.

Expand source code
class Gridworld(MarkovDecisionProcess):
    def __init__(self, grid):
        # layout
        if (isinstance(grid, list)):
            grid = makeGrid(grid)

        self.grid = grid

        # parameters
        self.livingReward = 0.0
        self.noise = 0.2

    def setLivingReward(self, reward):
        """
        The (negative) reward for exiting "normal" states.

        Note that in the R+N text, this reward is on entering
        a state and therefore is not clearly part of the state's
        future rewards.
        """

        self.livingReward = reward

    def setNoise(self, noise):
        """
        The probability of moving in an unintended direction.
        """

        self.noise = noise

    def getPossibleActions(self, state):
        """
        Returns list of valid actions for 'state'.

        Note that you can request moves into walls and
        that "exit" states transition to the terminal
        state under the special action "exit".
        """

        if state == self.grid.terminalState:
            return ()

        x, y = state
        if isinstance(self.grid[x][y], int):
            return ('exit', )

        return ('north', 'west', 'south', 'east')

    def getStates(self):
        """
        Return list of all states.
        """

        # The true terminal state.
        states = [self.grid.terminalState]
        for x in range(self.grid.width):
            for y in range(self.grid.height):
                if self.grid[x][y] != '#':
                    state = (x, y)
                    states.append(state)

        return states

    def getReward(self, state, action, nextState):
        """
        Get reward for state, action, nextState transition.

        Note that the reward depends only on the state being
        departed (as in the R+N book examples, which more or
        less use this convention).
        """

        if state == self.grid.terminalState:
            return 0.0

        x, y = state
        cell = self.grid[x][y]
        if isinstance(cell, int) or isinstance(cell, float):
            return cell

        return self.livingReward

    def getStartState(self):
        for x in range(self.grid.width):
            for y in range(self.grid.height):
                if self.grid[x][y] == 'S':
                    return (x, y)

        raise Exception('Grid has no start state')

    def isTerminal(self, state):
        """
        Only the TERMINAL_STATE state is *actually* a terminal state.
        The other "exit" states are technically non-terminals with
        a single action "exit" which leads to the true terminal state.
        This convention is to make the grids line up with the examples
        in the R+N textbook.
        """

        return state == self.grid.terminalState

    def getTransitionStatesAndProbs(self, state, action):
        """
        Returns list of (nextState, prob) pairs
        representing the states reachable
        from 'state' by taking 'action' along
        with their transition probabilities.
        """

        if action not in self.getPossibleActions(state):
            raise Exception('Illegal action!')

        if self.isTerminal(state):
            return []

        x, y = state

        if isinstance(self.grid[x][y], int) or isinstance(self.grid[x][y], float):
            termState = self.grid.terminalState
            return [(termState, 1.0)]

        successors = []

        # Note: __isAllowed takes (y, x). A move into a wall or off the grid
        # leaves the agent in place.
        northState = (x, y + 1) if self.__isAllowed(y + 1, x) else state
        westState = (x - 1, y) if self.__isAllowed(y, x - 1) else state
        southState = (x, y - 1) if self.__isAllowed(y - 1, x) else state
        eastState = (x + 1, y) if self.__isAllowed(y, x + 1) else state

        if action == 'north' or action == 'south':
            if action == 'north':
                successors.append((northState, 1 - self.noise))
            else:
                successors.append((southState, 1 - self.noise))

            massLeft = self.noise
            successors.append((westState, massLeft / 2.0))
            successors.append((eastState, massLeft / 2.0))

        if action == 'west' or action == 'east':
            if action == 'west':
                successors.append((westState, 1 - self.noise))
            else:
                successors.append((eastState, 1 - self.noise))

            massLeft = self.noise
            successors.append((northState, massLeft / 2.0))
            successors.append((southState, massLeft / 2.0))

        successors = self.__aggregate(successors)
        return successors

    def __aggregate(self, statesAndProbs):
        counter = {}
        for state, prob in statesAndProbs:
            if state not in counter:
                counter[state] = 0.0
            counter[state] += prob

        newStatesAndProbs = []
        for state, prob in counter.items():
            newStatesAndProbs.append((state, prob))

        return newStatesAndProbs

    def __isAllowed(self, y, x):
        if y < 0 or y >= self.grid.height:
            return False

        if x < 0 or x >= self.grid.width:
            return False

        return self.grid[x][y] != '#'

Ancestors

MarkovDecisionProcess

Methods

def getPossibleActions(self, state)

Returns list of valid actions for 'state'.

Note that you can request moves into walls and that "exit" states transition to the terminal state under the special action "exit".
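
For example, on a hand-built layout (cell values chosen for illustration):

g = Grid(2, 2)
g[0][0] = 'S'   # start cell
g[1][1] = 10    # an "exit" cell: any int triggers the 'exit' action
world = Gridworld(g)

print(world.getPossibleActions((0, 0)))           # ('north', 'west', 'south', 'east')
print(world.getPossibleActions((1, 1)))           # ('exit',)
print(world.getPossibleActions(g.terminalState))  # ()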

def getReward(self, state, action, nextState)

Get reward for state, action, nextState transition.

Note that the reward depends only on the state being departed (as in the R+N book examples, which more or less use this convention).

def getStartState(self)

Inherited from: MarkovDecisionProcess.getStartState

Return the start state of the MDP.

def getStates(self)

Return list of all states.

def getTransitionStatesAndProbs(self, state, action)

Returns list of (nextState, prob) pairs representing the states reachable from 'state' by taking 'action' along with their transition probabilities.
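
Continuing the sketch above with the default noise of 0.2: the intended move gets probability 0.8 and each perpendicular slip gets 0.1, with blocked moves keeping the agent in place.

print(world.getTransitionStatesAndProbs((0, 0), 'north'))
# e.g. [((0, 1), 0.8), ((0, 0), 0.1), ((1, 0), 0.1)] -- the 'west' slip
# hits the grid edge, so it leaves the agent at (0, 0)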

def isTerminal(self, state)

Only the TERMINAL_STATE state is actually a terminal state. The other "exit" states are technically non-terminals with a single action "exit" which leads to the true terminal state. This convention is to make the grids line up with the examples in the R+N textbook.

def setLivingReward(self, reward)

The (negative) reward for exiting "normal" states.

Note that in the R+N text, this reward is on entering a state and therefore is not clearly part of the state's future rewards.

def setNoise(self, noise)

The probability of moving in an unintended direction.
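
Both parameters are plain setters and interact with getReward; continuing the sketch above (the -0.04 value mirrors the R+N example):

world.setLivingReward(-0.04)  # per-step penalty for "normal" states
world.setNoise(0.0)           # make motion deterministic
print(world.getReward((0, 0), 'north', (0, 1)))          # -0.04 (living reward)
print(world.getReward((1, 1), 'exit', g.terminalState))  # 10 (the exit cell's value)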

class GridworldEnvironment (gridWorld)

An Environment that wraps a Gridworld, tracking the current state and sampling stochastic transitions.

Expand source code
class GridworldEnvironment(Environment):
    def __init__(self, gridWorld):
        self.gridWorld = gridWorld
        self.reset()

    def getCurrentState(self):
        return self.state

    def getPossibleActions(self, state):
        return self.gridWorld.getPossibleActions(state)

    def doAction(self, action):
        successors = self.gridWorld.getTransitionStatesAndProbs(self.state, action)
        total = 0.0
        rand = random.random()
        state = self.getCurrentState()

        # Sample a successor by walking the cumulative distribution until it
        # passes the random draw.
        for nextState, prob in successors:
            total += prob
            if total > 1.0:
                raise Exception('Total transition probability more than one; sample failure.')

            if rand < total:
                reward = self.gridWorld.getReward(state, action, nextState)
                self.state = nextState
                return (nextState, reward)

        raise Exception('Total transition probability less than one; sample failure.')

    def reset(self):
        self.state = self.gridWorld.getStartState()

Ancestors

Environment

Methods

def doAction(self, action)

Inherited from: Environment.doAction

Performs the given action in the current environment state and updates the environment …

def getCurrentState(self)

Inherited from: Environment.getCurrentState

Returns the current state of the environment.

def getPossibleActions(self, state)

Inherited from: Environment.getPossibleActions

Returns possible actions the agent can take in the given state. Can return the empty list if we are in a terminal state.

def isTerminal(self)

Inherited from: Environment.isTerminal

Has the environment entered a terminal state? This means there are no successors.

def reset(self)

Inherited from: Environment.reset

Resets the current state to the start state.
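
Putting the pieces together, a minimal random-walk sketch (reusing the world built in the Gridworld examples above; an empty action list signals the true terminal state):

import random

env = GridworldEnvironment(world)
env.reset()

while len(env.getPossibleActions(env.getCurrentState())) > 0:
    action = random.choice(env.getPossibleActions(env.getCurrentState()))
    nextState, reward = env.doAction(action)
    print(action, nextState, reward)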