Module pacai.ui.crawler.gui

Functions

def run(max_steps=None)

Classes

class Application (win, max_steps)
Expand source code
class Application(object):
    def __init__(self, win, max_steps):
        self.ep = 0
        self.ga = 2
        self.al = 2
        self.stepCount = 0
        self.max_steps = max_steps
        self.exit_status = 0

        # Init Gui
        self.__initGUI(win)

        self.robot = CrawlingRobot(self.canvas)
        self.robotEnvironment = CrawlingRobotEnvironment(self.robot)

        # Init Agent
        actionFn = lambda state: self.robotEnvironment.getPossibleActions(state)
        self.learner = QLearningAgent(0, actionFn=actionFn)

        self.learner.setEpsilon(self.epsilon)
        self.learner.setLearningRate(self.alpha)
        self.learner.setDiscount(self.gamma)

        # Start GUI
        self.running = True
        self.stopped = False
        self.stepsToSkip = 0
        self.thread = threading.Thread(target = self._run_wrapper)
        self.thread.start()

    def sigmoid(self, x):
        return 1.0 / (1.0 + 2.0 ** (-x))

    def incrementSpeed(self, inc):
        self.tickTime *= inc
        # self.epsilon = min(1.0, self.epsilon)
        # self.epsilon = max(0.0, self.epsilon)
        # self.learner.setSpeed(self.epsilon)
        self.speed_label['text'] = 'Step Delay: %.5f' % (self.tickTime)

    def incrementEpsilon(self, inc):
        self.ep += inc
        self.epsilon = self.sigmoid(self.ep)
        self.learner.setEpsilon(self.epsilon)
        self.epsilon_label['text'] = 'Epsilon: %.3f' % (self.epsilon)

    def incrementGamma(self, inc):
        self.ga += inc
        self.gamma = self.sigmoid(self.ga)
        self.learner.setDiscount(self.gamma)
        self.gamma_label['text'] = 'Discount: %.3f' % (self.gamma)

    def incrementAlpha(self, inc):
        self.al += inc
        self.alpha = self.sigmoid(self.al)
        self.learner.setLearningRate(self.alpha)
        self.alpha_label['text'] = 'Learning Rate: %.3f' % (self.alpha)

    def __initGUI(self, win):
        # Window
        self.win = win

        # Initialize Frame
        win.grid()
        self.dec = -0.5
        self.inc = 0.5
        self.tickTime = 0.05

        # Epsilon Button + Label
        self.setupSpeedButtonAndLabel(win)

        self.setupEpsilonButtonAndLabel(win)

        # Gamma Button + Label
        self.setUpGammaButtonAndLabel(win)

        # Alpha Button + Label
        self.setupAlphaButtonAndLabel(win)

        # Exit Button
        # self.exit_button = tkinter.Button(win, text='Quit', command=self.exit)
        # self.exit_button.grid(row=0, column=9)

        # Simulation Buttons
        # self.setupSimulationButtons(win)

        # Canvas
        self.canvas = tkinter.Canvas(root, height=200, width=1000)
        self.canvas.grid(row=2, columnspan=10)

    def setupAlphaButtonAndLabel(self, win):
        self.alpha_minus = tkinter.Button(win, text="-",
                command=(lambda: self.incrementAlpha(self.dec)))
        self.alpha_minus.grid(row=1, column=3, padx=10)

        self.alpha = self.sigmoid(self.al)
        self.alpha_label = tkinter.Label(win, text='Learning Rate: %.3f' % (self.alpha))
        self.alpha_label.grid(row=1, column=4)

        self.alpha_plus = tkinter.Button(win, text="+",
                command=(lambda: self.incrementAlpha(self.inc)))
        self.alpha_plus.grid(row=1, column=5, padx=10)

    def setUpGammaButtonAndLabel(self, win):
        self.gamma_minus = tkinter.Button(win, text="-",
                command=(lambda: self.incrementGamma(self.dec)))
        self.gamma_minus.grid(row=1, column=0, padx=10)

        self.gamma = self.sigmoid(self.ga)
        self.gamma_label = tkinter.Label(win, text='Discount: %.3f' % (self.gamma))
        self.gamma_label.grid(row=1, column=1)

        self.gamma_plus = tkinter.Button(win, text="+",
                command=(lambda: self.incrementGamma(self.inc)))
        self.gamma_plus.grid(row=1, column=2, padx=10)

    def setupEpsilonButtonAndLabel(self, win):
        self.epsilon_minus = tkinter.Button(win, text="-",
                command=(lambda: self.incrementEpsilon(self.dec)))
        self.epsilon_minus.grid(row=0, column=3)

        self.epsilon = self.sigmoid(self.ep)
        self.epsilon_label = tkinter.Label(win, text='Epsilon: %.3f' % (self.epsilon))
        self.epsilon_label.grid(row=0, column=4)

        self.epsilon_plus = tkinter.Button(win, text="+",
                command=(lambda: self.incrementEpsilon(self.inc)))
        self.epsilon_plus.grid(row=0, column=5)

    def setupSpeedButtonAndLabel(self, win):
        self.speed_minus = tkinter.Button(win, text="-", command=(lambda: self.incrementSpeed(0.5)))
        self.speed_minus.grid(row=0, column=0)

        self.speed_label = tkinter.Label(win, text='Step Delay: %.5f' % (self.tickTime))
        self.speed_label.grid(row=0, column=1)

        self.speed_plus = tkinter.Button(win, text="+", command=(lambda: self.incrementSpeed(2)))
        self.speed_plus.grid(row=0, column=2)

    def skip5kSteps(self):
        self.stepsToSkip = 5000

    def exit(self):
        self.running = False

        if (self.thread is not None):
            self.thread.join()
            self.thread = None

        if (self.win is not None):
            self.win.destroy()
            self.win = None

    def step(self):
        self.stepCount += 1

        state = self.robotEnvironment.getCurrentState()
        actions = self.robotEnvironment.getPossibleActions(state)

        if len(actions) == 0.0:
            self.robotEnvironment.reset()
            state = self.robotEnvironment.getCurrentState()
            actions = self.robotEnvironment.getPossibleActions(state)
            print('Reset!')

        action = self.learner.getAction(state)
        if (action is None):
            raise Exception('None action returned: Code Not Complete')

        nextState, reward = self.robotEnvironment.doAction(action)
        self.learner.observeTransition(state, action, nextState, reward)

    # Run on a different thread.
    def _run_wrapper(self):
        try:
            self.run()
        except Exception:
            self.exit_status = 10
            traceback.print_exc()
            self.win.quit()

        self.running = False
        self.stopped = True

        self.win.quit()

    # Run on a different thread.
    def run(self):
        self.stepCount = 0
        self.learner.startEpisode()

        while True:
            minSleep = 0.01
            tm = max(minSleep, self.tickTime)
            time.sleep(tm)
            self.stepsToSkip = int(tm / self.tickTime) - 1

            if not self.running:
                break

            for i in range(self.stepsToSkip):
                self.step()

            self.stepsToSkip = 0
            self.step()

            if (self.max_steps is not None and self.stepCount >= self.max_steps):
                break

        self.learner.stopEpisode()

    def start(self):
        self.win.mainloop()

Methods

def exit(self)
def incrementAlpha(self, inc)
def incrementEpsilon(self, inc)
def incrementGamma(self, inc)
def incrementSpeed(self, inc)
def run(self)
def setUpGammaButtonAndLabel(self, win)
def setupAlphaButtonAndLabel(self, win)
def setupEpsilonButtonAndLabel(self, win)
def setupSpeedButtonAndLabel(self, win)
def sigmoid(self, x)
def skip5kSteps(self)
def start(self)
def step(self)
class CrawlingRobot (canvas)
Expand source code
class CrawlingRobot(object):
    def setAngles(self, armAngle, handAngle):
        """
        set the robot's arm and hand angles
        to the passed in values
        """

        self.armAngle = armAngle
        self.handAngle = handAngle

    def getAngles(self):
        """
        returns the pair of (armAngle, handAngle)
        """

        return self.armAngle, self.handAngle

    def getRobotPosition(self):
        """
        returns the (x, y) coordinates
        of the lower-left point of the robot
        """

        return self.robotPos

    def moveArm(self, newArmAngle):
        """
        move the robot arm to 'newArmAngle'
        """

        if newArmAngle > self.maxArmAngle:
            raise Exception('Crawling Robot: Arm Raised too high. Careful!')

        if newArmAngle < self.minArmAngle:
            raise Exception('Crawling Robot: Arm Raised too low. Careful!')

        disp = self.displacement(self.armAngle, self.handAngle, newArmAngle, self.handAngle)
        curXPos = self.robotPos[0]
        self.robotPos = (curXPos + disp, self.robotPos[1])
        self.armAngle = newArmAngle

        # Position and Velocity Sign Post
        self.positions.append(self.getRobotPosition()[0])

        if len(self.positions) > 100:
            self.positions.pop(0)
            # self.angleSums.pop(0)

    def moveHand(self, newHandAngle):
        """
        move the robot hand to 'newArmAngle'
        """

        if newHandAngle > self.maxHandAngle:
            raise Exception('Crawling Robot: Hand Raised too high. Careful!')

        if newHandAngle < self.minHandAngle:
            raise Exception('Crawling Robot: Hand Raised too low. Careful!')

        disp = self.displacement(self.armAngle, self.handAngle, self.armAngle, newHandAngle)
        curXPos = self.robotPos[0]
        self.robotPos = (curXPos + disp, self.robotPos[1])
        self.handAngle = newHandAngle

        # Position and Velocity Sign Post
        self.positions.append(self.getRobotPosition()[0])

        if len(self.positions) > 100:
            self.positions.pop(0)
            # self.angleSums.pop(0)

    def getMinAndMaxArmAngles(self):
        """
        get the lower- and upper- bound
        for the arm angles returns (min, max) pair
        """

        return self.minArmAngle, self.maxArmAngle

    def getMinAndMaxHandAngles(self):
        """
        get the lower- and upper- bound
        for the hand angles returns (min, max) pair
        """

        return self.minHandAngle, self.maxHandAngle

    def getRotationAngle(self):
        """
        get the current angle the
        robot body is rotated off the ground
        """

        armCos, armSin = self.__getCosAndSin(self.armAngle)
        handCos, handSin = self.__getCosAndSin(self.handAngle)

        x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
        y = self.armLength * armSin + self.handLength * handSin + self.robotHeight

        if y < 0:
            return math.atan(-y / x)
        return 0.0

    # You shouldn't need methods below here

    def __getCosAndSin(self, angle):
        return math.cos(angle), math.sin(angle)

    def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree):
        oldArmCos, oldArmSin = self.__getCosAndSin(oldArmDegree)
        armCos, armSin = self.__getCosAndSin(armDegree)
        oldHandCos, oldHandSin = self.__getCosAndSin(oldHandDegree)
        handCos, handSin = self.__getCosAndSin(handDegree)

        xOld = self.armLength * oldArmCos + self.handLength * oldHandCos + self.robotWidth
        yOld = self.armLength * oldArmSin + self.handLength * oldHandSin + self.robotHeight

        x = self.armLength * armCos + self.handLength * handCos + self.robotWidth
        y = self.armLength * armSin + self.handLength * handSin + self.robotHeight

        if y < 0:
            if yOld <= 0:
                return math.sqrt(xOld * xOld + yOld * yOld) - math.sqrt(x * x + y * y)
            return (xOld - yOld * (x - xOld) / (y - yOld)) - math.sqrt(x * x + y * y)
        else:
            if yOld >= 0:
                return 0.0
            return -(x - y * (xOld - x) / (yOld - y)) + math.sqrt(xOld * xOld + yOld * yOld)

        raise Exception('Never Should See This!')

    def draw(self, stepCount, stepDelay):
        x1, y1 = self.getRobotPosition()
        x1 = x1 % self.totWidth

        # Check Lower Still on the ground
        if y1 != self.groundY:
            raise Exception('Flying Robot!!')

        rotationAngle = self.getRotationAngle()
        cosRot, sinRot = self.__getCosAndSin(rotationAngle)

        x2 = x1 + self.robotWidth * cosRot
        y2 = y1 - self.robotWidth * sinRot

        x3 = x1 - self.robotHeight * sinRot
        y3 = y1 - self.robotHeight * cosRot

        x4 = x3 + cosRot * self.robotWidth
        y4 = y3 - sinRot * self.robotWidth

        self.canvas.coords(self.robotBody, x1, y1, x2, y2, x4, y4, x3, y3)

        armCos, armSin = self.__getCosAndSin(rotationAngle + self.armAngle)
        xArm = x4 + self.armLength * armCos
        yArm = y4 - self.armLength * armSin

        self.canvas.coords(self.robotArm, x4, y4, xArm, yArm)

        handCos, handSin = self.__getCosAndSin(self.handAngle + rotationAngle)
        xHand = xArm + self.handLength * handCos
        yHand = yArm - self.handLength * handSin

        self.canvas.coords(self.robotHand, xArm, yArm, xHand, yHand)

        # Position and Velocity Sign Post

        # time = len(self.positions) + 0.5 * sum(self.angleSums)
        # velocity = (self.positions[-1]-self.positions[0]) / time
        # if len(self.positions) == 1: return

        steps = (stepCount - self.lastStep)
        if (steps == 0):
            return

        # pos = self.positions[-1]
        # velocity = (pos - self.lastPos) / steps
        # g = .9 ** (10 * stepDelay)
        # g = .99 ** steps
        # self.velAvg = g * self.velAvg + (1 - g) * velocity
        # g = .999 ** steps
        # self.velAvg2 = g * self.velAvg2 + (1 - g) * velocity

        pos = self.positions[-1]
        velocity = pos - self.positions[-2]
        vel2 = (pos - self.positions[0]) / len(self.positions)
        self.velAvg = .9 * self.velAvg + .1 * vel2
        velMsg = '100-step Avg Velocity: %.2f' % self.velAvg
        # velMsg2 = '1000-step Avg Velocity: %.2f' % self.velAvg2
        velocityMsg = 'Velocity: %.2f' % velocity
        positionMsg = 'Position: %2.f' % pos
        stepMsg = 'Step: %d' % stepCount

        if (self.vel_msg is not None):
            self.canvas.delete(self.vel_msg)
            self.canvas.delete(self.pos_msg)
            self.canvas.delete(self.step_msg)
            self.canvas.delete(self.velavg_msg)
            # self.canvas.delete(self.velavg2_msg)
            # self.velavg2_msg = self.canvas.create_text(850, 190, text=velMsg2)

        self.velavg_msg = self.canvas.create_text(650, 190, text=velMsg)
        self.vel_msg = self.canvas.create_text(450, 190, text=velocityMsg)
        self.pos_msg = self.canvas.create_text(250, 190, text=positionMsg)
        self.step_msg = self.canvas.create_text(50, 190, text=stepMsg)

        # self.lastPos = pos
        self.lastStep = stepCount
        # self.lastVel = velocity

    def __init__(self, canvas):
        # Canvas
        self.canvas = canvas
        self.velAvg = 0
        # self.velAvg2 = 0
        # self.lastPos = 0
        self.lastStep = 0
        # self.lastVel = 0

        # Arm and Hand Degrees
        self.armAngle = self.oldArmDegree = 0.0
        self.handAngle = self.oldHandDegree = -math.pi / 6

        self.maxArmAngle = math.pi / 6
        self.minArmAngle = -math.pi / 6

        self.maxHandAngle = 0
        self.minHandAngle = -(5.0 / 6.0) * math.pi

        # Draw Ground
        self.totWidth = canvas.winfo_reqwidth()
        self.totHeight = canvas.winfo_reqheight()
        self.groundHeight = 40
        self.groundY = self.totHeight - self.groundHeight

        self.ground = canvas.create_rectangle(0, self.groundY, self.totWidth, self.totHeight,
                fill = 'blue')

        # Robot Body
        self.robotWidth = 80
        self.robotHeight = 40
        self.robotPos = (20, self.groundY)
        self.robotBody = canvas.create_polygon(0, 0, 0, 0, 0, 0, 0, 0, fill='green')

        # Robot Arm
        self.armLength = 60
        self.robotArm = canvas.create_line(0, 0, 0, 0, fill='orange', width=5)

        # Robot Hand
        self.handLength = 40
        self.robotHand = canvas.create_line(0, 0, 0, 0, fill='red', width=3)

        self.positions = [0, 0]
        # self.angleSums = [0, 0]

        self.vel_msg = None
        self.velavg_msg = None
        self.pos_msg = None
        self.step_msg = None

Methods

def displacement(self, oldArmDegree, oldHandDegree, armDegree, handDegree)
def draw(self, stepCount, stepDelay)
def getAngles(self)

returns the pair of (armAngle, handAngle)

def getMinAndMaxArmAngles(self)

get the lower- and upper- bound for the arm angles returns (min, max) pair

def getMinAndMaxHandAngles(self)

get the lower- and upper- bound for the hand angles returns (min, max) pair

def getRobotPosition(self)

returns the (x, y) coordinates of the lower-left point of the robot

def getRotationAngle(self)

get the current angle the robot body is rotated off the ground

def moveArm(self, newArmAngle)

move the robot arm to 'newArmAngle'

def moveHand(self, newHandAngle)

move the robot hand to 'newArmAngle'

def setAngles(self, armAngle, handAngle)

set the robot's arm and hand angles to the passed in values

class CrawlingRobotEnvironment (crawlingRobot)

A GUI display for crawler.

Expand source code
class CrawlingRobotEnvironment(Environment):
    """
    A GUI display for crawler.
    """

    def __init__(self, crawlingRobot):
        self.crawlingRobot = crawlingRobot

        # The state is of the form (armAngle, handAngle)
        # where the angles are bucket numbers, not actual
        # degree measurements
        self.state = None

        self.nArmStates = 9
        self.nHandStates = 13

        # create a list of arm buckets and hand buckets to
        # discretize the state space
        minArmAngle, maxArmAngle = self.crawlingRobot.getMinAndMaxArmAngles()
        minHandAngle, maxHandAngle = self.crawlingRobot.getMinAndMaxHandAngles()
        armIncrement = (maxArmAngle - minArmAngle) / (self.nArmStates - 1)
        handIncrement = (maxHandAngle - minHandAngle) / (self.nHandStates - 1)
        self.armBuckets = [minArmAngle + (armIncrement * i) for i in range(self.nArmStates)]
        self.handBuckets = [minHandAngle + (handIncrement * i) for i in range(self.nHandStates)]

        # Reset
        self.reset()

    def getCurrentState(self):
        """
        Return the current state of the crawling robot.
        """

        return self.state

    def getPossibleActions(self, state):
        """
        Returns possible actions for the states in the current state.
        """

        actions = list()
        currArmBucket, currHandBucket = state

        if currArmBucket > 0:
            actions.append('arm-down')

        if currArmBucket < self.nArmStates - 1:
            actions.append('arm-up')

        if currHandBucket > 0:
            actions.append('hand-down')

        if currHandBucket < self.nHandStates - 1:
            actions.append('hand-up')

        return actions

    def doAction(self, action):
        """
        Perform the action and update
        the current state of the Environment
        and return the reward for the
        current state, the next state
        and the taken action.

        Returns:
            nextState, reward
        """

        nextState, reward = None, None

        oldX, oldY = self.crawlingRobot.getRobotPosition()

        armBucket, handBucket = self.state
        armAngle, handAngle = self.crawlingRobot.getAngles()

        if action == 'arm-up':
            newArmAngle = self.armBuckets[armBucket + 1]
            self.crawlingRobot.moveArm(newArmAngle)

            nextState = (armBucket + 1, handBucket)
        if action == 'arm-down':
            newArmAngle = self.armBuckets[armBucket - 1]
            self.crawlingRobot.moveArm(newArmAngle)
            nextState = (armBucket - 1, handBucket)

        if action == 'hand-up':
            newHandAngle = self.handBuckets[handBucket + 1]
            self.crawlingRobot.moveHand(newHandAngle)
            nextState = (armBucket, handBucket + 1)

        if action == 'hand-down':
            newHandAngle = self.handBuckets[handBucket - 1]
            self.crawlingRobot.moveHand(newHandAngle)
            nextState = (armBucket, handBucket - 1)

        newX, newY = self.crawlingRobot.getRobotPosition()

        # a simple reward function
        reward = newX - oldX

        self.state = nextState
        return nextState, reward

    def reset(self):
        """
        Resets the Environment to the initial state
        """

        # Initialize the state to be the middle
        # value for each parameter e.g. if there are 13 and 19
        # buckets for the arm and hand parameters, then the intial
        # state should be (6, 9)

        # Also call self.crawlingRobot.setAngles()
        # to the initial arm and hand angle

        armState = int(self.nArmStates / 2)
        handState = int(self.nHandStates / 2)

        self.state = armState, handState
        self.crawlingRobot.setAngles(self.armBuckets[armState], self.handBuckets[handState])
        self.crawlingRobot.positions = [20, self.crawlingRobot.getRobotPosition()[0]]

Ancestors

Methods

def doAction(self, action)

Perform the action and update the current state of the Environment and return the reward for the current state, the next state and the taken action.

Returns

nextState, reward

def getCurrentState(self)

Return the current state of the crawling robot.

def getPossibleActions(self, state)

Returns possible actions for the states in the current state.

def isTerminal(self)

Inherited from: Environment.isTerminal

Has the enviornment entered a terminal state? This means there are no successors.

def reset(self)

Resets the Environment to the initial state