# Source code for simple_rl.mdp.MDPClass
''' MDPClass.py: Contains the MDP Class. '''
# Python imports.
import copy
class MDP(object):
    ''' Abstract class for a Markov Decision Process. '''

    def __init__(self, actions, transition_func, reward_func, init_state, gamma=0.99, step_cost=0):
        '''
        Args:
            actions (list): All actions available to the agent.
            transition_func (lambda): s, a --> s'. Maps a state-action pair to a next state.
            reward_func (lambda): s, a --> r. Maps a state-action pair to a reward (float).
            init_state (State): The MDP's start state.
            gamma (float): Discount factor, in [0, 1].
            step_cost (float): Per-step cost. Stored only; it is not subtracted from
                rewards here -- presumably reward_func accounts for it (TODO confirm
                against subclasses).
        '''
        self.actions = actions
        self.transition_func = transition_func
        self.reward_func = reward_func
        self.gamma = gamma
        self.init_state = copy.deepcopy(init_state)
        # Fix: deep-copy the current state as well. Previously cur_state aliased
        # the caller's init_state object, so any in-place mutation of the live
        # state would silently corrupt the caller's object (while self.init_state
        # stayed safe). This also makes __init__ consistent with reset(), which
        # already deep-copies.
        self.cur_state = copy.deepcopy(init_state)
        self.step_cost = step_cost

    # ---------------
    # -- Accessors --
    # ---------------

    def get_parameters(self):
        '''
        Returns:
            (dict) key=param_name (str) --> val=param_val (object).
        '''
        param_dict = {}
        param_dict["gamma"] = self.gamma
        param_dict["step_cost"] = self.step_cost
        return param_dict

    def get_init_state(self):
        return self.init_state

    def get_curr_state(self):
        return self.cur_state

    def get_actions(self):
        return self.actions

    def get_gamma(self):
        return self.gamma

    def get_reward_func(self):
        return self.reward_func

    def get_transition_func(self):
        return self.transition_func

    def get_num_state_feats(self):
        # Delegates to the State object's own feature count.
        return self.init_state.get_num_feats()

    def get_slip_prob(self):
        # Hook for stochastic subclasses; intentionally a no-op here.
        pass

    # --------------
    # -- Mutators --
    # --------------

    def set_gamma(self, new_gamma):
        self.gamma = new_gamma

    def set_step_cost(self, new_step_cost):
        self.step_cost = new_step_cost

    def set_slip_prob(self, slip_prob):
        # Hook for stochastic subclasses; intentionally a no-op here.
        pass

    # ----------
    # -- Core --
    # ----------

    def execute_agent_action(self, action):
        '''
        Args:
            action (str)

        Returns:
            (tuple: <float,State>): reward, State

        Summary:
            Core method of all of simple_rl. Facilitates interaction
            between the MDP and an agent.
        '''
        # NOTE: the reward is computed from the *pre-transition* state.
        reward = self.reward_func(self.cur_state, action)
        next_state = self.transition_func(self.cur_state, action)
        self.cur_state = next_state
        return reward, next_state

    def reset(self):
        # Restore the start state; deep copy so successive episodes are independent.
        self.cur_state = copy.deepcopy(self.init_state)

    def end_of_instance(self):
        # Hook for episodic subclasses to react to episode termination; no-op here.
        pass