Source code for simple_rl.mdp.MDPClass

''' MDPClass.py: Contains the MDP Class. '''

# Python imports.
import copy

class MDP(object):
    ''' Abstract class for a Markov Decision Process. '''

    def __init__(self, actions, transition_func, reward_func, init_state, gamma=0.99, step_cost=0):
        self.actions = actions
        self.transition_func = transition_func
        self.reward_func = reward_func
        self.gamma = gamma
        self.init_state = copy.deepcopy(init_state)
        self.cur_state = init_state
        self.step_cost = step_cost

    # ---------------
    # -- Accessors --
    # ---------------

    def get_parameters(self):
        '''
        Returns:
            (dict) key=param_name (str) --> val=param_val (object).
        '''
        param_dict = {}
        param_dict["gamma"] = self.gamma
        param_dict["step_cost"] = self.step_cost

        return param_dict

    def get_init_state(self):
        return self.init_state

    def get_curr_state(self):
        return self.cur_state

    def get_actions(self):
        return self.actions

    def get_gamma(self):
        return self.gamma

    def get_reward_func(self):
        return self.reward_func

    def get_transition_func(self):
        return self.transition_func

    def get_num_state_feats(self):
        return self.init_state.get_num_feats()

    def get_slip_prob(self):
        # No-op in the abstract class; subclasses with stochastic
        # transitions may override this.
        pass

    # --------------
    # -- Mutators --
    # --------------

    def set_gamma(self, new_gamma):
        self.gamma = new_gamma

    def set_step_cost(self, new_step_cost):
        self.step_cost = new_step_cost

    def set_slip_prob(self, slip_prob):
        # No-op in the abstract class; see get_slip_prob above.
        pass

    # ----------
    # -- Core --
    # ----------

    def execute_agent_action(self, action):
        '''
        Args:
            action (str)

        Returns:
            (tuple: <float, State>): reward, State

        Summary:
            Core method of all of simple_rl. Facilitates interaction
            between the MDP and an agent.
        '''
        reward = self.reward_func(self.cur_state, action)
        next_state = self.transition_func(self.cur_state, action)
        self.cur_state = next_state

        return reward, next_state

    def reset(self):
        self.cur_state = copy.deepcopy(self.init_state)

    def end_of_instance(self):
        # No-op hook; subclasses may use this to signal the end of an
        # episode or instance.
        pass
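
For reference, a minimal usage sketch of the interface above follows. The ChainState class and the transition/reward functions are illustrative stand-ins, not part of simple_rl (the library supplies its own State class and concrete MDP subclasses); only the MDP constructor and the execute_agent_action/reset calls are taken from the listing.

# Hypothetical state type for a small deterministic chain MDP.
class ChainState(object):
    def __init__(self, num):
        self.num = num

    def get_num_feats(self):
        # One feature: the position in the chain.
        return 1

def chain_transition(state, action):
    # "right" advances along the chain; any other action stays put.
    return ChainState(state.num + 1) if action == "right" else ChainState(state.num)

def chain_reward(state, action):
    # Reward of 1.0 for moving right out of position 2, else 0.
    return 1.0 if (state.num == 2 and action == "right") else 0.0

mdp = MDP(actions=["left", "right"],
          transition_func=chain_transition,
          reward_func=chain_reward,
          init_state=ChainState(0),
          gamma=0.95)

for _ in range(4):
    reward, next_state = mdp.execute_agent_action("right")
    print(reward, next_state.num)  # Prints: 0.0 1, 0.0 2, 1.0 3, 0.0 4

mdp.reset()  # Restores cur_state to a deep copy of init_state.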