# Source code for simple_rl.mdp.MDPClass
''' MDPClass.py: Contains the MDP Class. '''
# Python imports.
import copy
class MDP(object):
    ''' Abstract class for a Markov Decision Process. '''

    def __init__(self, actions, transition_func, reward_func, init_state, gamma=0.99, step_cost=0):
        '''
        Args:
            actions (list): All actions available to the agent.
            transition_func (lambda): s, a --> s'. Maps a state-action pair to a next state.
            reward_func (lambda): s, a --> r. Maps a state-action pair to a reward (float).
            init_state (State): The MDP's start state.
            gamma (float): Discount factor, in [0, 1].
            step_cost (float): Per-step cost. Stored only; it is not subtracted from
                rewards here -- presumably reward_func accounts for it (TODO confirm
                against subclasses).
        '''
        self.actions = actions
        self.transition_func = transition_func
        self.reward_func = reward_func
        self.gamma = gamma
        self.init_state = copy.deepcopy(init_state)
        # Fix: deep-copy the current state as well. Previously cur_state aliased
        # the caller's init_state object, so any in-place mutation of the live
        # state would silently corrupt the caller's object (while self.init_state
        # stayed safe). This also makes __init__ consistent with reset(), which
        # already deep-copies.
        self.cur_state = copy.deepcopy(init_state)
        self.step_cost = step_cost

    # ---------------
    # -- Accessors --
    # ---------------

    def get_parameters(self):
        '''
        Returns:
            (dict) key=param_name (str) --> val=param_val (object).
        '''
        param_dict = {}
        param_dict["gamma"] = self.gamma
        param_dict["step_cost"] = self.step_cost
        return param_dict

    def get_init_state(self):
        return self.init_state

    def get_curr_state(self):
        return self.cur_state

    def get_actions(self):
        return self.actions

    def get_gamma(self):
        return self.gamma

    def get_reward_func(self):
        return self.reward_func

    def get_transition_func(self):
        return self.transition_func

    def get_num_state_feats(self):
        # Delegates to the State object's own feature count.
        return self.init_state.get_num_feats()

    def get_slip_prob(self):
        # Hook for stochastic subclasses; intentionally a no-op here.
        pass

    # --------------
    # -- Mutators --
    # --------------

    def set_gamma(self, new_gamma):
        self.gamma = new_gamma

    def set_step_cost(self, new_step_cost):
        self.step_cost = new_step_cost

    def set_slip_prob(self, slip_prob):
        # Hook for stochastic subclasses; intentionally a no-op here.
        pass

    # ----------
    # -- Core --
    # ----------

    def execute_agent_action(self, action):
        '''
        Args:
            action (str)

        Returns:
            (tuple: <float,State>): reward, State

        Summary:
            Core method of all of simple_rl. Facilitates interaction
            between the MDP and an agent.
        '''
        # NOTE: the reward is computed from the *pre-transition* state.
        reward = self.reward_func(self.cur_state, action)
        next_state = self.transition_func(self.cur_state, action)
        self.cur_state = next_state
        return reward, next_state

    def reset(self):
        # Restore the start state; deep copy so successive episodes are independent.
        self.cur_state = copy.deepcopy(self.init_state)

    def end_of_instance(self):
        # Hook for episodic subclasses to react to episode termination; no-op here.
        pass