Source code for simple_rl.experiments.ExperimentClass

'''
ExperimentClass.py: Contains the Experiment Class for reproducing RL Experiments.

Purpose:
    - Stores all relevant parameters in experiment directory for easy reproducibility.
    - Auto generates plot using chart_utils.
    - Can document learning activity.
'''

# Python imports.
from __future__ import print_function
import os
from collections import defaultdict

# Other imports.
from simple_rl.utils import chart_utils
from simple_rl.experiments.ExperimentParametersClass import ExperimentParameters

[docs]class Experiment(object): FULL_EXP_FILE_NAME = "full_experiment.txt" EXP_PARAM_FILE_NAME = "exp_info.txt" ''' Experiment Class for RL Experiments ''' # Dumps the results in a directory called "results" in the current working dir. RESULTS_DIR = os.path.join(os.getcwd(), "results", "") def __init__(self, agents, mdp, agent_colors=[], params=None, is_episodic=False, is_markov_game=False, is_lifelong=False, track_disc_reward=False, clear_old_results=True, count_r_per_n_timestep=1, cumulative_plot=True, exp_function="run_agents_on_mdp", dir_for_plot="", experiment_name_prefix=""): ''' Args: agents (list) mdp (MDP) agent_colors (list) params (dict) is_episodic (bool) is_markov_game (bool) is_lifelong (bool) clear_old_results (bool) count_r_per_n_timestep (int) cumulative_plot (bool) exp_function (lambda): tracks with run_experiments.py function was called. dir_for_plot (str) experiment_name_prefix (str) ''' # Store all relevant bools. self.agents = agents self.agent_colors = range(len(self.agents)) if agent_colors == [] else agent_colors params["track_disc_reward"] = track_disc_reward # params["is_lifelong"] = is_lifelong # params["agent_colors"] = agent_colors self.parameters = ExperimentParameters(params) self.mdp = mdp self.track_disc_reward = track_disc_reward self.count_r_per_n_timestep = count_r_per_n_timestep self.steps_since_added_r = 1 self.rew_since_count = 0 self.cumulative_plot = cumulative_plot self.name = str(self.mdp) self.rewards = defaultdict(list) self.times = defaultdict(list) if dir_for_plot == "": self.exp_directory = os.path.join(Experiment.RESULTS_DIR, self.name) else: self.exp_directory = os.path.join(os.getcwd(), dir_for_plot, self.name) self.experiment_name_prefix = experiment_name_prefix self.is_episodic = is_episodic self.is_markov_game = is_markov_game self._setup_files(clear_old_results) # Write experiment reproduction file. self._make_and_write_agent_and_mdp_params(agents, mdp, self.parameters.params, exp_function) def _make_and_write_agent_and_mdp_params(self, agents, mdp, parameters, exp_function): ''' Args: agents mdp parameters Summary: Writes enough detail about @agents, @mdp, and @parameters to the file results/<exp_name>/params.txt so that the function simple_rl.run_experiments.reproduce_from_exp_file can rerun the experiment. ''' out_file = open(os.path.join(self.exp_directory, Experiment.FULL_EXP_FILE_NAME), "w") from simple_rl.mdp import OOMDP from simple_rl.pomdp.POMDPClass import POMDP from simple_rl.mdp.markov_game.MarkovGameMDPClass import MarkovGameMDP if isinstance(mdp, OOMDP) or isinstance(mdp, POMDP) or isinstance(mdp, MarkovGameMDP): # We don't do markov games. return # MDP. mdp_class = str(type(mdp)) mdp_params = mdp.get_parameters() out_file.write("MDP:" + mdp_class + "\n") for param in mdp_params: out_file.write("\t\t" + param + "=" + str(mdp_params[param]) + "=" + str(type(mdp_params[param])) + "\n") out_file.write("\n") # Get agents and their parameters. for i, agent in enumerate(agents): agent_params = agent.get_parameters() agent_class = str(i) + "-" + str(type(agent)) out_file.write("AGENT:" + agent_class + "\n") for param in agent_params: out_file.write("\t\t" + param + "=" + str(agent_params[param]) + "=" + str(type(agent_params[param])) + "\n") out_file.write("\n") out_file.write("\n") # Misc. Params. out_file.write("MISC\n") for param in parameters: out_file.write("\t\t" + param + "=" + str(parameters[param]) + "=" + str(type(parameters[param])) + "\n") # Track the function called. out_file.write("\n\nFUNC\n\t" + exp_function + "\n") # Close. out_file.close() def _setup_files(self, clear_old_results=True): ''' Summary: Creates and removes relevant directories/files. ''' if not os.path.exists(os.path.join(self.exp_directory, "")): os.makedirs(self.exp_directory) elif clear_old_results: for agent in self.agents: if os.path.exists(os.path.join(self.exp_directory, str(agent)) + ".csv"): os.remove(os.path.join(self.exp_directory, str(agent)) + ".csv") self.write_exp_info_to_file()
[docs] def make_plots(self, open_plot=True): ''' Summary: Makes plots for the current experiment. ''' if self.is_markov_game: agent_name_ls = [agent_name for agent_name in self.agents.keys()] else: agent_name_ls = [a.get_name() for a in self.agents] if self.experiment_name_prefix != "": plot_file_name = self.experiment_name_prefix + str(self.mdp) else: plot_file_name = "" chart_utils.make_plots(self.exp_directory, agent_name_ls, episodic=self.is_episodic, plot_file_name=plot_file_name, cumulative=self.cumulative_plot, track_disc_reward=self.track_disc_reward, open_plot=open_plot)
def _write_extra_datum_to_file(self, mdp_name, agent, datum, datum_name): out_file = open(os.path.join(self.exp_directory, str(agent)) + "-" + datum_name + ".csv", "a+") out_file.write(str(datum) + ",") out_file.close()
[docs] def get_agent_avg_cumulative_rew(self, agent): result_file = open(os.path.join(self.exp_directory, str(agent)) + ".csv", "r") total = 0 num_lines = 0 for line in result_file.readlines(): total += sum([float(datum) for datum in line.strip().split(",")[:-1]]) num_lines += 1 result_file.close() return total / num_lines
[docs] def add_experience(self, agent, state, action, reward, next_state, time_taken=0): ''' Args: agent (agent OR dict): if self.is_markov_game, contains a dict of agents Summary: Record any relevant information about this experience. ''' # Markov Game. if self.is_markov_game: for a in agent: self.rewards[a] += [reward[a]] return # Regular MDP. if self.steps_since_added_r % self.count_r_per_n_timestep == 0: if self.is_markov_game and self.count_r_per_n_timestep > 1: raise ValueError("(simple_rl) Experiment Error: can't track markov games per step. (set rew_step_count to 1).") else: self.rewards[agent] += [self.rew_since_count + reward] self.times[agent] += [time_taken] self.rew_since_count = 0 self.steps_since_added_r = 1 else: self.rew_since_count += reward self.steps_since_added_r += 1
[docs] def end_of_episode(self, agent, num_times_to_write=1): ''' Args: agent (str) Summary: Writes reward data from this episode to file and resets the reward. ''' if self.is_episodic: for x in range(num_times_to_write): self.write_datum_to_file(agent, sum(self.rewards[agent])) self.write_datum_to_file(agent, sum(self.times[agent]), extra_dir="times/") else: for x in range(num_times_to_write): for step_reward in self.rewards[agent]: self.write_datum_to_file(agent, step_reward) self.rewards[agent] = []
[docs] def end_of_instance(self, agent): ''' Summary: Adds a new line to indicate we're onto a new instance. ''' # out_file = open(os.path.join(self.exp_directory, str(agent)) + ".csv", "a+") out_file.write("\n") out_file.close() if os.path.isdir(os.path.join(self.exp_directory, "times", "")): out_file = open(os.path.join(self.exp_directory, "times", str(agent)) + ".csv", "a+") out_file.write("\n") out_file.close()
[docs] def write_datum_to_file(self, agent, datum, extra_dir=""): ''' Summary: Writes datum to file. ''' if extra_dir != "" and not os.path.isdir(self.exp_directory + "/" + extra_dir): os.makedirs(os.path.join(self.exp_directory, extra_dir)) out_file = open(os.path.join(self.exp_directory, extra_dir, str(agent)) + ".csv", "a+") out_file.write(str(datum) + ",") out_file.close()
[docs] def write_exp_info_to_file(self): ''' Summary: Writes relevant experiment information to a file for reproducibility. ''' out_file = open(os.path.join(self.exp_directory, Experiment.EXP_PARAM_FILE_NAME), "w+") to_write_to_file = self._get_exp_file_string() out_file.write(to_write_to_file) out_file.close()
def _get_exp_file_string(self): ''' Returns: (str): contains the AGENT-names, the MDP-names, and PARAMETER-information. ''' mdp_text = "(Markov Game MDP)" if self.is_markov_game else "(MDP)" mdp_string = mdp_text + "\n\t" + self.name + "\n" agent_string = "(Agents)\n" for i, agent in enumerate(self.agents): agent_string += "\t" + str(agent) + "," + str(self.agent_colors[i]) + "\n" param_string = "(Params)" + str(self.parameters) + "\n" return mdp_string + agent_string + param_string def __str__(self): return self._get_exp_file_string()