Source code for simple_rl.experiments.ExperimentClass

'''
ExperimentClass.py: Contains the Experiment Class for reproducing RL Experiments.

Purpose:
    - Stores all relevant parameters in experiment directory for easy reproducibility.
    - Auto generates plot using chart_utils.
    - Can document learning activity.
'''

# Python imports.
from __future__ import print_function
import os
from collections import defaultdict

# Other imports.
from simple_rl.utils import chart_utils
from simple_rl.experiments.ExperimentParametersClass import ExperimentParameters

[docs]class Experiment(object):

    FULL_EXP_FILE_NAME = "full_experiment.txt"
    EXP_PARAM_FILE_NAME = "exp_info.txt"

    ''' Experiment Class for RL Experiments '''

    # Dumps the results in a directory called "results" in the current working dir.
    RESULTS_DIR = os.path.join(os.getcwd(), "results", "")

    def __init__(self,
                    agents,
                    mdp,
                    agent_colors=[],
                    params=None,
                    is_episodic=False,
                    is_markov_game=False,
                    is_lifelong=False,
                    track_disc_reward=False,
                    clear_old_results=True,
                    count_r_per_n_timestep=1,
                    cumulative_plot=True,
                    exp_function="run_agents_on_mdp",
                    dir_for_plot="",
                    experiment_name_prefix=""):
        '''
        Args:
            agents (list)
            mdp (MDP)
            agent_colors (list)
            params (dict)
            is_episodic (bool)
            is_markov_game (bool)
            is_lifelong (bool)
            clear_old_results (bool)
            count_r_per_n_timestep (int)
            cumulative_plot (bool)
            exp_function (lambda): tracks with run_experiments.py function was called.
            dir_for_plot (str)
            experiment_name_prefix (str)
        '''
        # Store all relevant bools.
        self.agents = agents
        self.agent_colors = range(len(self.agents)) if agent_colors == [] else agent_colors
        params["track_disc_reward"] = track_disc_reward
        # params["is_lifelong"] = is_lifelong
        # params["agent_colors"] = agent_colors
        self.parameters = ExperimentParameters(params)
        self.mdp = mdp
        self.track_disc_reward = track_disc_reward
        self.count_r_per_n_timestep = count_r_per_n_timestep
        self.steps_since_added_r = 1
        self.rew_since_count = 0
        self.cumulative_plot = cumulative_plot
        self.name = str(self.mdp)
        self.rewards = defaultdict(list)
        self.times = defaultdict(list)
        if dir_for_plot == "":
            self.exp_directory = os.path.join(Experiment.RESULTS_DIR, self.name)
        else:
            self.exp_directory = os.path.join(os.getcwd(), dir_for_plot, self.name)

        self.experiment_name_prefix = experiment_name_prefix
        self.is_episodic = is_episodic
        self.is_markov_game = is_markov_game
        self._setup_files(clear_old_results)

        # Write experiment reproduction file.
        self._make_and_write_agent_and_mdp_params(agents, mdp, self.parameters.params, exp_function)

    def _make_and_write_agent_and_mdp_params(self, agents, mdp, parameters, exp_function):
        '''
        Args:
            agents
            mdp
            parameters

        Summary:
            Writes enough detail about @agents, @mdp, and @parameters to the file results/<exp_name>/params.txt 
            so that the function simple_rl.run_experiments.reproduce_from_exp_file can rerun the experiment.
        '''
        out_file = open(os.path.join(self.exp_directory, Experiment.FULL_EXP_FILE_NAME), "w")

        from simple_rl.mdp import OOMDP
        from simple_rl.pomdp.POMDPClass import POMDP
        from simple_rl.mdp.markov_game.MarkovGameMDPClass import MarkovGameMDP
        if isinstance(mdp, OOMDP) or isinstance(mdp, POMDP) or isinstance(mdp, MarkovGameMDP):
            # We don't do markov games.
            return

        # MDP.
        mdp_class = str(type(mdp))
        mdp_params = mdp.get_parameters()
        out_file.write("MDP:" + mdp_class + "\n")
        for param in mdp_params:
            out_file.write("\t\t" + param + "=" + str(mdp_params[param]) + "=" + str(type(mdp_params[param])) + "\n")

        out_file.write("\n")

        # Get agents and their parameters.
        for i, agent in enumerate(agents):
            agent_params = agent.get_parameters()
            agent_class = str(i) + "-" + str(type(agent))

            out_file.write("AGENT:" + agent_class + "\n")
            for param in agent_params:
                out_file.write("\t\t" + param + "=" + str(agent_params[param]) + "=" + str(type(agent_params[param])) + "\n")
            out_file.write("\n")

        out_file.write("\n")

        # Misc. Params.
        out_file.write("MISC\n")
        for param in parameters:
            out_file.write("\t\t" + param + "=" + str(parameters[param]) + "=" + str(type(parameters[param])) + "\n")

        # Track the function called.
        out_file.write("\n\nFUNC\n\t" + exp_function + "\n")

        # Close.
        out_file.close()

    def _setup_files(self, clear_old_results=True):
        '''
        Summary:
            Creates and removes relevant directories/files.
        '''
        if not os.path.exists(os.path.join(self.exp_directory, "")):
            os.makedirs(self.exp_directory)
        elif clear_old_results:
            for agent in self.agents:
                if os.path.exists(os.path.join(self.exp_directory, str(agent)) + ".csv"):
                    os.remove(os.path.join(self.exp_directory, str(agent)) + ".csv")
        self.write_exp_info_to_file()

[docs]    def make_plots(self, open_plot=True):
        '''
        Summary:
            Makes plots for the current experiment.
        '''
        if self.is_markov_game:
            agent_name_ls = [agent_name for agent_name in self.agents.keys()]
        else:
            agent_name_ls = [a.get_name() for a in self.agents]
            
        if self.experiment_name_prefix != "":
            plot_file_name = self.experiment_name_prefix + str(self.mdp)
        else:
            plot_file_name = ""

        chart_utils.make_plots(self.exp_directory,
                                agent_name_ls,
                                episodic=self.is_episodic,
                                plot_file_name=plot_file_name,
                                cumulative=self.cumulative_plot,
                                track_disc_reward=self.track_disc_reward,
                                open_plot=open_plot)

    def _write_extra_datum_to_file(self, mdp_name, agent, datum, datum_name):
        out_file = open(os.path.join(self.exp_directory, str(agent)) + "-" + datum_name + ".csv", "a+")
        out_file.write(str(datum) + ",")
        out_file.close()

[docs]    def get_agent_avg_cumulative_rew(self, agent):
        result_file = open(os.path.join(self.exp_directory, str(agent)) + ".csv", "r")
        
        total = 0
        num_lines = 0
        for line in result_file.readlines():
            total += sum([float(datum) for datum in line.strip().split(",")[:-1]])
            num_lines += 1

        result_file.close()

        return total / num_lines

[docs]    def add_experience(self, agent, state, action, reward, next_state, time_taken=0):
        '''
        Args:
            agent (agent OR dict): if self.is_markov_game, contains a dict of agents

        Summary:
            Record any relevant information about this experience.
        '''

        # Markov Game.
        if self.is_markov_game:
            for a in agent:
                self.rewards[a] += [reward[a]]
            return

        # Regular MDP.
        if self.steps_since_added_r % self.count_r_per_n_timestep == 0:
            if self.is_markov_game and self.count_r_per_n_timestep > 1:
                raise ValueError("(simple_rl) Experiment Error: can't track markov games per step. (set rew_step_count to 1).")
            else:
                self.rewards[agent] += [self.rew_since_count + reward]
                self.times[agent] += [time_taken]
                self.rew_since_count = 0
            self.steps_since_added_r = 1
        else:
            self.rew_since_count += reward
            self.steps_since_added_r += 1

[docs]    def end_of_episode(self, agent, num_times_to_write=1):
        '''
        Args:
            agent (str)

        Summary:
            Writes reward data from this episode to file and resets the reward.
        '''
        if self.is_episodic:
            for x in range(num_times_to_write):
                self.write_datum_to_file(agent, sum(self.rewards[agent]))
                self.write_datum_to_file(agent, sum(self.times[agent]), extra_dir="times/")
        else:
            for x in range(num_times_to_write):
                for step_reward in self.rewards[agent]:
                    self.write_datum_to_file(agent, step_reward)
        self.rewards[agent] = []

[docs]    def end_of_instance(self, agent):
        '''
        Summary:
            Adds a new line to indicate we're onto a new instance.
        '''
        #
        out_file = open(os.path.join(self.exp_directory, str(agent)) + ".csv", "a+")
        out_file.write("\n")
        out_file.close()

        if os.path.isdir(os.path.join(self.exp_directory, "times", "")):
            out_file = open(os.path.join(self.exp_directory, "times", str(agent)) + ".csv", "a+")
            out_file.write("\n")
            out_file.close()

[docs]    def write_datum_to_file(self, agent, datum, extra_dir=""):
        '''
        Summary:
            Writes datum to file.
        '''
        if extra_dir != "" and not os.path.isdir(self.exp_directory + "/" + extra_dir):
            os.makedirs(os.path.join(self.exp_directory, extra_dir))
        out_file = open(os.path.join(self.exp_directory, extra_dir, str(agent)) + ".csv", "a+")
        out_file.write(str(datum) + ",")
        out_file.close()

[docs]    def write_exp_info_to_file(self):
        '''
        Summary:
            Writes relevant experiment information to a file for reproducibility.
        '''
        out_file = open(os.path.join(self.exp_directory, Experiment.EXP_PARAM_FILE_NAME), "w+")
        to_write_to_file = self._get_exp_file_string()
        out_file.write(to_write_to_file)
        out_file.close()

    def _get_exp_file_string(self):
        '''
        Returns:
            (str): contains the AGENT-names, the MDP-names, and PARAMETER-information.
        '''
        mdp_text = "(Markov Game MDP)" if self.is_markov_game else "(MDP)"
        mdp_string = mdp_text + "\n\t" + self.name + "\n"
        agent_string = "(Agents)\n"
        for i, agent in enumerate(self.agents):
            agent_string += "\t" + str(agent) + "," + str(self.agent_colors[i]) + "\n"
        param_string = "(Params)" + str(self.parameters) + "\n"

        return  mdp_string + agent_string + param_string

    def __str__(self):
        return self._get_exp_file_string()