"""Plotting helper."""
from __future__ import annotations
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style("darkgrid")
[docs]
def space_sep_upper(column_name: str | None) -> str | None:
    """Turn an underscore-separated name into a title-cased heading.

    Used to generate labels from logging names, e.g. ``"global_step"``
    becomes ``"Global Step"``.

    Parameters
    ----------
    column_name : str or None
        Name to generate a label for.

    Returns
    -------
    str or None
        The title-cased name with underscores replaced by spaces, or ``None``
        when ``column_name`` is ``None``.
    """
    if column_name is None:
        # None is passed through unchanged instead of raising.
        return None
    return column_name.title().replace("_", " ")
[docs]
def generate_global_step(
data: pd.DataFrame,
x_column: str = "global_step",
x_label_columns: str = ["episode", "step"],
) -> tuple[pd.DataFrame, str, list[str]]:
"""Add a global_step column which enumerate all step over all episodes.
Returns the altered data, a data frame containing mapping between global_step,
x_column and x_label_columns.
Often used in combination with add_multi_level_ticks.
Parameters
----------
data: pd.DataFrame
data source
x_column: str
the name of the global_step (default 'global_step')
x_label_columns: [str, ...]
the name and hierarchical order of the columns (default ['episode', 'step']
Returns:
-------
(data, plot_index, x_column, x_label_columns)
"""
plot_index = (
data.groupby(x_label_columns)
.count()
.reset_index()[x_label_columns]
.sort_values(x_label_columns)
)
plot_index[x_column] = np.arange(len(plot_index))
plot_index.set_index(x_column)
data = data.merge(plot_index, on=x_label_columns)
return data, plot_index, x_column, x_label_columns
[docs]
def add_multi_level_ticks(
    grid: sns.FacetGrid,
    plot_index: pd.DataFrame,
    x_column: str,
    x_label_columns: list[str],
) -> None:
    """Replace numeric x tick labels by hierarchical labels such as episode:step.

    Expects a FacetGrid with global_step (``x_column``) on the x-axis.

    E.g. a run with 3 episodes of 10 steps each results in 30 global steps.
    The tick labels could be ['0', '4', '9', '14', '19', '24', '29'].
    After applying this function each tick that corresponds to a row of
    ``plot_index`` shows the values of ``x_label_columns`` joined by ':'
    (e.g. '0:4'); every other tick gets an empty label.

    Parameters
    ----------
    grid : sns.FacetGrid
        The grid whose axes are relabelled.
    plot_index : pd.DataFrame
        The mapping between current tick values (global step values) and the
        label columns. Usually the result from generate_global_step.
    x_column : str
        Column of ``plot_index`` holding the tick values to look up.
    x_label_columns : list of str
        Columns of ``plot_index`` whose values form the new label, joined by
        ':'. Any number of columns is supported; a single column name is
        accepted as well.
    """
    if isinstance(x_label_columns, str):
        x_label_columns = [x_label_columns]

    # Key the lookup on the x_column values themselves rather than on the
    # DataFrame index, so it does not matter how plot_index is indexed.
    label_parts = zip(*(plot_index[column].tolist() for column in x_label_columns))
    tick_labels = {
        position: ":".join(str(part) for part in parts)
        for position, parts in zip(plot_index[x_column].tolist(), label_parts)
    }

    for ax in grid.axes.flat:
        # Ticks without a matching step (e.g. outside the data range) get "".
        new_labels = [tick_labels.get(tick, "") for tick in ax.get_xticks()]
        ax.set_xticklabels(new_labels, minor=False)
[docs]
def plot(
    plot_function,
    settings: dict,
    title: str | None = None,
    x_label: str | None = None,
    y_label: str | None = None,
    **kwargs,
) -> sns.FacetGrid:
    """Helper function that creates a FacetGrid.

    1. Combines settings with kwargs (kwargs overwrite values). The passed
       ``settings`` dict itself is left unmodified.
    2. Plots using plot_function(**settings)
    3. Sets x and y labels. If not provided, the column names are converted
       to pretty strings using space_sep_upper.
    4. Sets the title (sometimes has to be readjusted afterwards, especially
       in case of large plots, e.g. multiple rows/cols)

    Parameters
    ----------
    plot_function :
        Function to generate the FacetGrid, e.g. sns.relplot or sns.catplot.
    settings : dict
        A dict containing all needed default settings.
    title : str
        Title of the plot (optional)
    x_label : str
        Label of the x-axis (optional)
    y_label : str
        Label of the y-axis (optional)
    kwargs :
        Keyword arguments to overwrite default settings.

    Returns
    -------
    sns.FacetGrid
        The grid returned by ``plot_function``.
    """
    # 1. Merge into a new dict so the caller's settings are not mutated.
    merged_settings = {**settings, **kwargs}
    grid = plot_function(**merged_settings)  # 2.

    # 3. Fall back to the variable names stored on the grid (private
    # FacetGrid attributes _x_var / _y_var).
    if x_label is None:
        x_label = space_sep_upper(grid._x_var)
    if y_label is None:
        y_label = space_sep_upper(grid._y_var)
    grid.set_xlabels(x_label)
    grid.set_ylabels(y_label)

    # 4.
    grid.tight_layout()
    if title is not None:
        grid.fig.suptitle(title, y=0.97)  # rule of thumb. Has to be improved in future
        grid.fig.subplots_adjust(top=0.9)
    return grid
[docs]
def plot_step_time(
    data,
    show_global_step=False,
    interval=1,
    title=None,
    x_label=None,
    y_label=None,
    **args,
) -> sns.FacetGrid:
    """Create a line plot showing the measured time per step.

    Rows that share an x value are aggregated by seaborn's line plot
    defaults. To separate them, map a column to a plot property, e.g.
    hue='seed' or/and col='instance'.

    For more details see: https://seaborn.pydata.org/generated/seaborn.relplot.html

    For examples refer to examples/plotting/time_plotting.py

    Parameters
    ----------
    data : pd.DataFrame
        Dataframe resulting from logging and loading using
        log2dataframe(logs, wide=True). Must contain the columns 'episode',
        'step' and 'step_duration'.
    show_global_step : bool
        If to show the global_step (step enumerated over all episodes)
        or Episode:Step. (False default)
    interval : int
        Interval in number of steps to average over. (default = 1)
    title : str
        Title of the plot (optional)
    x_label : str
        Label of the x-axis (optional)
    y_label : str
        Label of the y-axis (optional)
    args :
        Keyword arguments to overwrite default settings.

    Returns
    -------
    sns.FacetGrid
    """
    multi_level_x_label = "Epoch:Step"
    data, plot_index, x_column, x_label_columns = generate_global_step(data)

    if interval > 1:
        data["groups"] = data[x_column] // interval

        # Columns mapped to a seaborn semantic (e.g. hue='seed') must survive
        # the aggregation, otherwise relplot cannot find them afterwards.
        reserved = {"groups", x_column, "step_duration"}
        semantic_keys = ("hue", "size", "style", "units", "row", "col")
        semantic_columns = list(
            dict.fromkeys(
                value
                for key, value in args.items()
                if key in semantic_keys
                and isinstance(value, str)
                and value in data.columns
                and value not in reserved
            )
        )

        data = (
            data.groupby(["groups", *semantic_columns])
            .agg({x_column: "min", "step_duration": "mean"})
            .reset_index()
        )
        y_label = (
            f"Mean per duration per {interval} steps" if y_label is None else y_label
        )

    settings = {
        "data": data,
        "x": x_column,
        "y": "step_duration",
        "kind": "line",
    }
    if x_label is None and not show_global_step:
        x_label = multi_level_x_label

    grid = plot(sns.relplot, settings, title, x_label, y_label, **args)
    if not show_global_step:
        add_multi_level_ticks(grid, plot_index, x_column, x_label_columns)
    return grid
[docs]
def plot_episode_time(
    data, title=None, x_label=None, y_label=None, **kargs
) -> sns.FacetGrid:
    """Draw the measured time per episode as a line plot.

    Rows that share an x value are aggregated by seaborn's line plot
    defaults. To separate them, map a column to a plot property, e.g.
    hue='seed' or/and col='instance'.

    For more details see: https://seaborn.pydata.org/generated/seaborn.relplot.html

    For examples refer to examples/plotting/time_plotting.py

    Parameters
    ----------
    data : pd.DataFrame
        Dataframe resulting from logging and loading using
        log2dataframe(logs, wide=True). The columns 'episode' and
        'episode_duration' are plotted.
    title : str
        Title of the plot (optional)
    x_label : str
        Label of the x-axis (optional)
    y_label : str
        Label of the y-axis (optional)
    kargs :
        Keyword arguments to overwrite default settings.

    Returns
    -------
    sns.FacetGrid
    """
    # Defaults handed to sns.relplot; entries in kargs take precedence.
    relplot_defaults = dict(
        data=data,
        x="episode",
        y="episode_duration",
        kind="line",
    )
    grid = plot(sns.relplot, relplot_defaults, title, x_label, y_label, **kargs)
    return grid
[docs]
def plot_action(
    data,
    show_global_step=False,
    interval=1,
    title=None,
    x_label=None,
    y_label=None,
    **kargs,
):
    """Draw the actions over time as a line plot.

    Thin wrapper around plot_space with the column prefix "action".

    Spaces can be quite large, so the plots can become messy (and slow) when
    all dimensions are plotted at once. It is therefore recommended to select
    a subset of columns before calling this function, especially for dict
    spaces.

    Rows that share an x value are aggregated by seaborn's line plot
    defaults. To separate them, map a column to a plot property, e.g.
    hue='seed' or/and col='instance'.

    For more details see: https://seaborn.pydata.org/generated/seaborn.relplot.html

    For examples refer to examples/plotting/action_plotting.py

    Parameters
    ----------
    data : pd.DataFrame
        Dataframe resulting from logging and loading using
        log2dataframe(logs, wide=True)
    show_global_step : bool
        If to show the global_step (step enumerated over all episodes)
        or Episode:Step. (False default)
    interval : int
        Interval in number of steps to average over. (default = 1)
    title : str
        Title of the plot (optional)
    x_label : str
        Label of the x-axis (optional)
    y_label : str
        Label of the y-axis (optional)
    kargs :
        Keyword arguments to overwrite default settings.

    Returns
    -------
    sns.FacetGrid
    """
    grid = plot_space(
        data,
        "action",
        show_global_step,
        interval=interval,
        title=title,
        x_label=x_label,
        y_label=y_label,
        **kargs,
    )
    return grid
[docs]
def plot_state(
    data,
    show_global_step=False,
    interval=1,
    title=None,
    x_label=None,
    y_label=None,
    **kargs,
):
    """Draw the state over time as a line plot.

    Thin wrapper around plot_space with the column prefix "state".

    Spaces can be quite large, so the plots can become messy (and slow) when
    all dimensions are plotted at once. It is therefore recommended to select
    a subset of columns before calling this function, especially for dict
    spaces.

    Rows that share an x value are aggregated by seaborn's line plot
    defaults. To separate them, map a column to a plot property, e.g.
    hue='seed' or/and col='instance'.

    For more details see: https://seaborn.pydata.org/generated/seaborn.relplot.html

    For examples refer to examples/plotting/state_plotting.py

    Parameters
    ----------
    data : pd.DataFrame
        Dataframe resulting from logging and loading using
        log2dataframe(logs, wide=True)
    show_global_step : bool
        If to show the global_step (step enumerated over all episodes)
        or Episode:Step. (False default)
    interval : int
        Interval in number of steps to average over. (default = 1)
    title : str
        Title of the plot (optional)
    x_label : str
        Label of the x-axis (optional)
    y_label : str
        Label of the y-axis (optional)
    kargs :
        Keyword arguments to overwrite default settings.

    Returns
    -------
    sns.FacetGrid
    """
    grid = plot_space(
        data,
        "state",
        show_global_step,
        interval=interval,
        title=title,
        x_label=x_label,
        y_label=y_label,
        **kargs,
    )
    return grid
[docs]
def plot_space(
    data,
    space_column_name,
    show_global_step,
    interval=1,
    title=None,
    x_label=None,
    y_label=None,
    **args,
) -> sns.FacetGrid:
    """Create a line plot showing space over time.

    Spaces can be quite large, so the plots can become messy (and slow) when
    all dimensions are plotted at once. It is therefore recommended to select
    a subset of columns before calling this function, especially for dict
    spaces.

    Rows that share an x value are aggregated by seaborn's line plot
    defaults. To separate them, map a column to a plot property, e.g.
    hue='seed' or/and col='instance'. If the data has an 'instance' column,
    hue='instance' is used unless overwritten.

    For more details see: https://seaborn.pydata.org/generated/seaborn.relplot.html

    For examples refer to
    examples/plotting/state_plotting.py or
    examples/plotting/action_plotting.py

    Parameters
    ----------
    data : pd.DataFrame
        Dataframe resulting from logging and loading
        using log2dataframe(logs, wide=True). Must contain the columns
        'episode' and 'step'; 'instance' and 'seed' are used when present.
    space_column_name : str
        Prefix of the column(s) of the space to plot. With several matching
        columns (``<prefix>_<suffix>``) each one is drawn in its own facet.
    show_global_step : bool
        If to show the global_step (step enumerated over all episodes)
        or Episode:Step.
    interval : int
        Interval in number of steps to average over. (default = 1)
    title : str
        Title of the plot (optional)
    x_label : str
        Label of the x-axis (optional)
    y_label : str
        Label of the y-axis (optional)
    args :
        Keyword arguments to overwrite default settings.

    Returns
    -------
    sns.FacetGrid

    Raises
    ------
    ValueError
        If no column of ``data`` starts with ``space_column_name``.
    """
    # first find columns with prefix space_column_name
    space_entries = [
        col for col in data.columns if col.startswith(space_column_name)
    ]
    number_of_space_entries = len(space_entries)
    if number_of_space_entries == 0:
        # Fail early with a clear message instead of an obscure lookup error
        # further down.
        raise ValueError(
            f"No column starting with '{space_column_name}' found in data."
        )
    y_label_name = space_column_name

    if number_of_space_entries > 1:
        # if we have more than one space dims we reshape the dataframe
        # in order to be able to control the plots behavior per dimension
        # 'instance' and 'seed' only identify rows when they are present.
        id_columns = ["episode", "step"] + [
            column for column in ("instance", "seed") if column in data.columns
        ]
        data = pd.wide_to_long(
            data,
            stubnames=[space_column_name],
            sep="_",
            i=id_columns,
            j="i",
            suffix=".*",
        ).reset_index()
    elif space_column_name not in data.columns:
        # If there is only one dimension but the name is odd
        space_column_name, *_ = space_entries

    data, plot_index, x_column, x_label_columns = generate_global_step(data)

    # perform averaging over intervals
    if interval > 1:
        data["interval"] = data[x_column] // interval
        # Group by every remaining column so that e.g. instance, seed and the
        # space dimension 'i' stay available for plotting.
        group_columns = list(
            data.columns.drop([*x_label_columns, x_column, space_column_name])
        )
        data = (
            data.groupby(group_columns)
            .agg({x_column: "min", space_column_name: "mean"})
            .reset_index()
        )
        if y_label is None:
            y_label = f"Mean {y_label_name} per {interval} steps"

    settings = {
        "data": data,
        "x": x_column,
        "y": space_column_name,
        "kind": "line",
    }

    # we want the different dims in different plots / columns
    # todo: refactor
    if number_of_space_entries > 1:
        settings["col"] = "i"
        if number_of_space_entries > 3:
            settings["col_wrap"] = 3

    if "instance" in data.columns:
        settings["hue"] = "instance"

    if x_label is None:
        # None lets plot() derive the label from the x variable name.
        x_label = None if show_global_step else "Epoch:Step"
    if y_label is None:
        y_label = y_label_name

    grid = plot(sns.relplot, settings, title, x_label, y_label, **args)
    if not show_global_step:
        add_multi_level_ticks(grid, plot_index, x_column, x_label_columns)
    return grid