History#

API links

Basic Usage#

The History class is used to store Reports from Trials.

In its simplest usage, you can add() each Report as you receive it and then use the df() method to get a pandas.DataFrame of the history.

Reference History

from amltk.optimization import Trial, History, Metric

# The metric attached to every Trial created below; results are reported
# under the same name via `trial.success(loss=...)`.
loss = Metric("loss", minimize=True)

def quadratic(x):
    """Return ``x`` raised to the power of two."""
    squared = pow(x, 2)
    return squared

# Container that accumulates one Report per finished trial.
history = History()

# One trial per integer x in range(-5, 5), named trial_0 ... trial_9.
trials = [
    Trial.create(name=f"trial_{count}", config={"x": i}, metrics=[loss])
    for count, i in enumerate(range(-5, 5))
]

for trial in trials:
    x = trial.config["x"]
    # `trial.success(...)` yields the report that gets stored in the history.
    report = trial.success(loss=quadratic(x))
    history.add(report)

# Render everything collected so far as a DataFrame.
print(history.df())

          status  trial_seed  ... metric:loss (minimize) config:x
name                          ...                                
trial_0  success        <NA>  ...                     25       -5
trial_1  success        <NA>  ...                     16       -4
trial_2  success        <NA>  ...                      9       -3
trial_3  success        <NA>  ...                      4       -2
trial_4  success        <NA>  ...                      1       -1
trial_5  success        <NA>  ...                      0        0
trial_6  success        <NA>  ...                      1        1
trial_7  success        <NA>  ...                      4        2
trial_8  success        <NA>  ...                      9        3
trial_9  success        <NA>  ...                     16        4

[10 rows x 9 columns]

Typically, to use this inside of an optimization run, you would add the reports inside of a callback from your Tasks. Please see the optimization guide for more details.

With an Optimizer and Scheduler

from amltk.optimization import Trial, History, Metric
from amltk.optimization.optimizers.smac import SMACOptimizer
from amltk.pipeline import Searchable
from amltk.scheduling import Scheduler

# Search space handed to the optimizer below.
# NOTE(review): presumably a single parameter "x" bounded to (-5, 5) — confirm
# against the Searchable documentation.
searchable = Searchable("quad", space={"x": (-5, 5)})
# How many trials are launched when the scheduler starts (`repeat=n_workers`).
n_workers = 2

def quadratic(x):
    """Return ``x`` raised to the power of two."""
    squared = pow(x, 2)
    return squared

def target_function(trial: Trial) -> Trial.Report:
    """Evaluate ``quadratic`` at the trial's ``x`` and report success.

    Args:
        trial: The trial whose ``config["x"]`` is evaluated.

    Returns:
        The report produced by ``trial.success`` with the result stored
        under ``cost``.
    """
    cost = quadratic(trial.config["x"])
    return trial.success(cost=cost)

# Build the optimizer through its `create()` factory, which accepts the
# search space, the metric(s) to optimize and a seed.
optimizer = SMACOptimizer.create(
    space=searchable,
    metrics=Metric("cost", minimize=True),
    seed=42,
)

# Collects every report produced during the run.
history = History()

# Size the process pool from `n_workers` so it matches the number of trials
# launched at start-up.
scheduler = Scheduler.with_processes(n_workers)
# The task wraps `target_function`: it receives a Trial and returns a report.
task = scheduler.task(target_function)

# NOTE(review): callback signatures are kept as in the original example;
# confirm them against the event signatures of the installed amltk version.

@scheduler.on_start(repeat=n_workers)
def launch_trial():
    trial = optimizer.ask()
    task(trial)

@task.on_result
def add_to_history(report):
    history.add(report)

@task.on_result
def tell_optimizer(report):
    # Feed the outcome back so later `ask()` calls can take it into account.
    optimizer.tell(report)

@task.on_done
def launch_another(_):
    # Keep the workers busy: every finished task triggers a new trial.
    trial = optimizer.ask()
    task(trial)

scheduler.run(timeout=3)

Querying#

The History can be queried by either an index or by the trial name.

History Querying [str]

# Lookup by index: -1 selects the most recently added report.
last_report = history[-1]
print(last_report)
# Lookup by trial name: fetches the same report again via its name.
print(history[last_report.name])

Trial.Report(trial=Trial(name='trial_9', config={'x': 4}, bucket=PathBucket(PosixPath('trial-trial_9-2024-08-13T07:34:54.644221')), metrics=MetricCollection(metrics={'loss': Metric(name='loss', minimize=True, bounds=None, fn=None)}), created_at=datetime.datetime(2024, 8, 13, 7, 34, 54, 644220), seed=None, fidelities={}, summary={}, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, reported_at=datetime.datetime(2024, 8, 13, 7, 34, 54, 644401), exception=None, values={'loss': 16})
Trial.Report(trial=Trial(name='trial_9', config={'x': 4}, bucket=PathBucket(PosixPath('trial-trial_9-2024-08-13T07:34:54.644221')), metrics=MetricCollection(metrics={'loss': Metric(name='loss', minimize=True, bounds=None, fn=None)}), created_at=datetime.datetime(2024, 8, 13, 7, 34, 54, 644220), seed=None, fidelities={}, summary={}, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, reported_at=datetime.datetime(2024, 8, 13, 7, 34, 54, 644401), exception=None, values={'loss': 16})

# Iterating over a History yields its reports one at a time.
for report in history:
    loss_value = report.values["loss"]
    print(report.name, f"loss = {loss_value}")

trial_0 loss = 25
trial_1 loss = 16
trial_2 loss = 9
trial_3 loss = 4
trial_4 loss = 1
trial_5 loss = 0
trial_6 loss = 1
trial_7 loss = 4
trial_8 loss = 9
trial_9 loss = 16

# Order the reports by the "loss" metric, then print the first entry of the
# sorted history.
sorted_history = history.sortby("loss")
print(sorted_history[0])

Trial.Report(trial=Trial(name='trial_5', config={'x': 0}, bucket=PathBucket(PosixPath('trial-trial_5-2024-08-13T07:34:54.644019')), metrics=MetricCollection(metrics={'loss': Metric(name='loss', minimize=True, bounds=None, fn=None)}), created_at=datetime.datetime(2024, 8, 13, 7, 34, 54, 644018), seed=None, fidelities={}, summary={}, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, reported_at=datetime.datetime(2024, 8, 13, 7, 34, 54, 644370), exception=None, values={'loss': 0})

Filtering#

You can filter the history by using the filter() method. This method takes a Callable[[Trial.Report], bool] and returns a new History with only the Reports that return True from the given function.

Filtering

def is_even(report):
    """Return True when the report's configured ``x`` is divisible by two."""
    x_value = report.config["x"]
    remainder = x_value % 2
    return remainder == 0

# `filter()` returns a new History holding only the reports for which
# `is_even` returns True.
even_history = history.filter(is_even)
# NOTE(review): `profiles=False` presumably leaves profiling columns out of
# the frame — confirm against the `History.df` documentation.
even_history_df = even_history.df(profiles=False)
print(even_history_df)

          status  trial_seed  ... metric:loss (minimize) config:x
name                          ...                                
trial_1  success        <NA>  ...                     16       -4
trial_3  success        <NA>  ...                      4       -2
trial_5  success        <NA>  ...                      0        0
trial_7  success        <NA>  ...                      4        2
trial_9  success        <NA>  ...                     16        4

[5 rows x 9 columns]