
History

The History is used to keep a structured record of what occurred with Trials and their associated Reports.

Usage

from amltk.optimization import Trial, History, Metric
from amltk.store import PathBucket

loss = Metric("loss", minimize=True)

def target_function(trial: Trial) -> Trial.Report:
    x = trial.config["x"]
    y = trial.config["y"]
    trial.store({"config.json": trial.config})

    with trial.begin():
        loss = x**2 - y

    if trial.exception:
        return trial.fail()

    return trial.success(loss=loss)

# ... usually obtained from an optimizer
bucket = PathBucket("all-trial-results")
history = History()

for x, y in zip([1, 2, 3], [4, 5, 6]):
    trial = Trial(name="some-unique-name", config={"x": x, "y": y}, bucket=bucket, metrics=[loss])
    report = target_function(trial)
    history.add(report)

print(history.df())
bucket.rmdir()  # markdown-exec: hide

                   status  ... time:kind  time:unit
name                       ...
some-unique-name  success  ...      wall    seconds
some-unique-name  success  ...      wall    seconds
some-unique-name  success  ...      wall    seconds

[3 rows x 20 columns]

You'll often need to perform some operations on a History, so we provide some utility methods (a short combined sketch follows the list):

  • filter(key=...) - Filters the history by some predicate, e.g. history.filter(lambda report: report.status == "success")
  • groupby(key=...) - Groups the history by some key, e.g. history.groupby(lambda report: report.config["x"] < 5)
  • sortby(key=...) - Sorts the history by some key, e.g. history.sortby(lambda report: report.time.end)
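
A small combined sketch, continuing from the usage example above (with its "loss" metric):

successes = history.filter(lambda report: report.status == "success")
by_status = history.groupby("status")
ordered = history.sortby("loss")

print(len(successes), list(by_status), [r.metrics["loss"] for r in ordered])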

There are also some serialization capabilities built in, allowing you to store your reports and load them back in later (a round-trip sketch follows the list):

  • df(...) - Output a pd.DataFrame of all the information available.
  • from_df(...) - Create a History from a pd.DataFrame.
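
A minimal round-trip sketch, continuing from the usage example above: the DataFrame produced by df() can be passed straight back to from_df().

df = history.df()
restored = History.from_df(df)
print(len(restored) == len(history))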

You can also retrieve individual reports from the history by their name, e.g. history["some-unique-name"], or iterate through the history with for report in history: ....
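
A small sketch, assuming the history contains a report for a trial named "trial_3" (a hypothetical name):

report = history["trial_3"]  # equivalent to history.find("trial_3")
for report in history:
    print(report.name, report.status)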

class History (dataclass) #

Bases: RichRenderable

A history of trials.

This is a collection of reports from trials, which you can access by their trial name. It is unsorted in general, but by using sortby() you can sort the history.

History
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [
    Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric])
    for i in range(10)
]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

for report in history:
    print(f"{report.name=}, {report}")

print(history.metrics)
print(history.df())

print(history.best())
report.name='trial_0', Trial.Report(trial=Trial(name='trial_0', config={'x': 0}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_1', Trial.Report(trial=Trial(name='trial_1', config={'x': 1}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 3.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=3.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_2', Trial.Report(trial=Trial(name='trial_2', config={'x': 2}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_3', Trial.Report(trial=Trial(name='trial_3', config={'x': 3}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 7.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=7.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_4', Trial.Report(trial=Trial(name='trial_4', config={'x': 4}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 12.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=12.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_5', Trial.Report(trial=Trial(name='trial_5', config={'x': 5}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 19.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=19.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_6', Trial.Report(trial=Trial(name='trial_6', config={'x': 6}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 28.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=28.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_7', Trial.Report(trial=Trial(name='trial_7', config={'x': 7}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 39.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=39.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_8', Trial.Report(trial=Trial(name='trial_8', config={'x': 8}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 52.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=52.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_9', Trial.Report(trial=Trial(name='trial_9', config={'x': 9}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 67.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=67.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': Metric(name='cost', minimize=True, bounds=None)}
          status  trial_seed exception  ... time:duration time:kind  time:unit
name                                    ...                                   
trial_0  success        <NA>        NA  ...      0.000037      wall    seconds
trial_1  success        <NA>        NA  ...      0.000024      wall    seconds
trial_2  success        <NA>        NA  ...      0.000021      wall    seconds
trial_3  success        <NA>        NA  ...       0.00003      wall    seconds
trial_4  success        <NA>        NA  ...      0.000021      wall    seconds
trial_5  success        <NA>        NA  ...      0.000021      wall    seconds
trial_6  success        <NA>        NA  ...      0.000021      wall    seconds
trial_7  success        <NA>        NA  ...      0.000021      wall    seconds
trial_8  success        <NA>        NA  ...      0.000021      wall    seconds
trial_9  success        <NA>        NA  ...       0.00002      wall    seconds

[10 rows x 19 columns]
Trial.Report(trial=Trial(name='trial_1', config={'x': 1}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 3.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=3.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
ATTRIBUTE DESCRIPTION
reports

The reports in this history, in the order they were added.

TYPE: list[Report]

def from_reports(reports) (classmethod) #

Creates a history from reports.

PARAMETER DESCRIPTION
reports

An iterable of reports.

TYPE: Iterable[Report]

RETURNS DESCRIPTION
History

A history.

Source code in src/amltk/optimization/history.py
@classmethod
def from_reports(cls, reports: Iterable[Trial.Report]) -> History:
    """Creates a history from reports.

    Args:
        reports: An iterable of reports.

    Returns:
        A history.
    """
    history = cls()
    history.add(reports)
    return history
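
A short usage sketch, assuming a populated history with a "cost" metric as in the example above:

# Build a new History holding only the three lowest-cost reports.
top_3 = History.from_reports(history.sortby("cost")[:3])
print(len(top_3))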

def best(metric=None) #

Returns the best report in the history.

PARAMETER DESCRIPTION
metric

The metric to sort by. If None, it will use the first metric in the history. If there are multiple metrics and none are specified, it will raise an error.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
Report

The best report.

Source code in src/amltk/optimization/history.py
def best(self, metric: str | None = None) -> Trial.Report:
    """Returns the best report in the history.

    Args:
        metric: The metric to sort by. If `None`, it will use the
            first metric in the history. If there are multiple metrics
            and none are specified, it will raise an error.

    Returns:
        The best report.
    """
    if metric is None:
        if len(self.metrics) > 1:
            raise ValueError(
                "There are multiple metrics in the history, "
                "please specify which metric to sort by.",
            )

        _metric_def = next(iter(self.metrics.values()))
        _metric_name = _metric_def.name
    else:
        if metric not in self.metrics:
            raise ValueError(
                f"Metric {metric} not found in history. "
                f"Available metrics: {list(self.metrics.keys())}",
            )
        _metric_def = self.metrics[metric]
        _metric_name = metric

    _by = min if _metric_def.minimize else max
    return _by(self.reports, key=lambda r: r.metrics[_metric_name])
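
A brief sketch, assuming the single-metric history built in the class example above:

print(history.best())        # allowed when only one metric is registered
print(history.best("cost"))  # explicit name, required when several metrics exist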

def add(report) #

Adds a report or reports to the history.

PARAMETER DESCRIPTION
report

A report or reports to add.

TYPE: Report | Iterable[Report]

Source code in src/amltk/optimization/history.py
def add(self, report: Trial.Report | Iterable[Trial.Report]) -> None:
    """Adds a report or reports to the history.

    Args:
        report: A report or reports to add.
    """
    match report:
        case Trial.Report():
            for m in report.metric_values:
                if (_m := self.metrics.get(m.name)) is not None:
                    if m.metric != _m:
                        raise ValueError(
                            f"Metric {m.name} has conflicting definitions:"
                            f"\n{m.metric} != {_m}",
                        )
                else:
                    self.metrics[m.name] = m.metric

            self.reports.append(report)
            self._lookup[report.name] = len(self.reports) - 1
        case reports:
            for _report in reports:
                self.add(_report)
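
A minimal sketch of both call forms:

from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
history = History()

reports = []
for i in range(3):
    trial = Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric])
    with trial.begin():
        reports.append(trial.success(cost=i))

history.add(reports[0])   # a single report
history.add(reports[1:])  # any iterable of reports
print(len(history))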

def find(name) #

Finds a report by trial name.

PARAMETER DESCRIPTION
name

The name of the trial.

TYPE: str

RETURNS DESCRIPTION
Report

The report.

Source code in src/amltk/optimization/history.py
def find(self, name: str) -> Trial.Report:
    """Finds a report by trial name.

    Args:
        name: The name of the trial.

    Returns:
        The report.
    """
    return self.reports[self._lookup[name]]
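
A brief sketch, assuming trial names as in the class example above:

report = history.find("trial_3")  # the same report as history["trial_3"]
print(report.config, report.metrics)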

def df(*, profiles=True, configs=True, summary=True, metrics=True, normalize_time=True) #

Returns a pandas DataFrame of the history.

Each individual trial will be a row in the dataframe.

Prefixes

  • summary: Entries will be prefixed with "summary:"
  • config: Entries will be prefixed with "config:"
  • metrics: Entries will be prefixed with "metrics:"
df
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

print(history.df())
          status  trial_seed exception  ... time:duration time:kind  time:unit
name                                    ...                                   
trial_0  success        <NA>        NA  ...      0.000036      wall    seconds
trial_1  success        <NA>        NA  ...      0.000025      wall    seconds
trial_2  success        <NA>        NA  ...      0.000021      wall    seconds
trial_3  success        <NA>        NA  ...      0.000022      wall    seconds
trial_4  success        <NA>        NA  ...      0.000022      wall    seconds
trial_5  success        <NA>        NA  ...      0.000021      wall    seconds
trial_6  success        <NA>        NA  ...      0.000021      wall    seconds
trial_7  success        <NA>        NA  ...       0.00002      wall    seconds
trial_8  success        <NA>        NA  ...       0.00002      wall    seconds
trial_9  success        <NA>        NA  ...       0.00002      wall    seconds

[10 rows x 19 columns]
PARAMETER DESCRIPTION
profiles

Whether to include the profiles.

TYPE: bool DEFAULT: True

configs

Whether to include the configs.

TYPE: bool DEFAULT: True

summary

Whether to include the summary.

TYPE: bool DEFAULT: True

metrics

Whether to include the metrics.

TYPE: bool DEFAULT: True

normalize_time

Whether to normalize the time to the first report. If given a float, it will normalize to that value.

Will normalize all columns with "time:end" and "time:start" in their name. It will use the time of the earliest report as the offset.

TYPE: bool | float DEFAULT: True

RETURNS DESCRIPTION
DataFrame

A pandas DataFrame of the history.

Source code in src/amltk/optimization/history.py
def df(
    self,
    *,
    profiles: bool = True,
    configs: bool = True,
    summary: bool = True,
    metrics: bool = True,
    normalize_time: bool | float = True,
) -> pd.DataFrame:
    """Returns a pandas DataFrame of the history.

    Each individual trial will be a row in the dataframe.

    !!! note "Prefixes"

        * `summary`: Entries will be prefixed with `#!python "summary:"`
        * `config`: Entries will be prefixed with `#!python "config:"`
        * `metrics`: Entries will be prefixed with `#!python "metrics:"`

    ```python exec="true" source="material-block" result="python" title="df" hl_lines="12"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    print(history.df())
    ```

    Args:
        profiles: Whether to include the profiles.
        configs: Whether to include the configs.
        summary: Whether to include the summary.
        metrics: Whether to include the metrics.
        normalize_time: Whether to normalize the time to the first
            report. If given a `#!python float`, it will normalize
            to that value.

            Will normalize all columns with `#!python "time:end"`
            and `#!python "time:start"` in their name. It will use
            the time of the earliest report as the offset.

    Returns:
        A pandas DataFrame of the history.
    """  # noqa: E501
    if len(self) == 0:
        return pd.DataFrame()

    _df = pd.concat(
        [
            report.df(
                profiles=profiles,
                configs=configs,
                summary=summary,
                metrics=metrics,
            )
            for report in self.reports
        ],
    )
    _df = _df.convert_dtypes()

    match normalize_time:
        case True if "time:start" in _df.columns:
            time_columns = ("time:start", "time:end")
            cols = [c for c in _df.columns if c.endswith(time_columns)]
            _df[cols] -= _df["time:start"].min()
        case float():
            time_columns = ("time:start", "time:end")
            cols = [c for c in _df.columns if c.endswith(time_columns)]
            _df[cols] -= normalize_time
        case _:
            pass

    return _df
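
A small sketch of the keyword arguments, continuing from the example above:

slim = history.df(profiles=False, configs=False, summary=False)  # drop profile, config and summary columns
raw_times = history.df(normalize_time=False)                     # keep absolute timestamps
print(list(slim.columns))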

def filter(key) #

Filters the history by a predicate.

filter
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

filtered_history = history.filter(lambda report: report.metrics["cost"] < 10)
for report in filtered_history:
    cost = report.metrics["cost"]
    print(f"{report.name}, {cost=}, {report}")
trial_0, cost=4.0, Trial.Report(trial=Trial(name='trial_0', config={'x': 0}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
trial_1, cost=3.0, Trial.Report(trial=Trial(name='trial_1', config={'x': 1}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 3.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=3.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
trial_2, cost=4.0, Trial.Report(trial=Trial(name='trial_2', config={'x': 2}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
trial_3, cost=7.0, Trial.Report(trial=Trial(name='trial_3', config={'x': 3}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 7.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=7.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
PARAMETER DESCRIPTION
key

A predicate to filter by.

TYPE: Callable[[Report], bool]

RETURNS DESCRIPTION
History

A new history with the filtered reports.

Source code in src/amltk/optimization/history.py
def filter(self, key: Callable[[Trial.Report], bool]) -> History:
    """Filters the history by a predicate.

    ```python exec="true" source="material-block" result="python" title="filter" hl_lines="12"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    filtered_history = history.filter(lambda report: report.metrics["cost"] < 10)
    for report in filtered_history:
        cost = report.metrics["cost"]
        print(f"{report.name}, {cost=}, {report}")
    ```

    Args:
        key: A predicate to filter by.

    Returns:
        A new history with the filtered reports.
    """  # noqa: E501
    return History.from_reports([report for report in self.reports if key(report)])
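
Another brief sketch, continuing from the example above: predicates can combine status and config values.

even_successes = history.filter(
    lambda r: r.status == "success" and r.config["x"] % 2 == 0
)
print(len(even_successes))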

def groupby(key) #

Groups the history by the values of a key.

groupby
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        if x % 2 == 0:
            report = trial.fail(cost=1_000)
        else:
            report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

for status, history in history.groupby("status").items():
    print(f"{status=}, {len(history)=}")
status=<Status.FAIL: 'fail'>, len(history)=5
status=<Status.SUCCESS: 'success'>, len(history)=5

You can pass a Callable to group by any key you like:

from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.fail(cost=x)
        history.add(report)

for below_5, history in history.groupby(lambda r: r.metrics["cost"] < 5).items():
    print(f"{below_5=}, {len(history)=}")
below_5=True, len(history)=5
below_5=False, len(history)=5
PARAMETER DESCRIPTION
key

A key to group by. If "status" is passed, the history will be grouped by the status of the reports.

TYPE: Literal['status'] | Callable[[Report], Hashable]

RETURNS DESCRIPTION
dict[Hashable, History]

A mapping of keys to histories.

Source code in src/amltk/optimization/history.py
def groupby(
    self,
    key: Literal["status"] | Callable[[Trial.Report], Hashable],
) -> dict[Hashable, History]:
    """Groups the history by the values of a key.

    ```python exec="true" source="material-block" result="python" title="groupby" hl_lines="15"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            if x % 2 == 0:
                report = trial.fail(cost=1_000)
            else:
                report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    for status, history in history.groupby("status").items():
        print(f"{status=}, {len(history)=}")
    ```

    You can pass a `#!python Callable` to group by any key you like:

    ```python exec="true" source="material-block" result="python"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.fail(cost=x)
            history.add(report)

    for below_5, history in history.groupby(lambda r: r.metrics["cost"] < 5).items():
        print(f"{below_5=}, {len(history)=}")
    ```

    Args:
        key: A key to group by. If `"status"` is passed, the history will be
            grouped by the status of the reports.

    Returns:
        A mapping of keys to histories.
    """  # noqa: E501
    d = defaultdict(list)

    if key == "status":
        key = operator.attrgetter("status")

    for report in self.reports:
        d[key(report)].append(report)

    return {k: History.from_reports(v) for k, v in d.items()}

def incumbents(key, *, sortby=lambda report: report.time.end, reverse=None, ffill=False) #

Returns a trace of the incumbents, keeping only reports that improve on the previous best.

incumbents
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

incumbents = (
    history
    .incumbents("cost", sortby=lambda r: r.time.end)
)
for report in incumbents:
    print(f"{report.metrics=}, {report.config=}")
report.metrics={'cost': 4.0}, report.config={'x': 0}
report.metrics={'cost': 3.0}, report.config={'x': 1}
PARAMETER DESCRIPTION
key

The key to use. If given a str, it will use that as the key to use in the metrics, defining if one report is better than another. If given a Callable, it should return a bool, indicating if the first argument report is better than the second argument report.

TYPE: Callable[[Report, Report], bool] | str

sortby

The key to sort by. If given a str, it will sort by the value of that key in the .metrics and also filter out anything that does not contain this key. By default, it will sort by the end time of the report.

TYPE: Callable[[Report], Comparable] | str DEFAULT: lambda report: report.time.end

reverse

Whether to sort in some given order. By default (None), if given a metric key, the reports with the best metric values will be sorted first. If given a Callable, the reports with the smallest values will be sorted first. Using reverse=True will always reverse this order, while reverse=False will always preserve it.

TYPE: bool | None DEFAULT: None

ffill

Whether to forward fill the incumbents. This means that if a report is not an incumbent, it will be replaced with the current best. This is useful if you want to visualize the incumbents over some x-axis, so that you have a point at every position along the axis.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Report]

The history of incumbents.

Source code in src/amltk/optimization/history.py
def incumbents(
    self,
    key: Callable[[Trial.Report, Trial.Report], bool] | str,
    *,
    sortby: Callable[[Trial.Report], Comparable]
    | str = lambda report: report.time.end,
    reverse: bool | None = None,
    ffill: bool = False,
) -> list[Trial.Report]:
    """Returns a trace of the incumbents, where only the report that is better than the previous
    best report is kept.

    ```python exec="true" source="material-block" result="python" title="incumbents"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    incumbents = (
        history
        .incumbents("cost", sortby=lambda r: r.time.end)
    )
    for report in incumbents:
        print(f"{report.metrics=}, {report.config=}")
    ```

    Args:
        key: The key to use. If given a str, it will use that as the
            key to use in the metrics, defining if one report is better
            than another. If given a `#!python Callable`, it should
            return a `bool`, indicating if the first argument report
            is better than the second argument report.
        sortby: The key to sort by. If given a str, it will sort by
            the value of that key in the `.metrics` and also filter
            out anything that does not contain this key.
            By default, it will sort by the end time of the report.
        reverse: Whether to sort in some given order. By
            default (`None`), if given a metric key, the reports with
            the best metric values will be sorted first. If
            given a `#!python Callable`, the reports with the
            smallest values will be sorted first. Using
            `reverse=True` will always reverse this order, while
            `reverse=False` will always preserve it.
        ffill: Whether to forward fill the incumbents. This means that
            if a report is not an incumbent, it will be replaced with
            the current best. This is useful if you want to
            visualize the incumbents over some x axis, so that
            you have a point at every place along the axis.

    Returns:
        The history of incumbents.
    """  # noqa: E501
    match key:
        case str():
            metric = self.metrics[key]
            __op = operator.lt if metric.minimize else operator.gt  # type: ignore
            op = lambda r1, r2: __op(r1.metrics[key], r2.metrics[key])
        case _:
            op = key

    sorted_reports = self.sortby(sortby, reverse=reverse)
    return list(compare_accumulate(sorted_reports, op=op, ffill=ffill))
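
A brief sketch of ffill, continuing from the example above: with ffill=True every position in the sorted trace carries the best report seen so far, which is convenient when plotting an incumbent curve.

trace = history.incumbents("cost", ffill=True)
print([r.metrics["cost"] for r in trace])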

def sortby(key, *, reverse=None) #

Sorts the history by a key and returns a sorted list of reports.

sortby
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

trace = (
    history
    .filter(lambda report: report.status == "success")
    .sortby("cost")
)

for report in trace:
    print(f"{report.metrics}, {report}")
{'cost': 3.0}, Trial.Report(trial=Trial(name='trial_1', config={'x': 1}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 3.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=3.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 4.0}, Trial.Report(trial=Trial(name='trial_0', config={'x': 0}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 4.0}, Trial.Report(trial=Trial(name='trial_2', config={'x': 2}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 7.0}, Trial.Report(trial=Trial(name='trial_3', config={'x': 3}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 7.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=7.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 12.0}, Trial.Report(trial=Trial(name='trial_4', config={'x': 4}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 12.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=12.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 19.0}, Trial.Report(trial=Trial(name='trial_5', config={'x': 5}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 19.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=19.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 28.0}, Trial.Report(trial=Trial(name='trial_6', config={'x': 6}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 28.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=28.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 39.0}, Trial.Report(trial=Trial(name='trial_7', config={'x': 7}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 39.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=39.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 52.0}, Trial.Report(trial=Trial(name='trial_8', config={'x': 8}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 52.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=52.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 67.0}, Trial.Report(trial=Trial(name='trial_9', config={'x': 9}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 67.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=67.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
PARAMETER DESCRIPTION
key

The key to sort by. If given a str, it will sort by the value of that key in the .metrics and also filter out anything that does not contain this key.

TYPE: Callable[[Report], Comparable] | str

reverse

Whether to sort in some given order. By default (None), if given a metric key, the reports with the best metric values will be sorted first. If given a Callable, the reports with the smallest values will be sorted first. Using reverse=True will always reverse this order, while reverse=False will always preserve it.

TYPE: bool | None DEFAULT: None

RETURNS DESCRIPTION
list[Report]

A sorted list of reports

Source code in src/amltk/optimization/history.py
def sortby(
    self,
    key: Callable[[Trial.Report], Comparable] | str,
    *,
    reverse: bool | None = None,
) -> list[Trial.Report]:
    """Sorts the history by a key and returns a sorted History.

    ```python exec="true" source="material-block" result="python" title="sortby" hl_lines="15"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    trace = (
        history
        .filter(lambda report: report.status == "success")
        .sortby("cost")
    )

    for report in trace:
        print(f"{report.metrics}, {report}")
    ```

    Args:
        key: The key to sort by. If given a str, it will sort by
            the value of that key in the `.metrics` and also filter
            out anything that does not contain this key.
        reverse: Whether to sort in some given order. By
            default (`None`), if given a metric key, the reports with
            the best metric values will be sorted first. If
            given a `#!python Callable`, the reports with the
            smallest values will be sorted first. Using
            `reverse=True` will always reverse this order, while
            `reverse=False` will always preserve it.

    Returns:
        A sorted list of reports
    """  # noqa: E501
    # If given a str, filter out anything that doesn't have that key
    if isinstance(key, str):
        history = self.filter(lambda report: key in report.metric_names)
        sort_key: Callable[[Trial.Report], Comparable] = lambda r: r.metrics[key]
        reverse = (
            reverse if reverse is not None else (not self.metrics[key].minimize)
        )
    else:
        history = self
        sort_key = key
        reverse = False if reverse is None else reverse

    return sorted(history.reports, key=sort_key, reverse=reverse)
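
A brief sketch with a callable key, continuing from the example above: sort by wall-clock end time, most recent first.

newest_first = history.sortby(lambda r: r.time.end, reverse=True)
print([r.name for r in newest_first])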

def from_df(df) (classmethod) #

Loads a history from a pandas DataFrame.

PARAMETER DESCRIPTION
df

The DataFrame to load the history from.

TYPE: DataFrame

RETURNS DESCRIPTION
History

A History.

Source code in src/amltk/optimization/history.py
@classmethod
def from_df(cls, df: pd.DataFrame) -> History:
    """Loads a history from a pandas DataFrame.

    Args:
        df: The DataFrame to load the history from.

    Returns:
        A History.
    """
    if len(df) == 0:
        return cls()
    return History.from_reports(Trial.Report.from_df(s) for _, s in df.iterrows())
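
A brief sketch, continuing from the df() example above: since from_df() rebuilds the history row by row, a filtered DataFrame yields a filtered History.

df = history.df()
successes_only = History.from_df(df[df["status"] == "success"])
print(len(successes_only))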