
History

The History is used to keep a structured record of what occurred with Trials and their associated Reports.

Usage

from amltk.optimization import Trial, History, Metric
from amltk.store import PathBucket

loss = Metric("loss", minimize=True)

def target_function(trial: Trial) -> Trial.Report:
    x = trial.config["x"]
    y = trial.config["y"]
    trial.store({"config.json": trial.config})

    with trial.begin():
        loss = x**2 - y

    if trial.exception:
        return trial.fail()

    return trial.success(loss=loss)

# ... usually obtained from an optimizer
bucket = PathBucket("all-trial-results")
history = History()

for x, y in zip([1, 2, 3], [4, 5, 6]):
    trial = Trial(name="some-unique-name", config={"x": x, "y": y}, bucket=bucket, metrics=[loss])
    report = target_function(trial)
    history.add(report)

print(history.df())
bucket.rmdir()  # markdown-exec: hide

                   status  ... time:kind  time:unit
name                       ...
some-unique-name  success  ...      wall    seconds
some-unique-name  success  ...      wall    seconds
some-unique-name  success  ...      wall    seconds

[3 rows x 20 columns]

You'll often need to perform some operations on a History, so we provide some utility methods (a short combined sketch follows the list):

  • filter(key=...) - Filters the history by some predicate, e.g. history.filter(lambda report: report.status == "success")
  • groupby(key=...) - Groups the history by some key, e.g. history.groupby(lambda report: report.config["x"] < 5)
  • sortby(key=...) - Sorts the history by some key, e.g. history.sortby(lambda report: report.time.end)
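
A small combined sketch, continuing from the usage example above (with its "loss" metric):

successes = history.filter(lambda report: report.status == "success")
by_status = history.groupby("status")
ordered = history.sortby("loss")

print(len(successes), list(by_status), [r.metrics["loss"] for r in ordered])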

There are also some serialization capabilities built in, allowing you to store your reports and load them back in later (a round-trip sketch follows the list):

  • df(...) - Output a pd.DataFrame of all the information available.
  • from_df(...) - Create a History from a pd.DataFrame.
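
A minimal round-trip sketch, continuing from the usage example above: the DataFrame produced by df() can be passed straight back to from_df().

df = history.df()
restored = History.from_df(df)
print(len(restored) == len(history))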

You can also retrieve individual reports from the history by their name, e.g. history["some-unique-name"], or iterate through the history with for report in history: ....
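
A small sketch, assuming the history contains a report for a trial named "trial_3" (a hypothetical name):

report = history["trial_3"]  # equivalent to history.find("trial_3")
for report in history:
    print(report.name, report.status)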

class History (dataclass) #

Bases: RichRenderable

A history of trials.

This is a collection of reports from trials, which you can access by their trial name. It is unsorted in general, but by using sortby() you can sort the history.

History
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [
    Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric])
    for i in range(10)
]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

for report in history:
    print(f"{report.name=}, {report}")

print(history.metrics)
print(history.df())

print(history.best())
report.name='trial_0', Trial.Report(trial=Trial(name='trial_0', config={'x': 0}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_1', Trial.Report(trial=Trial(name='trial_1', config={'x': 1}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 3.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=3.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_2', Trial.Report(trial=Trial(name='trial_2', config={'x': 2}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_3', Trial.Report(trial=Trial(name='trial_3', config={'x': 3}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 7.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=7.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_4', Trial.Report(trial=Trial(name='trial_4', config={'x': 4}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 12.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=12.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_5', Trial.Report(trial=Trial(name='trial_5', config={'x': 5}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 19.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=19.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_6', Trial.Report(trial=Trial(name='trial_6', config={'x': 6}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 28.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=28.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_7', Trial.Report(trial=Trial(name='trial_7', config={'x': 7}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 39.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=39.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_8', Trial.Report(trial=Trial(name='trial_8', config={'x': 8}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 52.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=52.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
report.name='trial_9', Trial.Report(trial=Trial(name='trial_9', config={'x': 9}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 67.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=67.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': Metric(name='cost', minimize=True, bounds=None)}
          status  trial_seed exception  ... time:duration time:kind  time:unit
name                                    ...                                   
trial_0  success        <NA>        NA  ...      0.000037      wall    seconds
trial_1  success        <NA>        NA  ...      0.000024      wall    seconds
trial_2  success        <NA>        NA  ...      0.000021      wall    seconds
trial_3  success        <NA>        NA  ...       0.00003      wall    seconds
trial_4  success        <NA>        NA  ...      0.000021      wall    seconds
trial_5  success        <NA>        NA  ...      0.000021      wall    seconds
trial_6  success        <NA>        NA  ...      0.000021      wall    seconds
trial_7  success        <NA>        NA  ...      0.000021      wall    seconds
trial_8  success        <NA>        NA  ...      0.000021      wall    seconds
trial_9  success        <NA>        NA  ...       0.00002      wall    seconds

[10 rows x 19 columns]
Trial.Report(trial=Trial(name='trial_1', config={'x': 1}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 3.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=3.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
ATTRIBUTE DESCRIPTION
reports

The reports in this history, in the order they were added.

TYPE: list[Report]

def from_reports(reports) (classmethod) #

Creates a history from reports.

PARAMETER DESCRIPTION
reports

An iterable of reports.

TYPE: Iterable[Report]

RETURNS DESCRIPTION
History

A history.

Source code in src/amltk/optimization/history.py
@classmethod
def from_reports(cls, reports: Iterable[Trial.Report]) -> History:
    """Creates a history from reports.

    Args:
        reports: An iterable of reports.

    Returns:
        A history.
    """
    history = cls()
    history.add(reports)
    return history
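
A short usage sketch, assuming a populated history with a "cost" metric as in the example above:

# Build a new History holding only the three lowest-cost reports.
top_3 = History.from_reports(history.sortby("cost")[:3])
print(len(top_3))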

def best(metric=None) #

Returns the best report in the history.

PARAMETER DESCRIPTION
metric

The metric to sort by. If None, it will use the first metric in the history. If there are multiple metrics and none are specified, it will raise an error.

TYPE: str | None DEFAULT: None

RETURNS DESCRIPTION
Report

The best report.

Source code in src/amltk/optimization/history.py
def best(self, metric: str | None = None) -> Trial.Report:
    """Returns the best report in the history.

    Args:
        metric: The metric to sort by. If `None`, it will use the
            first metric in the history. If there are multiple metrics
            and none are specified, it will raise an error.

    Returns:
        The best report.
    """
    if metric is None:
        if len(self.metrics) > 1:
            raise ValueError(
                "There are multiple metrics in the history, "
                "please specify which metric to sort by.",
            )

        _metric_def = next(iter(self.metrics.values()))
        _metric_name = _metric_def.name
    else:
        if metric not in self.metrics:
            raise ValueError(
                f"Metric {metric} not found in history. "
                f"Available metrics: {list(self.metrics.keys())}",
            )
        _metric_def = self.metrics[metric]
        _metric_name = metric

    _by = min if _metric_def.minimize else max
    return _by(self.reports, key=lambda r: r.metrics[_metric_name])
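
A brief sketch, assuming the single-metric history built in the class example above:

print(history.best())        # allowed when only one metric is registered
print(history.best("cost"))  # explicit name, required when several metrics exist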

def add(report) #

Adds a report or reports to the history.

PARAMETER DESCRIPTION
report

A report or reports to add.

TYPE: Report | Iterable[Report]

Source code in src/amltk/optimization/history.py
def add(self, report: Trial.Report | Iterable[Trial.Report]) -> None:
    """Adds a report or reports to the history.

    Args:
        report: A report or reports to add.
    """
    match report:
        case Trial.Report():
            for m in report.metric_values:
                if (_m := self.metrics.get(m.name)) is not None:
                    if m.metric != _m:
                        raise ValueError(
                            f"Metric {m.name} has conflicting definitions:"
                            f"\n{m.metric} != {_m}",
                        )
                else:
                    self.metrics[m.name] = m.metric

            self.reports.append(report)
            self._lookup[report.name] = len(self.reports) - 1
        case reports:
            for _report in reports:
                self.add(_report)
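
A minimal sketch of both call forms:

from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
history = History()

reports = []
for i in range(3):
    trial = Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric])
    with trial.begin():
        reports.append(trial.success(cost=i))

history.add(reports[0])   # a single report
history.add(reports[1:])  # any iterable of reports
print(len(history))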

def find(name) #

Finds a report by trial name.

PARAMETER DESCRIPTION
name

The name of the trial.

TYPE: str

RETURNS DESCRIPTION
Report

The report.

Source code in src/amltk/optimization/history.py
def find(self, name: str) -> Trial.Report:
    """Finds a report by trial name.

    Args:
        name: The name of the trial.

    Returns:
        The report.
    """
    return self.reports[self._lookup[name]]
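
A brief sketch, assuming trial names as in the class example above:

report = history.find("trial_3")  # the same report as history["trial_3"]
print(report.config, report.metrics)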

def df(*, profiles=True, configs=True, summary=True, metrics=True, normalize_time=True) #

Returns a pandas DataFrame of the history.

Each individual trial will be a row in the dataframe.

Prefixes

  • summary: Entries will be prefixed with "summary:"
  • config: Entries will be prefixed with "config:"
  • metrics: Entries will be prefixed with "metrics:"
df
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

print(history.df())
          status  trial_seed exception  ... time:duration time:kind  time:unit
name                                    ...                                   
trial_0  success        <NA>        NA  ...      0.000036      wall    seconds
trial_1  success        <NA>        NA  ...      0.000025      wall    seconds
trial_2  success        <NA>        NA  ...      0.000021      wall    seconds
trial_3  success        <NA>        NA  ...      0.000022      wall    seconds
trial_4  success        <NA>        NA  ...      0.000022      wall    seconds
trial_5  success        <NA>        NA  ...      0.000021      wall    seconds
trial_6  success        <NA>        NA  ...      0.000021      wall    seconds
trial_7  success        <NA>        NA  ...       0.00002      wall    seconds
trial_8  success        <NA>        NA  ...       0.00002      wall    seconds
trial_9  success        <NA>        NA  ...       0.00002      wall    seconds

[10 rows x 19 columns]
PARAMETER DESCRIPTION
profiles

Whether to include the profiles.

TYPE: bool DEFAULT: True

configs

Whether to include the configs.

TYPE: bool DEFAULT: True

summary

Whether to include the summary.

TYPE: bool DEFAULT: True

metrics

Whether to include the metrics.

TYPE: bool DEFAULT: True

normalize_time

Whether to normalize the time to the first report. If given a float, it will normalize to that value.

Will normalize all columns with "time:end" and "time:start" in their name. It will use the time of the earliest report as the offset.

TYPE: bool | float DEFAULT: True

RETURNS DESCRIPTION
DataFrame

A pandas DataFrame of the history.

Source code in src/amltk/optimization/history.py
def df(
    self,
    *,
    profiles: bool = True,
    configs: bool = True,
    summary: bool = True,
    metrics: bool = True,
    normalize_time: bool | float = True,
) -> pd.DataFrame:
    """Returns a pandas DataFrame of the history.

    Each individual trial will be a row in the dataframe.

    !!! note "Prefixes"

        * `summary`: Entries will be prefixed with `#!python "summary:"`
        * `config`: Entries will be prefixed with `#!python "config:"`
        * `metrics`: Entries will be prefixed with `#!python "metrics:"`

    ```python exec="true" source="material-block" result="python" title="df" hl_lines="12"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    print(history.df())
    ```

    Args:
        profiles: Whether to include the profiles.
        configs: Whether to include the configs.
        summary: Whether to include the summary.
        metrics: Whether to include the metrics.
        normalize_time: Whether to normalize the time to the first
            report. If given a `#!python float`, it will normalize
            to that value.

            Will normalize all columns with `#!python "time:end"`
            and `#!python "time:start"` in their name. It will use
            the time of the earliest report as the offset.

    Returns:
        A pandas DataFrame of the history.
    """  # noqa: E501
    if len(self) == 0:
        return pd.DataFrame()

    _df = pd.concat(
        [
            report.df(
                profiles=profiles,
                configs=configs,
                summary=summary,
                metrics=metrics,
            )
            for report in self.reports
        ],
    )
    _df = _df.convert_dtypes()

    match normalize_time:
        case True if "time:start" in _df.columns:
            time_columns = ("time:start", "time:end")
            cols = [c for c in _df.columns if c.endswith(time_columns)]
            _df[cols] -= _df["time:start"].min()
        case float():
            time_columns = ("time:start", "time:end")
            cols = [c for c in _df.columns if c.endswith(time_columns)]
            _df[cols] -= normalize_time
        case _:
            pass

    return _df
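
A small sketch of the keyword arguments, continuing from the example above:

slim = history.df(profiles=False, configs=False, summary=False)  # drop profile, config and summary columns
raw_times = history.df(normalize_time=False)                     # keep absolute timestamps
print(list(slim.columns))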

def filter(key) #

Filters the history by a predicate.

filter
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

filtered_history = history.filter(lambda report: report.metrics["cost"] < 10)
for report in filtered_history:
    cost = report.metrics["cost"]
    print(f"{report.name}, {cost=}, {report}")
trial_0, cost=4.0, Trial.Report(trial=Trial(name='trial_0', config={'x': 0}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
trial_1, cost=3.0, Trial.Report(trial=Trial(name='trial_1', config={'x': 1}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 3.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=3.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
trial_2, cost=4.0, Trial.Report(trial=Trial(name='trial_2', config={'x': 2}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
trial_3, cost=7.0, Trial.Report(trial=Trial(name='trial_3', config={'x': 3}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 7.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=7.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
PARAMETER DESCRIPTION
key

A predicate to filter by.

TYPE: Callable[[Report], bool]

RETURNS DESCRIPTION
History

A new history with the filtered reports.

Source code in src/amltk/optimization/history.py
def filter(self, key: Callable[[Trial.Report], bool]) -> History:
    """Filters the history by a predicate.

    ```python exec="true" source="material-block" result="python" title="filter" hl_lines="12"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    filtered_history = history.filter(lambda report: report.metrics["cost"] < 10)
    for report in filtered_history:
        cost = report.metrics["cost"]
        print(f"{report.name}, {cost=}, {report}")
    ```

    Args:
        key: A predicate to filter by.

    Returns:
        A new history with the filtered reports.
    """  # noqa: E501
    return History.from_reports([report for report in self.reports if key(report)])
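
Another brief sketch, continuing from the example above: predicates can combine status and config values.

even_successes = history.filter(
    lambda r: r.status == "success" and r.config["x"] % 2 == 0
)
print(len(even_successes))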

def groupby(key) #

Groups the history by the values of a key.

groupby
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        if x % 2 == 0:
            report = trial.fail(cost=1_000)
        else:
            report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

for status, history in history.groupby("status").items():
    print(f"{status=}, {len(history)=}")
status=<Status.FAIL: 'fail'>, len(history)=5
status=<Status.SUCCESS: 'success'>, len(history)=5

You can pass a Callable to group by any key you like:

from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.fail(cost=x)
        history.add(report)

for below_5, history in history.groupby(lambda r: r.metrics["cost"] < 5).items():
    print(f"{below_5=}, {len(history)=}")
below_5=True, len(history)=5
below_5=False, len(history)=5
PARAMETER DESCRIPTION
key

A key to group by. If "status" is passed, the history will be grouped by the status of the reports.

TYPE: Literal['status'] | Callable[[Report], Hashable]

RETURNS DESCRIPTION
dict[Hashable, History]

A mapping of keys to histories.

Source code in src/amltk/optimization/history.py
def groupby(
    self,
    key: Literal["status"] | Callable[[Trial.Report], Hashable],
) -> dict[Hashable, History]:
    """Groups the history by the values of a key.

    ```python exec="true" source="material-block" result="python" title="groupby" hl_lines="15"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            if x % 2 == 0:
                report = trial.fail(cost=1_000)
            else:
                report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    for status, history in history.groupby("status").items():
        print(f"{status=}, {len(history)=}")
    ```

    You can pass a `#!python Callable` to group by any key you like:

    ```python exec="true" source="material-block" result="python"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.fail(cost=x)
            history.add(report)

    for below_5, history in history.groupby(lambda r: r.metrics["cost"] < 5).items():
        print(f"{below_5=}, {len(history)=}")
    ```

    Args:
        key: A key to group by. If `"status"` is passed, the history will be
            grouped by the status of the reports.

    Returns:
        A mapping of keys to histories.
    """  # noqa: E501
    d = defaultdict(list)

    if key == "status":
        key = operator.attrgetter("status")

    for report in self.reports:
        d[key(report)].append(report)

    return {k: History.from_reports(v) for k, v in d.items()}

def incumbents(key, *, sortby=lambda report: report.time.end, reverse=None, ffill=False) #

Returns a trace of the incumbents, keeping only reports that improve on the previous best.

incumbents
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

incumbents = (
    history
    .incumbents("cost", sortby=lambda r: r.time.end)
)
for report in incumbents:
    print(f"{report.metrics=}, {report.config=}")
report.metrics={'cost': 4.0}, report.config={'x': 0}
report.metrics={'cost': 3.0}, report.config={'x': 1}
PARAMETER DESCRIPTION
key

The key to use. If given a str, it will use that as the key to use in the metrics, defining if one report is better than another. If given a Callable, it should return a bool, indicating if the first argument report is better than the second argument report.

TYPE: Callable[[Report, Report], bool] | str

sortby

The key to sort by. If given a str, it will sort by the value of that key in the .metrics and also filter out anything that does not contain this key. By default, it will sort by the end time of the report.

TYPE: Callable[[Report], Comparable] | str DEFAULT: lambda report: report.time.end

reverse

Whether to sort in some given order. By default (None), if given a metric key, the reports with the best metric values will be sorted first. If given a Callable, the reports with the smallest values will be sorted first. Using reverse=True will always reverse this order, while reverse=False will always preserve it.

TYPE: bool | None DEFAULT: None

ffill

Whether to forward fill the incumbents. This means that if a report is not an incumbent, it will be replaced with the current best. This is useful if you want to visualize the incumbents over some x-axis, so that you have a point at every position along the axis.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[Report]

The history of incumbents.

Source code in src/amltk/optimization/history.py
def incumbents(
    self,
    key: Callable[[Trial.Report, Trial.Report], bool] | str,
    *,
    sortby: Callable[[Trial.Report], Comparable]
    | str = lambda report: report.time.end,
    reverse: bool | None = None,
    ffill: bool = False,
) -> list[Trial.Report]:
    """Returns a trace of the incumbents, where only the report that is better than the previous
    best report is kept.

    ```python exec="true" source="material-block" result="python" title="incumbents"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    incumbents = (
        history
        .incumbents("cost", sortby=lambda r: r.time.end)
    )
    for report in incumbents:
        print(f"{report.metrics=}, {report.config=}")
    ```

    Args:
        key: The key to use. If given a str, it will use that as the
            key to use in the metrics, defining if one report is better
            than another. If given a `#!python Callable`, it should
            return a `bool`, indicating if the first argument report
            is better than the second argument report.
        sortby: The key to sort by. If given a str, it will sort by
            the value of that key in the `.metrics` and also filter
            out anything that does not contain this key.
            By default, it will sort by the end time of the report.
        reverse: Whether to sort in some given order. By
            default (`None`), if given a metric key, the reports with
            the best metric values will be sorted first. If
            given a `#!python Callable`, the reports with the
            smallest values will be sorted first. Using
            `reverse=True` will always reverse this order, while
            `reverse=False` will always preserve it.
        ffill: Whether to forward fill the incumbents. This means that
            if a report is not an incumbent, it will be replaced with
            the current best. This is useful if you want to
            visualize the incumbents over some x axis, so that
            you have a point at every place along the axis.

    Returns:
        The history of incumbents.
    """  # noqa: E501
    match key:
        case str():
            metric = self.metrics[key]
            __op = operator.lt if metric.minimize else operator.gt  # type: ignore
            op = lambda r1, r2: __op(r1.metrics[key], r2.metrics[key])
        case _:
            op = key

    sorted_reports = self.sortby(sortby, reverse=reverse)
    return list(compare_accumulate(sorted_reports, op=op, ffill=ffill))
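
A brief sketch of ffill, continuing from the example above: with ffill=True every position in the sorted trace carries the best report seen so far, which is convenient when plotting an incumbent curve.

trace = history.incumbents("cost", ffill=True)
print([r.metrics["cost"] for r in trace])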

def sortby(key, *, reverse=None) #

Sorts the history by a key and returns a sorted list of reports.

sortby
from amltk.optimization import Trial, History, Metric

metric = Metric("cost", minimize=True)
trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
history = History()

for trial in trials:
    with trial.begin():
        x = trial.config["x"]
        report = trial.success(cost=x**2 - x*2 + 4)
        history.add(report)

trace = (
    history
    .filter(lambda report: report.status == "success")
    .sortby("cost")
)

for report in trace:
    print(f"{report.metrics}, {report}")
{'cost': 3.0}, Trial.Report(trial=Trial(name='trial_1', config={'x': 1}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 3.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=3.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 4.0}, Trial.Report(trial=Trial(name='trial_0', config={'x': 0}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 4.0}, Trial.Report(trial=Trial(name='trial_2', config={'x': 2}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 4.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=4.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 7.0}, Trial.Report(trial=Trial(name='trial_3', config={'x': 3}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 7.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=7.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 12.0}, Trial.Report(trial=Trial(name='trial_4', config={'x': 4}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 12.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=12.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 19.0}, Trial.Report(trial=Trial(name='trial_5', config={'x': 5}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 19.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=19.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 28.0}, Trial.Report(trial=Trial(name='trial_6', config={'x': 6}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 28.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=28.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 39.0}, Trial.Report(trial=Trial(name='trial_7', config={'x': 7}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 39.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=39.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 52.0}, Trial.Report(trial=Trial(name='trial_8', config={'x': 8}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 52.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=52.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
{'cost': 67.0}, Trial.Report(trial=Trial(name='trial_9', config={'x': 9}, bucket=PathBucket(PosixPath('unknown-trial-bucket')), metrics=[Metric(name='cost', minimize=True, bounds=None)], seed=None, fidelities=None, summary={}, exception=None, storage=set(), extras={}), status=<Status.SUCCESS: 'success'>, metrics={'cost': 67.0}, metric_values=(Metric.Value(metric=Metric(name='cost', minimize=True, bounds=None), value=67.0),), metric_defs={'cost': Metric(name='cost', minimize=True, bounds=None)}, metric_names=('cost',))
PARAMETER DESCRIPTION
key

The key to sort by. If given a str, it will sort by the value of that key in the .metrics and also filter out anything that does not contain this key.

TYPE: Callable[[Report], Comparable] | str

reverse

Whether to sort in some given order. By default (None), if given a metric key, the reports with the best metric values will be sorted first. If given a Callable, the reports with the smallest values will be sorted first. Using reverse=True will always reverse this order, while reverse=False will always preserve it.

TYPE: bool | None DEFAULT: None

RETURNS DESCRIPTION
list[Report]

A sorted list of reports

Source code in src/amltk/optimization/history.py
def sortby(
    self,
    key: Callable[[Trial.Report], Comparable] | str,
    *,
    reverse: bool | None = None,
) -> list[Trial.Report]:
    """Sorts the history by a key and returns a sorted History.

    ```python exec="true" source="material-block" result="python" title="sortby" hl_lines="15"
    from amltk.optimization import Trial, History, Metric

    metric = Metric("cost", minimize=True)
    trials = [Trial(name=f"trial_{i}", config={"x": i}, metrics=[metric]) for i in range(10)]
    history = History()

    for trial in trials:
        with trial.begin():
            x = trial.config["x"]
            report = trial.success(cost=x**2 - x*2 + 4)
            history.add(report)

    trace = (
        history
        .filter(lambda report: report.status == "success")
        .sortby("cost")
    )

    for report in trace:
        print(f"{report.metrics}, {report}")
    ```

    Args:
        key: The key to sort by. If given a str, it will sort by
            the value of that key in the `.metrics` and also filter
            out anything that does not contain this key.
        reverse: Whether to sort in some given order. By
            default (`None`), if given a metric key, the reports with
            the best metric values will be sorted first. If
            given a `#!python Callable`, the reports with the
            smallest values will be sorted first. Using
            `reverse=True` will always reverse this order, while
            `reverse=False` will always preserve it.

    Returns:
        A sorted list of reports
    """  # noqa: E501
    # If given a str, filter out anything that doesn't have that key
    if isinstance(key, str):
        history = self.filter(lambda report: key in report.metric_names)
        sort_key: Callable[[Trial.Report], Comparable] = lambda r: r.metrics[key]
        reverse = (
            reverse if reverse is not None else (not self.metrics[key].minimize)
        )
    else:
        history = self
        sort_key = key
        reverse = False if reverse is None else reverse

    return sorted(history.reports, key=sort_key, reverse=reverse)
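
A brief sketch with a callable key, continuing from the example above: sort by wall-clock end time, most recent first.

newest_first = history.sortby(lambda r: r.time.end, reverse=True)
print([r.name for r in newest_first])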

def from_df(df) (classmethod) #

Loads a history from a pandas DataFrame.

PARAMETER DESCRIPTION
df

The DataFrame to load the history from.

TYPE: DataFrame

RETURNS DESCRIPTION
History

A History.

Source code in src/amltk/optimization/history.py
@classmethod
def from_df(cls, df: pd.DataFrame) -> History:
    """Loads a history from a pandas DataFrame.

    Args:
        df: The DataFrame to load the history from.

    Returns:
        A History.
    """
    if len(df) == 0:
        return cls()
    return History.from_reports(Trial.Report.from_df(s) for _, s in df.iterrows())
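
A brief sketch, continuing from the df() example above: since from_df() rebuilds the history row by row, a filtered DataFrame yields a filtered History.

df = history.df()
successes_only = History.from_df(df[df["status"] == "success"])
print(len(successes_only))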