Skip to content

Status

Functions to get the status of a run and save the status to CSV files.

Summary dataclass #

Summary(
    by_state: dict[State, list[Trial]],
    best: tuple[Trial, float] | None,
    is_multiobjective: bool,
)

Summary of the current state of a neps run.

num_errors property #

num_errors: int

Number of trials that have errored.

num_evaluated property #

num_evaluated: int

Number of trials that have been evaluated.

num_pending property #

num_pending: int

Number of trials that are pending.

completed #

completed() -> list[Trial]

Return all trials which are in a completed state.

Source code in neps\status\status.py
def completed(self) -> list[Trial]:
    """Collect every trial that has finished, regardless of outcome.

    Returns:
        The trials stored under the SUCCESS, FAILED and CRASHED states,
        concatenated in that order.
    """
    finished_states = (State.SUCCESS, State.FAILED, State.CRASHED)
    return [
        trial
        for finished_state in finished_states
        for trial in self.by_state[finished_state]
    ]

df #

df() -> DataFrame

Convert the summary into a dataframe.

Source code in neps\status\status.py
def df(self) -> pd.DataFrame:
    """Convert the summary into a dataframe.

    Each trial becomes one row, ordered by `metadata.time_sampled` and indexed
    by the `id` column. Columns are made up of the config values (prefixed with
    `config.`), the flattened user `extra` entries of the report (prefixed with
    `extra.`), the remaining report fields and the metadata fields. Columns
    that contain no values at all are dropped.

    Returns:
        The dataframe of all trials. When the summary holds no trials, an
        empty dataframe whose index is named `id` is returned.
    """
    trials = sorted(
        itertools.chain.from_iterable(self.by_state.values()),
        key=lambda t: t.metadata.time_sampled,
    )

    # Without trials none of the frames below has an `id` column, so
    # `set_index("id")` would raise a KeyError. Return an empty frame instead
    # so callers can simply check `len(df) == 0`.
    if not trials:
        return pd.DataFrame(index=pd.Index([], name="id"))

    # Config dataframe, config columns prefixed with `config.`
    config_df = (
        pd.DataFrame.from_records([trial.config for trial in trials])
        .rename(columns=lambda name: f"config.{name}")
        .convert_dtypes()
    )

    # Report dataframe, trials without a report contribute an empty record
    report_df = pd.DataFrame.from_records(
        [asdict(t.report) if t.report is not None else {} for t in trials]
    ).convert_dtypes()

    extra_df = pd.DataFrame()
    # We pop out the user extra column to flatten it
    if "extra" in report_df.columns:
        extra_column = report_df.pop("extra")
        extra_df = pd.json_normalize(extra_column).rename(  # type: ignore
            columns=lambda name: f"extra.{name}"
        )

    # Metadata dataframe
    metadata_df = pd.DataFrame.from_records(
        [asdict(t.metadata) for t in trials]
    ).convert_dtypes()

    return (
        pd.concat([config_df, extra_df, report_df, metadata_df], axis="columns")
        .set_index("id")
        .dropna(how="all", axis="columns")
    )

formatted #

formatted() -> str

Return a formatted string of the summary.

Source code in neps\status\status.py
def formatted(self) -> str:
    """Render the summary as a human readable, multi-line string.

    The text consists of the number of evaluated configs, one line per state
    that has at least one trial, and a description of the best trial found so
    far (or a placeholder message when there is none).
    """
    state_lines = [
        f"    {state.name.lower()}: {len(trials)}"
        for state, trials in self.by_state.items()
        if len(trials) > 0
    ]
    state_summary = "\n".join(state_lines)

    if self.best is None:
        best_summary = (
            "Multiobjective summary not supported yet for best yet."
            if self.is_multiobjective
            else "No best found yet."
        )
    else:
        best_trial, best_objective_to_minimize = self.best
        best_lines = [
            f"# Best Found (config {best_trial.metadata.id}):",
            "",  # blank line between the header and the details
            f"    objective_to_minimize: {best_objective_to_minimize}",
            f"    config: {best_trial.config}",
            f"    path: {best_trial.metadata.location}",
        ]
        report = best_trial.report
        assert report is not None
        # Cost and extra are optional and only shown when present
        if report.cost is not None:
            best_lines.append(f"    cost: {report.cost}")
        if len(report.extra) > 0:
            best_lines.append(f"    extra: {report.extra}")
        best_summary = "\n".join(best_lines)

    header = f"# Configs: {self.num_evaluated}"
    return f"{header}\n\n{state_summary}\n\n{best_summary}"

from_directory classmethod #

from_directory(root_directory: str | Path) -> Summary

Create a summary from a neps run directory.

Source code in neps\status\status.py
@classmethod
def from_directory(cls, root_directory: str | Path) -> Summary:
    """Create a summary from a neps run directory."""
    root_directory = Path(root_directory)

    is_multiobjective: bool = False
    best: tuple[Trial, float] | None = None
    by_state: dict[State, list[Trial]] = {s: [] for s in State}

    # NOTE: We don't lock the shared state since we are just reading and don't need to
    # make decisions based on the state
    try:
        shared_state = get_workers_neps_state()
    except RuntimeError:
        shared_state = NePSState.create_or_load(root_directory, load_only=True)

    trials = shared_state.lock_and_read_trials()

    for _trial_id, trial in trials.items():
        state = trial.metadata.state
        by_state[state].append(trial)

        if trial.report is not None:
            objective_to_minimize = trial.report.objective_to_minimize
            match objective_to_minimize:
                case None:
                    pass
                case float() | int() | np.number() if not is_multiobjective:
                    if best is None or objective_to_minimize < best[1]:
                        best = (trial, objective_to_minimize)
                case Sequence():
                    is_multiobjective = True
                    best = None
                case _:
                    raise RuntimeError("Unexpected type for objective_to_minimize")

    return cls(by_state=by_state, best=best, is_multiobjective=is_multiobjective)

post_run_csv #

post_run_csv(
    root_directory: str | Path,
) -> tuple[Path, Path]

Create CSV files summarizing the run data.

PARAMETER DESCRIPTION
root_directory

The root directory of the NePS run.

TYPE: str | Path

RETURNS DESCRIPTION
tuple[Path, Path]

The paths to the configuration data CSV and the run data CSV.

Source code in neps\status\status.py
def post_run_csv(root_directory: str | Path) -> tuple[Path, Path]:
    """Write the results of a run to CSV files.

    The full results dataframe and the short summary series returned by
    `status` are each written to their own CSV file while the CSV lock is held.

    Args:
        root_directory: The root directory of the NePS run.

    Returns:
        The path of the CSV holding the full results, followed by the path of
        the CSV holding the short summary.
    """
    results, summary_series = status(root_directory, print_summary=False)
    results_csv, summary_csv, locker = _initiate_summary_csv(root_directory)

    # Both files are written under the same lock
    with locker.lock():
        results.to_csv(results_csv)
        summary_frame = summary_series.to_frame()
        summary_frame.to_csv(summary_csv)

    return results_csv, summary_csv

status #

status(
    root_directory: str | Path,
    *,
    print_summary: bool = False
) -> tuple[DataFrame, Series]

Print status information of a neps run and return results.

PARAMETER DESCRIPTION
root_directory

The root directory given to neps.run.

TYPE: str | Path

print_summary

If true, print a summary of the current run state.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
tuple[DataFrame, Series]

Dataframe of full results and short summary series.

Source code in neps\status\status.py
def status(
    root_directory: str | Path,
    *,
    print_summary: bool = False,
) -> tuple[pd.DataFrame, pd.Series]:
    """Print status information of a neps run and return results.

    Args:
        root_directory: The root directory given to neps.run.
        print_summary: If true, print a summary of the current run state.

    Returns:
        Dataframe of full results and short summary series. The short summary
        holds the number of trials per state (`num_<state>`). For runs that are
        not multiobjective and have at least one reported objective, it also
        holds the best objective, the id of the best config and the values of
        that config (with the `config.` prefix stripped).
    """
    root_directory = Path(root_directory)
    summary = Summary.from_directory(root_directory)

    if print_summary:
        print(summary.formatted())

    df = summary.df()

    if len(df) == 0:
        return df, pd.Series()

    short = (
        df.groupby("state")
        .size()
        .rename(lambda name: f"num_{name.replace('State.', '').lower()}")
    )
    short.name = "value"
    short.index.name = "summary"
    short.index = short.index.astype(str)
    assert isinstance(short, pd.Series)

    # Not implemented for hypervolume -_-
    if summary.is_multiobjective:
        return df, short

    # `Summary.df` drops columns without any value, so the objective column
    # is missing as long as no trial has reported an objective. There is no
    # best config to report in that case.
    if "objective_to_minimize" not in df.columns:
        return df, short

    idx_min = df["objective_to_minimize"].idxmin()
    row = df.loc[idx_min]
    assert isinstance(row, pd.Series)
    short["best_objective_to_minimize"] = row["objective_to_minimize"]
    short["best_config_id"] = row.name

    # Strip only the leading `config.` prefix. A plain `str.replace` would
    # also remove later occurrences inside a hyperparameter name and may
    # treat the `.` as a regex wildcard depending on the pandas version.
    prefix = "config."
    row = row.loc[row.index.str.startswith(prefix)]
    row.index = row.index.str.slice(len(prefix))  # type: ignore
    short = pd.concat([short, row])  # type: ignore
    assert isinstance(short, pd.Series)
    return df, short