Source code for smac.stats.stats

from typing import Optional

import json
import logging
import os
import time

import numpy as np

from smac.scenario.scenario import Scenario

# Module metadata: authorship, copyright, license, maintainer contact and version.
__author__ = "Marius Lindauer"
__copyright__ = "Copyright 2016, ML4AAD"
__license__ = "3-clause BSD"
__maintainer__ = "Marius Lindauer"
__email__ = "lindauer@cs.uni-freiburg.de"
__version__ = "0.0.1"


class Stats(object):
    """All statistics collected during a configuration run.

    The public counters (plus the debug counters) are written to
    ``stats.json`` in the scenario's output directory by :meth:`save` and can
    be restored with :meth:`load`.

    Parameters
    ----------
    scenario : Scenario

    Attributes
    ----------
    submitted_ta_runs
    finished_ta_runs
    n_configs
    wallclock_time_used
    ta_time_used
    inc_changed
    """

    def __init__(self, scenario: "Scenario"):
        self.__scenario = scenario

        self.submitted_ta_runs = 0
        self.finished_ta_runs = 0
        self.n_configs = 0
        self.wallclock_time_used = 0.0
        self.ta_time_used = 0.0
        self.inc_changed = 0

        # debug stats
        self._n_configs_per_intensify = 0
        self._n_calls_of_intensify = 0
        # exponential moving average
        self._ema_n_configs_per_intensifiy = 0.0
        self._EMA_ALPHA = 0.2

        # NaN marks "timer not started yet". ``np.nan`` is used because the
        # ``np.NaN`` alias was removed in NumPy 2.0.
        self._start_time = np.nan
        self._logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)

    def save(self) -> None:
        """Save all relevant attributes to a json-dictionary.

        Nothing is written when the scenario has no output directory. The
        scenario, the logger and the start time are never serialized.
        """
        output_dir = self.__scenario.output_dir_for_this_run
        if not output_dir:
            self._logger.debug("No scenario.output_dir: not saving stats!")
            return

        # Refresh the used wallclock time, but only when the timer is running:
        # before start_timing() the elapsed time is NaN, which would clobber a
        # previously loaded value and end up as a non-standard NaN in the file.
        used_wallclock_time = self.get_used_wallclock_time()
        if not np.isnan(used_wallclock_time):
            self.wallclock_time_used = used_wallclock_time

        not_serialized = ("_Stats__scenario", "_logger", "_start_time")
        data = {name: value for name, value in vars(self).items() if name not in not_serialized}

        path = os.path.join(output_dir, "stats.json")
        self._logger.debug("Saving stats to %s", path)
        with open(path, "w") as fh:
            json.dump(data, fh)

    def load(self, fn: Optional[str] = None) -> None:
        """Load all attributes from the dictionary in a file into this stats-object.

        Parameters
        ----------
        fn: string or None
            Path to the file to load stats from. If no path is given, the
            ``stats.json`` inside the current scenario's output directory is used.

        Raises
        ------
        ValueError
            If the file contains a key that is not an attribute of this object.
        """
        if not fn:
            assert self.__scenario.output_dir_for_this_run is not None  # please mypy
            fn = os.path.join(self.__scenario.output_dir_for_this_run, "stats.json")

        with open(fn, "r") as fh:
            data = json.load(fh)

        # Set attributes
        for key in data:
            if not hasattr(self, key):
                raise ValueError("Stats does not recognize {}".format(key))
            setattr(self, key, data[key])

    def start_timing(self) -> None:
        """Start the timer (for the runtime configuration budget).

        The wallclock time used so far is subtracted from the start time so
        that a loaded Stats object continues where it left off.

        Raises
        ------
        ValueError
            If the scenario is missing.
        """
        if not self.__scenario:
            raise ValueError("Scenario is missing")
        self._start_time = time.time() - self.wallclock_time_used

    def get_used_wallclock_time(self) -> float:
        """Return the used wallclock time.

        Returns
        -------
        wallclock_time : float
            Used wallclock time in sec; NaN as long as the timer has not been started.
        """
        return time.time() - self._start_time

    def get_remaing_time_budget(self) -> float:
        """Subtract the used wallclock time from the scenario's wallclock limit.

        Raises
        ------
        ValueError
            If the scenario is missing.
        """
        if not self.__scenario:
            raise ValueError("Scenario is missing")
        return self.__scenario.wallclock_limit - (time.time() - self._start_time)

    def get_remaining_ta_runs(self) -> int:
        """Subtract the submitted ta runs from the scenario's target algorithm run limit.

        Raises
        ------
        ValueError
            If the scenario is missing.
        """
        if not self.__scenario:
            raise ValueError("Scenario is missing")
        return self.__scenario.ta_run_limit - self.submitted_ta_runs  # type: ignore[attr-defined] # noqa F821

    def get_remaining_ta_budget(self) -> float:
        """Subtract the used target algorithm time from the scenario's ta running budget.

        Raises
        ------
        ValueError
            If the scenario is missing.
        """
        if not self.__scenario:
            raise ValueError("Scenario is missing")
        return self.__scenario.algo_runs_timelimit - self.ta_time_used

    def is_budget_exhausted(self) -> bool:
        """Check whether the time budget, the ta budget or the number of ta runs is exhausted.

        Returns
        -------
        exhaustedness: boolean
            True if one of the budgets is exhausted.
        """
        return (
            self.get_remaing_time_budget() < 0 or self.get_remaining_ta_budget() < 0
        ) or self.get_remaining_ta_runs() <= 0

    def update_average_configs_per_intensify(self, n_configs: int) -> None:
        """Update the statistics on how many configurations are used per intensify call.

        Parameters
        ----------
        n_configs: int
            Number of configurations in the current intensify call.
        """
        self._n_calls_of_intensify += 1
        self._n_configs_per_intensify += n_configs

        if self._n_calls_of_intensify == 1:
            # The first observation seeds the exponential moving average.
            self._ema_n_configs_per_intensifiy = float(n_configs)
        else:
            self._ema_n_configs_per_intensifiy = (
                1 - self._EMA_ALPHA
            ) * self._ema_n_configs_per_intensifiy + self._EMA_ALPHA * n_configs

    def print_stats(self, debug_out: bool = False) -> None:
        """Print all statistics.

        Parameters
        ----------
        debug_out: bool
            Use logging.debug instead of logging.info if set to true. The debug
            statistics are always logged with logging.debug.
        """
        log_func = self._logger.debug if debug_out else self._logger.info

        log_func("---------------------STATISTICS---------------------")
        log_func("Incumbent changed: %d", self.inc_changed - 1)  # first change is default conf
        log_func(
            "Submitted target algorithm runs: %d / %s",
            self.submitted_ta_runs,
            str(self.__scenario.ta_run_limit),  # type: ignore[attr-defined] # noqa F821
        )
        log_func(
            "Finished target algorithm runs: %d / %s",
            self.finished_ta_runs,
            str(self.__scenario.ta_run_limit),  # type: ignore[attr-defined] # noqa F821
        )
        log_func("Configurations: %d", self.n_configs)
        log_func(
            "Used wallclock time: %.2f / %.2f sec ",
            self.get_used_wallclock_time(),
            self.__scenario.wallclock_limit,
        )
        log_func(
            "Used target algorithm runtime: %.2f / %.2f sec",
            self.ta_time_used,
            self.__scenario.algo_runs_timelimit,
        )

        self._logger.debug("Debug Statistics:")
        if self._n_calls_of_intensify > 0:
            self._logger.debug(
                "Average Configurations per Intensify: %.2f",
                self._n_configs_per_intensify / self._n_calls_of_intensify,
            )
            self._logger.debug(
                "Exponential Moving Average of Configurations per Intensify: %.2f",
                self._ema_n_configs_per_intensifiy,
            )
        log_func("----------------------------------------------------")