from typing import Callable, Dict, List, Optional, Tuple, Union, cast
import inspect
import math
import time
import traceback
import numpy as np
import pynisher
from smac.configspace import Configuration
from smac.stats.stats import Stats
from smac.tae import StatusType
from smac.tae.serial_runner import SerialRunner
from smac.utils.constants import MAX_CUTOFF, MAXINT
from smac.utils.logging import PickableLoggerAdapter
# Module metadata: authorship, copyright, license, maintainer contact and
# version string for this file.
__author__ = "Marius Lindauer, Matthias Feurer"
__copyright__ = "Copyright 2015, ML4AAD"
__license__ = "3-clause BSD"
__maintainer__ = "Marius Lindauer"
__email__ = "lindauer@cs.uni-freiburg.de"
__version__ = "0.0.2"
class AbstractTAFunc(SerialRunner):
    """Base class to execute target algorithms which are python functions.

    **Note:** Do not use directly

    Parameters
    ----------
    ta : callable
        Function (target algorithm) to be optimized.
    stats: Stats()
        stats object to collect statistics about runtime and so on
    multi_objectives: List[str], optional
        names of the objectives; when omitted, the single objective ``"cost"`` is used
    run_obj: str
        run objective of SMAC
    memory_limit : int, optional
        Memory limit (in MB) that will be applied to the target algorithm.
        Rounded up to the next integer.
    par_factor: int
        penalization factor
    cost_for_crash : float
        cost that is used in case of crashed runs (including runs
        that returned NaN or inf)
    abort_on_first_run_crash: bool
        forwarded unchanged to the parent runner
    use_pynisher: bool
        use pynisher to limit resources;
        if disabled
          * TA func can use as many resources
            as it wants (time and memory) --- use with caution
          * all runs will be returned as SUCCESS if returned value is not None

    Attributes
    ----------
    memory_limit
    use_pynisher

    Raises
    ------
    TypeError
        If ``ta`` is not callable.
    """

    def __init__(
        self,
        ta: Callable,
        stats: Stats,
        multi_objectives: Optional[List[str]] = None,
        run_obj: str = "quality",
        memory_limit: Optional[int] = None,
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = False,
        use_pynisher: bool = True,
    ):
        # A ``None`` sentinel replaces the former mutable ``["cost"]`` default so
        # that instances created with the default never share one list object.
        if multi_objectives is None:
            multi_objectives = ["cost"]

        super().__init__(
            ta=ta,
            stats=stats,
            multi_objectives=multi_objectives,
            run_obj=run_obj,
            par_factor=par_factor,
            cost_for_crash=cost_for_crash,
            abort_on_first_run_crash=abort_on_first_run_crash,
        )
        self.ta = ta
        self.stats = stats
        self.multi_objectives = multi_objectives
        self.run_obj = run_obj
        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash

        # Validate before inspecting the signature: ``inspect.signature`` raises
        # its own TypeError for non-callables, which would otherwise hide the
        # explicit message below.
        if not callable(ta):
            raise TypeError("Argument `ta` must be a callable, but is %s" % type(ta))
        self._ta = cast(Callable, ta)

        # Remember which optional keyword arguments the target algorithm takes
        # so that ``run`` only passes the ones it declares.
        signature = inspect.signature(ta).parameters
        self._accepts_seed = "seed" in signature
        self._accepts_instance = "instance" in signature
        self._accepts_budget = "budget" in signature

        if memory_limit is not None:
            memory_limit = int(math.ceil(memory_limit))
        self.memory_limit = memory_limit

        self.use_pynisher = use_pynisher

        self.logger = PickableLoggerAdapter(self.__module__ + "." + self.__class__.__name__)

    @staticmethod
    def _split_rval(rval):  # type: ignore
        """Split a target-algorithm return value into ``(result, additional_run_info)``.

        A tuple is read as ``(result, info, ...)``; anything else is the result
        itself and is paired with an empty info dict.
        """
        if isinstance(rval, tuple):
            return rval[0], rval[1]
        return rval, {}

    def run(
        self,
        config: Configuration,
        instance: Optional[str] = None,
        cutoff: Optional[float] = None,
        seed: int = 12345,
        budget: Optional[float] = None,
        instance_specific: str = "0",
    ) -> Tuple[StatusType, float, float, Dict]:
        """Runs target algorithm <self._ta> with configuration <config> for at most <cutoff>
        seconds, allowing it to use at most <memory_limit> RAM.

        Whether the target algorithm is called with the <instance>, <seed> and
        <budget> depends on which of these names appear in its signature; the
        actual call is implemented by the subclass in ``_call_ta``.

        Parameters
        ----------
        config : Configuration, dictionary (or similar)
            Dictionary param -> value
        instance : str, optional
            Problem instance
        cutoff : float, optional
            Wallclock time limit of the target algorithm. Only used when
            ``use_pynisher`` is enabled, in which case it is rounded up to an
            integer; it is ignored otherwise.
        seed : int
            Random seed
        budget : float, optional
            A positive, real-valued number representing an arbitrary limit to the target algorithm
            Handled by the target algorithm internally
        instance_specific: str
            Instance specific information (e.g., domain file or solution).
            Not used by this method.

        Returns
        -------
        status: enum of StatusType (int)
            {SUCCESS, TIMEOUT, MEMOUT, CRASHED}
        cost: float or list of floats
            value returned by the target algorithm, or ``cost_for_crash`` if the
            run did not produce one
        runtime: float
            wallclock time of the call (0.0 if the pynisher call raised)
        additional_info: dict
            all further additional run information

        Raises
        ------
        ValueError
            If pynisher is used and the rounded ``cutoff`` exceeds ``MAX_CUTOFF``.
        RuntimeError
            If more than one objective is configured and the cost does not
            provide exactly one value per objective.
        """
        obj_kwargs = {}  # type: Dict[str, Union[int, str, float, None]]
        if self._accepts_seed:
            obj_kwargs["seed"] = seed
        if self._accepts_instance:
            obj_kwargs["instance"] = instance
        if self._accepts_budget:
            obj_kwargs["budget"] = budget

        # Placeholder that survives unless the run succeeds.
        cost = self.cost_for_crash  # type: Union[float, List[float]]

        if self.use_pynisher:
            # walltime for pynisher has to be a rounded up integer
            if cutoff is not None:
                cutoff = int(math.ceil(cutoff))
                if cutoff > MAX_CUTOFF:
                    raise ValueError(
                        "%d is outside the legal range of [0, 65535] "
                        "for cutoff (when using pynisher, due to OS limitations)" % cutoff
                    )

            arguments = {
                "logger": self.logger,
                "wall_time_in_s": cutoff,
                "mem_in_mb": self.memory_limit,
            }

            # call ta
            try:
                obj = pynisher.enforce_limits(**arguments)(self._ta)
                rval = self._call_ta(obj, config, obj_kwargs)
            except Exception as e:
                # Anything raised while wrapping or invoking the function is
                # reported as a crash with zero runtime and the error details.
                cost = np.asarray(cost).squeeze().tolist()
                exception_traceback = traceback.format_exc()
                error_message = repr(e)
                additional_info = {
                    "traceback": exception_traceback,
                    "error": error_message,
                }

                return StatusType.CRASHED, cost, 0.0, additional_info  # type: ignore

            result, additional_run_info = self._split_rval(rval)

            # get status, cost, time
            if obj.exit_status is pynisher.TimeoutException:
                status = StatusType.TIMEOUT
            elif obj.exit_status is pynisher.MemorylimitException:
                status = StatusType.MEMOUT
            elif obj.exit_status == 0 and result is not None:
                status = StatusType.SUCCESS
                cost = result  # type: ignore # noqa
            else:
                status = StatusType.CRASHED

            runtime = float(obj.wall_clock_time)
        else:
            start_time = time.time()

            # call ta
            try:
                rval = self._call_ta(self._ta, config, obj_kwargs)
                # Unpacking stays inside the ``try`` so that a malformed return
                # value is treated as a crash, too.
                result, additional_run_info = self._split_rval(rval)
                status = StatusType.SUCCESS
                cost = result  # type: ignore
            except Exception as e:
                self.logger.exception(e)
                status = StatusType.CRASHED
                additional_run_info = {}

            runtime = time.time() - start_time

        # Do some sanity checking (for multi objective)
        # NOTE(review): this also inspects the ``cost_for_crash`` placeholder of
        # unsuccessful runs, so a scalar ``cost_for_crash`` raises here as well;
        # presumably callers supply one value per objective -- confirm.
        if len(self.multi_objectives) > 1:
            error = f"Returned costs {cost} does not match the number of objectives {len(self.multi_objectives)}."

            # If dict convert to array
            # Make sure the ordering is correct
            if isinstance(cost, dict):
                ordered_cost = []
                for name in self.multi_objectives:
                    if name not in cost:
                        raise RuntimeError(f"Objective {name} was not found in the returned costs.")

                    ordered_cost.append(cost[name])
                cost = ordered_cost

            if isinstance(cost, list):
                if len(cost) != len(self.multi_objectives):
                    raise RuntimeError(error)

            if isinstance(cost, float):
                raise RuntimeError(error)

        if cost is None or status == StatusType.CRASHED:
            status = StatusType.CRASHED
            cost = self.cost_for_crash

        # Normalise scalars, lists and arrays to plain python float / list.
        cost = np.asarray(cost).squeeze().tolist()

        return status, cost, runtime, additional_run_info  # type: ignore

    def _call_ta(
        self,
        obj: Callable,
        config: Configuration,
        obj_kwargs: Dict[str, Union[int, str, float, None]],
    ) -> Union[float, Tuple[float, Dict]]:
        """Invoke ``obj`` for ``config``; must be implemented by subclasses.

        Parameters
        ----------
        obj : callable
            The target algorithm, possibly wrapped by pynisher.
        config : Configuration
            Configuration to evaluate.
        obj_kwargs : dict
            Optional ``seed`` / ``instance`` / ``budget`` keyword arguments
            accepted by the target algorithm.
        """
        raise NotImplementedError()
class ExecuteTAFuncDict(AbstractTAFunc):
    """Runner that evaluates a python function on a configuration object.

    The configuration is handed to the target algorithm unchanged as its first
    positional argument. Which of the optional keyword arguments are passed is
    decided by inspecting the signature of the target algorithm, e.g.:

    * ``target_algorithm(config: Configuration) -> Union[float, Tuple[float, Any]]``
    * ``target_algorithm(config: Configuration, seed: int) -> Union[float, Tuple[float, Any]]``
    * ``target_algorithm(config: Configuration, seed: int, instance: str) -> Union[float, Tuple[float, Any]]``
    * any of the above with an additional ``budget`` keyword argument

    The target algorithm returns either a float (the loss) or a tuple whose
    first element is the loss and whose second element is additional run
    information.

    Parameters
    ----------
    ta : callable
        Function (target algorithm) to be optimized.
    stats : smac.stats.stats.Stats
        Stats object to collect statistics about runtime etc.
    run_obj : str, optional
        Run objective (runtime or quality)
    memory_limit : int, optional
        Memory limit (in MB) that will be applied to the target algorithm.
    par_factor : int, optional
        Penalized average runtime factor. Only used when `run_obj='runtime'`
    use_pynisher: bool, optional
        use pynisher to limit resources;
    """

    def _call_ta(
        self,
        obj: Callable,
        config: Configuration,
        obj_kwargs: Dict[str, Union[int, str, float, None]],
    ) -> Union[float, Tuple[float, Dict]]:
        """Call ``obj`` with ``config`` and the supported keyword arguments."""
        outcome = obj(config, **obj_kwargs)
        return outcome
class ExecuteTAFuncArray(AbstractTAFunc):
    """Runner that evaluates a python function on a numeric vector.

    The configuration is converted to a float array whose entries are the
    hyperparameter values ordered by hyperparameter name, and that array is
    handed to the target algorithm as its first positional argument. Which of
    the optional keyword arguments are passed is decided by inspecting the
    signature of the target algorithm, e.g.:

    * ``target_algorithm(config: np.ndarray) -> Union[float, Tuple[float, Any]]``
    * ``target_algorithm(config: np.ndarray, seed: int) -> Union[float, Tuple[float, Any]]``
    * ``target_algorithm(config: np.ndarray, seed: int, instance: str) -> Union[float, Tuple[float, Any]]``

    The target algorithm returns either a float (the loss) or a tuple whose
    first element is the loss and whose second element is additional run
    information.

    ExecuteTAFuncArray will use inspection to figure out the correct call to
    the target algorithm.

    Parameters
    ----------
    ta : callable
        Function (target algorithm) to be optimized.
    stats : smac.stats.stats.Stats
        Stats object to collect statistics about runtime etc.
    run_obj: str, optional
        Run objective (runtime or quality)
    memory_limit : int, optional
        Memory limit (in MB) that will be applied to the target algorithm.
    par_factor: int, optional
        Penalized average runtime factor. Only used when `run_obj='runtime'`
    """

    def _call_ta(
        self,
        obj: Callable,
        config: Configuration,
        obj_kwargs: Dict[str, Union[int, str, float, None]],
    ) -> Union[float, Tuple[float, Dict]]:
        """Call ``obj`` with the configuration values as a float array."""
        values_by_name = config.get_dictionary()
        # Order the values by hyperparameter name so that the vector layout
        # does not depend on the dictionary's insertion order.
        ordered_values = [values_by_name[name] for name in sorted(values_by_name)]
        vector = np.array(ordered_values, dtype=float)
        return obj(vector, **obj_kwargs)