Source code for smac.tae.execute_func

from typing import Callable, Dict, List, Optional, Tuple, Union, cast

import inspect
import math
import time
import traceback

import numpy as np
import pynisher

from smac.configspace import Configuration
from smac.stats.stats import Stats
from smac.tae import StatusType
from smac.tae.serial_runner import SerialRunner
from smac.utils.constants import MAX_CUTOFF, MAXINT
from smac.utils.logging import PickableLoggerAdapter

__author__ = "Marius Lindauer, Matthias Feurer"
__copyright__ = "Copyright 2015, ML4AAD"
__license__ = "3-clause BSD"
__maintainer__ = "Marius Lindauer"
__email__ = "lindauer@cs.uni-freiburg.de"
__version__ = "0.0.2"


[docs]class AbstractTAFunc(SerialRunner):
    """Baseclass to execute target algorithms which are python functions.

    **Note:*** Do not use directly

    Parameters
    ----------
    ta : callable
        Function (target algorithm) to be optimized.
    stats: Stats()
         stats object to collect statistics about runtime and so on
    multi_objectives: List[str]
        names of the objectives, by default it is a single objective parameter "cost"
    run_obj: str
        run objective of SMAC
    memory_limit : int, optional
        Memory limit (in MB) that will be applied to the target algorithm.
    par_factor: int
        penalization factor
    cost_for_crash : float
        cost that is used in case of crashed runs (including runs
        that returned NaN or inf)
    use_pynisher: bool
        use pynisher to limit resources;
        if disabled
          * TA func can use as many resources
          as it wants (time and memory) --- use with caution
          * all runs will be returned as SUCCESS if returned value is not None

    Attributes
    ----------
    memory_limit
    use_pynisher
    """

    def __init__(
        self,
        ta: Callable,
        stats: Stats,
        multi_objectives: List[str] = ["cost"],
        run_obj: str = "quality",
        memory_limit: Optional[int] = None,
        par_factor: int = 1,
        cost_for_crash: float = float(MAXINT),
        abort_on_first_run_crash: bool = False,
        use_pynisher: bool = True,
    ):

        super().__init__(
            ta=ta,
            stats=stats,
            multi_objectives=multi_objectives,
            run_obj=run_obj,
            par_factor=par_factor,
            cost_for_crash=cost_for_crash,
            abort_on_first_run_crash=abort_on_first_run_crash,
        )
        self.ta = ta
        self.stats = stats
        self.multi_objectives = multi_objectives
        self.run_obj = run_obj

        self.par_factor = par_factor
        self.cost_for_crash = cost_for_crash
        self.abort_on_first_run_crash = abort_on_first_run_crash

        signature = inspect.signature(ta).parameters
        self._accepts_seed = "seed" in signature.keys()
        self._accepts_instance = "instance" in signature.keys()
        self._accepts_budget = "budget" in signature.keys()
        if not callable(ta):
            raise TypeError("Argument `ta` must be a callable, but is %s" % type(ta))
        self._ta = cast(Callable, ta)

        if memory_limit is not None:
            memory_limit = int(math.ceil(memory_limit))
        self.memory_limit = memory_limit

        self.use_pynisher = use_pynisher

        self.logger = PickableLoggerAdapter(self.__module__ + "." + self.__class__.__name__)

[docs]    def run(
        self,
        config: Configuration,
        instance: Optional[str] = None,
        cutoff: Optional[float] = None,
        seed: int = 12345,
        budget: Optional[float] = None,
        instance_specific: str = "0",
    ) -> Tuple[StatusType, float, float, Dict]:
        """Runs target algorithm <self._ta> with configuration <config> for at most <cutoff>
        seconds, allowing it to use at most <memory_limit> RAM.

        Whether the target algorithm is called with the <instance> and
        <seed> depends on the subclass implementing the actual call to
        the target algorithm.

        Parameters
        ----------
            config : Configuration, dictionary (or similar)
                Dictionary param -> value
            instance : str, optional
                Problem instance
            cutoff : float, optional
                Wallclock time limit of the target algorithm. If no value is
                provided no limit will be enforced. It is casted to integer internally.
            seed : int
                Random seed
            budget : float, optional
                A positive, real-valued number representing an arbitrary limit to the target algorithm
                Handled by the target algorithm internally
            instance_specific: str
                Instance specific information (e.g., domain file or solution)

        Returns
        -------
            status: enum of StatusType (int)
                {SUCCESS, TIMEOUT, CRASHED, ABORT}
            cost: np.ndarray
                cost/regret/quality/runtime (float) (None, if not returned by TA)
            runtime: float
                runtime (None if not returned by TA)
            additional_info: dict
                all further additional run information
        """
        obj_kwargs = {}  # type: Dict[str, Union[int, str, float, None]]
        if self._accepts_seed:
            obj_kwargs["seed"] = seed
        if self._accepts_instance:
            obj_kwargs["instance"] = instance
        if self._accepts_budget:
            obj_kwargs["budget"] = budget

        cost = self.cost_for_crash  # type: Union[float, List[float]]

        if self.use_pynisher:
            # walltime for pynisher has to be a rounded up integer
            if cutoff is not None:
                cutoff = int(math.ceil(cutoff))
                if cutoff > MAX_CUTOFF:
                    raise ValueError(
                        "%d is outside the legal range of [0, 65535] "
                        "for cutoff (when using pynisher, due to OS limitations)" % cutoff
                    )

            arguments = {
                "logger": self.logger,
                "wall_time_in_s": cutoff,
                "mem_in_mb": self.memory_limit,
            }

            # call ta
            try:
                obj = pynisher.enforce_limits(**arguments)(self._ta)
                rval = self._call_ta(obj, config, obj_kwargs)
            except Exception as e:
                cost = np.asarray(cost).squeeze().tolist()
                exception_traceback = traceback.format_exc()
                error_message = repr(e)
                additional_info = {
                    "traceback": exception_traceback,
                    "error": error_message,
                }

                return StatusType.CRASHED, cost, 0.0, additional_info  # type: ignore

            if isinstance(rval, tuple):
                result = rval[0]
                additional_run_info = rval[1]
            else:
                result = rval
                additional_run_info = {}

            # get status, cost, time
            if obj.exit_status is pynisher.TimeoutException:
                status = StatusType.TIMEOUT
            elif obj.exit_status is pynisher.MemorylimitException:
                status = StatusType.MEMOUT
            elif obj.exit_status == 0 and result is not None:
                status = StatusType.SUCCESS
                cost = result  # type: ignore # noqa
            else:
                status = StatusType.CRASHED

            runtime = float(obj.wall_clock_time)
        else:
            start_time = time.time()

            # call ta
            try:
                rval = self._call_ta(self._ta, config, obj_kwargs)

                if isinstance(rval, tuple):
                    result = rval[0]
                    additional_run_info = rval[1]
                else:
                    result = rval
                    additional_run_info = {}

                status = StatusType.SUCCESS
                cost = result  # type: ignore
            except Exception as e:
                self.logger.exception(e)
                status = StatusType.CRASHED
                additional_run_info = {}

            runtime = time.time() - start_time

        # Do some sanity checking (for multi objective)
        if len(self.multi_objectives) > 1:
            error = f"Returned costs {cost} does not match the number of objectives {len(self.multi_objectives)}."

            # If dict convert to array
            # Make sure the ordering is correct
            if isinstance(cost, dict):
                ordered_cost = []
                for name in self.multi_objectives:
                    if name not in cost:
                        raise RuntimeError(f"Objective {name} was not found in the returned costs.")

                    ordered_cost.append(cost[name])
                cost = ordered_cost

            if isinstance(cost, list):
                if len(cost) != len(self.multi_objectives):
                    raise RuntimeError(error)

            if isinstance(cost, float):
                raise RuntimeError(error)

        if cost is None or status == StatusType.CRASHED:
            status = StatusType.CRASHED
            cost = self.cost_for_crash

        cost = np.asarray(cost).squeeze().tolist()

        return status, cost, runtime, additional_run_info  # type: ignore

    def _call_ta(
        self,
        obj: Callable,
        config: Configuration,
        obj_kwargs: Dict[str, Union[int, str, float, None]],
    ) -> Union[float, Tuple[float, Dict]]:
        raise NotImplementedError()


[docs]class ExecuteTAFuncDict(AbstractTAFunc):
    """Evaluate function for given configuration and resource limit.

    Passes the configuration as a dictionary to the target algorithm. The
    target algorithm needs to implement one of the following signatures:

    * ``target_algorithm(config: Configuration) -> Union[float, Tuple[float, Any]]``
    * ``target_algorithm(config: Configuration, seed: int) -> Union[float, Tuple[float, Any]]``
    * ``target_algorithm(config: Configuration, seed: int, instance: str) -> Union[float, Tuple[float, Any]]``

    The target algorithm can either return a float (the loss), or a tuple
    with the first element being a float and the second being additional run
    information.

    ExecuteTAFuncDict will use inspection to figure out the correct call to
    the target algorithm.

    Parameters
    ----------
    ta : callable
        Function (target algorithm) to be optimized.
    stats : smac.stats.stats.Stats, optional
        Stats object to collect statistics about runtime etc.
    run_obj : str, optional
        Run objective (runtime or quality)
    memory_limit : int, optional
        Memory limit (in MB) that will be applied to the target algorithm.
    par_factor : int, optional
        Penalized average runtime factor. Only used when `run_obj='runtime'`
    use_pynisher: bool, optional
        use pynisher to limit resources;
    """

    def _call_ta(
        self,
        obj: Callable,
        config: Configuration,
        obj_kwargs: Dict[str, Union[int, str, float, None]],
    ) -> Union[float, Tuple[float, Dict]]:

        return obj(config, **obj_kwargs)


[docs]class ExecuteTAFuncArray(AbstractTAFunc):
    """Evaluate function for given configuration and resource limit.

    Passes the configuration as an array-like to the target algorithm. The
    target algorithm needs to implement one of the following signatures:

    * ``target_algorithm(config: np.ndarray) -> Union[float, Tuple[float, Any]]``
    * ``target_algorithm(config: np.ndarray, seed: int) -> Union[float, Tuple[float, Any]]``
    * ``target_algorithm(config: np.ndarray, seed: int, instance: str) -> Union[float, Tuple[float, Any]]``

    The target algorithm can either return a float (the loss), or a tuple
    with the first element being a float and the second being additional run
    information.

    ExecuteTAFuncDict will use inspection to figure out the correct call to
    the target algorithm.

    Parameters
    ----------
    ta : callable
        Function (target algorithm) to be optimized.
    stats : smac.stats.stats.Stats, optional
        Stats object to collect statistics about runtime etc.
    run_obj: str, optional
        Run objective (runtime or quality)
    memory_limit : int, optional
        Memory limit (in MB) that will be applied to the target algorithm.
    par_factor: int, optional
        Penalized average runtime factor. Only used when `run_obj='runtime'`
    """

    def _call_ta(
        self,
        obj: Callable,
        config: Configuration,
        obj_kwargs: Dict[str, Union[int, str, float, None]],
    ) -> Union[float, Tuple[float, Dict]]:

        x = np.array([val for _, val in sorted(config.get_dictionary().items())], dtype=float)
        return obj(x, **obj_kwargs)
SMAC3 Documentation

Source code for smac.tae.execute_func