# Source code for autoPyTorch.pipeline.components.base_component

import importlib
import inspect
import pkgutil
import sys
import warnings
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Union

from ConfigSpace.configuration_space import Configuration, ConfigurationSpace

import numpy as np

from sklearn.base import BaseEstimator
from sklearn.utils import check_random_state

from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace
from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdate


def find_components(
    package: str,
    directory: str,
    base_class: "BaseEstimator"
) -> Dict[str, "BaseEstimator"]:
    """Utility to find components on a given directory
    that inherit from base_class.

    Args:
        package (str):
            The associated package that contains the components
        directory (str):
            The directory from which to extract the components
        base_class (BaseEstimator):
            base class to filter out desired components
            that don't inherit from this class

    Returns:
        Dict[str, BaseEstimator]:
            Mapping from module name to the component class found in it
            (one component per module; the last matching class wins).
    """
    components = OrderedDict()

    for _, module_name, ispkg in pkgutil.iter_modules([directory]):
        # Sub-packages are not scanned, only plain modules
        if ispkg:
            continue
        full_module_name = "%s.%s" % (package, module_name)
        # Reuse an already-imported module instead of silently skipping it;
        # otherwise a second call (or a pre-imported module) would find nothing.
        if full_module_name in sys.modules:
            module = sys.modules[full_module_name]
        else:
            module = importlib.import_module(full_module_name)

        for _, obj in inspect.getmembers(module):
            # TODO test if the obj implements the interface
            # Keep in mind that this only collects the class object,
            # it does not instantiate the real target component
            if inspect.isclass(obj) and issubclass(obj, base_class) and obj != base_class:
                components[module_name] = obj

    return components


class ThirdPartyComponents(object):
    """
    This class allows the user to create a new component for any stage of the
    pipeline. Inheriting from the base class of each component does not provide
    any checks, to make sure that the hyperparameter space is properly specified.

    This class ensures the minimum component checking for the configuration
    space to work.

    Args:
        base_class (BaseEstimator):
            Component type desired to be created
    """

    def __init__(self, base_class: "BaseEstimator"):
        self.base_class = base_class
        # Registered components, keyed by class name, in insertion order
        self.components: Dict[str, "BaseEstimator"] = OrderedDict()

    def add_component(self, obj: "BaseEstimator") -> None:
        """Register a third-party component class after validating it.

        Args:
            obj (BaseEstimator):
                Class (not an instance) deriving from ``self.base_class``.

        Raises:
            TypeError: if ``obj`` is not a class or not a subclass of
                ``self.base_class``.
            ValueError: if the properties reported by
                ``obj.get_properties()`` do not exactly match the
                required property set.
        """
        # issubclass (rather than checking __bases__) also accepts indirect
        # subclasses, e.g. user classes deriving from an intermediate base.
        if inspect.isclass(obj) and issubclass(obj, self.base_class) and obj is not self.base_class:
            name = obj.__name__
            classifier = obj
        else:
            raise TypeError('add_component works only with a subclass of %s' %
                            str(self.base_class))

        properties = set(classifier.get_properties())
        class_specific_properties = classifier.get_required_properties()
        # TODO: Add desired properties when we define them
        should_be_there = {'shortname', 'name'}
        if class_specific_properties is not None:
            should_be_there = should_be_there.union(class_specific_properties)

        # The declared properties must match should_be_there exactly:
        # no extras, and nothing missing.
        for prop in properties:
            if prop not in should_be_there:
                raise ValueError('Property %s must not be specified for '
                                 'algorithm %s. Only the following properties '
                                 'can be specified: %s' %
                                 (prop, name, str(should_be_there)))
        for prop in should_be_there:
            if prop not in properties:
                raise ValueError('Property %s not specified for algorithm %s' %
                                 (prop, name))

        self.components[name] = classifier


class autoPyTorchComponent(BaseEstimator):
    """Provides an abstract interface which can be used to
    create steps of a pipeline in AutoPyTorch.

    Args:
        random_state (Optional[np.random.RandomState]):
            Allows to produce reproducible results by setting a
            seed for randomized settings
    """
    # Subclasses may override with the property names they require
    _required_properties: Optional[List[str]] = None

    def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None:
        super().__init__()
        if random_state is None:
            # Fixed seed keeps behaviour reproducible when no state is given
            self.random_state = check_random_state(1)
        else:
            self.random_state = check_random_state(random_state)
        self._fit_requirements: List[FitRequirement] = list()
        self._cs_updates: Dict[str, HyperparameterSearchSpaceUpdate] = dict()

    @classmethod
    def get_required_properties(cls) -> Optional[List[str]]:
        """
        Function to get the properties in the component
        that are required for the properly fitting the pipeline.
        Usually defined in the base class of the component

        Returns:
            List[str]:
                list of properties autopytorch component must have
                for proper functioning of the pipeline
        """
        return cls._required_properties

    def get_fit_requirements(self) -> Optional[List[FitRequirement]]:
        """
        Function to get the required keys by the component
        that need to be in the fit dictionary

        Returns:
            List[FitRequirement]:
                a list containing required keys
                in a named tuple (name: str, type: object)
        """
        return self._fit_requirements

    def add_fit_requirements(self, requirements: List[FitRequirement]) -> None:
        # Appends (does not replace) requirements to the existing list
        self._fit_requirements.extend(requirements)

    @staticmethod
    def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
                       ) -> Dict[str, Union[str, bool]]:
        """Get the properties of the underlying algorithm.

        Args:
            dataset_properties (Optional[Dict[str, Union[str, int]]):
                Describes the dataset to work on

        Returns:
            Dict[str, Any]:
                Properties of the algorithm
        """
        raise NotImplementedError()

    @staticmethod
    def get_hyperparameter_search_space(
        dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
    ) -> ConfigurationSpace:
        """Return the configuration space of this classification algorithm.

        Args:
            dataset_properties (Optional[Dict[str, Union[str, int]]):
                Describes the dataset to work on

        Returns:
            ConfigurationSpace:
                The configuration space of this algorithm.
        """
        raise NotImplementedError()

    def fit(self, X: Dict[str, Any], y: Any = None) -> "autoPyTorchComponent":
        """The fit function calls the fit function of the underlying
        model and returns `self`.

        Args:
            X (Dict[str, Any]):
                Dictionary with fitted parameters. It is a message passing
                mechanism, in which during a transform, a components adds
                relevant information so that further stages can be properly fitted
            y (Any):
                Not Used -- to comply with API

        Returns:
            self:
                returns an instance of self.

        Notes:
            Please see the `scikit-learn API documentation
            <http://scikit-learn.org/dev/developers/index.html#apis-of-scikit
            -learn-objects>`_ for further information.
        """
        raise NotImplementedError()

    def set_hyperparameters(self,
                            configuration: Configuration,
                            init_params: Optional[Dict[str, Any]] = None
                            ) -> BaseEstimator:
        """
        Applies a configuration to the given component.
        This method translate a hierarchical configuration key,
        to an actual parameter of the autoPyTorch component.

        Args:
            configuration (Configuration):
                Which configuration to apply to the chosen component
            init_params (Optional[Dict[str, any]]):
                Optional arguments to initialize the chosen component

        Returns:
            An instance of self
        """
        params = configuration.get_dictionary()

        for param, value in params.items():
            if not hasattr(self, param):
                raise ValueError('Cannot set hyperparameter %s for %s because '
                                 'the hyperparameter does not exist.' %
                                 (param, str(self)))
            setattr(self, param, value)

        if init_params is not None:
            for param, value in init_params.items():
                if not hasattr(self, param):
                    raise ValueError('Cannot set init param %s for %s because '
                                     'the init param does not exist.' %
                                     (param, str(self)))
                setattr(self, param, value)

        return self

    def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None:
        """
        A mechanism in code to ensure the correctness of the fit dictionary
        It recursively makes sure that the children and parent level requirements
        are honored before fit.

        Args:
            X (Dict[str, Any]):
                Dictionary with fitted parameters. It is a message passing
                mechanism, in which during a transform, a components adds
                relevant information so that further stages can be properly fitted
        """
        assert isinstance(X, dict), "The input X to the pipeline must be a dictionary"

        if y is not None:
            warnings.warn("Provided y argument, yet only X is required")

        if 'dataset_properties' not in X:
            raise ValueError(
                "To fit a pipeline, expected fit dictionary to have a dataset_properties key")

        for requirement in self._fit_requirements:
            # A requirement either lives in the dataset_properties sub-dict
            # or at the top level of the fit dictionary
            check_dict = X['dataset_properties'] if requirement.dataset_property else X
            if requirement.name not in check_dict.keys():
                if requirement.name in ['X_train', 'backend']:
                    # X_train and backend are interchangeable: having
                    # either one satisfies the requirement
                    if 'X_train' in check_dict.keys() or 'backend' in check_dict.keys():
                        continue
                else:
                    raise ValueError(
                        "To fit {}, expected fit dictionary to have '{}'"
                        " but got \n {}".format(
                            self.__class__.__name__,
                            requirement.name, list(check_dict.keys())))
            else:
                TYPE_SUPPORTED = isinstance(check_dict[requirement.name],
                                            tuple(requirement.supported_types))
                if not TYPE_SUPPORTED:
                    raise TypeError("Expected {} to be instance of {} got {}"
                                    .format(requirement.name,
                                            requirement.supported_types,
                                            type(check_dict[requirement.name])))

    def __str__(self) -> str:
        """Representation of the current Component"""
        name = self.get_properties()['name']
        return "autoPyTorch.pipeline %s" % name

    def _apply_search_space_update(self,
                                   hyperparameter_search_space_update: HyperparameterSearchSpaceUpdate
                                   ) -> None:
        """Allows the user to update a hyperparameter

        Args:
            name (str):
                name of hyperparameter
            new_value_range (List[Union[int, str, float]]):
                value range can be either lower, upper or a list of possible candidates
            log (bool):
                Whether to use log scale
        """
        self._cs_updates[hyperparameter_search_space_update.hyperparameter] = \
            hyperparameter_search_space_update

    def _get_search_space_updates(self) -> Dict[str, HyperparameterSearchSpace]:
        """Get the search space updates

        Returns:
            Dict[str, HyperparameterSearchSpace]:
                Mapping of search space updates. Keys don't contain the prefix.
        """
        result: Dict[str, HyperparameterSearchSpace] = dict()

        # iterate over all search space updates of this node and keep the ones
        # that have the given prefix
        for key in self._cs_updates.keys():
            result[key] = self._cs_updates[key].get_search_space()
        return result