.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "examples/80_extending/example_extending_preprocessor.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download_examples_80_extending_example_extending_preprocessor.py>`
        to download the full example code or to run this example in your browser via Binder

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_examples_80_extending_example_extending_preprocessor.py:


==================================================
Extending Auto-Sklearn with Preprocessor Component
==================================================

The following example demonstrates how to create a wrapper around the linear
discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor
in auto-sklearn.

.. GENERATED FROM PYTHON SOURCE LINES 10-33

.. code-block:: default

    from typing import Optional
    from pprint import pprint

    from ConfigSpace.configuration_space import ConfigurationSpace
    from ConfigSpace.hyperparameters import (
        UniformFloatHyperparameter,
        CategoricalHyperparameter,
    )
    from ConfigSpace.conditions import InCondition

    import sklearn.metrics

    from autosklearn.askl_typing import FEAT_TYPE_TYPE
    import autosklearn.classification
    import autosklearn.pipeline.components.feature_preprocessing
    from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
    from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA
    from autosklearn.util.common import check_none

    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split


.. GENERATED FROM PYTHON SOURCE LINES 34-36

Create LDA component for auto-sklearn
=====================================

.. GENERATED FROM PYTHON SOURCE LINES 36-104

.. code-block:: default

    class LDA(AutoSklearnPreprocessingAlgorithm):
        """Feature preprocessor wrapping scikit-learn's LinearDiscriminantAnalysis.

        Parameters
        ----------
        solver
            Solver name forwarded to ``LinearDiscriminantAnalysis``.
        tol
            Tolerance forwarded to ``LinearDiscriminantAnalysis``; cast to
            ``float`` in :meth:`fit`.
        shrinkage
            Shrinkage value; anything ``check_none`` flags is replaced by
            ``None`` in :meth:`fit`, everything else is cast to ``float``.
        random_state
            Stored on the instance; not used by the methods of this class.
        """

        def __init__(self, solver, tol, shrinkage=None, random_state=None):
            # Raw hyperparameter values are kept as given; ``fit`` coerces them.
            self.solver, self.shrinkage, self.tol = solver, shrinkage, tol
            self.random_state = random_state
            # Populated by ``fit``; ``None`` marks the component as unfitted.
            self.preprocessor = None

        def fit(self, X, y=None):
            """Coerce the hyperparameters, build the LDA estimator and fit it.

            Returns ``self`` so that calls can be chained.
            """
            raw_shrinkage = self.shrinkage
            self.shrinkage = (
                None if check_none(raw_shrinkage) else float(raw_shrinkage)
            )
            self.tol = float(self.tol)

            # Imported lazily, only once the component is actually fitted.
            from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

            self.preprocessor = LinearDiscriminantAnalysis(
                shrinkage=self.shrinkage,
                solver=self.solver,
                tol=self.tol,
            )
            self.preprocessor.fit(X, y)
            return self

        def transform(self, X):
            """Transform ``X`` with the fitted estimator.

            Raises ``NotImplementedError`` when :meth:`fit` has not been called.
            """
            if self.preprocessor is not None:
                return self.preprocessor.transform(X)
            raise NotImplementedError()

        @staticmethod
        def get_properties(dataset_properties=None):
            """Return the static property dictionary describing this component."""
            # Input and output advertise the same set of data kinds.
            data_kinds = (DENSE, UNSIGNED_DATA, SIGNED_DATA)
            return {
                "shortname": "LDA",
                "name": "Linear Discriminant Analysis",
                "handles_regression": False,
                "handles_classification": True,
                "handles_multiclass": False,
                "handles_multilabel": False,
                "handles_multioutput": False,
                "is_deterministic": True,
                "input": data_kinds,
                "output": data_kinds,
            }

        @staticmethod
        def get_hyperparameter_search_space(
            feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
        ):
            """Build the configuration space of the component.

            The space holds ``solver``, ``shrinkage`` and ``tol``; ``shrinkage``
            is conditional on ``solver`` being ``"lsqr"`` or ``"eigen"``.
            """
            solver_hp = CategoricalHyperparameter(
                name="solver", choices=["svd", "lsqr", "eigen"], default_value="svd"
            )
            shrinkage_hp = UniformFloatHyperparameter(
                name="shrinkage", lower=0.0, upper=1.0, default_value=0.5
            )
            tol_hp = UniformFloatHyperparameter(
                name="tol", lower=0.0001, upper=1, default_value=0.0001
            )

            space = ConfigurationSpace()
            space.add_hyperparameters([solver_hp, shrinkage_hp, tol_hp])
            space.add_condition(
                InCondition(shrinkage_hp, solver_hp, ["lsqr", "eigen"])
            )
            return space


    # Add LDA component to auto-sklearn by registering the class with the
    # feature-preprocessing components.
    autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(LDA)


.. GENERATED FROM PYTHON SOURCE LINES 105-107

Create dataset
==============

.. GENERATED FROM PYTHON SOURCE LINES 107-111

.. code-block:: default


    # Load the breast-cancer dataset as a (data, target) pair.
    X, y = load_breast_cancer(return_X_y=True)
    # No ``random_state`` is passed, so the train/test split varies between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y)


.. GENERATED FROM PYTHON SOURCE LINES 112-114

Configuration space
===================

.. GENERATED FROM PYTHON SOURCE LINES 114-118

.. code-block:: default


    # Build the search space declared by the LDA component and print it.
    cs = LDA.get_hyperparameter_search_space()
    print(cs)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    Configuration space object:
      Hyperparameters:
        shrinkage, Type: UniformFloat, Range: [0.0, 1.0], Default: 0.5
        solver, Type: Categorical, Choices: {svd, lsqr, eigen}, Default: svd
        tol, Type: UniformFloat, Range: [0.0001, 1.0], Default: 0.0001
      Conditions:
        shrinkage | solver in {'lsqr', 'eigen'}


.. GENERATED FROM PYTHON SOURCE LINES 119-121

Fit the model using LDA as preprocessor
=======================================

.. GENERATED FROM PYTHON SOURCE LINES 121-132

.. code-block:: default


    clf = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=30,
        # Only the LDA component is listed for the feature preprocessing step.
        include={"feature_preprocessor": ["LDA"]},
        # The two flags below are provided to speed up calculations.
        # Not recommended for a real implementation.
        initial_configurations_via_metalearning=0,
        smac_scenario_args={"runcount_limit": 5},
    )
    clf.fit(X_train, y_train)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none


    AutoSklearnClassifier(ensemble_class=<class 'autosklearn.ensembles.ensemble_selection.EnsembleSelection'>,
                          include={'feature_preprocessor': ['LDA']},
                          initial_configurations_via_metalearning=0,
                          per_run_time_limit=3,
                          smac_scenario_args={'runcount_limit': 5},
                          time_left_for_this_task=30)


.. GENERATED FROM PYTHON SOURCE LINES 133-135

Print prediction score and statistics
=====================================

.. GENERATED FROM PYTHON SOURCE LINES 135-139

.. code-block:: default


    y_pred = clf.predict(X_test)
    # scikit-learn metrics take the ground truth first: (y_true, y_pred).
    # Accuracy is symmetric, so the printed value is the same either way, but
    # the conventional order keeps the example safe to adapt to other metrics.
    print("accuracy: ", sklearn.metrics.accuracy_score(y_test, y_pred))
    # Pretty-print the per-model summary returned by ``show_models``.
    pprint(clf.show_models(), indent=4)


.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    accuracy:  0.9440559440559441
    {   2: {   'balancing': Balancing(random_state=1),
               'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05ce64b280>,
               'cost': 0.049645390070921946,
               'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05d45bd250>,
               'ensemble_weight': 0.28,
               'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d237ec40>,
               'model_id': 2,
               'rank': 1,
               'sklearn_classifier': RandomForestClassifier(max_features=1, n_estimators=512, n_jobs=1,
                           random_state=1, warm_start=True)},
        4: {   'balancing': Balancing(random_state=1, strategy='weighting'),
               'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d3ff3fd0>,
               'cost': 0.049645390070921946,
               'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05ccb9f4c0>,
               'ensemble_weight': 0.32,
               'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d3ff35b0>,
               'model_id': 4,
               'rank': 2,
               'sklearn_classifier': BernoulliNB(alpha=23.955206843654683, fit_prior=False)},
        5: {   'balancing': Balancing(random_state=1, strategy='weighting'),
               'classifier': <autosklearn.pipeline.components.classification.ClassifierChoice object at 0x7f05d1ad46d0>,
               'cost': 0.12056737588652477,
               'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing.DataPreprocessorChoice object at 0x7f05cc27dc40>,
               'ensemble_weight': 0.4,
               'feature_preprocessor': <autosklearn.pipeline.components.feature_preprocessing.FeaturePreprocessorChoice object at 0x7f05d0fc0d60>,
               'model_id': 5,
               'rank': 3,
               'sklearn_classifier': AdaBoostClassifier(algorithm='SAMME',
                       base_estimator=DecisionTreeClassifier(max_depth=7),
                       learning_rate=1.371412136822841, n_estimators=135,
                       random_state=1)}}


.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 0 minutes  20.324 seconds)


.. _sphx_glr_download_examples_80_extending_example_extending_preprocessor.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example


    .. container:: binder-badge

      .. image:: images/binder_badge_logo.svg
        :target: https://mybinder.org/v2/gh/automl/auto-sklearn/master?urlpath=lab/tree/notebooks/examples/80_extending/example_extending_preprocessor.ipynb
        :alt: Launch binder
        :width: 150 px

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: example_extending_preprocessor.py <example_extending_preprocessor.py>`

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: example_extending_preprocessor.ipynb <example_extending_preprocessor.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_