.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "examples/80_extending/example_extending_preprocessor.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download_examples_80_extending_example_extending_preprocessor.py>`
        to download the full example code or to run this example in your browser via Binder

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_examples_80_extending_example_extending_preprocessor.py:


==================================================
Extending Auto-Sklearn with Preprocessor Component
==================================================

The following example demonstrates how to create a wrapper around the linear
discriminant analysis (LDA) algorithm from sklearn and use it as a preprocessor
in auto-sklearn.

.. GENERATED FROM PYTHON SOURCE LINES 10-33

.. code-block:: default

    from typing import Optional
    from pprint import pprint

    from ConfigSpace.configuration_space import ConfigurationSpace
    from ConfigSpace.hyperparameters import (
        UniformFloatHyperparameter,
        CategoricalHyperparameter,
    )
    from ConfigSpace.conditions import InCondition

    import sklearn.metrics

    from autosklearn.askl_typing import FEAT_TYPE_TYPE
    import autosklearn.classification
    import autosklearn.pipeline.components.feature_preprocessing
    from autosklearn.pipeline.components.base import AutoSklearnPreprocessingAlgorithm
    from autosklearn.pipeline.constants import DENSE, SIGNED_DATA, UNSIGNED_DATA
    from autosklearn.util.common import check_none

    from sklearn.datasets import load_breast_cancer
    from sklearn.model_selection import train_test_split

.. GENERATED FROM PYTHON SOURCE LINES 34-36

Create LDA component for auto-sklearn
=====================================

.. GENERATED FROM PYTHON SOURCE LINES 36-104
.. code-block:: default

    class LDA(AutoSklearnPreprocessingAlgorithm):
        """Feature preprocessor that wraps sklearn's ``LinearDiscriminantAnalysis``.

        The constructor only stores the hyperparameters; the underlying sklearn
        estimator is created and fitted in :meth:`fit` and then used by
        :meth:`transform`.
        """

        def __init__(self, solver, tol, shrinkage=None, random_state=None):
            # Hyperparameter values as handed in by the caller; they are
            # converted to their final types in fit().
            self.solver = solver
            self.shrinkage = shrinkage
            self.tol = tol
            # Stored but not forwarded to the sklearn estimator built in fit().
            self.random_state = random_state
            # Set to the fitted sklearn estimator by fit(); None until then.
            self.preprocessor = None

        def fit(self, X, y=None):
            """Create the sklearn LDA estimator, fit it on ``(X, y)``, return ``self``.

            ``shrinkage`` and ``tol`` are converted in place on the instance
            before the estimator is constructed.
            """
            # NOTE(review): check_none comes from autosklearn.util.common and is
            # not shown here; presumably it also recognises a "None" string
            # coming from the configuration -- confirm against that helper.
            if check_none(self.shrinkage):
                self.shrinkage = None
            else:
                self.shrinkage = float(self.shrinkage)
            self.tol = float(self.tol)

            # Imported locally, at fit time, rather than at module level.
            import sklearn.discriminant_analysis

            self.preprocessor = sklearn.discriminant_analysis.LinearDiscriminantAnalysis(
                shrinkage=self.shrinkage,
                solver=self.solver,
                tol=self.tol,
            )
            self.preprocessor.fit(X, y)
            return self

        def transform(self, X):
            """Transform ``X`` with the fitted estimator.

            Raises ``NotImplementedError`` when :meth:`fit` has not been called
            yet (``self.preprocessor`` is still ``None``).
            """
            if self.preprocessor is None:
                raise NotImplementedError()
            return self.preprocessor.transform(X)

        @staticmethod
        def get_properties(dataset_properties=None):
            """Return the static property dictionary describing this component.

            ``dataset_properties`` is accepted but not used.
            """
            return {
                "shortname": "LDA",
                "name": "Linear Discriminant Analysis",
                "handles_regression": False,
                "handles_classification": True,
                "handles_multiclass": False,
                "handles_multilabel": False,
                "handles_multioutput": False,
                "is_deterministic": True,
                "input": (DENSE, UNSIGNED_DATA, SIGNED_DATA),
                "output": (DENSE, UNSIGNED_DATA, SIGNED_DATA),
            }

        @staticmethod
        def get_hyperparameter_search_space(
            feat_type: Optional[FEAT_TYPE_TYPE] = None, dataset_properties=None
        ):
            """Build the ``ConfigurationSpace`` for ``solver``, ``shrinkage`` and ``tol``.

            ``feat_type`` and ``dataset_properties`` are accepted but not used.
            """
            cs = ConfigurationSpace()
            solver = CategoricalHyperparameter(
                name="solver", choices=["svd", "lsqr", "eigen"], default_value="svd"
            )
            shrinkage = UniformFloatHyperparameter(
                name="shrinkage", lower=0.0, upper=1.0, default_value=0.5
            )
            tol = UniformFloatHyperparameter(
                name="tol", lower=0.0001, upper=1, default_value=0.0001
            )
            cs.add_hyperparameters([solver, shrinkage, tol])
            # shrinkage is only an active hyperparameter when the solver is
            # "lsqr" or "eigen"; it is inactive for the default "svd" solver.
            shrinkage_condition = InCondition(shrinkage, solver, ["lsqr", "eigen"])
            cs.add_condition(shrinkage_condition)
            return cs


    # Add LDA component to auto-sklearn.
    autosklearn.pipeline.components.feature_preprocessing.add_preprocessor(LDA)

.. GENERATED FROM PYTHON SOURCE LINES 105-107

Create dataset
==============

.. GENERATED FROM PYTHON SOURCE LINES 107-111
.. code-block:: default

    X, y = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

.. GENERATED FROM PYTHON SOURCE LINES 112-114

Configuration space
===================

.. GENERATED FROM PYTHON SOURCE LINES 114-118

.. code-block:: default

    cs = LDA.get_hyperparameter_search_space()
    print(cs)

.. rst-class:: sphx-glr-script-out

.. code-block:: none

    Configuration space object:
      Hyperparameters:
        shrinkage, Type: UniformFloat, Range: [0.0, 1.0], Default: 0.5
        solver, Type: Categorical, Choices: {svd, lsqr, eigen}, Default: svd
        tol, Type: UniformFloat, Range: [0.0001, 1.0], Default: 0.0001
      Conditions:
        shrinkage | solver in {'lsqr', 'eigen'}

.. GENERATED FROM PYTHON SOURCE LINES 119-121

Fit the model using LDA as preprocessor
=======================================

.. GENERATED FROM PYTHON SOURCE LINES 121-132

.. code-block:: default

    clf = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=30,
        include={"feature_preprocessor": ["LDA"]},
        # The two flags below are provided to speed up calculations
        # Not recommended for a real implementation
        initial_configurations_via_metalearning=0,
        smac_scenario_args={"runcount_limit": 5},
    )
    clf.fit(X_train, y_train)

.. rst-class:: sphx-glr-script-out

.. code-block:: none

    AutoSklearnClassifier(ensemble_class=,
                          include={'feature_preprocessor': ['LDA']},
                          initial_configurations_via_metalearning=0,
                          per_run_time_limit=3,
                          smac_scenario_args={'runcount_limit': 5},
                          time_left_for_this_task=30)

.. GENERATED FROM PYTHON SOURCE LINES 133-135

Print prediction score and statistics
=====================================

.. GENERATED FROM PYTHON SOURCE LINES 135-139

.. code-block:: default

    y_pred = clf.predict(X_test)
    print("accuracy: ", sklearn.metrics.accuracy_score(y_pred, y_test))
    pprint(clf.show_models(), indent=4)

.. rst-class:: sphx-glr-script-out
.. code-block:: none

    accuracy: 0.9440559440559441
    {   2: {   'balancing': Balancing(random_state=1),
               'classifier': ,
               'cost': 0.049645390070921946,
               'data_preprocessor': ,
               'ensemble_weight': 0.28,
               'feature_preprocessor': ,
               'model_id': 2,
               'rank': 1,
               'sklearn_classifier': RandomForestClassifier(max_features=1, n_estimators=512, n_jobs=1, random_state=1, warm_start=True)},
        4: {   'balancing': Balancing(random_state=1, strategy='weighting'),
               'classifier': ,
               'cost': 0.049645390070921946,
               'data_preprocessor': ,
               'ensemble_weight': 0.32,
               'feature_preprocessor': ,
               'model_id': 4,
               'rank': 2,
               'sklearn_classifier': BernoulliNB(alpha=23.955206843654683, fit_prior=False)},
        5: {   'balancing': Balancing(random_state=1, strategy='weighting'),
               'classifier': ,
               'cost': 0.12056737588652477,
               'data_preprocessor': ,
               'ensemble_weight': 0.4,
               'feature_preprocessor': ,
               'model_id': 5,
               'rank': 3,
               'sklearn_classifier': AdaBoostClassifier(algorithm='SAMME', base_estimator=DecisionTreeClassifier(max_depth=7), learning_rate=1.371412136822841, n_estimators=135, random_state=1)}}

.. rst-class:: sphx-glr-timing

    **Total running time of the script:** ( 0 minutes 20.324 seconds)

.. _sphx_glr_download_examples_80_extending_example_extending_preprocessor.py:

.. only:: html

    .. container:: sphx-glr-footer sphx-glr-footer-example

        .. container:: binder-badge

            .. image:: images/binder_badge_logo.svg
                :target: https://mybinder.org/v2/gh/automl/auto-sklearn/master?urlpath=lab/tree/notebooks/examples/80_extending/example_extending_preprocessor.ipynb
                :alt: Launch binder
                :width: 150 px

        .. container:: sphx-glr-download sphx-glr-download-python

            :download:`Download Python source code: example_extending_preprocessor.py <example_extending_preprocessor.py>`

        .. container:: sphx-glr-download sphx-glr-download-jupyter

            :download:`Download Jupyter notebook: example_extending_preprocessor.ipynb <example_extending_preprocessor.ipynb>`

.. only:: html

    .. rst-class:: sphx-glr-signature

        `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_