.. DO NOT EDIT.
.. THIS FILE WAS AUTOMATICALLY GENERATED BY SPHINX-GALLERY.
.. TO MAKE CHANGES, EDIT THE SOURCE PYTHON FILE:
.. "examples/20_basic/example_multilabel_classification.py"
.. LINE NUMBERS ARE GIVEN BELOW.

.. only:: html

    .. note::
        :class: sphx-glr-download-link-note

        Click :ref:`here <sphx_glr_download_examples_20_basic_example_multilabel_classification.py>`
        to download the full example code or to run this example in your browser via Binder

.. rst-class:: sphx-glr-example-title

.. _sphx_glr_examples_20_basic_example_multilabel_classification.py:


==========================
Multi-label Classification
==========================

This example shows how to format the targets for a multilabel classification
problem. Details on multilabel classification can be found
`here <https://scikit-learn.org/stable/modules/multiclass.html>`_.

.. GENERATED FROM PYTHON SOURCE LINES 10-20

.. code-block:: default


    import numpy as np
    from pprint import pprint

    import sklearn.datasets
    import sklearn.metrics
    import sklearn.model_selection
    from sklearn.utils.multiclass import type_of_target

    import autosklearn.classification


.. GENERATED FROM PYTHON SOURCE LINES 21-23

Data Loading
============

.. GENERATED FROM PYTHON SOURCE LINES 23-44

.. code-block:: default

    # Using the reuters multilabel dataset -- https://www.openml.org/d/40594
    X, y = sklearn.datasets.fetch_openml(data_id=40594, return_X_y=True, as_frame=False)

    # fetch_openml returns a numpy array of "TRUE"/"FALSE" strings. Re-map it to
    # an integer dtype with ones and zeros.
    # This is to comply with the Scikit-learn requirement:
    # "Positive classes are indicated with 1 and negative classes with 0 or -1."
    # More information on: https://scikit-learn.org/stable/modules/multiclass.html
    y[y == "TRUE"] = 1
    y[y == "FALSE"] = 0
    y = y.astype(int)

    # Using type_of_target is a good way to make sure your data
    # is properly formatted
    print(f"type_of_target={type_of_target(y)}")

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, random_state=1
    )

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    type_of_target=multilabel-indicator
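The Reuters targets above already arrive as a binary indicator matrix. If your
labels instead come as per-sample collections of tags, scikit-learn's
``MultiLabelBinarizer`` produces the same format. The snippet below is a minimal
sketch with made-up tags (it is not part of the generated example) showing how
such labels could be converted before calling ``fit``.

.. code-block:: default

    from sklearn.preprocessing import MultiLabelBinarizer
    from sklearn.utils.multiclass import type_of_target

    # Hypothetical raw labels: each sample carries a set of string tags
    raw_labels = [{"grain", "wheat"}, {"trade"}, {"grain"}]

    # Convert the label sets into a binary indicator matrix (one column per label)
    mlb = MultiLabelBinarizer()
    y_indicator = mlb.fit_transform(raw_labels)

    print(mlb.classes_)                 # ['grain' 'trade' 'wheat']
    print(y_indicator)                  # rows of 0s and 1s, one row per sample
    print(type_of_target(y_indicator))  # 'multilabel-indicator'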
.. GENERATED FROM PYTHON SOURCE LINES 45-47

Building the classifier
=======================

.. GENERATED FROM PYTHON SOURCE LINES 47-58

.. code-block:: default

    automl = autosklearn.classification.AutoSklearnClassifier(
        time_left_for_this_task=60,
        per_run_time_limit=30,
        # The two flags below are provided to speed up calculations
        # and are not recommended for a real implementation
        initial_configurations_via_metalearning=0,
        smac_scenario_args={"runcount_limit": 1},
    )
    automl.fit(X_train, y_train, dataset_name="reuters")

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    AutoSklearnClassifier(ensemble_class=,
                          initial_configurations_via_metalearning=0,
                          per_run_time_limit=30,
                          smac_scenario_args={'runcount_limit': 1},
                          time_left_for_this_task=60)

.. GENERATED FROM PYTHON SOURCE LINES 59-61

View the models found by auto-sklearn
=====================================

.. GENERATED FROM PYTHON SOURCE LINES 61-65

.. code-block:: default

    print(automl.leaderboard())

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

              rank  ensemble_weight           type      cost  duration
    model_id
    2            1              1.0  random_forest  0.447294  4.031171

.. GENERATED FROM PYTHON SOURCE LINES 66-68

Print the final ensemble constructed by auto-sklearn
====================================================

.. GENERATED FROM PYTHON SOURCE LINES 68-71

.. code-block:: default

    pprint(automl.show_models(), indent=4)

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    {   2: {   'balancing': Balancing(random_state=1),
               'classifier': ,
               'cost': 0.4472941828699525,
               'data_preprocessor': ,
               'ensemble_weight': 1.0,
               'feature_preprocessor': ,
               'model_id': 2,
               'rank': 1,
               'sklearn_classifier': RandomForestClassifier(max_features=15, n_estimators=512, n_jobs=1,
                                                            random_state=1, warm_start=True)}}

.. GENERATED FROM PYTHON SOURCE LINES 72-74

Print statistics about the auto-sklearn run
===========================================

.. GENERATED FROM PYTHON SOURCE LINES 74-79

.. code-block:: default

    # Print statistics about the auto-sklearn run, such as the number of
    # iterations and the number of models that failed with a time out.
    print(automl.sprint_statistics())

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    auto-sklearn results:
      Dataset name: reuters
      Metric: f1_macro
      Best validation score: 0.552706
      Number of target algorithm runs: 1
      Number of successful target algorithm runs: 1
      Number of crashed target algorithm runs: 0
      Number of target algorithms that exceeded the time limit: 0
      Number of target algorithms that exceeded the memory limit: 0

.. GENERATED FROM PYTHON SOURCE LINES 80-82

Get the Score of the final ensemble
===================================

.. GENERATED FROM PYTHON SOURCE LINES 82-85

.. code-block:: default

    predictions = automl.predict(X_test)
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))

.. rst-class:: sphx-glr-script-out

 .. code-block:: none

    Accuracy score 0.604

.. rst-class:: sphx-glr-timing

   **Total running time of the script:** ( 0 minutes 24.585 seconds)


.. _sphx_glr_download_examples_20_basic_example_multilabel_classification.py:

.. only:: html

  .. container:: sphx-glr-footer sphx-glr-footer-example

    .. container:: binder-badge

      .. image:: images/binder_badge_logo.svg
        :target: https://mybinder.org/v2/gh/automl/auto-sklearn/master?urlpath=lab/tree/notebooks/examples/20_basic/example_multilabel_classification.ipynb
        :alt: Launch binder
        :width: 150 px

    .. container:: sphx-glr-download sphx-glr-download-python

      :download:`Download Python source code: example_multilabel_classification.py <example_multilabel_classification.py>`

    .. container:: sphx-glr-download sphx-glr-download-jupyter

      :download:`Download Jupyter notebook: example_multilabel_classification.ipynb <example_multilabel_classification.ipynb>`


.. only:: html

 .. rst-class:: sphx-glr-signature

    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.github.io>`_