Tabular Classification with n parallel jobs

The following example shows how to fit a sample classification model in parallel on 2 cores with AutoPyTorch.

(The tracebacks below are emitted while the Dask client used for the parallel jobs shuts down and do not affect the results.)

[ERROR] [2022-08-23 15:08:15,342:asyncio.events]
Traceback (most recent call last):
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/utils.py", line 799, in wrapper
    return await func(*args, **kwargs)
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1246, in _reconnect
    await self._ensure_connected(timeout=timeout)
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1276, in _ensure_connected
    comm = await connect(
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect
    await asyncio.sleep(backoff)
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/asyncio/tasks.py", line 659, in sleep
    return await future
asyncio.exceptions.CancelledError
[ERROR] [2022-08-23 15:08:15,360:asyncio.events]
Traceback (most recent call last):
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/utils.py", line 799, in wrapper
    return await func(*args, **kwargs)
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1435, in _handle_report
    await self._reconnect()
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/utils.py", line 799, in wrapper
    return await func(*args, **kwargs)
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1246, in _reconnect
    await self._ensure_connected(timeout=timeout)
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1276, in _ensure_connected
    comm = await connect(
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect
    await asyncio.sleep(backoff)
  File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/asyncio/tasks.py", line 659, in sleep
    return await future
asyncio.exceptions.CancelledError
{'accuracy': 0.8728323699421965}
autoPyTorch results:
        Dataset name: a93dcf4e-22f4-11ed-8835-b1fa420cf160
        Optimisation Metric: accuracy
        Best validation score: 0.8713450292397661
        Number of target algorithm runs: 42
        Number of successful target algorithm runs: 32
        Number of crashed target algorithm runs: 7
        Number of target algorithms that exceeded the time limit: 3
        Number of target algorithms that exceeded the memory limit: 0
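
When choosing n_jobs it helps to know how many CPU cores are available on the machine; a minimal check using only the Python standard library (this snippet is not part of the original example):

import os

# Number of logical CPU cores visible to this process
print(os.cpu_count())

The full example, which runs the search with n_jobs=2, follows.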

import os
import tempfile as tmp
import warnings

# Use the system temp dir for joblib and limit each worker to a single
# BLAS/OpenMP thread so the 2 parallel jobs do not oversubscribe the CPU
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask

if __name__ == '__main__':
    ############################################################################
    # Data Loading
    # ============
    # Fetch OpenML dataset 40981 (Australian credit approval) as a pandas DataFrame
    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X,
        y,
        random_state=1,
    )

    ############################################################################
    # Build and fit a classifier
    # ==========================
    api = TabularClassificationTask(
        # Run the search with 2 workers in parallel
        n_jobs=2,
        seed=42,
    )

    ############################################################################
    # Search for an ensemble of machine learning algorithms
    # =====================================================
    api.search(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test.copy(),
        y_test=y_test.copy(),
        optimize_metric='accuracy',
        # Total wallclock budget for the search and time limit per single evaluation
        total_walltime_limit=300,
        func_eval_time_limit_secs=50,
        # Each of the 2 jobs is allocated 3GB (memory_limit is given in MB)
        memory_limit=3072,
    )

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    y_pred = api.predict(X_test)
    score = api.score(y_pred, y_test)
    print(score)
    # Print statistics from the search
    print(api.sprint_statistics())
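
Beyond the aggregate statistics printed above, the individual pipelines in the final ensemble can also be inspected once the search has finished; a short follow-up sketch using the fitted api object (not part of the timed script above):

# Show the models that make up the final ensemble
print(api.show_models())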

Total running time of the script: ( 5 minutes 30.701 seconds)

Gallery generated by Sphinx-Gallery