Tabular Classification with n parallel jobs
The following example shows how to fit a classification model in parallel on 2 cores with AutoPyTorch.
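Parallelism is requested purely through the n_jobs argument of TabularClassificationTask, and each of the resulting jobs receives its own memory budget via memory_limit in search(). When more than one job is used, AutoPyTorch drives the workers through a dask.distributed client, which appears to be where the CancelledError tracebacks in the captured output below come from; they show up while the client shuts down after the search. The snippet below is a minimal sketch of just this mechanism, using a small synthetic dataset as a stand-in for the OpenML data; the complete, unmodified script of this example follows further down.

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask

if __name__ == '__main__':
    # Synthetic stand-in data; the full example below uses OpenML data_id=40981.
    X, y = sklearn.datasets.make_classification(n_samples=500, random_state=1)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X, y, random_state=1,
    )

    # n_jobs is the only argument needed to request parallel evaluation.
    api = TabularClassificationTask(n_jobs=2, seed=42)
    api.search(
        X_train=X_train,
        y_train=y_train,
        optimize_metric='accuracy',
        total_walltime_limit=120,        # shorter budget than the full example
        func_eval_time_limit_secs=30,
        memory_limit=3072,               # MB available to each of the 2 jobs
    )
    print(api.sprint_statistics())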
[ERROR] [2022-08-23 17:15:49,771:asyncio.events]
Traceback (most recent call last):
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/utils.py", line 799, in wrapper
return await func(*args, **kwargs)
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1246, in _reconnect
await self._ensure_connected(timeout=timeout)
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1276, in _ensure_connected
comm = await connect(
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect
await asyncio.sleep(backoff)
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/asyncio/tasks.py", line 659, in sleep
return await future
asyncio.exceptions.CancelledError
[ERROR] [2022-08-23 17:15:49,814:asyncio.events]
Traceback (most recent call last):
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/utils.py", line 799, in wrapper
return await func(*args, **kwargs)
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1435, in _handle_report
await self._reconnect()
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/utils.py", line 799, in wrapper
return await func(*args, **kwargs)
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1246, in _reconnect
await self._ensure_connected(timeout=timeout)
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/client.py", line 1276, in _ensure_connected
comm = await connect(
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/site-packages/distributed/comm/core.py", line 315, in connect
await asyncio.sleep(backoff)
File "/opt/hostedtoolcache/Python/3.8.13/x64/lib/python3.8/asyncio/tasks.py", line 659, in sleep
return await future
asyncio.exceptions.CancelledError
{'accuracy': 0.8554913294797688}
autoPyTorch results:
Dataset name: 79a6d6a9-2306-11ed-884d-557eb8b24584
Optimisation Metric: accuracy
Best validation score: 0.8713450292397661
Number of target algorithm runs: 45
Number of successful target algorithm runs: 37
Number of crashed target algorithm runs: 7
Number of target algorithms that exceeded the time limit: 1
Number of target algorithms that exceeded the memory limit: 0
import os
import tempfile as tmp
import warnings

os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['OPENBLAS_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

warnings.simplefilter(action='ignore', category=UserWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

import sklearn.datasets
import sklearn.model_selection

from autoPyTorch.api.tabular_classification import TabularClassificationTask

if __name__ == '__main__':

    ############################################################################
    # Data Loading
    # ============
    X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        X,
        y,
        random_state=1,
    )

    ############################################################################
    # Build and fit a classifier
    # ==========================
    api = TabularClassificationTask(
        n_jobs=2,  # evaluate two configurations in parallel
        seed=42,
    )

    ############################################################################
    # Search for an ensemble of machine learning algorithms
    # =====================================================
    api.search(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test.copy(),
        y_test=y_test.copy(),
        optimize_metric='accuracy',
        # Overall time budget for the search, in seconds
        total_walltime_limit=300,
        # Time budget for a single pipeline evaluation, in seconds
        func_eval_time_limit_secs=50,
        # Each one of the 2 jobs is allocated 3GB
        memory_limit=3072,
    )

    ############################################################################
    # Print the final ensemble performance
    # ====================================
    y_pred = api.predict(X_test)
    score = api.score(y_pred, y_test)
    print(score)
    # Print the final ensemble built by AutoPyTorch
    print(api.sprint_statistics())
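The score dictionary and the statistics shown in the output above are produced by the final api.score(...) and api.sprint_statistics() calls of this script. To also see which pipelines ended up in the final ensemble and with which weights, the API offers show_models(); it is not used in this example and its output format can differ between AutoPyTorch versions, so the following line, appended inside the __main__ block, is only an optional sketch:

    # Optional follow-up (not part of the original example): list the pipelines
    # in the final ensemble together with their ensemble weights.
    print(api.show_models())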
Total running time of the script: ( 5 minutes 37.985 seconds)