Optuna

Optuna is an automatic hyperparameter optimization software framework, particularly designed for machine learning.

Requirements

This requires Optuna, which can be installed with:

pip install amltk[optuna]

# Or directly
pip install optuna

We provide a thin wrapper called OptunaOptimizer with which you can integrate Optuna into your workflow.

This uses an Optuna-like search_space() for its optimization.

Users should report results using trial.success() with either cost= or values=, depending on the optimization directions given to the underlying optimizer. Please see their documentation for more.

Visit their documentation for what you can pass to OptunaOptimizer.create(), which is forwarded to optuna.create_study().

from __future__ import annotations

import logging

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from amltk.optimization.optimizers.optuna import OptunaOptimizer
from amltk.scheduling import Scheduler
from amltk.optimization import History, Trial, Metric
from amltk.pipeline import Component

logging.basicConfig(level=logging.INFO)


def target_function(trial: Trial, pipeline: Component) -> Trial.Report:
    """Train and score one configuration of ``pipeline`` on the iris dataset.

    Args:
        trial: The trial to evaluate. Its ``config`` is used to configure
            the pipeline.
        pipeline: The pipeline to configure with ``trial.config`` and build
            with the ``"sklearn"`` builder.

    Returns:
        The report from ``trial.success(accuracy=...)`` when fitting and
        scoring complete inside ``trial.begin()``, otherwise the report from
        ``trial.fail()``.
    """
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    clf = pipeline.configure(trial.config).build("sklearn")

    with trial.begin():
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        return trial.success(accuracy=accuracy_score(y_test, y_pred))

    # Only reached when the `with` block exits without returning.
    # NOTE(review): presumably `trial.begin()` suppresses exceptions raised
    # inside it - confirm against the amltk Trial documentation.
    return trial.fail()

# The search space: a random forest with `n_estimators` given the space (10, 100).
pipeline = Component(RandomForestClassifier, space={"n_estimators": (10, 100)})

# The metric reported by `target_function`: not minimized, with bounds (0, 1).
accuracy_metric = Metric("accuracy", minimize=False, bounds=(0, 1))
optimizer = OptunaOptimizer.create(space=pipeline, metrics=accuracy_metric, bucket="optuna-doc-example")

# `target_function` is wrapped as a task of a process-based scheduler
# created with N_WORKERS.
N_WORKERS = 2
scheduler = Scheduler.with_processes(N_WORKERS)
task = scheduler.task(target_function)

# Collects the reports added by the `add_to_history` callback.
history = History()

@scheduler.on_start(repeat=N_WORKERS)
def on_start():
    """Ask the optimizer for a trial and submit it along with the pipeline."""
    task.submit(optimizer.ask(), pipeline)

@task.on_result
def tell_and_launch_trial(_, report: Trial.Report):
    """Pass a finished report to the optimizer and submit the next trial.

    Does nothing when the scheduler is not running.
    """
    if not scheduler.running():
        return

    optimizer.tell(report)
    next_trial = optimizer.ask()
    task.submit(next_trial, pipeline)


@task.on_result
def add_to_history(_, report: Trial.Report) -> None:
    """Add the given report to the module-level ``history``."""
    history.add(report)

# Run the scheduler with `timeout=3` (presumably seconds - confirm).
# NOTE(review): `wait=False` presumably means the scheduler does not wait for
# outstanding tasks when it stops - confirm against the Scheduler docs.
scheduler.run(timeout=3, wait=False)

# Print the collected trial reports as a dataframe.
print(history.df())

                  status  trial_seed  ... time:kind time:unit
name                                  ...                    
trial_number=0   success   923541539  ...      wall   seconds
trial_number=1   success   923541539  ...      wall   seconds
trial_number=2   success   923541539  ...      wall   seconds
trial_number=3   success   923541539  ...      wall   seconds
trial_number=4   success   923541539  ...      wall   seconds
...                  ...         ...  ...       ...       ...
trial_number=59  success   923541539  ...      wall   seconds
trial_number=60  success   923541539  ...      wall   seconds
trial_number=61  success   923541539  ...      wall   seconds
trial_number=62  success   923541539  ...      wall   seconds
trial_number=63  success   923541539  ...      wall   seconds

[64 rows x 19 columns]

Some more documentation

Sorry!

`class OptunaParser` #

Bases: Protocol

A protocol for Optuna search space parser.