Optuna
Optuna is an automatic hyperparameter optimization software framework, particularly designed for machine learning.
Requirements
This requires Optuna, which can be installed with `pip install optuna`.
We provide a thin wrapper called
OptunaOptimizer from which
you can integrate Optuna into your workflow.
This uses an Optuna-like search_space() for
its optimization.
Users should report results using
trial.success()
with either cost= or values= depending on any optimization directions
given to the underlying optimizer created. Please see their documentation
for more.
Visit their documentation for what you can pass to
OptunaOptimizer.create(),
which is forwarded to optuna.create_study().
from __future__ import annotations
import logging
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from amltk.optimization.optimizers.optuna import OptunaOptimizer
from amltk.scheduling import Scheduler
from amltk.optimization import History, Trial, Metric
from amltk.pipeline import Component
# Emit INFO-level (and above) log records for the duration of the example.
logging.basicConfig(level=logging.INFO)
def target_function(trial: Trial, pipeline: Component) -> Trial.Report:
    """Fit and score one sampled configuration on the iris dataset.

    Args:
        trial: The trial to evaluate; its ``config`` is applied to ``pipeline``.
        pipeline: The component to configure and build into an sklearn model.

    Returns:
        A success report carrying the ``accuracy`` on the held-out split, or
        a failure report if the ``with trial.begin()`` block is left without
        returning.
    """
    X, y = load_iris(return_X_y=True)
    # No random_state is given, so every trial sees a different random split.
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    clf = pipeline.configure(trial.config).build("sklearn")

    with trial.begin():
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        return trial.success(accuracy=accuracy_score(y_test, y_pred))

    # NOTE(review): only reachable if the `with` block exits without
    # returning -- presumably when `trial.begin()` suppresses an exception
    # raised inside it; confirm against the amltk `Trial` documentation.
    return trial.fail()
# The single metric being optimised: accuracy, to be maximised, bounded to (0, 1).
accuracy_metric = Metric("accuracy", minimize=False, bounds=(0, 1))
# The search space is attached to the component itself: only `n_estimators`
# is searched, over the range (10, 100).
pipeline = Component(RandomForestClassifier, space={"n_estimators": (10, 100)})
# The optimizer takes the pipeline as its search space.
optimizer = OptunaOptimizer.create(space=pipeline, metrics=accuracy_metric)
# Number of worker processes; also used as the number of trials launched at start.
N_WORKERS = 2
scheduler = Scheduler.with_processes(N_WORKERS)
# Wrap the target function as a task so callbacks can be attached to its results.
task = scheduler.task(target_function)
# Collects every report produced during the run (filled in `add_to_history`).
history = History()
@scheduler.on_start(repeat=N_WORKERS)
def on_start():
    """Ask the optimizer for a trial and submit it for evaluation.

    Registered with ``repeat=N_WORKERS`` -- presumably so that one trial is
    launched per worker when the scheduler starts (confirm against the
    ``Scheduler.on_start`` documentation).
    """
    task.submit(optimizer.ask(), pipeline)
@task.on_result
def tell_and_launch_trial(_, report: Trial.Report):
    """Feed a finished trial back to the optimizer and launch the next one.

    Does nothing when the scheduler is no longer running, so no new work is
    submitted (and the report is not told to the optimizer) after shutdown.
    """
    if not scheduler.running():
        return

    optimizer.tell(report)
    next_trial = optimizer.ask()
    task.submit(next_trial, pipeline)
@task.on_result
def add_to_history(_, report: Trial.Report):
    """Record the report of every finished trial in the module-level ``history``."""
    history.add(report)
# Run the optimisation loop with a timeout of 3 (presumably seconds).
# NOTE(review): `wait=False` presumably means in-flight tasks are not waited
# for once the timeout is reached -- confirm against `Scheduler.run`.
scheduler.run(timeout=3, wait=False)
# Show the collected reports as a dataframe.
print(history.df())
status trial_seed ... time:kind time:unit
name ...
trial_number=0 success 635955917 ... wall seconds
trial_number=1 success 635955917 ... wall seconds
trial_number=3 success 635955917 ... wall seconds
trial_number=2 success 635955917 ... wall seconds
trial_number=5 success 635955917 ... wall seconds
... ... ... ... ... ...
trial_number=74 success 635955917 ... wall seconds
trial_number=75 success 635955917 ... wall seconds
trial_number=76 success 635955917 ... wall seconds
trial_number=77 success 635955917 ... wall seconds
trial_number=78 success 635955917 ... wall seconds
[79 rows x 19 columns]
Some more documentation
Sorry!
class OptunaParser
#
class OptunaOptimizer(*, study, metrics, bucket=None, seed=None, space)
#
An optimizer that uses Optuna to optimize a search space.
| PARAMETER | DESCRIPTION |
|---|---|
study |
The Optuna Study to use.
TYPE:
|
metrics |
The metrics to optimize. |
bucket |
The bucket given to trials generated by this optimizer.
TYPE:
|
space |
Defines the current search space.
TYPE:
|
seed |
The seed to use for the sampler and trials.
TYPE:
|
Source code in src/amltk/optimization/optimizers/optuna.py
def create(*, space, metrics, bucket=None, sampler=None, seed=None, **kwargs)
classmethod
#
Create a new Optuna optimizer. For more information, see the Optuna documentation for optuna.create_study().
| PARAMETER | DESCRIPTION |
|---|---|
space |
Defines the current search space.
TYPE:
|
metrics |
The metrics to optimize. |
bucket |
The bucket given to trials generated by this optimizer.
TYPE:
|
sampler |
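The sampler to use. By default, Optuna's TPESampler is used for single-objective optimization and NSGAIISampler for multi-objective optimization.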
TYPE:
|
seed |
The seed to use for the sampler and trials.
TYPE:
|
**kwargs |
Additional arguments to pass to optuna.create_study().
TYPE:
|
| RETURNS | DESCRIPTION |
|---|---|
Self
|
The newly created optimizer.
TYPE:
|
Source code in src/amltk/optimization/optimizers/optuna.py
def ask()
#
Ask the optimizer for a new config.
| RETURNS | DESCRIPTION |
|---|---|
Trial[Trial]
|
The trial info for the new config. |
Source code in src/amltk/optimization/optimizers/optuna.py
def tell(report)
#
Tell the optimizer the result of the sampled config.
| PARAMETER | DESCRIPTION |
|---|---|
report |
The report of the trial. |