Optuna
Optuna is an automatic hyperparameter optimization software framework, particularly designed for machine learning.
Requirements
This requires Optuna
which can be installed with `pip install optuna`.
We provide a thin wrapper called
OptunaOptimizer
from which
you can integrate Optuna
into your workflow.
This uses an Optuna-like search_space()
for
its optimization.
Users should report results using
trial.success()
with either cost=
or values=
depending on any optimization directions
given to the underlying optimizer created. Please see their documentation
for more.
Visit their documentation for what you can pass to
OptunaOptimizer.create()
,
which is forwarded to optuna.create_study()
.
from __future__ import annotations
import logging
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from amltk.optimization.optimizers.optuna import OptunaOptimizer
from amltk.scheduling import Scheduler
from amltk.optimization import History, Trial, Metric
from amltk.pipeline import Component
# Configure root logging at INFO level for this example.
logging.basicConfig(level=logging.INFO)
def target_function(trial: Trial, pipeline: Component) -> Trial.Report:
    """Fit the configured pipeline on Iris and report its hold-out accuracy.

    Args:
        trial: The trial whose ``config`` is applied to the pipeline.
        pipeline: The pipeline to configure and build into an sklearn model.

    Returns:
        The report from ``trial.success(accuracy=...)`` when fitting and
        prediction complete inside ``trial.begin()``, otherwise the report
        from ``trial.fail()``.
    """
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    clf = pipeline.configure(trial.config).build("sklearn")

    with trial.begin():
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        return trial.success(accuracy=accuracy_score(y_test, y_pred))

    # NOTE(review): this line is only reachable if `trial.begin()` suppresses
    # an exception raised inside the `with` body -- confirm against the amltk
    # `Trial.begin` documentation.
    return trial.fail()
# The pipeline is a single RandomForestClassifier component whose search space
# gives `n_estimators` the tuple (10, 100).
pipeline = Component(RandomForestClassifier, space={"n_estimators": (10, 100)})
# The metric is named "accuracy", is not minimized, and has bounds (0, 1).
accuracy_metric = Metric("accuracy", minimize=False, bounds=(0, 1))
# Create the Optuna-backed optimizer from the pipeline's space and the metric.
optimizer = OptunaOptimizer.create(space=pipeline, metrics=accuracy_metric, bucket="optuna-doc-example")
# Passed to the scheduler below and reused as `repeat=` for the `on_start` hook.
N_WORKERS = 2
scheduler = Scheduler.with_processes(N_WORKERS)
# Wrap the objective in a scheduler task so it can be submitted and have
# result callbacks registered on it.
task = scheduler.task(target_function)
# Receives every report through the `add_to_history` callback.
history = History()
@scheduler.on_start(repeat=N_WORKERS)
def on_start() -> None:
    """Ask the optimizer for a fresh trial and submit it to the task."""
    task.submit(optimizer.ask(), pipeline)
@task.on_result
def tell_and_launch_trial(_, report: Trial.Report) -> None:
    """Report a finished trial to the optimizer and submit the next one.

    Does nothing when the scheduler is no longer running.
    """
    if not scheduler.running():
        return

    # Tell before asking so the optimizer has this result when it is asked
    # for the next trial.
    optimizer.tell(report)
    task.submit(optimizer.ask(), pipeline)
@task.on_result
def add_to_history(_, report: Trial.Report) -> None:
    """Add the report of a finished trial to the module-level ``history``."""
    history.add(report)
# Run the scheduler with a timeout of 3.
# NOTE(review): presumably the timeout is in seconds and `wait=False` means
# running tasks are not waited for at shutdown -- confirm against the amltk
# Scheduler documentation.
scheduler.run(timeout=3, wait=False)
# Print the collected history via its `df()` view.
print(history.df())
status trial_seed ... time:kind time:unit
name ...
trial_number=0 success 923541539 ... wall seconds
trial_number=1 success 923541539 ... wall seconds
trial_number=2 success 923541539 ... wall seconds
trial_number=3 success 923541539 ... wall seconds
trial_number=4 success 923541539 ... wall seconds
... ... ... ... ... ...
trial_number=59 success 923541539 ... wall seconds
trial_number=60 success 923541539 ... wall seconds
trial_number=61 success 923541539 ... wall seconds
trial_number=62 success 923541539 ... wall seconds
trial_number=63 success 923541539 ... wall seconds
[64 rows x 19 columns]
Some more documentation
Sorry!
class OptunaParser
#
class OptunaOptimizer(*, study, metrics, bucket=None, seed=None, space)
#
An optimizer that uses Optuna to optimize a search space.
PARAMETER | DESCRIPTION |
---|---|
study |
The Optuna Study to use.
TYPE:
|
metrics |
The metrics to optimize. |
bucket |
The bucket given to trials generated by this optimizer.
TYPE:
|
space |
Defines the current search space.
TYPE:
|
seed |
The seed to use for the sampler and trials.
TYPE:
|
Source code in src/amltk/optimization/optimizers/optuna.py
def create(*, space, metrics, bucket=None, sampler=None, seed=None, **kwargs)
classmethod
#
Create a new Optuna optimizer. For more information, check Optuna documentation here.
PARAMETER | DESCRIPTION |
---|---|
space |
Defines the current search space.
TYPE:
|
metrics |
The metrics to optimize. |
bucket |
The bucket given to trials generated by this optimizer.
TYPE:
|
sampler |
The sampler to use. Default is to use:
TYPE:
|
seed |
The seed to use for the sampler and trials.
TYPE:
|
**kwargs |
Additional arguments to pass to optuna.create_study().
TYPE:
|
RETURNS | DESCRIPTION |
---|---|
Self
|
The newly created optimizer.
TYPE:
|
Source code in src/amltk/optimization/optimizers/optuna.py
def ask()
#
Ask the optimizer for a new config.
RETURNS | DESCRIPTION |
---|---|
Trial[Trial]
|
The trial info for the new config. |
Source code in src/amltk/optimization/optimizers/optuna.py
def tell(report)
#
Tell the optimizer the result of the sampled config.
PARAMETER | DESCRIPTION |
---|---|
report |
The report of the trial. |