from typing import List, Tuple
from smac.configspace import ConfigurationSpace
from smac.runhistory.runhistory import DataOrigin, RunHistory
from smac.scenario.scenario import Scenario
__copyright__ = "Copyright 2021, AutoML.org Freiburg-Hannover"
__license__ = "3-clause BSD"
def merge_foreign_data_from_file(
    scenario: Scenario,
    runhistory: RunHistory,
    in_scenario_fn_list: List[str],
    in_runhistory_fn_list: List[str],
    cs: ConfigurationSpace,
) -> Tuple[Scenario, RunHistory]:
    """Load foreign scenarios and runhistories from disk and merge them.

    Each scenario file is turned into a ``Scenario`` (with ``output_dir``
    set to an empty string), each runhistory dump is read into a fresh
    ``RunHistory`` via ``load_json``, and the results are handed over to
    ``merge_foreign_data`` together with ``scenario`` and ``runhistory``.

    Parameters
    ----------
    scenario: Scenario
        original scenario -- passed on to ``merge_foreign_data``
    runhistory: RunHistory
        original runhistory -- passed on to ``merge_foreign_data``
    in_scenario_fn_list: List[str]
        file names of the foreign scenarios; must not be empty
    in_runhistory_fn_list: List[str]
        file names of the foreign runhistory dumps
    cs: ConfigurationSpace
        configuration space used when reading the runhistory dumps

    Returns
    -------
    scenario: Scenario
    runhistory: RunHistory
        whatever ``merge_foreign_data`` returns

    Raises
    ------
    ValueError
        if ``in_scenario_fn_list`` is empty
    """
    if not in_scenario_fn_list:
        raise ValueError(
            "To read warmstart data from previous runhistories,"
            " the corresponding scenarios are required. Use option --warmstart_scenario"
        )

    def _load_runhistory(dump_path: str) -> RunHistory:
        # Read a single runhistory dump into a new, empty RunHistory.
        loaded = RunHistory()
        loaded.load_json(dump_path, cs)
        return loaded

    # Scenarios are built first, runhistories second (same order as before).
    foreign_scenarios = []
    for scenario_path in in_scenario_fn_list:
        foreign_scenarios.append(Scenario(scenario=scenario_path, cmd_options={"output_dir": ""}))

    foreign_runhistories = [_load_runhistory(dump_path) for dump_path in in_runhistory_fn_list]

    return merge_foreign_data(
        scenario,
        runhistory,
        in_scenario_list=foreign_scenarios,
        in_runhistory_list=foreign_runhistories,
    )
def merge_foreign_data(
    scenario: Scenario,
    runhistory: RunHistory,
    in_scenario_list: List[Scenario],
    in_runhistory_list: List[RunHistory],
) -> Tuple[Scenario, RunHistory]:
    """Extend ``scenario`` and ``runhistory`` with data from foreign runs.

    The foreign scenarios are expected to share the number of instance
    features, the configuration space (pcs) and the cutoff with ``scenario``.
    All foreign scenarios are checked *before* anything is modified, so a
    mismatch in any of them leaves ``scenario`` and ``runhistory`` untouched.

    Both ``scenario`` and ``runhistory`` are modified in place and returned.

    Parameters
    ----------
    scenario: Scenario
        original scenario -- its feature dictionary will be extended
    runhistory: RunHistory
        original runhistory -- will be extended by further data points
    in_scenario_list: List[Scenario]
        foreign scenarios whose instance features are merged into ``scenario``
    in_runhistory_list: List[RunHistory]
        foreign runhistories whose entries are merged into ``runhistory``

    Returns
    -------
    scenario: Scenario
    runhistory: RunHistory
        the objects passed in, after the merge

    Raises
    ------
    ValueError
        if a foreign scenario differs from ``scenario`` in its number of
        features, its configuration space or its cutoff (raised before any
        modification), or if -- after merging -- an entry of ``runhistory``
        refers to an instance without features in ``scenario.feature_dict``
        (at that point both objects have already been extended).
    """
    # Snapshot the input so it can be traversed twice even if a one-shot
    # iterable was passed.
    in_scenarios = list(in_scenario_list)

    # Pass 1: validate every foreign scenario. Doing this up front prevents a
    # late mismatch from leaving `scenario.feature_dict` half-merged.
    for in_scenario in in_scenarios:
        if scenario.n_features != in_scenario.n_features:
            raise ValueError(
                "Feature Space has to be the same for both scenarios (%d vs %d)."
                % (scenario.n_features, in_scenario.n_features)
            )
        if scenario.cs != in_scenario.cs:  # type: ignore[attr-defined] # noqa F821
            raise ValueError("PCS of both scenarios have to be identical.")
        if scenario.cutoff != in_scenario.cutoff:  # type: ignore[attr-defined] # noqa F821
            raise ValueError("Cutoffs of both scenarios have to be identical.")

    # Pass 2: add the further instance features (later scenarios win on
    # duplicate instance ids, as before).
    for in_scenario in in_scenarios:
        scenario.feature_dict.update(in_scenario.feature_dict)

    # Extend the runhistory with the foreign data points.
    for rh in in_runhistory_list:
        runhistory.update(rh, origin=DataOrigin.EXTERNAL_DIFFERENT_INSTANCES)

    # Every entry of the merged runhistory must refer to a known instance.
    for run_key in runhistory.data:
        if scenario.feature_dict.get(run_key.instance_id) is None:
            raise ValueError('Instance feature for "%s" was not found in scenario data.' % (run_key.instance_id))

    runhistory.compute_all_costs(instances=scenario.train_insts)
    return scenario, runhistory