Source code for smac.utils.io.input_reader
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
from smac.configspace import ConfigurationSpace
from smac.configspace import json as pcs_json
from smac.configspace import pcs, pcs_new
from smac.utils.logging import PickableLoggerAdapter
# Module metadata (authorship, licensing and version information).
__author__ = "Marius Lindauer"
__copyright__ = "Copyright 2015, ML4AAD"
__license__ = "3-clause BSD"
__maintainer__ = "Marius Lindauer"
__email__ = "lindauer@cs.uni-freiburg.de"
__version__ = "0.0.1"
# Return type of InputReader.read_instance_file: one inner list per line of
# the instance file, holding that line's whitespace-separated tokens.
INSTANCE_TYPE = List[List[str]]
# Return type of InputReader.read_instance_features_file: the feature names
# from the header line, and a mapping from the first column of each data row
# (the instance name) to a numpy array built from the remaining columns.
INSTANCE_FEATURES_TYPE = Tuple[List[str], Dict[str, np.ndarray]]
class InputReader(object):
    """Reading all input files for SMAC (scenario file, instance files, ...)

    Note: Most of this code was taken from the pysmac repository.
    We copy it here because we don't want smac3 to depend
    on an earlier version!
    """

    def __init__(self) -> None:
        pass

    def read_scenario_file(self, fn: str) -> Dict[str, Any]:
        """Parse a scenario file into a dictionary of options.

        Every non-empty line is either ``name = value`` or ``name value``.
        Everything after a ``#`` is treated as a comment and dropped.
        Known aliases of an option name are translated to one canonical
        name; unknown names are kept unchanged. Runs of whitespace inside
        names and values are collapsed to a single space.

        Parameters
        ----------
        fn: string
            File name of scenario file

        Returns
        -------
        dict : dictionary
            (key, value) pairs are (variable name, variable value);
            all values are returned as strings
        """
        # translate the different option names to a canonical name
        # kept for backwards-compatibility
        scenario_option_names = {
            "algo-exec": "algo",
            "algoExec": "algo",
            "algo": "algo",
            "algo-exec-dir": "execdir",
            "exec-dir": "execdir",
            "execDir": "execdir",
            "execdir": "execdir",
            "algo-deterministic": "deterministic",
            "deterministic": "deterministic",
            "paramFile": "paramfile",
            "pcs-file": "paramfile",
            "param-file": "paramfile",
            "paramfile": "paramfile",
            "run-obj": "run_obj",
            "run-objective": "run_obj",
            "runObj": "run_obj",
            "run_obj": "run_obj",
            "overall_obj": "overall_obj",
            "intra-obj": "overall_obj",
            "intra-instance-obj": "overall_obj",
            "overall-obj": "overall_obj",
            "intraInstanceObj": "overall_obj",
            "overallObj": "overall_obj",
            "intra_instance_obj": "overall_obj",
            "cost-for-crash": "cost_for_crash",
            "cost_for_crash": "cost_for_crash",
            "algo-cutoff-time": "cutoff_time",
            "target-run-cputime-limit": "cutoff_time",
            "target_run_cputime_limit": "cutoff_time",
            "cutoff-time": "cutoff_time",
            "cutoffTime": "cutoff_time",
            "cutoff_time": "cutoff_time",
            "memory-limit": "memory_limit",
            "memory_limit": "memory_limit",
            "cputime-limit": "tuner_timeout",
            "cputime_limit": "tuner_timeout",
            "tunertime-limit": "tuner_timeout",
            "tuner-timeout": "tuner_timeout",
            "tunerTimeout": "tuner_timeout",
            "tuner_timeout": "tuner_timeout",
            "wallclock-limit": "wallclock_limit",
            "runtime-limit": "wallclock_limit",
            "runtimeLimit": "wallclock_limit",
            "wallClockLimit": "wallclock_limit",
            "wallclock_limit": "wallclock_limit",
            "output-dir": "output_dir",
            "outputDirectory": "output_dir",
            "outdir": "output_dir",
            "output_dir": "output_dir",
            "instances": "instance_file",
            "instance-file": "instance_file",
            "instance-dir": "instance_file",
            "instanceFile": "instance_file",
            "instance_file": "instance_file",
            "i": "instance_file",
            "instance_seed_file": "instance_file",
            "test-instances": "test_instance_file",
            "test-instance-file": "test_instance_file",
            "test-instance-dir": "test_instance_file",
            "testInstanceFile": "test_instance_file",
            "test_instance_file": "test_instance_file",
            "test_instance_seed_file": "test_instance_file",
            "feature-file": "feature_file",
            "instanceFeatureFile": "feature_file",
            "feature_file": "feature_file",
            "runcount-limit": "runcount_limit",
            "runcount_limit": "runcount_limit",
            "totalNumRunsLimit": "runcount_limit",
            "numRunsLimit": "runcount_limit",
            "numberOfRunsLimit": "runcount_limit",
            "initial-incumbent": "initial_incumbent",
            "initial_incumbent": "initial_incumbent",
        }
        scenario_dict: Dict[str, Any] = {}
        with open(fn, "r") as fh:
            for raw_line in fh:
                # Remove the comment first and strip *all* surrounding
                # whitespace afterwards, so that tab-only lines and indented
                # comments count as empty instead of crashing below.
                line = raw_line.partition("#")[0].strip()
                if not line:
                    continue
                if "=" in line:
                    # Split at the first "=" only: any further "=" belongs to
                    # the value (e.g. "algo = python run.py --seed=1").
                    name, _, value = line.partition("=")
                    name = " ".join(name.split())
                    value = " ".join(value.split())
                else:
                    tokens = line.split()
                    name = tokens[0]
                    value = " ".join(tokens[1:])
                scenario_dict[scenario_option_names.get(name, name)] = value
        return scenario_dict

    def read_instance_file(self, fn: str) -> INSTANCE_TYPE:
        """Read an instance file, one instance per line.

        Blank lines are skipped so that they do not show up as empty
        instances in the result.

        Parameters
        ----------
        fn: string
            File name of instance file

        Returns
        -------
        instances: list
            Each element is a list where the first element is the
            instance name followed by additional
            information for the specific instance.
        """
        with open(fn, "r") as fh:
            tokenized = [line.split() for line in fh]
        # str.split() yields an empty list for blank lines; drop those.
        return [tokens for tokens in tokenized if tokens]

    def read_instance_features_file(
        self,
        fn: str,
    ) -> INSTANCE_FEATURES_TYPE:
        """Read a comma-separated instance feature file.

        The first line is the header: its first column is ignored and the
        remaining columns are the feature names. Every following non-blank
        line holds the instance name in the first column and the feature
        values in the remaining ones. Blank lines are skipped.

        Parameters
        ----------
        fn: string
            File name of instance feature file

        Returns
        -------
        features: tuple
            first entry is a list of the feature names,
            second one is a dict with 'instance name' -
            'numpy array containing the features' key-value pairs

        Raises
        ------
        IndexError
            If the file is empty (there is no header line).
        ValueError
            If a feature value cannot be converted to a float.
        """
        with open(fn, "r") as fh:
            lines = fh.readlines()
        header = lines[0]
        feature_names = [name.strip() for name in header.rstrip("\n").split(",")[1:]]
        instances: Dict[str, np.ndarray] = {}
        for line in lines[1:]:
            row = line.strip()
            if not row:
                # A blank line would otherwise register an instance named ""
                # with an empty feature vector.
                continue
            fields = row.split(",")
            instances[fields[0]] = np.array(fields[1:], dtype=np.double)
        return feature_names, instances

    @staticmethod
    def read_pcs_file(fn: str, logger: Optional[PickableLoggerAdapter] = None) -> ConfigurationSpace:
        """Encapsulates generating configuration space object from file.

        Automatically detects whether the cs is saved in json, pcs or pcs_new:
        a file name ending in ``.json`` is read as json; anything else is
        first read with the old pcs reader and, if that raises
        ``NotImplementedError``, with the new pcs reader.

        Parameters
        ----------
        fn: string
            File name of pcs file
        logger: PickableLoggerAdapter, optional
            If given, debug messages about the chosen format are logged to it

        Returns
        -------
        ConfigSpace: ConfigSpace
        """
        # Three possible formats: json, pcs and pcs_new. We prefer json.
        with open(fn) as fp:
            if fn.endswith(".json"):
                cs = pcs_json.read(fp.read())
                if logger:
                    logger.debug("Loading pcs as json from: %s", fn)
            else:
                pcs_str = fp.readlines()
                try:
                    cs = pcs.read(pcs_str)
                except NotImplementedError:
                    if logger:
                        logger.debug("Could not parse pcs file with old format; trying new format ...")
                    cs = pcs_new.read(pcs_str)
        return cs