Source code for smac.utils.io.result_merging

from typing import Any, Dict, List, Optional, Sequence, Union

import json
import os
from pathlib import Path

import numpy as np
import regex as re
from ConfigSpace.read_and_write import json as csjson

from smac.runhistory.runhistory import RunHistory


[docs]def glob_re(pattern: str, strings: List[str]) -> filter: """ Filter strings according to pattern. Parameters ---------- pattern: str Regex pattern strings: List[str] List of strings to filter. Returns ------- filter[str] """ return filter(re.compile(pattern).match, strings)
[docs]def get_rundirs(pattern: str, path: Union[str, Path]) -> Sequence[str]: """ Get SMAC run dirs, often starting with `run_`. Parameters ---------- pattern: str Regex expresssion. path: Union[str, Path] Path to folder containing single SMAC rundirs Returns ------- Sequence[str] Single SMAC rundirs """ subdirs = list(glob_re(pattern, os.listdir(path))) rundirs = [os.path.join(path, sd) for sd in subdirs] return rundirs
class ResultMerger: def __init__( self, output_dir: Optional[Union[str, Path]] = None, rundir_pattern: str = r"run_*\d$", rundirs: Optional[List[Union[str, Path]]] = None, ): """ Merge runhistories from different SMAC runs. Parameters ---------- output_dir : Optional[Union[str, Path]] Output directory containing single SMAC run folders. The rundirs are inside and collected via the pattern `rundir_pattern`. rundir_pattern : str Regex expression to find single rundirs in `output_dir`. rundirs : Optional[List[Union[str, Path]]] Paths to all SMAC output folders. If not specified, please specify `output_dir`. """ self.output_dir = output_dir self.run_dirs: Sequence[Union[str, Path]] if rundirs: self.run_dirs = rundirs else: if self.output_dir is None: raise ValueError("Please provide either `rundirs` or `output_dir` with" " an optional pattern.") self.run_dirs = get_rundirs(pattern=rundir_pattern, path=self.output_dir) cs_fn = Path(self.run_dirs[0]) / "configspace.json" with open(cs_fn, "r") as fh: json_string = fh.read() self.configuration_space = csjson.read(json_string) def get_runhistory(self) -> RunHistory: """ Get runhistory For this, merge all runhistories in pSMAC subfolders. Returns ------- RunHistory Empty, if `self.run_dirs` is None. """ runhistory = RunHistory() if self.run_dirs: runhistory_filenames = [os.path.join(d, "runhistory.json") for d in self.run_dirs] for fn in runhistory_filenames: runhistory.update_from_json(fn=fn, cs=self.configuration_space) return runhistory def get_trajectory(self) -> Optional[List[Dict[str, Any]]]: """ Get trajectory For this, extract trajectory from merged runhistories. Return trajectories in json format. Returns ------- Optional[List[Dict[str, Any]] - None, if `self.run_dirs` is None. - List of trajectory entries. Each trajectory entry is a dict with keys ['cpu_time', 'wallclock_time', 'evaluations', 'cost', 'incumbent', 'budget', 'origin']. """ trajectory = None if self.run_dirs is None: return trajectory rh = self.get_runhistory() # Sort configurations chronologically by starttime rvals = rh.values() starttimes = np.array([rv.starttime for rv in rvals]) ids = np.argsort(starttimes) rhitems = list(rh.items()) rhitems = [rhitems[i] for i in ids] # Find incumbents # Incumbent = cost is lower than alltime cost trajectory = [] # Inject first trajectory entry from file from first rundir rundir = self.run_dirs[0] traj_fn = Path(rundir) / "traj.json" with open(traj_fn, "r") as file: line = file.readline() traj_entry = json.loads(line) trajectory.append(traj_entry) # Populate from merged runhistory cost = np.inf for i, (rk, rv) in enumerate(rhitems): if rv.cost < cost: cost = rv.cost # traj_entry = TrajEntry( # rv.cost, # train_perf # rk.config_id, # incumbent_id # rh.ids_config[rk.config_id], # incumbent # i + 1, # ta_runs # rv.time, # ta_time_used # rv.starttime, # wallclock_time # rk.budget, # budget # ) # TODO return traj_entry as TrajEntry and convert to json for write_trajectory incumbent = rh.ids_config[rk.config_id] traj_entry = { "cpu_time": rv.time, "wallclock_time": rv.starttime, "evaluations": i + 1, "cost": rv.cost, "incumbent": incumbent.get_dictionary(), "budget": rk.budget, "origin": incumbent.origin, } trajectory.append(traj_entry) return trajectory def write_trajectory(self) -> None: """ Write trajectory to traj.json Returns ------- None """ if self.output_dir is not None: traj_fn = Path(self.output_dir) / "traj.json" traj = self.get_trajectory() traj_fn.open("w") if traj is not None: for traj_entry in traj: with open(traj_fn, "a") as fp: # TODO: write or append? json.dump(traj_entry, fp) fp.write("\n") def write_runhistory(self) -> None: """ Write runhistory to runhistory.json Returns ------- None """ if self.output_dir is not None: rh_fn = Path(self.output_dir) / "runhistory.json" rh = self.get_runhistory() rh.save_json(fn=str(rh_fn), save_external=True)