# Source code for deepcave.runs.run

# Copyright 2021-2024 The DeepCAVE Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#  noqa: D400
"""
# Run

This module provides utilities to create a new run and get its attributes.

## Classes
    - Run: Create a new run.
"""

from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Union

import json
from pathlib import Path

import ConfigSpace
import jsonlines
import numpy as np
from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
from ConfigSpace.hyperparameters.hp_components import ROUND_PLACES

from deepcave.runs import AbstractRun, Status, Trial
from deepcave.runs.objective import Objective
from deepcave.utils.compression import Encoder
from deepcave.utils.files import make_dirs
from deepcave.utils.hash import string_to_hash
from deepcave.utils.util import config_to_tuple


class Run(AbstractRun, ABC):
    """
    Create a new run and get its attributes.

    If path is given, runs are loaded from the path.

    Properties
    ----------
    configspace : ConfigurationSpace
        The configuration space of the run.
    path : Optional[Union[str, Path]]
        The path of a run to be loaded.
        If path is not None, the run is loaded from that path.
        If path is None, the run is created from the given parameters.
    meta : Dict[str, Any]
        Contains serialized objectives, budgets and seeds.
    prefix : str
        The prefix for the id.
    meta_fn : Path
        The path to the meta data.
    configspace_fn : Path
        The path to the configuration space file.
    configs_fn : Path
        The path to the configurations file.
    origins_fn : Path
        The path to the origins file.
    history_fn : Path
        The path to the history file.
    models_dir : Path
        The path to the models directory.
    configs : Dict[int, Configuration]
        Containing the configurations.
    models : Dict[int, Optional[Union[str, "torch.nn.Module"]]]
        Contains the models.
    """

    prefix = "run"
    _initial_order: int

    def __init__(
        self,
        name: str,
        configspace: Optional[ConfigSpace.ConfigurationSpace] = None,
        objectives: Optional[Union[Objective, List[Objective]]] = None,
        meta: Optional[Dict[str, Any]] = None,
        path: Optional[Path] = None,
    ) -> None:
        """
        Create a run from the given parameters or load it from ``path``.

        Parameters
        ----------
        name : str
            The name of the run. Passed on to the parent class.
        configspace : Optional[ConfigSpace.ConfigurationSpace], optional
            The configuration space of the run. By default, None.
        objectives : Optional[Union[Objective, List[Objective]]], optional
            A single objective or a list of objectives. By default, None
            (no objectives).
        meta : Optional[Dict[str, Any]], optional
            Additional meta data merged into the run's meta data.
            By default, None.
        path : Optional[Path], optional
            If given, the run is loaded from this path and ``objectives`` and
            ``meta`` are not applied. By default, None.

        Raises
        ------
        RuntimeError
            If neither a configspace nor a path is provided.
        """
        super().__init__(name)

        if objectives is None:
            objectives = []

        if meta is None:
            meta = {}

        # Reset and load configspace/path
        self.reset()
        if configspace is not None:
            self.configspace = configspace
        self.path = path
        if self.path is not None:
            # Everything else is read from disk; nothing below applies.
            self.load()
            return

        if configspace is None and path is None:
            raise RuntimeError(
                "Please provide a configspace or specify a path to load existing trials."
            )

        # Objectives
        if not isinstance(objectives, list):
            objectives = [objectives]

        serialized_objectives = []
        for objective in objectives:
            assert isinstance(objective, Objective)
            serialized_objectives += [objective.to_json()]

        # Meta
        self.meta = {"objectives": serialized_objectives, "budgets": [], "seeds": []}
        self.meta.update(meta)

    @classmethod
    @abstractmethod
    def from_path(cls, path: Path) -> "Run":
        """
        Based on a path, return a new Run object.

        Parameters
        ----------
        path : Path
            The path to get the run from.

        Returns
        -------
        "Run"
            The run loaded from the path.
        """
        pass

    @property
    def id(self) -> str:
        """
        Get a hash as id.

        The hash is built from the prefix and the path of the run.

        Returns
        -------
        str
            The hashed id.
        """
        return string_to_hash(f"{self.prefix}:{self.path}")

    @property
    def path(self) -> Optional[Path]:
        """
        Return the path of the run if it exists.

        Returns
        -------
        Optional[Path]
            The path of the run.
        """
        return self._path

    @path.setter
    def path(self, value: Optional[Union[str, Path]]) -> None:
        """
        Set the paths of the run and the JSON files.

        If the value is not None, ``make_dirs`` is called on the directory and
        all file names are derived from it.

        Parameters
        ----------
        value : Optional[Union[str, Path]]
            The path for the directory.
        """
        if value is None:
            self._path = None
            return

        self._path = Path(value)

        make_dirs(self._path)

        self.meta_fn = self._path / "meta.json"
        self.configspace_fn = self._path / "configspace.json"
        self.configs_fn = self._path / "configs.json"
        self.origins_fn = self._path / "origins.json"
        self.history_fn = self._path / "history.jsonl"
        self.models_dir = self._path / "models"

    def exists(self) -> bool:
        """
        Check if the run exists based on the internal path.

        The run exists if a path is set and the meta, configspace, configs,
        origins and history files are all present.

        Returns
        -------
        bool
            If run exists.
        """
        if self._path is None:
            return False

        return all(
            f.is_file()
            for f in (
                self.meta_fn,
                self.configspace_fn,
                self.configs_fn,
                self.origins_fn,
                self.history_fn,
            )
        )

    def add(
        self,
        costs: Union[List[float], float],
        config: Union[Dict, Configuration],
        seed: int,
        budget: float = np.inf,
        start_time: float = 0.0,
        end_time: float = 0.0,
        status: Status = Status.SUCCESS,
        origin: Optional[str] = None,
        model: Optional[Union[str, "torch.nn.Module"]] = None,  # type: ignore # noqa: F821
        additional: Optional[Dict] = None,
    ) -> None:
        """
        Add a trial to the run.

        If combination of config, seed, and budget already exists, it will be overwritten.
        Not successful runs are added with `None` costs.

        Parameters
        ----------
        costs : Union[List[float], float]
            Costs of the run. In case of multi-objective, a list of costs is expected.
            The passed list is not modified.
        config : Union[Dict, Configuration]
            The corresponding configuration.
        seed : int
            Seed of the run.
        budget : float, optional
            Budget of the run. By default np.inf
        start_time : float, optional
            Start time. By default, 0.0
        end_time : float, optional
            End time. By default, 0.0
        status : Status, optional
            Status of the trial. By default, Status.SUCCESS
        origin : str, optional
            Origin of the trial. By default, None
        model : Optional[Union[str, "torch.nn.Module"]], optional
            Model of the trial. By default, None
        additional : Optional[Dict], optional
            Additional information of the trial. By default, None.
            Following information is used by DeepCAVE:
            * traceback

        Raises
        ------
        RuntimeError
            If number of costs does not match number of objectives.
        ValueError
            If config id is None.
        """
        if additional is None:
            additional = {}

        # Work on a private list: a missing "time" cost is filled in below and
        # the list is stored in the trial, so the caller's list must not be
        # mutated or shared with the history.
        costs = list(costs) if isinstance(costs, list) else [costs]

        # Fetch the objectives once; the meta data they are derived from is
        # only replaced after the loop.
        objectives = self.get_objectives()
        if len(costs) != len(objectives):
            raise RuntimeError("Number of costs does not match number of objectives.")

        updated_objectives = []
        for i, (cost, objective) in enumerate(zip(costs, objectives)):
            # Update time objective here
            if objective.name == "time" and cost is None:
                cost = end_time - start_time
                costs[i] = cost

            # If cost is none, replace it later with the highest cost
            if cost is not None:
                # Update bounds here
                if not objective.lock_lower and objective.lower is not None:
                    if cost < objective.lower:
                        objective.lower = cost

                if not objective.lock_upper and objective.upper is not None:
                    if cost > objective.upper:
                        objective.upper = cost

            updated_objectives += [objective.to_json()]

        self.meta["objectives"] = updated_objectives

        if isinstance(config, Configuration):
            config = dict(config)

        if config not in self.configs.values():
            config_id_len = len(self.configs)
            self.configs[config_id_len] = config
            self.origins[config_id_len] = origin
            # Use same rounding as ConfigSpace does
            self.config_id_mapping[config_to_tuple(config, ROUND_PLACES)] = config_id_len

        config_id = self.get_config_id(config)
        if config_id is None:
            raise ValueError("Config id is None.")

        trial = Trial(
            config_id=config_id,
            budget=budget,
            seed=seed,
            costs=costs,
            start_time=np.round(start_time, 2),
            end_time=np.round(end_time, 2),
            status=status,
            additional=additional,
        )

        trial_key = trial.get_key()
        if trial_key not in self.trial_keys:
            self.trial_keys[trial_key] = len(self.history)
            self.history += [trial]
        else:
            # Overwrite
            self.history[self.trial_keys[trial_key]] = trial

        # Update budgets
        if budget not in self.meta["budgets"]:
            self.meta["budgets"].append(budget)
            self.meta["budgets"].sort()

        self._update_highest_budget(config_id, budget, status)

        # Update seeds
        if seed not in self.meta["seeds"]:
            self.meta["seeds"].append(seed)
            self.meta["seeds"].sort()

        # Update models
        # Problem: The model should not be in the cache.
        # Therefore, first the model is kept as it is,
        # but remove it from the dict and save it to the disk later on.
        if model is not None:
            self.models[config_id] = model

    def save(self, path: Union[str, Path]) -> None:
        """
        Save the run and its information.

        Writes the configspace, meta data, configs, origins and history to the
        given directory. Models that were added are saved to the models
        directory and removed from the in-memory dict afterwards.

        Parameters
        ----------
        path : Union[str, Path]
            The path in which to save the trials.

        Raises
        ------
        RuntimeError
            If the path is not specified.
            If a stored model is not a torch module.
        """
        if path is None:
            raise RuntimeError("Please specify a path to save the trials.")

        # Assigning the path also derives all file names used below.
        self.path = Path(path)

        # Save configspace
        self.configspace.to_json(self.configspace_fn)

        # Save meta data (could be changed)
        self.meta_fn.write_text(json.dumps(self.meta, cls=Encoder, indent=4))
        self.configs_fn.write_text(json.dumps(self.configs, cls=Encoder, indent=4))
        self.origins_fn.write_text(json.dumps(self.origins, cls=Encoder, indent=4))

        # Save history
        with jsonlines.open(self.history_fn, mode="w") as f:
            for trial in self.history:
                f.write(trial.to_json())

        # TODO: Update general cache file and tell him that self.path was used
        # to save the run.
        # Then, DeepCAVE can show direct suggestions in the select path dialog.

        # Models
        if self.models:
            # torch is imported here, because it is not wanted as requirement.
            import torch

            # Iterate over a copy of the keys, because entries are deleted
            # while saving.
            for config_id in list(self.models.keys()):
                filename = self.models_dir / f"{config_id}.pth"
                if not filename.exists():
                    make_dirs(filename)

                model = self.models[config_id]
                if isinstance(model, torch.nn.Module):
                    torch.save(model, filename)
                else:
                    raise RuntimeError("Unknown model type.")

                # Remove from dict
                del self.models[config_id]

    def load(self, path: Optional[Union[str, Path]] = None) -> None:
        """
        Load the run.

        The run is reset first. Afterwards meta data, configspace, configs,
        origins and history are read from the path.

        Parameters
        ----------
        path : Optional[Union[str, Path]], optional
            The path where to load the run from.
            If None, the path already set on the run is used.
            Default is None.

        Raises
        ------
        RuntimeError
            If the path is None.
            If the trials were not found.
        """
        self.reset()

        if path is None and self.path is None:
            raise RuntimeError("Could not load trials because path is None.")

        if path is not None:
            self.path = Path(path)

        if not self.exists():
            raise RuntimeError("Could not load trials because trials were not found.")

        # Load meta data
        self.meta = json.loads(self.meta_fn.read_text())

        # Load configspace
        self.configspace = ConfigurationSpace.from_json(self.configspace_fn)

        # Load configs
        configs = json.loads(self.configs_fn.read_text())
        # Make sure all keys are integers
        self.configs = {int(k): v for k, v in configs.items()}

        # Load origins
        origins = json.loads(self.origins_fn.read_text())
        self.origins = {int(k): v for k, v in origins.items()}

        # Make sure there is a config_id_mapping
        if not self.config_id_mapping:
            for config_id, config in self.configs.items():
                if isinstance(config, Configuration):
                    config = dict(config)
                self.config_id_mapping[config_to_tuple(config, ROUND_PLACES)] = config_id

        # Load history
        with jsonlines.open(self.history_fn) as f:
            self.history = []
            for obj in f:
                # Create trial object here
                trial = Trial(*obj)
                self.history.append(trial)

                # Also create trial_keys
                self.trial_keys[trial.get_key()] = len(self.history) - 1

                # Update highest budget
                self._update_highest_budget(trial.config_id, trial.budget, trial.status)