# Source code for deepcave.utils.run_caches

# Copyright 2021-2024 The DeepCAVE Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#  noqa: D400
"""
# RunCaches

This module defines a class for holding the caches for selected runs.

Utilities provided include updating, getting, setting and clearing.

## Classes
    - RunCaches: Hold the caches for the selected runs.
"""

from typing import Any, Dict, Optional

import shutil

from deepcave.config import Config
from deepcave.runs import AbstractRun
from deepcave.utils.cache import Cache
from deepcave.utils.logs import get_logger


class RunCaches:
    """
    Hold the caches for the selected runs.

    The caches are used by the plugins to store their raw outputs so that the raw
    outputs do not have to be calculated again.

    Each inputs key has its own cache file. This ensures that data is only loaded
    when it is actually needed.

    Properties
    ----------
    cache_dir : Path
        The root directory of the run caches. Every run gets its own sub-directory
        named after the run id.
    logger : Logger
        The logger for the run cache.
    """

    def __init__(self, config: "Config"):
        self.cache_dir = config.CACHE_DIR / "run_cache"
        self.logger = get_logger("RunCache")
        self._debug = config.DEBUG

    def update(self, run: "AbstractRun") -> bool:
        """
        Update the cache for the given run.

        If the cache does not exist, it will be created.
        If the run hash is different from the saved variant, the cache will be reset.

        Parameters
        ----------
        run : AbstractRun
            The run which should be updated.

        Returns
        -------
        bool
            True if the run cache was created or reset. True is also returned, without
            touching the cache, if reading the run hash raises a FileNotFoundError.
            False if the stored hash still matches the run hash.
        """
        index_file = self.cache_dir / run.id / "index.json"

        # Reads the cache.
        cache = Cache(index_file, debug=self._debug, write_file=False)
        if not index_file.exists():
            self._reset(run, cache)
            self.logger.info(f"Cache for {run.name} has been created.")
            return True

        cached_hash = cache.get("hash")

        # Named `run_hash` so that the builtin `hash` is not shadowed.
        try:
            run_hash = run.hash
        except FileNotFoundError:
            return True

        if cached_hash == run_hash:
            return False

        # Delete all caches related to the run.
        self.clear_run(run)

        # And also reset the "main" cache.
        cache = Cache(index_file, debug=self._debug, write_file=False)
        self._reset(run, cache)
        self.logger.info(f"Hash for {run.name} has changed.")

        return True

    def _reset(self, run: "AbstractRun", cache: "Cache") -> None:
        """
        Initialize or reset the cache for the given run.

        The name, the hash and (if available) the path of the run are stored. The
        cache is written only once, after all values have been set.

        Parameters
        ----------
        run : AbstractRun
            The run to reset the cache for.
        cache : Cache
            Instance of the cache.
        """
        # Initialize run here.
        cache.clear(write_file=False)
        cache.set("name", value=run.name, write_file=False)
        cache.set("hash", value=run.hash, write_file=False)
        if run.path is not None:
            cache.set("path", value=str(run.path), write_file=False)

        # Single write for all values set above.
        cache.write()

    def get(
        self, run: "AbstractRun", plugin_id: str, inputs_key: str
    ) -> Optional[Dict[str, Any]]:
        """
        Return the raw outputs for the given run, plugin and inputs key.

        Parameters
        ----------
        run : AbstractRun
            The run to get the results for.
        plugin_id : str
            The plugin id to get the results for.
        inputs_key : str
            The inputs key to get the results for.
            Should be the output from `Plugin._dict_as_key`.

        Returns
        -------
        Optional[Dict[str, Any]]
            Raw outputs for the given run, plugin and inputs key.
            None if no cache file exists for this combination.

        Raises
        ------
        AssertionError
            If the outputs of the cache are not a dict.
        """
        filename = self.cache_dir / run.id / plugin_id / f"{inputs_key}.json"

        if not filename.exists():
            return None

        cache = Cache(filename, debug=self._debug, write_file=False)
        outputs = cache.get("outputs")

        # Raised explicitly (instead of using an `assert` statement) so that the check
        # is still performed when Python runs with -O.
        if not isinstance(outputs, dict):
            raise AssertionError("Outputs of cache must be a dict.")

        return outputs

    def set(self, run: "AbstractRun", plugin_id: str, inputs_key: str, value: Any) -> None:
        """
        Set the value for the given run, plugin and inputs key.

        Since each inputs key has its own cache, only necessary data are loaded.

        Parameters
        ----------
        run : AbstractRun
            The run to set the cache for.
        plugin_id : str
            The plugin id to set the cache for.
        inputs_key : str
            The inputs key to set the cache for.
            Should be the output from `Plugin._dict_as_key`.
        value : Any
            The value to set.
        """
        filename = self.cache_dir / run.id / plugin_id / f"{inputs_key}.json"
        cache = Cache(filename, debug=self._debug, write_file=False)
        cache.set("outputs", value=value)

    def clear_run(self, run: "AbstractRun") -> None:
        """
        Remove all caches for the given run.

        Nothing happens if the run has no cache directory.

        Parameters
        ----------
        run : AbstractRun
            The run whose cache directory should be removed.
        """
        try:
            shutil.rmtree(self.cache_dir / run.id)
        except FileNotFoundError:
            # The run was never cached (or was already cleared): nothing to remove.
            pass

    def clear(self) -> None:
        """
        Remove all caches.

        This is a best-effort operation: a missing cache directory is ignored and
        any other OS error is logged instead of being raised.
        """
        try:
            shutil.rmtree(self.cache_dir)
        except FileNotFoundError:
            # Nothing has been cached yet.
            pass
        except OSError as error:
            self.logger.warning(f"Could not remove cache directory {self.cache_dir}: {error}")