Skip to content

Path bucket

A module containing a concrete implementation of a Bucket that uses the Path API to store objects.

class PathBucket(path, *, loaders=None, create=True, clean=False, exists_ok=True) #

Bases: Bucket[str, Path]

A bucket that uses the Path API to store objects.

This bucket is a key-value lookup backed by some filesystem. By assigning to the bucket, you store the object to the filesystem. However, the values you get back are instead a Drop that can be used to perform operations on the stored object, such as load, get and remove.

Drop methods
  • Drop.load - Load the object from the bucket.
  • Drop.get - Load the object from the bucket with a default if something fails.
  • Drop.put - Store an object in the bucket.
  • Drop.remove - Remove the object from the bucket.
  • Drop.exists - Check if the object exists in the bucket.
# Example: using a PathBucket as a filesystem-backed key-value store.
from amltk.store.paths import PathBucket
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

bucket = PathBucket("path/to/bucket")

array = np.array([1, 2, 3])
dataframe = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
model = LinearRegression()

# Store things
bucket["myarray.npy"] = array # (1)!
bucket["df.csv"] = dataframe  # (2)!
bucket["model.pkl"].put(model)

# Entries can be checked for existence and removed through the returned Drop
bucket["config.json"] = {"hello": "world"}
assert bucket["config.json"].exists()
bucket["config.json"].remove()

# Load things
array = bucket["myarray.npy"].load()
maybe_df = bucket["df.csv"].get()  # (3)!
model: LinearRegression = bucket["model.pkl"].get(check=LinearRegression)  # (4)!

# Create subdirectories
model_bucket = bucket / "my_model" # (5)!
model_bucket["model.pkl"] = model
# NOTE(review): `X` is never defined in this snippet -- presumably a feature
# matrix for an already-fitted model; confirm and define it for a runnable example.
model_bucket["predictions.npy"] = model.predict(X)

# Acts like a mapping
assert "myarray.npy" in bucket
assert len(bucket) == 3
for key, item in bucket.items():
    print(key, item.load())
del bucket["model.pkl"]
  1. The = is a shortcut for bucket["myarray.npy"].put(array)
  2. The extension is used to determine which PathLoader to use and how to save it.
  3. The get method acts like the dict.get method.
  4. The get method can be used to check the type of the loaded object. If the type does not match, a TypeError is raised.
  5. Uses the familiar Path API to create subdirectories.
PARAMETER DESCRIPTION
path

The path to the bucket.

TYPE: Path | str

loaders

A sequence of loaders to use when loading objects. These will be prepended to the default loaders and attempted to be used first.

TYPE: Sequence[type[PathLoader]] | None DEFAULT: None

create

If True, the base path will be created if it does not exist.

TYPE: bool DEFAULT: True

clean

If True, the base path will be deleted if it exists.

TYPE: bool DEFAULT: False

exists_ok

If False, an error will be raised if the base path already exists.

TYPE: bool DEFAULT: True

Source code in src/amltk/store/paths/path_bucket.py
def __init__(
    self,
    path: Path | str,
    *,
    loaders: Sequence[type[PathLoader]] | None = None,
    create: bool = True,
    clean: bool = False,
    exists_ok: bool = True,
) -> None:
    """Set up a bucket rooted at `path`.

    Args:
        path: Location of the bucket on the filesystem. A string is
            converted to a `Path`.
        loaders: Extra loaders that take priority over the defaults;
            they are placed in front of `DEFAULT_LOADERS`.
        create: When True, the directory (and any missing parents) is
            created.
        clean: When True and the path already exists, it is removed
            first. Removal errors are ignored.
        exists_ok: When False, a `FileExistsError` is raised if the path
            exists (checked after any cleaning has taken place).

    Raises:
        FileExistsError: If `exists_ok` is False and the path exists.
    """
    super().__init__()

    # User-supplied loaders go first so they are tried before the defaults.
    if loaders is None:
        _loaders = DEFAULT_LOADERS
    else:
        _loaders = tuple(chain(loaders, DEFAULT_LOADERS))

    path = Path(path) if isinstance(path, str) else path

    if clean:
        if path.exists():
            # Best-effort wipe: failures while deleting are swallowed.
            shutil.rmtree(path, ignore_errors=True)

    if not exists_ok:
        if path.exists():
            raise FileExistsError(f"File/Directory already exists at {path}")

    if create:
        path.mkdir(parents=True, exist_ok=True)

    # Remembered so that `sub` can default to the same creation policy.
    self._create = create
    self.path = path
    self.loaders = _loaders

def sizes() #

Get the sizes of all the files in the bucket.

Files only

This method only returns the sizes of the files in the bucket. It does not include directories, their sizes, or their contents.

RETURNS DESCRIPTION
dict[str, int]

A dictionary mapping the keys to the sizes of the files.

Source code in src/amltk/store/paths/path_bucket.py
def sizes(self) -> dict[str, int]:
    """Get the sizes of all the files in the bucket.

    !!! warning "Files only"

        This method only returns the sizes of the files in the bucket.
        It does not include directories, their sizes, or their contents.

    Returns:
        A dictionary mapping the keys to the sizes of the files.
    """
    # Only regular files are reported, as documented above. Filtering with
    # `is_file()` also skips dangling symlinks, for which `stat()` would raise.
    return {
        str(entry.name): entry.stat().st_size
        for entry in self.path.iterdir()
        if entry.is_file()
    }

def add_loader(loader) #

Add a loader to the bucket.

PARAMETER DESCRIPTION
loader

The loader to add.

TYPE: type[PathLoader]

Source code in src/amltk/store/paths/path_bucket.py
def add_loader(self, loader: type[PathLoader]) -> None:
    """Register an additional loader on this bucket.

    The new loader is placed in front of the existing ones, and the
    bucket's `loaders` attribute is replaced with a new tuple.

    Args:
        loader: The loader class to register.
    """
    existing = tuple(self.loaders)
    self.loaders = (loader,) + existing

def sub(key, *, create=None) #

Create a subdirectory of the bucket.

PARAMETER DESCRIPTION
key

The name of the subdirectory.

TYPE: str

create

Whether the subdirectory will be created if it does not exist. If None, the default, the value of create passed to the constructor will be used.

TYPE: bool | None DEFAULT: None

RETURNS DESCRIPTION
Self

A new bucket with the same loaders as the current bucket.

Source code in src/amltk/store/paths/path_bucket.py
@override
def sub(self, key: str, *, create: bool | None = None) -> Self:
    """Build a bucket for a subdirectory of this one.

    Args:
        key: Name of the subdirectory, joined onto this bucket's path.
        create: Whether the subdirectory should be created if missing.
            When left as None, the `create` value this bucket was
            constructed with is reused.

    Returns:
        A new bucket of the same class, sharing this bucket's loaders.
    """
    # Fall back to the creation policy recorded at construction time.
    if create is None:
        should_create = self._create
    else:
        should_create = create

    child_path = self.path / key
    bucket_cls = self.__class__
    return bucket_cls(
        child_path,
        loaders=self.loaders,
        create=should_create,
        clean=False,
    )

def rmdir() #

Delete the bucket.

Source code in src/amltk/store/paths/path_bucket.py
def rmdir(self) -> None:
    """Remove the bucket's directory tree from the filesystem.

    Everything stored under the bucket's path is deleted along with it.
    """
    bucket_root = self.path
    shutil.rmtree(bucket_root)