Skip to content

correlations

class RunningStats() #

Source code in src/mfpbench/correlations.py
def __init__(self) -> None:  # noqa: D107
    # Number of samples pushed so far.
    self.n = 0
    # Each accumulator starts as its own 0-d integer array; none are shared.
    # *_m hold the running mean, *_s the running sum of squared deviations
    # (variance() divides new_s by n - 1).
    self.old_m, self.new_m = np.array(0), np.array(0)
    self.old_s, self.new_s = np.array(0), np.array(0)
    # Snapshots of old_m / old_s taken at the start of the latest push().
    self.previous_m, self.previous_s = np.array(0), np.array(0)

def clear() #

Clear the running stats.

Source code in src/mfpbench/correlations.py
def clear(self) -> None:
    """Clear the running stats.

    Resets the sample count and every accumulator to the state produced by
    the constructor. Resetting only ``n`` would leave the moments of the
    previous run readable through the public ``new_m`` / ``previous_m``
    attributes until enough new samples had been pushed to overwrite them.
    """
    self.n = 0
    self.old_m = np.array(0)
    self.new_m = np.array(0)
    self.old_s = np.array(0)
    self.new_s = np.array(0)
    self.previous_m = np.array(0)
    self.previous_s = np.array(0)

def push(x) #

Push a new value into the running stats.

Source code in src/mfpbench/correlations.py
def push(self, x: np.ndarray) -> None:
    """Push a new value into the running stats.

    Updates the running mean (``new_m``) and the running sum of squared
    deviations (``new_s``) with Welford's recurrence, and snapshots the
    pre-update values in ``previous_m`` / ``previous_s``.

    Args:
        x: The sample to add. Anything ``np.asarray`` accepts is allowed;
            it must broadcast against the samples pushed before it.
    """
    # Coerce first so lists/scalars work and a bad input fails before any
    # state has been touched.
    x = np.asarray(x)

    self.n += 1
    self.previous_m = self.old_m
    self.previous_s = self.old_s

    if self.n == 1:
        # Keep a private copy: storing the caller's array by reference would
        # let later in-place edits of it silently change the running mean.
        self.old_m = self.new_m = x.copy()
        # Reset both deviation accumulators so nothing from a run before
        # clear() remains visible.
        self.old_s = self.new_s = np.array(0)
    else:
        # new_m must be computed first: new_s uses both the old and new mean.
        self.new_m = self.old_m + (x - self.old_m) / self.n
        self.new_s = self.old_s + (x - self.old_m) * (x - self.new_m)

        self.old_m = self.new_m
        self.old_s = self.new_s

def mean() #

Return the mean of the running stats.

Source code in src/mfpbench/correlations.py
def mean(self) -> np.ndarray:
    """Return the mean of the running stats.

    Returns:
        The running mean of every value pushed so far. With a single pushed
        value that is the value itself; with none it is a 0-d zero array,
        matching what ``variance()`` returns when it has too little data.
    """
    # `n > 0`, not `n > 1`: one sample already has a well-defined mean.
    if self.n > 0:
        return self.new_m
    return np.array(0.0)

def variance() #

Return the variance of the running stats.

Source code in src/mfpbench/correlations.py
def variance(self) -> np.ndarray:
    """Return the variance of the running stats.

    Returns:
        ``new_s / (n - 1)`` once at least two values have been pushed,
        otherwise a 0-d zero array.
    """
    # Fewer than two samples: no spread to report.
    if self.n <= 1:
        return np.array(0.0)

    degrees_of_freedom = self.n - 1
    return self.new_s / degrees_of_freedom

def std() #

Return the standard deviation of the running stats.

Source code in src/mfpbench/correlations.py
def std(self) -> np.ndarray:
    """Return the standard deviation of the running stats.

    Returns:
        The element-wise square root of ``variance()``, always as an ndarray.
    """
    current_variance = self.variance()
    root = np.sqrt(current_variance)
    # np.sqrt of a 0-d array yields a NumPy scalar; wrap it back into an array.
    return np.asarray(root)

def correlation_curve(b, *, n_samples=25, method='spearman') #

Compute the correlation curve for a benchmark.

PARAMETER DESCRIPTION
b

The benchmark to compute the correlation curve for

TYPE: Benchmark

n_samples

The number of samples to take from the benchmark

TYPE: int DEFAULT: 25

method

The method to use for computing the correlation curve

TYPE: Literal['spearman', 'kendalltau', 'cosine'] DEFAULT: 'spearman'

RETURNS DESCRIPTION
ndarray

The mean correlation curve

Source code in src/mfpbench/correlations.py
def correlation_curve(
    b: Benchmark,
    *,
    n_samples: int = 25,
    method: Literal["spearman", "kendalltau", "cosine"] = "spearman",
) -> np.ndarray:
    """Compute the correlation curve for a benchmark.

    Samples configurations from the benchmark, records every result of each
    configuration's trajectory in a fresh frame, and reads the curve off the
    frame's correlation matrix.

    Args:
        b: The benchmark to compute the correlation curve for
        n_samples: The number of configurations to sample from the benchmark
        method: The correlation method forwarded to the frame

    Returns:
        The last row of the matrix returned by the frame's ``correlations``.
    """
    sampled = b.sample(n_samples)
    results = b.frame()

    # Record the full trajectory of every sampled configuration.
    for cfg in sampled:
        for step in b.trajectory(cfg):
            results.add(step)

    matrix = results.correlations(method=method)
    # NOTE(review): presumably the last row correlates each fidelity with the
    # final one - confirm against the frame implementation.
    return matrix[-1, :]

def monte_carlo(benchmark, n_samples=25, epsilon=0.001, iterations_max=5000) #

Compute the correlation curve using a Monte Carlo method, iterating until the running mean converges.

PARAMETER DESCRIPTION
benchmark

The benchmark to compute the correlation curve for

TYPE: Benchmark

n_samples

The number of samples to take from the benchmark per iteration

TYPE: int DEFAULT: 25

epsilon

The convergence threshold

TYPE: float DEFAULT: 0.001

iterations_max

The maximum number of iterations to run

TYPE: int DEFAULT: 5000

RETURNS DESCRIPTION
RunningStats

The running statistics (mean and variance) accumulated over the sampled correlation curves

Source code in src/mfpbench/correlations.py
def monte_carlo(
    benchmark: Benchmark,
    n_samples: int = 25,
    epsilon: float = 1e-3,
    iterations_max: int = 5000,
) -> RunningStats:
    """Estimate the correlation curve with a Monte Carlo loop until it converges.

    A fresh correlation curve is drawn and pushed into a ``RunningStats``
    each iteration. Sampling stops once the running mean moves by at most
    ``epsilon`` (L2 norm) between consecutive iterations, or after
    ``iterations_max`` iterations.

    Args:
        benchmark: The benchmark to compute the correlation curve for
        n_samples: The number of samples to take from the benchmark per iteration
        epsilon: The convergence threshold on the change of the running mean
        iterations_max: The maximum number of iterations to run

    Returns:
        The RunningStats accumulated over all sampled curves
    """
    stats = RunningStats()
    itrs = 0

    while itrs < iterations_max:
        stats.push(correlation_curve(benchmark, n_samples=n_samples))
        itrs += 1

        # Convergence is only checked once more than two curves are in.
        if stats.n <= 2:
            continue

        shift = float(np.linalg.norm(stats.new_m - stats.previous_m, ord=2))
        if shift <= epsilon:
            break

    return stats