import numpy
from scipy.stats import beta as beta_distribution
import sys
from typing import Union
from .metric import Metric
[docs]
class SampleTTSMetric(Metric):
"""Time to solution (TTS) metric."""
def __init__(
self,
tau_attribute: str,
percentile: float = 50.0,
confidence: float = 0.99,
num_bootstraps: int = 100,
failure_fill_in_value: float = sys.float_info.max,
tolerance: float = 1e-5,
seed: int = None,
**kwargs
):
"""Time to solution (TTS) metric.
Args:
tau_attribute (str): Name of result key to use as tau = the length of each
run, tau is then set to the mean over the results.
percentile (float, optional): The percentile of the R99 distribution to
find. Defaults to 50.0.
confidence (float, optional): The required confidence such as 0.99 for 99%
confidence. Defaults to 0.99.
num_bootstraps (int, optional): Number of bootstrapped samples.
Defaults to 100.
failure_fill_in_value (float, optional): Fill in value to replace the mean
and std with (respectively) if they are NaN or inf, or None to leave as is.
Defaults to sys.float_info.max.
tolerance (float, optional): Tolerance for comparisons with
best_known_energy. Defaults to 1e-5.
seed (int, optional): Random seed for the bootstrapping process.
Defaults to None.
Keyword Args:
Additional arguments to pass to the parent __init__ method.
Raises:
ValueError: Raises error if the confidence value is out of range.
"""
super(SampleTTSMetric, self).__init__(**kwargs)
self._rng = numpy.random.RandomState(seed)
self.name = "tts"
self.tau_attribute = tau_attribute
self.percentile = percentile
self.confidence = confidence
self.num_bootstraps = num_bootstraps
self.failure_fill_in_value = failure_fill_in_value
self.tolerance = tolerance
if not 0 < self.confidence < 1:
raise ValueError("confidence must be between 0 and 1")
[docs]
def calc(
self, results: list[list], best_known_energies: list, **kwargs
) -> list[float]:
"""Calculate the mean and std of the sample TTS with the given
percentile.
Args:
results (list[list]): A result for each problem.
best_known_energies (list): A best known energy for the problem associated
with each result.
Keyword Args:
Captures additional named arguments passed to Metric.calc().
Returns:
list[float]: The estimate of the given percentile of the TTS distribution,
the std of the given percentile of the TTS distribution.
"""
# Calculate the success probabilities
success_probabilities = self.calc_success_probabilities(
results, best_known_energies
)
# If less than percentile problems were solved, do not attempt
# to estimate the TTS
frac_solved = (success_probabilities > 0).mean()
if frac_solved < self.percentile / 100.0:
mean_TTS = numpy.inf
std_TTS = numpy.inf
else:
# Find the number of repeats
num_repeats = self.num_solutions_per_result(results)
# Calculate the R99 sample, with this percentile
R99_distribution = self.calc_R99_distribution(
success_probabilities, num_repeats
)
# Mean and variance of tau
mean_tau = self.overall_mean(results, self.tau_attribute)
var_tau = self.overall_variance(results, self.tau_attribute)
# Mean and variance of R99
mean_R99 = numpy.mean(R99_distribution)
var_R99 = numpy.var(R99_distribution)
# Mean and std of TTS
mean_TTS = mean_R99 * mean_tau
std_TTS = (
(var_R99 * var_tau)
+ (mean_R99**2 * var_tau)
+ (mean_tau**2 * var_R99)
) ** 0.5
if self.failure_fill_in_value is not None:
mean_TTS = self.fill_in_value(mean_TTS, self.failure_fill_in_value)
std_TTS = self.fill_in_value(std_TTS, self.failure_fill_in_value)
return mean_TTS, std_TTS
[docs]
def calc_R99(self, success_probability: float) -> float:
"""Calculate the R99.
Given a set of solutions and the best known solution, calculate the R99,
defined as the number of independent runs that we need to call the
solver to find a solution as good as the best_known_energy (or better)
at least once with 99% confidence.
Args:
success_probability (float): The success probability for solutions given best_known_energy.
Raises:
ValueError: Raises an error if the confidence value is out of range.
Returns:
float: The calcuated R99 value.
"""
if not 0 < self.confidence < 1:
raise ValueError("confidence must be between 0 and 1")
# Trivial cases
if success_probability == 0:
R99 = numpy.inf
elif success_probability == 1:
R99 = 1.0
else:
# Calculate the R99
R99 = numpy.log(1 - self.confidence) / numpy.log(1 - success_probability)
# Cannot be smaller than 1
if R99 < 1:
R99 = 1.0
# Final check
assert (R99 is numpy.inf) or (R99 >= 1.0)
return R99
[docs]
def calc_R99_distribution(
self, success_probabilities: list[float], num_repeats: int
) -> numpy.ndarray:
"""Returns the percentile-th R99 distribution for a sample of problems.
Note: problems should be of the same size.
Args:
success_probabilities (list[float]): A success probability for each problem.
num_repeats (int): Number of repeats in each result.
Returns:
numpy.ndarray: An R99 distribution from bootstrapping.
"""
# Find the Beta posterior distribution
beta_posterior = []
for success_probability in success_probabilities:
# The parameters of the Beta posterior distribution The prior is
# assumed to be a Beta distribution with alpha = 0.5 and beta = 0.5
alpha = 0.5 + success_probability * num_repeats # 0.5 + number of successes
beta = (
0.5 + (1 - success_probability) * num_repeats
) # 0.5 + number of failures
beta_posterior.append((alpha, beta))
# The percentile R99 empirical distribution, one percentile of R99 for
# each bootstrapped sample
R99_distribution = numpy.empty(self.num_bootstraps, dtype=float)
# For each bootstrap sample TODO: check default for num_bootstraps: 100
# might be too few, maybe 500 or 1000 (?)
for i in range(self.num_bootstraps):
# Construct a sample of R99's of the same length as success_probabilities
R99_sampled = []
# Choose the random indices into beta_posterior for the below loop
# in advance
random_indices = self._rng.randint(
0, len(beta_posterior), len(success_probabilities)
)
# Choose the random numbers for the cdf_values in the below loop, in advance
cdf_values = self._rng.uniform(0, 1, len(success_probabilities))
for random_index, cdf_value in zip(random_indices, cdf_values):
# Generate the beta random variable given the cdf_value
sampled_probability = float(
beta_distribution.ppf(cdf_value, *beta_posterior[random_index])
)
# Calculate the R99 and add it to the list of R99s for this bootstrap
R99_sampled.append(self.calc_R99(sampled_probability))
# Calculate the percentile of the bootstrap sample and store it
R99_distribution[i] = numpy.percentile(R99_sampled, self.percentile)
# Return distribution of R99s
return R99_distribution
[docs]
def calc_success_probabilities(
self, results: list, best_known_energies: list
) -> numpy.ndarray:
"""Calculate the success probabilities for all problems.
Args:
results (list): Results for each problem.
best_known_energies (list): A best known energy for the problem associated with each result
Returns:
numpy.ndarray: A success probability for each problem.
"""
probabilities = numpy.empty(len(results), dtype=float)
for i, (result, energy) in enumerate(zip(results, best_known_energies)):
probabilities[i] = self.calc_success_probability(result, energy)
return probabilities
[docs]
def calc_success_probability(
self, solutions: Union[list, dict], best_known_energy: float
) -> float:
"""Calculate the success probability for a given problem.
Args:
solutions (Union[list, dict]): The solutions.
best_known_energy (float): The lowest energy known.
Returns:
float: Success probability.
"""
n_success = sum(
1
for solution in solutions
if solution["best_energy"] < best_known_energy + self.tolerance
)
return n_success / float(len(solutions))