"""This module provides a class to generate
parametrised distributions for simulation purposes
It is still in experimental stage and may not be fully functional
or tested. Use at your own risk.
"""
import math
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import lognorm, norm, uniform, triang
[docs]
class Simulation:
"""convenience class to generate parametrised distributions"""
def __init__(
self,
mean=None,
std_dev=None,
lower_bound=None,
upper_bound=None,
probability=None,
seed=None,
):
try:
assert (
lower_bound is None and upper_bound is None and probability is None
) or (lower_bound < upper_bound and probability > 0 and probability < 1.0)
except AssertionError:
print(
"Ensure you provide either: a) lower_bound, upper_bound and probability, or b) mean and std_dev"
)
try:
assert (mean is None and std_dev is None) or (mean != 0 and std_dev > 0)
except AssertionError:
print("Error in the parameters mean and std_dev:", mean, std_dev)
self.mean = mean # mean
self.std_dev = std_dev # standard deviation
self.seed = seed
self.samples = None
self.lower_bound = lower_bound
self.upper_bound = upper_bound
self.probability = probability # probability of the confidence interval
np.random.seed(self.seed)
self.random_series = None
[docs]
def generate(self, size=1, random_series=None):
"""generate simulation, by default uses normal distribution"""
alpha = 1 - self.probability
z = norm.ppf(1 - alpha / 2)
if self.mean is None:
self.mean = (self.upper_bound + self.lower_bound) / 2
if self.std_dev is None:
self.std_dev = (self.upper_bound - self.lower_bound) / (2 * z)
if (random_series is not None) and (len(random_series) >= size):
print("Random series provided")
self.random_series = random_series[:size]
else:
# uniform distribution 0,1
self.random_series = np.random.uniform(0, 1, size)
self.samples = norm.ppf(q=self.random_series, loc=self.mean, scale=self.std_dev)
self.simulated_mean = np.mean(self.samples)
self.simulated_median = np.median(self.samples)
self.simulated_std_dev = np.std(self.samples)
self.size = size
[docs]
def plot_log_scale(self):
"""plot the histogram of the samples in log scale"""
plt.hist(self.samples, bins=100, edgecolor="k", alpha=0.7)
plt.title("Monte Carlo Simulation")
plt.xlabel("Values")
plt.ylabel("Frequency")
plt.xscale("log")
plt.yscale("log")
plt.show()
[docs]
def plot(self, bins=100):
"""plot the histogram of the samples"""
hist, bins, _ = plt.hist(self.samples, bins=bins, edgecolor="k", alpha=0.7)
plt.title("Monte Carlo Simulation")
plt.xlabel("Values")
plt.ylabel("Frequency")
# count of observations within confidence interval
num_within_interval = np.sum(
(self.samples >= self.lower_bound) & (self.samples <= self.upper_bound)
)
# Calculate the proportion of observations within the confidence interval
proportion_within_interval = num_within_interval / self.size
plt.axvline(
x=self.lower_bound,
color="r",
linestyle="dashed",
linewidth=2,
label=f"Lower bound ({self.lower_bound})",
)
plt.axvline(
x=self.upper_bound,
color="g",
linestyle="dashed",
linewidth=2,
label=f"Upper bound ({self.upper_bound})",
)
plt.axvline(
x=self.mean,
color="b",
linestyle="dashed",
linewidth=2,
label=f"Mean ({int(self.mean)})",
)
# add mean, median and std deviation to the plot as floating box
plt.text(
0.8,
0.9,
f"Mean: {self.simulated_mean:,.2f}\nMedian: {self.simulated_median:,.2f}\nStd Dev: {self.simulated_std_dev:,.2f}\nProportion within interval:{proportion_within_interval:.2}",
horizontalalignment="center",
verticalalignment="center",
transform=plt.gca().transAxes,
)
plt.show()
[docs]
def statistics(self):
"""returns a dictionary with all the statistics"""
return {
"mean": self.simulated_mean,
"median": self.simulated_median,
"std_dev": self.simulated_std_dev,
}
[docs]
class SimulationLognormal(Simulation):
"""Generates lognormal distribution with the lower and upper bound provided"""
def __init__(
self,
mean=None,
std_dev=None,
lower_bound=None,
upper_bound=None,
probability=None,
seed=None,
):
super().__init__(
mean=mean,
std_dev=std_dev,
lower_bound=lower_bound,
upper_bound=upper_bound,
probability=probability,
seed=seed,
)
[docs]
def generate(
self, size=1, random_series=None
): # generate the lognormal distribution
assert size > 0, "size must be greater than 0"
if (random_series is not None) and (len(random_series) >= size):
self.random_series = random_series[:size]
else:
# uniform distribution 0,1
self.random_series = np.random.uniform(0, 1, size)
if self.mean is not None and self.std_dev is not None:
# self.samples = np.random.lognormal(self.mean, self.std_dev, size)
self.samples = lognorm(self.mean, scale=self.std_dev).ppf(
self.random_series
)
elif self.lower_bound is not None and self.upper_bound is not None:
# Convert the confidence level to a z-score
z_score = norm.ppf((1 + self.probability) / 2)
# Calculate the log-transformed mean and confidence interval
log_lower_bound = np.log(self.lower_bound)
log_upper_bound = np.log(self.upper_bound)
# Calculate the mean and standard deviation of the underlying normal distribution
mu_Y = (log_lower_bound + log_upper_bound) / 2
sigma_Y = (log_upper_bound - log_lower_bound) / (2 * z_score)
# samples_normal = np.random.normal(mu_Y, sigma_Y, size)
# Theoretical mean from a lognormal based on the upper and lower band
lognormal_mean = np.exp(mu_Y + (sigma_Y**2) / 2)
self.mean = lognormal_mean
# We now transform that normal into exp to generate the lognormal distribution
samples_normal = norm.ppf(q=self.random_series, loc=mu_Y, scale=sigma_Y)
self.samples = np.exp(samples_normal)
else:
raise ValueError(
"Either mean and sigma or lower and upper bounds must be provided"
)
self.simulated_mean = np.mean(self.samples)
self.simulated_median = np.median(self.samples)
self.simulated_std_dev = np.std(self.samples)
self.size = size
[docs]
class SimulationTriangular(Simulation):
"""Generates triangular distribution with the lower and upper bound provided"""
def __init__(
self, mode=None, lower_bound=None, upper_bound=None, probability=None, seed=None
):
if lower_bound is None or upper_bound is None or mode is None:
raise ValueError(
"All mode, lower and upper bounds must be provided for the triangular distribution"
)
std_dev = math.sqrt(
(
lower_bound**2
+ upper_bound**2
+ mode**2
- lower_bound * upper_bound
- lower_bound * mode
- upper_bound * mode
)
/ 18
)
mean = (lower_bound + upper_bound + mode) / 3
super().__init__(
mean=mean,
std_dev=std_dev,
lower_bound=lower_bound,
upper_bound=upper_bound,
probability=probability,
seed=seed,
)
self.mode = mode
self.mode_scaled = (mode - lower_bound) / (
upper_bound - lower_bound
) # c parameter for the triangular distribution
[docs]
def generate(
self, size=1, random_series=None
): # generate the triangular distribution
assert size > 0, "size must be greater than 0"
if (random_series is not None) and (len(random_series) >= size):
self.random_series = random_series[:size]
else:
# uniform distribution 0,1
self.random_series = np.random.uniform(0, 1, size)
if (
self.mode is not None
and self.lower_bound is not None
and self.upper_bound is not None
):
# self.samples = np.random.triangular(left=self.lower_bound, mode=self.mode, right=self.upper_bound, size=size)
self.samples = triang.ppf(
q=self.random_series,
c=self.mode_scaled,
loc=self.lower_bound,
scale=self.upper_bound - self.lower_bound,
)
else:
raise ValueError(
"All mode, lower and upper bounds must be provided for the triangular distribution"
)
self.simulated_mean = np.mean(self.samples)
self.simulated_median = np.median(self.samples)
self.simulated_std_dev = np.std(self.samples)
self.size = size