import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from SALib.sample import saltelli
from SALib.analyze import sobol
from ring_plot_funct import ring_plot


input_names = ['MutMxSz', 'MutSSz', 'MutSpP', 'GlutMnSz', 'GlutSSz', 'GlutFzP', 'GlutFzA', 'GlutIniPop']
problem = {'num_vars': len(input_names),
           'names': input_names,
           'bounds': [[1,10], [0.1,5], [0.01,0.3], [0.1,2], [0.1,5], [0.01,0.2], [0.01,0.2], [3,9]]
           }
n_real = 100  # Number of realizations (model runs with the same inputs)
output_names = ['AvgGlutPop', 'AvgGlutRad', 'AvgGlutRadGn', 'AvgMutPop', 'MaxMutPop', 'MinMutPop']  # Output names
data = pd.read_csv('wolf.out', header=None, sep=' ')  # This is with the modified file with just the output in the 4th column (AvgMut)
data.drop(6, axis=1, inplace=True)  # There is a space in the last column (if there is no space write axis=0)
data.columns = output_names
data.head()


data.drop(['MinMutPop', 'AvgGlutPop'], axis=1, inplace=True)  # We drop AvgGlutPop as it is fixed in GLutmut program, and MinMutPop as it is always 0
output_names = output_names[:-1]
output_names = output_names[1:]
data.head()


exp = pd.DataFrame(data.values.reshape(-1, n_real, data.shape[1]).mean(1))  # Mean of the model realizations, E[Y|X] = Y_d
exp.columns = output_names
exp.head()


var = pd.DataFrame(data.values.reshape(-1, n_real, data.shape[1]).var(1))  # Variance of the model realizations, V[Y|X] = \Psi_s
var.columns = output_names
var.head()


v_exp = exp.var()
e_var = var.mean()
for variable in output_names:
    print('Output {}: V(Y) = {}. Vd + Es = {}'.format(variable, data.var()[variable], v_exp[variable] + e_var[variable]))

Output AvgGlutRad: V(Y) = 6063.752154326935. Vd + Es = 6064.321044416751
Output AvgGlutRadGn: V(Y) = 2998.681254928705. Vd + Es = 2998.780016559973
Output AvgMutPop: V(Y) = 133418.15569267675. Vd + Es = 133420.99322904038
Output MaxMutPop: V(Y) = 235418.66930422568. Vd + Es = 235430.38992564275


vy = data.var()  # Total variance, V(Y)
v_exp = exp.var()  # Deterministic component, V_s
s_exp = v_exp/vy 
print('Fraction of deterministic variance:\n', s_exp)

Fraction of deterministic variance:
 AvgGlutRad      0.970698
AvgGlutRadGn    0.347255
AvgMutPop       0.227784
MaxMutPop       0.519812
dtype: float64


keys = ['S1', 'ST', 'S1_conf', 'ST_conf']  # Keys in the SALib returned dictionary
# Initialize the DataFrame containing our stochastic sensitivity indices
df_exp = pd.DataFrame()

for out in output_names:
    # Analyze the GSA outputs
    s_indices = sobol.analyze(problem, np.array(exp[out]), calc_second_order=False)

    # Use a dictionary to store the sensitivity indices and their confidence intervals
    s_dict = {x: s_indices[x] for x in keys}
    # Save the original sensitivity indices
    s_dict['oSi'] = s_dict['S1']
    s_dict['oST'] = s_dict['ST']
    # Scale the sensitivity indices with the fraction of deterministic variance
    s_dict['S1'] = s_dict['S1'] * s_exp[out]
    s_dict['ST'] = s_dict['ST'] * s_exp[out]
    # Name the inputs and outputs
    s_dict['input'] = problem['names']
    s_dict['output'] = [out] * len(s_indices['S1'])
    # Append the results for each of the outputs to the same dataframe
    temp = pd.DataFrame(s_dict)
    df_exp = pd.concat([df_exp, temp], ignore_index=1)

# Rename the DataFrame columns, changing the notation from S1 to Si
df_exp.columns = ['Si', 'ST', 'Si_conf', 'ST_conf', 'oSi', 'oST', 'input', 'output']

df_exp.head()


# Initialize a column using the name of the output
df_exp['S_exp'] = df_exp['output'].values
# Substitute that column with the corresponding fraction of variance due to deterministic effects based on the output
df_exp['S_exp'] = df_exp['S_exp'].map(s_exp.to_dict())
print(df_exp.head())

         Si        ST   Si_conf   ST_conf       oSi       oST     input  \
0  0.243631  0.506919  0.081006  0.093726  0.250986  0.522221   MutMxSz   
1 -0.000591  0.000310  0.001364  0.000090 -0.000609  0.000319    MutSSz   
2  0.095691  0.286182  0.036759  0.055437  0.098579  0.294821    MutSpP   
3 -0.000012  0.000331  0.001273  0.000090 -0.000012  0.000341  GlutMnSz   
4  0.065383  0.263578  0.047685  0.050147  0.067356  0.271535   GlutSSz   

       output     S_exp  
0  AvgGlutRad  0.970698  
1  AvgGlutRad  0.970698  
2  AvgGlutRad  0.970698  
3  AvgGlutRad  0.970698  
4  AvgGlutRad  0.970698


# Replace negative values with 0
df_exp[df_exp.select_dtypes("float64").columns] = df_exp.select_dtypes("float64").clip(lower=0)
df_exp.to_csv('det_S_ind.csv', index=False)  # Save deterministic sensitivity indices
df_exp.head()


ring_plot(df_exp, det_sto='det')  # The det_sto argument indicates whether our sensitivity indices are deterministic or stochastic


keys = ['S1', 'ST', 'S1_conf', 'ST_conf']  # Keys in the SALib returned dictionary

if n_real > 2:
    # Initialize the DataFrame containing our stochastic sensitivity indices
    df_var = pd.DataFrame()
    for out in output_names:
        # Analyze the GSA outputs
        s_indices = sobol.analyze(problem, np.array(var[out]), calc_second_order=False)

        # Use a dictionary to store the sensitivity indices and their confidence intervals
        s_dict = {x: s_indices[x] for x in keys}
        # Save the original sensitivity indices
        s_dict['oSi'] = s_dict['S1']
        s_dict['oST'] = s_dict['ST']
        # We do not scale the sensitivity indices as the variance decomposition is not continued from V[Y] (i.e., original ST = ST)
        s_dict['S1'] = s_dict['S1']
        s_dict['ST'] = s_dict['ST']
        # Name the inputs and outputs
        s_dict['input'] = problem['names']
        s_dict['output'] = [out] * len(s_indices['S1'])
        # Append the results for each of the outputs to the same dataframe
        temp = pd.DataFrame(s_dict)
        df_var = pd.concat([df_var, temp], ignore_index=1)

    # Rename the DataFrame columns, changing the notation from S1 to Si
    df_var.columns = ['Si', 'ST', 'Si_conf', 'ST_conf', 'oSi', 'oST', 'input', 'output']
    if not df_var.empty:
        print(df_var.head())
    else:
        print("No stochastic variance can be calculated without model repetitions (n_real=1):")
else:
    print("ERROR: No stochastic variance can be calculated without model repetitions (n_rel=1)")

         Si        ST   Si_conf   ST_conf       oSi       oST     input  \
0  0.183997  0.618793  0.071131  0.162488  0.183997  0.618793   MutMxSz   
1  0.010782  0.021679  0.021707  0.007525  0.010782  0.021679    MutSSz   
2  0.201237  0.456813  0.093894  0.127559  0.201237  0.456813    MutSpP   
3  0.009562  0.024853  0.011170  0.010834  0.009562  0.024853  GlutMnSz   
4  0.023953  0.290596  0.038795  0.083373  0.023953  0.290596   GlutSSz   

       output  
0  AvgGlutRad  
1  AvgGlutRad  
2  AvgGlutRad  
3  AvgGlutRad  
4  AvgGlutRad


if n_real > 2:
    df_var[df_var.select_dtypes("float64").columns] = df_var.select_dtypes("float64").clip(lower=0)
    df_var.to_csv('sto_S_ind.csv', index=False)
    print(df_var.head())
else:
    print("ERROR: No stochastic variance can be calculated without model repetitions(n_real=1)")

         Si        ST   Si_conf   ST_conf       oSi       oST     input  \
0  0.183997  0.618793  0.071131  0.162488  0.183997  0.618793   MutMxSz   
1  0.010782  0.021679  0.021707  0.007525  0.010782  0.021679    MutSSz   
2  0.201237  0.456813  0.093894  0.127559  0.201237  0.456813    MutSpP   
3  0.009562  0.024853  0.011170  0.010834  0.009562  0.024853  GlutMnSz   
4  0.023953  0.290596  0.038795  0.083373  0.023953  0.290596   GlutSSz   

       output  
0  AvgGlutRad  
1  AvgGlutRad  
2  AvgGlutRad  
3  AvgGlutRad  
4  AvgGlutRad


if n_real > 2:
    ring_plot(df_exp, det_sto='sto')
else:
    print("ERROR: No stochastic variance can be calculated without model repetitions(n_real=1)")


# Threshold for our behavioral output distribution
threshold = 70
# Plot the output distributions
for output in ['AvgMutPop']:
    data[output].hist(alpha=0.25, label=output, density=True, bins=50)
# Plot threshold line for behavioral distribution
plt.plot([threshold, threshold], [0, 0.006], '--k')
plt.legend()
plt.xlabel('Population')
plt.ylabel('Density')

Text(0, 0.5, 'Density')


print('Probability of AvgMutPop > {}: '.format(threshold), (data['AvgMutPop'] > threshold).sum()/len(data))

Probability of AvgMutPop > 70:  0.161720703125


data.describe()


# Read the GSA sample matrix that generated the outputs
inputs = pd.read_csv('wolf_gsa_matrix_100real.txt', sep='\t', header=None).drop([0], axis=1)  # The file included a column of sens_class and an empty column
inputs.columns = input_names
# Let's check everything looks good
inputs.head()


# Concatenate the columns of the outputs and the inputs
data_inputs = pd.concat([data, inputs], axis=1)
# Let's check everything looks good
data_inputs.tail()


# Set the condition for behavioral distribution
behavioral = data_inputs[data_inputs['AvgMutPop'] > threshold]
print(behavioral.head())

    AvgGlutRad  AvgGlutRadGn  AvgMutPop  MaxMutPop   MutMxSz   MutSSz  \
6     8.490367      0.026615       78.0      126.0  2.977539  0.57373   
13   10.786193      2.638279       80.0       95.0  2.977539  0.57373   
15    7.652617      0.800113      121.0      121.0  2.977539  0.57373   
17   10.407952      0.298364       82.0      135.0  2.977539  0.57373   
29    8.359962      3.549761      116.0      117.0  2.977539  0.57373   

      MutSpP  GlutMnSz  GlutSSz   GlutFzP   GlutFzA  GlutIniPop  
6   0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
13  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
15  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
17  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
29  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484


for input_var in input_names:
    plt.figure(input_var)
    behavioral[input_var].hist()
    plt.ylabel('Frequency')
    plt.xlabel(input_var)


for input_var in output_names:
    # Create a figure with two subplots (density histogram and cumulative probability)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
    # Plot the histograms of pre-intervention and post-intervention populations in the first subplot
    exp[input_var].hist(alpha=0.25, label='E[Y|X]', density=True, bins=500, ax=ax1)
    data[input_var].hist(alpha=0.25, label='All values', density=True, bins=500, ax=ax1)
    # Plot threshold line for behavioral distribution if input_var is 'AvgMutPop'
    if input_var == 'AvgMutPop':
        ax1.plot([threshold, threshold], [0, 0.006], '--k')    
    ax1.legend()
    ax1.set_xlabel(input_var)  # Use input_var as x-axis label
    ax1.set_ylabel('Density')
    ax1.set_xlim(0, 200)  # Set X-axis limit up to 200 units

    # Plot cumulative probabilities of pre-intervention and post-intervention populations in the second subplot
    sorted_exp = exp[input_var].sort_values()
    sorted_data = data[input_var].sort_values()
    cumulative_exp = (sorted_exp.rank() - 1) / len(sorted_exp)
    cumulative_data = (sorted_data.rank() - 1) / len(sorted_data)
    ax2.plot(sorted_exp, cumulative_exp, label='E[Y|X]')
    ax2.plot(sorted_data, cumulative_data, label='All values')
   # Plot threshold line if input_var is 'AvgMutPop'
    if input_var == 'AvgMutPop':
        ax2.axvline(x=threshold, color='r', linestyle='--')
    ax2.set_xlim(0, 200)  # Set X-axis limit up to 200 units
    ax2.legend()
    ax2.set_xlabel(input_var)  # Use input_var as x-axis label
    ax2.set_ylabel('Cumulative Probability')


    print('Fraction of deterministic variance:\n', s_exp)

Fraction of deterministic variance:
 AvgGlutRad      0.970698
AvgGlutRadGn    0.347255
AvgMutPop       0.227784
MaxMutPop       0.519812
dtype: float64

	AvgGlutPop	AvgGlutRad	AvgGlutRadGn	AvgMutPop	MaxMutPop
0	9.0	11.286599	7.157074	44.0	170.0
1	9.0	9.756715	1.126875	41.0	120.0
2	9.0	10.265316	4.335474	35.0	97.0
3	9.0	8.084866	18.202648	58.0	127.0
4	9.0	7.882991	5.344454	33.0	171.0

	AvgGlutRad	AvgGlutRadGn	AvgMutPop	MaxMutPop
0	11.286599	7.157074	44.0	170.0
1	9.756715	1.126875	41.0	120.0
2	10.265316	4.335474	35.0	97.0
3	8.084866	18.202648	58.0	127.0
4	7.882991	5.344454	33.0	171.0

	AvgGlutRad	AvgGlutRadGn	AvgMutPop	MaxMutPop
0	8.632314	8.760255	44.02	125.37
1	69.496089	30.452940	4.02	54.42
2	8.735444	8.722916	50.84	138.52
3	2.243327	9.651485	33.92	57.76
4	8.600231	9.113118	46.22	127.47

	AvgGlutRad	AvgGlutRadGn	AvgMutPop	MaxMutPop
0	1.616960	70.061261	555.7996	640.2731
1	217.446319	741.147586	356.4996	1778.8836
2	1.223908	69.342294	666.8144	496.8296
3	0.150063	75.278266	71.7736	72.1624
4	2.101137	67.433012	626.7316	534.3291

	AvgGlutRad	AvgGlutRadGn	AvgMutPop	MaxMutPop
count	1.024000e+06	1.024000e+06	1.024000e+06	1.024000e+06
mean	4.641985e+01	3.964457e+01	9.355988e+01	2.122230e+02
std	7.787010e+01	5.476022e+01	3.652645e+02	4.851996e+02
min	1.009277e-01	1.116429e-12	0.000000e+00	3.000000e+00
25%	3.589138e+00	2.996924e+00	1.000000e+00	2.400000e+01
50%	1.355338e+01	1.514644e+01	8.000000e+00	5.000000e+01
75%	4.856505e+01	5.431364e+01	3.700000e+01	1.410000e+02
max	5.172543e+02	3.131049e+02	7.877000e+03	8.266000e+03

Notebook 2: Global Sensitivity and Uncertainty Analysis of Model Results¶

Sensitivity Analysis: Variance Decomposition for Stochastic Models¶

Definition of the inputs, distributions, number of realizations and outputs of the analysis¶

1. Decomposition of the Deterministic Variance: Deterministic Sensitivity Indices¶

First step: Law of total variance¶

Second step: Decomposition of the deterministic component, $V_{d}$¶

2. Decomposition of the Stochastic Variance: Stochastic Sensitivity Indices¶

Uncertainty Analysis¶

Probability of failure of individual shepherd vs. the regional response¶

Important Take-home Message¶

	Si	ST	Si_conf	ST_conf	oSi	oST	input	output
0	0.243631	0.506919	0.081006	0.093726	0.250986	0.522221	MutMxSz	AvgGlutRad
1	-0.000591	0.000310	0.001364	0.000090	-0.000609	0.000319	MutSSz	AvgGlutRad
2	0.095691	0.286182	0.036759	0.055437	0.098579	0.294821	MutSpP	AvgGlutRad
3	-0.000012	0.000331	0.001273	0.000090	-0.000012	0.000341	GlutMnSz	AvgGlutRad
4	0.065383	0.263578	0.047685	0.050147	0.067356	0.271535	GlutSSz	AvgGlutRad

	MutMxSz	MutSSz	MutSpP	GlutMnSz	GlutSSz	GlutFzP	GlutFzA	GlutIniPop
0	2.977539	0.57373	0.160381	1.38584	1.47334	0.182373	0.018721	8.396484
1	2.977539	0.57373	0.160381	1.38584	1.47334	0.182373	0.018721	8.396484
2	2.977539	0.57373	0.160381	1.38584	1.47334	0.182373	0.018721	8.396484
3	2.977539	0.57373	0.160381	1.38584	1.47334	0.182373	0.018721	8.396484
4	2.977539	0.57373	0.160381	1.38584	1.47334	0.182373	0.018721	8.396484

	AvgGlutRad	AvgGlutRadGn	AvgMutPop	MaxMutPop	MutMxSz	MutSSz	MutSpP	GlutMnSz	GlutSSz	GlutFzP	GlutFzA	GlutIniPop
1023995	5.129673	8.557813	86.0	96.0	4.449707	0.293799	0.254263	1.247607	4.035791	0.132368	0.191929	5.006836
1023996	6.359249	8.077404	52.0	111.0	4.449707	0.293799	0.254263	1.247607	4.035791	0.132368	0.191929	5.006836
1023997	7.688465	18.294091	38.0	114.0	4.449707	0.293799	0.254263	1.247607	4.035791	0.132368	0.191929	5.006836
1023998	7.205223	39.904821	43.0	97.0	4.449707	0.293799	0.254263	1.247607	4.035791	0.132368	0.191929	5.006836
1023999	9.166289	46.482640	40.0	114.0	4.449707	0.293799	0.254263	1.247607	4.035791	0.132368	0.191929	5.006836