Notebook 3: Monte Carlo Filtering (MCF): Design of interventions

Authors: A. Carmona-Cabrero and R. Muñoz-Carpena, University of Florida, carpena@ufl.edu

Date: March 2024

We have explored how sensitive the outputs are to the inputs. Now we will employ Monte Carlo Filtering (MCF) to explore whether the behavioural output (defined by a desired threshold on the output) is favored by a narrower range of the important input factors we identified with GSA. This allows us to define management interventions objectively.
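As a minimal illustrative sketch (toy inputs and a toy output, not the Wolf Sheep Predation model), MCF simply splits a Monte Carlo sample into behavioural and non-behavioural runs using an output threshold and then compares the resulting input sub-samples:

In [ ]:
# Hedged toy sketch of the MCF idea (made-up data, for illustration only)
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
toy = pd.DataFrame({'x1': rng.uniform(0, 1, 1000),    # input that drives the output
                    'x2': rng.uniform(0, 1, 1000)})   # input with little effect
toy['y'] = toy['x1'] + 0.1 * rng.normal(size=1000)    # toy output
behav = toy[toy['y'] > 0.7]       # behavioural runs (output above the threshold)
nonbehav = toy[toy['y'] <= 0.7]   # non-behavioural runs
# The x1 sub-samples separate clearly, while the x2 sub-samples barely differ
print(behav[['x1', 'x2']].mean())
print(nonbehav[['x1', 'x2']].mean())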

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from SALib.sample import saltelli
from SALib.analyze import sobol
from ring_plot_funct import ring_plot
In [2]:
input_names = ['MutMxSz', 'MutSSz', 'MutSpP', 'GlutMnSz', 'GlutSSz', 'GlutFzP', 'GlutFzA', 'GlutIniPop']
problem = {'num_vars': len(input_names),
           'names': input_names,
           'bounds': [[1,10], [0.1,5], [0.01,0.3], [0.1,2], [0.1,5], [0.01,0.2], [0.01,0.2], [3,9]]
           }
n_real = 100  # Number of realizations (model runs with the same inputs)
output_names = ['AvgGlutPop', 'AvgGlutRad', 'AvgGlutRadGn', 'AvgMutPop', 'MaxMutPop', 'MinMutPop']  # Output names

data = pd.read_csv('wolf.out', header=None, sep=' ')  # Read the model outputs; the main output of interest (AvgMutPop) is in the 4th column
data.drop(6, axis=1, inplace=True)  # A trailing space creates an empty last column; drop it
data.columns = output_names
data.head()
Out[2]:
AvgGlutPop AvgGlutRad AvgGlutRadGn AvgMutPop MaxMutPop MinMutPop
0 9.0 11.286599 7.157074 44.0 170.0 0.0
1 9.0 9.756715 1.126875 41.0 120.0 0.0
2 9.0 10.265316 4.335474 35.0 97.0 0.0
3 9.0 8.084866 18.202648 58.0 127.0 0.0
4 9.0 7.882991 5.344454 33.0 171.0 0.0
In [3]:
# Read the GSA sample matrix that generated the outputs
inputs = pd.read_csv('wolf_gsa_matrix_100real.txt', sep='\t', header=None).drop([0], axis=1)  # The file includes a leading sens_class column, which we drop
inputs.columns = input_names
# Let's check everything looks good
inputs.head()
Out[3]:
MutMxSz MutSSz MutSpP GlutMnSz GlutSSz GlutFzP GlutFzA GlutIniPop
0 2.977539 0.57373 0.160381 1.38584 1.47334 0.182373 0.018721 8.396484
1 2.977539 0.57373 0.160381 1.38584 1.47334 0.182373 0.018721 8.396484
2 2.977539 0.57373 0.160381 1.38584 1.47334 0.182373 0.018721 8.396484
3 2.977539 0.57373 0.160381 1.38584 1.47334 0.182373 0.018721 8.396484
4 2.977539 0.57373 0.160381 1.38584 1.47334 0.182373 0.018721 8.396484

Let's define a threshold for the desired population of Muttons:

In [4]:
threshold = 70
In [5]:
data.drop(['MinMutPop', 'AvgGlutPop'], axis=1, inplace=True)  # Drop MinMutPop (always 0) and AvgGlutPop (not needed here)
output_names = output_names[1:-1]  # Remove 'AvgGlutPop' (first) and 'MinMutPop' (last) from the output names
data.head()
Out[5]:
AvgGlutRad AvgGlutRadGn AvgMutPop MaxMutPop
0 11.286599 7.157074 44.0 170.0
1 9.756715 1.126875 41.0 120.0
2 10.265316 4.335474 35.0 97.0
3 8.084866 18.202648 58.0 127.0
4 7.882991 5.344454 33.0 171.0

We can plot the distribution of the average sheep (Mutton) population and calculate how likely it is to exceed 70:

In [6]:
# Plot the output distributions

# Create a figure with two subplots
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# Plot the output distributions in the first subplot
for output in ['AvgMutPop']:
    data[output].hist(alpha=0.25, label=output, density=True, bins=50, ax=ax1)
    # Plot threshold line for behavioral distribution
    ax1.plot([threshold, threshold], [0, 0.006], '--k')
ax1.legend()
ax1.set_xlabel('Population')
ax1.set_ylabel('Density')
#ax1.set_xlim(0, 200)  # Set X-axis limit up to 200 units

# Plot cumulative probability in the second subplot
sorted_data = data['AvgMutPop'].sort_values()
cumulative_prob = (sorted_data.rank() - 1) / len(sorted_data)
ax2.plot(sorted_data, cumulative_prob)
# Plot threshold line
ax2.axvline(x=threshold, color='r', linestyle='--')
ax2.set_xlabel('Population')
ax2.set_ylabel('Cumulative Probability')
ax2.set_xlim(0, 200)  # Set X-axis limit up to 200 units

# Calculate also how likely it is that the sheep population would be over the threshold
print()
print('Probability of AvgMutPop > {}: '.format(threshold), (data['AvgMutPop'] > threshold).sum()/len(data))
Probability of AvgMutPop > 70:  0.161720703125

We can also see some statistics of our outputs:

In [7]:
data.describe()
Out[7]:
AvgGlutRad AvgGlutRadGn AvgMutPop MaxMutPop
count 1.024000e+06 1.024000e+06 1.024000e+06 1.024000e+06
mean 4.641985e+01 3.964457e+01 9.355988e+01 2.122230e+02
std 7.787010e+01 5.476022e+01 3.652645e+02 4.851996e+02
min 1.009277e-01 1.116429e-12 0.000000e+00 3.000000e+00
25% 3.589138e+00 2.996924e+00 1.000000e+00 2.400000e+01
50% 1.355338e+01 1.514644e+01 8.000000e+00 5.000000e+01
75% 4.856505e+01 5.431364e+01 3.700000e+01 1.410000e+02
max 5.172543e+02 3.131049e+02 7.877000e+03 8.266000e+03

Now, let's relate those output values to the input values that generated them.
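Since the two tables will simply be concatenated column-wise, it is worth a quick check (an optional addition, not in the original notebook) that they have the same number of rows, i.e. that each output row still matches the input row that produced it:

In [ ]:
# Hedged sanity check: outputs and the (repeated) input matrix must align row by row
assert len(data) == len(inputs), 'outputs and inputs have a different number of rows'
print(len(data), len(inputs))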

In [8]:
# Concatenate the columns of the outputs and the inputs
data_inputs = pd.concat([data, inputs], axis=1)
# Let's check everything looks good
data_inputs.tail()
Out[8]:
AvgGlutRad AvgGlutRadGn AvgMutPop MaxMutPop MutMxSz MutSSz MutSpP GlutMnSz GlutSSz GlutFzP GlutFzA GlutIniPop
1023995 5.129673 8.557813 86.0 96.0 4.449707 0.293799 0.254263 1.247607 4.035791 0.132368 0.191929 5.006836
1023996 6.359249 8.077404 52.0 111.0 4.449707 0.293799 0.254263 1.247607 4.035791 0.132368 0.191929 5.006836
1023997 7.688465 18.294091 38.0 114.0 4.449707 0.293799 0.254263 1.247607 4.035791 0.132368 0.191929 5.006836
1023998 7.205223 39.904821 43.0 97.0 4.449707 0.293799 0.254263 1.247607 4.035791 0.132368 0.191929 5.006836
1023999 9.166289 46.482640 40.0 114.0 4.449707 0.293799 0.254263 1.247607 4.035791 0.132368 0.191929 5.006836
In [9]:
# Set the condition for behavioral distribution
behavioral = data_inputs[data_inputs['AvgMutPop'] > threshold]
print(behavioral.head())
    AvgGlutRad  AvgGlutRadGn  AvgMutPop  MaxMutPop   MutMxSz   MutSSz  \
6     8.490367      0.026615       78.0      126.0  2.977539  0.57373   
13   10.786193      2.638279       80.0       95.0  2.977539  0.57373   
15    7.652617      0.800113      121.0      121.0  2.977539  0.57373   
17   10.407952      0.298364       82.0      135.0  2.977539  0.57373   
29    8.359962      3.549761      116.0      117.0  2.977539  0.57373   

      MutSpP  GlutMnSz  GlutSSz   GlutFzP   GlutFzA  GlutIniPop  
6   0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
13  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
15  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
17  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
29  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
In [10]:
# Set the condition for nonbehavioral distribution
nonbehavioral = data_inputs[data_inputs['AvgMutPop'] <= threshold]
print(nonbehavioral.head())
   AvgGlutRad  AvgGlutRadGn  AvgMutPop  MaxMutPop   MutMxSz   MutSSz  \
0   11.286599      7.157074       44.0      170.0  2.977539  0.57373   
1    9.756715      1.126875       41.0      120.0  2.977539  0.57373   
2   10.265316      4.335474       35.0       97.0  2.977539  0.57373   
3    8.084866     18.202648       58.0      127.0  2.977539  0.57373   
4    7.882991      5.344454       33.0      171.0  2.977539  0.57373   

     MutSpP  GlutMnSz  GlutSSz   GlutFzP   GlutFzA  GlutIniPop  
0  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
1  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
2  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
3  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  
4  0.160381   1.38584  1.47334  0.182373  0.018721    8.396484  

Now we can map the outputs back to the input values that generated them, i.e., identify which input values are more likely to produce behavioural outputs (AvgMutPop > 70). A quick way of doing this is to plot the distributions of the input values that led to behavioural outputs and compare them with the non-behavioural ones.

In [11]:
for input_var in input_names:
    plt.figure(input_var)
    #plt.title(input_var)
    behavioral[input_var].hist(alpha=0.25, label='behavioral', density=True, bins=50)
    nonbehavioral[input_var].hist(alpha=0.25, label='nonbehavioral', density=True, bins=50)
    plt.legend()
    plt.ylabel('Density')
    plt.xlabel(input_var)

Before continuing, we also need to test whether the behavioural and non-behavioural input sub-distributions are statistically different. An intervention on an important input is only warranted if its two sub-distributions differ. We test this with the two-sample Kolmogorov-Smirnov (KS) test. Note that the input values (x-axis) are normalized only to plot the CDFs on a common scale; the KS test itself is applied to the original values. For this test, H0: the distributions are the same, and we reject H0 (support the alternative) when the p-value < α for a desired significance level α.

In [12]:
# Sort and normalize the behavioural and non-behavioural input data for plotting their CDFs
for input_var in input_names:
    b_sorted = np.sort(behavioral[input_var])/max(behavioral[input_var])
    nonb_sorted = np.sort(nonbehavioral[input_var])/max(nonbehavioral[input_var])

    # Calculate the cumulative probabilities
    prob1 = np.arange(1, len(b_sorted) + 1) / len(b_sorted)
    prob2 = np.arange(1, len(nonb_sorted) + 1) / len(nonb_sorted)

    # Plot CDF for behavioural inputs
    plt.plot(b_sorted, prob1, label='behavioural')

    # Plot CDF for non-behavioural inputs
    plt.plot(nonb_sorted, prob2, label='non-behavioural')

    # Add labels and legend
    plt.xlabel(f'{input_var} Normalized')
    plt.ylabel('Cumulative Probability')
    plt.legend()

    # Show plot
    plt.grid(True)
    plt.show()

    # Two-sample Kolmogorov-Smirnov test on the original (unnormalized) input values
    from scipy.stats import ks_2samp
    ks_test = ks_2samp(behavioral[input_var], nonbehavioral[input_var],
                       alternative='two-sided')
    print('Maximum discrepancy between the distributions is {:4f}, p-value = {:4f} (H0: the distributions are the same)'.format(ks_test[0], ks_test[1]))
Maximum discrepancy between the distributions is 0.192565, p-value = 0.000000 (H0: the distributions are the same)
Maximum discrepancy between the distributions is 0.220262, p-value = 0.000000 (H0: the distributions are the same)
Maximum discrepancy between the distributions is 0.191908, p-value = 0.000000 (H0: the distributions are the same)
Maximum discrepancy between the distributions is 0.038349, p-value = 0.000000 (H0: the distributions are the same)
Maximum discrepancy between the distributions is 0.070009, p-value = 0.000000 (H0: the distributions are the same)
Maximum discrepancy between the distributions is 0.030574, p-value = 0.000000 (H0: the distributions are the same)
Maximum discrepancy between the distributions is 0.031733, p-value = 0.000000 (H0: the distributions are the same)
Maximum discrepancy between the distributions is 0.154237, p-value = 0.000000 (H0: the distributions are the same)
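The printed results above appear in the same order as input_names but are not labelled. As a convenience (a hedged addition, not part of the original analysis), the KS statistics can be collected into a small labelled table sorted by the maximum discrepancy D, which makes it clear that MutSSz, MutMxSz, MutSpP and GlutIniPop show the largest discrepancies:

In [ ]:
# Hedged sketch: tabulate the KS results per input, sorted by the D statistic
from scipy.stats import ks_2samp

rows = []
for name in input_names:
    ks = ks_2samp(behavioral[name], nonbehavioral[name], alternative='two-sided')
    rows.append((name, ks[0], ks[1]))
ks_summary = pd.DataFrame(rows, columns=['input', 'D', 'p_value']).sort_values('D', ascending=False)
print(ks_summary)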

Using the results above (histograms and KS tests), we can define management interventions that could increase the probability of AvgMutPop > 70. That is, an intervention should target input factors that meet the following conditions:
a) They can be managed, i.e. an intervention on them is possible (some factors cannot be managed!).
b) They are important according to the previous GSA (Notebook 2).
c) Their behavioural input distributions can be separated from the non-behavioural ones (histograms above).
d) The two sub-distributions are statistically different (KS test).

These interventions could affect one or more of the important inputs initially identified by GSA. Based on the criteria given above (a-d), some possible interventions would be:

  1. Reduce MutSSz
  2. Increase MutSpP
  3. Reduce GlutSSz
  4. Reduce GlutIniPop

Notice that some of the interventions may not be feasible. For example, legislation may not permit controlling the Glutton population.

Here, we will intervene on the important input factor MutSpP. In our model, this intervention is captured by changing the distribution of MutSpP. From the behavioural/non-behavioural density plots above, we can see that there is a higher density of behavioural outputs when MutSpP > 0.156. In this case the intervention would be centered on maintaining this important input above that value, for example by changing the "breed" of the sheep to a more fertile one, or by improving sheep nutrition with supplements or better grass. We will then evaluate this intervention by changing the distribution of MutSpP from the original U[0.01, 0.3] to U[0.156, 0.3] in the next code cell.
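Before doing so, as a hedged aside (not part of the original workflow), one simple way to approximate the 0.156 crossover from the behavioural/non-behavioural histograms is to compare the two normalized densities bin by bin:

In [ ]:
# Hedged sketch: approximate the MutSpP value above which the behavioural density
# exceeds the non-behavioural density (the plots above suggest ~0.156)
bins = np.linspace(0.01, 0.3, 51)
b_dens, _ = np.histogram(behavioral['MutSpP'], bins=bins, density=True)
nb_dens, _ = np.histogram(nonbehavioral['MutSpP'], bins=bins, density=True)
centers = 0.5 * (bins[:-1] + bins[1:])
crossover = centers[np.argmax(b_dens > nb_dens)]  # first bin where the behavioural density is higher
print('Approximate crossover at MutSpP ~ {:.3f}'.format(crossover))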

In [13]:
# Define the GSA input distributions
input_names = ['MutMxSz', 'MutSSz', 'MutSpP', 'GlutMnSz', 'GlutSSz', 'GlutFzP', 'GlutFzA', 'GlutIniPop']
problem2 = {'num_vars': len(input_names),
           'names': input_names,
           'bounds': [[1,10], [0.1,5], [0.156,0.3], [0.1,2], [0.1,5], [0.01,0.2], [0.01,0.2], [3,9]]
           }
l = 1024  # Base sample size; saltelli.sample generates l*(D+2) rows when calc_second_order=False
param_values2 = pd.DataFrame(saltelli.sample(problem2, l, calc_second_order=False), columns=input_names)  # calc_second_order=True would also sample for second-order interactions
print("GSA sample matrix size: {}".format(param_values2.shape))
param_values2.head()
GSA sample matrix size: (10240, 8)
Out[13]:
MutMxSz MutSSz MutSpP GlutMnSz GlutSSz GlutFzP GlutFzA GlutIniPop
0 1.013184 1.944678 0.220477 1.024951 2.829932 0.170405 0.055923 6.524414
1 7.270996 1.944678 0.220477 1.024951 2.829932 0.170405 0.055923 6.524414
2 1.013184 3.389795 0.220477 1.024951 2.829932 0.170405 0.055923 6.524414
3 1.013184 1.944678 0.274336 1.024951 2.829932 0.170405 0.055923 6.524414
4 1.013184 1.944678 0.220477 1.850635 2.829932 0.170405 0.055923 6.524414
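A quick optional check (not in the original notebook) confirms that the intervention bound on MutSpP was applied in the new sample:

In [ ]:
# Hedged sanity check: MutSpP should now lie within the intervention range [0.156, 0.3]
assert param_values2['MutSpP'].between(0.156, 0.3).all()
print(param_values2['MutSpP'].min(), param_values2['MutSpP'].max())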

And now let's add the sens_class column to the sample matrix so that it can be run on the HPC.

In [14]:
sample_matrix = pd.concat([pd.DataFrame(['sens_class']*param_values2.shape[0]), param_values2], axis=1)
sample_matrix.head()
n_real = 100  # We will only run 100 realizations this time because we do not intend to decompose the variance
sample_matrix_realizations = pd.DataFrame(np.repeat(sample_matrix.to_numpy(), repeats=n_real, axis=0))
sample_matrix_realizations.head()
Out[14]:
0 1 2 3 4 5 6 7 8
0 sens_class 1.013184 1.944678 0.220477 1.024951 2.829932 0.170405 0.055923 6.524414
1 sens_class 1.013184 1.944678 0.220477 1.024951 2.829932 0.170405 0.055923 6.524414
2 sens_class 1.013184 1.944678 0.220477 1.024951 2.829932 0.170405 0.055923 6.524414
3 sens_class 1.013184 1.944678 0.220477 1.024951 2.829932 0.170405 0.055923 6.524414
4 sens_class 1.013184 1.944678 0.220477 1.024951 2.829932 0.170405 0.055923 6.524414

Let's save the new sampling matrix to run the model with the intervention:

In [15]:
# Save GSA sample matrix
sample_matrix_realizations.to_csv('wolf_mcf_matrix_{}real.txt'.format(n_real), header=False, index=False, sep='\t')

Now, we can run new simulations using the new GSA sample matrix to evaluate how effective our intervention would be.
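Once those runs are complete, the effect of the intervention can be quantified by recomputing the exceedance probability and comparing it with the baseline value of about 0.162 obtained above. A minimal sketch, assuming the intervention outputs are written to a file named 'wolf_mcf.out' (a hypothetical name) with the same column layout as 'wolf.out':

In [ ]:
# Hedged sketch: compare pre- and post-intervention exceedance probabilities
# 'wolf_mcf.out' is a hypothetical file name; replace it with the actual output file
new_data = pd.read_csv('wolf_mcf.out', header=None, sep=' ')
new_data.drop(6, axis=1, inplace=True)  # drop the empty last column, as for 'wolf.out'
new_data.columns = ['AvgGlutPop', 'AvgGlutRad', 'AvgGlutRadGn',
                    'AvgMutPop', 'MaxMutPop', 'MinMutPop']
p_new = (new_data['AvgMutPop'] > threshold).mean()
print('Probability of AvgMutPop > {} after the intervention: {:.3f}'.format(threshold, p_new))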