Authors: A. Carmona-Cabrero and R. Muñoz-Carpena, University of Florida, carpena@ufl.edu
Date: March 2024
After running the simulations for the second GSA sample matrix obtained from the MCF intervention, we can compare the output distributions and evaluate the effectiveness of our intervention.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from SALib.sample import saltelli
from SALib.analyze import sobol
from ring_plot_funct import ring_plot
output_names = ['AvgGlutPop', 'AvgGlutRad', 'AvgGlutRadGn', 'AvgMutPop', 'MaxMutPop', 'MinMutPop'] # Output names
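For context, the post-intervention output file analyzed below comes from re-running the model on a second Saltelli sample in which the MutSpP distribution was restricted by the MCF intervention. The snippet below is only a sketch of how such a sample matrix could be regenerated with SALib: the number of inputs, the companion input names, and all bounds are illustrative placeholders, not the values used for the actual wolf-sheep runs.
# Illustrative sketch of regenerating a post-intervention sample matrix (placeholder values):
# only MutSpP is a real model input here; the other names and all bounds are hypothetical.
problem_post = {
    'num_vars': 3,
    'names': ['MutSpP', 'input_2', 'input_3'],
    'bounds': [[0.06, 0.10],   # MutSpP range narrowed by the MCF intervention (illustrative)
               [0.0, 1.0],
               [0.0, 1.0]]
}
param_values_post = saltelli.sample(problem_post, 1024)  # redraw the Saltelli sample and re-run the model over it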
We load the pre-intervention output file:
pre_intervention = pd.read_csv('wolf.out', header=None, sep=' ') # Pre-intervention model outputs; the 4th column is AvgMutPop (sheep population)
pre_intervention.drop(6, axis=1, inplace=True) # A trailing space creates an empty last column; drop it
pre_intervention.columns = output_names
pre_intervention.head()
|   | AvgGlutPop | AvgGlutRad | AvgGlutRadGn | AvgMutPop | MaxMutPop | MinMutPop |
|---|---|---|---|---|---|---|
| 0 | 9.0 | 11.286599 | 7.157074 | 44.0 | 170.0 | 0.0 |
| 1 | 9.0 | 9.756715 | 1.126875 | 41.0 | 120.0 | 0.0 |
| 2 | 9.0 | 10.265316 | 4.335474 | 35.0 | 97.0 | 0.0 |
| 3 | 9.0 | 8.084866 | 18.202648 | 58.0 | 127.0 | 0.0 |
| 4 | 9.0 | 7.882991 | 5.344454 | 33.0 | 171.0 | 0.0 |
We load the post-intervention output file:
post_intervention = pd.read_csv('wolf_int1.out', header=None, sep=' ')
post_intervention.drop(6, axis=1, inplace=True) # A trailing space creates an empty last column; drop it
post_intervention.columns = output_names
post_intervention.head()
|   | AvgGlutPop | AvgGlutRad | AvgGlutRadGn | AvgMutPop | MaxMutPop | MinMutPop |
|---|---|---|---|---|---|---|
| 0 | 9.0 | 16.075886 | 4.244997 | 88.0 | 134.0 | 0.0 |
| 1 | 9.0 | 15.686430 | 3.131929 | 24.0 | 136.0 | 0.0 |
| 2 | 9.0 | 17.756265 | 19.466610 | 223.0 | 225.0 | 0.0 |
| 3 | 9.0 | 16.885292 | 4.704905 | 5.0 | 129.0 | 0.0 |
| 4 | 9.0 | 15.312204 | 24.399949 | 3.0 | 109.0 | 0.0 |
We compare the pre-intervention and post-intervention distributions for the sheep population:
# Threshold for our behavioral output distribution
threshold = 70
# Create a figure with two subplots (density histogram and cumulative probability)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
# Plot the histograms of pre-intervention and post-intervention populations in the first subplot
pre_intervention['AvgMutPop'].hist(alpha=0.25, label='pre-intervention', density=True, bins=500, ax=ax1)
post_intervention['AvgMutPop'].hist(alpha=0.25, label='post-intervention', density=True, bins=500, ax=ax1)
# Plot threshold line for behavioral distribution
ax1.axvline(x=threshold, color='k', linestyle='--')
ax1.legend()
ax1.set_xlabel('Population')
ax1.set_ylabel('Density')
ax1.set_xlim(0, 200) # Set X-axis limit up to 200 units
# Plot cumulative probabilities of pre-intervention and post-intervention populations in the second subplot
sorted_pre_intervention = pre_intervention['AvgMutPop'].sort_values()
sorted_post_intervention = post_intervention['AvgMutPop'].sort_values()
cumulative_prob_pre = (sorted_pre_intervention.rank() - 1) / len(sorted_pre_intervention)
cumulative_prob_post = (sorted_post_intervention.rank() - 1) / len(sorted_post_intervention)
ax2.plot(sorted_pre_intervention, cumulative_prob_pre, label='pre-intervention')
ax2.plot(sorted_post_intervention, cumulative_prob_post, label='post-intervention')
# Plot threshold line
ax2.axvline(x=threshold, color='r', linestyle='--')
ax2.set_xlim(0, 200) # Set X-axis limit up to 200 units
ax2.legend()
ax2.set_xlabel('Population')
ax2.set_ylabel('Cumulative Probability')
[Figure: density histograms (left) and empirical CDFs (right) of AvgMutPop, pre- vs post-intervention, with the 70-sheep threshold marked]
We can see that part of the post-intervention distribution still lies to the left of the threshold line: the intervention was not enough to guarantee an average population of at least 70 sheep in most cases. But how effective was it for our management objective (sheep population > 70)?
p_over_threshold_pre_intervention = len(pre_intervention[pre_intervention['AvgMutPop'] > threshold])/len(pre_intervention) # P(AvgMutPop > 70) before the intervention
p_over_threshold_post_intervention = len(post_intervention[post_intervention['AvgMutPop'] > threshold])/len(post_intervention) # P(AvgMutPop > 70) after the intervention
print('Probability of exceeding the threshold before intervention: {}'.format(np.round(p_over_threshold_pre_intervention, 2)))
print('Probability of exceeding the threshold after intervention: {}'.format(np.round(p_over_threshold_post_intervention, 2)))
print('Probability increase due to intervention: {}'.format(np.round(p_over_threshold_post_intervention - p_over_threshold_pre_intervention, 2)))
Probability of exceeding the threshold before intervention: 0.16
Probability of exceeding the threshold after intervention: 0.21
Probability increase due to intervention: 0.05
So the intervention increased the probability of a sheep population > 70 only slightly. To assess whether this small difference is statistically meaningful, we test whether the two distributions differ using the two-sample Kolmogorov-Smirnov test:
# Sort the data and normalize both samples by the pre-intervention maximum so they share a common scale
pre_sorted = np.sort(pre_intervention['AvgMutPop'])/max(pre_intervention['AvgMutPop'])
post_sorted = np.sort(post_intervention['AvgMutPop'])/max(pre_intervention['AvgMutPop'])
# Calculate the cumulative probabilities
prob1 = np.arange(1, len(pre_sorted) + 1) / len(pre_sorted)
prob2 = np.arange(1, len(post_sorted) + 1) / len(post_sorted)
# Plot CDF for pre intervention
plt.plot(pre_sorted, prob1, label='pre-intervention')
# Plot CDF for post intervention
plt.plot(post_sorted, prob2, label='post-intervention')
# Add labels and legend
plt.xlabel('Normalized AvgMutPop')
plt.ylabel('Cumulative Probability')
plt.title('Normalized CDFs')
plt.legend()
# Show plot
plt.grid(True)
plt.show()
# Two-sample Kolmogorov-Smirnov test on the raw AvgMutPop samples
from scipy.stats import ks_2samp
ks_test = ks_2samp(pre_intervention['AvgMutPop'], post_intervention['AvgMutPop'],
alternative='two-sided')
print('Maximum discrepancy between the distributions is {:4f}, p-value = {:4f} (H0: the distributions are the same)'.format(ks_test[0], ks_test[1]))
Maximum discrepancy between the distributions is 0.084778, p-value = 0.000000 (H0: the distributions are the same)
So the distributions are statistically different (p-value ≈ 0), but the probability of success (exceedance) is probably still too small for the shepherd to pay the bills: after the intervention the objective is met roughly once every 5 years (0.21, return period ≈ 1/0.21 ≈ 5 years), whereas before the intervention it was met only about once every 6 years (0.16, 1/0.16 ≈ 6 years). Now we can discuss why, and whether this is enough or another intervention is needed. Additional MCFs could be run on the new results, defining new interventions on other model inputs, until the objective is reached with an acceptable probability.
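As a sketch of what such an additional filtering round could look like, the post-intervention runs could be split into behavioral and non-behavioral sets and the input distributions of the two sets compared with the KS test to find the next candidate input for intervention. The helper below is illustrative: `sample_post` (the post-intervention GSA input sample, one column per model input, row-aligned with the output file) is a hypothetical name and is not defined in this notebook.
from scipy.stats import ks_2samp

def mcf_ks_screening(input_sample, output, threshold):
    """Monte Carlo filtering sketch: split runs into behavioral (output > threshold) and
    non-behavioral sets and return the two-sample KS statistic and p-value for each input.
    Assumes input_sample rows are aligned with the output rows."""
    behavioral = output > threshold
    results = {}
    for name in input_sample.columns:
        stat, p_value = ks_2samp(input_sample.loc[behavioral, name],
                                 input_sample.loc[~behavioral, name])
        results[name] = (stat, p_value)
    return results

# Hypothetical usage: inputs with large KS statistics (small p-values) drive the
# behavioral/non-behavioral split and are candidates for the next intervention.
# mcf_ks_screening(sample_post, post_intervention['AvgMutPop'], threshold)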
Another interesting idea is to see how the changes in the distribution of MutSpP affected the other outputs. We saw from our GSA that this input was important for the AvgGlutRad output.
# Create a figure with two subplots (density histogram and cumulative probability)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
# Plot the histograms of pre-intervention and post-intervention populations in the first subplot
pre_intervention['AvgGlutRad'].hist(alpha=0.25, label='pre-intervention', density=True, bins=50, ax=ax1)
post_intervention['AvgGlutRad'].hist(alpha=0.25, label='post-intervention', density=True, bins=50, ax=ax1)
ax1.legend()
ax1.set_xlabel('AvgGlutRad')
ax1.set_ylabel('Density')
# Plot cumulative probabilities of pre-intervention and post-intervention populations in the second subplot
sorted_pre_intervention = pre_intervention['AvgGlutRad'].sort_values()
sorted_post_intervention = post_intervention['AvgGlutRad'].sort_values()
cumulative_prob_pre = (sorted_pre_intervention.rank() - 1) / len(sorted_pre_intervention)
cumulative_prob_post = (sorted_post_intervention.rank() - 1) / len(sorted_post_intervention)
ax2.plot(sorted_pre_intervention, cumulative_prob_pre, label='pre-intervention')
ax2.plot(sorted_post_intervention, cumulative_prob_post, label='post-intervention')
# (No threshold line here: the 70-sheep population threshold does not apply to AvgGlutRad)
ax2.legend()
ax2.set_xlabel('AvgGlutRad')
ax2.set_ylabel('Cumulative Probability')
[Figure: density histograms (left) and empirical CDFs (right) of AvgGlutRad, pre- vs post-intervention]
We can see that the intervention also shifts the distribution of this output.
# Sort the data and normalize both samples by the pre-intervention maximum so they share a common scale
pre_sorted = np.sort(pre_intervention['AvgGlutRad'])/max(pre_intervention['AvgGlutRad'])
post_sorted = np.sort(post_intervention['AvgGlutRad'])/max(pre_intervention['AvgGlutRad'])
# Calculate the cumulative probabilities
prob1 = np.arange(1, len(pre_sorted) + 1) / len(pre_sorted)
prob2 = np.arange(1, len(post_sorted) + 1) / len(post_sorted)
# Plot CDF for pre intervention
plt.plot(pre_sorted, prob1, label='pre-intervention')
# Plot CDF for post intervention
plt.plot(post_sorted, prob2, label='post-intervention')
# Add labels and legend
plt.xlabel('Normalized AvgGlutRad')
plt.ylabel('Cumulative Probability')
plt.title('Normalized CDFs')
plt.legend()
# Show plot
plt.grid(True)
plt.show()
# Two-sample Kolmogorov-Smirnov test on the raw AvgGlutRad samples
from scipy.stats import ks_2samp
ks_test = ks_2samp(pre_intervention['AvgGlutRad'], post_intervention['AvgGlutRad'],
alternative='two-sided')
print('Maximum discrepancy between the distributions is {:4f}, p-value = {:4f} (H0: the distributions are the same)'.format(ks_test[0], ks_test[1]))
Maximum discrepancy between the distributions is 0.148704, p-value = 0.000000 (H0: the distributions are the same)
So the distributions are also statistically different: the single input selected for the intervention had effects on the other outputs, as expected from the interactions we quantified previously (Notebook 3).
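For reference, those interaction effects can be summarized from a Sobol analysis as the difference between the total-order and first-order indices. The sketch below assumes the SALib problem definition (`problem`) and the model output vector (`Y`) from the GSA in Notebook 3, neither of which is defined in this notebook, so the analysis lines are left commented out.
from SALib.analyze import sobol

# Sketch only: 'problem' (SALib problem definition) and 'Y' (model output vector for one
# output, e.g. AvgGlutRad, evaluated on the Saltelli sample) come from Notebook 3.
# Si = sobol.analyze(problem, Y, calc_second_order=True)
# interaction = np.array(Si['ST']) - np.array(Si['S1'])  # total minus first order: interaction share per input
# for name, inter in zip(problem['names'], interaction):
#     print('{}: interaction contribution = {:.3f}'.format(name, inter))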