-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdark_time_analysis.py
More file actions
145 lines (106 loc) · 5.51 KB
/
dark_time_analysis.py
File metadata and controls
145 lines (106 loc) · 5.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
###############################################################################################
'''
add the data to..
'''
###############################################################################################
import numpy as np
from scipy import stats
from scipy.optimize import curve_fit
###############################################################################################
def gaussian_function(x, y0, A1, w, xc):
return y0 + (A1 / (w * np.sqrt(np.pi/2))) * np.exp(-2 * ((x - xc)/w)**2)
def gaussian_fit_manual(inv_td_list):
hist, bin_edges = np.histogram(inv_td_list, bins=50, density=True)
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
# Initial guess for parameters
xc_guess = np.mean(inv_td_list) # center
w_guess = np.std(inv_td_list) # width
A_guess = np.max(hist) # amplitude
try:
# Fit Gaussian
popt, pcov = curve_fit(gaussian_function, bin_centers, hist,
p0=[xc_guess, w_guess, A_guess],
bounds=([0, 0, 0], [np.inf, np.inf, np.inf]))
xc, w, A = popt
return xc, w
except Exception as e:
print(f"Warning: Gaussian manual fitting failed: {e}")
# Return mean and std as fallback
return np.mean(inv_td_list), np.std(inv_td_list)
def gaussian_fit_mle(inv_td_list):
"""
Gaussian fit using scipy.stats, more robust fitting
Uses Maximum Likelihood Estimation (MLE) to find μ and σ
Handles outliers better
"""
try:
# Fit normal distribution
mu, sigma = stats.norm.fit(inv_td_list)
return mu, sigma # mu = xc, sigma = w
except Exception as e:
print(f"Warning: Gaussian fitting using MLE failed: {e}")
return np.mean(inv_td_list), np.std(inv_td_list)
def calculate_calibration_values(unspecific_dark_list, protein_names):
# Extract unique days, conditions
days = list(set(data_entry["day"] for data_entry in unspecific_dark_list))
conditions = list(set(data_entry["condition"] for data_entry in unspecific_dark_list))
calibration_results = {}
for day in days:
calibration_results[day] = {}
for protein in protein_names:
# Collect all Td values for this condition/day/protein
td_values = []
for data_entry in unspecific_dark_list:
if (
# data_entry["condition"] == condition and
data_entry["day"] == day and
data_entry["protein"] == protein):
try:
unspecific_locs = data_entry['unspecific_locs']
td_data = unspecific_locs['Td']
# Filter out zeros and negative values
valid_td = td_data[td_data > 0]
# valid_td = td_data[(td_data > 0) & (td_data < 100)]
# Append to the list
td_values.extend(valid_td)
except (KeyError, ValueError) as e:
print(f"Warning: Could not extract Td for {protein} on {day}: {e}")
continue
if len(td_values) > 10:
inv_td_values = 1.0 / np.array(td_values)
# Filter 1/Td values
valid_inv_td = inv_td_values[(inv_td_values > 0) & np.isfinite(inv_td_values)]
q1 = np.percentile(valid_inv_td, 1)
q99 = np.percentile(valid_inv_td, 99)
filtered_inv_td = valid_inv_td[(valid_inv_td >= q1) & (valid_inv_td <= q99)]
# Fit using mle function
try:
_, w = gaussian_fit_mle(filtered_inv_td)
xc = np.median(valid_inv_td)
N_calib_array = valid_inv_td/xc
N_calib = float(np.median(N_calib_array))
calibration_results[day][protein] = {
'xc_calib': xc,
'w_calib': w,
'N_calib': N_calib,
# 'N_calib_array': N_calib_array,
'num_calib_Td': len(td_values)
}
print(f"{day}_{protein}: xc={xc:.4f}, w={w:.4f}, N_calib={N_calib:.4f}, n={len(td_values)}")
except Exception as e:
print(f"Warning: Fitting failed for {protein} on {day}: {e}")
calibration_results[day][protein] = {
'xc_calib': 0,
'w_calib': 0,
'N_calib': 0,
# 'N_calib_array': 0,
'num_calib_Td': len(td_values)
}
# else:
# print(f"No data for {protein} on {day} in {condition} (n={len(td_values)})")
# calibration_results[condition][day][protein] = {
# 'xc_calib': 0,
# 'w_calib': 0,
# 'num_calib_Td': len(td_values)
# }
return calibration_results