-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgaussfit.py
172 lines (138 loc) · 5.85 KB
/
gaussfit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# Routines for gauss fitting.
import numpy as np
import scipy
from astropy import modeling
import matplotlib.pyplot as plt
import math
import warnings
import scipy.stats
class Peak:
def __init__(self, x, y):
self.x = x
self.y = y
self.true_center = None
self.width = None # the sigma value of the fitted Gaussian
# self.anderson_test = None
# self.dagostino_test = None
# self.shapiro_test = None
def gauss(x, amp, cen, wid):
"""
Gauss fitting function. Using the precision (tau) to define the width of the
distribution, as mentioned here:
https://en.wikipedia.org/wiki/Normal_distribution.
"""
return (amp / ((2*math.pi)**0.5 * wid)) * scipy.exp(-(x-cen)**2 / (2*wid**2))
def is_data_gaussian(data, peak):
"""
This function determines if the data is Gaussian using the Shapiro-Wilk test,
D'Agostino's K^2 test, and the Anderson-Darling test, as suggested in
https://machinelearningmastery.com/a-gentle-introduction-to-normality-tests-in-python/.
"""
shapiro_test, dagostino_test, anderson_test = True, True, True
alpha = 0.05
# Shapiro-Wilk test
shapiro_stat, shapiro_p = scipy.stats.shapiro(data)
# print("shapiro p value:", shapiro_p)
if shapiro_p <= alpha:
# print("Data point not Gaussian, as determined by Shapiro-Wilk test.")
shapiro_test = False
# Dagostino's K^2 test
dagostino_stat, dagostino_p = scipy.stats.normaltest(data)
# print("dagostino p value:", dagostino_p)
if dagostino_p <= alpha:
# print("Data not Gaussian, as determined by Shapiro-Wilk test.")
dagostino_test = False
# Anderson-Darling test
anderson_result = scipy.stats.anderson(data)
reject_H0_list = []
# print("anderson statistic:", anderson_result.statistic)
if anderson_result.statistic < anderson_result.critical_values[2]:
anderson_test = False
# Encompassing the nature of the test in the peak value
peak.anderson = anderson_test
peak.dagostino = dagostino_test
peak.shapiro = shapiro_test
# All tests fail
if not dagostino_test and not shapiro_test and not anderson_test:
return "hard_fail"
# All tests succeed
elif dagostino_test and shapiro_test and anderson_test:
return "success"
# Some tests fail and some tests succeed
else:
return "soft_fail"
def fit_gaussian(fits_file, rng, peak, show=False):
"""
This function obtains the fitting parameters for each Gaussian profile.
This includes the mean, expected max, and the standard deviation. It then
uses those parameters on a "continuous" domain to obtain a nice looking
Gaussian from which we can obtain each peak.
"""
fits_image = fits_file.image_data
# At a particular x value, this obtains the y values in the image so that
# we can get a set of points for which we want to fit the Gaussian.
ystart = rng[0]
yend = rng[1]
# Creates the the range of yvalues for which we want to fit our Gaussian
yrange = np.arange(ystart, yend)
# Grabs the intensity at each y value and the given x value
intensity = fits_image[yrange, peak.x]
# Determing if the intensity array is Gaussian. If it is not, then there is
# no reason to do a Gaussian fit, so we will just not modify the peak
# object.
# TODO: Determine what statistics are actually valid and should be used for
# the various statistical tests. However, these might not even be useful.
# It will be left here.
# if is_data_gaussian(intensity, peak) != "success":
# peak.true_center = "failed"
# return
# safety check to ensure same number of my points
assert(len(intensity) == len(yrange))
# x, y points for which we want to fit our Gaussian
x = np.array(yrange)
y = np.array(intensity)
# We use a continuous domain as suggested here so that we have a
# smooth Gaussian:
# https://stackoverflow.com/questions/42026554/fitting-a-better-gaussian-to-data-points
x_continuous = np.linspace(yrange[0], yrange[-1], 100)
# These are parameters used to construct the Gaussian.
# We divide by sum(y) for the reason cited here:
# https://stackoverflow.com/questions/44398770/python-gaussian-curve-fitting-gives-straight-line-supplied-amplitude-of-y
peak_value = y.max()
mean = sum(x*y)/sum(y)
sigma = sum(y*(x-mean)**2)/sum(y)
# Flag that determines if the fit was successful
fit_successful = True
# To determine the p0 values, we used the information here:
# https://stackoverflow.com/questions/29599227/fitting-a-gaussian-getting-a-straight-line-python-2-7.
with warnings.catch_warnings():
warnings.simplefilter("ignore")
try:
popt, pcov = scipy.optimize.curve_fit(gauss, x, y,
p0=[peak_value, mean, sigma],
maxfev=10000)
mean_intensity = popt[0]
mean_y = popt[1]
peak.true_center = mean_y
peak.width = popt[2]
fit = gauss(x_continuous, *popt)
except:
peak.true_center = peak.y
peak.width = "failed"
fit_successful = False
if show:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(yrange, intensity, color="red")
ax.plot(x_continuous, fit, label="fit")
ax.annotate(
"gaussian peak:(" + str(mean_y) + "," + str(mean_intensity) + ")",
xy=(mean_y, mean_intensity),
xytext=(mean_y+1, mean_intensity+1.5),
arrowprops=dict(facecolor="black", shrink=0.5)
)
plt.xlabel("ypixel")
plt.ylabel("intensity")
plt.title("Intensity v. ypixel for " + fits_file.get_file_name() + " at x=" + str(peak.x))
plt.show()
return fit_successful