import bisect
import os
import cPickle
import numpy as np
import h5py
from dials.array_family import flex
from dxtbx.imageset import MemReader, MemMasker
from dxtbx.datablock import DataBlockFactory
from dxtbx.imageset import ImageSet, ImageSetData
from cxid9114.spots import count_spots, spot_utils
try:
import psana
has_psana = True
except ImportError:
    has_psana = False


class FormatInMemory:
"""
this class is a special image type
necessary to create cctbx imagesets and
datablocks from numpy array images
and masks.
"""
def __init__(self, image, mask=None):
self.image = image
if image.dtype != np.float64:
self.image = self.image.astype(np.float64)
        if mask is None:
            self.mask = np.ones_like(self.image).astype(np.bool)
        else:
            assert mask.shape == image.shape
            assert mask.dtype == bool
            self.mask = mask
def get_raw_data(self):
return flex.double(self.image)
def get_mask(self, goniometer=None):
return flex.bool(self.mask)
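
# A minimal sketch of using FormatInMemory directly (not part of the original file;
# the array shape below is hypothetical -- any 2D numpy image works):
# >> img = np.random.random((185, 388))
# >> frame = FormatInMemory(image=img)
# >> raw = frame.get_raw_data()   # flex.double copy of the image
# >> msk = frame.get_mask()       # flex.bool, all True since no mask was given
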
def datablock_from_numpyarrays(image, detector, beam, mask=None):
"""
So that one can do e.g.
>> dblock = datablock_from_numpyarrays( image, detector, beam)
>> refl = flex.reflection_table.from_observations(dblock, spot_finder_params)
    without having to utilize the hard disk
    :param image: numpy array image
    :param detector: dxtbx detector model
    :param beam: dxtbx beam model
    :param mask: numpy boolean mask, same shape as image (optional)
    :return: datablock for the image
"""
I = FormatInMemory(image=image, mask=mask)
reader = MemReader([I])
masker = MemMasker([I])
iset_Data = ImageSetData(reader, masker)
iset = ImageSet(iset_Data)
iset.set_beam(beam)
iset.set_detector(detector)
dblock = DataBlockFactory.from_imageset([iset])[0]
return dblock
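
# A hedged usage sketch (names are assumptions): the detector and beam models could
# come from an existing datablock json via dxtbx, then spots can be found in memory:
# >> ref_block = DataBlockFactory.from_json_file("datablock.json")[0]
# >> detector = ref_block.unique_detectors()[0]
# >> beam = ref_block.unique_beams()[0]
# >> dblock = datablock_from_numpyarrays(image, detector, beam)
# >> refls = flex.reflection_table.from_observations(dblock, spot_finder_params)
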
def open_flex(filename):
"""unpickle the flex file which requires flex import"""
with open(filename, "r") as f:
data = cPickle.load(f)
return data
def save_flex(data, filename):
"""save pickle"""
with open(filename, "w") as f:
cPickle.dump(data, f)
def psana_mask_to_aaron64_mask(mask_32panels, pickle_name, force=False):
"""
    FormatXTCCspad splits each CSPAD sensor (a 185 x 388 pixel 2x1) into its two
    ASICs, because the gap between the ASICs is not an integer multiple of the
    109.92 micron pixel size. This function applies the same split to masks made
    in the 32-panel psana format.
:param mask_32panels: psana-style mask (False means "mask this pixel")
:param pickle_name: string , where to store the flex mask to use for FormatXTCCspad
:param force: bool, whether to force an overwrite of pickle_name
:return: None
"""
flex_mask = []
for panelmask in mask_32panels:
flex_mask += [panelmask[:, :194], panelmask[:, 194:]]
# or this one liner for interactive ease
# flex_mask = [ l for sl in [(m[:, :194], m[:, 194:]) for m in mask_32panels] for l in sl]
# this just maps to flex.bool but also ensures arrays are contiguous as
# that is a requirement for flex (I think)
flex_mask = tuple(map(lambda x: flex.bool(np.ascontiguousarray(x)), flex_mask))
if not force:
if os.path.exists(pickle_name):
raise OSError("The file %s exists, use force=True to overwrite" % pickle_name)
with open(pickle_name, "w") as out:
cPickle.dump(flex_mask, out)
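
# A small sketch of the split this performs (standard CSPAD shapes; the pickle name
# is hypothetical): 32 psana panels of 185 x 388 pixels become 64 flex.bool ASICs
# of 185 x 194 pixels each
# >> psana_mask = np.ones((32, 185, 388), dtype=bool)
# >> psana_mask_to_aaron64_mask(psana_mask, "mask64.pkl", force=True)
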
def smooth(x, beta=10.0, window_size=11):
"""
https://glowingpython.blogspot.com/2012/02/convolution-with-numpy.html
Apply a Kaiser window smoothing convolution.
Parameters
----------
x : ndarray, float
The array to smooth.
Optional Parameters
-------------------
beta : float
Parameter controlling the strength of the smoothing -- bigger beta
results in a smoother function.
window_size : int
The size of the Kaiser window to apply, i.e. the number of neighboring
points used in the smoothing.
Returns
-------
smoothed : ndarray, float
A smoothed version of `x`.
"""
# make sure the window size is odd
if window_size % 2 == 0:
window_size += 1
# apply the smoothing function
s = np.r_[x[window_size - 1:0:-1], x, x[-1:-window_size:-1]]
w = np.kaiser(window_size, beta)
y = np.convolve(w / w.sum(), s, mode='valid')
# remove the extra array length convolve adds
b = int((window_size - 1) / 2)
smoothed = y[b:len(y) - b]
return smoothed
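
# A minimal sketch on synthetic data (not from the original file): smooth a noisy sine
# >> t = np.linspace(0, 2 * np.pi, 200)
# >> noisy = np.sin(t) + np.random.normal(scale=0.1, size=t.shape)
# >> clean = smooth(noisy, beta=10.0, window_size=11)   # same length as `noisy`
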
def is_outlier(points, thresh=3.5):
"""
http://stackoverflow.com/a/22357811/2077270
Returns a boolean array with True if points are outliers and False
otherwise.
Parameters:
-----------
points : An numobservations by numdimensions array of observations
thresh : The modified z-score to use as a threshold. Observations with
a modified z-score (based on the median absolute deviation) greater
than this value will be classified as outliers.
Returns:
--------
mask : A numobservations-length boolean array.
References:
----------
Boris Iglewicz and David Hoaglin (1993), "Volume 16: How to Detect and
Handle Outliers", The ASQC Basic References in Quality Control:
Statistical Techniques, Edward F. Mykytka, Ph.D., Editor.
"""
if len(points.shape) == 1:
points = points[:, None]
median = np.median(points, axis=0)
diff = np.sum((points - median) ** 2, axis=-1)
diff = np.sqrt(diff)
med_abs_deviation = np.median(diff)
modified_z_score = 0.6745 * diff / med_abs_deviation
return modified_z_score > thresh
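
# A minimal sketch on synthetic data: flag the obvious outlier in a 1D sample
# >> vals = np.array([1.0, 1.1, 0.9, 1.05, 0.95, 10.0])
# >> is_outlier(vals)           # -> [False, False, False, False, False, True]
# >> vals[~is_outlier(vals)]    # keep only the inliers
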
def write_cxi_peaks(h5, peaks_path, pkX, pkY, pkI):
"""
adds peaks to an hdf5 file in the CXI format
:param h5: h5 file handle
:param peaks_path: peaks group name
:param pkX: X-coordinate of peaks (list of lists)
:param pkY: Y-coordinate of peaks (list of lists like pkX)
    :param pkI: intensity of peaks (list of lists, like pkX)
"""
npeaks = np.array([len(x) for x in pkX])
max_n = max(npeaks)
Nimg = len(pkX)
data_x = np.zeros((Nimg, max_n), dtype=np.float32)
data_I = np.zeros_like(data_x)
data_y = np.zeros_like(data_x)
for i in xrange(Nimg):
n = npeaks[i]
data_x[i, :n] = pkX[i]
data_y[i, :n] = pkY[i]
data_I[i, :n] = pkI[i]
peaks = h5.create_group(peaks_path)
peaks.create_dataset('nPeaks', data=npeaks)
peaks.create_dataset('peakXPosRaw', data=data_x)
peaks.create_dataset('peakYPosRaw', data=data_y)
peaks.create_dataset('peakTotalIntensity', data=data_I)
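
# A hedged usage sketch (file name, group name and peak values are hypothetical):
# >> pkX = [[10.0, 55.2], [100.1]]   # two images with ragged peak lists
# >> pkY = [[3.0, 77.0], [42.5]]
# >> pkI = [[1e3, 2e3], [5e2]]
# >> with h5py.File("peaks.h5", "w") as h5:
# ...    write_cxi_peaks(h5, "entry_1/result_1", pkX, pkY, pkI)
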
def make_event_time(sec, nanosec, fid):
    """pack a psana event time: seconds in the upper 32 bits and nanoseconds
    in the lower 32 bits; returns (packed_time, psana.EventTime), or None if
    psana is not available"""
    if not has_psana:
        print("No psana")
        return
time = int((sec<<32)|nanosec)
et = psana.EventTime(time, fid)
return time, et
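
# A small sketch of the packing (made-up values; requires psana): seconds occupy the
# upper 32 bits and nanoseconds the lower 32 bits of the packed time
# >> t, et = make_event_time(sec=1463100000, nanosec=123456, fid=0x1a2b)
# >> t >> 32            # -> 1463100000, the seconds
# >> t & 0xffffffff     # -> 123456, the nanoseconds
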
class GetSpectrum:
"""
    The spectrum data is processed separately and stored in hdf5 files,
    with one spectrum per event time. When we process the CSPAD images we
    keep a copy of each CSPAD image's event time, and this class retrieves
    the spectrum from the hdf5 file for a given event time.
"""
def __init__(self,
spec_file="/home/dermen/cxid9114/spec_trace/traces.62.h5",
spec_file_times="event_time",
spec_file_data="line_mn",
spec_is_1d = True):
"""
        :param spec_file: path to the spectrum file which contains data and event times
:param spec_file_times: dataset path of the times in the hdf5 file
:param spec_file_data: dataset path of the spectrum data in the hdf5 file
:param spec_is_1d: is the spectrum data 1d? If not process it as if it were 2d
"""
self._f = h5py.File(spec_file, 'r')
times = self._f[spec_file_times][()]
self.spec_traces = self._f[spec_file_data]
self.order = times.argsort()
self.spec_sorted_times = times[self.order]
self.spec_is_1d = spec_is_1d
def get_spec(self, time):
"""
Retrieve the spectrum!
:param time: psana event time combo of sec and nanosec
:return: the spectrum data if found, else None
"""
if time not in self.spec_sorted_times:
print "No spectrum for given time %d" % time
return None
sorted_pos = bisect.bisect_left(self.spec_sorted_times, time)
trace_idx = self.order[sorted_pos]
data = self.spec_traces[trace_idx]
if self.spec_is_1d:
return data
#else:
# return self.project_fee_img(data)
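
# A hedged usage sketch (the default spec_file above points at the original author's
# environment; the event time below is hypothetical):
# >> spec_getter = GetSpectrum(spec_file="traces.62.h5")
# >> t, _ = make_event_time(sec=1463100000, nanosec=123456, fid=0x1a2b)
# >> trace = spec_getter.get_spec(t)   # 1D spectrum, or None if the time is absent
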
def images_and_refls_to_simview(prefix, imgs, refls):
    """dump per-shot images to <prefix>.h5 and the combined strong-spot
    reflection table to <prefix>_strong.pkl"""
refls_concat = spot_utils.combine_refls(refls)
refl_info = count_spots.group_refl_by_shotID(refls_concat)
refl_shotIds = refl_info.keys()
Nrefl = len( refl_shotIds)
Nimg = len( imgs)
assert(Nimg==Nrefl)
assert( all([ i in range(Nrefl) for i in refl_shotIds]))
with open("%s_strong.pkl" % prefix, "w") as strong_f:
cPickle.dump(refls_concat, strong_f)
print "Wrote %s" % strong_f.name
    with h5py.File("%s.h5" % prefix, "w") as img_f:
        for i_img in range(Nimg):
            if imgs[i_img].dtype != np.float32:
                imgs[i_img] = imgs[i_img].astype(np.float32)
        img_f.create_dataset("simulated_d9114_images",
                             data=imgs)
        print "Wrote %s" % img_f.filename