-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_data_iue.py
538 lines (462 loc) · 22.5 KB
/
get_data_iue.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
"""
.. module:: get_data_iue
:synopsis: Returns IUE spectral data as a JSON string.
.. moduleauthor:: Scott W. Fleming <fleming@stsci.edu>
"""
import collections
import math
from operator import itemgetter
from astropy.io import fits
from data_series import DataSeries
import numpy
from parse_obsid_iue import parse_obsid_iue
from scipy.interpolate import interp1d
#--------------------
def calculate_cut_wl(camera, order, aperture):
"""
Calculate the cut wavelength for an order following Solano's paper.
:param camera: The camera used.
:type camera: str
:param order: The order number.
:type order: float
:param aperture: The aperture used.
:type aperture: str
:returns: float -- The cut wavelength for this order.
"""
# NOTE: When I tried to use it I did not get cut wavelengths that fell
# within the order's range, either the equation is wrong or I am applying it
# wrong.
if camera == "LWP":
if order >= 77 and order <= 124:
if aperture == "LARGE":
cut_wl = -7.9697 + 233257.6280/order
else:
cut_wl = -7.7959 + 233382.6450/order
else:
# Non-overlap orders, just return a very large wavelength.
cut_wl = 9E9
elif camera == "LWR":
if order >= 76 and order <= 119:
if aperture == "LARGE":
cut_wl = -11.3459 + 233737.5903/order
else:
cut_wl = -11.2214 + 233876.9950/order
else:
cut_wl = 9E9
else:
# The only other camera option is SWP.
if order >= 73 and order <= 120:
if aperture == "LARGE":
cut_wl = 24.3952 + 132875.4838/order + 325840.9715/(order*order)
else:
cut_wl = 22.2095 + 133293.4862/order + 300351.2209/(order*order)
else:
cut_wl = 9E9
return cut_wl
#--------------------
#--------------------
def interpolate_subspec(wls, fls, prev_index, gap_ind, wl_step):
"""
Interpolates a subsection of a spectrum (from prev_index to gap_index) onto
a new grid.
:param wls: List of subsection wavelengths.
:type wls: list
:param fls: List of subsection fluxes.
:type fls: list
:param prev_index: Starting index of subsection.
:type prev_index: int
:param gap_ind: Ending index of subsection.
:type gap_ind: int
:param wl_step: Wavelength step size to use for interpolated spectrum.
:type wl_step: float
:returns: tuple -- (list of int. wavelengths, list of int. fluxes)
"""
# Get the subspectrum
sub_spec_wls = wls[prev_index:gap_ind+1]
sub_spec_fls = fls[prev_index:gap_ind+1]
# Interpolate onto the new grid, using linear interpolation.
interp_f = interp1d(sub_spec_wls, sub_spec_fls, kind="linear")
# Calculate the number of linear wavelength steps needed.
min_wl = min(sub_spec_wls)
max_wl = max(sub_spec_wls)
n_steps = math.ceil((max_wl - min_wl) / wl_step)
# Try a couple step sizes to get as close to the ideal size as possible.
new_wls1, step_size1 = numpy.linspace(min_wl, max_wl, n_steps,
retstep=True)
new_wls2, step_size2 = numpy.linspace(min_wl, max_wl, n_steps+1,
retstep=True)
new_wls3, step_size3 = numpy.linspace(min_wl, max_wl, n_steps-1,
retstep=True)
# Choose the linear step size closest to our desired step size.
diffs = [abs(x-wl_step) for x in [step_size1, step_size2, step_size3]]
if diffs[0] <= diffs[1] and diffs[0] <= diffs[2]:
new_wls = new_wls1
elif diffs[1] <= diffs[2] and diffs[1] <= diffs[0]:
new_wls = new_wls2
else:
new_wls = new_wls3
# Calculate the interpolated values and extend the spectrum with them.
return (list(new_wls), list(interp_f(new_wls)))
#--------------------
#--------------------
def order_combine(order_spectra, camera, showplot=False):
"""
Combine spectral orders following Solano's definition of how to cut each
order's spectrum to deal with overlap.
:param order_spectra: A list containing the wavelengths, fluxes, and order
numbers for each spectral order, stored as dicts. The wavelengths and
fluxes are numpy.ndarrays
:type order_spectra: list
:param camera: The camera used for this spectrum, either "SWP", "LWR", or
"LWP".
:type camera: str
:param showplot: Set to true to show a plot of the original vs. interpolated
spectrum (default = False, only set when debugging).
:type showplot: bool
:returns: list -- combined wavelength, flux pairs
"""
# These lists will hold the order-combined wavelengths and spectra.
all_wls = []
all_fls = []
for ord_ind in range(len(order_spectra)-1):
# Current order ("m").
wls1 = order_spectra[ord_ind]['wls']
fls1 = order_spectra[ord_ind]['fls']
# Next order ("m-1").
wls2 = order_spectra[ord_ind+1]['wls']
fls2 = order_spectra[ord_ind+1]['fls']
# Determine the cut wavelength following Equations 1 and 2 from Solano.
if camera in ["LWP", "LWR"]:
cut_wl = wls2[0] + 2. * (wls1[-1]-wls2[0])/3.
else:
cut_wl = wls2[0] + (wls1[-1]-wls2[0])/3.
if showplot:
import matplotlib.pyplot as pyp
pyp.plot([x for x in wls1 if x <= cut_wl],
[y for x, y in zip(wls1, fls1) if x <= cut_wl], 'bo')
pyp.plot([x for x in wls1 if x > cut_wl],
[y for x, y in zip(wls1, fls1) if x > cut_wl], 'go')
pyp.plot(wls2, fls2, 'ro')
pyp.plot([x for x in wls2 if x > cut_wl],
[y for x, y in zip(wls2, fls2) if x > cut_wl], 'ro')
pyp.plot([x for x in wls2 if x <= cut_wl],
[y for x, y in zip(wls2, fls2) if x <= cut_wl], 'yo')
pyp.axvline(cut_wl)
pyp.suptitle('Blue = Order "m" (keep), Green = Order "m" (cut), Red'
' = Order "m-1" (keep), Yellow = Order "m-1" (cut)')
pyp.show()
# Keep those wavelengths from the two orders that don't cross the cut.
keep1 = numpy.where(wls1 <= cut_wl)[0]
keep2 = numpy.where(wls2 > cut_wl)[0]
order_spectra[ord_ind]['wls'] = wls1[keep1]
order_spectra[ord_ind]['fls'] = fls1[keep1]
order_spectra[ord_ind+1]['wls'] = wls2[keep2]
order_spectra[ord_ind+1]['fls'] = fls2[keep2]
# Update the order that has already been trimmed on both sides.
all_wls.extend(order_spectra[ord_ind]['wls'])
all_fls.extend(order_spectra[ord_ind]['fls'])
# If this is the second-to-last order, update the last order as well,
# since that is only trimmed on one side.
if ord_ind == len(order_spectra)-2:
all_wls.extend(order_spectra[ord_ind+1]['wls'])
all_fls.extend(order_spectra[ord_ind+1]['fls'])
return zip(all_wls, all_fls)
#--------------------
#--------------------
def resample_spectrum(combined_spectrum, camera, showplot=False):
"""
Resamples the order-combined spectrum to an evenly-sampled wavelength scale.
:param combined_spectrum: The order-combined spectrum with unequal
wavelength sampling. It is a list of (wl,fl) tuples.
:type combined_spectrum: list
:param camera: The camera used for this spectrum, either "SWP", "LWR", or
"LWP".
:type camera: str
:param showplot: Set to true to show a plot of the original vs. interpolated
spectrum (default = False, only set when debugging).
:type showplot: bool
:returns: list -- A list of (wl,fl) tuples of the evenly-sampled spectrum.
"""
# Unpack the wavelengths and fluxes.
wls, fls = zip(*combined_spectrum)
# Generate the re-sampled x-axis, starting at the min. wavelength and ending
# at the max. wavelength. The final bin size should be 0.05 Ang. for SWP
# cameras or 0.10 Ang. for LWP and LWR cameras. We oversample by a factor
# of 10 before binning down.
oversample = 10.
if camera in ["LWP", "LWR"]:
wl_step = 0.1 / oversample
else:
wl_step = 0.05 / oversample
# Identify gaps in the data, interpolate those gaps separately so you don't
# interpolate over a gap. A gap is defined as anywhere with more than three
# missing points (based on the mean wavelength difference across the
# spectrum).
wl_diffs = numpy.diff(wls)
# These are the *end points* of a given subsection.
wl_gaps = numpy.where(numpy.digitize(wl_diffs, [3.*numpy.mean(wl_diffs)]) !=
0)[0]
# If there are no gaps at all, then define the gap to be the last element.
if wl_gaps.size == 0:
wl_gaps = numpy.asarray([len(wls)-1])
# Build the binned spectrum for each subspectrum (skipping over gaps).
prev_index = 0
binned_wls = []
binned_fls = []
# Only build up the interpolated spectrum if it is to be plotted.
if showplot:
interpolated_wls = []
interpolated_fls = []
for gap_ind in wl_gaps:
# Get interpolated spectrum for this subsection.
new_wls, new_fls = interpolate_subspec(wls, fls, prev_index, gap_ind,
wl_step)
# Push the interpolated values into the list via extension, but only if
# it is to be plotted.
if showplot:
interpolated_wls.extend(new_wls)
interpolated_fls.extend(new_fls)
# Now bin the spectrum down by a factor of 10 in resolution to our
# desired wavelength spacing.
# First need to pad to an integer of 10 by adding NaNs.
if len(new_wls) % 10 != 0:
n_pad = 10 - (len(new_wls) % 10)
new_wls.extend([numpy.nan]*n_pad)
new_fls.extend([numpy.nan]*n_pad)
binned_sub_wl = numpy.nanmean(numpy.asarray(new_wls).reshape(-1, 10),
axis=1)
binned_sub_fl = numpy.nanmean(numpy.asarray(new_fls).reshape(-1, 10),
axis=1)
binned_wls.extend(binned_sub_wl)
binned_fls.extend(binned_sub_fl)
# Update where the next sub_spectrum starts.
prev_index = gap_ind+1
# If the last gap did not cover to the end of the spectrum, do one more
# subsection.
if prev_index < len(wls):
# Get interpolated spectrum for the final subsection.
new_wls, new_fls = interpolate_subspec(wls, fls, prev_index, len(wls),
wl_step)
# Push the interpolated values into the list via extension, but only if
# it is to be plotted.
if showplot:
interpolated_wls.extend(new_wls)
interpolated_fls.extend(new_fls)
# Now bin the spectrum down by a factor of 10 in resolution to our
# desired wavelength spacing.
# First need to pad to an integer of 10 by adding NaNs.
if len(new_wls) % 10 != 0:
n_pad = 10 - (len(new_wls) % 10)
new_wls.extend([numpy.nan]*n_pad)
new_fls.extend([numpy.nan]*n_pad)
binned_sub_wl = numpy.nanmean(numpy.asarray(new_wls).reshape(-1, 10),
axis=1)
binned_sub_fl = numpy.nanmean(numpy.asarray(new_fls).reshape(-1, 10),
axis=1)
binned_wls.extend(binned_sub_wl)
binned_fls.extend(binned_sub_fl)
# Show the plotted spectra if requested.
if showplot:
import matplotlib.pyplot as pyp
pyp.plot(wls, fls, '-ko')
# Uncomment the lines below to overplot the (oversampled) interpolated
# spectrum.
if showplot:
pyp.plot(interpolated_wls, interpolated_fls, '-ro')
pyp.plot(binned_wls, binned_fls, '-go')
for gapmark_ind in wl_gaps:
pyp.axvline(wls[gapmark_ind])
pyp.suptitle("Red = Oversampled, Green = Resampled, Black = Original")
pyp.show()
return zip(binned_wls, binned_fls)
#--------------------
#--------------------
def get_data_iue(obsid, filt):
"""
Given an IUE observation ID, returns the spectral data. Note that, in some
cases, an observation ID has both a low and high dispersion spectrum
available. In that event, both the low and high dispersion spectra are
returned as data series, regardless of whether the user wanted one or both.
:param obsid: The IUE observation ID to retrieve the data from.
:type obsid: str
:param filt: The filter for this IUE observation ID. It must be either
"LOW_DISP" or "HIGH_DISP".
:type filt: str
:returns: JSON -- The spectral data for this observation ID.
Error codes:
From parse_obsid_iue:
0 = No error parsing observation ID.
1 = Observation ID does not begin with expected first three letters.
2 = No mxlo or mxhi file found on disk.
From this module:
3 = Could not open one or more FITS file for reading.
4 = Filter value is not an accepted value.
"""
# This error code will be used unless there's a problem reading any
# of the FITS files in the list, or the FILTER value is not understood.
errcode = 0
# This defines a data point for a DataSeries object as a namedtuple.
data_point = collections.namedtuple('DataPoint', ['x', 'y'])
# For IUE, this defines the x-axis and y-axis units as a string.
iue_xunit = "Angstroms (vacuum, heliocentric)"
iue_yunit = "ergs/cm^2/s/Angstrom"
# Parse the obsID string to determine the paths+files to read. Note:
# this step will assign some of the error codes returned to the top level.
if filt == ' ':
filt = "UNKNOWN"
if filt.upper() in ["LOW_DISP", "HIGH_DISP"] or filt == "UNKNOWN":
parsed_files_result = parse_obsid_iue(obsid, filt.upper())
errcode = parsed_files_result.errcode
else:
errcode = 4
# In the case of low dispersion spectra, there can be two apertures for
# a single obsID. In that case, we return a list of TWO DataSeries, one
# for each aperture. In other words, we treat the single obsID as if it
# were two different obsIDs in the case of a double-aperture.
all_data_series = []
# For each file, read in the contents and create a return JSON object.
if errcode == 0:
for sfile in parsed_files_result.specfiles:
# Figure out if this is an mxhi or mxlo spectrum.
if sfile[-7:] == "mxlo.gz":
is_lo = True
is_hi = False
else:
is_lo = False
is_hi = True
try:
with fits.open(sfile) as hdulist:
if is_lo:
# Get the dispersion type from the primary header.
dispersion = hdulist[0].header["disptype"]
# Get the aperture size(s) from the header.
apertures = hdulist[1].data["aperture"]
n_apertures = len(apertures)
# Number of spectral data points for each aperture size.
n_wls = [int(x) for x in hdulist[1].data["npoints"]]
# Initial wavelength value(s).
starting_wl = [float(x) for x in
hdulist[1].data["wavelength"]]
# Step size(s) for each subsequent wavelength.
delta_wl = [float(x) for x in hdulist[1].data["deltaw"]]
# Generate the full array of wavelength values, and get
# full array of flux values, for each aperture.
for aper in range(n_apertures):
wls = [starting_wl[aper] +
x*delta_wl[aper] for
x in range(n_wls[aper])]
fls = [float(x) for
x in hdulist[1].data["flux"][aper]]
# Make sure wavelengths and fluxes are sorted
# from smallest wavelength to largest.
sort_indexes = [x[0] for x in
sorted(enumerate(wls),
key=itemgetter(1))]
wls = [wls[x] for x in sort_indexes]
fls = [fls[x] for x in sort_indexes]
wlfls = [(x, y) for x, y in zip(wls, fls) if
y != 0.]
if wlfls != []:
datapoints = [
[data_point(x=float("{0:.8f}".format(x)),
y=float("{0:.8e}".format(y)))
for x, y in wlfls]]
# Create the return DataSeries object.
all_data_series.append(
DataSeries('iue', obsid,
datapoints,
['IUE_' + obsid + ' DISP:'
+ dispersion + ' APER:' +
apertures[aper]],
[iue_xunit], [iue_yunit],
errcode))
if is_hi:
# Get the aperture from the primary header.
aperture = hdulist[0].header["aperture"].strip()
# Get the dispersion type from the primary header.
dispersion = hdulist[0].header["disptype"].strip()
# Get the camera used (SWP, LWP, LWR).
camera = hdulist[0].header["camera"].strip()
# Get a list of spectral orders. Those that are beyond
# the range defined in Solano are not considered.
if camera == "LWP":
max_order = 124
elif camera == "LWR":
max_order = 119
else:
max_order = 120
orders = [int(x) for x in hdulist[1].data["order"] if x
<= max_order]
n_orders = len(orders)
# This lists will store each orders' spectral info.
order_spectra = []
# Loop over each order.
for order in range(n_orders):
# Number of fluxes for this order.
n_p = int(hdulist[1].data["npoints"][order])
# Starting pixel within the array of 768 elements.
s_pix = int(
hdulist[1].data["startpix"][order])
# Wavelength corresponding to this start pixel.
starting_wl = float(
hdulist[1].data["wavelength"][order])
# Step size for each subsequent wavelength.
delta_wl = float(
hdulist[1].data["deltaw"][order])
# Generate the full array of wavelength values.
wls = [starting_wl + x*delta_wl for x in
range(n_p)]
# Extract the fluxes that go along with these wls.
all_fluxes = hdulist[1].data["abs_cal"][order]
fls = [float(x) for x in
all_fluxes[(s_pix-1):(s_pix-1+n_p-1+1)]]
# Extract the quality flags that go along with
# these wls.
all_qfs = hdulist[1].data["quality"][order]
qfs = [int(x) for x in all_qfs[(s_pix-1):(s_pix-1+
n_p-1+1)]]
# Only keep good Quality Flags, if the order is all
# bad flags, don't add it.
keep = [i for i, x in enumerate(qfs) if (qfs[i] >
-16384)]
if keep != [] and fls != [0.]*len(fls):
wls = [wls[i] for i in keep]
fls = [fls[i] for i in keep]
# Create a dict that will store this order's
# info.
order_spec = {'order':orders[order],
'wls':numpy.asarray(wls),
'fls':numpy.asarray(fls)}
order_spectra.append(order_spec)
# Order-combine the spectra.
comb_spec = order_combine(order_spectra, camera, False)
# Resample onto an evenly-spaced wavelength scale.
comb_spec_reb = resample_spectrum(comb_spec, camera,
False)
# Create the return DataSeries object.
datapoints = [
[data_point(x=float("{0:.8f}".format(x)),
y=float("{0:.8e}".format(y)))
for x, y in comb_spec_reb]]
all_data_series.append(
DataSeries('iue', obsid,
datapoints,
['IUE_' + obsid + ' DISP:'
+ dispersion + ' APER:' +
aperture],
[iue_xunit], [iue_yunit],
errcode))
except IOError:
errcode = 3
all_data_series.append(
DataSeries('iue', obsid, [], [''], [''], [''], errcode))
else:
# This is where an error DataSeries object would be returned.
all_data_series.append(
DataSeries('iue', obsid, [], [], [],
[], errcode))
# Return the DataSeries object back to the calling module.
if len(all_data_series) == 1:
return all_data_series[0]
return all_data_series
#--------------------