From b6ff9a7f571c6705441a1934b49a2b9e7d832379 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 8 Aug 2024 14:15:09 -0700 Subject: [PATCH 1/4] add SDSS spectra notebook --- docs/requirements.txt | 4 +- docs/tutorials.rst | 1 + docs/tutorials/nested_spectra.ipynb | 185 ++++++++++++++++++++++++++++ 3 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 docs/tutorials/nested_spectra.ipynb diff --git a/docs/requirements.txt b/docs/requirements.txt index 661f412..b75b037 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -7,4 +7,6 @@ nbsphinx sphinx sphinx-autoapi sphinx-copybutton -sphinx-book-theme \ No newline at end of file +sphinx-book-theme +astroquery +astropy \ No newline at end of file diff --git a/docs/tutorials.rst b/docs/tutorials.rst index 67f7b24..787a425 100644 --- a/docs/tutorials.rst +++ b/docs/tutorials.rst @@ -5,3 +5,4 @@ Tutorials Loading Data into Nested-Pandas Lower-level interfaces + Using Nested-Pandas with Astronomical Spectra diff --git a/docs/tutorials/nested_spectra.ipynb b/docs/tutorials/nested_spectra.ipynb new file mode 100644 index 0000000..b28ee87 --- /dev/null +++ b/docs/tutorials/nested_spectra.ipynb @@ -0,0 +1,185 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using Nested-Pandas with Astronomical Spectra" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In astronomy, a spectrum is a measurement (or combination of measurements) of an object that shows the intensity of light emitted over a range of energies. In this tutorial, we'll walk through a simple example of working with spectra from the Sloan Digital Sky Survey (SDSS), in particular showing how they can be represented as a `NestedFrame`."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we'll use `astroquery` and `astropy` to download a handful of spectra from SDSS:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from astroquery.sdss import SDSS\n", + "from astropy import coordinates as coords\n", + "import astropy.units as u\n", + "import nested_pandas as npd\n", + "\n", + "# Query SDSS for a set of objects with spectra\n", + "pos = coords.SkyCoord(\"0h8m10.63s +14d50m23.3s\", frame=\"icrs\")\n", + "xid = SDSS.query_region(pos, radius=3 * u.arcmin, spectro=True)\n", + "xid_ndf = npd.NestedFrame(xid.to_pandas())\n", + "xid_ndf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This initial query returns a set of objects with spectra (as specified by the `spectro=True` flag). To actually retrieve the spectra, we can do the following:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query SDSS for the corresponding spectra\n", + "sp = SDSS.get_spectra(matches=xid)\n", + "sp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The result is a list of FITS formatted data, from this point there are a few ways that we could move towards a nested-pandas representation. The most straightforward is to build a \"flat\" spectra table from all the objects." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "# Build a flat spectrum dataframe\n", + "\n", + "# Initialize some empty arrays to hold the flat data\n", + "wave = np.array([])\n", + "flux = np.array([])\n", + "err = np.array([])\n", + "index = np.array([])\n", + "# Loop over each spectrum, adding it's data to the arrays\n", + "for i, hdu in enumerate(sp):\n", + " wave = np.append(\n", + " wave, 10 ** hdu[\"COADD\"].data.loglam\n", + " ) # * u.angstrom # only one entry because we only search for one xid at a time. Could change that?\n", + " flux = np.append(flux, hdu[\"COADD\"].data.flux * 1e-17) # * u.erg/u.second/u.centimeter**2/u.angstrom\n", + " err = np.append(err, 1 / hdu[\"COADD\"].data.ivar * 1e-17) # * flux.unit\n", + "\n", + " # We'll need to set an index to keep track of which rows correspond\n", + " # to which object\n", + " index = np.append(index, i * np.ones(len(hdu[\"COADD\"].data.loglam)))\n", + "\n", + "# Build a NestedFrame from the arrays\n", + "flat_spec = npd.NestedFrame(dict(wave=wave, flux=flux, err=err), index=index.astype(np.int8))\n", + "flat_spec" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From here, we can simply nest our flat table within our original query result:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spec_ndf = xid_ndf.add_nested(flat_spec, \"coadd_spectrum\").set_index(\"objid\")\n", + "spec_ndf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we can see that each object now has the \"coadd_spectrum\" nested column with the full spectrum available." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Look at one of the spectra\n", + "spec_ndf.iloc[1].coadd_spectrum" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now have our spectra nested, and can proceed to do any filtering and analysis as normal within nested-pandas.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# Plot a spectrum\n", + "spec = spec_ndf.iloc[1].coadd_spectrum\n", + "\n", + "plt.plot(spec[\"wave\"], spec[\"flux\"])\n", + "plt.xlabel(\"Wavelength (Angstroms)\")\n", + "plt.ylabel(\"Flux (ergs/s)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From cf7ee62bdc637358013c133a25e8f0ffb1e967e8 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 8 Aug 2024 14:25:12 -0700 Subject: [PATCH 2/4] add matplotlib doc req --- docs/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index b75b037..4b88d38 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -9,4 +9,5 @@ sphinx-autoapi sphinx-copybutton sphinx-book-theme astroquery -astropy \ No newline at end of file +astropy +matplotlib \ No newline at end of file From 2782bbc536f5d2098dfec05d72fa051b6399f5a0 Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 8 Aug 2024 14:34:00 -0700 Subject: [PATCH 3/4] fix units 
--- docs/tutorials/nested_spectra.ipynb | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/tutorials/nested_spectra.ipynb b/docs/tutorials/nested_spectra.ipynb index b28ee87..9dae9ce 100644 --- a/docs/tutorials/nested_spectra.ipynb +++ b/docs/tutorials/nested_spectra.ipynb @@ -81,9 +81,7 @@ "index = np.array([])\n", "# Loop over each spectrum, adding it's data to the arrays\n", "for i, hdu in enumerate(sp):\n", - " wave = np.append(\n", - " wave, 10 ** hdu[\"COADD\"].data.loglam\n", - " ) # * u.angstrom # only one entry because we only search for one xid at a time. Could change that?\n", + " wave = np.append(wave, 10 ** hdu[\"COADD\"].data.loglam) # * u.angstrom\n", " flux = np.append(flux, hdu[\"COADD\"].data.flux * 1e-17) # * u.erg/u.second/u.centimeter**2/u.angstrom\n", " err = np.append(err, 1 / hdu[\"COADD\"].data.ivar * 1e-17) # * flux.unit\n", "\n", @@ -149,8 +147,8 @@ "spec = spec_ndf.iloc[1].coadd_spectrum\n", "\n", "plt.plot(spec[\"wave\"], spec[\"flux\"])\n", - "plt.xlabel(\"Wavelength (Angstroms)\")\n", - "plt.ylabel(\"Flux (ergs/s)\")" + "plt.xlabel(\"Wavelength (Å)\")\n", + "plt.ylabel(r\"Flux ($ergs/s/cm^2/Å$)\")" ] }, { From d05af730c57c902865790a37cf1510843bab6fcf Mon Sep 17 00:00:00 2001 From: Doug Branton Date: Thu, 8 Aug 2024 15:14:04 -0700 Subject: [PATCH 4/4] grammar+flat description --- docs/tutorials/nested_spectra.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/nested_spectra.ipynb b/docs/tutorials/nested_spectra.ipynb index 9dae9ce..d311655 100644 --- a/docs/tutorials/nested_spectra.ipynb +++ b/docs/tutorials/nested_spectra.ipynb @@ -61,7 +61,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The result is a list of FITS formatted data, from this point there are a few ways that we could move towards a nested-pandas representation. The most straightforward is to build a \"flat\" spectra table from all the objects." + "The result is a list of FITS formatted data. 
From this point, there are a few ways that we could move towards a nested-pandas representation. The most straightforward is to build a \"flat\" spectra table from all the objects, where we gather the information from each spectrum into a single combined table." ] }, {