From 95db9c310caaa751eeb938634246ebe33ecd9dfb Mon Sep 17 00:00:00 2001 From: Sandro Dias Pinto Vitenti Date: Fri, 18 Oct 2024 16:24:03 -0300 Subject: [PATCH] Fixing LSST SRD binning (#463) * Adding correct source binning. * Updated tutorials. * Adding testing for new Y10 code. * Better adjusting plots size. * Lazy loading, expanded interface to access distributions. * Added test for y10 SRD bins. * Expanding tutorial to include source bins discussion. * Improving table formatting. --- .gitignore | 2 + firecrown/generators/inferred_galaxy_zdist.py | 496 ++++++++++++++---- .../generators/test_inferred_galaxy_zdist.py | 176 ++++--- tutorial/inferred_zdist_generators.qmd | 198 +++++-- tutorial/inferred_zdist_serialization.qmd | 12 +- 5 files changed, 660 insertions(+), 224 deletions(-) diff --git a/.gitignore b/.gitignore index 5a409e10..4777bd7b 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,5 @@ dmypy.json out/ tutorial/theoretical_predictions_tutorial_files tutorial/theoretical_predictions_tutorial.html + +/.quarto/ diff --git a/firecrown/generators/inferred_galaxy_zdist.py b/firecrown/generators/inferred_galaxy_zdist.py index 0e0fe06f..ec6aaca6 100644 --- a/firecrown/generators/inferred_galaxy_zdist.py +++ b/firecrown/generators/inferred_galaxy_zdist.py @@ -1,7 +1,8 @@ """Generation of inferred galaxy redshift distributions.""" -from typing import TypedDict, Annotated, Any +from typing import TypedDict, Annotated, Any, Unpack from itertools import pairwise +from functools import cache from pydantic import BaseModel, ConfigDict, Field, field_serializer, BeforeValidator @@ -25,17 +26,30 @@ BinsType = TypedDict("BinsType", {"edges": npt.NDArray, "sigma_z": float}) -Y1_ALPHA = 0.94 -Y1_BETA = 2.0 -Y1_Z0 = 0.26 -Y1_LENS_BINS: BinsType = {"edges": np.linspace(0.2, 1.2, 5 + 1), "sigma_z": 0.03} -Y1_SOURCE_BINS: BinsType = {"edges": np.linspace(0.2, 1.2, 5 + 1), "sigma_z": 0.05} +Y1_LENS_ALPHA = 0.94 +Y1_LENS_BETA = 2.0 +Y1_LENS_Z0 = 0.26 -Y10_ALPHA = 0.90 -Y10_BETA = 2.0 -Y10_Z0 = 0.28 -Y10_LENS_BINS: BinsType = {"edges": np.linspace(0.2, 1.2, 10 + 1), "sigma_z": 0.03} -Y10_SOURCE_BINS: BinsType = {"edges": np.linspace(0.2, 1.2, 10 + 1), "sigma_z": 0.05} +Y1_SOURCE_ALPHA = 0.78 +Y1_SOURCE_BETA = 2.0 +Y1_SOURCE_Z0 = 0.13 + +Y10_LENS_ALPHA = 0.90 +Y10_LENS_BETA = 2.0 +Y10_LENS_Z0 = 0.28 + +Y10_SOURCE_ALPHA = 0.68 +Y10_SOURCE_BETA = 2.0 +Y10_SOURCE_Z0 = 0.11 + + +class ZDistLSSTSRDOpt(TypedDict, total=False): + """Optional parameters for the LSST Inferred galaxy redshift distribution.""" + + max_z: float + use_autoknot: bool + autoknots_reltol: float + autoknots_abstol: float class ZDistLSSTSRD: @@ -50,48 +64,116 @@ class ZDistLSSTSRD: flexibility when desired. """ - def __init__(self, alpha: float, beta: float, z0: float) -> None: + def __init__( + self, + alpha: float, + beta: float, + z0: float, + max_z: float = 5.0, + use_autoknot: bool = False, + autoknots_reltol: float = 1.0e-4, + autoknots_abstol: float = 1.0e-15, + ) -> None: """Initialize the LSST Inferred galaxy redshift distribution. + The true redshift distribution is integrated up to `max_z` to generate the + photo-z distribution. + :param alpha: The alpha parameter of the distribution. :param beta: The beta parameter of the distribution. :param z0: The z0 parameter of the distribution. + :param max_z: The maximum true redshift to consider. + :param use_autoknot: Whether to use the AutoKnots algorithm of NumCosmo + :param autoknots_reltol: The relative tolerance for the AutoKnots algorithm + :param autoknots_abstol: The absolute tolerance for the AutoKnots algorithm """ self.alpha = alpha self.beta = beta self.z0 = z0 + self.max_z = max_z + self.use_autoknot = use_autoknot + self.autoknots_reltol = autoknots_reltol + self.autoknots_abstol = autoknots_abstol + + @classmethod + def year_1_lens( + cls, + alpha: float = Y1_LENS_ALPHA, + beta: float = Y1_LENS_BETA, + z0: float = Y1_LENS_Z0, + **kwargs: Unpack[ZDistLSSTSRDOpt], + ) -> "ZDistLSSTSRD": + """Create a ZDistLSSTSRD object for the first year of LSST. + + It uses the default values of the alpha, beta and z0 parameters from + the LSST SRD Year 1 for the lens distribution. + + :param alpha: The alpha parameter of the distribution + :param beta: The beta parameter of the distribution + :param z0: The z0 parameter of the distribution + :return: A ZDistLSSTSRD object. + """ + return cls(alpha=alpha, beta=beta, z0=z0, **kwargs) @classmethod - def year_1( - cls, alpha: float = Y1_ALPHA, beta: float = Y1_BETA, z0: float = Y1_Z0 + def year_1_source( + cls, + alpha: float = Y1_SOURCE_ALPHA, + beta: float = Y1_SOURCE_BETA, + z0: float = Y1_SOURCE_Z0, + **kwargs: Unpack[ZDistLSSTSRDOpt], ) -> "ZDistLSSTSRD": """Create a ZDistLSSTSRD object for the first year of LSST. It uses the default values of the alpha, beta and z0 parameters from - the LSST SRD Year 1. + the LSST SRD Year 1 for the source distribution. :param alpha: The alpha parameter of the distribution :param beta: The beta parameter of the distribution :param z0: The z0 parameter of the distribution :return: A ZDistLSSTSRD object. """ - return cls(alpha=alpha, beta=beta, z0=z0) + return cls(alpha=alpha, beta=beta, z0=z0, **kwargs) @classmethod - def year_10( - cls, alpha: float = Y10_ALPHA, beta: float = Y10_BETA, z0: float = Y10_Z0 + def year_10_lens( + cls, + alpha: float = Y10_LENS_ALPHA, + beta: float = Y10_LENS_BETA, + z0: float = Y10_LENS_Z0, + **kwargs: Unpack[ZDistLSSTSRDOpt], ) -> "ZDistLSSTSRD": """Create a ZDistLSSTSRD object for the tenth year of LSST. It uses the default values of the alpha, beta and z0 parameters from - the LSST SRD Year 10. + the LSST SRD Year 10 for the lens distribution. :param alpha: The alpha parameter of the distribution :param beta: The beta parameter of the distribution :param z0: The z0 parameter of the distribution :return: A ZDistLSSTSRD object. """ - return cls(alpha=alpha, beta=beta, z0=z0) + return cls(alpha=alpha, beta=beta, z0=z0, **kwargs) + + @classmethod + def year_10_source( + cls, + alpha: float = Y10_SOURCE_ALPHA, + beta: float = Y10_SOURCE_BETA, + z0: float = Y10_SOURCE_Z0, + **kwargs: Unpack[ZDistLSSTSRDOpt], + ) -> "ZDistLSSTSRD": + """Create a ZDistLSSTSRD object for the tenth year of LSST. + + It uses the default values of the alpha, beta and z0 parameters from + the LSST SRD Year 10 for the source distribution. + + :param alpha: The alpha parameter of the distribution + :param beta: The beta parameter of the distribution + :param z0: The z0 parameter of the distribution + :return: A ZDistLSSTSRD object. + """ + return cls(alpha=alpha, beta=beta, z0=z0, **kwargs) def distribution(self, z: npt.NDArray) -> npt.NDArray: """Generate the inferred galaxy redshift distribution. @@ -105,6 +187,26 @@ def distribution(self, z: npt.NDArray) -> npt.NDArray: norma * (z / self.z0) ** self.beta * np.exp(-((z / self.z0) ** self.alpha)) ) + def distribution_zp(self, zp: float, sigma_z: float) -> float: + """Generate the Gaussian convolution of the distribution. + + :param sigma_z: The resolution parameter + :param zp: The photometric redshift + :return: The Gaussian distribution + """ + sqrt_2 = np.sqrt(2.0) + sqrt_2pi = np.sqrt(2.0 * np.pi) + + def integrand(z): + lsigma_z = sigma_z * (1.0 + z) + return ( + self.distribution(z) + * np.exp(-((zp - z) ** 2) / (2.0 * lsigma_z**2)) + / (sqrt_2pi * lsigma_z * 0.5 * (erf(z / (sqrt_2 * lsigma_z)) + 1.0)) + ) + + return quad(integrand, 0.0, self.max_z)[0] + def _integrated_gaussian_scalar( self, zpl: float, zpu: float, sigma_z: float, z: float ) -> float: @@ -152,6 +254,104 @@ def _integrated_gaussian( return result + def compute_distribution(self, sigma_z: float) -> Ncm.StatsDist1d: + """Generate the inferred galaxy redshift distribution. + + Computes the distribution by convolving the true distribution with a + Gaussian. The convolution is computed using the AutoKnots algorithm of + NumCosmo. + + :param sigma_z: The resolution parameter + :return: The inferred galaxy redshift distribution + """ + + def m2lndist(z, _): + return -2.0 * np.log( + self.distribution_zp(z, sigma_z) + self.autoknots_abstol + ) + + s = Ncm.SplineCubicNotaknot() + s.set_func1( + Ncm.SplineFuncType.FUNCTION_SPLINE, + m2lndist, + None, + 0.0, + self.max_z, + 0, + self.autoknots_reltol, + ) + + stats = Ncm.StatsDist1dSpline.new(s) + stats.props.abstol = self.autoknots_abstol + stats.set_compute_cdf(True) + stats.prepare() + + return stats + + def compute_true_distribution(self) -> Ncm.StatsDist1d: + """Generate the inferred galaxy redshift distribution. + + Computes the distribution without the convolution with the Gaussian. + That is, the true redshift distribution. + + :return: The inferred galaxy redshift distribution + """ + + def m2lndist(z, _): + return -2.0 * np.log(self.distribution(z) + self.autoknots_abstol) + + s = Ncm.SplineCubicNotaknot() + s.set_func1( + Ncm.SplineFuncType.FUNCTION_SPLINE, + m2lndist, + None, + 0.0, + self.max_z, + 0, + self.autoknots_reltol, + ) + + stats = Ncm.StatsDist1dSpline.new(s) + stats.props.abstol = self.autoknots_abstol + stats.set_compute_cdf(True) + stats.prepare() + + return stats + + def equal_area_bins( + self, + n_bins: int, + sigma_z: float, + last_z: float, + use_true_distribution: bool = False, + ) -> npt.NDArray: + """Generate equal area bins for the distribution. + + In order to compute the bin edges, the convolution of the distribution + with a Gaussian is computed. The bin edges are then computed by + inverting the cumulative distribution function of the convolution. + + If the true distribution is used, the convolution is not computed. + This provides a faster way to compute the bin edges. + + :param n_bins: The number of bins + :param sigma_z: The resolution parameter + :param last_z: The last redshift to consider + :param use_true_distribution: Whether to use the true distribution + + :return: The bin edges + """ + if use_true_distribution: + stats = self.compute_true_distribution() + else: + stats = self.compute_distribution(sigma_z) + + total_area = stats.eval_pdf(last_z) + + return np.array( + [stats.eval_inv_pdf(i * total_area / n_bins) for i in range(n_bins + 1)] + ) + def binned_distribution( self, *, @@ -161,9 +361,6 @@ def binned_distribution( z: npt.NDArray, name: str, measurements: set[Measurement], - use_autoknot: bool = False, - autoknots_reltol: float = 1.0e-4, - autoknots_abstol: float = 1.0e-15, ) -> InferredGalaxyZDist: """Generate the inferred galaxy redshift distribution in bins. @@ -173,9 +370,7 @@ def binned_distribution( :param z: The redshifts at which to evaluate the distribution :param name: The name of the distribution :param measurements: The set of measurements of the distribution - :param use_autoknot: Whether to use the NotAKnot algorithm of NumCosmo - :param autoknots_reltol: The relative tolerance for the NotAKnot algorithm - :param autoknots_abstol: The absolute tolerance for the NotAKnot algorithm + :return: The inferred galaxy redshift distribution """ @@ -188,12 +383,12 @@ def _P(z, _): return ( self.distribution(z) * self._integrated_gaussian_scalar(zpl, zpu, sigma_z, z) - + autoknots_abstol + + self.autoknots_abstol ) norma = quad(_P, z[0], z[-1], args=None)[0] - if not use_autoknot: + if not self.use_autoknot: z_knots = z dndz = ( self._integrated_gaussian(zpl, zpu, sigma_z, z) @@ -209,7 +404,7 @@ def _P(z, _): z[0], z[-1], 0, - autoknots_reltol, + self.autoknots_reltol, ) z_knots = np.array(s.peek_xv().dup_array()) # pylint: disable-msg=no-member dndz = ( @@ -297,9 +492,6 @@ class ZDistLSSTSRDBin(BaseModel): z: Annotated[Grid1D, Field(union_mode="left_to_right")] bin_name: str measurements: Annotated[set[Measurement], BeforeValidator(make_measurements)] - use_autoknot: bool = False - autoknots_reltol: float = 1.0e-4 - autoknots_abstol: float = 1.0e-15 @field_serializer("measurements") @classmethod @@ -316,9 +508,6 @@ def generate(self, zdist: ZDistLSSTSRD) -> InferredGalaxyZDist: z=self.z.generate(), name=self.bin_name, measurements=self.measurements, - use_autoknot=self.use_autoknot, - autoknots_reltol=self.autoknots_reltol, - autoknots_abstol=self.autoknots_abstol, ) @@ -331,85 +520,170 @@ class ZDistLSSTSRDBinCollection(BaseModel): beta: float z0: float bins: list[ZDistLSSTSRDBin] + max_z: float = 5.0 + use_autoknot: bool = True + autoknots_reltol: float = 1.0e-4 + autoknots_abstol: float = 1.0e-15 def generate(self) -> list[InferredGalaxyZDist]: """Generate the inferred galaxy redshift distributions in bins.""" - zdist = ZDistLSSTSRD(alpha=self.alpha, beta=self.beta, z0=self.z0) - return [bin.generate(zdist) for bin in self.bins] - - -LSST_Y1_LENS_BIN_COLLECTION = ZDistLSSTSRDBinCollection( - alpha=Y1_ALPHA, - beta=Y1_BETA, - z0=Y1_Z0, - bins=[ - ZDistLSSTSRDBin( - zpl=zpl, - zpu=zpu, - sigma_z=Y1_LENS_BINS["sigma_z"], - z=RawGrid1D(values=[0.0, 3.0]), - bin_name=f"lens_{zpl:.1f}_{zpu:.1f}_y1", - measurements={Galaxies.COUNTS}, - use_autoknot=True, - autoknots_reltol=1.0e-5, - ) - for zpl, zpu in pairwise(Y1_LENS_BINS["edges"]) - ], -) - -LSST_Y1_SOURCE_BIN_COLLECTION = ZDistLSSTSRDBinCollection( - alpha=Y1_ALPHA, - beta=Y1_BETA, - z0=Y1_Z0, - bins=[ - ZDistLSSTSRDBin( - zpl=zpl, - zpu=zpu, - sigma_z=Y1_SOURCE_BINS["sigma_z"], - z=RawGrid1D(values=[0.0, 3.0]), - bin_name=f"source_{zpl:.1f}_{zpu:.1f}_y1", - measurements={Galaxies.SHEAR_E}, - use_autoknot=True, - autoknots_reltol=1.0e-5, + zdist = ZDistLSSTSRD( + alpha=self.alpha, + beta=self.beta, + z0=self.z0, + use_autoknot=self.use_autoknot, + autoknots_reltol=self.autoknots_reltol, + autoknots_abstol=self.autoknots_abstol, ) - for zpl, zpu in pairwise(Y1_SOURCE_BINS["edges"]) - ], -) + return [bin.generate(zdist) for bin in self.bins] -LSST_Y10_LENS_BIN_COLLECTION = ZDistLSSTSRDBinCollection( - alpha=Y10_ALPHA, - beta=Y10_BETA, - z0=Y10_Z0, - bins=[ - ZDistLSSTSRDBin( - zpl=zpl, - zpu=zpu, - sigma_z=Y10_LENS_BINS["sigma_z"], - z=RawGrid1D(values=[0.0, 3.0]), - bin_name=f"lens_{zpl:.1f}_{zpu:.1f}_y10", - measurements={Galaxies.COUNTS}, - use_autoknot=True, - autoknots_reltol=1.0e-5, - ) - for zpl, zpu in pairwise(Y10_LENS_BINS["edges"]) - ], -) -LSST_Y10_SOURCE_BIN_COLLECTION = ZDistLSSTSRDBinCollection( - alpha=Y10_ALPHA, - beta=Y10_BETA, - z0=Y10_Z0, - bins=[ - ZDistLSSTSRDBin( - zpl=zpl, - zpu=zpu, - sigma_z=Y10_SOURCE_BINS["sigma_z"], - z=RawGrid1D(values=[0.0, 3.0]), - bin_name=f"source_{zpl:.1f}_{zpu:.1f}_y10", - measurements={Galaxies.SHEAR_E}, - use_autoknot=True, - autoknots_reltol=1.0e-5, - ) - for zpl, zpu in pairwise(Y10_SOURCE_BINS["edges"]) - ], -) +@cache +def get_y1_lens_bins() -> BinsType: + """Get the Year 1 lens bins.""" + return {"edges": np.linspace(0.2, 1.2, 5 + 1), "sigma_z": 0.03} + + +@cache +def get_y1_source_bins() -> BinsType: + """Get the Year 1 source bins.""" + return { + "edges": ZDistLSSTSRD.year_1_source().equal_area_bins(5, 0.05, 3.5), + "sigma_z": 0.05, + } + + +@cache +def get_y10_lens_bins() -> BinsType: + """Get the Year 10 lens bins.""" + return {"edges": np.linspace(0.2, 1.2, 10 + 1), "sigma_z": 0.03} + + +@cache +def get_y10_source_bins() -> BinsType: + """Get the Year 10 source bins.""" + return { + "edges": ZDistLSSTSRD.year_10_source().equal_area_bins(5, 0.05, 3.5), + "sigma_z": 0.05, + } + + +@cache +def get_lsst_y1_lens_bin_collection() -> ZDistLSSTSRDBinCollection: + """Get the LSST Year 1 lens bin collection.""" + y1_lens_bins = get_y1_lens_bins() + return ZDistLSSTSRDBinCollection( + alpha=Y1_LENS_ALPHA, + beta=Y1_LENS_BETA, + z0=Y1_LENS_Z0, + bins=[ + ZDistLSSTSRDBin( + zpl=zpl, + zpu=zpu, + sigma_z=y1_lens_bins["sigma_z"], + z=RawGrid1D(values=[0.0, 3.5]), + bin_name=f"lens_{zpl:.1f}_{zpu:.1f}_y1", + measurements={Galaxies.COUNTS}, + ) + for zpl, zpu in pairwise(y1_lens_bins["edges"]) + ], + ) + + +@cache +def get_lsst_y1_source_bin_collection() -> ZDistLSSTSRDBinCollection: + """Get the LSST Year 1 source bin collection.""" + y1_source_bins = get_y1_source_bins() + return ZDistLSSTSRDBinCollection( + alpha=Y1_SOURCE_ALPHA, + beta=Y1_SOURCE_BETA, + z0=Y1_SOURCE_Z0, + bins=[ + ZDistLSSTSRDBin( + zpl=zpl, + zpu=zpu, + sigma_z=y1_source_bins["sigma_z"], + z=RawGrid1D(values=[0.0, 3.5]), + bin_name=f"source_{zpl:.1f}_{zpu:.1f}_y1", + measurements={Galaxies.SHEAR_E}, + ) + for zpl, zpu in pairwise(y1_source_bins["edges"]) + ], + ) + + +@cache +def get_lsst_y10_lens_bin_collection() -> ZDistLSSTSRDBinCollection: + """Get the LSST Year 10 lens bin collection.""" + y10_lens_bins = get_y10_lens_bins() + return ZDistLSSTSRDBinCollection( + alpha=Y10_LENS_ALPHA, + beta=Y10_LENS_BETA, + z0=Y10_LENS_Z0, + bins=[ + ZDistLSSTSRDBin( + zpl=zpl, + zpu=zpu, + sigma_z=y10_lens_bins["sigma_z"], + z=RawGrid1D(values=[0.0, 3.5]), + bin_name=f"lens_{zpl:.1f}_{zpu:.1f}_y10", + measurements={Galaxies.COUNTS}, + ) + for zpl, zpu in pairwise(y10_lens_bins["edges"]) + ], + ) + + +@cache +def get_lsst_y10_source_bin_collection() -> ZDistLSSTSRDBinCollection: + """Get the LSST Year 10 source bin collection.""" + y10_source_bins = get_y10_source_bins() + return ZDistLSSTSRDBinCollection( + alpha=Y10_SOURCE_ALPHA, + beta=Y10_SOURCE_BETA, + z0=Y10_SOURCE_Z0, + bins=[ + ZDistLSSTSRDBin( + zpl=zpl, + zpu=zpu, + sigma_z=y10_source_bins["sigma_z"], + z=RawGrid1D(values=[0.0, 3.5]), + bin_name=f"source_{zpl:.1f}_{zpu:.1f}_y10", + measurements={Galaxies.SHEAR_E}, + ) + for zpl, zpu in pairwise(y10_source_bins["edges"]) + ], + ) + + +def __getattr__(name: str) -> Any: # pylint: disable-msg=too-many-return-statements + """Lazy evaluation of the bins.""" + match name: + case "Y1_LENS_BINS": + return get_y1_lens_bins() + case "Y1_SOURCE_BINS": + return get_y1_source_bins() + case "Y10_LENS_BINS": + return get_y10_lens_bins() + case "Y10_SOURCE_BINS": + return get_y10_source_bins() + case "LSST_Y1_LENS_BIN_COLLECTION": + return get_lsst_y1_lens_bin_collection() + case "LSST_Y1_SOURCE_BIN_COLLECTION": + return get_lsst_y1_source_bin_collection() + case "LSST_Y10_LENS_BIN_COLLECTION": + return get_lsst_y10_lens_bin_collection() + case "LSST_Y10_SOURCE_BIN_COLLECTION": + return get_lsst_y10_source_bin_collection() + case _: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +Y1_LENS_BINS: BinsType +Y1_SOURCE_BINS: BinsType +Y10_LENS_BINS: BinsType +Y10_SOURCE_BINS: BinsType +LSST_Y1_LENS_BIN_COLLECTION: ZDistLSSTSRDBinCollection +LSST_Y1_SOURCE_BIN_COLLECTION: ZDistLSSTSRDBinCollection +LSST_Y10_LENS_BIN_COLLECTION: ZDistLSSTSRDBinCollection +LSST_Y10_SOURCE_BIN_COLLECTION: ZDistLSSTSRDBinCollection diff --git a/tests/generators/test_inferred_galaxy_zdist.py b/tests/generators/test_inferred_galaxy_zdist.py index 43cb3469..a149b050 100644 --- a/tests/generators/test_inferred_galaxy_zdist.py +++ b/tests/generators/test_inferred_galaxy_zdist.py @@ -10,6 +10,8 @@ import numpy.typing as npt from numpy.testing import assert_array_equal, assert_allclose from scipy.integrate import simpson +from numcosmo_py import Ncm + import yaml from firecrown.generators.inferred_galaxy_zdist import ( @@ -23,6 +25,8 @@ ZDistLSSTSRDBinCollection, LSST_Y1_LENS_BIN_COLLECTION, LSST_Y1_SOURCE_BIN_COLLECTION, + LSST_Y10_LENS_BIN_COLLECTION, + LSST_Y10_SOURCE_BIN_COLLECTION, Measurement, make_measurements, make_measurements_dict, @@ -31,10 +35,37 @@ from firecrown.utils import base_model_from_yaml, base_model_to_yaml -@pytest.fixture(name="zdist", params=[ZDistLSSTSRD.year_1(), ZDistLSSTSRD.year_10()]) +@pytest.fixture( + name="zdist", + params=product( + [ + ZDistLSSTSRD.year_1_lens, + ZDistLSSTSRD.year_10_lens, + ZDistLSSTSRD.year_1_source, + ZDistLSSTSRD.year_10_source, + ], + [True, False], + [1e-4, 1e-5], + ), + ids=[ + f"{dist}-autoknots-{use_autoknots}-reltol-{reltol}" + for dist, use_autoknots, reltol in product( + [ + "Y1_lens", + "Y10_lens", + "Y1_source", + "Y10_source", + ], + ["true", "false"], + [1e-4, 1e-5], + ) + ], +) def fixture_zdist_y1(request): """Fixture for the ZDistLSSTSRD class.""" - return request.param + return request.param[0]( + use_autoknot=request.param[1], autoknots_reltol=request.param[2] + ) @pytest.fixture(name="z_array", params=[100, 600, 1000]) @@ -57,7 +88,18 @@ def fixture_reltol(request): return request.param -BINS_LIST = ["one_lens", "all_lens", "one_source", "all_source", "lens_and_source"] +BINS_LIST = [ + "one_lens_y1", + "all_lens_y1", + "one_source_y1", + "all_source_y1", + "lens_and_source_y1", + "one_lens_y10", + "all_lens_y10", + "one_source_y10", + "all_source_y10", + "lens_and_source_y10", +] @pytest.fixture( @@ -71,23 +113,31 @@ def fixture_reltol(request): def fixture_zdist_bins(request) -> list[ZDistLSSTSRDBin]: """Fixture for the ZDistLSSTSRD class.""" match request.param[0]: - case "one_lens": + case "one_lens_y1": bins = copy.deepcopy(LSST_Y1_LENS_BIN_COLLECTION.bins[0:1]) - case "all_lens": + case "all_lens_y1": bins = copy.deepcopy(LSST_Y1_LENS_BIN_COLLECTION.bins) - case "one_source": + case "one_source_y1": bins = copy.deepcopy(LSST_Y1_SOURCE_BIN_COLLECTION.bins[0:1]) - case "all_source": + case "all_source_y1": bins = copy.deepcopy(LSST_Y1_SOURCE_BIN_COLLECTION.bins) - case "lens_and_source": + case "lens_and_source_y1": bins = copy.deepcopy(LSST_Y1_LENS_BIN_COLLECTION.bins) bins.extend(copy.deepcopy(LSST_Y1_SOURCE_BIN_COLLECTION.bins)) + case "one_lens_y10": + bins = copy.deepcopy(LSST_Y10_LENS_BIN_COLLECTION.bins[0:1]) + case "all_lens_y10": + bins = copy.deepcopy(LSST_Y10_LENS_BIN_COLLECTION.bins) + case "one_source_y10": + bins = copy.deepcopy(LSST_Y10_SOURCE_BIN_COLLECTION.bins[0:1]) + case "all_source_y10": + bins = copy.deepcopy(LSST_Y10_SOURCE_BIN_COLLECTION.bins) + case "lens_and_source_y10": + bins = copy.deepcopy(LSST_Y10_LENS_BIN_COLLECTION.bins) + bins.extend(copy.deepcopy(LSST_Y10_SOURCE_BIN_COLLECTION.bins)) case _: raise ValueError(f"Invalid parameter: {request.param}") - for zbin in bins: - zbin.use_autoknot = request.param[1] - return bins @@ -97,7 +147,7 @@ def test_zdist(zdist: ZDistLSSTSRD, z_array: npt.NDArray[np.float64]): assert Pz.shape == z_array.shape -def test_compute_one_bin_dist_fix_z( +def test_compute_one_bin_dist( zdist: ZDistLSSTSRD, z_array: npt.NDArray[np.float64], bins: dict[str, Any], @@ -113,58 +163,15 @@ def test_compute_one_bin_dist_fix_z( measurements={Galaxies.COUNTS}, ) - assert_array_equal(Pz.z, z_array) - assert_allclose(simpson(y=Pz.dndz, x=z_array), 1.0, atol=1e-3) - - -def test_compute_all_bins_dist_fix_z( - zdist: ZDistLSSTSRD, - z_array: npt.NDArray[np.float64], - bins: dict[str, Any], -): - """Test the compute_binned_dist method.""" - - for zpl, zpu in pairwise(bins["edges"]): - Pz = zdist.binned_distribution( - zpl=zpl, - zpu=zpu, - sigma_z=bins["sigma_z"], - z=z_array, - name="lens_y1", - measurements={Galaxies.COUNTS}, - ) - + if not zdist.use_autoknot: assert_array_equal(Pz.z, z_array) - assert_allclose(simpson(y=Pz.dndz, x=z_array), 1.0, atol=1e-3) + assert_allclose(simpson(y=Pz.dndz, x=Pz.z), 1.0, atol=1e-3) -def test_compute_one_bin_dist_autoknot( +def test_compute_all_bins_dist( zdist: ZDistLSSTSRD, z_array: npt.NDArray[np.float64], bins: dict[str, Any], - reltol: float, -): - """Test the compute_binned_dist method.""" - - Pz = zdist.binned_distribution( - zpl=bins["edges"][0], - zpu=bins["edges"][1], - sigma_z=bins["sigma_z"], - z=z_array, - name="lens0_y1", - measurements={Galaxies.COUNTS}, - use_autoknot=True, - autoknots_reltol=reltol, - ) - - assert_allclose(simpson(y=Pz.dndz, x=Pz.z), 1.0, atol=reltol) - - -def test_compute_all_bins_dist_autoknot( - zdist: ZDistLSSTSRD, - z_array: npt.NDArray[np.float64], - bins: dict[str, Any], - reltol: float, ): """Test the compute_binned_dist method.""" @@ -176,11 +183,10 @@ def test_compute_all_bins_dist_autoknot( z=z_array, name="lens_y1", measurements={Galaxies.COUNTS}, - use_autoknot=True, - autoknots_reltol=reltol, ) - - assert_allclose(simpson(y=Pz.dndz, x=Pz.z), 1.0, atol=reltol) + if not zdist.use_autoknot: + assert_array_equal(Pz.z, z_array) + assert_allclose(simpson(y=Pz.dndz, x=Pz.z), 1.0, atol=1e-3) def test_zdist_bin(): @@ -229,7 +235,8 @@ def test_zdist_bin_generate(zdist: ZDistLSSTSRD): Pz = zbin.generate(zdist) - assert_array_equal(Pz.z, z.generate()) + if not zdist.use_autoknot: + assert_array_equal(Pz.z, z.generate()) assert Pz.bin_name == bin_name assert Pz.measurements == measurements @@ -308,9 +315,6 @@ def test_zdist_bin_to_yaml(): "z": {"start": zbin.z.start, "end": zbin.z.end, "num": zbin.z.num}, "bin_name": zbin.bin_name, "measurements": make_measurements_dict(zbin.measurements), - "use_autoknot": zbin.use_autoknot, - "autoknots_reltol": zbin.autoknots_reltol, - "autoknots_abstol": zbin.autoknots_abstol, } @@ -349,7 +353,7 @@ def test_zdist_bin_collection_generate(zdist_bins): assert len(Pz_list) == len(zdist_bins) for zbin, Pz in zip(zdist_bins, Pz_list): - if not zbin.use_autoknot: + if not zbin_collection.use_autoknot: assert_array_equal(Pz.z, zbin.z.generate()) else: assert not np.array_equal(Pz.z, zbin.z.generate()) @@ -377,6 +381,10 @@ def test_zdist_bin_collection_to_yaml(zdist_bins): "alpha": alpha, "beta": beta, "z0": z0, + "max_z": zbin_collection.max_z, + "use_autoknot": zbin_collection.use_autoknot, + "autoknots_reltol": zbin_collection.autoknots_reltol, + "autoknots_abstol": zbin_collection.autoknots_abstol, "bins": [ { "zpl": zbin.zpl, @@ -385,9 +393,6 @@ def test_zdist_bin_collection_to_yaml(zdist_bins): "z": zbin.z.model_dump(), "bin_name": zbin.bin_name, "measurements": make_measurements_dict(zbin.measurements), - "use_autoknot": zbin.use_autoknot, - "autoknots_reltol": zbin.autoknots_reltol, - "autoknots_abstol": zbin.autoknots_abstol, } for zbin in zdist_bins ], @@ -412,9 +417,6 @@ def test_zdist_bin_collection_from_yaml(zdist_bins): "z": zbin.z.model_dump(), "bin_name": zbin.bin_name, "measurements": make_measurements_dict(zbin.measurements), - "use_autoknot": zbin.use_autoknot, - "autoknots_reltol": zbin.autoknots_reltol, - "autoknots_abstol": zbin.autoknots_abstol, } for zbin in zdist_bins ], @@ -434,9 +436,6 @@ def test_zdist_bin_collection_from_yaml(zdist_bins): assert_array_equal(zbin.z.generate(), zbin_from_yaml.z.generate()) assert zbin.bin_name == zbin_from_yaml.bin_name assert zbin.measurements == zbin_from_yaml.measurements - assert zbin.use_autoknot == zbin_from_yaml.use_autoknot - assert zbin.autoknots_reltol == zbin_from_yaml.autoknots_reltol - assert zbin.autoknots_abstol == zbin_from_yaml.autoknots_abstol def test_make_measurement_from_measurement(): @@ -471,3 +470,28 @@ def test_make_measurement_from_dictionary(): ValueError, match=re.escape(r"Invalid Measurement: {3} is not a dictionary") ): _ = make_measurements({3}) # type: ignore + + +def test_distribution(zdist: ZDistLSSTSRD): + """Test the distribution method.""" + + stats = zdist.compute_distribution(0.03) + assert isinstance(stats, Ncm.StatsDist1d) + + middle_z = stats.eval_inv_pdf(0.5) + + bins = zdist.equal_area_bins(2, 0.03, zdist.max_z) + + assert_allclose([0.0, middle_z, zdist.max_z], bins) + + +def test_true_distribution(zdist: ZDistLSSTSRD): + """Test the true_distribution method.""" + stats = zdist.compute_true_distribution() + assert isinstance(stats, Ncm.StatsDist1d) + + middle_z = stats.eval_inv_pdf(0.5) + + bins = zdist.equal_area_bins(2, 0.03, zdist.max_z, use_true_distribution=True) + + assert_allclose([0.0, middle_z, zdist.max_z], bins) diff --git a/tutorial/inferred_zdist_generators.qmd b/tutorial/inferred_zdist_generators.qmd index 7f3dd70d..c0f57bc8 100644 --- a/tutorial/inferred_zdist_generators.qmd +++ b/tutorial/inferred_zdist_generators.qmd @@ -56,25 +56,40 @@ The values of $\alpha$ and $z_0$ are different for Year 1 and Year 10. `ZDistLLSTSRD` provides these values as defaults and allows for greater flexibility when desired. Below we calculate @eq-fz for year 1 and year 10 parameters from LSST SRD. -Here we are choosing $100$ equally-spaced points in the range $0 < z < 3$ at which to evaluate the function $f(z)$. +Here we are choosing $100$ equally-spaced points in the range $0 < z < 3.5$ at which to evaluate the function $f(z)$. ```{python} import numpy as np import firecrown from firecrown.generators.inferred_galaxy_zdist import ZDistLSSTSRD # These are the values of z at which we will evalute f(z) -z = np.linspace(0, 3.0, 100) +z = np.linspace(0, 3.5, 100) # We want to evaluate f(z) for both Y1 and Y10, using the SRD prescriptions. -zdist_y1 = ZDistLSSTSRD.year_1() -zdist_y10 = ZDistLSSTSRD.year_10() +zdist_y1_lens = ZDistLSSTSRD.year_1_lens(use_autoknot=True, autoknots_reltol=1.0e-5) +zdist_y10_lens = ZDistLSSTSRD.year_10_lens(use_autoknot=True, autoknots_reltol=1.0e-5) + +zdist_y1_source = ZDistLSSTSRD.year_1_source(use_autoknot=True, autoknots_reltol=1.0e-5) +zdist_y10_source = ZDistLSSTSRD.year_10_source( + use_autoknot=True, autoknots_reltol=1.0e-5 +) # Now we can generate the values we want to plot. -# Note that Pz_y1 and Pz_y10 are both numpy arrays: -Pz_y1 = zdist_y1.distribution(z) -Pz_y10 = zdist_y10.distribution(z) +# Note that Pz_y1_* and Pz_y10_* are both numpy arrays: +Pz_y1_lens = zdist_y1_lens.distribution(z) +Pz_y10_lens = zdist_y10_lens.distribution(z) + +Pz_y1_source = zdist_y1_source.distribution(z) +Pz_y10_source = zdist_y10_source.distribution(z) + ``` +We use NumCosmo's `autoknots` to determine the number of knots to use in the spline interpolation. +This allows us to create the knots such that the interpolation error is less than $10^{-5}$.[^4] + +[^4]: The `autoknots` method is a part of the NumCosmo package, which is a dependency of Firecrown. +See [the NumCosmo documentation](https://numcosmo.github.io/manual/NcmSplineCubicNotaknot.html) for more information. + The generated data are plotted in @fig-fz ```{python} @@ -87,18 +102,33 @@ import pandas as pd # The data were not originally generated in a dataframe convenient for # plotting, so our first task it to put them into such a form. -# First we create a dataframe with the raw data. -data = pd.DataFrame({"z": z, "Y1": Pz_y1, "Y10": Pz_y10}) -# Then we 'melt' the data into a longer form, which is more conveient for -# making multiple plots on the same axes, or for making panel (faceted) plots. -long = pd.melt(data, id_vars=["z"], var_name="year", value_name="fz") +d_y_all = pd.concat( + [ + pd.DataFrame( + { + "z": z, + "dndz": Pz, + "year": year, + "distribution": dist, + } + ) + for Pz, year, dist in [ + (Pz_y1_lens, "Y1", "lens"), + (Pz_y10_lens, "Y10", "lens"), + (Pz_y1_source, "Y1", "source"), + (Pz_y10_source, "Y10", "source"), + ] + ] +) -# Now we can generate the plot. +# Now we can generate the plot. We need a theme to increase the width ( - ggplot(long, aes("z", "fz", color="year")) + ggplot(d_y_all, aes("z", "dndz", color="year")) + geom_point() + labs(y="dN/dz") + doc_theme() + + facet_wrap("~ distribution", ncol=2) + + theme(figure_size=(10, 4)) ) ``` @@ -123,9 +153,10 @@ from firecrown.metadata_types import Measurement, Galaxies # These are the values at which we will evaluate the distribution. z = np.linspace(0.0, 0.6, 100) -# We use the same zdist_y1 and zdist_y10 that were created above. + +# We use the same zdist_y1_* and zdist_y10_* that were created above. # We create two InferredGalaxyZDist objects, one for Y1 and one for Y10. -Pz_lens0_y1 = zdist_y1.binned_distribution( +Pz_lens0_y1 = zdist_y1_lens.binned_distribution( zpl=Y1_LENS_BINS["edges"][0], zpu=Y1_LENS_BINS["edges"][1], sigma_z=Y1_LENS_BINS["sigma_z"], @@ -133,7 +164,7 @@ Pz_lens0_y1 = zdist_y1.binned_distribution( name="lens0_y1", measurements={Galaxies.COUNTS}, ) -Pz_source0_y1 = zdist_y1.binned_distribution( +Pz_source0_y1 = zdist_y1_source.binned_distribution( zpl=Y1_SOURCE_BINS["edges"][0], zpu=Y1_SOURCE_BINS["edges"][1], sigma_z=Y1_SOURCE_BINS["sigma_z"], @@ -141,7 +172,7 @@ Pz_source0_y1 = zdist_y1.binned_distribution( name="source0_y1", measurements={Galaxies.SHEAR_E}, ) -Pz_lens0_y10 = zdist_y10.binned_distribution( +Pz_lens0_y10 = zdist_y10_lens.binned_distribution( zpl=Y10_LENS_BINS["edges"][0], zpu=Y10_LENS_BINS["edges"][1], sigma_z=Y10_LENS_BINS["sigma_z"], @@ -149,7 +180,7 @@ Pz_lens0_y10 = zdist_y10.binned_distribution( name="lens0_y10", measurements={Galaxies.COUNTS}, ) -Pz_source0_y10 = zdist_y10.binned_distribution( +Pz_source0_y10 = zdist_y10_source.binned_distribution( zpl=Y10_SOURCE_BINS["edges"][0], zpu=Y10_SOURCE_BINS["edges"][1], sigma_z=Y10_SOURCE_BINS["sigma_z"], @@ -221,40 +252,31 @@ data = pd.concat([d_lens0_y1, d_source0_y1, d_lens0_y10, d_source0_y10]) The `ZDistLSSTSRD` object can also be used to generate the redshift distribution for all bins at once. The following code block demonstrates how to do this. -Moreover, we also use NumCosmo's `autoknots` to determine the number of knots to use in the spline interpolation. -This allows us to create the knots such that the interpolation error is less than $10^{-5}$.[^4] - -[^4]: The `autoknots` method is a part of the NumCosmo package, which is a dependency of Firecrown. -See [the NumCosmo documentation](https://numcosmo.github.io/manual/NcmSplineCubicNotaknot.html) for more information. ```{python} from itertools import pairwise # We use the same zdist_y1 and zdist_y10 that were created above. # We create two InferredGalaxyZDist objects, one for Y1 and one for Y10. -z = np.linspace(0.0, 1.6, 1000) +z = np.linspace(0.0, 3.5, 1000) all_y1_bins = [ - zdist_y1.binned_distribution( + zdist_y1_lens.binned_distribution( zpl=zpl, zpu=zpu, sigma_z=Y1_LENS_BINS["sigma_z"], z=z, name=f"lens_{zpl:.1f}_{zpu:.1f}_y1", measurements={Galaxies.COUNTS}, - use_autoknot=True, - autoknots_reltol=1.0e-5, ) for zpl, zpu in pairwise(Y1_LENS_BINS["edges"]) ] + [ - zdist_y1.binned_distribution( + zdist_y1_source.binned_distribution( zpl=zpl, zpu=zpu, sigma_z=Y1_SOURCE_BINS["sigma_z"], z=z, name=f"source_{zpl:.1f}_{zpu:.1f}_y1", measurements={Galaxies.SHEAR_E}, - use_autoknot=True, - autoknots_reltol=1.0e-5, ) for zpl, zpu in pairwise(Y1_SOURCE_BINS["edges"]) ] @@ -290,9 +312,123 @@ d_y1 = pd.concat( + labs(y="dN/dz") + facet_wrap("~ measurement", nrow=2) + doc_theme() + + theme(figure_size=(10, 6)) ) ``` +## Source Binning + +The SRD specifies different binning strategies for lens and source samples. +- **Lens binning**: Defined by explicitly providing the bin edges. +- **Source binning**: Defined to ensure equal numbers of galaxies in each bin. + +The `ZDistLSSTSRD` class offers the `equal_area_bins` method to generate source bins with equal galaxy counts. +However, the implementation involves some subtleties due to the complexity of photometric redshift distributions. + +Since the final photometric redshift distribution is a **convolution** of the true redshift distribution with photometric redshift errors, generating bins directly from photometric redshifts is non-trivial. +The process involves: +1. Generating the **true redshift distribution**. +2. Convolving it with **photometric redshift errors** to produce the final **photometric redshift distribution**. +3. Using the **cumulative distribution function (CDF)** of the photometric distribution to determine the bin edges that yield equal galaxy counts per bin. + +Because this exact approach can be computationally expensive, the `equal_area_bins` method offers an **approximate mode** by setting `use_true_distribution=True`. +In this mode, the bins are generated using the **true redshift distribution** only, which is faster but may not perfectly balance the galaxy counts across bins. + +The following code block demonstrates how to generate source bins with equal galaxy counts using the `ZDistLSSTSRD` class. +```{python} +# We use the same zdist_y1_source that was created above. +# Next, we generate the source bins using the true distribution. +source_bins = zdist_y1_source.equal_area_bins( + n_bins=5, sigma_z=Y1_SOURCE_BINS["sigma_z"], use_true_distribution=True, last_z=3.5 +) + +# Now we do the same, but using the photometric distribution. +source_bins_phot = zdist_y1_source.equal_area_bins( + n_bins=5, sigma_z=Y1_SOURCE_BINS["sigma_z"], use_true_distribution=False, last_z=3.5 +) +``` + +We can plot just the bins edges to see the difference between the two methods. +```{python} +# | label: fig-source-bins-edges +# | fig-cap: Source bins edges for LSST Year 1 using the formulation from the SRD. +# | fig-cap-location: margin +# | code-fold: true + +z = np.linspace(0.0, 3.5, 1000) +stats = zdist_y1_source.compute_distribution(Y1_SOURCE_BINS["sigma_z"]) +dndz = np.array([stats.eval_p(z_i) for z_i in z]) + +d_source_bins = pd.DataFrame({"z": z, "dndz": dndz}) + +# Create DataFrames for the bin edges +d_bin_edges = pd.DataFrame({"bin_edges": source_bins}) +d_bin_edges_phot = pd.DataFrame({"bin_edges_phot": source_bins_phot}) + +( + ggplot(d_source_bins, aes("z", "dndz")) + + geom_line() + + geom_vline( + data=d_bin_edges, + mapping=aes(xintercept="bin_edges"), + color="red", + linetype="dashed", + size=0.8, + ) + + geom_vline( + data=d_bin_edges_phot, + mapping=aes(xintercept="bin_edges_phot"), + color="blue", + linetype="dotted", + size=0.8, + ) + + labs(y="dN/dz") + + doc_theme() + + theme(figure_size=(10, 4)) +) +``` + +The plot in @fig-source-bins-edges shows the bin edges for the source sample in LSST Year 1. +The red dashed lines represent the bin edges generated using the true distribution, while the blue dotted lines represent the bin edges generated using the photometric distribution. +The percentage difference in the number of galaxies in each bin is shown in the table below. +```{python} +# | label: tbl-source-bins-diff +# | tbl-cap: Percentage difference in the number of galaxies in each bin for LSST Year 1 using the formulation from the SRD. +# | tbl-cap-location: margin +# | code-fold: true + +N_total = stats.eval_pdf(3.5) +N_gal = np.array( + [ + (stats.eval_pdf(zu) - stats.eval_pdf(zl)) / N_total + for zl, zu in pairwise(source_bins) + ] +) +N_gal_phot = np.array( + [ + (stats.eval_pdf(zu) - stats.eval_pdf(zl)) / N_total + for zl, zu in pairwise(source_bins_phot) + ] +) +percent_diff = 100.0 * (N_gal - N_gal_phot) / N_gal +( + pd.DataFrame( + { + "True Distribution Bin": [ + f"({i:.3f}, {j:.3f})" for i, j in pairwise(source_bins) + ], + "Photometric Distribution Bin": [ + f"({i:.3f}, {j:.3f})" for i, j in pairwise(source_bins_phot) + ], + "Galaxies (True Dist.)": [f"{n:.3f}" for n in N_gal], + "Galaxies (Photometric Dist.)": [f"{n:.3f}" for n in N_gal_phot], + "Percent Difference (%)": [f"{p:.1f}%" for p in percent_diff], + } + ) +) +``` + + ## End note Note that these facilities for creating redshift distributions are not limitations on the use of the distributions. diff --git a/tutorial/inferred_zdist_serialization.qmd b/tutorial/inferred_zdist_serialization.qmd index 062265f4..eb079d88 100644 --- a/tutorial/inferred_zdist_serialization.qmd +++ b/tutorial/inferred_zdist_serialization.qmd @@ -64,9 +64,9 @@ The code snippet below demonstrates how to initialize and serialize this datacla ```{python} from firecrown.generators.inferred_galaxy_zdist import ( ZDistLSSTSRDBinCollection, - Y1_ALPHA, - Y1_BETA, - Y1_Z0, + Y1_LENS_ALPHA, + Y1_LENS_BETA, + Y1_LENS_Z0, ) # We add a bin1, to demonstrate the use of multiple bins. @@ -79,9 +79,9 @@ d.update(bin_name="bin1", zpl=0.2, zpu=0.3) bin1 = ZDistLSSTSRDBin.model_validate(d) bin_collection = ZDistLSSTSRDBinCollection( - alpha=Y1_ALPHA, - beta=Y1_BETA, - z0=Y1_Z0, + alpha=Y1_LENS_ALPHA, + beta=Y1_LENS_BETA, + z0=Y1_LENS_Z0, bins=[bin0, bin1], )