From 046ba01f917885c1e46a1947964b7e1b809159b3 Mon Sep 17 00:00:00 2001
From: Lanqing Yuan <yuanlq@uchicago.edu>
Date: Sat, 6 Apr 2024 18:25:00 -0500
Subject: [PATCH] updated example_peaks

---
 notebooks/README.md           |   2 +-
 notebooks/example_peaks.ipynb | 596 +++++++++++++++++++++++++++-------
 2 files changed, 477 insertions(+), 121 deletions(-)

diff --git a/notebooks/README.md b/notebooks/README.md
index bd9aa46..de03ea0 100644
--- a/notebooks/README.md
+++ b/notebooks/README.md
@@ -5,4 +5,4 @@ These are notebooks for hopefully pedagogical purpose. Due to the sensitive info
 An example of using this framework to sprinkle events into real data, with followed analysis functions defined in this package.
 
 ## `example_peaks.ipynb`
-An example of using this framework to sprinkle single electron peaks into real data, with followed analysis functions defined in this package.
+An example of using this framework to sprinkle single electron peaks (with realistic timestamps and XY information passing SE selection, based on bootstrapping data SEs) into real data, followed by analysis with functions defined in this package.
diff --git a/notebooks/example_peaks.ipynb b/notebooks/example_peaks.ipynb
index bc7d84c..cbe1b06 100644
--- a/notebooks/example_peaks.ipynb
+++ b/notebooks/example_peaks.ipynb
@@ -2,17 +2,26 @@
  "cells": [
   {
    "cell_type": "markdown",
-   "id": "b27492af-dc89-4ce0-b800-ea780c1896c3",
+   "id": "28624e18-72b8-47d3-a721-503e68d8b3b2",
    "metadata": {},
    "source": [
-    "This notebook shows sprinkling result in SR1 data, with single electrons sprinkled.\n",
+    "This notebook shows sprinkling results in SR1 data, with single electrons sprinkled using realistic timing and XY from data. Since we load a lot of data, including waveforms, you should request about 40GB of RAM for this notebook.\n",
     "\n",
-    "Lanqing, Mar 16 2024"
+    "It is expected to be run in the `2024.03.1` container. \n",
+    "\n",
+    "There will be 3 datasets in the end:\n",
+    "- `data`: Exactly the same as `v14` offline real data.\n",
+    "- `simulation`: Events reconstructed using the simulation instructions only; nothing else enters the reconstruction process.\n",
+    "- `sprinkled`: Events reconstructed by mixing simulation and data. Sometimes it is also called `salt`; the two names mean the same thing.\n",
+    "\n",
+    "The simulation instructions, derived from data SEs selected with a random 0.1FDT offset, are located at `/project/lgrandi/yuanlq/salt/se_instructions/`. Unlike the AmBe case, the sprinkled SEs here already have fixed instructions in the directories above, with a rate of 200Hz.\n",
+    "\n",
+    "Lanqing, Mar 26 2024 (Edited Apr 04 2024)"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "807ce1f3-0e13-43c7-a127-ee8727809c6b",
+   "id": "8b629e95-8cc4-4735-9750-2f51d9837046",
    "metadata": {},
    "source": [
     "# Preparation"
@@ -21,7 +30,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "5256c142-8e49-4b19-92f3-94b3ef04d951",
+   "id": "63130121-69b7-4efb-b4a0-d2f26a465dcd",
    "metadata": {},
    "outputs": [],
    "source": [
@@ -31,8 +40,10 @@
     "from tqdm import tqdm\n",
     "import gc \n",
     "import numpy as np\n",
+    "import utilix\n",
     "import matplotlib.pyplot as plt\n",
     "from tabulate import tabulate\n",
+    "from glob import glob\n",
     "from itertools import cycle\n",
     "from saltax.match.utils import *\n",
     "straxen.print_versions()"
@@ -41,175 +52,241 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c8c98b6d-20b0-4ce3-ae97-b62bb59f422e",
-   "metadata": {},
+   "id": "294220d1-9e74-4d96-9e2e-aa2e2d31a595",
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
-    "sr1 = '051613,050996,051948,051939,051910,051909,051905,047876,047865,047860,053167,053153,053139,052997,049123,049450,049505,049521,049544'.split(',')\n",
-    "modes = ['tpc_radon222', 'tpc_radon222',\n",
-    "         'ambe_linked_hev', 'ambe_linked_hev', 'ambe_linked_hev', 'ambe_linked_hev', 'ambe_linked_hev',\n",
-    "         'ybe_linked', 'ybe_linked', 'ybe_linked', \n",
-    "         'tpc_kr83m', 'tpc_kr83m', 'tpc_kr83m', 'tpc_kr83m',\n",
-    "         'tpc_radon_hev', 'tpc_radon_hev', 'tpc_radon_hev', 'tpc_radon_hev', 'tpc_radon_hev']\n",
-    "modes_dict = {}\n",
-    "for i in range(len(sr1)):\n",
-    "    modes_dict[sr1[i]] = modes[i]"
+    "# Define contexts for sprinkling mode and simulation mode respectively\n",
+    "st_salt = saltax.contexts.sxenonnt(\n",
+    "             saltax_mode='salt',\n",
+    "             output_folder='/project/lgrandi/yuanlq/salt/se_bootstrapped',\n",
+    "             faxconf_version=\"sr0_v4\",\n",
+    "             generator_name='se_bootstrapped',\n",
+    "             recoil=8,\n",
+    "             mode='all')\n",
+    "st_simu = saltax.contexts.sxenonnt(\n",
+    "             saltax_mode='simu',\n",
+    "             output_folder='/project/lgrandi/yuanlq/salt/se_bootstrapped',\n",
+    "             faxconf_version=\"sr0_v4\",\n",
+    "             generator_name='se_bootstrapped',\n",
+    "             recoil=8,\n",
+    "             mode='all')\n",
+    "\n",
+    "# This is the regular straxen context to load data\n",
+    "st_data = cutax.xenonnt_offline()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c0c5e3a1-9500-4e7d-b3d9-ac6a80a80037",
+   "id": "d00510ca-b5f0-4923-9fd0-184f69046d01",
    "metadata": {},
    "outputs": [],
    "source": [
-    "st_salt = saltax.contexts.sxenonnt(runid=51613, # put anything here should work\n",
-    "             saltax_mode='salt',\n",
-    "             output_folder='/scratch/midway2/yuanlq/salt/se',\n",
-    "             faxconf_version=\"sr0_v4\",\n",
-    "             generator_name='se',\n",
-    "             recoil=8,\n",
-    "             mode='all')\n",
-    "st_simu = saltax.contexts.sxenonnt(runid=51613, # put anything here should work\n",
-    "             saltax_mode='simu',\n",
-    "             output_folder='/scratch/midway2/yuanlq/salt/se',\n",
-    "             faxconf_version=\"sr0_v4\",\n",
-    "             generator_name='se',\n",
-    "             recoil=8,\n",
-    "             mode='all')"
+    "runs_with_rawdata = saltax.find_runs_with_rawdata(\n",
+    "        rawdata_folders=[\n",
+    "            '/project/lgrandi/yuanlq/salt/raw_records/',\n",
+    "            '/scratch/midway2/yuanlq/salt/raw_records/',\n",
+    "            '/scratch/midway3/yuanlq/salt/raw_records/'\n",
+    "        ]\n",
+    ")\n",
+    "runs_with_rawdata"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d3af735c-4482-4a0d-a83e-7257428c76e4",
+   "id": "3346d90a-e07b-4e9f-b6ee-eee6233b6eed",
    "metadata": {},
    "outputs": [],
    "source": [
-    "runs_available = []\n",
-    "for run in sr1:\n",
-    "    print(run, modes_dict[run], \n",
-    "          st_salt.is_stored(run, 'peak_basics'), \n",
-    "          st_simu.is_stored(run, 'peak_basics'))"
+    "saltax.get_available_runs(runs_with_rawdata, st_salt, st_simu,\n",
+    "                          salt_available=['peak_basics', 'peak_positions_mlp'],\n",
+    "                          simu_available=[])"
    ]
   },
   {
-   "cell_type": "markdown",
-   "id": "7cd00ef9-5f7b-4927-b68f-d072c7dba8c2",
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f203c5a3-28f9-441c-bc41-e3c0fa1a4807",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "Warning! The single electrons are affected by a bug in wfsim! It looks down-sampled in time (100ns resolution), and currently we have no way to fix it. However luckily, we don't care that much about <100ns scale process."
+    "kr83m = ['053167']\n",
+    "rn220 = ['048693', '048692', '048698', '049432', '049433']\n",
+    "ybe = ['047876']"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "90fd3c06-8551-45d2-80b8-9f3deaf3c4b7",
+   "id": "3bcdf15a-9ab9-47d1-8149-a94b55aaebad",
    "metadata": {},
    "outputs": [],
    "source": [
-    "peaks = st_simu.get_array('049505', 'peaks', seconds_range=(0,1));\n",
+    "peaks = st_simu.get_array('053167', 'peaks', seconds_range=(0,2));\n",
     "plt.figure(dpi=150)\n",
     "for p in peaks:\n",
     "    plt.plot(np.arange(200)*p['dt'], p['data'])\n",
     "plt.xlabel('Time [ns]')\n",
     "plt.ylabel('Amplitude [PE/10ns]')\n",
-    "plt.title('Example Problematic Single Electrons Simulated')"
+    "plt.xlim(0,2000)\n",
+    "plt.title('Example Single Electrons Simulated')"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "25da9913-892d-470c-bd09-54a430529a2e",
+   "id": "49dd6964-520c-4d5b-8180-6363277862ce",
    "metadata": {},
    "source": [
-    "# Let's sprinkle"
+    "Good: with the [special wfsim version](https://github.com/XENONnT/WFSim/pull/434) our SEs look as expected. If they look like combs at 250ns resolution, it means you are using the wrong `wfsim` or `fuse` version."
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "f40d2414-dcc2-40f3-b07f-0c608c0a4021",
+   "id": "e196aa7f-92d7-4d07-94d5-3faf008b6bfe",
    "metadata": {},
    "source": [
-    "## SR1 AmBe"
+    "# Let's sprinkle"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "384c660c-b828-4f65-98ff-f5af76691876",
+   "id": "56a25bb9-12c2-4a1b-afae-7bef96f25ef0",
    "metadata": {},
    "source": [
-    "It may take minutes to load..."
+    "## Rn220"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "724da284-c100-43a6-9f8e-e440e7a472de",
+   "id": "1a79761f-4b1b-49d9-8831-9f1381bb4814",
    "metadata": {
     "scrolled": true
    },
    "outputs": [],
    "source": [
-    "(_, _, _, _, \n",
-    " peaks_salt_matched_to_simu, # they are of same length\n",
-    " peaks_simu_matched_to_salt  # they are of same length\n",
-    ") = load_peaks(['051948', '051939', '051910', '051909', '051905'], st_salt, st_simu);"
+    "(peaks_simu, peaks_salt, inds_dict) = load_peaks(rn220, st_salt, st_simu);"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "976d9e21-25ee-4b09-b581-9c070f9e6021",
+   "id": "f3d1186c-1a0b-4cc7-9c66-5d31962e7e20",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now peaks_salt_matched_to_simu and peaks_simu_matched_to_salt are 1-1 corresponding\n",
+    "peaks_salt_matched_to_simu = peaks_salt[inds_dict['ind_salt_peak_found']]\n",
+    "peaks_simu_matched_to_salt = peaks_simu[inds_dict['ind_simu_peak_found']]\n",
+    "\n",
+    "# Further filter out the ones whose simu fail daq cut\n",
+    "mask_simu_daq_cut = saltax.apply_peaks_daq_cuts(st_data, rn220, \n",
+    "                                                peaks_simu_matched_to_salt) \n",
+    "peaks_salt_matched_to_simu = peaks_salt_matched_to_simu[mask_simu_daq_cut]\n",
+    "peaks_simu_matched_to_salt = peaks_simu_matched_to_salt[mask_simu_daq_cut]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eace4731-567f-4c38-9779-d427a241a193",
    "metadata": {},
    "outputs": [],
    "source": [
     "plt.figure(dpi=150)\n",
-    "plt.hist(peaks_salt_matched_to_simu['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:blue', label='Sprinkled')\n",
-    "plt.hist(peaks_simu_matched_to_salt['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:red', label = 'Simulated')\n",
-    "plt.title('Before Cuts SE Ambience Interference in SR1 AmBe')\n",
+    "plt.hist(peaks_salt_matched_to_simu['area'], \n",
+    "         bins=np.linspace(0,100,101), \n",
+    "         histtype='step', color='tab:blue', \n",
+    "         label='Matched Sprinkled: %sPE'%(\n",
+    "             np.round(np.median(peaks_salt_matched_to_simu['area']), decimals=2)))\n",
+    "plt.hist(peaks_simu_matched_to_salt['area'], bins=np.linspace(0,100,101), \n",
+    "         histtype='step', color='tab:red', \n",
+    "         label='Matched Simulated: %sPE'%(np.round(np.median(peaks_simu_matched_to_salt['area']), decimals=2)))\n",
+    "plt.title('Before Cuts SE Ambience Interference in SR1 Rn220')\n",
     "plt.legend()\n",
     "plt.xlabel('Area [PE]')\n",
     "plt.ylabel('Counts [AU]')"
    ]
   },
   {
-   "cell_type": "markdown",
-   "id": "f6f0669c-6540-4e09-b5e9-a93aa479c266",
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1c1ea98f-bef3-41a2-898b-7aac8868171d",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "## SR1 Rn222"
+    "plt.figure(dpi=150)\n",
+    "plt.hist(peaks_salt_matched_to_simu['area'] - peaks_simu_matched_to_salt['area'], \n",
+    "         bins=np.linspace(-1,60,201), \n",
+    "         histtype='step', color='tab:blue', )\n",
+    "#plt.legend()\n",
+    "plt.xlabel('Area Sprinkled-Simulated [PE]')\n",
+    "plt.ylabel('Counts [AU]')\n",
+    "plt.title('Before Cuts SE Ambience Interference in SR1 Rn220')\n",
+    "plt.yscale('log')"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9cce3437-47d9-4cc3-b45d-b71ed546a83e",
-   "metadata": {
-    "scrolled": true
-   },
+   "id": "64136a42-2f5f-4f82-b2c6-c3eb0bbf3622",
+   "metadata": {},
    "outputs": [],
    "source": [
-    "(_, _, _, _, \n",
-    " peaks_salt_matched_to_simu, # they are of same length\n",
-    " peaks_simu_matched_to_salt  # they are of same length\n",
-    ") = load_peaks(['051613', '050996'], st_salt, st_simu);"
+    "plt.figure(dpi=150)\n",
+    "plt.hist2d(peaks_simu_matched_to_salt['x_mlp'], peaks_simu_matched_to_salt['y_mlp'], \n",
+    "           bins=(np.linspace(-65,65,100), np.linspace(-65,65,100)))\n",
+    "plt.xlabel(\"X [cm]\")\n",
+    "plt.ylabel(\"Y [cm]\")\n",
+    "plt.title('Before Cuts Selected SE in SR1 Rn220')\n",
+    "plt.show()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ca6c8683-4653-40e8-b905-350e2c075d8f",
+   "id": "b9b128a2-7ee8-45b2-b603-6161160d91fa",
    "metadata": {},
    "outputs": [],
    "source": [
     "plt.figure(dpi=150)\n",
-    "plt.hist(peaks_salt_matched_to_simu['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:blue', label='Sprinkled')\n",
-    "plt.hist(peaks_simu_matched_to_salt['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:red', label = 'Simulated')\n",
-    "plt.title('Before Cuts SE Ambience Interference in SR1 Rn222')\n",
+    "plt.hist(peaks_salt_matched_to_simu['range_50p_area'], \n",
+    "         bins=np.linspace(0,5e3,100), \n",
+    "         histtype='step', \n",
+    "         color='tab:blue',\n",
+    "         label='Matched Sprinkled')\n",
+    "plt.hist(peaks_simu_matched_to_salt['range_50p_area'], \n",
+    "         bins=np.linspace(0,5e3,100), \n",
+    "         histtype='step', \n",
+    "         color='tab:red',\n",
+    "         label='Matched Simulated')\n",
+    "plt.yscale('log')\n",
+    "plt.legend()\n",
+    "plt.xlabel('50p width [ns]')\n",
+    "plt.title('Before Cuts SE Ambience Interference in SR1 Rn220')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b1c736c3-7ca1-4cfb-9784-0970c4ee3576",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "after_cuts_salt = peaks_salt_matched_to_simu[peaks_salt_matched_to_simu['range_90p_area']<2000]\n",
+    "after_cuts_simu = peaks_simu_matched_to_salt[peaks_salt_matched_to_simu['range_90p_area']<2000]\n",
+    "plt.hist(after_cuts_salt['area'], bins=np.linspace(0,100,100), \n",
+    "         histtype='step', color='tab:blue', \n",
+    "         label='Matched Sprinkled: %sPE'%(np.round(np.median(after_cuts_salt['area'][~np.isnan(after_cuts_salt['area'])]), decimals=2)))\n",
+    "plt.hist(after_cuts_simu['area'], bins=np.linspace(0,100,100), \n",
+    "         histtype='step', color='tab:red', \n",
+    "         label='Matched Simulated: %sPE'%(np.round(np.median(after_cuts_simu['area'][~np.isnan(after_cuts_simu['area'])]), decimals=2)))\n",
+    "plt.title('90p Width < 2000ns SE Ambience Interference in SR1 Rn220')\n",
     "plt.legend()\n",
     "plt.xlabel('Area [PE]')\n",
     "plt.ylabel('Counts [AU]')"
@@ -217,78 +294,194 @@
   },
   {
    "cell_type": "markdown",
-   "id": "b2869d36-add2-4e75-99b0-f05e56803cff",
+   "id": "5374a9f3-df26-45e5-9c23-e83d60d8a85d",
    "metadata": {},
    "source": [
-    "## SR1 YBe"
+    "## YBe"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c300bf6e-52d2-4b98-bb9c-9076ad346125",
-   "metadata": {
-    "scrolled": true
-   },
+   "id": "5569d74d-db20-4360-bfe5-f915e8eca5a9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(peaks_simu, peaks_salt, inds_dict) = load_peaks(ybe, st_salt, st_simu);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a7b1232-c475-4c90-96aa-6575508cb717",
+   "metadata": {},
    "outputs": [],
    "source": [
-    "(_, _, _, _, \n",
-    " peaks_salt_matched_to_simu, # they are of same length\n",
-    " peaks_simu_matched_to_salt  # they are of same length\n",
-    ") = load_peaks(['047876', '047860'], st_salt, st_simu);"
+    "# Now peaks_salt_matched_to_simu and peaks_simu_matched_to_salt are 1-1 corresponding\n",
+    "peaks_salt_matched_to_simu = peaks_salt[inds_dict['ind_salt_peak_found']]\n",
+    "peaks_simu_matched_to_salt = peaks_simu[inds_dict['ind_simu_peak_found']]\n",
+    "\n",
+    "# Further filter out the ones whose simu fail daq cut\n",
+    "mask_simu_daq_cut = saltax.apply_peaks_daq_cuts(st_data, ybe, \n",
+    "                                                peaks_simu_matched_to_salt) \n",
+    "peaks_salt_matched_to_simu = peaks_salt_matched_to_simu[mask_simu_daq_cut]\n",
+    "peaks_simu_matched_to_salt = peaks_simu_matched_to_salt[mask_simu_daq_cut]"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "44d43694-e4ea-46de-96cc-555afb15143a",
+   "id": "ee2b6206-4b66-4ba7-857a-d6643d151452",
    "metadata": {},
    "outputs": [],
    "source": [
     "plt.figure(dpi=150)\n",
-    "plt.hist(peaks_salt_matched_to_simu['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:blue', label='Sprinkled')\n",
-    "plt.hist(peaks_simu_matched_to_salt['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:red', label = 'Simulated')\n",
+    "plt.hist(peaks_salt_matched_to_simu['area'], \n",
+    "         bins=np.linspace(0,100,101), \n",
+    "         histtype='step', color='tab:blue', \n",
+    "         label='Matched Sprinkled: %sPE'%(\n",
+    "             np.round(np.median(peaks_salt_matched_to_simu['area']), decimals=2)))\n",
+    "plt.hist(peaks_simu_matched_to_salt['area'], bins=np.linspace(0,100,101), \n",
+    "         histtype='step', color='tab:red', \n",
+    "         label='Matched Simulated: %sPE'%(np.round(np.median(peaks_simu_matched_to_salt['area']), decimals=2)))\n",
     "plt.title('Before Cuts SE Ambience Interference in SR1 YBe')\n",
     "plt.legend()\n",
     "plt.xlabel('Area [PE]')\n",
     "plt.ylabel('Counts [AU]')"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7db9aba5-c34d-4b4d-aec2-aaa465e4a84b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "plt.hist(peaks_salt_matched_to_simu['area'] - peaks_simu_matched_to_salt['area'], \n",
+    "         bins=np.linspace(-1,60,201), \n",
+    "         histtype='step', color='tab:blue', )\n",
+    "#plt.legend()\n",
+    "plt.xlabel('Area Sprinkled-Simulated [PE]')\n",
+    "plt.ylabel('Counts [AU]')\n",
+    "plt.title('Before Cuts SE Ambience Interference in SR1 YBe')\n",
+    "plt.yscale('log')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "acb8b807-d41d-4ac9-8731-36a51f6aea38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "plt.hist2d(peaks_simu_matched_to_salt['x_mlp'], peaks_simu_matched_to_salt['y_mlp'], \n",
+    "           bins=(np.linspace(-65,65,100), np.linspace(-65,65,100)))\n",
+    "plt.xlabel(\"X [cm]\")\n",
+    "plt.ylabel(\"Y [cm]\")\n",
+    "plt.title('Before Cuts Selected SE in SR1 YBe')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ff09f765-83cf-47b5-86ab-94b23198744f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "plt.hist(peaks_salt_matched_to_simu['range_50p_area'], \n",
+    "         bins=np.linspace(0,5e3,100), \n",
+    "         histtype='step', \n",
+    "         color='tab:blue',\n",
+    "         label='Matched Sprinkled')\n",
+    "plt.hist(peaks_simu_matched_to_salt['range_50p_area'], \n",
+    "         bins=np.linspace(0,5e3,100), \n",
+    "         histtype='step', \n",
+    "         color='tab:red',\n",
+    "         label='Matched Simulated')\n",
+    "plt.yscale('log')\n",
+    "plt.legend()\n",
+    "plt.xlabel('50p width [ns]')\n",
+    "plt.title('Before Cuts SE Ambience Interference in SR1 YBe')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0b97b519-2a4f-4824-a6c2-7179c59051a2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "after_cuts_salt = peaks_salt_matched_to_simu[peaks_salt_matched_to_simu['range_90p_area']<2000]\n",
+    "after_cuts_simu = peaks_simu_matched_to_salt[peaks_salt_matched_to_simu['range_90p_area']<2000]\n",
+    "plt.hist(after_cuts_salt['area'], bins=np.linspace(0,100,100), \n",
+    "         histtype='step', color='tab:blue', \n",
+    "         label='Matched Sprinkled: %sPE'%(np.round(np.median(after_cuts_salt['area'][~np.isnan(after_cuts_salt['area'])]), decimals=2)))\n",
+    "plt.hist(after_cuts_simu['area'], bins=np.linspace(0,100,100), \n",
+    "         histtype='step', color='tab:red', \n",
+    "         label='Matched Simulated: %sPE'%(np.round(np.median(after_cuts_simu['area'][~np.isnan(after_cuts_simu['area'])]), decimals=2)))\n",
+    "plt.title('90p Width < 2000ns SE Ambience Interference in SR1 YBe')\n",
+    "plt.legend()\n",
+    "plt.xlabel('Area [PE]')\n",
+    "plt.ylabel('Counts [AU]')"
+   ]
+  },
   {
    "cell_type": "markdown",
-   "id": "e11d2f04-9836-4c3f-882a-d13ba77987a1",
+   "id": "6b74a1de-654e-4e1d-a7fd-3d9845221839",
+   "metadata": {},
+   "source": [
+    "## Kr83m"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4861b5c9-d659-4034-ba67-f68b103507ab",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "## SR1 Kr83m"
+    "(peaks_simu, peaks_salt, inds_dict) = load_peaks(kr83m, st_salt, st_simu);"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6560f7fe-64a0-4ae1-98b9-dc59d31a9d40",
+   "id": "283b67e9-7fcb-4ae2-ab92-8c7c750c453c",
    "metadata": {},
    "outputs": [],
    "source": [
-    "(_, _, _, _, \n",
-    " peaks_salt_matched_to_simu, # they are of same length\n",
-    " peaks_simu_matched_to_salt  # they are of same length\n",
-    ") = load_peaks(['053167', '053153', '053139', '052997'], st_salt, st_simu);"
+    "# Now peaks_salt_matched_to_simu and peaks_simu_matched_to_salt are 1-1 corresponding\n",
+    "peaks_salt_matched_to_simu = peaks_salt[inds_dict['ind_salt_peak_found']]\n",
+    "peaks_simu_matched_to_salt = peaks_simu[inds_dict['ind_simu_peak_found']]\n",
+    "\n",
+    "# Further filter out the ones whose simu fail daq cut\n",
+    "mask_simu_daq_cut = saltax.apply_peaks_daq_cuts(st_data, kr83m, \n",
+    "                                                peaks_simu_matched_to_salt) \n",
+    "peaks_salt_matched_to_simu = peaks_salt_matched_to_simu[mask_simu_daq_cut]\n",
+    "peaks_simu_matched_to_salt = peaks_simu_matched_to_salt[mask_simu_daq_cut]"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ec8b8ff0-dc5b-4fea-b9d0-4b04d529f76e",
+   "id": "536276f5-f786-4e99-a298-39d3e7ff5530",
    "metadata": {},
    "outputs": [],
    "source": [
     "plt.figure(dpi=150)\n",
-    "plt.hist(peaks_salt_matched_to_simu['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:blue', label='Sprinkled')\n",
-    "plt.hist(peaks_simu_matched_to_salt['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:red', label = 'Simulated')\n",
+    "plt.hist(peaks_salt_matched_to_simu['area'], \n",
+    "         bins=np.linspace(0,100,101), \n",
+    "         histtype='step', color='tab:blue', \n",
+    "         label='Matched Sprinkled: %sPE'%(\n",
+    "             np.round(np.median(peaks_salt_matched_to_simu['area']), decimals=2)))\n",
+    "plt.hist(peaks_simu_matched_to_salt['area'], bins=np.linspace(0,100,101), \n",
+    "         histtype='step', color='tab:red', \n",
+    "         label='Matched Simulated: %sPE'%(np.round(np.median(peaks_simu_matched_to_salt['area']), decimals=2)))\n",
     "plt.title('Before Cuts SE Ambience Interference in SR1 Kr83m')\n",
     "plt.legend()\n",
     "plt.xlabel('Area [PE]')\n",
@@ -296,48 +489,211 @@
    ]
   },
   {
-   "cell_type": "markdown",
-   "id": "8e57a7a4-83d4-4d26-9b87-adf89a496ebb",
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1a072359-3c52-4814-8d0e-559ac4104b9a",
    "metadata": {},
+   "outputs": [],
    "source": [
-    "## SR1 Rn220"
+    "plt.figure(dpi=150)\n",
+    "plt.hist(peaks_salt_matched_to_simu['area'] - peaks_simu_matched_to_salt['area'], \n",
+    "         bins=np.linspace(-1,60,201), \n",
+    "         histtype='step', color='tab:blue', )\n",
+    "#plt.legend()\n",
+    "plt.xlabel('Area Sprinkled-Simulated [PE]')\n",
+    "plt.ylabel('Counts [AU]')\n",
+    "plt.title('Before Cuts SE Ambience Interference in SR1 Kr83m')\n",
+    "plt.yscale('log')"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "59de1561-a1d2-49a9-a4b7-003d6ae60c6e",
+   "id": "302c7dc0-9889-4085-8c64-13ddb4eacac6",
    "metadata": {},
    "outputs": [],
    "source": [
-    "(_, _, _, _, \n",
-    " peaks_salt_matched_to_simu, # they are of same length\n",
-    " peaks_simu_matched_to_salt  # they are of same length\n",
-    ") = load_peaks(['049123', '049450', '049505'], st_salt, st_simu);"
+    "plt.figure(dpi=150)\n",
+    "plt.hist2d(peaks_simu_matched_to_salt['x_mlp'], peaks_simu_matched_to_salt['y_mlp'], \n",
+    "           bins=(np.linspace(-65,65,100), np.linspace(-65,65,100)))\n",
+    "plt.xlabel(\"X [cm]\")\n",
+    "plt.ylabel(\"Y [cm]\")\n",
+    "plt.title('Before Cuts Selected SE in SR1 Kr83m')\n",
+    "plt.show()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "34f6735c-98aa-4cf5-a6d1-b96eede24d46",
+   "id": "cc7595b4-8b01-47a5-b26a-bc02de93aaab",
    "metadata": {},
    "outputs": [],
    "source": [
     "plt.figure(dpi=150)\n",
-    "plt.hist(peaks_salt_matched_to_simu['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:blue', label='Sprinkled')\n",
-    "plt.hist(peaks_simu_matched_to_salt['area'], bins=np.linspace(0,100,100), \n",
-    "         histtype='step', color='tab:red', label = 'Simulated')\n",
-    "plt.title('Before Cuts SE Ambience Interference in SR1 Rn220')\n",
+    "plt.hist(peaks_salt_matched_to_simu['range_50p_area'], \n",
+    "         bins=np.linspace(0,5e3,100), \n",
+    "         histtype='step', \n",
+    "         color='tab:blue',\n",
+    "         label='Matched Sprinkled')\n",
+    "plt.hist(peaks_simu_matched_to_salt['range_50p_area'], \n",
+    "         bins=np.linspace(0,5e3,100), \n",
+    "         histtype='step', \n",
+    "         color='tab:red',\n",
+    "         label='Matched Simulated')\n",
+    "plt.yscale('log')\n",
+    "plt.legend()\n",
+    "plt.xlabel('50p width [ns]')\n",
+    "plt.title('Before Cuts SE Ambience Interference in SR1 Kr83m')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9e67fdaa-b3d2-4b6a-b67b-4b4348d45bf2",
+   "metadata": {},
+   "source": [
+    "Let's apply a brutal width cut"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "be58044e-23f8-4d98-920a-016a020719bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.figure(dpi=150)\n",
+    "after_cuts_salt = peaks_salt_matched_to_simu[peaks_salt_matched_to_simu['range_90p_area']<2000]\n",
+    "after_cuts_simu = peaks_simu_matched_to_salt[peaks_salt_matched_to_simu['range_90p_area']<2000]\n",
+    "plt.hist(after_cuts_salt['area'], bins=np.linspace(0,100,100), \n",
+    "         histtype='step', color='tab:blue', \n",
+    "         label='Matched Sprinkled: %sPE'%(np.round(np.median(after_cuts_salt['area'][~np.isnan(after_cuts_salt['area'])]), decimals=2)))\n",
+    "plt.hist(after_cuts_simu['area'], bins=np.linspace(0,100,100), \n",
+    "         histtype='step', color='tab:red', \n",
+    "         label='Matched Simulated: %sPE'%(np.round(np.median(after_cuts_simu['area'][~np.isnan(after_cuts_simu['area'])]), decimals=2)))\n",
+    "plt.title('90p Width < 2000ns SE Ambience Interference in SR1 Kr83m')\n",
     "plt.legend()\n",
     "plt.xlabel('Area [PE]')\n",
     "plt.ylabel('Counts [AU]')"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "68e0d626-46bf-4225-a50f-9ea269688049",
+   "metadata": {},
+   "source": [
+    "# Waveforms"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c7af36c1-251a-4fc1-8799-0567e1b7300c",
+   "metadata": {},
+   "source": [
+    "Let's look at some waveforms of peaks whose area increased in the sprinkled dataset. You will need 40GB RAM to run this section."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8507fd50-df9a-455e-9e24-f25a23ee13e2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This one loads waveforms so it should be heavy and slow!\n",
+    "(peaks_simu, peaks_salt, inds_dict) = load_peaks(\n",
+    "    ['053167'], st_salt, st_simu,\n",
+    "    plugins=('peak_basics', 'peak_positions_mlp', 'peaks') # Just adding peaks so that you have waveforms\n",
+    ");"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a730805f-d223-4bab-9f2e-faa43d7637eb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now peaks_salt_matched_to_simu and peaks_simu_matched_to_salt are 1-1 corresponding\n",
+    "peaks_salt_matched_to_simu = peaks_salt[inds_dict['ind_salt_peak_found']]\n",
+    "peaks_simu_matched_to_salt = peaks_simu[inds_dict['ind_simu_peak_found']]\n",
+    "\n",
+    "# Further filter out the ones whose simu fail daq cut\n",
+    "mask_simu_daq_cut = saltax.apply_peaks_daq_cuts(st_data, ['053167'], \n",
+    "                                                peaks_simu_matched_to_salt) \n",
+    "peaks_salt_matched_to_simu = peaks_salt_matched_to_simu[mask_simu_daq_cut]\n",
+    "peaks_simu_matched_to_salt = peaks_simu_matched_to_salt[mask_simu_daq_cut]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4e2fcc93-2d1e-47a2-8b41-d90a7d50d9df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "abnormal_mask = peaks_salt_matched_to_simu['area'] - peaks_simu_matched_to_salt['area'] > 1\n",
+    "ind = 0\n",
+    "plt.figure(dpi=150)\n",
+    "p_salt = peaks_salt_matched_to_simu[abnormal_mask][ind]\n",
+    "p_simu = peaks_simu_matched_to_salt[abnormal_mask][ind]\n",
+    "plt.plot(np.arange(200)*p_salt['dt'], p_salt['data']/p_salt['dt'], \n",
+    "         color='tab:blue', alpha=0.5, label='Sprinkled:%sPE'%(np.round(p_salt['area'], decimals=2)))\n",
+    "plt.plot(np.arange(200)*p_simu['dt'], p_simu['data']/p_simu['dt'], \n",
+    "         color='tab:red', alpha=0.5, label='Simulated:%sPE'%(np.round(p_simu['area'], decimals=2)))\n",
+    "plt.xlabel('Time [ns]')\n",
+    "plt.ylabel('Amplitude [PE/ns]')\n",
+    "plt.legend()\n",
+    "plt.title(\"Sprinkled-Simulated Area > 1 PE Waveforms in SR1 Kr83m\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "02630224-93cb-4149-b569-709e12803d9d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "abnormal_mask = peaks_salt_matched_to_simu['area'] - peaks_simu_matched_to_salt['area'] > 1\n",
+    "ind = 16\n",
+    "plt.figure(dpi=150)\n",
+    "p_salt = peaks_salt_matched_to_simu[abnormal_mask][ind]\n",
+    "p_simu = peaks_simu_matched_to_salt[abnormal_mask][ind]\n",
+    "plt.plot(np.arange(200)*p_salt['dt'], p_salt['data']/p_salt['dt'], \n",
+    "         color='tab:blue', alpha=0.5, label='Sprinkled:%sPE'%(np.round(p_salt['area'], decimals=2)))\n",
+    "plt.plot(np.arange(200)*p_simu['dt'], p_simu['data']/p_simu['dt'], \n",
+    "         color='tab:red', alpha=0.5, label='Simulated:%sPE'%(np.round(p_simu['area'], decimals=2)))\n",
+    "plt.xlabel('Time [ns]')\n",
+    "plt.ylabel('Amplitude [PE/ns]')\n",
+    "plt.legend()\n",
+    "plt.title(\"Sprinkled-Simulated Area > 1 PE Waveforms in SR1 Kr83m\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3a91c7ea-fa1e-425b-9414-85d186356020",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "abnormal_mask = peaks_salt_matched_to_simu['area'] - peaks_simu_matched_to_salt['area'] > 1\n",
+    "ind = 66\n",
+    "plt.figure(dpi=150)\n",
+    "p_salt = peaks_salt_matched_to_simu[abnormal_mask][ind]\n",
+    "p_simu = peaks_simu_matched_to_salt[abnormal_mask][ind]\n",
+    "plt.plot(np.arange(200)*p_salt['dt'], p_salt['data']/p_salt['dt'], \n",
+    "         color='tab:blue', alpha=0.5, label='Sprinkled:%sPE'%(np.round(p_salt['area'], decimals=2)))\n",
+    "plt.plot(np.arange(200)*p_simu['dt'], p_simu['data']/p_simu['dt'], \n",
+    "         color='tab:red', alpha=0.5, label='Simulated:%sPE'%(np.round(p_simu['area'], decimals=2)))\n",
+    "plt.xlabel('Time [ns]')\n",
+    "plt.ylabel('Amplitude [PE/ns]')\n",
+    "plt.legend()\n",
+    "plt.title(\"Sprinkled-Simulated Area > 1 PE Waveforms in SR1 Kr83m\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "aab23885-5b43-4915-9754-177d120e1194",
+   "id": "6e44523c-2de1-49f5-ac01-b60eaa319c23",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -359,7 +715,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.18"
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,