From 7d752a4d227d7cdc0a66a510b2fd49cec205adfc Mon Sep 17 00:00:00 2001
From: Sarah G <sarah.g.2475962@gmail.com>
Date: Thu, 26 Oct 2023 19:16:55 +0000
Subject: [PATCH 01/40] Improvements to sdba _quantile function

---
 docs/notebooks/sdba-speed.ipynb |  0
 xclim/sdba/nbutils.py           | 52 +++++++++++++++++++++++++++++++--
 2 files changed, 49 insertions(+), 3 deletions(-)
 create mode 100644 docs/notebooks/sdba-speed.ipynb

diff --git a/docs/notebooks/sdba-speed.ipynb b/docs/notebooks/sdba-speed.ipynb
new file mode 100644
index 000000000..e69de29bb
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index fcd4b5444..ac601f54c 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -5,10 +5,13 @@
 from __future__ import annotations
 
 import numpy as np
-from numba import boolean, float32, float64, guvectorize, njit
+from numba import boolean, float32, float64, int64, guvectorize, njit
 from xarray import DataArray
 from xarray.core import utils
+from os import environ
 
+USE_NANQUANTILE = environ.get('USE_NANQUANTILE', False)
+USE_SORTQUANTILE = environ.get('USE_SORTQUANTILE', False)
 
 @guvectorize(
     [(float32[:], float32, float32[:]), (float64[:], float64, float64[:])],
@@ -46,14 +49,57 @@ def vecquantiles(da, rnk, dim):
 def _quantile(arr, q):
     if arr.ndim == 1:
         out = np.empty((q.size,), dtype=arr.dtype)
-        out[:] = np.nanquantile(arr, q)
+        out[:] = _choosequantile(arr, q)
     else:
         out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
         for index in range(out.shape[0]):
-            out[index] = np.nanquantile(arr[index], q)
+            out[index] = _choosequantile(arr[index], q)
     return out
 
 
+@njit([int64(float64[:]),int64(float32[:])],fastmath=False,nogil=True)
+def numnan_sorted(s):
+    # Given a sorted array s, return the number of NaNs.
+    # This is faster than np.isnan(s).sum(), but only works if s is sorted,
+    # and only for 
+	ind = 0
+	for i in range(s.size -1 , 0, -1):
+		if np.isnan(s[i]):
+			ind += 1
+		else:
+			return ind
+	return ind
+
+
+@njit([float64[:](float64[:], float64[:]),float32[:](float32[:], float32[:])], fastmath=False, nogil=True)
+def _sortquantile(arr,q):
+    # This function sorts arr into ascending order,
+    # then computes the quantiles as a linear interpolation
+    # between the sorted values.
+    sortarr = np.sort(arr)
+    numnan = numnan_sorted(sortarr)
+    # compute the indices where each quantile should go:
+    # nb: nan goes to the end, so we need to subtract numnan to the size.
+    indices = q * (arr.size - 1 - numnan) 
+    # compute the quantiles manually to avoid casting to float64:
+    # (alternative is to use np.interp(indices, np.arange(arr.size), sortarr))
+    frac = indices % 1
+    low  = np.floor(indices).astype(np.int64)
+    high  = np.ceil(indices).astype(np.int64)
+    return (1 - frac) * sortarr[low] + frac * sortarr[high]
+
+
+@njit([float64[:](float64[:], float64[:]),float32[:](float32[:], float32[:])], fastmath=False, nogil=True)
+def _choosequantile(arr,q):
+    # When the number of quantiles requested is large (1% of arr.size for nojit, any (< 0.1%) for jit), 
+    # it becomes more efficient to sort the array,
+    # and simply obtain the quantiles from the sorted array.
+    if (arr.size > 1000 * q.size or USE_NANQUANTILE) and not USE_SORTQUANTILE:
+        return np.nanquantile(arr, q).astype(arr.dtype)
+    else:
+        return _sortquantile(arr, q)
+
+        
 def quantile(da, q, dim):
     """Compute the quantiles from a fixed list `q`."""
     # We have two cases :

From 2de908656aed25634635890109b0cc8c2220bbdd Mon Sep 17 00:00:00 2001
From: Sarah G <sarah.g.2475962@gmail.com>
Date: Thu, 26 Oct 2023 19:19:53 +0000
Subject: [PATCH 02/40] Add to vecquantiles

---
 xclim/sdba/nbutils.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index ac601f54c..b5a9eb2b7 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -13,17 +13,7 @@
 USE_NANQUANTILE = environ.get('USE_NANQUANTILE', False)
 USE_SORTQUANTILE = environ.get('USE_SORTQUANTILE', False)
 
-@guvectorize(
-    [(float32[:], float32, float32[:]), (float64[:], float64, float64[:])],
-    "(n),()->()",
-    nopython=True,
-    cache=True,
-)
-def _vecquantiles(arr, rnk, res):
-    if np.isnan(rnk):
-        res[0] = np.NaN
-    else:
-        res[0] = np.nanquantile(arr, rnk)
+
 
 
 def vecquantiles(da, rnk, dim):
@@ -99,6 +89,17 @@ def _choosequantile(arr,q):
     else:
         return _sortquantile(arr, q)
 
+@guvectorize(
+    [(float32[:], float32, float32[:]), (float64[:], float64, float64[:])],
+    "(n),()->()",
+    nopython=True,
+    cache=True,
+)
+def _vecquantiles(arr, rnk, res):
+    if np.isnan(rnk):
+        res[0] = np.NaN
+    else:
+        res[0] = _choosequantile(arr, rnk)
         
 def quantile(da, q, dim):
     """Compute the quantiles from a fixed list `q`."""

From 65ddbdd5900f4ffa7cdf3e977ea1c9820385b52e Mon Sep 17 00:00:00 2001
From: Sarah G <sarah.g.2475962@gmail.com>
Date: Thu, 26 Oct 2023 20:16:58 +0000
Subject: [PATCH 03/40] fix vecquantiles?

---
 xclim/sdba/nbutils.py | 66 +++++++++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 21 deletions(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index b5a9eb2b7..479c4858b 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -15,26 +15,6 @@
 
 
 
-
-def vecquantiles(da, rnk, dim):
-    """For when the quantile (rnk) is different for each point.
-
-    da and rnk must share all dimensions but dim.
-    """
-    tem = utils.get_temp_dimname(da.dims, "temporal")
-    dims = [dim] if isinstance(dim, str) else dim
-    da = da.stack({tem: dims})
-    da = da.transpose(*rnk.dims, tem)
-
-    res = DataArray(
-        _vecquantiles(da.values, rnk.values),
-        dims=rnk.dims,
-        coords=rnk.coords,
-        attrs=da.attrs,
-    )
-    return res
-
-
 @njit
 def _quantile(arr, q):
     if arr.ndim == 1:
@@ -99,8 +79,52 @@ def _vecquantiles(arr, rnk, res):
     if np.isnan(rnk):
         res[0] = np.NaN
     else:
-        res[0] = _choosequantile(arr, rnk)
+        res[0] = np.nanquantile(arr, rnk)
         
+@guvectorize(
+    [(float32[:], float32, int64, float32[:]), (float64[:], float64, int64, float64[:])],
+    "(n),(),()->()",
+    nopython=True,
+    cache=True,
+)
+def _vecquantiles_sorted(arr, rnk, numnan, res):
+    if np.isnan(rnk):
+        res[0] = np.NaN
+    else:
+        index = rnk * (arr.size - 1 - numnan)
+        frac = index % 1
+        low = np.int64(np.floor(index))
+        high = np.int64(np.ceil(index))
+        res[0] = (1 - frac) * arr[low] + frac * arr[high]
+
+@njit(fastmath=False, nogil=True)
+def _vecquantiles_wrapper(arr, rnk):
+    if (USE_NANQUANTILE) and not USE_SORTQUANTILE:
+        return _vecquantiles(arr, rnk)
+    else:
+        sortarr = np.sort(arr)
+        numnan = numnan_sorted(sortarr)
+        res = np.empty_like(rnk)
+        return _vecquantiles_sorted(sortarr, rnk, numnan, res)
+    
+
+def vecquantiles(da, rnk, dim):
+    """For when the quantile (rnk) is different for each point.
+
+    da and rnk must share all dimensions but dim.
+    """
+    tem = utils.get_temp_dimname(da.dims, "temporal")
+    dims = [dim] if isinstance(dim, str) else dim
+    da = da.stack({tem: dims})
+    da = da.transpose(*rnk.dims, tem)
+
+    res = DataArray(
+        _vecquantiles_wrapper(da.values, rnk.values),
+        dims=rnk.dims,
+        coords=rnk.coords,
+        attrs=da.attrs,
+    )
+    return res  
 def quantile(da, q, dim):
     """Compute the quantiles from a fixed list `q`."""
     # We have two cases :

From 719e9c6be22e1bb2a9e79786c74794972e7a8bb2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 26 Oct 2023 20:22:51 +0000
Subject: [PATCH 04/40] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 xclim/sdba/nbutils.py | 72 ++++++++++++++++++++++++++-----------------
 1 file changed, 44 insertions(+), 28 deletions(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 479c4858b..2925c3ded 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -4,15 +4,15 @@
 """
 from __future__ import annotations
 
+from os import environ
+
 import numpy as np
-from numba import boolean, float32, float64, int64, guvectorize, njit
+from numba import boolean, float32, float64, guvectorize, int64, njit
 from xarray import DataArray
 from xarray.core import utils
-from os import environ
-
-USE_NANQUANTILE = environ.get('USE_NANQUANTILE', False)
-USE_SORTQUANTILE = environ.get('USE_SORTQUANTILE', False)
 
+USE_NANQUANTILE = environ.get("USE_NANQUANTILE", False)
+USE_SORTQUANTILE = environ.get("USE_SORTQUANTILE", False)
 
 
 @njit
@@ -27,22 +27,26 @@ def _quantile(arr, q):
     return out
 
 
-@njit([int64(float64[:]),int64(float32[:])],fastmath=False,nogil=True)
+@njit([int64(float64[:]), int64(float32[:])], fastmath=False, nogil=True)
 def numnan_sorted(s):
     # Given a sorted array s, return the number of NaNs.
     # This is faster than np.isnan(s).sum(), but only works if s is sorted,
-    # and only for 
-	ind = 0
-	for i in range(s.size -1 , 0, -1):
-		if np.isnan(s[i]):
-			ind += 1
-		else:
-			return ind
-	return ind
-
-
-@njit([float64[:](float64[:], float64[:]),float32[:](float32[:], float32[:])], fastmath=False, nogil=True)
-def _sortquantile(arr,q):
+    # and only for
+    ind = 0
+    for i in range(s.size - 1, 0, -1):
+        if np.isnan(s[i]):
+            ind += 1
+        else:
+            return ind
+    return ind
+
+
+@njit(
+    [float64[:](float64[:], float64[:]), float32[:](float32[:], float32[:])],
+    fastmath=False,
+    nogil=True,
+)
+def _sortquantile(arr, q):
     # This function sorts arr into ascending order,
     # then computes the quantiles as a linear interpolation
     # between the sorted values.
@@ -50,18 +54,22 @@ def _sortquantile(arr,q):
     numnan = numnan_sorted(sortarr)
     # compute the indices where each quantile should go:
     # nb: nan goes to the end, so we need to subtract numnan to the size.
-    indices = q * (arr.size - 1 - numnan) 
+    indices = q * (arr.size - 1 - numnan)
     # compute the quantiles manually to avoid casting to float64:
     # (alternative is to use np.interp(indices, np.arange(arr.size), sortarr))
     frac = indices % 1
-    low  = np.floor(indices).astype(np.int64)
-    high  = np.ceil(indices).astype(np.int64)
+    low = np.floor(indices).astype(np.int64)
+    high = np.ceil(indices).astype(np.int64)
     return (1 - frac) * sortarr[low] + frac * sortarr[high]
 
 
-@njit([float64[:](float64[:], float64[:]),float32[:](float32[:], float32[:])], fastmath=False, nogil=True)
-def _choosequantile(arr,q):
-    # When the number of quantiles requested is large (1% of arr.size for nojit, any (< 0.1%) for jit), 
+@njit(
+    [float64[:](float64[:], float64[:]), float32[:](float32[:], float32[:])],
+    fastmath=False,
+    nogil=True,
+)
+def _choosequantile(arr, q):
+    # When the number of quantiles requested is large (1% of arr.size for nojit, any (< 0.1%) for jit),
     # it becomes more efficient to sort the array,
     # and simply obtain the quantiles from the sorted array.
     if (arr.size > 1000 * q.size or USE_NANQUANTILE) and not USE_SORTQUANTILE:
@@ -69,6 +77,7 @@ def _choosequantile(arr,q):
     else:
         return _sortquantile(arr, q)
 
+
 @guvectorize(
     [(float32[:], float32, float32[:]), (float64[:], float64, float64[:])],
     "(n),()->()",
@@ -80,9 +89,13 @@ def _vecquantiles(arr, rnk, res):
         res[0] = np.NaN
     else:
         res[0] = np.nanquantile(arr, rnk)
-        
+
+
 @guvectorize(
-    [(float32[:], float32, int64, float32[:]), (float64[:], float64, int64, float64[:])],
+    [
+        (float32[:], float32, int64, float32[:]),
+        (float64[:], float64, int64, float64[:]),
+    ],
     "(n),(),()->()",
     nopython=True,
     cache=True,
@@ -97,6 +110,7 @@ def _vecquantiles_sorted(arr, rnk, numnan, res):
         high = np.int64(np.ceil(index))
         res[0] = (1 - frac) * arr[low] + frac * arr[high]
 
+
 @njit(fastmath=False, nogil=True)
 def _vecquantiles_wrapper(arr, rnk):
     if (USE_NANQUANTILE) and not USE_SORTQUANTILE:
@@ -106,7 +120,7 @@ def _vecquantiles_wrapper(arr, rnk):
         numnan = numnan_sorted(sortarr)
         res = np.empty_like(rnk)
         return _vecquantiles_sorted(sortarr, rnk, numnan, res)
-    
+
 
 def vecquantiles(da, rnk, dim):
     """For when the quantile (rnk) is different for each point.
@@ -124,7 +138,9 @@ def vecquantiles(da, rnk, dim):
         coords=rnk.coords,
         attrs=da.attrs,
     )
-    return res  
+    return res
+
+
 def quantile(da, q, dim):
     """Compute the quantiles from a fixed list `q`."""
     # We have two cases :

From a798e8aeafddbad924925077651c3c291e0b7713 Mon Sep 17 00:00:00 2001
From: Sarah G <sarah.g.2475962@gmail.com>
Date: Thu, 26 Oct 2023 20:57:29 +0000
Subject: [PATCH 05/40] pre-commit and proper signature orders

---
 xclim/sdba/nbutils.py | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 2925c3ded..cdb3be5b0 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -27,8 +27,16 @@ def _quantile(arr, q):
     return out
 
 
-@njit([int64(float64[:]), int64(float32[:])], fastmath=False, nogil=True)
+@njit(
+    [
+        int64(float32[:]),
+        int64(float64[:]),
+    ],
+    fastmath=False,
+    nogil=True,
+)
 def numnan_sorted(s):
+    """Given a sorted array s, return the number of NaNs."""
     # Given a sorted array s, return the number of NaNs.
     # This is faster than np.isnan(s).sum(), but only works if s is sorted,
     # and only for
@@ -42,14 +50,18 @@ def numnan_sorted(s):
 
 
 @njit(
-    [float64[:](float64[:], float64[:]), float32[:](float32[:], float32[:])],
+    [
+        float32[:](float32[:], float32[:]),
+        float64[:](float64[:], float64[:]),
+    ],
     fastmath=False,
     nogil=True,
 )
 def _sortquantile(arr, q):
-    # This function sorts arr into ascending order,
-    # then computes the quantiles as a linear interpolation
-    # between the sorted values.
+    """Sorts arr into ascending order,
+    then computes the quantiles as a linear interpolation
+    between the sorted values.
+    """
     sortarr = np.sort(arr)
     numnan = numnan_sorted(sortarr)
     # compute the indices where each quantile should go:
@@ -64,7 +76,10 @@ def _sortquantile(arr, q):
 
 
 @njit(
-    [float64[:](float64[:], float64[:]), float32[:](float32[:], float32[:])],
+    [
+        float32[:](float32[:], float32[:]),
+        float64[:](float64[:], float64[:]),
+    ],
     fastmath=False,
     nogil=True,
 )
@@ -82,7 +97,7 @@ def _choosequantile(arr, q):
     [(float32[:], float32, float32[:]), (float64[:], float64, float64[:])],
     "(n),()->()",
     nopython=True,
-    cache=True,
+    cache=False,
 )
 def _vecquantiles(arr, rnk, res):
     if np.isnan(rnk):
@@ -98,7 +113,7 @@ def _vecquantiles(arr, rnk, res):
     ],
     "(n),(),()->()",
     nopython=True,
-    cache=True,
+    cache=False,
 )
 def _vecquantiles_sorted(arr, rnk, numnan, res):
     if np.isnan(rnk):
@@ -118,8 +133,7 @@ def _vecquantiles_wrapper(arr, rnk):
     else:
         sortarr = np.sort(arr)
         numnan = numnan_sorted(sortarr)
-        res = np.empty_like(rnk)
-        return _vecquantiles_sorted(sortarr, rnk, numnan, res)
+        return _vecquantiles_sorted(sortarr, rnk, numnan)
 
 
 def vecquantiles(da, rnk, dim):

From a134bc21f548b243b75305858ea65a0817541081 Mon Sep 17 00:00:00 2001
From: Sarah G <sarah.g.2475962@gmail.com>
Date: Mon, 30 Oct 2023 17:06:05 +0000
Subject: [PATCH 06/40] Remove changes to vecquantiles

---
 xclim/sdba/nbutils.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index cdb3be5b0..f5f6a63ff 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -147,7 +147,7 @@ def vecquantiles(da, rnk, dim):
     da = da.transpose(*rnk.dims, tem)
 
     res = DataArray(
-        _vecquantiles_wrapper(da.values, rnk.values),
+        _vecquantiles(da.values, rnk.values),
         dims=rnk.dims,
         coords=rnk.coords,
         attrs=da.attrs,
@@ -167,14 +167,14 @@ def quantile(da, q, dim):
     dims = [dim] if isinstance(dim, str) else dim
     tem = utils.get_temp_dimname(da.dims, "temporal")
     da = da.stack({tem: dims})
-
+    print(q)
     # So we cut in half the definitions to declare in numba
     # We still use q as the coords so it corresponds to what was done upstream
-    if not hasattr(q, "dtype") or q.dtype != da.dtype:
-        qc = np.array(q, dtype=da.dtype)
-    else:
-        qc = q
-
+    # if not hasattr(q, "dtype") or q.dtype != da.dtype or q.shape[0] != da.shape[0]:
+    #    qc = np.array(q, dtype=da.dtype)
+    # else:
+    #    qc = q
+    qc = np.array(q, dtype=da.dtype)
     if len(da.dims) > 1:
         # There are some extra dims
         extra = utils.get_temp_dimname(da.dims, "extra")

From 749a467f7ed2ca4552efdb92ba2c3b075a5eefa2 Mon Sep 17 00:00:00 2001
From: Sarah G <sarah.g.2475962@gmail.com>
Date: Mon, 30 Oct 2023 17:08:21 +0000
Subject: [PATCH 07/40] Reformat sdba-speed notebook

---
 docs/notebooks/sdba-speed.ipynb | 113 ++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/docs/notebooks/sdba-speed.ipynb b/docs/notebooks/sdba-speed.ipynb
index e69de29bb..9e698de3e 100644
--- a/docs/notebooks/sdba-speed.ipynb
+++ b/docs/notebooks/sdba-speed.ipynb
@@ -0,0 +1,113 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from __future__ import annotations\n",
+    "\n",
+    "import importlib\n",
+    "import os\n",
+    "import time\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "from xclim import sdba\n",
+    "\n",
+    "sizes = [\n",
+    "    25,\n",
+    "    50,\n",
+    "    75,\n",
+    "    100,\n",
+    "    200,\n",
+    "    300,\n",
+    "    400,\n",
+    "    500,\n",
+    "    600,\n",
+    "    700,\n",
+    "    800,\n",
+    "    900,\n",
+    "    1000,\n",
+    "    1500,\n",
+    "    2000,\n",
+    "    5000,\n",
+    "    10000,\n",
+    "]\n",
+    "tsort = np.zeros(len(sizes))\n",
+    "tnan = np.zeros(len(sizes))\n",
+    "num_tests = 500\n",
+    "\n",
+    "os.environ[\"USE_NANQUANTILE\"] = \"\"\n",
+    "os.environ[\"USE_SORTQUANTILE\"] = \"True\"\n",
+    "importlib.reload(sdba.nbutils)\n",
+    "sdba.nbutils._choosequantile(np.random.randn(100), np.array([0.5]))\n",
+    "for i in range(len(sizes)):\n",
+    "    for k in range(num_tests):\n",
+    "        arr = np.random.randn(sizes[i])\n",
+    "        t0 = time.time()\n",
+    "        sdba.nbutils._choosequantile(arr, np.linspace(0, 1, 50))\n",
+    "        tsort[i] += time.time() - t0\n",
+    "\n",
+    "os.environ[\"USE_NANQUANTILE\"] = \"True\"\n",
+    "os.environ[\"USE_SORTQUANTILE\"] = \"\"\n",
+    "importlib.reload(sdba.nbutils)\n",
+    "sdba.nbutils._choosequantile(np.random.randn(100), np.array([0.5]))\n",
+    "for i in range(len(sizes)):\n",
+    "    for k in range(num_tests):\n",
+    "        arr = np.random.randn(sizes[i])\n",
+    "        t0 = time.time()\n",
+    "        sdba.nbutils._choosequantile(arr, np.linspace(0, 1, 50))\n",
+    "        tnan[i] += time.time() - t0\n",
+    "print(\n",
+    "    \"\\n\".join(\n",
+    "        [\n",
+    "            f\"{x:4.0f}, {y:.5f}, {z:.5f}, {z / y:.2f}\"\n",
+    "            for x, y, z in zip(sizes, tsort, tnan)\n",
+    "        ]\n",
+    "    )\n",
+    ")\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.plot(sizes, tsort / num_tests, label=\"sortquantile\")\n",
+    "plt.plot(sizes, tnan / num_tests, label=\"nanquantile\")\n",
+    "plt.legend()\n",
+    "plt.title(\n",
+    "    \"Quantile calculation using sortquantile and nanquantile, average time vs array size, for 50 quantiles\"\n",
+    ")\n",
+    "plt.xlabel(\"Array size\")\n",
+    "plt.ylabel(\"Time (s)\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plt.plot(sizes, tnan / tsort, label=\"Ratio of times, nanquantile / sortquantile\")\n",
+    "plt.legend()\n",
+    "plt.title(\"Ratio of times, nanquantile / sortquantile\")\n",
+    "plt.xlabel(\"Array size\")\n",
+    "plt.ylabel(\"Time ratio\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

From ea57cf591ea6b5c54673c36fb06264b37cbb897b Mon Sep 17 00:00:00 2001
From: Sarah G <sarah.g.2475962@gmail.com>
Date: Mon, 30 Oct 2023 17:24:51 +0000
Subject: [PATCH 08/40] Update AUTHORS.rst

---
 AUTHORS.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/AUTHORS.rst b/AUTHORS.rst
index 46208a9c8..30a596020 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -29,6 +29,7 @@ Contributors
 * David Caron `@davidcaron <https://github.com/davidcaron>`_
 * Carsten Ehbrecht <ehbrecht@dkrz.de> `@cehbrecht <https://github.com/cehbrecht>`_
 * Jeremy Fyke `@jeremyfyke <https://github.com/jeremyfyke>`_
+* Sarah Gammon `@SarahG-579462 <https://github.com/SarahG-579462>`
 * Tom Keel <thomas.keel.18@ucl.ac.uk> `@Thomasjkeel <https://github.com/Thomasjkeel>`_
 * Marie-Pier Labonté <labonte.marie-pier@ouranos.ca> `@marielabonte <https://github.com/marielabonte>`_
 * Ludwig Lierhammer <ludwig.lierhammer@hereon.de> `@ludwiglierhammer <https://github.com/ludwiglierhammer>`_

From 7393c340baf87512c3be03f6abcdb77397348dce Mon Sep 17 00:00:00 2001
From: Sarah G <sarah.g.2475962@gmail.com>
Date: Mon, 30 Oct 2023 19:09:30 +0000
Subject: [PATCH 09/40] change criteria for nanquantile algorithm

---
 docs/notebooks/sdba-speed.ipynb | 27 ++++--------------------
 xclim/sdba/nbutils.py           | 37 ++++-----------------------------
 2 files changed, 8 insertions(+), 56 deletions(-)

diff --git a/docs/notebooks/sdba-speed.ipynb b/docs/notebooks/sdba-speed.ipynb
index 9e698de3e..95818ffb4 100644
--- a/docs/notebooks/sdba-speed.ipynb
+++ b/docs/notebooks/sdba-speed.ipynb
@@ -16,49 +16,30 @@
     "\n",
     "from xclim import sdba\n",
     "\n",
-    "sizes = [\n",
-    "    25,\n",
-    "    50,\n",
-    "    75,\n",
-    "    100,\n",
-    "    200,\n",
-    "    300,\n",
-    "    400,\n",
-    "    500,\n",
-    "    600,\n",
-    "    700,\n",
-    "    800,\n",
-    "    900,\n",
-    "    1000,\n",
-    "    1500,\n",
-    "    2000,\n",
-    "    5000,\n",
-    "    10000,\n",
-    "]\n",
+    "q = 50\n",
+    "sizes = [25, 50, 75, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1500, 2000]\n",
     "tsort = np.zeros(len(sizes))\n",
     "tnan = np.zeros(len(sizes))\n",
     "num_tests = 500\n",
     "\n",
     "os.environ[\"USE_NANQUANTILE\"] = \"\"\n",
-    "os.environ[\"USE_SORTQUANTILE\"] = \"True\"\n",
     "importlib.reload(sdba.nbutils)\n",
     "sdba.nbutils._choosequantile(np.random.randn(100), np.array([0.5]))\n",
     "for i in range(len(sizes)):\n",
     "    for k in range(num_tests):\n",
     "        arr = np.random.randn(sizes[i])\n",
     "        t0 = time.time()\n",
-    "        sdba.nbutils._choosequantile(arr, np.linspace(0, 1, 50))\n",
+    "        sdba.nbutils._choosequantile(arr, np.linspace(0, 1, q))\n",
     "        tsort[i] += time.time() - t0\n",
     "\n",
     "os.environ[\"USE_NANQUANTILE\"] = \"True\"\n",
-    "os.environ[\"USE_SORTQUANTILE\"] = \"\"\n",
     "importlib.reload(sdba.nbutils)\n",
     "sdba.nbutils._choosequantile(np.random.randn(100), np.array([0.5]))\n",
     "for i in range(len(sizes)):\n",
     "    for k in range(num_tests):\n",
     "        arr = np.random.randn(sizes[i])\n",
     "        t0 = time.time()\n",
-    "        sdba.nbutils._choosequantile(arr, np.linspace(0, 1, 50))\n",
+    "        sdba.nbutils._choosequantile(arr, np.linspace(0, 1, q))\n",
     "        tnan[i] += time.time() - t0\n",
     "print(\n",
     "    \"\\n\".join(\n",
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index f5f6a63ff..bec24f30a 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -12,7 +12,6 @@
 from xarray.core import utils
 
 USE_NANQUANTILE = environ.get("USE_NANQUANTILE", False)
-USE_SORTQUANTILE = environ.get("USE_SORTQUANTILE", False)
 
 
 @njit
@@ -84,10 +83,12 @@ def _sortquantile(arr, q):
     nogil=True,
 )
 def _choosequantile(arr, q):
-    # When the number of quantiles requested is large (1% of arr.size for nojit, any (< 0.1%) for jit),
+    # When the number of quantiles requested is large,
     # it becomes more efficient to sort the array,
     # and simply obtain the quantiles from the sorted array.
-    if (arr.size > 1000 * q.size or USE_NANQUANTILE) and not USE_SORTQUANTILE:
+    # The first method is O(arr.size*q.size),
+    # the second O(arr.size*log(arr.size) + q.size) amortized.
+    if (q.size <= 10) or (q.size <= np.log(arr.size)) or (USE_NANQUANTILE):
         return np.nanquantile(arr, q).astype(arr.dtype)
     else:
         return _sortquantile(arr, q)
@@ -106,36 +107,6 @@ def _vecquantiles(arr, rnk, res):
         res[0] = np.nanquantile(arr, rnk)
 
 
-@guvectorize(
-    [
-        (float32[:], float32, int64, float32[:]),
-        (float64[:], float64, int64, float64[:]),
-    ],
-    "(n),(),()->()",
-    nopython=True,
-    cache=False,
-)
-def _vecquantiles_sorted(arr, rnk, numnan, res):
-    if np.isnan(rnk):
-        res[0] = np.NaN
-    else:
-        index = rnk * (arr.size - 1 - numnan)
-        frac = index % 1
-        low = np.int64(np.floor(index))
-        high = np.int64(np.ceil(index))
-        res[0] = (1 - frac) * arr[low] + frac * arr[high]
-
-
-@njit(fastmath=False, nogil=True)
-def _vecquantiles_wrapper(arr, rnk):
-    if (USE_NANQUANTILE) and not USE_SORTQUANTILE:
-        return _vecquantiles(arr, rnk)
-    else:
-        sortarr = np.sort(arr)
-        numnan = numnan_sorted(sortarr)
-        return _vecquantiles_sorted(sortarr, rnk, numnan)
-
-
 def vecquantiles(da, rnk, dim):
     """For when the quantile (rnk) is different for each point.
 

From 8845208bef25e891503ab72c769aa83d49025bb8 Mon Sep 17 00:00:00 2001
From: Sarah G <sarah.g.2475962@gmail.com>
Date: Tue, 31 Oct 2023 17:54:35 +0000
Subject: [PATCH 10/40] precompilation for most nbutils

---
 xclim/sdba/nbutils.py | 102 +++++++++++++++++++++++++++++++++---------
 1 file changed, 82 insertions(+), 20 deletions(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index bec24f30a..41311b388 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -14,18 +14,6 @@
 USE_NANQUANTILE = environ.get("USE_NANQUANTILE", False)
 
 
-@njit
-def _quantile(arr, q):
-    if arr.ndim == 1:
-        out = np.empty((q.size,), dtype=arr.dtype)
-        out[:] = _choosequantile(arr, q)
-    else:
-        out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
-        for index in range(out.shape[0]):
-            out[index] = _choosequantile(arr[index], q)
-    return out
-
-
 @njit(
     [
         int64(float32[:]),
@@ -33,6 +21,7 @@ def _quantile(arr, q):
     ],
     fastmath=False,
     nogil=True,
+    cache=False,
 )
 def numnan_sorted(s):
     """Given a sorted array s, return the number of NaNs."""
@@ -55,6 +44,7 @@ def numnan_sorted(s):
     ],
     fastmath=False,
     nogil=True,
+    cache=False,
 )
 def _sortquantile(arr, q):
     """Sorts arr into ascending order,
@@ -81,6 +71,7 @@ def _sortquantile(arr, q):
     ],
     fastmath=False,
     nogil=True,
+    cache=False,
 )
 def _choosequantile(arr, q):
     # When the number of quantiles requested is large,
@@ -94,6 +85,28 @@ def _choosequantile(arr, q):
         return _sortquantile(arr, q)
 
 
+@njit(
+    [
+        float32[:](float32[:], float32[:]),
+        float64[:](float64[:], float64[:]),
+        float32[:, :](float32[:, :], float32[:]),
+        float64[:, :](float64[:, :], float64[:]),
+    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
+def _quantile(arr, q):
+    if arr.ndim == 1:
+        out = np.empty((q.size,), dtype=arr.dtype)
+        out[:] = _choosequantile(arr, q)
+    else:
+        out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
+        for index in range(out.shape[0]):
+            out[index] = _choosequantile(arr[index], q)
+    return out
+
+
 @guvectorize(
     [(float32[:], float32, float32[:]), (float64[:], float64, float64[:])],
     "(n),()->()",
@@ -138,7 +151,6 @@ def quantile(da, q, dim):
     dims = [dim] if isinstance(dim, str) else dim
     tem = utils.get_temp_dimname(da.dims, "temporal")
     da = da.stack({tem: dims})
-    print(q)
     # So we cut in half the definitions to declare in numba
     # We still use q as the coords so it corresponds to what was done upstream
     # if not hasattr(q, "dtype") or q.dtype != da.dtype or q.shape[0] != da.shape[0]:
@@ -170,7 +182,15 @@ def quantile(da, q, dim):
     return res
 
 
-@njit
+@njit(
+    [
+        float32[:, :](float32[:, :]),
+        float64[:, :](float64[:, :]),
+    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
 def remove_NaNs(x):  # noqa
     """Remove NaN values from series."""
     remove = np.zeros_like(x[0, :], dtype=boolean)
@@ -179,7 +199,15 @@ def remove_NaNs(x):  # noqa
     return x[:, ~remove]
 
 
-@njit(fastmath=True)
+@njit(
+    [
+        float32(float32[:, :], float32[:, :]),
+        float64(float64[:, :], float64[:, :]),
+    ],
+    fastmath=True,
+    nogil=True,
+    cache=False,
+)
 def _correlation(X, Y):
     """Compute a correlation as the mean of pairwise distances between points in X and Y.
 
@@ -196,7 +224,15 @@ def _correlation(X, Y):
     return d / (X.shape[1] * Y.shape[1])
 
 
-@njit(fastmath=True)
+@njit(
+    [
+        float32(float32[:, :]),
+        float64(float64[:, :]),
+    ],
+    fastmath=True,
+    nogil=True,
+    cache=False,
+)
 def _autocorrelation(X):
     """Mean of the NxN pairwise distances of points in X of shape KxN.
 
@@ -219,7 +255,7 @@ def _autocorrelation(X):
     ],
     "(k, n),(k, m)->()",
     nopython=True,
-    cache=True,
+    cache=False,
 )
 def _escore(tgt, sim, out):
     """E-score based on the Székely-Rizzo e-distances between clusters.
@@ -242,7 +278,17 @@ def _escore(tgt, sim, out):
     out[0] = w * (sXY + sXY - sXX - sYY) / 2
 
 
-@njit
+@njit(
+    #    [
+    #        float32[:,:](float32[:]),
+    #        float64[:,:](float64[:]),
+    #        float32[:,:](float32[:,:]),
+    #        float64[:,:](float64[:,:]),
+    #    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
 def _first_and_last_nonnull(arr):
     """For each row of arr, get the first and last non NaN elements."""
     out = np.empty((arr.shape[0], 2))
@@ -255,7 +301,15 @@ def _first_and_last_nonnull(arr):
     return out
 
 
-@njit
+@njit(
+    #    [
+    #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
+    #        float64[:](float64[:],float64[:],float64[:],float64[:],float64[:],float64[:],optional(typeof("constant"))),
+    #    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
 def _extrapolate_on_quantiles(
     interp, oldx, oldg, oldy, newx, newg, method="constant"
 ):  # noqa
@@ -279,7 +333,15 @@ def _extrapolate_on_quantiles(
     return interp
 
 
-@njit
+@njit(
+    #    [
+    #        typeof((float64[:,:],float64,float64))(float32[:],float32[:],optional(boolean)),
+    #        typeof((float64[:,:],float64,float64))(float64[:],float64[:],optional(boolean)),
+    #    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
 def _pairwise_haversine_and_bins(lond, latd, transpose=False):
     """Inter-site distances with the haversine approximation."""
     N = lond.shape[0]

From 6e35ec48c7c4d1d5c77717beeb2047d73f79cb96 Mon Sep 17 00:00:00 2001
From: juliettelavoie <juliette.lavoie@hotmail.ca>
Date: Thu, 16 Nov 2023 10:55:21 -0500
Subject: [PATCH 11/40] fix escores

---
 xclim/sdba/_adjustment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xclim/sdba/_adjustment.py b/xclim/sdba/_adjustment.py
index a28f9c03a..5a651a7ea 100644
--- a/xclim/sdba/_adjustment.py
+++ b/xclim/sdba/_adjustment.py
@@ -309,7 +309,7 @@ def npdf_transform(ds: xr.Dataset, **kwargs) -> xr.Dataset:
         # All NaN, but with the proper shape.
         escores = (
             ref.isel({dim: 0, "time": 0}) * hist.isel({dim: 0, "time": 0})
-        ).expand_dims(iterations=ds.iteration) * np.NaN
+        ).expand_dims(iterations=ds.iterations) * np.NaN
 
     return xr.Dataset(
         data_vars={

From 5741c2d48f2db9f77fd62cf2e028e51b6db3151d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Mon, 29 Apr 2024 15:06:09 -0400
Subject: [PATCH 12/40] `allow_sortquantile` option

---
 xclim/core/options.py |  8 +++++++
 xclim/sdba/nbutils.py | 51 +++++++++++++++++++++----------------------
 2 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/xclim/core/options.py b/xclim/core/options.py
index 1eba36a3f..163965e53 100644
--- a/xclim/core/options.py
+++ b/xclim/core/options.py
@@ -22,6 +22,7 @@
 RUN_LENGTH_UFUNC = "run_length_ufunc"
 SDBA_EXTRA_OUTPUT = "sdba_extra_output"
 SDBA_ENCODE_CF = "sdba_encode_cf"
+ALLOW_SORTQUANTILE = "allow_sortquantile"
 KEEP_ATTRS = "keep_attrs"
 
 MISSING_METHODS: dict[str, Callable] = {}
@@ -35,11 +36,13 @@
     RUN_LENGTH_UFUNC: "auto",
     SDBA_EXTRA_OUTPUT: False,
     SDBA_ENCODE_CF: False,
+    ALLOW_SORTQUANTILE: False,
     KEEP_ATTRS: "xarray",
 }
 
 _LOUDNESS_OPTIONS = frozenset(["log", "warn", "raise"])
 _RUN_LENGTH_UFUNC_OPTIONS = frozenset(["auto", True, False])
+_ALLOW_SORTQUANTILE_OPTIONS = frozenset([True, False])
 _KEEP_ATTRS_OPTIONS = frozenset(["xarray", True, False])
 
 
@@ -66,6 +69,7 @@ def _valid_missing_options(mopts):
     RUN_LENGTH_UFUNC: _RUN_LENGTH_UFUNC_OPTIONS.__contains__,
     SDBA_EXTRA_OUTPUT: lambda opt: isinstance(opt, bool),
     SDBA_ENCODE_CF: lambda opt: isinstance(opt, bool),
+    ALLOW_SORTQUANTILE: _ALLOW_SORTQUANTILE_OPTIONS.__contains__,
     KEEP_ATTRS: _KEEP_ATTRS_OPTIONS.__contains__,
 }
 
@@ -163,6 +167,10 @@ class set_options:
     run_length_ufunc : str
       Whether to use the 1D ufunc version of run length algorithms or the dask-ready broadcasting version.
       Default is ``"auto"``, which means the latter is used for dask-backed and large arrays.
+    allow_sortquantile : bool
+      If `True`,  the optimal function between ``np.nanquantile`` or ``sdba.nbutils._sortquantile` is chosen.
+      `_sortquantile` is optimal when the number of quantiles is large (in absolute and relatively to array size).
+      Default is `False` in which case ``np.nanquantile`` is selected.
     sdba_extra_output : bool
         Whether to add diagnostic variables to outputs of sdba's `train`, `adjust`
         and `processing` operations. Details about these additional variables are given in the object's
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 41311b388..41899d1eb 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -4,14 +4,12 @@
 """
 from __future__ import annotations
 
-from os import environ
-
 import numpy as np
 from numba import boolean, float32, float64, guvectorize, int64, njit
 from xarray import DataArray
 from xarray.core import utils
 
-USE_NANQUANTILE = environ.get("USE_NANQUANTILE", False)
+from xclim.core.options import ALLOW_SORTQUANTILE, OPTIONS
 
 
 @njit(
@@ -38,10 +36,10 @@ def numnan_sorted(s):
 
 
 @njit(
-    [
-        float32[:](float32[:], float32[:]),
-        float64[:](float64[:], float64[:]),
-    ],
+    # [
+    #     float32[:](float32[:], float32[:]),
+    #     float64[:](float64[:], float64[:]),
+    # ],
     fastmath=False,
     nogil=True,
     cache=False,
@@ -65,45 +63,45 @@ def _sortquantile(arr, q):
 
 
 @njit(
-    [
-        float32[:](float32[:], float32[:]),
-        float64[:](float64[:], float64[:]),
-    ],
+    # [
+    #     float32[:](float32[:], float32[:]),
+    #     float64[:](float64[:], float64[:]),
+    # ],
     fastmath=False,
     nogil=True,
     cache=False,
 )
-def _choosequantile(arr, q):
+def _choosequantile(arr, q, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
     # When the number of quantiles requested is large,
     # it becomes more efficient to sort the array,
     # and simply obtain the quantiles from the sorted array.
     # The first method is O(arr.size*q.size),
     # the second O(arr.size*log(arr.size) + q.size) amortized.
-    if (q.size <= 10) or (q.size <= np.log(arr.size)) or (USE_NANQUANTILE):
-        return np.nanquantile(arr, q).astype(arr.dtype)
-    else:
+    if allow_sortquantile and len(q) > 10 and len(q) > np.log(len(arr)):
         return _sortquantile(arr, q)
+    else:
+        return np.nanquantile(arr, q).astype(arr.dtype)
 
 
 @njit(
-    [
-        float32[:](float32[:], float32[:]),
-        float64[:](float64[:], float64[:]),
-        float32[:, :](float32[:, :], float32[:]),
-        float64[:, :](float64[:, :], float64[:]),
-    ],
+    # [
+    #     float32[:](float32[:], float32[:]),
+    #     float64[:](float64[:], float64[:]),
+    #     float32[:, :](float32[:, :], float32[:]),
+    #     float64[:, :](float64[:, :], float64[:]),
+    # ],
     fastmath=False,
     nogil=True,
     cache=False,
 )
-def _quantile(arr, q):
+def _quantile(arr, q, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
     if arr.ndim == 1:
         out = np.empty((q.size,), dtype=arr.dtype)
-        out[:] = _choosequantile(arr, q)
+        out[:] = _choosequantile(arr, q, allow_sortquantile)
     else:
         out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
         for index in range(out.shape[0]):
-            out[index] = _choosequantile(arr[index], q)
+            out[index] = _choosequantile(arr[index], q, allow_sortquantile)
     return out
 
 
@@ -141,6 +139,7 @@ def vecquantiles(da, rnk, dim):
 
 def quantile(da, q, dim):
     """Compute the quantiles from a fixed list `q`."""
+    allow_sortquantile = OPTIONS[ALLOW_SORTQUANTILE]
     # We have two cases :
     # - When all dims are processed : we stack them and use _quantile1d
     # - When the quantiles are vectorized over some dims, these are also stacked and then _quantile2D is used.
@@ -164,7 +163,7 @@ def quantile(da, q, dim):
         da = da.stack({extra: set(da.dims) - {tem}})
         da = da.transpose(..., tem)
         res = DataArray(
-            _quantile(da.values, qc),
+            _quantile(da.values, qc, allow_sortquantile),
             dims=(extra, "quantiles"),
             coords={extra: da[extra], "quantiles": q},
             attrs=da.attrs,
@@ -173,7 +172,7 @@ def quantile(da, q, dim):
     else:
         # All dims are processed
         res = DataArray(
-            _quantile(da.values, qc),
+            _quantile(da.values, qc, allow_sortquantile),
             dims=("quantiles"),
             coords={"quantiles": q},
             attrs=da.attrs,

From 33a79b042eaed71b5fd6ec7448cc0a37545706f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 30 Apr 2024 18:32:26 -0400
Subject: [PATCH 13/40] time dimensions not stacked = faster quantile

---
 xclim/sdba/nbutils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index e46be3aa4..12afbfcd2 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -102,7 +102,7 @@ def _quantile(arr, q, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
     else:
         out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
         for index in range(out.shape[0]):
-            out[index] = _choosequantile(arr[index], q, allow_sortquantile)
+            out[index] = _choosequantile(arr[index].flatten(), q, allow_sortquantile)
     return out
 
 

From 8e1e3625e8e4b095664ba48a7eda62c0b035970b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 30 Apr 2024 18:45:25 -0400
Subject: [PATCH 14/40] correct nd vs.1d condition & clean

---
 xclim/sdba/nbutils.py | 25 +++++--------------------
 1 file changed, 5 insertions(+), 20 deletions(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 12afbfcd2..624f0c937 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -149,27 +149,12 @@ def quantile(da: DataArray, q, dim: str | DataArray.dims) -> DataArray:
 
     # Stack the dims and send to the last position
     # This is in case there are more than one
-    dims = [dim] if isinstance(dim, str) else dim
-    tem = utils.get_temp_dimname(da.dims, "temporal")
-    da = da.stack({tem: dims})
-    # So we cut in half the definitions to declare in numba
-    # We still use q as the coords so it corresponds to what was done upstream
-    # if not hasattr(q, "dtype") or q.dtype != da.dtype or q.shape[0] != da.shape[0]:
-    #    qc = np.array(q, dtype=da.dtype)
-    # else:
-    #    qc = q
     qc = np.array(q, dtype=da.dtype)
-    # We still use q as the coords, so it corresponds to what was done upstream
-    if not hasattr(q, "dtype") or q.dtype != da.dtype:
-        qc = np.array(q, dtype=da.dtype)
-    else:
-        qc = q
-
-    if len(da.dims) > 1:
-        # There are some extra dims
+    dims = [dim] if isinstance(dim, str) else dim
+    if len(da.dims) > len(dims):
         extra = utils.get_temp_dimname(da.dims, "extra")
-        da = da.stack({extra: set(da.dims) - {tem}})
-        da = da.transpose(..., tem)
+        da = da.stack({extra: set(da.dims) - set(dims)})
+        da = da.transpose(extra, ...)
         res = DataArray(
             _quantile(da.values, qc, allow_sortquantile),
             dims=(extra, "quantiles"),
@@ -180,7 +165,7 @@ def quantile(da: DataArray, q, dim: str | DataArray.dims) -> DataArray:
     else:
         # All dims are processed
         res = DataArray(
-            _quantile(da.values, qc, allow_sortquantile),
+            _quantile(da.values.flatten(), qc, allow_sortquantile),
             dims=("quantiles"),
             coords={"quantiles": q},
             attrs=da.attrs,

From e4da29ef4f0da50a10818d921bc2cc91a9b7f03d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Fri, 3 May 2024 12:21:50 -0400
Subject: [PATCH 15/40] np reshape and apply_ufunc -> avoid stack, etc

---
 docs/notebooks/sdba-speed.ipynb |  2 +-
 xclim/sdba/nbutils.py           | 93 +++++++++++++++------------------
 2 files changed, 43 insertions(+), 52 deletions(-)

diff --git a/docs/notebooks/sdba-speed.ipynb b/docs/notebooks/sdba-speed.ipynb
index 95818ffb4..281f28e4e 100644
--- a/docs/notebooks/sdba-speed.ipynb
+++ b/docs/notebooks/sdba-speed.ipynb
@@ -86,7 +86,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 624f0c937..d007b329a 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 from numba import boolean, float32, float64, guvectorize, int64, njit
-from xarray import DataArray
+from xarray import DataArray, apply_ufunc
 from xarray.core import utils
 
 from xclim.core.options import ALLOW_SORTQUANTILE, OPTIONS
@@ -84,28 +84,6 @@ def _choosequantile(arr, q, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
         return np.nanquantile(arr, q).astype(arr.dtype)
 
 
-@njit(
-    # [
-    #     float32[:](float32[:], float32[:]),
-    #     float64[:](float64[:], float64[:]),
-    #     float32[:, :](float32[:, :], float32[:]),
-    #     float64[:, :](float64[:, :], float64[:]),
-    # ],
-    fastmath=False,
-    nogil=True,
-    cache=False,
-)
-def _quantile(arr, q, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
-    if arr.ndim == 1:
-        out = np.empty((q.size,), dtype=arr.dtype)
-        out[:] = _choosequantile(arr, q, allow_sortquantile)
-    else:
-        out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
-        for index in range(out.shape[0]):
-            out[index] = _choosequantile(arr[index].flatten(), q, allow_sortquantile)
-    return out
-
-
 @guvectorize(
     [(float32[:], float32, float32[:]), (float64[:], float64, float64[:])],
     "(n),()->()",
@@ -138,39 +116,52 @@ def vecquantiles(da: DataArray, rnk: DataArray, dim: str | DataArray.dims) -> Da
     return res
 
 
-def quantile(da: DataArray, q, dim: str | DataArray.dims) -> DataArray:
+@njit
+def _wrapper_quantile1d(arr, q, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
+    out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
+    for index in range(out.shape[0]):
+        out[index] = _choosequantile(arr[index], q, allow_sortquantile)
+    return out
+
+
+def _quantile(arr, q, nreduce, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
+    if arr.ndim == nreduce:
+        out = _choosequantile(arr.flatten(), q, allow_sortquantile)
+    else:
+        # dimensions that are reduced by quantile
+        red_axis = np.arange(len(arr.shape) - nreduce, len(arr.shape))
+        reduction_dim_size = np.prod([arr.shape[idx] for idx in red_axis])
+        # kept dimensions
+        keep_axis = np.arange(len(arr.shape) - nreduce)
+        final_shape = [arr.shape[idx] for idx in keep_axis] + [len(q)]
+        # reshape as (keep_dims, red_dims), compute, reshape back
+        arr = arr.reshape(-1, reduction_dim_size)
+        out = _wrapper_quantile1d(arr, q, allow_sortquantile)
+        out = out.reshape(final_shape)
+    return out
+
+
+def quantile(da, q, dim):
     """Compute the quantiles from a fixed list `q`."""
-    # A switch that determines the backend function computing quantiles
     allow_sortquantile = OPTIONS[ALLOW_SORTQUANTILE]
-    # We have two cases :
-    # - When all dims are processed : we stack them and use _quantile1d
-    # - When the quantiles are vectorized over some dims, these are also stacked and then _quantile2D is used.
-    # All this stacking is so that we can cover all ND+1D cases with one numba function.
-
-    # Stack the dims and send to the last position
-    # This is in case there are more than one
     qc = np.array(q, dtype=da.dtype)
     dims = [dim] if isinstance(dim, str) else dim
-    if len(da.dims) > len(dims):
-        extra = utils.get_temp_dimname(da.dims, "extra")
-        da = da.stack({extra: set(da.dims) - set(dims)})
-        da = da.transpose(extra, ...)
-        res = DataArray(
-            _quantile(da.values, qc, allow_sortquantile),
-            dims=(extra, "quantiles"),
-            coords={extra: da[extra], "quantiles": q},
-            attrs=da.attrs,
-        ).unstack(extra)
-
-    else:
-        # All dims are processed
-        res = DataArray(
-            _quantile(da.values.flatten(), qc, allow_sortquantile),
-            dims=("quantiles"),
-            coords={"quantiles": q},
-            attrs=da.attrs,
+    kwargs = dict(nreduce=len(dims), q=qc, allow_sortquantile=allow_sortquantile)
+    res = (
+        apply_ufunc(
+            _quantile,
+            da,
+            input_core_dims=[dims],
+            exclude_dims=set(dims),
+            output_core_dims=[["quantiles"]],
+            output_dtypes=[da.dtype],
+            dask_gufunc_kwargs=dict(output_sizes={"quantiles": len(q)}),
+            dask="parallelized",
+            kwargs=kwargs,
         )
-
+        .assign_coords(quantiles=q)
+        .assign_attrs(da.attrs)
+    )
     return res
 
 

From 3d42f875cc591bceb6111e6b1a99dbd7a9381f43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Mon, 6 May 2024 10:33:08 -0400
Subject: [PATCH 16/40] add fastnanquantile as option, _sort True by default

---
 xclim/core/options.py |  2 +-
 xclim/sdba/nbutils.py | 62 ++++++++++++++++++++++++++++++-------------
 2 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/xclim/core/options.py b/xclim/core/options.py
index 30276500a..1c07b34c6 100644
--- a/xclim/core/options.py
+++ b/xclim/core/options.py
@@ -38,7 +38,7 @@
     RUN_LENGTH_UFUNC: "auto",
     SDBA_EXTRA_OUTPUT: False,
     SDBA_ENCODE_CF: False,
-    ALLOW_SORTQUANTILE: False,
+    ALLOW_SORTQUANTILE: True,
     KEEP_ATTRS: "xarray",
     AS_DATASET: False,
 }
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index d007b329a..2d7e763d5 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -12,6 +12,13 @@
 
 from xclim.core.options import ALLOW_SORTQUANTILE, OPTIONS
 
+try:
+    from fastnanquantie.xrcompat import xr_apply_nanquantile
+
+    USE_FASTNANQUANTILE = True
+except ImportError:
+    USE_FASTNANQUANTILE = False
+
 
 @njit(
     [
@@ -101,6 +108,20 @@ def vecquantiles(da: DataArray, rnk: DataArray, dim: str | DataArray.dims) -> Da
     """For when the quantile (rnk) is different for each point.
 
     da and rnk must share all dimensions but dim.
+
+    Parameters
+    ----------
+    da : xarray.DataArray
+        The data to compute the quantiles on.
+    rnk : xarray.DataArray
+        The quantiles to compute.
+    dim : str or sequence of str
+        The dimension along which to compute the quantiles.
+
+    Returns
+    -------
+    xarray.DataArray
+        The quantiles computed along the `dim` dimension.
     """
     tem = utils.get_temp_dimname(da.dims, "temporal")
     dims = [dim] if isinstance(dim, str) else dim
@@ -143,26 +164,29 @@ def _quantile(arr, q, nreduce, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
 
 def quantile(da, q, dim):
     """Compute the quantiles from a fixed list `q`."""
-    allow_sortquantile = OPTIONS[ALLOW_SORTQUANTILE]
-    qc = np.array(q, dtype=da.dtype)
-    dims = [dim] if isinstance(dim, str) else dim
-    kwargs = dict(nreduce=len(dims), q=qc, allow_sortquantile=allow_sortquantile)
-    res = (
-        apply_ufunc(
-            _quantile,
-            da,
-            input_core_dims=[dims],
-            exclude_dims=set(dims),
-            output_core_dims=[["quantiles"]],
-            output_dtypes=[da.dtype],
-            dask_gufunc_kwargs=dict(output_sizes={"quantiles": len(q)}),
-            dask="parallelized",
-            kwargs=kwargs,
+    if USE_FASTNANQUANTILE is True:
+        return xr_apply_nanquantile(da, dim=dim, q=q).rename({"quantile": "quantiles"})
+    else:
+        allow_sortquantile = OPTIONS[ALLOW_SORTQUANTILE]
+        qc = np.array(q, dtype=da.dtype)
+        dims = [dim] if isinstance(dim, str) else dim
+        kwargs = dict(nreduce=len(dims), q=qc, allow_sortquantile=allow_sortquantile)
+        res = (
+            apply_ufunc(
+                _quantile,
+                da,
+                input_core_dims=[dims],
+                exclude_dims=set(dims),
+                output_core_dims=[["quantiles"]],
+                output_dtypes=[da.dtype],
+                dask_gufunc_kwargs=dict(output_sizes={"quantiles": len(q)}),
+                dask="parallelized",
+                kwargs=kwargs,
+            )
+            .assign_coords(quantiles=q)
+            .assign_attrs(da.attrs)
         )
-        .assign_coords(quantiles=q)
-        .assign_attrs(da.attrs)
-    )
-    return res
+        return res
 
 
 @njit(

From 354d78f9f60a418d86cd7932bc9d3e2f7fe143af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Mon, 6 May 2024 13:09:00 -0400
Subject: [PATCH 17/40] update doc

---
 xclim/core/options.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/xclim/core/options.py b/xclim/core/options.py
index 1c07b34c6..c53f79e0e 100644
--- a/xclim/core/options.py
+++ b/xclim/core/options.py
@@ -172,9 +172,9 @@ class set_options:
       Whether to use the 1D ufunc version of run length algorithms or the dask-ready broadcasting version.
       Default is ``"auto"``, which means the latter is used for dask-backed and large arrays.
     allow_sortquantile : bool
-      If `True`,  the optimal function between ``np.nanquantile`` or ``sdba.nbutils._sortquantile` is chosen.
-      `_sortquantile` is optimal when the number of quantiles is large (in absolute and relatively to array size).
-      Default is `False` in which case ``np.nanquantile`` is selected.
+      If `True`,  the optimal function between ``np.nanquantile`` or ``sdba.nbutils._sortquantile` is chosen. If `False`,
+      ``np.nanquantile`` is selected. `_sortquantile` is optimal when the number of quantiles is large (in absolute and relatively to array size).
+      Default: `True`.
     sdba_extra_output : bool
         Whether to add diagnostic variables to outputs of sdba's `train`, `adjust`
         and `processing` operations. Details about these additional variables are given in the object's

From 5c8667aa1611f6d11db046111d9686ab548ad551 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Mon, 6 May 2024 15:21:27 -0400
Subject: [PATCH 18/40] move benchmark tests to hidden folder

---
 docs/notebooks/benchmarks/sdba_quantile.ipynb | 163 ++++++++++++++++++
 docs/notebooks/sdba-speed.ipynb               |  94 ----------
 xclim/sdba/nbutils.py                         |   2 +-
 3 files changed, 164 insertions(+), 95 deletions(-)
 create mode 100644 docs/notebooks/benchmarks/sdba_quantile.ipynb
 delete mode 100644 docs/notebooks/sdba-speed.ipynb

diff --git a/docs/notebooks/benchmarks/sdba_quantile.ipynb b/docs/notebooks/benchmarks/sdba_quantile.ipynb
new file mode 100644
index 000000000..8c54c830e
--- /dev/null
+++ b/docs/notebooks/benchmarks/sdba_quantile.ipynb
@@ -0,0 +1,163 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from __future__ import annotations\n",
+    "\n",
+    "import time\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "import xclim\n",
+    "from xclim import sdba\n",
+    "from xclim.testing import open_dataset\n",
+    "\n",
+    "ds = open_dataset(\"sdba/CanESM2_1950-2100.nc\")\n",
+    "tx = ds.sel(time=slice(\"1950\", \"1980\")).tasmax\n",
+    "kws = {\"dim\": \"time\", \"q\": np.linspace(0, 1, 50)}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Tests with %%timeit (full 30 years)\n",
+    "\n",
+    "Here `fastnanquantile` is the best algorithm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "tx.quantile(**kws).compute()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "sdba.nbutils.USE_FASTNANQUANTILE = False\n",
+    "with xclim.set_options(allow_sortquantile=True):\n",
+    "    sdba.nbutils.quantile(tx, **kws).compute()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "sdba.nbutils.USE_FASTNANQUANTILE = False\n",
+    "with xclim.set_options(allow_sortquantile=False):\n",
+    "    sdba.nbutils.quantile(tx, **kws).compute()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install fastnanquantile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "sdba.nbutils.USE_FASTNANQUANTILE = True\n",
+    "sdba.nbutils.quantile(tx, **kws).compute()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Test computation time as a function of number of points\n",
+    "\n",
+    "For a smaller number of time steps <=2000, `_sortquantile` is the best algorithm in general"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import time\n",
+    "\n",
+    "import xarray as xr\n",
+    "\n",
+    "num_tests = 500\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "timed = {}\n",
+    "\n",
+    "# fastnanquantile has nothing to do with sortquantile\n",
+    "# I just added a third step using this variable\n",
+    "for allow_sortquantile in [True, False, \"fnq\"]:\n",
+    "    with xclim.set_options(\n",
+    "        allow_sortquantile=allow_sortquantile if allow_sortquantile != \"fnq\" else True\n",
+    "    ):\n",
+    "        # heat-up the jit\n",
+    "        sdba.nbutils.quantile(\n",
+    "            xr.DataArray(np.array([0, 1.5])), dim=\"dim_0\", q=np.array([0.5])\n",
+    "        )\n",
+    "        timed[allow_sortquantile] = []\n",
+    "        for size in np.arange(250, 2000 + 250, 250):\n",
+    "            da = tx.isel(time=slice(0, size))\n",
+    "            if allow_sortquantile == \"fnq\":\n",
+    "                sdba.nbutils.USE_FASTNANQUANTILE = True\n",
+    "            else:\n",
+    "                sdba.nbutils.USE_FASTNANQUANTILE = False\n",
+    "            t0 = time.time()\n",
+    "            for ii in range(num_tests):\n",
+    "                sdba.nbutils.quantile(da, **kws).compute()\n",
+    "            timed[allow_sortquantile].append([size, time.time() - t0])\n",
+    "\n",
+    "for k, lab in zip(\n",
+    "    [True, False, \"fnq\"], [\"sortquantile\", \"np.nanquantile\", \"fastnanquantile\"]\n",
+    "):\n",
+    "    arr = np.array(timed[k])\n",
+    "    plt.plot(arr[:, 0], arr[:, 1] / num_tests, label=lab)\n",
+    "plt.legend()\n",
+    "plt.title(\n",
+    "    \"Quantile calculation using sortquantile and nanquantile, average time vs array size, for 50 quantiles\"\n",
+    ")\n",
+    "plt.xlabel(\"Number of time steps in the distribution\")\n",
+    "plt.ylabel(\"Computation time (s)\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/notebooks/sdba-speed.ipynb b/docs/notebooks/sdba-speed.ipynb
deleted file mode 100644
index 281f28e4e..000000000
--- a/docs/notebooks/sdba-speed.ipynb
+++ /dev/null
@@ -1,94 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from __future__ import annotations\n",
-    "\n",
-    "import importlib\n",
-    "import os\n",
-    "import time\n",
-    "\n",
-    "import numpy as np\n",
-    "\n",
-    "from xclim import sdba\n",
-    "\n",
-    "q = 50\n",
-    "sizes = [25, 50, 75, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1500, 2000]\n",
-    "tsort = np.zeros(len(sizes))\n",
-    "tnan = np.zeros(len(sizes))\n",
-    "num_tests = 500\n",
-    "\n",
-    "os.environ[\"USE_NANQUANTILE\"] = \"\"\n",
-    "importlib.reload(sdba.nbutils)\n",
-    "sdba.nbutils._choosequantile(np.random.randn(100), np.array([0.5]))\n",
-    "for i in range(len(sizes)):\n",
-    "    for k in range(num_tests):\n",
-    "        arr = np.random.randn(sizes[i])\n",
-    "        t0 = time.time()\n",
-    "        sdba.nbutils._choosequantile(arr, np.linspace(0, 1, q))\n",
-    "        tsort[i] += time.time() - t0\n",
-    "\n",
-    "os.environ[\"USE_NANQUANTILE\"] = \"True\"\n",
-    "importlib.reload(sdba.nbutils)\n",
-    "sdba.nbutils._choosequantile(np.random.randn(100), np.array([0.5]))\n",
-    "for i in range(len(sizes)):\n",
-    "    for k in range(num_tests):\n",
-    "        arr = np.random.randn(sizes[i])\n",
-    "        t0 = time.time()\n",
-    "        sdba.nbutils._choosequantile(arr, np.linspace(0, 1, q))\n",
-    "        tnan[i] += time.time() - t0\n",
-    "print(\n",
-    "    \"\\n\".join(\n",
-    "        [\n",
-    "            f\"{x:4.0f}, {y:.5f}, {z:.5f}, {z / y:.2f}\"\n",
-    "            for x, y, z in zip(sizes, tsort, tnan)\n",
-    "        ]\n",
-    "    )\n",
-    ")\n",
-    "import matplotlib.pyplot as plt\n",
-    "\n",
-    "plt.plot(sizes, tsort / num_tests, label=\"sortquantile\")\n",
-    "plt.plot(sizes, tnan / num_tests, label=\"nanquantile\")\n",
-    "plt.legend()\n",
-    "plt.title(\n",
-    "    \"Quantile calculation using sortquantile and nanquantile, average time vs array size, for 50 quantiles\"\n",
-    ")\n",
-    "plt.xlabel(\"Array size\")\n",
-    "plt.ylabel(\"Time (s)\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plt.plot(sizes, tnan / tsort, label=\"Ratio of times, nanquantile / sortquantile\")\n",
-    "plt.legend()\n",
-    "plt.title(\"Ratio of times, nanquantile / sortquantile\")\n",
-    "plt.xlabel(\"Array size\")\n",
-    "plt.ylabel(\"Time ratio\")"
-   ]
-  }
- ],
- "metadata": {
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index dc2296358..08d6cea87 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -15,7 +15,7 @@
 from xclim.core.options import ALLOW_SORTQUANTILE, OPTIONS
 
 try:
-    from fastnanquantie.xrcompat import xr_apply_nanquantile
+    from fastnanquantile.xrcompat import xr_apply_nanquantile
 
     USE_FASTNANQUANTILE = True
 except ImportError:

From 2842b39fe6e4ba2ce85fa466bc86cd73aa24e1ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Mon, 6 May 2024 15:41:48 -0400
Subject: [PATCH 19/40] update doc, ignore notebook

---
 docs/conf.py                                  |  1 +
 docs/notebooks/benchmarks/sdba_quantile.ipynb | 11 +++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index b4e71d3a6..5c504e44b 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -267,6 +267,7 @@ class XCStyle(AlphaStyle):
     "_build",
     "Thumbs.db",
     ".DS_Store",
+    "notebooks/benchmarks",
     "notebooks/xclim_training",
     "paper/paper.md",
     "**.ipynb_checkpoints",
diff --git a/docs/notebooks/benchmarks/sdba_quantile.ipynb b/docs/notebooks/benchmarks/sdba_quantile.ipynb
index 8c54c830e..d5481543a 100644
--- a/docs/notebooks/benchmarks/sdba_quantile.ipynb
+++ b/docs/notebooks/benchmarks/sdba_quantile.ipynb
@@ -27,7 +27,12 @@
    "source": [
     "## Tests with %%timeit (full 30 years)\n",
     "\n",
-    "Here `fastnanquantile` is the best algorithm"
+    "Here `fastnanquantile` is the best algorithm out of \n",
+    "* `xr.DataArray.quantile`\n",
+    "* `nbutils.quantile`, using: \n",
+    "  * either `_sortquantile/np.quantile` (i.e. most likely `_sortquantile` )\n",
+    "  * `np.nanquantile`\n",
+    "  * `fastnanquantile`\n"
    ]
   },
   {
@@ -101,13 +106,11 @@
    "source": [
     "import time\n",
     "\n",
+    "import matplotlib.pyplot as plt\n",
     "import xarray as xr\n",
     "\n",
     "num_tests = 500\n",
-    "import matplotlib.pyplot as plt\n",
-    "\n",
     "timed = {}\n",
-    "\n",
     "# fastnanquantile has nothing to do with sortquantile\n",
     "# I just added a third step using this variable\n",
     "for allow_sortquantile in [True, False, \"fnq\"]:\n",

From d63af58f8682e64dc9259221494956fd02802dde Mon Sep 17 00:00:00 2001
From: SarahG-579462 <gammon.sarah@ouranos.ca>
Date: Tue, 14 May 2024 20:12:19 +0000
Subject: [PATCH 20/40] add 1d numba-compiled implementation of
 core.utils._nan_quantile

---
 xclim/sdba/nbutils.py | 301 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 301 insertions(+)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 08d6cea87..423cfbab6 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -5,6 +5,26 @@
 """
 from __future__ import annotations
 
+import functools
+import importlib.util
+import logging
+import os
+import warnings
+from collections import defaultdict
+from collections.abc import Sequence
+from enum import IntEnum
+from importlib.resources import as_file, files
+from inspect import Parameter, _empty  # noqa
+from io import StringIO
+from pathlib import Path
+from typing import Callable, NewType, TypeVar
+
+import numpy as np
+import xarray as xr
+from dask import array as dsk
+from pint import Quantity
+from yaml import safe_dump, safe_load
+
 from collections.abc import Hashable, Sequence
 
 import numpy as np
@@ -12,8 +32,14 @@
 from xarray import DataArray, apply_ufunc
 from xarray.core import utils
 
+import logging
+
 from xclim.core.options import ALLOW_SORTQUANTILE, OPTIONS
 
+logger = logging.getLogger("xclim")
+
+
+
 try:
     from fastnanquantile.xrcompat import xr_apply_nanquantile
 
@@ -381,3 +407,278 @@ def _pairwise_haversine_and_bins(lond, latd, transpose=False):
     if transpose:
         np.fill_diagonal(dists, 0)
     return dists, mn, mx
+
+
+
+@njit(
+    #    [
+    #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
+    #        float64[:](float64[:],float64[:],float64[:],float64[:],float64[:],float64[:],optional(typeof("constant"))),
+    #    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
+def calc_perc(
+    arr: np.ndarray,
+    percentiles: Sequence[float] | None = None,
+    alpha: float = 1.0,
+    beta: float = 1.0,
+    copy: bool = True,
+) -> np.ndarray:
+    """Compute percentiles using nan_calc_percentiles and move the percentiles' axis to the end."""
+    if percentiles is None:
+        _percentiles = [50.0]
+    else:
+        _percentiles = percentiles
+
+    return np.moveaxis(
+        nan_calc_percentiles(
+            arr=arr,
+            percentiles=_percentiles,
+            axis=-1,
+            alpha=alpha,
+            beta=beta,
+            copy=copy,
+        ),
+        source=0,
+        destination=-1,
+    )
+
+@njit(
+    #    [
+    #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
+    #        float64[:](float64[:],float64[:],float64[:],float64[:],float64[:],float64[:],optional(typeof("constant"))),
+    #    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
+def nan_calc_percentiles(
+    arr: np.ndarray,
+    percentiles: Sequence[float] | None = None,
+    axis: int = -1,
+    alpha: float = 1.0,
+    beta: float = 1.0,
+    copy: bool = True,
+) -> np.ndarray:
+    """Convert the percentiles to quantiles and compute them using _nan_quantile."""
+    if percentiles is None:
+        _percentiles = [50.0]
+    else:
+        _percentiles = percentiles
+
+    if copy:
+        # bootstrapping already works on a data's copy
+        # doing it again is extremely costly, especially with dask.
+        arr = arr.copy()
+    quantiles = np.array([per / 100.0 for per in _percentiles])
+    return _nan_quantile(arr, quantiles, axis, alpha, beta)
+
+@njit(
+    #    [
+    #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
+    #        float64[:](float64[:],float64[:],float64[:],float64[:],float64[:],float64[:],optional(typeof("constant"))),
+    #    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
+def _compute_virtual_index(
+    n: float, quantiles: np.ndarray, alpha: float, beta: float
+):
+    """Compute the floating point indexes of an array for the linear interpolation of quantiles.
+
+    Based on the approach used by :cite:t:`hyndman_sample_1996`.
+
+    Parameters
+    ----------
+    n : array_like
+        The sample sizes.
+    quantiles : array_like
+        The quantiles values.
+    alpha : float
+        A constant used to correct the index computed.
+    beta : float
+        A constant used to correct the index computed.
+
+    Notes
+    -----
+    `alpha` and `beta` values depend on the chosen method (see quantile documentation).
+
+    References
+    ----------
+    :cite:cts:`hyndman_sample_1996`
+    """
+    return n * quantiles + (alpha + quantiles * (1 - alpha - beta)) - 1
+
+@njit(
+    #    [
+    #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
+    #        float64[:](float64[:],float64[:],float64[:],float64[:],float64[:],float64[:],optional(typeof("constant"))),
+    #    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
+def _get_gamma(virtual_indexes: np.ndarray, previous_indexes: np.ndarray):
+    """Compute gamma (AKA 'm' or 'weight') for the linear interpolation of quantiles.
+
+    Parameters
+    ----------
+    virtual_indexes: array_like
+      The indexes where the percentile is supposed to be found in the sorted sample.
+    previous_indexes: array_like
+      The floor values of virtual_indexes.
+
+    Notes
+    -----
+    `gamma` is usually the fractional part of virtual_indexes but can be modified by the interpolation method.
+    """
+    gamma = np.asarray(virtual_indexes - previous_indexes)
+    return np.asarray(gamma)
+
+@njit(
+    #    [
+    #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
+    #        float64[:](float64[:],float64[:],float64[:],float64[:],float64[:],float64[:],optional(typeof("constant"))),
+    #    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
+def _get_indexes(
+    arr: np.ndarray, virtual_indexes: np.ndarray, valid_values_count: np.ndarray
+) -> tuple[np.ndarray, np.ndarray]:
+    """Get the valid indexes of arr neighbouring virtual_indexes.
+
+    Notes
+    -----
+    This is a companion function to linear interpolation of quantiles.
+
+    Parameters
+    ----------
+    arr : array-like
+    virtual_indexes : array-like
+    valid_values_count : array-like
+
+    Returns
+    -------
+    array-like, array-like
+        A tuple of virtual_indexes neighbouring indexes (previous and next)
+    """
+    previous_indexes = np.asarray(np.floor(virtual_indexes))
+    next_indexes = np.asarray(previous_indexes + 1)
+    indexes_above_bounds = virtual_indexes >= valid_values_count - 1
+    # When indexes is above max index, take the max value of the array
+    if indexes_above_bounds.any():
+        previous_indexes[indexes_above_bounds] = -1
+        next_indexes[indexes_above_bounds] = -1
+    # When indexes is below min index, take the min value of the array
+    indexes_below_bounds = virtual_indexes < 0
+    if indexes_below_bounds.any():
+        previous_indexes[indexes_below_bounds] = 0
+        next_indexes[indexes_below_bounds] = 0
+    if (arr.dtype is np.dtype(np.float64)) or (arr.dtype is np.dtype(np.float32)):
+        # After the sort, slices having NaNs will have for last element a NaN
+        virtual_indexes_nans = np.isnan(virtual_indexes)
+        if virtual_indexes_nans.any():
+            previous_indexes[virtual_indexes_nans] = -1
+            next_indexes[virtual_indexes_nans] = -1
+    previous_indexes = previous_indexes.astype(np.intp)
+    next_indexes = next_indexes.astype(np.intp)
+    return previous_indexes, next_indexes
+
+@njit(
+    #    [
+    #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
+    #        float64[:](float64[:],float64[:],float64[:],float64[:],float64[:],float64[:],optional(typeof("constant"))),
+    #    ],
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
+def _linear_interpolation(
+    left: np.ndarray,
+    right: np.ndarray,
+    gamma: np.ndarray,
+) -> np.ndarray:
+    """Compute the linear interpolation weighted by gamma on each point of two same shape arrays.
+
+    Parameters
+    ----------
+    left : array_like
+        Left bound.
+    right : array_like
+        Right bound.
+    gamma : array_like
+        The interpolation weight.
+
+    Returns
+    -------
+    array_like
+    """
+    diff_b_a = np.subtract(right, left)
+    lerp_interpolation = np.asarray(np.add(left, diff_b_a * gamma))
+    ind = gamma >= 0.5
+    lerp_interpolation[ind] = right[ind] - diff_b_a[ind] * (1 - gamma[ind])
+    #np.subtract(
+    #    right, diff_b_a * (1 - gamma), out=lerp_interpolation, where=gamma >= 0.5
+    #)
+    #if lerp_interpolation.ndim == 0:
+    #    lerp_interpolation = lerp_interpolation[()]  # unpack 0d arrays
+    return lerp_interpolation
+
+@njit(
+    fastmath=False,
+    nogil=True,
+    cache=False,
+)
+def _nan_quantile(
+    arr: np.ndarray,
+    quantiles: np.ndarray,
+    alpha: float = 1.0,
+    beta: float = 1.0,
+) -> float | np.ndarray:
+    """Get the quantiles of the 1-dimensional array.
+
+    A linear interpolation is performed using alpha and beta.
+
+    Notes
+    -----
+    By default, alpha == beta == 1 which performs the 7th method of :cite:t:`hyndman_sample_1996`.
+    with alpha == beta == 1/3 we get the 8th method.
+    """
+    # --- Setup
+    data_axis_length = arr.shape[0]
+    # nan_count is not a scalar. Convert to same type as arr.
+    nan_count = arr.dtype.type(np.isnan(arr).sum())
+    valid_values_count = data_axis_length - nan_count
+    # We need at least two values to do an interpolation
+    #too_few_values = valid_values_count < 2
+    #if too_few_values:
+    #    return np.nan
+    # --- Computation of indexes
+    # Add axis for quantiles
+    #valid_values_count = valid_values_count[..., np.newaxis]
+    virtual_indexes = _compute_virtual_index(valid_values_count, quantiles, alpha, beta)
+    virtual_indexes = np.asarray(virtual_indexes)
+    previous_indexes, next_indexes = _get_indexes(
+        arr, virtual_indexes, valid_values_count
+    )
+    # --- Sorting
+    arr.sort()
+    # --- Get values from indexes
+    #arr = arr#[..., np.newaxis]
+
+    previous = arr[previous_indexes]  # np.take_along_axis(arr, previous_indexes[np.newaxis, ...], 0)
+    next_elements = arr[next_indexes]  # np.take_along_axis(arr, next_indexes[np.newaxis, ...], 0)
+
+    # --- Linear interpolation
+    gamma = _get_gamma(virtual_indexes, previous_indexes)
+    interpolation = _linear_interpolation(previous, next_elements, gamma)
+    # When an interpolation is in Nan range, (near the end of the sorted array) it means
+    # we can clip to the array max value.
+    result = np.where(np.isnan(interpolation), arr[np.intp(valid_values_count) - 1], interpolation)
+    # Move quantile axis in front
+    return result
\ No newline at end of file

From 915cd6af09b42584668f64f30ef12d8dbe69f152 Mon Sep 17 00:00:00 2001
From: SarahG-579462 <gammon.sarah@ouranos.ca>
Date: Tue, 14 May 2024 20:13:56 +0000
Subject: [PATCH 21/40] add test_quantile.ipynb temporarily, to test
 numpy-level implementations

---
 test_quantile.ipynb   | 358 ++++++++++++++++++++++++++++++++++++++++++
 xclim/sdba/nbutils.py |  52 +++---
 2 files changed, 385 insertions(+), 25 deletions(-)
 create mode 100644 test_quantile.ipynb

diff --git a/test_quantile.ipynb b/test_quantile.ipynb
new file mode 100644
index 000000000..ac7429dd4
--- /dev/null
+++ b/test_quantile.ipynb
@@ -0,0 +1,358 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "4.2005265481208437e-26"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pathlib import Path\n",
+    "from xclim.testing import open_dataset\n",
+    "import numpy as np\n",
+    "import xclim\n",
+    "from xclim import sdba\n",
+    "\n",
+    "ds = open_dataset(\"sdba/CanESM2_1950-2100.nc\", cache=True, cache_dir=Path(\"~/data/xclim/\"))\n",
+    "tx = ds.tasmax.sel(time=slice(\"1950\", \"1980\")).isel(location=0)\n",
+    "q = np.linspace(0, 1, 50)\n",
+    "arr = tx.values\n",
+    "arr_m1 = arr.copy()\n",
+    "arr_m2 = arr.copy()\n",
+    "\n",
+    "# do some sort of computation to make sure it's compiled.\n",
+    "np.square(sdba.nbutils._sortquantile(np.copy(arr_m1),q) - sdba.nbutils._nan_quantile(np.copy(arr_m2), q)).sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DUMP CPUDispatcher[_nan_quantile, type code=2691]\n",
+      "  DUMP CompileResult <built-in method _nan_quantile of _dynfunc._Closure object at 0x7f70b1b28a00>\n",
+      "    DUMP Signature [type code: 3191]\n",
+      "      Argument types:\n",
+      "      | DUMP Array[code=2697, name=array(float32, 1d, C)]\n",
+      "      | DUMP Array[code=1966, name=array(float64, 1d, C)]\n",
+      "      | DUMP Omitted[code=2902, name=omitted(default=1.0)]\n",
+      "      | DUMP Omitted[code=2902, name=omitted(default=1.0)]\n",
+      "      Return type:\n",
+      "      | DUMP Array[code=1966, name=array(float64, 1d, C)]\n",
+      "    END DUMP\n",
+      "  END DUMP\n",
+      "END DUMP CPUDispatcher[_nan_quantile]\n"
+     ]
+    }
+   ],
+   "source": [
+    "sdba.nbutils._nan_quantile.dump()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "877 µs ± 10.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "# sortquantile implementation\n",
+    "sdba.nbutils._sortquantile(arr_m1,q)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "156 µs ± 1.41 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit arr_m3 = np.copy(arr_m2)\n",
+    "# 1d numba-compiled version of xclim.core.utils._nan_quantile (cheating)\n",
+    "sdba.nbutils._nan_quantile(arr_m3,q)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "901 µs ± 35.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "# 1d numba-compiled version of xclim.core.utils._nan_quantile\n",
+    "sdba.nbutils._nan_quantile(np.copy(arr_m2),q)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "858 µs ± 8.82 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "# Abel's implementation\n",
+    "xclim.core.utils._nan_quantile(np.copy(arr_m2),q)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "221 µs ± 2.94 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit arr_m3 = np.copy(arr_m2)\n",
+    "# Abel's implementation (in-place (cheating)).\n",
+    "xclim.core.utils._nan_quantile(arr_m3,q)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [],
+   "source": [
+    "arr_m3 = np.copy(arr_m2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "217 µs ± 4.06 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "# Abel's implementation (in-place, sorted beforehand (super cheating!)).\n",
+    "xclim.core.utils._nan_quantile(arr_m3,q)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [],
+   "source": [
+    "import xarray as xr\n",
+    "import dask\n",
+    "ds2 = xr.open_dataset(\"~/data/xclim/tasmax_day_CanESM5_ssp245_r1i1p1f1_gn_20150101-21001231.nc\")\n",
+    "\n",
+    "ds_sel2 = ds2.isel(lat=0, lon=0).tasmax.compute()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [],
+   "source": [
+    "\n",
+    "arr2 = ds_sel2.values.copy()\n",
+    "percent_of_nans = 0.0\n",
+    "num_of_nans = int(percent_of_nans * arr2.size)\n",
+    "random_indices = np.random.randint(0, arr2.shape[0], size = num_of_nans)\n",
+    "arr2[random_indices] = np.nan"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "430 µs ± 10.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "xclim.core.utils._nan_quantile(arr2,q)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "435 µs ± 3.86 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%timeit\n",
+    "sdba.nbutils._sortquantile(arr2,q)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(31390,)"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "arr2.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "metadata": {}
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0.        , 0.02040816, 0.04081633, 0.06122449, 0.08163265,\n",
+       "       0.10204082, 0.12244898, 0.14285714, 0.16326531, 0.18367347,\n",
+       "       0.20408163, 0.2244898 , 0.24489796, 0.26530612, 0.28571429,\n",
+       "       0.30612245, 0.32653061, 0.34693878, 0.36734694, 0.3877551 ,\n",
+       "       0.40816327, 0.42857143, 0.44897959, 0.46938776, 0.48979592,\n",
+       "       0.51020408, 0.53061224, 0.55102041, 0.57142857, 0.59183673,\n",
+       "       0.6122449 , 0.63265306, 0.65306122, 0.67346939, 0.69387755,\n",
+       "       0.71428571, 0.73469388, 0.75510204, 0.7755102 , 0.79591837,\n",
+       "       0.81632653, 0.83673469, 0.85714286, 0.87755102, 0.89795918,\n",
+       "       0.91836735, 0.93877551, 0.95918367, 0.97959184, 1.        ])"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "q"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "yclim",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 423cfbab6..3bd3876e2 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -11,7 +11,7 @@
 import os
 import warnings
 from collections import defaultdict
-from collections.abc import Sequence
+from collections.abc import Hashable, Sequence
 from enum import IntEnum
 from importlib.resources import as_file, files
 from inspect import Parameter, _empty  # noqa
@@ -22,24 +22,17 @@
 import numpy as np
 import xarray as xr
 from dask import array as dsk
-from pint import Quantity
-from yaml import safe_dump, safe_load
-
-from collections.abc import Hashable, Sequence
-
-import numpy as np
 from numba import boolean, float32, float64, guvectorize, int64, njit
+from pint import Quantity
 from xarray import DataArray, apply_ufunc
 from xarray.core import utils
-
-import logging
+from yaml import safe_dump, safe_load
 
 from xclim.core.options import ALLOW_SORTQUANTILE, OPTIONS
 
 logger = logging.getLogger("xclim")
 
 
-
 try:
     from fastnanquantile.xrcompat import xr_apply_nanquantile
 
@@ -409,7 +402,6 @@ def _pairwise_haversine_and_bins(lond, latd, transpose=False):
     return dists, mn, mx
 
 
-
 @njit(
     #    [
     #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
@@ -445,6 +437,7 @@ def calc_perc(
         destination=-1,
     )
 
+
 @njit(
     #    [
     #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
@@ -475,6 +468,7 @@ def nan_calc_percentiles(
     quantiles = np.array([per / 100.0 for per in _percentiles])
     return _nan_quantile(arr, quantiles, axis, alpha, beta)
 
+
 @njit(
     #    [
     #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
@@ -484,9 +478,7 @@ def nan_calc_percentiles(
     nogil=True,
     cache=False,
 )
-def _compute_virtual_index(
-    n: float, quantiles: np.ndarray, alpha: float, beta: float
-):
+def _compute_virtual_index(n: float, quantiles: np.ndarray, alpha: float, beta: float):
     """Compute the floating point indexes of an array for the linear interpolation of quantiles.
 
     Based on the approach used by :cite:t:`hyndman_sample_1996`.
@@ -512,6 +504,7 @@ def _compute_virtual_index(
     """
     return n * quantiles + (alpha + quantiles * (1 - alpha - beta)) - 1
 
+
 @njit(
     #    [
     #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
@@ -538,6 +531,7 @@ def _get_gamma(virtual_indexes: np.ndarray, previous_indexes: np.ndarray):
     gamma = np.asarray(virtual_indexes - previous_indexes)
     return np.asarray(gamma)
 
+
 @njit(
     #    [
     #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
@@ -589,6 +583,7 @@ def _get_indexes(
     next_indexes = next_indexes.astype(np.intp)
     return previous_indexes, next_indexes
 
+
 @njit(
     #    [
     #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
@@ -622,13 +617,14 @@ def _linear_interpolation(
     lerp_interpolation = np.asarray(np.add(left, diff_b_a * gamma))
     ind = gamma >= 0.5
     lerp_interpolation[ind] = right[ind] - diff_b_a[ind] * (1 - gamma[ind])
-    #np.subtract(
+    # np.subtract(
     #    right, diff_b_a * (1 - gamma), out=lerp_interpolation, where=gamma >= 0.5
-    #)
-    #if lerp_interpolation.ndim == 0:
+    # )
+    # if lerp_interpolation.ndim == 0:
     #    lerp_interpolation = lerp_interpolation[()]  # unpack 0d arrays
     return lerp_interpolation
 
+
 @njit(
     fastmath=False,
     nogil=True,
@@ -655,12 +651,12 @@ def _nan_quantile(
     nan_count = arr.dtype.type(np.isnan(arr).sum())
     valid_values_count = data_axis_length - nan_count
     # We need at least two values to do an interpolation
-    #too_few_values = valid_values_count < 2
-    #if too_few_values:
+    # too_few_values = valid_values_count < 2
+    # if too_few_values:
     #    return np.nan
     # --- Computation of indexes
     # Add axis for quantiles
-    #valid_values_count = valid_values_count[..., np.newaxis]
+    # valid_values_count = valid_values_count[..., np.newaxis]
     virtual_indexes = _compute_virtual_index(valid_values_count, quantiles, alpha, beta)
     virtual_indexes = np.asarray(virtual_indexes)
     previous_indexes, next_indexes = _get_indexes(
@@ -669,16 +665,22 @@ def _nan_quantile(
     # --- Sorting
     arr.sort()
     # --- Get values from indexes
-    #arr = arr#[..., np.newaxis]
+    # arr = arr#[..., np.newaxis]
 
-    previous = arr[previous_indexes]  # np.take_along_axis(arr, previous_indexes[np.newaxis, ...], 0)
-    next_elements = arr[next_indexes]  # np.take_along_axis(arr, next_indexes[np.newaxis, ...], 0)
+    previous = arr[
+        previous_indexes
+    ]  # np.take_along_axis(arr, previous_indexes[np.newaxis, ...], 0)
+    next_elements = arr[
+        next_indexes
+    ]  # np.take_along_axis(arr, next_indexes[np.newaxis, ...], 0)
 
     # --- Linear interpolation
     gamma = _get_gamma(virtual_indexes, previous_indexes)
     interpolation = _linear_interpolation(previous, next_elements, gamma)
     # When an interpolation is in Nan range, (near the end of the sorted array) it means
     # we can clip to the array max value.
-    result = np.where(np.isnan(interpolation), arr[np.intp(valid_values_count) - 1], interpolation)
+    result = np.where(
+        np.isnan(interpolation), arr[np.intp(valid_values_count) - 1], interpolation
+    )
     # Move quantile axis in front
-    return result
\ No newline at end of file
+    return result

From 2810d86b0fea162db82ba6a69fe77540c4bf5c1f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 14 May 2024 20:17:51 +0000
Subject: [PATCH 22/40] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 test_quantile.ipynb | 41 ++++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/test_quantile.ipynb b/test_quantile.ipynb
index ac7429dd4..c6b52e607 100644
--- a/test_quantile.ipynb
+++ b/test_quantile.ipynb
@@ -19,13 +19,19 @@
     }
    ],
    "source": [
+    "from __future__ import annotations\n",
+    "\n",
     "from pathlib import Path\n",
-    "from xclim.testing import open_dataset\n",
+    "\n",
     "import numpy as np\n",
+    "\n",
     "import xclim\n",
     "from xclim import sdba\n",
+    "from xclim.testing import open_dataset\n",
     "\n",
-    "ds = open_dataset(\"sdba/CanESM2_1950-2100.nc\", cache=True, cache_dir=Path(\"~/data/xclim/\"))\n",
+    "ds = open_dataset(\n",
+    "    \"sdba/CanESM2_1950-2100.nc\", cache=True, cache_dir=Path(\"~/data/xclim/\")\n",
+    ")\n",
     "tx = ds.tasmax.sel(time=slice(\"1950\", \"1980\")).isel(location=0)\n",
     "q = np.linspace(0, 1, 50)\n",
     "arr = tx.values\n",
@@ -33,7 +39,10 @@
     "arr_m2 = arr.copy()\n",
     "\n",
     "# do some sort of computation to make sure it's compiled.\n",
-    "np.square(sdba.nbutils._sortquantile(np.copy(arr_m1),q) - sdba.nbutils._nan_quantile(np.copy(arr_m2), q)).sum()"
+    "np.square(\n",
+    "    sdba.nbutils._sortquantile(np.copy(arr_m1), q)\n",
+    "    - sdba.nbutils._nan_quantile(np.copy(arr_m2), q)\n",
+    ").sum()"
    ]
   },
   {
@@ -85,7 +94,7 @@
    "source": [
     "%%timeit\n",
     "# sortquantile implementation\n",
-    "sdba.nbutils._sortquantile(arr_m1,q)"
+    "sdba.nbutils._sortquantile(arr_m1, q)"
    ]
   },
   {
@@ -106,7 +115,7 @@
    "source": [
     "%%timeit arr_m3 = np.copy(arr_m2)\n",
     "# 1d numba-compiled version of xclim.core.utils._nan_quantile (cheating)\n",
-    "sdba.nbutils._nan_quantile(arr_m3,q)"
+    "sdba.nbutils._nan_quantile(arr_m3, q)"
    ]
   },
   {
@@ -125,7 +134,7 @@
    "source": [
     "%%timeit\n",
     "# 1d numba-compiled version of xclim.core.utils._nan_quantile\n",
-    "sdba.nbutils._nan_quantile(np.copy(arr_m2),q)"
+    "sdba.nbutils._nan_quantile(np.copy(arr_m2), q)"
    ]
   },
   {
@@ -146,7 +155,7 @@
    "source": [
     "%%timeit\n",
     "# Abel's implementation\n",
-    "xclim.core.utils._nan_quantile(np.copy(arr_m2),q)"
+    "xclim.core.utils._nan_quantile(np.copy(arr_m2), q)"
    ]
   },
   {
@@ -167,7 +176,7 @@
    "source": [
     "%%timeit arr_m3 = np.copy(arr_m2)\n",
     "# Abel's implementation (in-place (cheating)).\n",
-    "xclim.core.utils._nan_quantile(arr_m3,q)"
+    "xclim.core.utils._nan_quantile(arr_m3, q)"
    ]
   },
   {
@@ -199,7 +208,7 @@
    "source": [
     "%%timeit\n",
     "# Abel's implementation (in-place, sorted beforehand (super cheating!)).\n",
-    "xclim.core.utils._nan_quantile(arr_m3,q)"
+    "xclim.core.utils._nan_quantile(arr_m3, q)"
    ]
   },
   {
@@ -210,9 +219,12 @@
    },
    "outputs": [],
    "source": [
-    "import xarray as xr\n",
     "import dask\n",
-    "ds2 = xr.open_dataset(\"~/data/xclim/tasmax_day_CanESM5_ssp245_r1i1p1f1_gn_20150101-21001231.nc\")\n",
+    "import xarray as xr\n",
+    "\n",
+    "ds2 = xr.open_dataset(\n",
+    "    \"~/data/xclim/tasmax_day_CanESM5_ssp245_r1i1p1f1_gn_20150101-21001231.nc\"\n",
+    ")\n",
     "\n",
     "ds_sel2 = ds2.isel(lat=0, lon=0).tasmax.compute()"
    ]
@@ -225,11 +237,10 @@
    },
    "outputs": [],
    "source": [
-    "\n",
     "arr2 = ds_sel2.values.copy()\n",
     "percent_of_nans = 0.0\n",
     "num_of_nans = int(percent_of_nans * arr2.size)\n",
-    "random_indices = np.random.randint(0, arr2.shape[0], size = num_of_nans)\n",
+    "random_indices = np.random.randint(0, arr2.shape[0], size=num_of_nans)\n",
     "arr2[random_indices] = np.nan"
    ]
   },
@@ -250,7 +261,7 @@
    ],
    "source": [
     "%%timeit\n",
-    "xclim.core.utils._nan_quantile(arr2,q)"
+    "xclim.core.utils._nan_quantile(arr2, q)"
    ]
   },
   {
@@ -270,7 +281,7 @@
    ],
    "source": [
     "%%timeit\n",
-    "sdba.nbutils._sortquantile(arr2,q)"
+    "sdba.nbutils._sortquantile(arr2, q)"
    ]
   },
   {

From 7bb324e9bd9768191fa4ea167e781b6147fe923f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 18 Jun 2024 17:35:18 -0400
Subject: [PATCH 23/40] remove _sortquantile, use utils._nan_quantile

---
 CHANGES.rst           |  9 +++++
 xclim/core/options.py |  4 --
 xclim/sdba/nbutils.py | 88 ++++---------------------------------------
 3 files changed, 17 insertions(+), 84 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index c3aebc195..3bb2e3e9b 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -2,6 +2,15 @@
 Changelog
 =========
 
+v0.51.0 (unreleased)
+--------------------
+Contributors to this version: Éric Dupuis (:user:`coxipi`), Sarah Gammon (:user:`SarahG-579462`)
+
+
+New features and enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* ``sdba.nbutils.quantile`` and its child functions are now faster. If the module `fastnanquantile` is installed, it is used as the backen for the computation of quantiles and yields even faster results. (:issue:`1513`).
+
 v0.50.0 (2024-06-17)
 --------------------
 Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Éric Dupuis (:user:`coxipi`).
diff --git a/xclim/core/options.py b/xclim/core/options.py
index c53f79e0e..411b00026 100644
--- a/xclim/core/options.py
+++ b/xclim/core/options.py
@@ -23,7 +23,6 @@
 RUN_LENGTH_UFUNC = "run_length_ufunc"
 SDBA_EXTRA_OUTPUT = "sdba_extra_output"
 SDBA_ENCODE_CF = "sdba_encode_cf"
-ALLOW_SORTQUANTILE = "allow_sortquantile"
 KEEP_ATTRS = "keep_attrs"
 AS_DATASET = "as_dataset"
 
@@ -38,14 +37,12 @@
     RUN_LENGTH_UFUNC: "auto",
     SDBA_EXTRA_OUTPUT: False,
     SDBA_ENCODE_CF: False,
-    ALLOW_SORTQUANTILE: True,
     KEEP_ATTRS: "xarray",
     AS_DATASET: False,
 }
 
 _LOUDNESS_OPTIONS = frozenset(["log", "warn", "raise"])
 _RUN_LENGTH_UFUNC_OPTIONS = frozenset(["auto", True, False])
-_ALLOW_SORTQUANTILE_OPTIONS = frozenset([True, False])
 _KEEP_ATTRS_OPTIONS = frozenset(["xarray", True, False])
 
 
@@ -72,7 +69,6 @@ def _valid_missing_options(mopts):
     RUN_LENGTH_UFUNC: _RUN_LENGTH_UFUNC_OPTIONS.__contains__,
     SDBA_EXTRA_OUTPUT: lambda opt: isinstance(opt, bool),
     SDBA_ENCODE_CF: lambda opt: isinstance(opt, bool),
-    ALLOW_SORTQUANTILE: _ALLOW_SORTQUANTILE_OPTIONS.__contains__,
     KEEP_ATTRS: _KEEP_ATTRS_OPTIONS.__contains__,
     AS_DATASET: lambda opt: isinstance(opt, bool),
 }
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 08d6cea87..7531e4e7f 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -8,11 +8,11 @@
 from collections.abc import Hashable, Sequence
 
 import numpy as np
-from numba import boolean, float32, float64, guvectorize, int64, njit
+from numba import boolean, float32, float64, guvectorize, njit
 from xarray import DataArray, apply_ufunc
 from xarray.core import utils
 
-from xclim.core.options import ALLOW_SORTQUANTILE, OPTIONS
+from xclim.core.utils import _nan_quantile
 
 try:
     from fastnanquantile.xrcompat import xr_apply_nanquantile
@@ -22,77 +22,6 @@
     USE_FASTNANQUANTILE = False
 
 
-@njit(
-    [
-        int64(float32[:]),
-        int64(float64[:]),
-    ],
-    fastmath=False,
-    nogil=True,
-    cache=False,
-)
-def numnan_sorted(s):
-    """Given a sorted array s, return the number of NaNs."""
-    # Given a sorted array s, return the number of NaNs.
-    # This is faster than np.isnan(s).sum(), but only works if s is sorted,
-    # and only for
-    ind = 0
-    for i in range(s.size - 1, 0, -1):
-        if np.isnan(s[i]):
-            ind += 1
-        else:
-            return ind
-    return ind
-
-
-@njit(
-    # [
-    #     float32[:](float32[:], float32[:]),
-    #     float64[:](float64[:], float64[:]),
-    # ],
-    fastmath=False,
-    nogil=True,
-    cache=False,
-)
-def _sortquantile(arr, q):
-    """Sorts arr into ascending order,
-    then computes the quantiles as a linear interpolation
-    between the sorted values.
-    """
-    sortarr = np.sort(arr)
-    numnan = numnan_sorted(sortarr)
-    # compute the indices where each quantile should go:
-    # nb: nan goes to the end, so we need to subtract numnan to the size.
-    indices = q * (arr.size - 1 - numnan)
-    # compute the quantiles manually to avoid casting to float64:
-    # (alternative is to use np.interp(indices, np.arange(arr.size), sortarr))
-    frac = indices % 1
-    low = np.floor(indices).astype(np.int64)
-    high = np.ceil(indices).astype(np.int64)
-    return (1 - frac) * sortarr[low] + frac * sortarr[high]
-
-
-@njit(
-    # [
-    #     float32[:](float32[:], float32[:]),
-    #     float64[:](float64[:], float64[:]),
-    # ],
-    fastmath=False,
-    nogil=True,
-    cache=False,
-)
-def _choosequantile(arr, q, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
-    # When the number of quantiles requested is large,
-    # it becomes more efficient to sort the array,
-    # and simply obtain the quantiles from the sorted array.
-    # The first method is O(arr.size*q.size),
-    # the second O(arr.size*log(arr.size) + q.size) amortized.
-    if allow_sortquantile and len(q) > 10 and len(q) > np.log(len(arr)):
-        return _sortquantile(arr, q)
-    else:
-        return np.nanquantile(arr, q).astype(arr.dtype)
-
-
 @guvectorize(
     [(float32[:], float32, float32[:]), (float64[:], float64, float64[:])],
     "(n),()->()",
@@ -142,16 +71,16 @@ def vecquantiles(
 
 
 @njit
-def _wrapper_quantile1d(arr, q, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
+def _wrapper_quantile1d(arr, q):
     out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
     for index in range(out.shape[0]):
-        out[index] = _choosequantile(arr[index], q, allow_sortquantile)
+        out[index] = _nan_quantile(arr[index], q)
     return out
 
 
-def _quantile(arr, q, nreduce, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
+def _quantile(arr, q, nreduce):
     if arr.ndim == nreduce:
-        out = _choosequantile(arr.flatten(), q, allow_sortquantile)
+        out = _nan_quantile(arr.flatten(), q)
     else:
         # dimensions that are reduced by quantile
         red_axis = np.arange(len(arr.shape) - nreduce, len(arr.shape))
@@ -161,7 +90,7 @@ def _quantile(arr, q, nreduce, allow_sortquantile=OPTIONS[ALLOW_SORTQUANTILE]):
         final_shape = [arr.shape[idx] for idx in keep_axis] + [len(q)]
         # reshape as (keep_dims, red_dims), compute, reshape back
         arr = arr.reshape(-1, reduction_dim_size)
-        out = _wrapper_quantile1d(arr, q, allow_sortquantile)
+        out = _wrapper_quantile1d(arr, q)
         out = out.reshape(final_shape)
     return out
 
@@ -171,10 +100,9 @@ def quantile(da, q, dim):
     if USE_FASTNANQUANTILE is True:
         return xr_apply_nanquantile(da, dim=dim, q=q).rename({"quantile": "quantiles"})
     else:
-        allow_sortquantile = OPTIONS[ALLOW_SORTQUANTILE]
         qc = np.array(q, dtype=da.dtype)
         dims = [dim] if isinstance(dim, str) else dim
-        kwargs = dict(nreduce=len(dims), q=qc, allow_sortquantile=allow_sortquantile)
+        kwargs = dict(nreduce=len(dims), q=qc)
         res = (
             apply_ufunc(
                 _quantile,

From 83333577bf56f74cba31428ad456d25c91ce878a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 18 Jun 2024 17:37:28 -0400
Subject: [PATCH 24/40] _nan_quantile in vecquantiles

---
 xclim/sdba/nbutils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 7531e4e7f..1ac6107a1 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -32,7 +32,7 @@ def _vecquantiles(arr, rnk, res):
     if np.isnan(rnk):
         res[0] = np.NaN
     else:
-        res[0] = np.nanquantile(arr, rnk)
+        res[0] = _nan_quantile(arr, rnk)
 
 
 def vecquantiles(

From 6dc179f740250c76b0a65f1ab418507998237b86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 18 Jun 2024 17:47:13 -0400
Subject: [PATCH 25/40] remove mentions of _sortquantile

---
 docs/notebooks/benchmarks/sdba_quantile.ipynb | 58 ++++++-------------
 xclim/core/options.py                         |  4 --
 2 files changed, 17 insertions(+), 45 deletions(-)

diff --git a/docs/notebooks/benchmarks/sdba_quantile.ipynb b/docs/notebooks/benchmarks/sdba_quantile.ipynb
index d5481543a..bc3a2a6e8 100644
--- a/docs/notebooks/benchmarks/sdba_quantile.ipynb
+++ b/docs/notebooks/benchmarks/sdba_quantile.ipynb
@@ -30,8 +30,7 @@
     "Here `fastnanquantile` is the best algorithm out of \n",
     "* `xr.DataArray.quantile`\n",
     "* `nbutils.quantile`, using: \n",
-    "  * either `_sortquantile/np.quantile` (i.e. most likely `_sortquantile` )\n",
-    "  * `np.nanquantile`\n",
+    "  * `xclim.core.utils._nan_quantile`\n",
     "  * `fastnanquantile`\n"
    ]
   },
@@ -53,20 +52,7 @@
    "source": [
     "%%timeit\n",
     "sdba.nbutils.USE_FASTNANQUANTILE = False\n",
-    "with xclim.set_options(allow_sortquantile=True):\n",
-    "    sdba.nbutils.quantile(tx, **kws).compute()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%timeit\n",
-    "sdba.nbutils.USE_FASTNANQUANTILE = False\n",
-    "with xclim.set_options(allow_sortquantile=False):\n",
-    "    sdba.nbutils.quantile(tx, **kws).compute()"
+    "sdba.nbutils.quantile(tx, **kws).compute()"
    ]
   },
   {
@@ -113,35 +99,25 @@
     "timed = {}\n",
     "# fastnanquantile has nothing to do with sortquantile\n",
     "# I just added a third step using this variable\n",
-    "for allow_sortquantile in [True, False, \"fnq\"]:\n",
-    "    with xclim.set_options(\n",
-    "        allow_sortquantile=allow_sortquantile if allow_sortquantile != \"fnq\" else True\n",
-    "    ):\n",
-    "        # heat-up the jit\n",
-    "        sdba.nbutils.quantile(\n",
-    "            xr.DataArray(np.array([0, 1.5])), dim=\"dim_0\", q=np.array([0.5])\n",
-    "        )\n",
-    "        timed[allow_sortquantile] = []\n",
-    "        for size in np.arange(250, 2000 + 250, 250):\n",
-    "            da = tx.isel(time=slice(0, size))\n",
-    "            if allow_sortquantile == \"fnq\":\n",
-    "                sdba.nbutils.USE_FASTNANQUANTILE = True\n",
-    "            else:\n",
-    "                sdba.nbutils.USE_FASTNANQUANTILE = False\n",
-    "            t0 = time.time()\n",
-    "            for ii in range(num_tests):\n",
-    "                sdba.nbutils.quantile(da, **kws).compute()\n",
-    "            timed[allow_sortquantile].append([size, time.time() - t0])\n",
     "\n",
-    "for k, lab in zip(\n",
-    "    [True, False, \"fnq\"], [\"sortquantile\", \"np.nanquantile\", \"fastnanquantile\"]\n",
-    "):\n",
+    "for use_fnq in [True, False]:\n",
+    "    sdba.nbutils.USE_FASTNANQUANTILE = use_fnq\n",
+    "    # heat-up the jit\n",
+    "    sdba.nbutils.quantile(\n",
+    "        xr.DataArray(np.array([0, 1.5])), dim=\"dim_0\", q=np.array([0.5])\n",
+    "    )\n",
+    "    for size in np.arange(250, 2000 + 250, 250):\n",
+    "        da = tx.isel(time=slice(0, size))\n",
+    "        t0 = time.time()\n",
+    "        for ii in range(num_tests):\n",
+    "            sdba.nbutils.quantile(da, **kws).compute()\n",
+    "        timed[use_fnq].append([size, time.time() - t0])\n",
+    "\n",
+    "for k, lab in zip([True, False], [\"xclim.core.utils._nan_quantile\", \"fastnanquantile\"]):\n",
     "    arr = np.array(timed[k])\n",
     "    plt.plot(arr[:, 0], arr[:, 1] / num_tests, label=lab)\n",
     "plt.legend()\n",
-    "plt.title(\n",
-    "    \"Quantile calculation using sortquantile and nanquantile, average time vs array size, for 50 quantiles\"\n",
-    ")\n",
+    "plt.title(\"Quantile computation, average time vs array size, for 50 quantiles\")\n",
     "plt.xlabel(\"Number of time steps in the distribution\")\n",
     "plt.ylabel(\"Computation time (s)\")"
    ]
diff --git a/xclim/core/options.py b/xclim/core/options.py
index 411b00026..e4f78a255 100644
--- a/xclim/core/options.py
+++ b/xclim/core/options.py
@@ -167,10 +167,6 @@ class set_options:
     run_length_ufunc : str
       Whether to use the 1D ufunc version of run length algorithms or the dask-ready broadcasting version.
       Default is ``"auto"``, which means the latter is used for dask-backed and large arrays.
-    allow_sortquantile : bool
-      If `True`,  the optimal function between ``np.nanquantile`` or ``sdba.nbutils._sortquantile` is chosen. If `False`,
-      ``np.nanquantile`` is selected. `_sortquantile` is optimal when the number of quantiles is large (in absolute and relatively to array size).
-      Default: `True`.
     sdba_extra_output : bool
         Whether to add diagnostic variables to outputs of sdba's `train`, `adjust`
         and `processing` operations. Details about these additional variables are given in the object's

From d6c87c0dbf0162d2f6259f6b055861d625bc22b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 18 Jun 2024 17:52:11 -0400
Subject: [PATCH 26/40] revert: np.nanquantile in vecquantiles

---
 xclim/sdba/nbutils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 1ac6107a1..ef28b714d 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -32,7 +32,7 @@ def _vecquantiles(arr, rnk, res):
     if np.isnan(rnk):
         res[0] = np.NaN
     else:
-        res[0] = _nan_quantile(arr, rnk)
+        res[0] = np.quantile(arr, rnk)
 
 
 def vecquantiles(

From 0c1ea6210fdeac359ec559b7f71bbd2384aafa65 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 18 Jun 2024 17:54:35 -0400
Subject: [PATCH 27/40] quantile -> nanquantile (duh)

---
 xclim/sdba/nbutils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index ef28b714d..7531e4e7f 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -32,7 +32,7 @@ def _vecquantiles(arr, rnk, res):
     if np.isnan(rnk):
         res[0] = np.NaN
     else:
-        res[0] = np.quantile(arr, rnk)
+        res[0] = np.nanquantile(arr, rnk)
 
 
 def vecquantiles(

From 8191aa0155bc88820f440e04e759a5e05d003634 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 18 Jun 2024 17:58:55 -0400
Subject: [PATCH 28/40] add issue number

---
 CHANGES.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 3bb2e3e9b..8ec447f8f 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -9,7 +9,7 @@ Contributors to this version: Éric Dupuis (:user:`coxipi`), Sarah Gammon (:user
 
 New features and enhancements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* ``sdba.nbutils.quantile`` and its child functions are now faster. If the module `fastnanquantile` is installed, it is used as the backen for the computation of quantiles and yields even faster results. (:issue:`1513`).
+* ``sdba.nbutils.quantile`` and its child functions are now faster. If the module `fastnanquantile` is installed, it is used as the backen for the computation of quantiles and yields even faster results. (:issue:`1255`, :pull:`1513`).
 
 v0.50.0 (2024-06-17)
 --------------------

From 1159cddbb6eed40dc7d6c4420431a9d35b5c1e97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Thu, 20 Jun 2024 12:14:21 -0400
Subject: [PATCH 29/40] public nan_quantile & add back docstring

---
 CHANGES.rst           |   2 +-
 test_quantile.ipynb   | 205 ++++++++----------------------------------
 xclim/core/utils.py   |   6 +-
 xclim/sdba/nbutils.py |  23 ++++-
 4 files changed, 60 insertions(+), 176 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 8ec447f8f..ed116ecd6 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -9,7 +9,7 @@ Contributors to this version: Éric Dupuis (:user:`coxipi`), Sarah Gammon (:user
 
 New features and enhancements
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* ``sdba.nbutils.quantile`` and its child functions are now faster. If the module `fastnanquantile` is installed, it is used as the backen for the computation of quantiles and yields even faster results. (:issue:`1255`, :pull:`1513`).
+* ``sdba.nbutils.quantile`` and its child functions are now faster. If the module `fastnanquantile` is installed, it is used as the backend for the computation of quantiles and yields even faster results. (:issue:`1255`, :pull:`1513`).
 
 v0.50.0 (2024-06-17)
 --------------------
diff --git a/test_quantile.ipynb b/test_quantile.ipynb
index c6b52e607..0844cb9ed 100644
--- a/test_quantile.ipynb
+++ b/test_quantile.ipynb
@@ -2,22 +2,11 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "4.2005265481208437e-26"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from __future__ import annotations\n",
     "\n",
@@ -41,56 +30,28 @@
     "# do some sort of computation to make sure it's compiled.\n",
     "np.square(\n",
     "    sdba.nbutils._sortquantile(np.copy(arr_m1), q)\n",
-    "    - sdba.nbutils._nan_quantile(np.copy(arr_m2), q)\n",
+    "    - sdba.nbutils.nan_quantile(np.copy(arr_m2), q)\n",
     ").sum()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "DUMP CPUDispatcher[_nan_quantile, type code=2691]\n",
-      "  DUMP CompileResult <built-in method _nan_quantile of _dynfunc._Closure object at 0x7f70b1b28a00>\n",
-      "    DUMP Signature [type code: 3191]\n",
-      "      Argument types:\n",
-      "      | DUMP Array[code=2697, name=array(float32, 1d, C)]\n",
-      "      | DUMP Array[code=1966, name=array(float64, 1d, C)]\n",
-      "      | DUMP Omitted[code=2902, name=omitted(default=1.0)]\n",
-      "      | DUMP Omitted[code=2902, name=omitted(default=1.0)]\n",
-      "      Return type:\n",
-      "      | DUMP Array[code=1966, name=array(float64, 1d, C)]\n",
-      "    END DUMP\n",
-      "  END DUMP\n",
-      "END DUMP CPUDispatcher[_nan_quantile]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "sdba.nbutils._nan_quantile.dump()"
+    "sdba.nbutils.nan_quantile.dump()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "877 µs ± 10.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit\n",
     "# sortquantile implementation\n",
@@ -99,89 +60,57 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "156 µs ± 1.41 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit arr_m3 = np.copy(arr_m2)\n",
-    "# 1d numba-compiled version of xclim.core.utils._nan_quantile (cheating)\n",
-    "sdba.nbutils._nan_quantile(arr_m3, q)"
+    "# 1d numba-compiled version of xclim.core.utils.nan_quantile (cheating)\n",
+    "sdba.nbutils.nan_quantile(arr_m3, q)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "901 µs ± 35.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit\n",
-    "# 1d numba-compiled version of xclim.core.utils._nan_quantile\n",
-    "sdba.nbutils._nan_quantile(np.copy(arr_m2), q)"
+    "# 1d numba-compiled version of xclim.core.utils.nan_quantile\n",
+    "sdba.nbutils.nan_quantile(np.copy(arr_m2), q)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "858 µs ± 8.82 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit\n",
     "# Abel's implementation\n",
-    "xclim.core.utils._nan_quantile(np.copy(arr_m2), q)"
+    "xclim.core.utils.nan_quantile(np.copy(arr_m2), q)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "221 µs ± 2.94 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit arr_m3 = np.copy(arr_m2)\n",
     "# Abel's implementation (in-place (cheating)).\n",
-    "xclim.core.utils._nan_quantile(arr_m3, q)"
+    "xclim.core.utils.nan_quantile(arr_m3, q)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
@@ -192,28 +121,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "217 µs ± 4.06 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit\n",
     "# Abel's implementation (in-place, sorted beforehand (super cheating!)).\n",
-    "xclim.core.utils._nan_quantile(arr_m3, q)"
+    "xclim.core.utils.nan_quantile(arr_m3, q)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
@@ -231,7 +152,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
@@ -246,39 +167,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "430 µs ± 10.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit\n",
-    "xclim.core.utils._nan_quantile(arr2, q)"
+    "xclim.core.utils.nan_quantile(arr2, q)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "435 µs ± 3.86 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%timeit\n",
     "sdba.nbutils._sortquantile(arr2, q)"
@@ -286,53 +191,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(31390,)"
-      ]
-     },
-     "execution_count": 16,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "arr2.shape"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {
     "metadata": {}
    },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "array([0.        , 0.02040816, 0.04081633, 0.06122449, 0.08163265,\n",
-       "       0.10204082, 0.12244898, 0.14285714, 0.16326531, 0.18367347,\n",
-       "       0.20408163, 0.2244898 , 0.24489796, 0.26530612, 0.28571429,\n",
-       "       0.30612245, 0.32653061, 0.34693878, 0.36734694, 0.3877551 ,\n",
-       "       0.40816327, 0.42857143, 0.44897959, 0.46938776, 0.48979592,\n",
-       "       0.51020408, 0.53061224, 0.55102041, 0.57142857, 0.59183673,\n",
-       "       0.6122449 , 0.63265306, 0.65306122, 0.67346939, 0.69387755,\n",
-       "       0.71428571, 0.73469388, 0.75510204, 0.7755102 , 0.79591837,\n",
-       "       0.81632653, 0.83673469, 0.85714286, 0.87755102, 0.89795918,\n",
-       "       0.91836735, 0.93877551, 0.95918367, 0.97959184, 1.        ])"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "q"
    ]
@@ -346,11 +220,6 @@
   }
  ],
  "metadata": {
-  "kernelspec": {
-   "display_name": "yclim",
-   "language": "python",
-   "name": "python3"
-  },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
diff --git a/xclim/core/utils.py b/xclim/core/utils.py
index 25fc7b985..f245e94c6 100644
--- a/xclim/core/utils.py
+++ b/xclim/core/utils.py
@@ -285,7 +285,7 @@ def nan_calc_percentiles(
     beta: float = 1.0,
     copy: bool = True,
 ) -> np.ndarray:
-    """Convert the percentiles to quantiles and compute them using _nan_quantile."""
+    """Convert the percentiles to quantiles and compute them using nan_quantile."""
     if percentiles is None:
         _percentiles = [50.0]
     else:
@@ -296,7 +296,7 @@ def nan_calc_percentiles(
         # doing it again is extremely costly, especially with dask.
         arr = arr.copy()
     quantiles = np.array([per / 100.0 for per in _percentiles])
-    return _nan_quantile(arr, quantiles, axis, alpha, beta)
+    return nan_quantile(arr, quantiles, axis, alpha, beta)
 
 
 def _compute_virtual_index(
@@ -419,7 +419,7 @@ def _linear_interpolation(
     return lerp_interpolation
 
 
-def _nan_quantile(
+def nan_quantile(
     arr: np.ndarray,
     quantiles: np.ndarray,
     axis: int = 0,
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 7531e4e7f..9e0f26519 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -12,7 +12,7 @@
 from xarray import DataArray, apply_ufunc
 from xarray.core import utils
 
-from xclim.core.utils import _nan_quantile
+from xclim.core.utils import nan_quantile
 
 try:
     from fastnanquantile.xrcompat import xr_apply_nanquantile
@@ -74,13 +74,13 @@ def vecquantiles(
 def _wrapper_quantile1d(arr, q):
     out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
     for index in range(out.shape[0]):
-        out[index] = _nan_quantile(arr[index], q)
+        out[index] = nan_quantile(arr[index], q)
     return out
 
 
 def _quantile(arr, q, nreduce):
     if arr.ndim == nreduce:
-        out = _nan_quantile(arr.flatten(), q)
+        out = nan_quantile(arr.flatten(), q)
     else:
         # dimensions that are reduced by quantile
         red_axis = np.arange(len(arr.shape) - nreduce, len(arr.shape))
@@ -96,7 +96,22 @@ def _quantile(arr, q, nreduce):
 
 
 def quantile(da, q, dim):
-    """Compute the quantiles from a fixed list `q`."""
+    """Compute the quantiles from a fixed list `q`.
+
+    Parameters
+    ----------
+    da : xarray.DataArray
+        The data to compute the quantiles on.
+    q : array-like
+        The quantiles to compute.
+    dim : str or sequence of str
+        The dimension along which to compute the quantiles.
+
+    Returns
+    -------
+    xarray.DataArray
+        The quantiles computed along the `dim` dimension.
+    """
     if USE_FASTNANQUANTILE is True:
         return xr_apply_nanquantile(da, dim=dim, q=q).rename({"quantile": "quantiles"})
     else:

From 8d5e4d636af8f1303e1a7be6e75d789e6ad409a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?=
 <71575674+coxipi@users.noreply.github.com>
Date: Fri, 21 Jun 2024 09:46:51 -0400
Subject: [PATCH 30/40] Delete test_quantile.ipynb

---
 test_quantile.ipynb | 238 --------------------------------------------
 1 file changed, 238 deletions(-)
 delete mode 100644 test_quantile.ipynb

diff --git a/test_quantile.ipynb b/test_quantile.ipynb
deleted file mode 100644
index 0844cb9ed..000000000
--- a/test_quantile.ipynb
+++ /dev/null
@@ -1,238 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "from __future__ import annotations\n",
-    "\n",
-    "from pathlib import Path\n",
-    "\n",
-    "import numpy as np\n",
-    "\n",
-    "import xclim\n",
-    "from xclim import sdba\n",
-    "from xclim.testing import open_dataset\n",
-    "\n",
-    "ds = open_dataset(\n",
-    "    \"sdba/CanESM2_1950-2100.nc\", cache=True, cache_dir=Path(\"~/data/xclim/\")\n",
-    ")\n",
-    "tx = ds.tasmax.sel(time=slice(\"1950\", \"1980\")).isel(location=0)\n",
-    "q = np.linspace(0, 1, 50)\n",
-    "arr = tx.values\n",
-    "arr_m1 = arr.copy()\n",
-    "arr_m2 = arr.copy()\n",
-    "\n",
-    "# do some sort of computation to make sure it's compiled.\n",
-    "np.square(\n",
-    "    sdba.nbutils._sortquantile(np.copy(arr_m1), q)\n",
-    "    - sdba.nbutils.nan_quantile(np.copy(arr_m2), q)\n",
-    ").sum()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "sdba.nbutils.nan_quantile.dump()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "%%timeit\n",
-    "# sortquantile implementation\n",
-    "sdba.nbutils._sortquantile(arr_m1, q)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "%%timeit arr_m3 = np.copy(arr_m2)\n",
-    "# 1d numba-compiled version of xclim.core.utils.nan_quantile (cheating)\n",
-    "sdba.nbutils.nan_quantile(arr_m3, q)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%timeit\n",
-    "# 1d numba-compiled version of xclim.core.utils.nan_quantile\n",
-    "sdba.nbutils.nan_quantile(np.copy(arr_m2), q)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "%%timeit\n",
-    "# Abel's implementation\n",
-    "xclim.core.utils.nan_quantile(np.copy(arr_m2), q)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "%%timeit arr_m3 = np.copy(arr_m2)\n",
-    "# Abel's implementation (in-place (cheating)).\n",
-    "xclim.core.utils.nan_quantile(arr_m3, q)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "arr_m3 = np.copy(arr_m2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "%%timeit\n",
-    "# Abel's implementation (in-place, sorted beforehand (super cheating!)).\n",
-    "xclim.core.utils.nan_quantile(arr_m3, q)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "import dask\n",
-    "import xarray as xr\n",
-    "\n",
-    "ds2 = xr.open_dataset(\n",
-    "    \"~/data/xclim/tasmax_day_CanESM5_ssp245_r1i1p1f1_gn_20150101-21001231.nc\"\n",
-    ")\n",
-    "\n",
-    "ds_sel2 = ds2.isel(lat=0, lon=0).tasmax.compute()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "arr2 = ds_sel2.values.copy()\n",
-    "percent_of_nans = 0.0\n",
-    "num_of_nans = int(percent_of_nans * arr2.size)\n",
-    "random_indices = np.random.randint(0, arr2.shape[0], size=num_of_nans)\n",
-    "arr2[random_indices] = np.nan"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "%%timeit\n",
-    "xclim.core.utils.nan_quantile(arr2, q)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "%%timeit\n",
-    "sdba.nbutils._sortquantile(arr2, q)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "arr2.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "metadata": {}
-   },
-   "outputs": [],
-   "source": [
-    "q"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

From c001863cba854d3c9c88d12f38bb1e9b4dd51640 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Fri, 21 Jun 2024 09:51:10 -0400
Subject: [PATCH 31/40] remove comments

---
 xclim/sdba/nbutils.py | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 9e0f26519..f9c0f7d8c 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -95,7 +95,7 @@ def _quantile(arr, q, nreduce):
     return out
 
 
-def quantile(da, q, dim):
+def quantile(da: DataArray, q: np.ndarray, dim: str | Sequence[Hashable]) -> DataArray:
     """Compute the quantiles from a fixed list `q`.
 
     Parameters
@@ -233,12 +233,6 @@ def _escore(tgt, sim, out):
 
 
 @njit(
-    #    [
-    #        float32[:,:](float32[:]),
-    #        float64[:,:](float64[:]),
-    #        float32[:,:](float32[:,:]),
-    #        float64[:,:](float64[:,:]),
-    #    ],
     fastmath=False,
     nogil=True,
     cache=False,
@@ -256,10 +250,6 @@ def _first_and_last_nonnull(arr):
 
 
 @njit(
-    #    [
-    #        float64[:](float32[:],float32[:],float32[:],float32[:],float32[:],float32[:],optional(typeof("constant"))),
-    #        float64[:](float64[:],float64[:],float64[:],float64[:],float64[:],float64[:],optional(typeof("constant"))),
-    #    ],
     fastmath=False,
     nogil=True,
     cache=False,
@@ -288,10 +278,6 @@ def _extrapolate_on_quantiles(
 
 
 @njit(
-    #    [
-    #        typeof((float64[:,:],float64,float64))(float32[:],float32[:],optional(boolean)),
-    #        typeof((float64[:,:],float64,float64))(float64[:],float64[:],optional(boolean)),
-    #    ],
     fastmath=False,
     nogil=True,
     cache=False,

From a6807ee637e183aa0f3247e92eee25a144014c22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?=
 <71575674+coxipi@users.noreply.github.com>
Date: Fri, 21 Jun 2024 13:49:42 -0400
Subject: [PATCH 32/40] revert unwanted AUTHORS change

---
 AUTHORS.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/AUTHORS.rst b/AUTHORS.rst
index c810afba9..34f1e835c 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -29,7 +29,7 @@ Contributors
 * David Caron `@davidcaron <https://github.com/davidcaron>`_
 * Carsten Ehbrecht <ehbrecht@dkrz.de> `@cehbrecht <https://github.com/cehbrecht>`_
 * Jeremy Fyke `@jeremyfyke <https://github.com/jeremyfyke>`_
-* Sarah Gammon `@SarahG-579462 <https://github.com/SarahG-579462>`
+* Sarah Gammon `@SarahG-579462 <https://github.com/SarahG-579462>`_
 * Tom Keel <thomas.keel.18@ucl.ac.uk> `@Thomasjkeel <https://github.com/Thomasjkeel>`_
 * Marie-Pier Labonté <labonte.marie-pier@ouranos.ca> `@marielabonte <https://github.com/marielabonte>`_
 * Ludwig Lierhammer <ludwig.lierhammer@hereon.de> `@ludwiglierhammer <https://github.com/ludwiglierhammer>`_

From 980ec94ae6fee2ee506f95b215145f00e8dfd280 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 9 Jul 2024 13:19:58 -0400
Subject: [PATCH 33/40] 1d numba compatible _nan_quantile in sdba

---
 docs/notebooks/benchmarks/sdba_quantile.ipynb |   4 +-
 xclim/core/utils.py                           |   6 +-
 xclim/sdba/nbutils.py                         | 139 +++++++++++++++++-
 3 files changed, 139 insertions(+), 10 deletions(-)

diff --git a/docs/notebooks/benchmarks/sdba_quantile.ipynb b/docs/notebooks/benchmarks/sdba_quantile.ipynb
index bc3a2a6e8..ce19b230c 100644
--- a/docs/notebooks/benchmarks/sdba_quantile.ipynb
+++ b/docs/notebooks/benchmarks/sdba_quantile.ipynb
@@ -30,7 +30,7 @@
     "Here `fastnanquantile` is the best algorithm out of \n",
     "* `xr.DataArray.quantile`\n",
     "* `nbutils.quantile`, using: \n",
-    "  * `xclim.core.utils._nan_quantile`\n",
+    "  * `xclim.core.utils.nan_quantile`\n",
     "  * `fastnanquantile`\n"
    ]
   },
@@ -113,7 +113,7 @@
     "            sdba.nbutils.quantile(da, **kws).compute()\n",
     "        timed[use_fnq].append([size, time.time() - t0])\n",
     "\n",
-    "for k, lab in zip([True, False], [\"xclim.core.utils._nan_quantile\", \"fastnanquantile\"]):\n",
+    "for k, lab in zip([True, False], [\"xclim.core.utils.nan_quantile\", \"fastnanquantile\"]):\n",
     "    arr = np.array(timed[k])\n",
     "    plt.plot(arr[:, 0], arr[:, 1] / num_tests, label=lab)\n",
     "plt.legend()\n",
diff --git a/xclim/core/utils.py b/xclim/core/utils.py
index f245e94c6..25fc7b985 100644
--- a/xclim/core/utils.py
+++ b/xclim/core/utils.py
@@ -285,7 +285,7 @@ def nan_calc_percentiles(
     beta: float = 1.0,
     copy: bool = True,
 ) -> np.ndarray:
-    """Convert the percentiles to quantiles and compute them using nan_quantile."""
+    """Convert the percentiles to quantiles and compute them using _nan_quantile."""
     if percentiles is None:
         _percentiles = [50.0]
     else:
@@ -296,7 +296,7 @@ def nan_calc_percentiles(
         # doing it again is extremely costly, especially with dask.
         arr = arr.copy()
     quantiles = np.array([per / 100.0 for per in _percentiles])
-    return nan_quantile(arr, quantiles, axis, alpha, beta)
+    return _nan_quantile(arr, quantiles, axis, alpha, beta)
 
 
 def _compute_virtual_index(
@@ -419,7 +419,7 @@ def _linear_interpolation(
     return lerp_interpolation
 
 
-def nan_quantile(
+def _nan_quantile(
     arr: np.ndarray,
     quantiles: np.ndarray,
     axis: int = 0,
diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index 945d22a57..f594d3065 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -12,8 +12,6 @@
 from xarray import DataArray, apply_ufunc
 from xarray.core import utils
 
-from xclim.core.utils import nan_quantile
-
 try:
     from fastnanquantile.xrcompat import xr_apply_nanquantile
 
@@ -22,6 +20,138 @@
     USE_FASTNANQUANTILE = False
 
 
+@njit(
+    fastmath={"arcp", "contract", "reassoc", "nsz", "afn"},
+    nogil=True,
+    cache=False,
+)
+def _get_indexes(
+    arr: np.array, virtual_indexes: np.array, valid_values_count: np.array
+) -> tuple[np.array, np.array]:
+    """Get the valid indexes of arr neighbouring virtual_indexes.
+
+    Parameters
+    ----------
+    arr : array-like
+    virtual_indexes : array-like
+    valid_values_count : array-like
+
+    Returns
+    -------
+    array-like, array-like
+        A tuple of virtual_indexes neighbouring indexes (previous and next)
+
+    Notes
+    -----
+    This is a companion function to linear interpolation of quantiles.
+    """
+    previous_indexes = np.asarray(np.floor(virtual_indexes))
+    next_indexes = np.asarray(previous_indexes + 1)
+    indexes_above_bounds = virtual_indexes >= valid_values_count - 1
+    # When indexes is above max index, take the max value of the array
+    if indexes_above_bounds.any():
+        previous_indexes[indexes_above_bounds] = -1
+        next_indexes[indexes_above_bounds] = -1
+    # When indexes is below min index, take the min value of the array
+    indexes_below_bounds = virtual_indexes < 0
+    if indexes_below_bounds.any():
+        previous_indexes[indexes_below_bounds] = 0
+        next_indexes[indexes_below_bounds] = 0
+    if (arr.dtype is np.dtype(np.float64)) or (arr.dtype is np.dtype(np.float32)):
+        # After the sort, slices having NaNs will have for last element a NaN
+        virtual_indexes_nans = np.isnan(virtual_indexes)
+        if virtual_indexes_nans.any():
+            previous_indexes[virtual_indexes_nans] = -1
+            next_indexes[virtual_indexes_nans] = -1
+    previous_indexes = previous_indexes.astype(np.intp)
+    next_indexes = next_indexes.astype(np.intp)
+    return previous_indexes, next_indexes
+
+
+@njit(
+    fastmath={"arcp", "contract", "reassoc", "nsz", "afn"},
+    nogil=True,
+    cache=False,
+)
+def _linear_interpolation(
+    left: np.array,
+    right: np.array,
+    gamma: np.array,
+) -> np.array:
+    """Compute the linear interpolation weighted by gamma on each point of two same shape arrays.
+
+    Parameters
+    ----------
+    left : array_like
+        Left bound.
+    right : array_like
+        Right bound.
+    gamma : array_like
+        The interpolation weight.
+
+    Returns
+    -------
+    array_like
+
+    Notes
+    -----
+    This is a companion function for `_nan_quantile_1d`
+    """
+    diff_b_a = np.subtract(right, left)
+    lerp_interpolation = np.asarray(np.add(left, diff_b_a * gamma))
+    ind = gamma >= 0.5
+    lerp_interpolation[ind] = right[ind] - diff_b_a[ind] * (1 - gamma[ind])
+    return lerp_interpolation
+
+
+@njit(
+    fastmath={"arcp", "contract", "reassoc", "nsz", "afn"},
+    nogil=True,
+    cache=False,
+)
+def _nan_quantile_1d(
+    arr: np.array,
+    quantiles: np.array,
+    alpha: float = 1.0,
+    beta: float = 1.0,
+) -> float | np.array:
+    """Get the quantiles of the 1-dimensional array.
+
+    A  linear interpolation is performed using alpha and beta.
+
+    Notes
+    -----
+    By default, `alpha == beta == 1` which performs the 7th method of :cite:t:`hyndman_sample_1996`.
+    with `alpha == beta == 1/3` we get the 8th method. alpha == beta == 1 reproduces the behaviour of `np.nanquantile`.
+    """
+    # We need at least two values to do an interpolation
+    valid_values_count = (~np.isnan(arr)).sum()
+
+    # Computation of indexes
+    virtual_indexes = (
+        valid_values_count * quantiles + (alpha + quantiles * (1 - alpha - beta)) - 1
+    )
+    virtual_indexes = np.asarray(virtual_indexes)
+    previous_indexes, next_indexes = _get_indexes(
+        arr, virtual_indexes, valid_values_count
+    )
+    # Sorting
+    arr.sort()
+
+    previous = arr[previous_indexes]
+    next_elements = arr[next_indexes]
+
+    # Linear interpolation
+    gamma = np.asarray(virtual_indexes - previous_indexes)
+    interpolation = _linear_interpolation(previous, next_elements, gamma)
+    # When an interpolation is in Nan range, (near the end of the sorted array) it means
+    # we can clip to the array max value.
+    result = np.where(
+        np.isnan(interpolation), arr[np.intp(valid_values_count) - 1], interpolation
+    )
+    return result
+
+
 @guvectorize(
     [(float32[:], float32, float32[:]), (float64[:], float64, float64[:])],
     "(n),()->()",
@@ -74,14 +204,13 @@ def vecquantiles(
 def _wrapper_quantile1d(arr, q):
     out = np.empty((arr.shape[0], q.size), dtype=arr.dtype)
     for index in range(out.shape[0]):
-        out[index] = nan_quantile(arr[index], q)
+        out[index] = _nan_quantile_1d(arr[index], q)
     return out
 
 
 def _quantile(arr, q, nreduce):
     if arr.ndim == nreduce:
-        print("hehe")
-        out = nan_quantile(arr.flatten(), q)
+        out = _nan_quantile_1d(arr.flatten(), q)
     else:
         # dimensions that are reduced by quantile
         red_axis = np.arange(len(arr.shape) - nreduce, len(arr.shape))

From ccc829146c84609640c1cfee2bed1dd4d7a20066 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 9 Jul 2024 13:23:02 -0400
Subject: [PATCH 34/40] test nbu.quantile

---
 tests/test_sdba/test_nbutils.py | 36 +++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 tests/test_sdba/test_nbutils.py

diff --git a/tests/test_sdba/test_nbutils.py b/tests/test_sdba/test_nbutils.py
new file mode 100644
index 000000000..bd36ba5c7
--- /dev/null
+++ b/tests/test_sdba/test_nbutils.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from xclim.sdba import nbutils as nbu
+
+
+class TestQuantiles:
+    @pytest.mark.parametrize("uses_dask", [True, False])
+    def test_quantile(self, open_dataset, uses_dask):
+        da = (
+            open_dataset("sdba/CanESM2_1950-2100.nc").sel(time=slice("1950", "1955")).pr
+        ).load()
+        if uses_dask:
+            da = da.chunk({"location": 1})
+        else:
+            da = da.load()
+        q = np.linspace(0.1, 0.99, 50)
+        out_nbu = nbu.quantile(da, q, dim="time").transpose("location", ...)
+        out_xr = da.quantile(q=q, dim="time").transpose("location", ...)
+        np.testing.assert_array_almost_equal(out_nbu.values, out_xr.values)
+
+    def test_edge_cases(self, open_dataset):
+        q = np.linspace(0.1, 0.99, 50)
+
+        # only 1 non-null value
+        da = xr.DataArray([1] + [np.NAN] * 100, dims="dim_0")
+        out_nbu = nbu.quantile(da, q, dim="dim_0")
+        np.testing.assert_array_equal(out_nbu.values, np.full_like(q, 1))
+
+        # only NANs
+        da = xr.DataArray([np.NAN] * 100, dims="dim_0")
+        out_nbu = nbu.quantile(da, q, dim="dim_0")
+        np.testing.assert_array_equal(out_nbu.values, np.full_like(q, np.NAN))

From 7905a154c90d1b3f872d26d768a427376999fafa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 9 Jul 2024 13:42:43 -0400
Subject: [PATCH 35/40] conserve arr.dtype

---
 xclim/sdba/nbutils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xclim/sdba/nbutils.py b/xclim/sdba/nbutils.py
index f594d3065..29a1ae9ad 100644
--- a/xclim/sdba/nbutils.py
+++ b/xclim/sdba/nbutils.py
@@ -142,7 +142,7 @@ def _nan_quantile_1d(
     next_elements = arr[next_indexes]
 
     # Linear interpolation
-    gamma = np.asarray(virtual_indexes - previous_indexes)
+    gamma = np.asarray(virtual_indexes - previous_indexes, dtype=arr.dtype)
     interpolation = _linear_interpolation(previous, next_elements, gamma)
     # When an interpolation is in Nan range, (near the end of the sorted array) it means
     # we can clip to the array max value.

From a2682ef0f51576b34900da4eec354b642f71a026 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 9 Jul 2024 13:50:07 -0400
Subject: [PATCH 36/40] CHANGELOG formatting

---
 CHANGELOG.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 7d1789576..2dce5b383 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -4,15 +4,16 @@ Changelog
 
 v0.52.0 (unreleased)
 --------------------
-Contributors to this version: David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`), Sarah Gammon (:user:`SarahG-579462`)
+Contributors to this version: David Huard (:user:`huard`), Éric Dupuis (:user:`coxipi`), Sarah Gammon (:user:`SarahG-579462`).
+
+New features and enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* ``xclim.sdba.nbutils.quantile`` and its child functions are now faster. If the module `fastnanquantile` is installed, it is used as the backend for the computation of quantiles and yields even faster results. (:issue:`1255`, :pull:`1513`).
 
 Internal changes
 ^^^^^^^^^^^^^^^^
 * Changed french translation of "wet days" from "jours mouillés" to "jours pluvieux". (:issue:`1825`, :pull:`1826`).
 
-New features and enhancements
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-* ``xclim.sdba.nbutils.quantile`` and its child functions are now faster. If the module `fastnanquantile` is installed, it is used as the backend for the computation of quantiles and yields even faster results. (:issue:`1255`, :pull:`1513`).
 
 
 v0.51.0 (2024-07-04)

From 1585fc0c605bac26027ab8ed3b26c78d0eef2942 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 9 Jul 2024 13:50:45 -0400
Subject: [PATCH 37/40] unwanted space

---
 CHANGELOG.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2dce5b383..596a93b18 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -15,7 +15,6 @@ Internal changes
 * Changed french translation of "wet days" from "jours mouillés" to "jours pluvieux". (:issue:`1825`, :pull:`1826`).
 
 
-
 v0.51.0 (2024-07-04)
 --------------------
 Contributors to this version: Trevor James Smith (:user:`Zeitsperre`), Pascal Bourgault (:user:`aulemahal`).

From 10b13ddf0e19f5e44bb3b83b1708d8b24e6fe0ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Tue, 9 Jul 2024 16:28:38 -0400
Subject: [PATCH 38/40] respect np2 new conventions

---
 tests/test_sdba/test_nbutils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_sdba/test_nbutils.py b/tests/test_sdba/test_nbutils.py
index bd36ba5c7..20ece1e21 100644
--- a/tests/test_sdba/test_nbutils.py
+++ b/tests/test_sdba/test_nbutils.py
@@ -26,11 +26,11 @@ def test_edge_cases(self, open_dataset):
         q = np.linspace(0.1, 0.99, 50)
 
         # only 1 non-null value
-        da = xr.DataArray([1] + [np.NAN] * 100, dims="dim_0")
+        da = xr.DataArray([1] + [np.nan] * 100, dims="dim_0")
         out_nbu = nbu.quantile(da, q, dim="dim_0")
         np.testing.assert_array_equal(out_nbu.values, np.full_like(q, 1))
 
         # only NANs
-        da = xr.DataArray([np.NAN] * 100, dims="dim_0")
+        da = xr.DataArray([np.nan] * 100, dims="dim_0")
         out_nbu = nbu.quantile(da, q, dim="dim_0")
-        np.testing.assert_array_equal(out_nbu.values, np.full_like(q, np.NAN))
+        np.testing.assert_array_equal(out_nbu.values, np.full_like(q, np.nan))

From 53c9f2396310f26cbf935d302e7a18dc5e67d56c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Thu, 11 Jul 2024 20:14:21 -0400
Subject: [PATCH 39/40] extras dependency (fastnanquantile)

---
 .github/workflows/main.yml | 2 +-
 pyproject.toml             | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c3db33531..9e765c2a4 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -138,7 +138,7 @@ jobs:
           - tox-env: py310-coverage-lmoments  # No markers -- includes slow tests
             python-version: "3.10"
             os: ubuntu-latest
-          - tox-env: py311-coverage-sbck
+          - tox-env: py311-coverage-sbck-extras
             python-version: "3.11"
             markers: -m 'not slow'
             os: ubuntu-latest
diff --git a/pyproject.toml b/pyproject.toml
index 549670960..d2ee1fc26 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -110,7 +110,10 @@ docs = [
   "sphinxcontrib-bibtex",
   "sphinxcontrib-svg2pdfconverter[Cairosvg]"
 ]
-all = ["xclim[dev]", "xclim[docs]"]
+extras = [
+  "fastnanquantile"
+]
+all = ["xclim[dev]", "xclim[docs]", "xclim[extras]"]
 
 [project.scripts]
 xclim = "xclim.cli:cli"

From 9206b7327f505d19cf6dda73679e52a0dd9e72c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=89ric=20Dupuis?= <eric.dupuis.1@umontreal.ca>
Date: Fri, 12 Jul 2024 10:01:07 -0400
Subject: [PATCH 40/40] extra mention  of extra

---
 pyproject.toml | 4 +---
 tox.ini        | 4 +++-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index d2ee1fc26..2517cd5a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -110,9 +110,7 @@ docs = [
   "sphinxcontrib-bibtex",
   "sphinxcontrib-svg2pdfconverter[Cairosvg]"
 ]
-extras = [
-  "fastnanquantile"
-]
+extras = ["fastnanquantile"]
 all = ["xclim[dev]", "xclim[docs]", "xclim[extras]"]
 
 [project.scripts]
diff --git a/tox.ini b/tox.ini
index dcb355920..ca18e6cb3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -113,7 +113,9 @@ passenv =
     LD_LIBRARY_PATH
     SKIP_NOTEBOOKS
     XCLIM_*
-extras = dev
+extras =
+    dev
+    extras: extras
 deps =
     upstream: -r CI/requirements_upstream.txt
     sbck: pybind11