diff --git a/src/array_api_extra/__init__.py b/src/array_api_extra/__init__.py index 7c05552a..a3d6021a 100644 --- a/src/array_api_extra/__init__.py +++ b/src/array_api_extra/__init__.py @@ -11,6 +11,7 @@ one_hot, pad, partition, + quantile, sinc, ) from ._lib._at import at @@ -48,6 +49,7 @@ "one_hot", "pad", "partition", + "quantile", "setdiff1d", "sinc", ] diff --git a/src/array_api_extra/_delegation.py b/src/array_api_extra/_delegation.py index e9a943c0..cd8c3989 100644 --- a/src/array_api_extra/_delegation.py +++ b/src/array_api_extra/_delegation.py @@ -4,7 +4,8 @@ from types import ModuleType from typing import Literal -from ._lib import _funcs +from ._lib import _funcs, _quantile +from ._lib._backends import NUMPY_VERSION from ._lib._utils._compat import ( array_namespace, is_cupy_namespace, @@ -768,7 +769,7 @@ def argpartition( Axis along which to partition. The default is ``-1`` (the last axis). If ``None``, the flattened array is used. xp : array_namespace, optional - The standard-compatible namespace for `x`. Default: infer. + The standard-compatible namespace for `a`. Default: infer. Returns ------- @@ -895,3 +896,273 @@ def isin( return xp.isin(a, b, assume_unique=assume_unique, invert=invert) return _funcs.isin(a, b, assume_unique=assume_unique, invert=invert, xp=xp) + + +def quantile( + a: Array, + q: float | Array, + /, + axis: int | None = None, + method: str = "linear", + keepdims: bool = False, + nan_policy: str = "propagate", + *, + weights: Array | None = None, + xp: ModuleType | None = None, +) -> Array: + """ + Compute the q-th quantile of the data along the specified axis. + + Parameters + ---------- + a : array_like of real numbers + Input array or object that can be converted to an array. + q : array_like of float + Probability or sequence of probabilities of the quantiles to compute. + Values must be between 0 and 1 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The default is + to compute the quantile(s) along a flattened version of the array. + method : str, optional + This parameter specifies the method to use for estimating the + quantile. There are many different methods. + The recommended options, numbered as they appear in [1]_, are: + + 1. 'inverted_cdf' + 2. 'averaged_inverted_cdf' + 3. 'closest_observation' + 4. 'interpolated_inverted_cdf' + 5. 'hazen' + 6. 'weibull' + 7. 'linear' (default) + 8. 'median_unbiased' + 9. 'normal_unbiased' + + The first three methods are discontinuous. + Only 'linear', 'inverted_cdf' and 'averaged_inverted_cdf' are implemented. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + nan_policy : str, optional + 'propagate' (default) or 'omit'. + 'omit' is supported only when `weights` are provided. + + weights : array_like, optional + An array of weights associated with the values in `a`. Each value in + `a` contributes to the quantile according to its associated weight. + The weights array can either be 1-D (in which case its length must be + the size of `a` along the given axis) or of the same shape as `a`. + If `weights=None`, then all data in `a` are assumed to have a + weight equal to one. + Only `method="inverted_cdf"` or `method="averaged_inverted_cdf"` + support weights. See the notes for more details. + + xp : array_namespace, optional + The standard-compatible namespace for `a` and `q`. Default: infer. + + Returns + ------- + scalar or ndarray + If `q` is a single probability and `axis=None`, then the result + is a scalar. If multiple probability levels are given, first axis + of the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + Notes + ----- + Given a sample `a` from an underlying distribution, `quantile` provides a + nonparametric estimate of the inverse cumulative distribution function. + + By default, this is done by interpolating between adjacent elements in + ``y``, a sorted copy of `a`:: + + (1-g)*y[j] + g*y[j+1] + + where the index ``j`` and coefficient ``g`` are the integral and + fractional components of ``q * (n-1)``, and ``n`` is the number of + elements in the sample. + + This is a special case of Equation 1 of H&F [1]_. More generally, + + - ``j = (q*n + m - 1) // 1``, and + - ``g = (q*n + m - 1) % 1``, + + where ``m`` may be defined according to several different conventions. + The preferred convention may be selected using the ``method`` parameter: + + =============================== =============== =============== + ``method`` number in H&F ``m`` + =============================== =============== =============== + ``interpolated_inverted_cdf`` 4 ``0`` + ``hazen`` 5 ``1/2`` + ``weibull`` 6 ``q`` + ``linear`` (default) 7 ``1 - q`` + ``median_unbiased`` 8 ``q/3 + 1/3`` + ``normal_unbiased`` 9 ``q/4 + 3/8`` + =============================== =============== =============== + + Note that indices ``j`` and ``j + 1`` are clipped to the range ``0`` to + ``n - 1`` when the results of the formula would be outside the allowed + range of non-negative indices. The ``- 1`` in the formulas for ``j`` and + ``g`` accounts for Python's 0-based indexing. + + The table above includes only the estimators from H&F that are continuous + functions of probability `q` (estimators 4-9). NumPy also provides the + three discontinuous estimators from H&F (estimators 1-3), where ``j`` is + defined as above, ``m`` is defined as follows, and ``g`` is a function + of the real-valued ``index = q*n + m - 1`` and ``j``. + + 1. ``inverted_cdf``: ``m = 0`` and ``g = int(index - j > 0)`` + 2. ``averaged_inverted_cdf``: ``m = 0`` and + ``g = (1 + int(index - j > 0)) / 2`` + 3. ``closest_observation``: ``m = -1/2`` and + ``g = 1 - int((index == j) & (j%2 == 1))`` + + **Weighted quantiles:** + More formally, the quantile at probability level :math:`q` of a cumulative + distribution function :math:`F(y)=P(Y \\leq y)` with probability measure + :math:`P` is defined as any number :math:`x` that fulfills the + *coverage conditions* + + .. math:: P(Y < x) \\leq q \\quad\\text{and}\\quad P(Y \\leq x) \\geq q + + with random variable :math:`Y\\sim P`. + Sample quantiles, the result of `quantile`, provide nonparametric + estimation of the underlying population counterparts, represented by the + unknown :math:`F`, given a data vector `a` of length ``n``. + + Some of the estimators above arise when one considers :math:`F` as the + empirical distribution function of the data, i.e. + :math:`F(y) = \\frac{1}{n} \\sum_i 1_{a_i \\leq y}`. + Then, different methods correspond to different choices of :math:`x` that + fulfill the above coverage conditions. Methods that follow this approach + are ``inverted_cdf`` and ``averaged_inverted_cdf``. + + For weighted quantiles, the coverage conditions still hold. The + empirical cumulative distribution is simply replaced by its weighted + version, i.e. + :math:`P(Y \\leq t) = \\frac{1}{\\sum_i w_i} \\sum_i w_i 1_{x_i \\leq t}`. + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ + if xp is None: + xp = array_namespace(a) + if is_pydata_sparse_namespace(xp): + msg = "Sparse backend not supported" + raise ValueError(msg) + + methods = {"linear", "inverted_cdf", "averaged_inverted_cdf"} + if method not in methods: + msg = f"`method` must be one of {methods}" + raise ValueError(msg) + nan_policies = {"propagate", "omit"} + if nan_policy not in nan_policies: + msg = f"`nan_policy` must be one of {nan_policies}" + raise ValueError(msg) + + a = xp.asarray(a) + if not xp.isdtype(a.dtype, ("integral", "real floating")): + msg = "`a` must have real dtype." + raise ValueError(msg) + if not xp.isdtype(xp.asarray(q).dtype, "real floating"): + msg = "`q` must have real floating dtype." + raise ValueError(msg) + weights = None if weights is None else xp.asarray(weights) + + ndim = a.ndim + if ndim < 1: + msg = "`a` must be at least 1-dimensional." + raise TypeError(msg) + if axis is not None and ((axis >= ndim) or (axis < -ndim)): + msg = "`axis` is not compatible with the dimension of `a`." + raise ValueError(msg) + if weights is None: + if nan_policy != "propagate": + msg = "When `weights` aren't provided, `nan_policy` must be 'propagate'." + raise ValueError(msg) + else: + if method not in {"inverted_cdf", "averaged_inverted_cdf"}: + msg = f"`method` '{method}' not supported with weights." + raise ValueError(msg) + if not xp.isdtype(weights.dtype, ("integral", "real floating")): + msg = "`weights` must have real dtype." + raise ValueError(msg) + if ndim > 2: + msg = "When weights are provided, dimension of `a` must be 1 or 2." + raise ValueError(msg) + if a.shape != weights.shape: + if axis is None: + msg = "Axis must be specified when shapes of `a` and ̀ weights` differ." + raise TypeError(msg) + if weights.shape != eager_shape(a, axis): + msg = ( + "Shape of weights must be consistent with shape" + " of a along specified axis." + ) + raise ValueError(msg) + if axis is None and ndim == 2: + msg = "Axis must be specified when `a` and ̀ weights` are 2d." + raise ValueError(msg) + + # Align result dtype with what numpy does: + dtype = xp.result_type( + xp.float64 if xp.isdtype(a.dtype, "integral") else a, + xp.asarray(q), + xp.float64, # at least float64 + ) + device = get_device(a) + a = xp.asarray(a, dtype=dtype, device=device) + q_arr = xp.asarray(q, dtype=dtype, device=device) + # TODO: cast weights here? Assert weights are on the same device as `a`? + + if xp.any((q_arr > 1) | (q_arr < 0) | xp.isnan(q_arr)): + msg = "`q` values must be in the range [0, 1]" + raise ValueError(msg) + + # Delegate when possible. + # Note: No delegation for dask: I couldn't make it work. + basic_case = method == "linear" and weights is None + + np_2 = NUMPY_VERSION >= (2, 0) + np_handles_weights = np_2 and nan_policy == "propagate" and method == "inverted_cdf" + if weights is None: + if is_numpy_namespace(xp) and (basic_case or np_2): + quantile = xp.quantile if nan_policy == "propagate" else xp.nanquantile + return quantile(a, q_arr, axis=axis, method=method, keepdims=keepdims) + elif is_numpy_namespace(xp) and np_handles_weights: + # TODO: call nanquantile for nan_policy == "omit" once + # https://github.com/numpy/numpy/issues/29709 is fixed + return xp.quantile( + a, q_arr, axis=axis, method=method, keepdims=keepdims, weights=weights + ) + + jax_or_cupy = is_jax_namespace(xp) or is_cupy_namespace(xp) + if jax_or_cupy and basic_case and nan_policy == "propagate": + return xp.quantile(a, q_arr, axis=axis, method=method, keepdims=keepdims) + if is_torch_namespace(xp) and basic_case: + quantile = xp.quantile if nan_policy == "propagate" else xp.nanquantile + return quantile(a, q_arr, dim=axis, interpolation=method, keepdim=keepdims) + + # Otherwise call our implementation (will sort data) + return _quantile.quantile( + # XXX: I'm not sure we want to support dask, it seems uterly slow... + a, + q_arr, + axis=axis, + method=method, + keepdims=keepdims, + nan_policy=nan_policy, + weights=weights, + xp=xp, + ) diff --git a/src/array_api_extra/_lib/_quantile.py b/src/array_api_extra/_lib/_quantile.py new file mode 100644 index 00000000..df23e94f --- /dev/null +++ b/src/array_api_extra/_lib/_quantile.py @@ -0,0 +1,186 @@ +"""Implementations of the quantile function.""" + +from types import ModuleType + +from ._utils._compat import device as get_device +from ._utils._helpers import eager_shape +from ._utils._typing import Array, Device + + +def quantile( # numpydoc ignore=PR01,RT01 + a: Array, + q: Array, + /, + method: str = "linear", + axis: int | None = None, + keepdims: bool = False, + nan_policy: str = "propagate", + *, + weights: Array | None = None, + xp: ModuleType, +) -> Array: + """See docstring in `array_api_extra._delegation.py`.""" + device = get_device(a) + a_shape = list(a.shape) + + q_scalar = q.ndim == 0 + if q_scalar: + q = xp.reshape(q, (1,)) + + axis_none = axis is None + a_ndim = a.ndim + if axis is None: + a = xp.reshape(a, (-1,)) + axis = 0 + else: + axis = int(axis) + + (n,) = eager_shape(a, axis) + + if weights is None: + res = _quantile(a, q, n, axis, method, xp) + if not axis_none: + res = xp.moveaxis(res, axis, 0) + else: + weights_arr = xp.asarray(weights, dtype=xp.float64, device=device) + average = method == "averaged_inverted_cdf" + res = _weighted_quantile( + a, + q, + weights_arr, + n, + axis, + average, + nan_policy=nan_policy, + xp=xp, + device=device, + ) + + # reshaping to conform to doc/other libs' behavior + if axis_none: + if keepdims: + res = xp.reshape(res, q.shape + (1,) * a_ndim) + elif keepdims: + a_shape[axis] = 1 + res = xp.reshape(res, q.shape + tuple(a_shape)) + + return res[0, ...] if q_scalar else res + + +def _quantile( # numpydoc ignore=PR01,RT01 + a: Array, q: Array, n: int, axis: int, method: str, xp: ModuleType +) -> Array: + """Compute quantile by sorting `a`.""" + a = xp.sort(a, axis=axis, stable=False) + mask_nan = xp.any(xp.isnan(a), axis=axis, keepdims=True) + if xp.any(mask_nan): + # propagate NaNs: + mask = xp.repeat(mask_nan, n, axis=axis) + a = xp.where(mask, xp.nan, a) + del mask + + m = ( + 1 - q + if method == "linear" + # method is "inverted_cdf" or "averaged_inverted_cdf" + else xp.asarray(0, dtype=q.dtype) + ) + + jg = q * float(n) + m - 1 + j = jg // 1 + g = jg % 1 + if method == "inverted_cdf": + g = xp.astype((g > 0), jg.dtype) + elif method == "averaged_inverted_cdf": + g = (1 + xp.astype((g > 0), jg.dtype)) / 2 + + g = xp.where(j < 0, 0, g) # equivalent to g[j < 0] = 0, but works with readonly + new_g_shape = [1] * a.ndim + new_g_shape[axis] = g.shape[0] + g = xp.reshape(g, tuple(new_g_shape)) + + j = xp.clip(j, 0.0, float(n - 1)) + jp1 = xp.clip(j + 1, 0.0, float(n - 1)) + # `̀j` and `jp1` are 1d arrays + + return (1 - g) * xp.take(a, xp.astype(j, xp.int64), axis=axis) + g * xp.take( + a, xp.astype(jp1, xp.int64), axis=axis + ) + + +def _weighted_quantile( # numpydoc ignore=PR01,RT01 + a: Array, + q: Array, + weights: Array, + n: int, + axis: int, + average: bool, + nan_policy: str, + xp: ModuleType, + device: Device, +) -> Array: + """ + Compute weighted quantile using searchsorted on CDF. + + `a` is expected to be 1d or 2d. + """ + a = xp.moveaxis(a, axis, -1) + if weights.ndim > 1: + weights = xp.moveaxis(weights, axis, -1) + sorter = xp.argsort(a, axis=-1, stable=False) + + if a.ndim == 1: + return _weighted_quantile_sorted_1d( + a, weights, sorter, q, n, average, nan_policy, xp, device + ) + + (d,) = eager_shape(a, axis=0) + res = [] + for idx in range(d): + w = weights if weights.ndim == 1 else weights[idx, ...] + res.append( + _weighted_quantile_sorted_1d( + a[idx, ...], w, sorter[idx, ...], q, n, average, nan_policy, xp, device + ) + ) + + return xp.stack(res, axis=1) + + +def _weighted_quantile_sorted_1d( # numpydoc ignore=GL08 + x: Array, + w: Array, + sorter: Array, + q: Array, + n: int, + average: bool, + nan_policy: str, + xp: ModuleType, + device: Device, +) -> Array: + if nan_policy == "omit": + w = xp.where(xp.isnan(x), 0.0, w) + elif xp.any(xp.isnan(x)): + return xp.full(q.shape, xp.nan, dtype=x.dtype, device=device) + + cdf = xp.cumulative_sum(xp.take(w, sorter)) + t = cdf[-1] * q + + i = xp.searchsorted(cdf, t, side="left") + i = xp.clip(i, 0, n - 1) + i = xp.take(sorter, i) + + q0 = t == 0.0 + if average or xp.any(q0): + j = xp.searchsorted(cdf, t, side="right") + j = xp.clip(j, 0, n - 1) + j = xp.take(sorter, j) + # Ignore leading `weights=0` observations when `q=0` + i = xp.where(q0, j, i) + + if average: + # Ignore trailing `weights=0` observations when `q=1` + j = xp.where(q == 1.0, i, j) + return (xp.take(x, i) + xp.take(x, j)) / 2 + + return xp.take(x, i) diff --git a/tests/test_funcs.py b/tests/test_funcs.py index 92e794ed..09a5babe 100644 --- a/tests/test_funcs.py +++ b/tests/test_funcs.py @@ -1,7 +1,7 @@ import math import warnings from types import ModuleType -from typing import Any, cast +from typing import Any, Literal, cast, get_args import hypothesis import hypothesis.extra.numpy as npst @@ -29,6 +29,7 @@ one_hot, pad, partition, + quantile, setdiff1d, sinc, ) @@ -1529,3 +1530,282 @@ def test_kind(self, xp: ModuleType, library: Backend): expected = xp.asarray([False, True, False, True]) res = isin(a, b, kind="sort") xp_assert_equal(res, expected) + + +METHOD = Literal["linear", "inverted_cdf", "averaged_inverted_cdf"] + + +@pytest.mark.xfail_xp_backend(Backend.SPARSE, reason="no xp.take") +class TestQuantile: + def test_basic(self, xp: ModuleType): + x = xp.asarray([1, 2, 3, 4, 5]) + actual = quantile(x, 0.5) + expect = xp.asarray(3.0, dtype=xp.float64) + xp_assert_close(actual, expect) + + def test_xp(self, xp: ModuleType): + x = xp.asarray([1, 2, 3, 4, 5]) + actual = quantile(x, 0.5, xp=xp) + expect = xp.asarray(3.0, dtype=xp.float64) + xp_assert_close(actual, expect) + + def test_multiple_quantiles(self, xp: ModuleType): + x = xp.asarray([1, 2, 3, 4, 5]) + actual = quantile(x, xp.asarray([0.25, 0.5, 0.75])) + expect = xp.asarray([2.0, 3.0, 4.0], dtype=xp.float64) + xp_assert_close(actual, expect) + + def test_shape(self, xp: ModuleType): + rng = np.random.default_rng() + a = xp.asarray(rng.random((3, 4, 5))) + q = xp.asarray(rng.random(2)) + assert quantile(a, q, axis=0).shape == (2, 4, 5) + assert quantile(a, q, axis=1).shape == (2, 3, 5) + assert quantile(a, q, axis=2).shape == (2, 3, 4) + + assert quantile(a, q, axis=0, keepdims=True).shape == (2, 1, 4, 5) + assert quantile(a, q, axis=1, keepdims=True).shape == (2, 3, 1, 5) + assert quantile(a, q, axis=2, keepdims=True).shape == (2, 3, 4, 1) + + @pytest.mark.parametrize("with_nans", ["no_nans", "with_nans"]) + @pytest.mark.parametrize("method", get_args(METHOD)) + def test_against_numpy_1d(self, xp: ModuleType, with_nans: str, method: METHOD): + rng = np.random.default_rng() + a_np = rng.random(40) + if with_nans == "with_nans": + a_np[rng.random(a_np.shape) < rng.random() * 0.5] = np.nan + q_np = np.asarray([0, *rng.random(2), 1]) + a = xp.asarray(a_np) + q = xp.asarray(q_np) + + actual = quantile(a, q, method=method) + expected = np.quantile(a_np, q_np, method=method) + expected = xp.asarray(expected) + xp_assert_close(actual, expected) + + @pytest.mark.parametrize("with_nans", ["no_nans", "with_nans"]) + @pytest.mark.parametrize("method", get_args(METHOD)) + @pytest.mark.parametrize("keepdims", [True, False]) + def test_against_numpy_nd( + self, xp: ModuleType, keepdims: bool, with_nans: str, method: METHOD + ): + rng = np.random.default_rng() + a_np = rng.random((3, 4, 5)) + if with_nans == "with_nans": + a_np[rng.random(a_np.shape) < rng.random()] = np.nan + q_np = rng.random(2) + a = xp.asarray(a_np) + q = xp.asarray(q_np) + for axis in [None, *range(a.ndim)]: + actual = quantile(a, q, axis=axis, keepdims=keepdims, method=method) + expected = np.quantile( + a_np, q_np, axis=axis, keepdims=keepdims, method=method + ) + expected = xp.asarray(expected) + xp_assert_close(actual, expected) + + @pytest.mark.parametrize("nan_policy", ["no_nans", "propagate"]) + @pytest.mark.parametrize("with_weights", ["with_weights", "no_weights"]) + def test_against_median_min_max( + self, + xp: ModuleType, + nan_policy: str, + with_weights: str, + ): + rng = np.random.default_rng() + n = 40 + a_np = rng.random(n) + w_np = rng.integers(0, 2, n) if with_weights == "with_weights" else None + if nan_policy == "no_nans": + nan_policy = "propagate" + else: + # from 0% to 50% of NaNs: + a_np[rng.random(n) < rng.random(n) * 0.5] = np.nan + if w_np is not None: + # ensure at least one NaN on non-null weight: + (nz_weights_idx,) = np.where(w_np > 0) + a_np[nz_weights_idx[0]] = np.nan + + a_np_med = a_np if w_np is None else a_np[w_np > 0] + a = xp.asarray(a_np) + w = xp.asarray(w_np) if w_np is not None else None + + np_median = np.nanmedian if nan_policy == "omit" else np.median + expected = np_median(a_np_med) + method = "averaged_inverted_cdf" + actual = quantile(a, 0.5, method=method, nan_policy=nan_policy, weights=w) + xp_assert_close(actual, xp.asarray(expected)) + + for method in ["inverted_cdf", "averaged_inverted_cdf"]: + np_min = np.nanmin if nan_policy == "omit" else np.min + expected = np_min(a_np_med) + actual = quantile(a, 0.0, method=method, nan_policy=nan_policy, weights=w) + xp_assert_close(actual, xp.asarray(expected)) + + np_max = np.nanmax if nan_policy == "omit" else np.max + expected = np_max(a_np_med) + actual = quantile(a, 1.0, method=method, nan_policy=nan_policy, weights=w) + xp_assert_close(actual, xp.asarray(expected)) + + @pytest.mark.parametrize("keepdims", [True, False]) + @pytest.mark.parametrize("nan_policy", ["no_nans", "propagate", "omit"]) + @pytest.mark.parametrize("q_np", [0.5, 0.0, 1.0, np.linspace(0, 1, num=11)]) + def test_weighted_against_numpy( + self, xp: ModuleType, keepdims: bool, q_np: Array | float, nan_policy: str + ): + if NUMPY_VERSION < (2, 0): + pytest.xfail(reason="NumPy 1.x does not support weights in quantile") + rng = np.random.default_rng() + n, d = 10, 20 + a_2d = rng.random((n, d)) + mask_nan = np.zeros((n, d), dtype=bool) + if nan_policy == "no_nans": + nan_policy = "propagate" + else: + # from 0% to 100% of NaNs: + mask_nan = rng.random((n, d)) < rng.random((n, 1)) + # don't put nans in the first row: + mask_nan[:] = False + a_2d[mask_nan] = np.nan + + q = xp.asarray(q_np, copy=True) + m: METHOD = "inverted_cdf" + + np_quantile = np.quantile + if nan_policy == "omit": + np_quantile = np.nanquantile + + for a_np, w_np, axis in [ + (a_2d, rng.random(n), 0), + (a_2d, rng.random(d), 1), + (a_2d[0], rng.random(d), None), + (a_2d, rng.integers(0, 3, n), 0), + (a_2d, rng.integers(0, 2, d), 1), + (a_2d, rng.integers(0, 2, (n, d)), 0), + (a_2d, rng.integers(0, 3, (n, d)), 1), + ]: + with warnings.catch_warnings(record=True) as warning: + divide_msg = "invalid value encountered in divide" + warnings.filterwarnings("always", divide_msg, RuntimeWarning) + nan_slice_msg = "All-NaN slice encountered" + warnings.filterwarnings("ignore", nan_slice_msg, RuntimeWarning) + try: + expected = np_quantile( + a_np, + np.asarray(q_np), + axis=axis, + method=m, + weights=w_np, # type: ignore[arg-type] + keepdims=keepdims, + ) + except IndexError: + continue + if warning: + # this means some weights sum was 0 + # in this case we skip calling xpx.quantile + continue + expected = xp.asarray(expected) + + a = xp.asarray(a_np) + w = xp.asarray(w_np) + actual = quantile( + a, + q, + axis=axis, + method=m, + weights=w, + keepdims=keepdims, + nan_policy=nan_policy, + ) + xp_assert_close(actual, expected) + + def test_methods(self, xp: ModuleType): + x = xp.asarray([1, 2, 3, 4, 5]) + methods = ["linear", "inverted_cdf", "averaged_inverted_cdf"] + for method in methods: + actual = quantile(x, 0.5, method=method) + # All methods should give reasonable results + assert 2.5 <= float(actual) <= 3.5 + + def test_edge_cases(self, xp: ModuleType): + x = xp.asarray([1, 2, 3, 4, 5]) + # q = 0 should give minimum + actual = quantile(x, 0.0) + expect = xp.asarray(1.0, dtype=xp.float64) + xp_assert_close(actual, expect) + + # q = 1 should give maximum + actual = quantile(x, 1.0) + expect = xp.asarray(5.0, dtype=xp.float64) + xp_assert_close(actual, expect) + + def test_invalid_q(self, xp: ModuleType): + x = xp.asarray([1, 2, 3, 4, 5]) + # q > 1 should raise + with pytest.raises( + ValueError, match=r"`q` values must be in the range \[0, 1\]" + ): + _ = quantile(x, 1.5) + # q < 0 should raise + with pytest.raises( + ValueError, match=r"`q` values must be in the range \[0, 1\]" + ): + _ = quantile(x, -0.5) + + def test_invalid_shape(self, xp: ModuleType): + with pytest.raises(TypeError, match="at least 1-dimensional"): + _ = quantile(xp.asarray(3.0), 0.5) + with pytest.raises(ValueError, match="not compatible with the dimension"): + _ = quantile(xp.asarray([3.0]), 0.5, axis=1) + # with weights: + method = "inverted_cdf" + + shape = (2, 3, 4) + with pytest.raises(ValueError, match="dimension of `a` must be 1 or 2"): + _ = quantile( + xp.ones(shape), 0.5, axis=1, weights=xp.ones(shape), method=method + ) + + with pytest.raises(TypeError, match="Axis must be specified"): + _ = quantile(xp.ones((2, 3)), 0.5, weights=xp.ones(3), method=method) + + with pytest.raises(ValueError, match="Shape of weights must be consistent"): + _ = quantile( + xp.ones((2, 3)), 0.5, axis=0, weights=xp.ones(3), method=method + ) + + with pytest.raises(ValueError, match="Axis must be specified"): + _ = quantile(xp.ones((2, 3)), 0.5, weights=xp.ones((2, 3)), method=method) + + def test_invalid_dtype(self, xp: ModuleType): + with pytest.raises(ValueError, match="`a` must have real dtype"): + _ = quantile(xp.ones(5, dtype=xp.bool), 0.5) + + a = xp.ones(5) + with pytest.raises(ValueError, match="`q` must have real floating dtype"): + _ = quantile(a, xp.asarray([0, 1])) + + weights = xp.ones(5, dtype=xp.bool) + with pytest.raises(ValueError, match="`weights` must have real dtype"): + _ = quantile(a, 0.5, weights=weights, method="inverted_cdf") + + def test_invalid_method(self, xp: ModuleType): + with pytest.raises(ValueError, match="`method` must be one of"): + _ = quantile(xp.ones(5), 0.5, method="invalid") + + with pytest.raises(ValueError, match="not supported with weights"): + _ = quantile(xp.ones(5), 0.5, method="linear", weights=xp.ones(5)) + + def test_invalid_nan_policy(self, xp: ModuleType): + with pytest.raises(ValueError, match="`nan_policy` must be one of"): + _ = quantile(xp.ones(5), 0.5, nan_policy="invalid") + + with pytest.raises(ValueError, match="must be 'propagate'"): + _ = quantile(xp.ones(5), 0.5, nan_policy="omit") + + def test_device(self, xp: ModuleType, device: Device): + if hasattr(device, "type") and device.type == "meta": # pyright: ignore[reportAttributeAccessIssue] + pytest.xfail("No Tensor.item() on meta device") + x = xp.asarray([1, 2, 3, 4, 5], device=device) + actual = quantile(x, 0.5) + assert get_device(actual) == device