Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 97 additions & 15 deletions src/blosc2/lazyexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ class ReduceOp(Enum):
MIN = np.minimum
ANY = np.any
ALL = np.all
ARGMAX = np.argmax
ARGMIN = np.argmin


class LazyArrayEnum(Enum):
Expand Down Expand Up @@ -1704,6 +1706,8 @@ def infer_reduction_dtype(dtype, operation):
return dtype
elif operation in {ReduceOp.ANY, ReduceOp.ALL}:
return np.bool_
elif operation in {ReduceOp.ARGMAX, ReduceOp.ARGMIN}:
return np.int64
else:
raise ValueError(f"Unsupported operation: {operation}")

Expand Down Expand Up @@ -1758,6 +1762,7 @@ def reduce_slices( # noqa: C901
The resulting output array.
"""
out = kwargs.pop("_output", None)
res_out_ = None # temporary required to store max/min for argmax/argmin
ne_args: dict = kwargs.pop("_ne_args", {})
if ne_args is None:
ne_args = {}
Expand Down Expand Up @@ -1787,12 +1792,15 @@ def reduce_slices( # noqa: C901
# after slicing, we reduce to calculate shape of output
if axis is None:
axis = tuple(range(len(shape_slice)))
elif not isinstance(axis, tuple):
elif np.isscalar(axis):
axis = (axis,)
axis = np.array([a if a >= 0 else a + len(shape_slice) for a in axis])
axis = tuple(a if a >= 0 else a + len(shape_slice) for a in axis)
if np.any(mask_slice):
axis = tuple(axis + np.cumsum(mask_slice)[axis]) # axis now refers to new shape with dummy dims
reduce_args["axis"] = axis
add_idx = np.cumsum(mask_slice)
axis = tuple(a + add_idx[a] for a in axis) # axis now refers to new shape with dummy dims
if reduce_args["axis"] is not None:
# conserve as integer if was not tuple originally
reduce_args["axis"] = axis[0] if np.isscalar(reduce_args["axis"]) else axis
if keepdims:
reduced_shape = tuple(1 if i in axis else s for i, s in enumerate(shape_slice))
else:
Expand Down Expand Up @@ -1868,15 +1876,16 @@ def reduce_slices( # noqa: C901
cslice = step_handler(cslice, _slice)
chunks_ = tuple(s.stop - s.start for s in cslice)
unit_steps = np.all([s.step == 1 for s in cslice])
# Starts for slice
starts = [s.start if s.start is not None else 0 for s in cslice]
if _slice == () and fast_path and unit_steps:
# Fast path
full_chunk = chunks_ == chunks
fill_chunk_operands(
operands, cslice, chunks_, full_chunk, aligned, nchunk, iter_disk, chunk_operands, reduc=True
)
else:
# Get the starts and stops for the slice
starts = [s.start if s.start is not None else 0 for s in cslice]
# Get the stops for the slice
stops = [s.stop if s.stop is not None else sh for s, sh in zip(cslice, chunks_, strict=True)]
# Get the slice of each operand
for key, value in operands.items():
Expand Down Expand Up @@ -1952,36 +1961,81 @@ def reduce_slices( # noqa: C901
result = np.any(result, **reduce_args)
elif reduce_op == ReduceOp.ALL:
result = np.all(result, **reduce_args)
elif reduce_op == ReduceOp.ARGMAX or reduce_op == ReduceOp.ARGMIN:
# offset for start of slice
slice_ref = (
starts
if _slice == ()
else [
(s - sl.start - np.sign(sl.step)) // sl.step + 1
for s, sl in zip(starts, _slice, strict=True)
]
)
result_idx = (
np.argmin(result, **reduce_args)
if reduce_op == ReduceOp.ARGMIN
else np.argmax(result, **reduce_args)
)
if reduce_args["axis"] is None: # indexing into flattened array
result = result[np.unravel_index(result_idx, shape=result.shape)]
idx_within_cslice = np.unravel_index(result_idx, shape=chunks_)
result_idx = np.ravel_multi_index(
tuple(o + i for o, i in zip(slice_ref, idx_within_cslice, strict=True)), shape_slice
)
else: # axis is an integer
result = np.take_along_axis(
result,
np.expand_dims(result_idx, axis=reduce_args["axis"]) if not keepdims else result_idx,
axis=reduce_args["axis"],
)
result = result if keepdims else result.squeeze(axis=reduce_args["axis"])
result_idx += slice_ref[reduce_args["axis"]]
else:
result = reduce_op.value.reduce(result, **reduce_args)

if not out_init:
if out is None:
out = convert_none_out(result.dtype, reduce_op, reduced_shape)
out_, res_out_ = convert_none_out(result.dtype, reduce_op, reduced_shape)
if out is not None:
out[:] = out_
del out_
else:
out2 = convert_none_out(result.dtype, reduce_op, reduced_shape)
out[:] = out2
del out2
out = out_
out_init = True

# Update the output array with the result
if reduce_op == ReduceOp.ANY:
out[reduced_slice] += result
elif reduce_op == ReduceOp.ALL:
out[reduced_slice] *= result
elif res_out_ is not None: # i.e. ReduceOp.ARGMAX or ReduceOp.ARGMIN
# need lowest index for which optimum attained
cond = (res_out_[reduced_slice] == result) & (result_idx < out[reduced_slice])
if reduce_op == ReduceOp.ARGMAX:
cond |= res_out_[reduced_slice] < result
else: # ARGMIN
cond |= res_out_[reduced_slice] > result
if reduced_slice == ():
out = np.where(cond, result_idx, out[reduced_slice])
res_out_ = np.where(cond, result, res_out_[reduced_slice])
else:
out[reduced_slice] = np.where(cond, result_idx, out[reduced_slice])
res_out_[reduced_slice] = np.where(cond, result, res_out_[reduced_slice])
else:
if reduced_slice == ():
out = reduce_op.value(out, result)
else:
out[reduced_slice] = reduce_op.value(out[reduced_slice], result)

# No longer need res_out_
del res_out_

if out is None:
if reduce_op in (ReduceOp.MIN, ReduceOp.MAX):
raise ValueError("zero-size array in min/max reduction operation is not supported")
if reduce_op in (ReduceOp.MIN, ReduceOp.MAX, ReduceOp.ARGMIN, ReduceOp.ARGMAX):
raise ValueError("zero-size array in (arg-)min/max reduction operation is not supported")
if dtype is None:
# We have no hint here, so choose a default dtype
dtype = np.float64
out = convert_none_out(dtype, reduce_op, reduced_shape)
out, _ = convert_none_out(dtype, reduce_op, reduced_shape)

final_mask = tuple(np.where(mask_slice)[0])
if np.any(mask_slice): # remove dummy dims
Expand Down Expand Up @@ -2013,7 +2067,19 @@ def convert_none_out(dtype, reduce_op, reduced_shape):
out = np.zeros(reduced_shape, dtype=np.bool_)
elif reduce_op == ReduceOp.ALL:
out = np.ones(reduced_shape, dtype=np.bool_)
return out
elif reduce_op == ReduceOp.ARGMIN:
if np.issubdtype(dtype, np.integer):
res_out_ = np.iinfo(dtype).max * np.ones(reduced_shape, dtype=dtype)
else:
res_out_ = np.inf * np.ones(reduced_shape, dtype=dtype)
out = (np.zeros(reduced_shape, dtype=blosc2.DEFAULT_INDEX), res_out_)
elif reduce_op == ReduceOp.ARGMAX:
if np.issubdtype(dtype, np.integer):
res_out_ = np.iinfo(dtype).min * np.ones(reduced_shape, dtype=dtype)
else:
res_out_ = -np.inf * np.ones(reduced_shape, dtype=dtype)
out = (np.zeros(reduced_shape, dtype=blosc2.DEFAULT_INDEX), res_out_)
return out if isinstance(out, tuple) else (out, None)


def chunked_eval( # noqa: C901
Expand Down Expand Up @@ -2707,6 +2773,22 @@ def all(self, axis=None, keepdims=False, **kwargs):
}
return self.compute(_reduce_args=reduce_args, **kwargs)

def argmax(self, axis=None, keepdims=False, **kwargs):
reduce_args = {
"op": ReduceOp.ARGMAX,
"axis": axis,
"keepdims": keepdims,
}
return self.compute(_reduce_args=reduce_args, **kwargs)

def argmin(self, axis=None, keepdims=False, **kwargs):
reduce_args = {
"op": ReduceOp.ARGMIN,
"axis": axis,
"keepdims": keepdims,
}
return self.compute(_reduce_args=reduce_args, **kwargs)

def _eval_constructor(self, expression, constructor, operands):
"""Evaluate a constructor function inside a string expression."""

Expand Down
64 changes: 64 additions & 0 deletions src/blosc2/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,60 @@ def any(
return ndarr.any(axis=axis, keepdims=keepdims, **kwargs)


def argmin(
ndarr: blosc2.Array, axis: int | None = None, keepdims: bool = False, **kwargs
) -> blosc2.Array | int:
"""
Returns the indices of the minimum values along a specified axis.

When the minimum value occurs multiple times, only the indices corresponding to the first occurrence are returned.

Parameters
----------
x: blosc2.Array
Input array. Should have a real-valued data type.

axis: int | None
Axis along which to search. If None, return index of the minimum value of flattened array. Default: None.

keepdims: bool
If True, reduced axis included in the result as singleton dimension. Otherwise, axis not included in the result. Default: False.

Returns
-------
out: blosc2.Array
If axis is None, a zero-dimensional array containing the index of the first occurrence of the minimum value; otherwise, a non-zero-dimensional array containing the indices of the minimum values.
"""
return ndarr.argmin(axis=axis, keepdims=keepdims, **kwargs)


def argmax(
ndarr: blosc2.Array, axis: int | None = None, keepdims: bool = False, **kwargs
) -> blosc2.Array | int:
"""
Returns the indices of the maximum values along a specified axis.

When the maximum value occurs multiple times, only the indices corresponding to the first occurrence are returned.

Parameters
----------
x: blosc2.Array
Input array. Should have a real-valued data type.

axis: int | None
Axis along which to search. If None, return index of the maximum value of flattened array. Default: None.

keepdims: bool
If True, reduced axis included in the result as singleton dimension. Otherwise, axis not included in the result. Default: False.

Returns
-------
out: blosc2.Array
If axis is None, a zero-dimensional array containing the index of the first occurrence of the maximum value; otherwise, a non-zero-dimensional array containing the indices of the maximum values.
"""
return ndarr.argmax(axis=axis, keepdims=keepdims, **kwargs)


def all(
ndarr: blosc2.Array,
axis: int | tuple[int] | None = None,
Expand Down Expand Up @@ -3355,6 +3409,16 @@ def max(self, axis=None, keepdims=False, **kwargs):
expr = blosc2.LazyExpr(new_op=(self, None, None))
return expr.max(axis=axis, keepdims=keepdims, **kwargs)

@is_documented_by(argmax)
def argmax(self, axis=None, keepdims=False, **kwargs):
expr = blosc2.LazyExpr(new_op=(self, None, None))
return expr.argmax(axis=axis, keepdims=keepdims, **kwargs)

@is_documented_by(argmin)
def argmin(self, axis=None, keepdims=False, **kwargs):
expr = blosc2.LazyExpr(new_op=(self, None, None))
return expr.argmin(axis=axis, keepdims=keepdims, **kwargs)

@is_documented_by(any)
def any(self, axis=None, keepdims=False, **kwargs):
expr = blosc2.LazyExpr(new_op=(self, None, None))
Expand Down
15 changes: 14 additions & 1 deletion src/blosc2/shape_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,20 @@
]

linalg_attrs = ["T", "mT"]
reducers = ["sum", "prod", "min", "max", "std", "mean", "var", "any", "all", "count_nonzero"]
reducers = [
"sum",
"prod",
"min",
"max",
"std",
"mean",
"var",
"any",
"all",
"count_nonzero",
"argmax",
"argmin",
]

# All the available constructors and reducers necessary for the (string) expression evaluator
constructors = [
Expand Down
Loading
Loading