From 89c3576d2b1a0ddff8ef4a4e451f82dc69374356 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 14:30:00 +0200 Subject: [PATCH 01/19] Extend _result_typeid() utility ufunc function to template the mapping table --- .../elementwise_functions_type_utils.cpp | 3 ++- .../elementwise_functions_type_utils.hpp | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp index d90a63cf4c2..ed21c618069 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp @@ -78,7 +78,8 @@ py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) } } -int _result_typeid(int arg_typeid, const int *fn_output_id) +template +output_idT _result_typeid(int arg_typeid, const output_idT *fn_output_id) { if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { throw py::value_error("Input typeid " + std::to_string(arg_typeid) + diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp index 465f3f135f0..05903d5ba9e 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp @@ -36,15 +36,17 @@ // dpctl tensor headers #include "utils/type_dispatch.hpp" +namespace dpnp::extensions::py_internal::type_utils +{ namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace dpnp::extensions::py_internal::type_utils -{ /*! @brief Produce dtype from a type number */ extern py::dtype _dtype_from_typenum(td_ns::typenum_t); /*! 
@brief Lookup typeid of the result from typeid of * argument and the mapping table */ -extern int _result_typeid(int, const int *); +template +extern output_idT _result_typeid(int arg_typeid, + const output_idT *fn_output_id); } // namespace dpnp::extensions::py_internal::type_utils From 92baa1ea427340eaa92a71ec11ecf5f8c77f0986 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 14:32:03 +0200 Subject: [PATCH 02/19] Remove unnecessary import in ldexp.cpp --- .../backend/extensions/ufunc/elementwise_functions/ldexp.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp index 5cd3586f542..c8e9598cb1b 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp @@ -45,7 +45,6 @@ // dpctl tensor headers #include "kernels/elementwise_functions/common.hpp" -#include "kernels/elementwise_functions/maximum.hpp" #include "utils/type_dispatch.hpp" namespace dpnp::extensions::ufunc @@ -57,11 +56,7 @@ namespace td_ns = dpctl::tensor::type_dispatch; namespace impl { namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -namespace max_ns = dpctl::tensor::kernels::maximum; -// Supports the same types table as for maximum function in dpctl -// template -// using OutputType = max_ns::MaximumOutputType; template struct OutputType { From c086482636d8eba1142845635bb4a854a87e9ab1 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 14:33:00 +0200 Subject: [PATCH 03/19] Declare namespace in correct scope in fmod.cpp --- dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp index 7160f0795ea..5b83595b3f7 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp @@ -51,11 +51,11 @@ namespace dpnp::extensions::ufunc { namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; namespace impl { namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -namespace td_ns = dpctl::tensor::type_dispatch; /** * @brief A factory to define pairs of supported types for which From fea147712a4ef7bccb882c60aa4006f96064feff Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 14:51:12 +0200 Subject: [PATCH 04/19] Add unary implementation for ufunc with two output arrays --- .../elementwise_functions.hpp | 294 ++++++++++++++++-- .../simplify_iteration_space.cpp | 31 +- 2 files changed, 290 insertions(+), 35 deletions(-) diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index 9dfe8cae976..ac592475459 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -28,8 +28,11 @@ #pragma once +#include #include #include +#include +#include #include @@ -43,20 +46,18 @@ // dpctl tensor headers #include "kernels/alignment.hpp" -// #include "kernels/dpctl_tensor_types.hpp" #include "utils/memory_overlap.hpp" #include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/sycl_alloc_utils.hpp" 
#include "utils/type_dispatch.hpp" -namespace py = pybind11; -namespace td_ns = dpctl::tensor::type_dispatch; - static_assert(std::is_same_v); namespace dpnp::extensions::py_internal { +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::kernels::alignment_utils::is_aligned; using dpctl::tensor::kernels::alignment_utils::required_alignment; @@ -108,10 +109,10 @@ std::pair const py::ssize_t *src_shape = src.get_shape_raw(); const py::ssize_t *dst_shape = dst.get_shape_raw(); bool shapes_equal(true); - size_t src_nelems(1); + std::size_t src_nelems(1); for (int i = 0; i < src_nd; ++i) { - src_nelems *= static_cast(src_shape[i]); + src_nelems *= static_cast(src_shape[i]); shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]); } if (!shapes_equal) { @@ -277,6 +278,262 @@ py::object py_unary_ufunc_result_type(const py::dtype &input_dtype, } } +/** + * @brief Template implementing Python API for a unary elementwise function + * with two output arrays. + */ +template +std::pair + py_unary_two_outputs_ufunc(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst1, + const dpctl::tensor::usm_ndarray &dst2, + sycl::queue &q, + const std::vector &depends, + // + const output_typesT &output_type_vec, + const contig_dispatchT &contig_dispatch_vector, + const strided_dispatchT &strided_dispatch_vector) +{ + int src_typenum = src.get_typenum(); + int dst1_typenum = dst1.get_typenum(); + int dst2_typenum = dst2.get_typenum(); + + const auto &array_types = td_ns::usm_ndarray_types(); + int src_typeid = array_types.typenum_to_lookup_id(src_typenum); + int dst1_typeid = array_types.typenum_to_lookup_id(dst1_typenum); + int dst2_typeid = array_types.typenum_to_lookup_id(dst2_typenum); + + std::pair func_output_typeids = output_type_vec[src_typeid]; + + // check that types are supported + if (dst1_typeid != func_output_typeids.first || + dst2_typeid != func_output_typeids.second) + { + throw py::value_error( + "One of destination arrays has unexpected elemental data type."); + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(q, {src, dst1, dst2})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst1); + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst2); + + // check that dimensions are the same + int src_nd = src.get_ndim(); + if (src_nd != dst1.get_ndim() || src_nd != dst2.get_ndim()) { + throw py::value_error("Array dimensions are not the same."); + } + + // check that shapes are the same + const py::ssize_t *src_shape = src.get_shape_raw(); + const py::ssize_t *dst1_shape = dst1.get_shape_raw(); + const py::ssize_t *dst2_shape = dst2.get_shape_raw(); + bool shapes_equal(true); + std::size_t src_nelems(1); + + for (int i = 0; i < src_nd; ++i) { + src_nelems *= static_cast(src_shape[i]); + shapes_equal = shapes_equal && (src_shape[i] == dst1_shape[i]) && + (src_shape[i] == dst2_shape[i]); + } + if (!shapes_equal) { + throw py::value_error("Array shapes are not the same."); + } + + // if nelems is zero, return + if (src_nelems == 0) { + return std::make_pair(sycl::event(), sycl::event()); + } + + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst1, + src_nelems); + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst2, + src_nelems); + + // check memory overlap + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + auto const 
&same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); + if ((overlap(src, dst1) && !same_logical_tensors(src, dst1)) || + (overlap(src, dst2) && !same_logical_tensors(src, dst2)) || + (overlap(dst1, dst2) && !same_logical_tensors(dst1, dst2))) + { + throw py::value_error("Arrays index overlapping segments of memory"); + } + + const char *src_data = src.get_data(); + char *dst1_data = dst1.get_data(); + char *dst2_data = dst2.get_data(); + + // handle contiguous inputs + bool is_src_c_contig = src.is_c_contiguous(); + bool is_src_f_contig = src.is_f_contiguous(); + + bool is_dst1_c_contig = dst1.is_c_contiguous(); + bool is_dst1_f_contig = dst1.is_f_contiguous(); + + bool is_dst2_c_contig = dst2.is_c_contiguous(); + bool is_dst2_f_contig = dst2.is_f_contiguous(); + + bool all_c_contig = + (is_src_c_contig && is_dst1_c_contig && is_dst2_c_contig); + bool all_f_contig = + (is_src_f_contig && is_dst1_f_contig && is_dst2_f_contig); + + if (all_c_contig || all_f_contig) { + auto contig_fn = contig_dispatch_vector[src_typeid]; + + if (contig_fn == nullptr) { + throw std::runtime_error( + "Contiguous implementation is missing for src_typeid=" + + std::to_string(src_typeid)); + } + + auto comp_ev = + contig_fn(q, src_nelems, src_data, dst1_data, dst2_data, depends); + sycl::event ht_ev = + dpctl::utils::keep_args_alive(q, {src, dst1, dst2}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + + // simplify iteration space + // if 1d with strides 1 - input is contig + // dispatch to strided + + auto const &src_strides = src.get_strides_vector(); + auto const &dst1_strides = dst1.get_strides_vector(); + auto const &dst2_strides = dst2.get_strides_vector(); + + using shT = std::vector; + shT simplified_shape; + shT simplified_src_strides; + shT simplified_dst1_strides; + shT simplified_dst2_strides; + py::ssize_t src_offset(0); + py::ssize_t dst1_offset(0); + py::ssize_t dst2_offset(0); + + int nd = src_nd; + const py::ssize_t *shape = src_shape; + + simplify_iteration_space_3( + nd, shape, src_strides, dst1_strides, dst2_strides, + // output + simplified_shape, simplified_src_strides, simplified_dst1_strides, + simplified_dst2_strides, src_offset, dst1_offset, dst2_offset); + + if (nd == 1 && simplified_src_strides[0] == 1 && + simplified_dst1_strides[0] == 1 && simplified_dst2_strides[0] == 1) + { + // Special case of contiguous data + auto contig_fn = contig_dispatch_vector[src_typeid]; + + if (contig_fn == nullptr) { + throw std::runtime_error( + "Contiguous implementation is missing for src_typeid=" + + std::to_string(src_typeid)); + } + + int src_elem_size = src.get_elemsize(); + int dst1_elem_size = dst1.get_elemsize(); + int dst2_elem_size = dst2.get_elemsize(); + auto comp_ev = + contig_fn(q, src_nelems, src_data + src_elem_size * src_offset, + dst1_data + dst1_elem_size * dst1_offset, + dst2_data + dst2_elem_size * dst2_offset, depends); + + sycl::event ht_ev = + dpctl::utils::keep_args_alive(q, {src, dst1, dst2}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + + // Strided implementation + auto strided_fn = strided_dispatch_vector[src_typeid]; + + if (strided_fn == nullptr) { + throw std::runtime_error( + "Strided implementation is missing for src_typeid=" + + std::to_string(src_typeid)); + } + + using dpctl::tensor::offset_utils::device_allocate_and_pack; + + std::vector host_tasks{}; + host_tasks.reserve(2); + + auto ptr_size_event_triple_ = device_allocate_and_pack( + q, host_tasks, simplified_shape, simplified_src_strides, + 
simplified_dst1_strides, simplified_dst2_strides); + auto shape_strides_owner = std::move(std::get<0>(ptr_size_event_triple_)); + const auto ©_shape_ev = std::get<2>(ptr_size_event_triple_); + const py::ssize_t *shape_strides = shape_strides_owner.get(); + + sycl::event strided_fn_ev = strided_fn( + q, src_nelems, nd, shape_strides, src_data, src_offset, dst1_data, + dst1_offset, dst2_data, dst2_offset, depends, {copy_shape_ev}); + + // async free of shape_strides temporary + sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free( + q, {strided_fn_ev}, shape_strides_owner); + + host_tasks.push_back(tmp_cleanup_ev); + + return std::make_pair( + dpctl::utils::keep_args_alive(q, {src, dst1, dst2}, host_tasks), + strided_fn_ev); +} + +/** + * @brief Template implementing Python API for querying of type support by + * a unary elementwise function with two output arrays. + */ +template +std::pair + py_unary_two_outputs_ufunc_result_type(const py::dtype &input_dtype, + const output_typesT &output_types) +{ + int tn = input_dtype.num(); // NumPy type numbers are the same as in dpctl + int src_typeid = -1; + + auto array_types = td_ns::usm_ndarray_types(); + + try { + src_typeid = array_types.typenum_to_lookup_id(tn); + } catch (const std::exception &e) { + throw py::value_error(e.what()); + } + + using type_utils::_result_typeid; + std::pair dst_typeids = _result_typeid(src_typeid, output_types); + int dst1_typeid = dst_typeids.first; + int dst2_typeid = dst_typeids.second; + + if (dst1_typeid < 0 || dst2_typeid < 0) { + auto res = py::none(); + auto py_res = py::cast(res); + return std::make_pair(py_res, py_res); + } + else { + using type_utils::_dtype_from_typenum; + + auto dst1_typenum_t = static_cast(dst1_typeid); + auto dst2_typenum_t = static_cast(dst2_typeid); + auto dt1 = _dtype_from_typenum(dst1_typenum_t); + auto dt2 = _dtype_from_typenum(dst2_typenum_t); + + return std::make_pair(py::cast(dt1), + py::cast(dt2)); + } +} + // ======================== Binary functions =========================== namespace @@ -347,10 +604,10 @@ std::pair py_binary_ufunc( const py::ssize_t *src2_shape = src2.get_shape_raw(); const py::ssize_t *dst_shape = dst.get_shape_raw(); bool shapes_equal(true); - size_t src_nelems(1); + std::size_t src_nelems(1); for (int i = 0; i < dst_nd; ++i) { - src_nelems *= static_cast(src1_shape[i]); + src_nelems *= static_cast(src1_shape[i]); shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]); } @@ -456,7 +713,7 @@ std::pair py_binary_ufunc( std::initializer_list{0, 1}; static constexpr auto one_zero_strides = std::initializer_list{1, 0}; - constexpr py::ssize_t one{1}; + static constexpr py::ssize_t one{1}; // special case of C-contiguous matrix and a row if (isEqual(simplified_src2_strides, zero_one_strides) && isEqual(simplified_src1_strides, {simplified_shape[1], one}) && @@ -477,8 +734,8 @@ std::pair py_binary_ufunc( is_aligned( dst_data + dst_offset * dst_itemsize)) { - size_t n0 = simplified_shape[0]; - size_t n1 = simplified_shape[1]; + std::size_t n0 = simplified_shape[0]; + std::size_t n1 = simplified_shape[1]; sycl::event comp_ev = matrix_row_broadcast_fn( exec_q, host_tasks, n0, n1, src1_data, src1_offset, src2_data, src2_offset, dst_data, dst_offset, @@ -511,8 +768,8 @@ std::pair py_binary_ufunc( is_aligned( dst_data + dst_offset * dst_itemsize)) { - size_t n0 = simplified_shape[1]; - size_t n1 = simplified_shape[0]; + std::size_t n0 = simplified_shape[1]; + std::size_t n1 = simplified_shape[0]; 
sycl::event comp_ev = row_matrix_broadcast_fn( exec_q, host_tasks, n0, n1, src1_data, src1_offset, src2_data, src2_offset, dst_data, dst_offset, @@ -655,10 +912,10 @@ std::pair const py::ssize_t *rhs_shape = rhs.get_shape_raw(); const py::ssize_t *lhs_shape = lhs.get_shape_raw(); bool shapes_equal(true); - size_t rhs_nelems(1); + std::size_t rhs_nelems(1); for (int i = 0; i < lhs_nd; ++i) { - rhs_nelems *= static_cast(rhs_shape[i]); + rhs_nelems *= static_cast(rhs_shape[i]); shapes_equal = shapes_equal && (rhs_shape[i] == lhs_shape[i]); } if (!shapes_equal) { @@ -749,7 +1006,7 @@ std::pair if (nd == 2) { static constexpr auto one_zero_strides = std::initializer_list{1, 0}; - constexpr py::ssize_t one{1}; + static constexpr py::ssize_t one{1}; // special case of C-contiguous matrix and a row if (isEqual(simplified_rhs_strides, one_zero_strides) && isEqual(simplified_lhs_strides, {one, simplified_shape[0]})) @@ -758,8 +1015,8 @@ std::pair contig_row_matrix_broadcast_dispatch_table[rhs_typeid] [lhs_typeid]; if (row_matrix_broadcast_fn != nullptr) { - size_t n0 = simplified_shape[1]; - size_t n1 = simplified_shape[0]; + std::size_t n0 = simplified_shape[1]; + std::size_t n1 = simplified_shape[0]; sycl::event comp_ev = row_matrix_broadcast_fn( exec_q, host_tasks, n0, n1, rhs_data, rhs_offset, lhs_data, lhs_offset, depends); @@ -805,5 +1062,4 @@ std::pair dpctl::utils::keep_args_alive(exec_q, {rhs, lhs}, host_tasks), strided_fn_ev); } - } // namespace dpnp::extensions::py_internal diff --git a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp index 48fd2c4c425..7555f91d280 100644 --- a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp @@ -26,8 +26,7 @@ // THE POSSIBILITY OF SUCH DAMAGE. 
//***************************************************************************** -#include "dpctl4pybind11.hpp" - +#include #include #include @@ -58,19 +57,19 @@ void simplify_iteration_space(int &nd, simplified_shape.reserve(nd); simplified_shape.insert(std::begin(simplified_shape), shape, shape + nd); - assert(simplified_shape.size() == static_cast(nd)); + assert(simplified_shape.size() == static_cast(nd)); simplified_src_strides.reserve(nd); simplified_src_strides.insert(std::end(simplified_src_strides), std::begin(src_strides), std::end(src_strides)); - assert(simplified_src_strides.size() == static_cast(nd)); + assert(simplified_src_strides.size() == static_cast(nd)); simplified_dst_strides.reserve(nd); simplified_dst_strides.insert(std::end(simplified_dst_strides), std::begin(dst_strides), std::end(dst_strides)); - assert(simplified_dst_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); int contracted_nd = st_ns::simplify_iteration_two_strides( nd, simplified_shape.data(), simplified_src_strides.data(), @@ -90,7 +89,7 @@ void simplify_iteration_space(int &nd, // Populate vectors simplified_shape.reserve(nd); simplified_shape.push_back(shape[0]); - assert(simplified_shape.size() == static_cast(nd)); + assert(simplified_shape.size() == static_cast(nd)); simplified_src_strides.reserve(nd); simplified_dst_strides.reserve(nd); @@ -108,8 +107,8 @@ void simplify_iteration_space(int &nd, simplified_dst_strides.push_back(dst_strides[0]); } - assert(simplified_src_strides.size() == static_cast(nd)); - assert(simplified_dst_strides.size() == static_cast(nd)); + assert(simplified_src_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); } } @@ -136,25 +135,25 @@ void simplify_iteration_space_3( // and improve access pattern simplified_shape.reserve(nd); simplified_shape.insert(std::end(simplified_shape), shape, shape + nd); - assert(simplified_shape.size() == static_cast(nd)); + assert(simplified_shape.size() == static_cast(nd)); simplified_src1_strides.reserve(nd); simplified_src1_strides.insert(std::end(simplified_src1_strides), std::begin(src1_strides), std::end(src1_strides)); - assert(simplified_src1_strides.size() == static_cast(nd)); + assert(simplified_src1_strides.size() == static_cast(nd)); simplified_src2_strides.reserve(nd); simplified_src2_strides.insert(std::end(simplified_src2_strides), std::begin(src2_strides), std::end(src2_strides)); - assert(simplified_src2_strides.size() == static_cast(nd)); + assert(simplified_src2_strides.size() == static_cast(nd)); simplified_dst_strides.reserve(nd); simplified_dst_strides.insert(std::end(simplified_dst_strides), std::begin(dst_strides), std::end(dst_strides)); - assert(simplified_dst_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); int contracted_nd = st_ns::simplify_iteration_three_strides( nd, simplified_shape.data(), simplified_src1_strides.data(), @@ -177,7 +176,7 @@ void simplify_iteration_space_3( // Populate vectors simplified_shape.reserve(nd); simplified_shape.push_back(shape[0]); - assert(simplified_shape.size() == static_cast(nd)); + assert(simplified_shape.size() == static_cast(nd)); simplified_src1_strides.reserve(nd); simplified_src2_strides.reserve(nd); @@ -200,9 +199,9 @@ void simplify_iteration_space_3( simplified_dst_strides.push_back(dst_strides[0]); } - assert(simplified_src1_strides.size() == static_cast(nd)); - assert(simplified_src2_strides.size() == static_cast(nd)); - 
assert(simplified_dst_strides.size() == static_cast(nd)); + assert(simplified_src1_strides.size() == static_cast(nd)); + assert(simplified_src2_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); } } } // namespace dpnp::extensions::py_internal From 8be7efc684faa44b0a225a59aa79233a7a3a59d0 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 15:14:07 +0200 Subject: [PATCH 05/19] Move definition of _result_typeid to the header to avoid undefined reference issue --- .../elementwise_functions.hpp | 5 ++--- .../elementwise_functions_type_utils.cpp | 15 ++------------- .../elementwise_functions_type_utils.hpp | 11 +++++++++-- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index ac592475459..8132f0dad82 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -62,6 +62,8 @@ namespace td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::kernels::alignment_utils::is_aligned; using dpctl::tensor::kernels::alignment_utils::required_alignment; +using type_utils::_result_typeid; + /*! @brief Template implementing Python API for unary elementwise functions */ template (res); @@ -511,7 +511,6 @@ std::pair throw py::value_error(e.what()); } - using type_utils::_result_typeid; std::pair dst_typeids = _result_typeid(src_typeid, output_types); int dst1_typeid = dst_typeids.first; int dst2_typeid = dst_typeids.second; diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp index ed21c618069..62f7584a3e0 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp @@ -37,11 +37,11 @@ // dpctl tensor headers #include "utils/type_dispatch.hpp" +namespace dpnp::extensions::py_internal::type_utils +{ namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace dpnp::extensions::py_internal::type_utils -{ py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) { switch (dst_typenum_t) { @@ -77,15 +77,4 @@ py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) throw py::value_error("Unrecognized dst_typeid"); } } - -template -output_idT _result_typeid(int arg_typeid, const output_idT *fn_output_id) -{ - if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { - throw py::value_error("Input typeid " + std::to_string(arg_typeid) + - " is outside of expected bounds."); - } - - return fn_output_id[arg_typeid]; -} } // namespace dpnp::extensions::py_internal::type_utils diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp index 05903d5ba9e..1bb6fedd702 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp @@ -47,6 +47,13 @@ extern py::dtype _dtype_from_typenum(td_ns::typenum_t); /*! 
@brief Lookup typeid of the result from typeid of * argument and the mapping table */ template -extern output_idT _result_typeid(int arg_typeid, - const output_idT *fn_output_id); +output_idT _result_typeid(int arg_typeid, const output_idT *fn_output_id) +{ + if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { + throw py::value_error("Input typeid " + std::to_string(arg_typeid) + + " is outside of expected bounds."); + } + + return fn_output_id[arg_typeid]; +} } // namespace dpnp::extensions::py_internal::type_utils From a46c76f79266eff32a6c1e7fb4122a2938a04777 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 15:39:56 +0200 Subject: [PATCH 06/19] Extend type_dispatch namespace with TypeMapTwoResultsEntry and DefaultTwoResultsEntry --- .../type_dispatch_building.hpp | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 dpnp/backend/extensions/elementwise_functions/type_dispatch_building.hpp diff --git a/dpnp/backend/extensions/elementwise_functions/type_dispatch_building.hpp b/dpnp/backend/extensions/elementwise_functions/type_dispatch_building.hpp new file mode 100644 index 00000000000..4813daf2bf6 --- /dev/null +++ b/dpnp/backend/extensions/elementwise_functions/type_dispatch_building.hpp @@ -0,0 +1,61 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// maxification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "utils/type_dispatch.hpp" + +namespace dpnp::extensions::py_internal::type_dispatch +{ +/** + * Extends dpctl::tensor::type_dispatch::TypeMapResultEntry helper structure + * with support of the two result types. + */ +template +struct TypeMapTwoResultsEntry : std::bool_constant> +{ + using result_type1 = ResTy1; + using result_type2 = ResTy2; +}; + +/** + * Extends dpctl::tensor::type_dispatch::DefaultResultEntry helper structure + * with support of the two result types. 
+ */ +template +struct DefaultTwoResultsEntry : std::true_type +{ + using result_type1 = Ty; + using result_type2 = Ty; +}; +} // namespace dpnp::extensions::py_internal::type_dispatch From bf63ef4dc9a4f5db91301e413cc12b0ddd6382bc Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 16:17:09 +0200 Subject: [PATCH 07/19] Extend dpctl::tensor::kernels::elementwise_common namespace with support of two output arrays --- .../elementwise_functions/common.hpp | 480 ++++++++++++++++++ 1 file changed, 480 insertions(+) create mode 100644 dpnp/backend/extensions/elementwise_functions/common.hpp diff --git a/dpnp/backend/extensions/elementwise_functions/common.hpp b/dpnp/backend/extensions/elementwise_functions/common.hpp new file mode 100644 index 00000000000..406575cd00c --- /dev/null +++ b/dpnp/backend/extensions/elementwise_functions/common.hpp @@ -0,0 +1,480 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// maxification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include +#include +#include + +#include + +// dpctl tensor headers +#include "kernels/alignment.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "utils/sycl_utils.hpp" + +namespace dpnp::extensions::py_internal::elementwise_common +{ +using dpctl::tensor::kernels::alignment_utils:: + disabled_sg_loadstore_wrapper_krn; +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + +using dpctl::tensor::kernels::elementwise_common::select_lws; + +using dpctl::tensor::sycl_utils::sub_group_load; +using dpctl::tensor::sycl_utils::sub_group_store; + +/** + * @brief Functor for evaluation of a unary function with two output arrays on + * contiguous arrays. + * + * @note It extends UnaryContigFunctor from + * dpctl::tensor::kernels::elementwise_common namespace. 
+ */ +template +struct UnaryTwoOutputsContigFunctor +{ +private: + const argT *in = nullptr; + resT1 *out1 = nullptr; + resT2 *out2 = nullptr; + std::size_t nelems_; + +public: + UnaryTwoOutputsContigFunctor(const argT *inp, + resT1 *res1, + resT2 *res2, + const std::size_t n_elems) + : in(inp), out1(res1), out2(res2), nelems_(n_elems) + { + } + + void operator()(sycl::nd_item<1> ndit) const + { + static constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + UnaryTwoOutputsOpT op{}; + /* Each work-item processes vec_sz elements, contiguous in memory */ + /* NOTE: work-group size must be divisible by sub-group size */ + + if constexpr (enable_sg_loadstore && + UnaryTwoOutputsOpT::is_constant::value) { + // value of operator is known to be a known constant + constexpr resT1 const_val1 = UnaryTwoOutputsOpT::constant_value1; + constexpr resT2 const_val2 = UnaryTwoOutputsOpT::constant_value2; + + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + if (base + elems_per_wi * sgSize < nelems_) { + static constexpr sycl::vec res1_vec(const_val1); + static constexpr sycl::vec res2_vec(const_val2); +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto out1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out1[offset]); + auto out2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out2[offset]); + + sub_group_store(sg, res1_vec, out1_multi_ptr); + sub_group_store(sg, res2_vec, out2_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out1[k] = const_val1; + out2[k] = const_val2; + } + } + } + else if constexpr (enable_sg_loadstore && + UnaryTwoOutputsOpT::supports_sg_loadstore::value && + UnaryTwoOutputsOpT::supports_vec::value && + (vec_sz > 1)) + { + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + if (base + elems_per_wi * sgSize < nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in[offset]); + auto out1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out1[offset]); + auto out2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out2[offset]); + + const sycl::vec x = + sub_group_load(sg, in_multi_ptr); + sycl::vec res2_vec = {}; + const sycl::vec res1_vec = op(x, res2_vec); + sub_group_store(sg, res1_vec, out1_multi_ptr); + sub_group_store(sg, res2_vec, out2_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + // scalar call + out1[k] = op(in[k], out2[k]); + } + } + } + else if constexpr (enable_sg_loadstore && + UnaryTwoOutputsOpT::supports_sg_loadstore::value && + std::is_same_v) + { + // default: use scalar-value 
function + + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + if (base + elems_per_wi * sgSize < nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in[offset]); + auto out1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out1[offset]); + auto out2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out2[offset]); + + sycl::vec arg_vec = + sub_group_load(sg, in_multi_ptr); + sycl::vec res2_vec = {}; +#pragma unroll + for (std::uint32_t k = 0; k < vec_sz; ++k) { + arg_vec[k] = op(arg_vec[k], res2_vec[k]); + } + sub_group_store(sg, arg_vec, out1_multi_ptr); + sub_group_store(sg, res2_vec, out2_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out1[k] = op(in[k], out2[k]); + } + } + } + else if constexpr (enable_sg_loadstore && + UnaryTwoOutputsOpT::supports_sg_loadstore::value) + { + // default: use scalar-value function + + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + if (base + elems_per_wi * sgSize < nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in[offset]); + auto out1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out1[offset]); + auto out2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out2[offset]); + + const sycl::vec arg_vec = + sub_group_load(sg, in_multi_ptr); + sycl::vec res1_vec = {}; + sycl::vec res2_vec = {}; +#pragma unroll + for (std::uint8_t k = 0; k < vec_sz; ++k) { + res1_vec[k] = op(arg_vec[k], res2_vec[k]); + } + sub_group_store(sg, res1_vec, out1_multi_ptr); + sub_group_store(sg, res2_vec, out2_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out1[k] = op(in[k], out2[k]); + } + } + } + else { + const std::uint16_t sgSize = + ndit.get_sub_group().get_local_range()[0]; + const std::size_t gid = ndit.get_global_linear_id(); + const std::uint16_t elems_per_sg = sgSize * elems_per_wi; + + const std::size_t start = + (gid / sgSize) * (elems_per_sg - sgSize) + gid; + const std::size_t end = std::min(nelems_, start + elems_per_sg); + for (std::size_t offset = start; offset < end; offset += sgSize) { + out1[offset] = op(in[offset], out2[offset]); + } + } + } +}; + +/** + * @brief Functor for evaluation of a unary function with two output arrays on + * strided data. + * + * @note It extends UnaryStridedFunctor from + * dpctl::tensor::kernels::elementwise_common namespace. 
+ */ +template +struct UnaryTwoOutputsStridedFunctor +{ +private: + const argT *inp_ = nullptr; + resT1 *res1_ = nullptr; + resT2 *res2_ = nullptr; + IndexerT inp_out_indexer_; + +public: + UnaryTwoOutputsStridedFunctor(const argT *inp_p, + resT1 *res1_p, + resT2 *res2_p, + const IndexerT &inp_out_indexer) + : inp_(inp_p), res1_(res1_p), res2_(res2_p), + inp_out_indexer_(inp_out_indexer) + { + } + + void operator()(sycl::id<1> wid) const + { + const auto &offsets_ = inp_out_indexer_(wid.get(0)); + const ssize_t &inp_offset = offsets_.get_first_offset(); + const ssize_t &res1_offset = offsets_.get_second_offset(); + const ssize_t &res2_offset = offsets_.get_third_offset(); + + UnaryTwoOutputsOpT op{}; + + res1_[res1_offset] = op(inp_[inp_offset], res2_[res2_offset]); + } +}; + +/** + * @brief Function to submit a kernel for unary functor with two output arrays + * on contiguous arrays. + * + * @note It extends unary_contig_impl from + * dpctl::tensor::kernels::elementwise_common namespace. + */ +template + class UnaryTwoOutputsType, + template + class UnaryTwoOutputsContigFunctorT, + template + class kernel_name, + std::uint8_t vec_sz = 4u, + std::uint8_t n_vecs = 2u> +sycl::event + unary_two_outputs_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg_p, + char *res1_p, + char *res2_p, + const std::vector &depends = {}) +{ + static constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + const std::size_t n_work_items_needed = nelems / elems_per_wi; + const std::size_t lws = + select_lws(exec_q.get_device(), n_work_items_needed); + + const std::size_t n_groups = + ((nelems + lws * elems_per_wi - 1) / (lws * elems_per_wi)); + const auto gws_range = sycl::range<1>(n_groups * lws); + const auto lws_range = sycl::range<1>(lws); + + using resTy1 = typename UnaryTwoOutputsType::value_type1; + using resTy2 = typename UnaryTwoOutputsType::value_type2; + using BaseKernelName = kernel_name; + + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy1 *res1_tp = reinterpret_cast(res1_p); + resTy2 *res2_tp = reinterpret_cast(res2_p); + + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + if (is_aligned(arg_p) && + is_aligned(res1_p) && + is_aligned(res2_p)) + { + static constexpr bool enable_sg_loadstore = true; + using KernelName = BaseKernelName; + using Impl = + UnaryTwoOutputsContigFunctorT; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + Impl(arg_tp, res1_tp, res2_tp, nelems)); + } + else { + static constexpr bool disable_sg_loadstore = false; + using KernelName = + disabled_sg_loadstore_wrapper_krn; + using Impl = + UnaryTwoOutputsContigFunctorT; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + Impl(arg_tp, res1_tp, res2_tp, nelems)); + } + }); + + return comp_ev; +} + +/** + * @brief Function to submit a kernel for unary functor with two output arrays + * on strided data. + * + * @note It extends unary_strided_impl from + * dpctl::tensor::kernels::elementwise_common namespace. 
+ */ +template + class UnaryTwoOutputsType, + template + class UnaryTwoOutputsStridedFunctorT, + template + class kernel_name> +sycl::event unary_two_outputs_strided_impl( + sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res1_p, + ssize_t res1_offset, + char *res2_p, + ssize_t res2_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.depends_on(additional_depends); + + using res1Ty = typename UnaryTwoOutputsType::value_type1; + using res2Ty = typename UnaryTwoOutputsType::value_type2; + using IndexerT = + typename dpctl::tensor::offset_utils::ThreeOffsets_StridedIndexer; + + const IndexerT indexer{nd, arg_offset, res1_offset, res2_offset, + shape_and_strides}; + + const argTy *arg_tp = reinterpret_cast(arg_p); + res1Ty *res1_tp = reinterpret_cast(res1_p); + res2Ty *res2_tp = reinterpret_cast(res2_p); + + using Impl = + UnaryTwoOutputsStridedFunctorT; + + cgh.parallel_for>( + {nelems}, Impl(arg_tp, res1_tp, res2_tp, indexer)); + }); + return comp_ev; +} + +// Typedefs for function pointers + +typedef sycl::event (*unary_two_outputs_contig_impl_fn_ptr_t)( + sycl::queue &, + std::size_t, + const char *, + char *, + char *, + const std::vector &); + +typedef sycl::event (*unary_two_outputs_strided_impl_fn_ptr_t)( + sycl::queue &, + std::size_t, + int, + const ssize_t *, + const char *, + ssize_t, + char *, + ssize_t, + char *, + ssize_t, + const std::vector &, + const std::vector &); + +} // namespace dpnp::extensions::py_internal::elementwise_common From 2b4f5273eca42d514addb796640a226c34ce22bb Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 16:32:51 +0200 Subject: [PATCH 08/19] Add frexp implementation to ufunc extension --- dpnp/backend/extensions/ufunc/CMakeLists.txt | 1 + .../ufunc/elementwise_functions/common.cpp | 2 + .../ufunc/elementwise_functions/frexp.cpp | 261 ++++++++++++++++++ .../ufunc/elementwise_functions/frexp.hpp | 38 +++ .../kernels/elementwise_functions/frexp.hpp | 53 ++++ 5 files changed, 355 insertions(+) create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/frexp.hpp create mode 100644 dpnp/backend/kernels/elementwise_functions/frexp.hpp diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index 403eb144d77..62e8249b508 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -38,6 +38,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmax.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmod.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/frexp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/gcd.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/heaviside.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/i0.cpp diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp index 23283d94bed..f59f6c709a7 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp @@ -37,6 +37,7 @@ #include "fmax.hpp" #include "fmin.hpp" #include "fmod.hpp" +#include "frexp.hpp" 
#include "gcd.hpp" #include "heaviside.hpp" #include "i0.hpp" @@ -68,6 +69,7 @@ void init_elementwise_functions(py::module_ m) init_fmax(m); init_fmin(m); init_fmod(m); + init_frexp(m); init_gcd(m); init_heaviside(m); init_i0(m); diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp new file mode 100644 index 00000000000..5e5a8929012 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp @@ -0,0 +1,261 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// maxification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include +#include +#include + +#include + +#include "dpctl4pybind11.hpp" + +#include "frexp.hpp" +#include "kernels/elementwise_functions/frexp.hpp" +#include "populate.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../../elementwise_functions/elementwise_functions.hpp" + +#include "../../elementwise_functions/common.hpp" +#include "../../elementwise_functions/type_dispatch_building.hpp" + +// utils extension header +#include "ext/common.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" + +namespace dpnp::extensions::ufunc +{ +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; + +namespace impl +{ +namespace ew_cmn_ns = dpnp::extensions::py_internal::elementwise_common; +namespace td_int_ns = py_int::type_dispatch; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpnp::kernels::frexp::FrexpFunctor; +using ext::common::init_dispatch_vector; + +template +struct FrexpOutputType +{ + using table_type = std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_int_ns:: + TypeMapTwoResultsEntry, + td_int_ns::TypeMapTwoResultsEntry, + td_int_ns::TypeMapTwoResultsEntry, + td_int_ns::DefaultTwoResultsEntry>; + using value_type1 = typename table_type::result_type1; + using value_type2 = typename table_type::result_type2; +}; + +// contiguous implementation + +template +using FrexpContigFunctor = + ew_cmn_ns::UnaryTwoOutputsContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +// strided implementation + +template +using FrexpStridedFunctor = ew_cmn_ns::UnaryTwoOutputsStridedFunctor< + argTy, + resTy1, + resTy2, + IndexerT, + FrexpFunctor>; + +template +class frexp_contig_kernel; + +template +sycl::event frexp_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res1_p, + char *res2_p, + const std::vector &depends = {}) +{ + return ew_cmn_ns::unary_two_outputs_contig_impl< + argTy, FrexpOutputType, FrexpContigFunctor, frexp_contig_kernel>( + exec_q, nelems, arg_p, res1_p, res2_p, depends); +} + +template +struct FrexpContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type1, + void> || + std::is_same_v::value_type2, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = frexp_contig_impl; + return fn; + } + } +}; + +template +class frexp_strided_kernel; + +template +sycl::event + frexp_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res1_p, + ssize_t res1_offset, + char *res2_p, + ssize_t res2_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return ew_cmn_ns::unary_two_outputs_strided_impl< + argTy, FrexpOutputType, FrexpStridedFunctor, frexp_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res1_p, + res1_offset, res2_p, res2_offset, depends, additional_depends); +} + +template +struct FrexpStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type1, + void> || + std::is_same_v::value_type2, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = frexp_strided_impl; + return fn; + } + } +}; + +template +struct FrexpTypeMapFactory +{ + /*! 
@brief get typeid for output type of sycl::frexp(T x) */ + std::enable_if_t>::value, + std::pair> + get() + { + using rT1 = typename FrexpOutputType::value_type1; + using rT2 = typename FrexpOutputType::value_type2; + return std::make_pair(td_ns::GetTypeid{}.get(), + td_ns::GetTypeid{}.get()); + } +}; + +using ew_cmn_ns::unary_two_outputs_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_two_outputs_strided_impl_fn_ptr_t; + +static unary_two_outputs_contig_impl_fn_ptr_t + frexp_contig_dispatch_vector[td_ns::num_types]; +static std::pair frexp_output_typeid_vector[td_ns::num_types]; +static unary_two_outputs_strided_impl_fn_ptr_t + frexp_strided_dispatch_vector[td_ns::num_types]; + +void populate_frexp_dispatch_vectors(void) +{ + init_dispatch_vector(frexp_contig_dispatch_vector); + init_dispatch_vector(frexp_strided_dispatch_vector); + init_dispatch_vector, FrexpTypeMapFactory>( + frexp_output_typeid_vector); +}; + +// MACRO_POPULATE_DISPATCH_TABLES(ldexp); +} // namespace impl + +void init_frexp(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_frexp_dispatch_vectors(); + using impl::frexp_contig_dispatch_vector; + using impl::frexp_output_typeid_vector; + using impl::frexp_strided_dispatch_vector; + + auto frexp_pyapi = [&](const arrayT &src, const arrayT &dst1, + const arrayT &dst2, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_int::py_unary_two_outputs_ufunc( + src, dst1, dst2, exec_q, depends, frexp_output_typeid_vector, + frexp_contig_dispatch_vector, frexp_strided_dispatch_vector); + }; + m.def("_frexp", frexp_pyapi, "", py::arg("src"), py::arg("dst1"), + py::arg("dst2"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + auto frexp_result_type_pyapi = [&](const py::dtype &dtype) { + return py_int::py_unary_two_outputs_ufunc_result_type( + dtype, frexp_output_typeid_vector); + }; + m.def("_frexp_result_type", frexp_result_type_pyapi); + } +} +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.hpp new file mode 100644 index 00000000000..e87f63ce1cb --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.hpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::ufunc +{ +void init_frexp(py::module_ m); +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/kernels/elementwise_functions/frexp.hpp b/dpnp/backend/kernels/elementwise_functions/frexp.hpp new file mode 100644 index 00000000000..8ab800fef15 --- /dev/null +++ b/dpnp/backend/kernels/elementwise_functions/frexp.hpp @@ -0,0 +1,53 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+
+//*****************************************************************************
+
+#pragma once
+
+#include <sycl/sycl.hpp>
+
+namespace dpnp::kernels::frexp
+{
+template <typename argT, typename mantT, typename expT>
+struct FrexpFunctor
+{
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT1 constant_value1 = resT1{};
+    // constexpr resT2 constant_value2 = resT2{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argT and mantT, expT support subgroup store/load operation
+    using supports_sg_loadstore = typename std::true_type;
+
+    mantT operator()(const argT &in, expT &exp) const
+    {
+        return sycl::frexp(in, &exp);
+    }
+};
+} // namespace dpnp::kernels::frexp

From 6df1b8035d0a0ee9f696033182650007d41b4b02 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Thu, 23 Oct 2025 17:09:43 +0200
Subject: [PATCH 09/19] Add DPNPUnaryTwoOutputsFunc class for unary
 element-wise functions with two output arrays

---
 dpnp/dpnp_algo/dpnp_elementwise_common.py | 229 ++++++++++++++++++++++
 dpnp/dpnp_utils/dpnp_utils_common.py      |  29 ++-
 2 files changed, 255 insertions(+), 3 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
index c7393a79657..a6e29622c89 100644
--- a/dpnp/dpnp_algo/dpnp_elementwise_common.py
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -28,8 +28,10 @@
 # *****************************************************************************
 
 import dpctl.tensor as dpt
+import dpctl.tensor._copy_utils as dtc
 import dpctl.tensor._tensor_impl as dti
 import dpctl.tensor._type_utils as dtu
+import dpctl.utils as dpu
 import numpy
 from dpctl.tensor._elementwise_common import (
     BinaryElementwiseFunc,
@@ -39,6 +41,9 @@
 import dpnp
 import dpnp.backend.extensions.vm._vm_impl as vmi
 from dpnp.dpnp_array import dpnp_array
+from dpnp.dpnp_utils.dpnp_utils_common import (
+    find_buf_dtype_3out,
+)
 
 __all__ = [
     "DPNPI0",
@@ -50,6 +55,7 @@
     "DPNPRound",
     "DPNPSinc",
     "DPNPUnaryFunc",
+    "DPNPUnaryTwoOutputsFunc",
     "acceptance_fn_gcd_lcm",
     "acceptance_fn_negative",
     "acceptance_fn_positive",
@@ -102,6 +108,7 @@ class DPNPUnaryFunc(UnaryElementwiseFunc):
         The function is invoked when the argument of the unary function
         requires casting, e.g. the argument of `dpctl.tensor.log` is an
         array with integral data type.
+
     """
 
     def __init__(
@@ -197,6 +204,227 @@ def __call__(
         return dpnp_array._create_from_usm_ndarray(res_usm)
 
 
+class DPNPUnaryTwoOutputsFunc(UnaryElementwiseFunc):
+    """
+    Class that implements unary element-wise functions with two output arrays.
+
+    Parameters
+    ----------
+    name : {str}
+        Name of the unary function
+    result_type_resolver_fn : {callable}
+        Function that takes dtype of the input and returns the dtypes of
+        the results if the implementation function supports it, or
+        returns `None` otherwise.
+    unary_dp_impl_fn : {callable}
+        Data-parallel implementation function with signature
+        `impl_fn(src: usm_ndarray, dst1: usm_ndarray, dst2: usm_ndarray,
+        sycl_queue: SyclQueue, depends: Optional[List[SyclEvent]])`
+        where the `src` is the argument array, `dst1` and `dst2` are the
+        arrays to be populated with function values, effectively
+        evaluating `dst1, dst2 = func(src)`.
+        The `impl_fn` is expected to return a 2-tuple of `SyclEvent`s. 
+ The first event corresponds to data-management host tasks, + including lifetime management of argument Python objects to ensure + that their associated USM allocation is not freed before offloaded + computational tasks complete execution, while the second event + corresponds to computational tasks associated with function evaluation. + docs : {str} + Documentation string for the unary function. + + """ + + def __init__( + self, + name, + result_type_resolver_fn, + unary_dp_impl_fn, + docs, + ): + super().__init__( + name, + result_type_resolver_fn, + unary_dp_impl_fn, + docs, + ) + self.__name__ = "DPNPUnaryTwoOutputsFunc" + + @property + def nout(self): + """Returns the number of arguments treated as outputs.""" + return 2 + + def __call__( + self, + x, + out1=None, + out2=None, + /, + *, + out=(None, None), + where=True, + order="K", + dtype=None, + subok=True, + **kwargs, + ): + if kwargs: + raise NotImplementedError( + f"Requested function={self.name_} with kwargs={kwargs} " + "isn't currently supported." + ) + elif where is not True: + raise NotImplementedError( + f"Requested function={self.name_} with where={where} " + "isn't currently supported." + ) + elif dtype is not None: + raise NotImplementedError( + f"Requested function={self.name_} with dtype={dtype} " + "isn't currently supported." + ) + elif subok is not True: + raise NotImplementedError( + f"Requested function={self.name_} with subok={subok} " + "isn't currently supported." + ) + + x = dpnp.get_usm_ndarray(x) + exec_q = x.sycl_queue + + if order is None: + order = "K" + elif order in "afkcAFKC": + order = order.upper() + if order == "A": + order = "F" if x.flags.f_contiguous else "C" + else: + raise ValueError( + "order must be one of 'C', 'F', 'A', or 'K' " f"(got '{order}')" + ) + + buf_dt, res1_dt, res2_dt = find_buf_dtype_3out( + x.dtype, + self.result_type_resolver_fn_, + x.sycl_device, + ) + if res1_dt is None or res2_dt is None: + raise ValueError( + f"function '{self.name_}' does not support input type " + f"({x.dtype}), " + "and the input could not be safely coerced to any " + "supported types according to the casting rule ''safe''." + ) + + if not isinstance(out, tuple): + raise TypeError("'out' must be a tuple of arrays") + + if len(out) != 2: + raise ValueError( + "'out' tuple must have exactly one entry per ufunc output" + ) + + if not (out1 is None and out2 is None): + if all(res is None for res in out): + out = (out1, out2) + else: + raise TypeError( + "cannot specify 'out' as both a positional and keyword argument" + ) + + orig_out, out = list(out), list(out) + res_dts = [res1_dt, res2_dt] + + for i in range(2): + if out[i] is None: + continue + + res = dpnp.get_usm_ndarray(out[i]) + if not res.flags.writable: + raise ValueError("provided output array is read-only") + + if res.shape != x.shape: + raise ValueError( + "The shape of input and output arrays are inconsistent. " + f"Expected output shape is {x.shape}, got {res.shape}" + ) + + if dpu.get_execution_queue((exec_q, res.sycl_queue)) is None: + raise dpnp.exceptions.ExecutionPlacementError( + "Input and output allocation queues are not compatible" + ) + + res_dt = res_dts[i] + if res_dt != res.dtype: + # Allocate a temporary buffer with the required dtype + out[i] = dpt.empty_like(res, dtype=res_dt) + elif ( + buf_dt is None + and dti._array_overlap(x, res) + and not dti._same_logical_tensors(x, res) + ): + # Allocate a temporary buffer to avoid memory overlapping. 
+ # Note if `buf_dt` is not None, a temporary copy of `x` will be + # created, so the array overlap check isn't needed. + out[i] = dpt.empty_like(res) + + _manager = dpu.SequentialOrderManager[exec_q] + dep_evs = _manager.submitted_events + + # Cast input array to the supported type if needed + if buf_dt is not None: + if order == "K": + buf = dtc._empty_like_orderK(x, buf_dt) + else: + buf = dpt.empty_like(x, dtype=buf_dt, order=order) + + ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray( + src=x, dst=buf, sycl_queue=exec_q, depends=dep_evs + ) + _manager.add_event_pair(ht_copy_ev, copy_ev) + + x = buf + dep_evs = copy_ev + + # Allocate a buffer for the output arrays if needed + for i in range(2): + if out[i] is None: + res_dt = res_dts[i] + if order == "K": + out[i] = dtc._empty_like_orderK(x, res_dt) + else: + out[i] = dpt.empty_like(x, dtype=res_dt, order=order) + + # Call the unary function with input and output arrays + dep_evs = _manager.submitted_events + ht_unary_ev, unary_ev = self.get_implementation_function()( + x, + dpnp.get_usm_ndarray(out[0]), + dpnp.get_usm_ndarray(out[1]), + sycl_queue=exec_q, + depends=dep_evs, + ) + _manager.add_event_pair(ht_unary_ev, unary_ev) + + for i in range(2): + orig_res, res = orig_out[i], out[i] + if not (orig_res is None or orig_res is res): + # Copy the out data from temporary buffer to original memory + ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray( + src=res, + dst=dpnp.get_usm_ndarray(orig_res), + sycl_queue=exec_q, + depends=[unary_ev], + ) + _manager.add_event_pair(ht_copy_ev, copy_ev) + res = out[i] = orig_res + + if not isinstance(res, dpnp_array): + # Always return dpnp.ndarray + out[i] = dpnp_array._create_from_usm_ndarray(res) + return out + + class DPNPBinaryFunc(BinaryElementwiseFunc): """ Class that implements binary element-wise functions. @@ -262,6 +490,7 @@ class DPNPBinaryFunc(BinaryElementwiseFunc): sycl_dev - The :class:`dpctl.SyclDevice` where the function evaluation is carried out. One of `o1_dtype` and `o2_dtype` must be a ``dtype`` instance. 
+ """ def __init__( diff --git a/dpnp/dpnp_utils/dpnp_utils_common.py b/dpnp/dpnp_utils/dpnp_utils_common.py index 61d1940ebe5..2cf5973d1e8 100644 --- a/dpnp/dpnp_utils/dpnp_utils_common.py +++ b/dpnp/dpnp_utils/dpnp_utils_common.py @@ -29,12 +29,35 @@ from collections.abc import Iterable -from dpctl.tensor._type_utils import _can_cast +import dpctl.tensor._type_utils as dtu import dpnp from dpnp.dpnp_utils import map_dtype_to_device -__all__ = ["result_type_for_device", "to_supported_dtypes"] +__all__ = [ + "find_buf_dtype_3out", + "result_type_for_device", + "to_supported_dtypes", +] + + +def find_buf_dtype_3out(arg_dtype, query_fn, sycl_dev): + """Works as dpu._find_buf_dtype, but with two output arrays.""" + + res1_dt, res2_dt = query_fn(arg_dtype) + if res1_dt and res2_dt: + return None, res1_dt, res2_dt + + _fp16 = sycl_dev.has_aspect_fp16 + _fp64 = sycl_dev.has_aspect_fp64 + all_dts = dtu._all_data_types(_fp16, _fp64) + for buf_dt in all_dts: + if dtu._can_cast(arg_dtype, buf_dt, _fp16, _fp64): + res1_dt, res2_dt = query_fn(buf_dt) + if res1_dt and res2_dt: + return buf_dt, res1_dt, res2_dt + + return None, None, None def result_type_for_device(dtypes, device): @@ -55,7 +78,7 @@ def to_supported_dtypes(dtypes, supported_types, device): has_fp16 = device.has_aspect_fp16 def is_castable(dtype, stype): - return _can_cast(dtype, stype, has_fp16, has_fp64) + return dtu._can_cast(dtype, stype, has_fp16, has_fp64) if not isinstance(supported_types, Iterable): supported_types = (supported_types,) # pragma: no cover From ba43ffe20fe98758b3bde86ec089ef93fced10e9 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 17:11:23 +0200 Subject: [PATCH 10/19] Add python implementation --- dpnp/dpnp_iface_mathematical.py | 73 +++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 6fe165632ae..1f58d0c843c 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -73,6 +73,7 @@ DPNPRound, DPNPSinc, DPNPUnaryFunc, + DPNPUnaryTwoOutputsFunc, acceptance_fn_gcd_lcm, acceptance_fn_negative, acceptance_fn_positive, @@ -112,6 +113,7 @@ "fmax", "fmin", "fmod", + "frexp", "gcd", "gradient", "heaviside", @@ -2342,6 +2344,77 @@ def ediff1d(ary, to_end=None, to_begin=None): mkl_impl_fn="_fmod", ) + +_FREXP_DOCSTRING = """ +Decompose each element :math:`x_i` of the input array `x` into the mantissa and +the twos exponent. + +For full documentation refer to :obj:`numpy.frexp`. + +Parameters +---------- +x : {dpnp.ndarray, usm_ndarray} + Array of numbers to be decomposed, expected to have a real-valued + floating-point data type. +out1 : {None, dpnp.ndarray, usm_ndarray}, optional + Output array for the mantissa to populate. Array must have the same shape + as `x` and the expected data type. + + Default: ``None``. +out2 : {None, dpnp.ndarray, usm_ndarray}, optional + Output array for the exponent to populate. Array must have the same shape + as `x` and the expected data type. + + Default: ``None``. +order : {None, "C", "F", "A", "K"}, optional + Memory layout of the newly output array, if parameter `out` is ``None``. + + Default: ``"K"``. + +Returns +------- +mantissa : dpnp.ndarray + Floating values between -1 and 1. +exponent : dpnp.ndarray + Integer exponents of 2. + +Limitations +----------- +Parameters `where` and `subok` are supported with their default values. +Keyword argument `kwargs` is currently unsupported. 
+Otherwise ``NotImplementedError`` exception will be raised. + +See Also +-------- +:obj:`dpnp.ldexp` : Compute :math:`y = x1 * 2^{x2}`, inverse to + :obj:`dpnp.frexp`. + +Notes +----- +Complex dtypes are not supported, they will raise a ``TypeError``. + +Examples +-------- +>>> import dpnp as np +>>> x = np.arange(9) +>>> y1, y2 = np.frexp(x) +>>> y1 +array([0. , 0.5 , 0.5 , 0.75 , 0.5 , 0.625, 0.75 , 0.875, 0.5 ]) +>>> y2 +array([0, 1, 2, 2, 3, 3, 3, 3, 4], dtype=int32) +>>> y1 * 2**y2 +array([0., 1., 2., 3., 4., 5., 6., 7., 8.]) + +""" + +frexp = DPNPUnaryTwoOutputsFunc( + "_frexp", + ufi._frexp_result_type, + ufi._frexp, + _FREXP_DOCSTRING, +) + + _GCD_DOCSTRING = r""" Returns the greatest common divisor of :math:`\abs{x1}` and :math:`\abs{x2}`. diff --git a/pyproject.toml b/pyproject.toml index 68a54aa095e..c5e55863f88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,7 +154,7 @@ max-returns = 8 [tool.pylint.format] max-line-length = 80 -max-module-lines = 5000 +max-module-lines = 8000 [tool.pylint.imports] allow-wildcard-with-all = true From 2472ba03221dbdbbb5ae65645787d6981825e187 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 17:20:53 +0200 Subject: [PATCH 11/19] Enable umath and third party tests --- dpnp/tests/test_umath.py | 2 +- dpnp/tests/third_party/cupy/math_tests/test_floating.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dpnp/tests/test_umath.py b/dpnp/tests/test_umath.py index 6284950ae94..31b55a204a1 100644 --- a/dpnp/tests/test_umath.py +++ b/dpnp/tests/test_umath.py @@ -115,7 +115,7 @@ def test_umaths(test_cases): and not (vmi._is_available() and has_support_aspect64()) ): pytest.skip("dpctl-2031") - elif umath in ["divmod", "frexp"]: + elif umath in ["divmod"]: pytest.skip("Not implemented umath") elif umath == "modf": if args[0].dtype == dpnp.float16: diff --git a/dpnp/tests/third_party/cupy/math_tests/test_floating.py b/dpnp/tests/third_party/cupy/math_tests/test_floating.py index 3bfb4bb0043..d7affc54157 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_floating.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_floating.py @@ -1,7 +1,8 @@ +from __future__ import annotations + import unittest import numpy -import pytest import dpnp as cupy from dpnp.tests.helper import has_support_aspect64 @@ -38,7 +39,6 @@ def test_ldexp(self, xp, ftype, itype): b = xp.array([-3, -2, -1, 0, 1, 2, 3], dtype=itype) return xp.ldexp(a, b) - @pytest.mark.skip("frexp() is not implemented yet") @testing.for_float_dtypes() def test_frexp(self, dtype): numpy_a = numpy.array( From 5de2f646ed6a9d725cf801b379f0e1030e3821ec Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 17:26:52 +0200 Subject: [PATCH 12/19] Add CFD tests --- dpnp/tests/test_sycl_queue.py | 24 ++++++++++++++++++++++++ dpnp/tests/test_usm_type.py | 13 +++++++++++++ 2 files changed, 37 insertions(+) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 3ed3ef17f96..85ae164b528 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -356,6 +356,30 @@ def test_1in_1out(func, data, device): pass +@pytest.mark.parametrize( + "func, data", + [ + pytest.param("frexp", numpy.arange(9)), + ], +) +@pytest.mark.parametrize("device", valid_dev, ids=dev_ids) +def test_1in_2out(func, data, device): + x = dpnp.array(data, device=device) + res1, res2 = getattr(dpnp, func)(x) + assert_sycl_queue_equal(res1.sycl_queue, x.sycl_queue) + assert_sycl_queue_equal(res2.sycl_queue, x.sycl_queue) + + out1 = 
dpnp.empty_like(res1) + out2 = dpnp.empty_like(res2) + try: + # some functions do not support out kwarg + getattr(dpnp, func)(x, out=(out1, out2)) + assert_sycl_queue_equal(out1.sycl_queue, x.sycl_queue) + assert_sycl_queue_equal(out2.sycl_queue, x.sycl_queue) + except TypeError: + pass + + @pytest.mark.parametrize( "func,data1,data2", [ diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 97ceb0220e5..1c84b746128 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -649,6 +649,19 @@ def test_1in_1out(func, data, usm_type): assert x.usm_type == usm_type == res.usm_type +@pytest.mark.parametrize( + "func, data", + [ + pytest.param("frexp", numpy.arange(9)), + ], +) +@pytest.mark.parametrize("usm_type", list_of_usm_types) +def test_1in_2out(func, data, usm_type): + x = dpnp.array(data, usm_type=usm_type) + res1, res2 = getattr(dpnp, func)(x) + assert x.usm_type == usm_type == res1.usm_type == res2.usm_type + + @pytest.mark.parametrize( "func,data1,data2", [ From 52299392d51f8131ede3e629795c6248d59a0d45 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 21:43:01 +0200 Subject: [PATCH 13/19] Add tests to cover frexp and new class --- dpnp/dpnp_algo/dpnp_elementwise_common.py | 10 +- dpnp/tests/test_mathematical.py | 270 +++++++++++++++++----- 2 files changed, 218 insertions(+), 62 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index a6e29622c89..70f49c26125 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -179,7 +179,7 @@ def __call__( ) elif dtype is not None and out is not None: raise TypeError( - f"Requested function={self.name_} only takes `out` or `dtype`" + f"Requested function={self.name_} only takes `out` or `dtype` " "as an argument, but both were provided." ) @@ -356,6 +356,12 @@ def __call__( res_dt = res_dts[i] if res_dt != res.dtype: + if not dpnp.can_cast(res_dt, res.dtype, casting="same_kind"): + raise TypeError( + f"Cannot cast ufunc '{self.name_}' output {i + 1} from " + f"{res_dt} to {res.dtype} with casting rule 'same_kind'" + ) + # Allocate a temporary buffer with the required dtype out[i] = dpt.empty_like(res, dtype=res_dt) elif ( @@ -564,7 +570,7 @@ def __call__( ) elif dtype is not None and out is not None: raise TypeError( - f"Requested function={self.name_} only takes `out` or `dtype`" + f"Requested function={self.name_} only takes `out` or `dtype` " "as an argument, but both were provided." 
) diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index d0dbd44c853..e8b852189e1 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -710,6 +710,112 @@ def test_errors(self): assert_raises(ExecutionPlacementError, dpnp.ediff1d, ia, to_end=to_end) +class TestFrexp: + ALL_DTYPES = get_all_dtypes(no_none=True) + ALL_DTYPES_NO_COMPLEX = get_all_dtypes( + no_none=True, no_float16=False, no_complex=True + ) + + @pytest.mark.parametrize("dt", ALL_DTYPES_NO_COMPLEX) + def test_basic(self, dt): + a = numpy.array([-2, 5, 1, 4, 3], dtype=dt) + ia = dpnp.array(a) + + res1, res2 = dpnp.frexp(ia) + exp1, exp2 = numpy.frexp(a) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + + def test_out(self): + a = numpy.array(5.7) + ia = dpnp.array(a) + + out1 = numpy.empty(()) + out2 = numpy.empty((), dtype=numpy.int32) + iout1, iout2 = dpnp.array(out1), dpnp.array(out2) + + res1, res2 = dpnp.frexp(ia, iout1) + exp1, exp2 = numpy.frexp(a, out1) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + assert res1 is iout1 + + res1, res2 = dpnp.frexp(ia, None, iout2) + exp1, exp2 = numpy.frexp(a, None, out2) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + assert res2 is iout2 + + res1, res2 = dpnp.frexp(ia, iout1, iout2) + exp1, exp2 = numpy.frexp(a, out1, out2) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + assert res1 is iout1 + assert res2 is iout2 + + @pytest.mark.parametrize("dt", ALL_DTYPES_NO_COMPLEX) + @pytest.mark.parametrize("out1_dt", ALL_DTYPES) + @pytest.mark.parametrize("out2_dt", ALL_DTYPES) + def test_out_all_dtypes(self, dt, out1_dt, out2_dt): + a = numpy.ones(9, dtype=dt) + ia = dpnp.array(a) + + out1 = numpy.zeros(9, dtype=out1_dt) + out2 = numpy.zeros(9, dtype=out2_dt) + iout1, iout2 = dpnp.array(out1), dpnp.array(out2) + + try: + res1, res2 = dpnp.frexp(ia, out=(iout1, iout2)) + except TypeError: + # expect numpy to fail with the same reason + with pytest.raises(TypeError): + _ = numpy.frexp(a, out=(out1, out2)) + else: + exp1, exp2 = numpy.frexp(a, out=(out1, out2)) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + assert res1 is iout1 + assert res2 is iout2 + + @pytest.mark.parametrize("stride", [-4, -2, -1, 1, 2, 4]) + @pytest.mark.parametrize("dt", get_float_dtypes()) + def test_strides_out(self, stride, dt): + a = numpy.array( + [numpy.nan, numpy.nan, numpy.inf, -numpy.inf, 0.0, -0.0, 1.0, -1.0], + dtype=dt, + ) + ia = dpnp.array(a) + + out_mant = numpy.ones(8, dtype=dt) + out_exp = 2 * numpy.ones(8, dtype="i") + iout_mant, iout_exp = dpnp.array(out_mant), dpnp.array(out_exp) + + res1, res2 = dpnp.frexp( + ia[::stride], out=(iout_mant[::stride], iout_exp[::stride]) + ) + exp1, exp2 = numpy.frexp( + a[::stride], out=(out_mant[::stride], out_exp[::stride]) + ) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + + assert_array_equal(iout_mant, out_mant) + assert_array_equal(iout_exp, out_exp) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_out_wrong_type(self, xp): + a = xp.array(0.5) + with pytest.raises(TypeError, match="'out' must be a tuple of arrays"): + _ = xp.frexp(a, out=xp.empty(())) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + @pytest.mark.parametrize("dt", get_complex_dtypes()) + def test_complex_dtype(self, xp, dt): + a = xp.array([-2, 5, 1, 4, 3], dtype=dt) + with pytest.raises((TypeError, ValueError)): + _ = xp.frexp(a) + + class TestGradient: @pytest.mark.parametrize("dt", 
get_all_dtypes(no_none=True, no_bool=True)) def test_basic(self, dt): @@ -1925,6 +2031,110 @@ def test_ndim(self): assert_dtype_allclose(result, expected) +class TestUfunc: + @pytest.mark.parametrize( + "func, nin, nout", + [ + pytest.param("abs", 1, 1, id="DPNPUnaryFunc"), + pytest.param("frexp", 1, 2, id="DPNPUnaryTwoOutputsFunc"), + pytest.param("add", 2, 1, id="DPNPBinaryFunc"), + ], + ) + def test_nin_nout(self, func, nin, nout): + assert getattr(dpnp, func).nin == nin + assert getattr(dpnp, func).nout == nout + + @pytest.mark.parametrize( + "func, kwargs", + [ + pytest.param( + "abs", + {"unknown_kwarg": 1, "where": False, "subok": False}, + id="DPNPUnaryFunc", + ), + pytest.param( + "frexp", + { + "unknown_kwarg": 1, + "where": False, + "dtype": "?", + "subok": False, + }, + id="DPNPUnaryTwoOutputsFunc", + ), + pytest.param( + "add", + {"unknown_kwarg": 1, "where": False, "subok": False}, + id="DPNPBinaryFunc", + ), + ], + ) + def test_not_supported_kwargs(self, func, kwargs): + x = dpnp.array([1, 2, 3]) + + fn = getattr(dpnp, func) + args = [x] * fn.nin + for key, val in kwargs.items(): + with pytest.raises(NotImplementedError): + fn(*args, **{key: val}) + + @pytest.mark.parametrize("func", ["abs", "frexp", "add"]) + @pytest.mark.parametrize("x", [1, [1, 2], numpy.ones(5)]) + def test_wrong_input(self, func, x): + fn = getattr(dpnp, func) + args = [x] * fn.nin + with pytest.raises(TypeError): + fn(*args) + + @pytest.mark.parametrize("func", ["add"]) + def test_binary_wrong_input(self, func): + x = dpnp.array([1, 2, 3]) + with pytest.raises(TypeError): + getattr(dpnp, func)(x, [1, 2]) + with pytest.raises(TypeError): + getattr(dpnp, func)([1, 2], x) + + @pytest.mark.parametrize("func", ["abs", "frexp", "add"]) + def test_wrong_order(self, func): + x = dpnp.array([1, 2, 3]) + + fn = getattr(dpnp, func) + args = [x] * fn.nin + with pytest.raises(ValueError, match="order must be one of"): + fn(*args, order="H") + + @pytest.mark.parametrize("func", ["abs", "add"]) + def test_out_dtype(self, func): + x = dpnp.array([1, 2, 3]) + out = dpnp.array([1, 2, 3]) + + fn = getattr(dpnp, func) + args = [x] * fn.nin + with pytest.raises( + TypeError, match="only takes `out` or `dtype` as an argument" + ): + fn(*args, out=out, dtype="f4") + + @pytest.mark.parametrize("func", ["abs", "frexp", "add"]) + def test_order_none(self, func): + a = numpy.array([1, 2, 3]) + ia = dpnp.array(a) + + fn = getattr(numpy, func) + ifn = getattr(dpnp, func) + + args = [a] * fn.nin + iargs = [ia] * ifn.nin + + result = ifn(*iargs, order=None) + expected = fn(*args, order=None) + if fn.nout == 1: + assert_dtype_allclose(result, expected) + else: + for i in range(fn.nout): + assert_dtype_allclose(result[i], expected[i]) + + class TestUnwrap: @pytest.mark.parametrize("dt", get_float_dtypes()) def test_basic(self, dt): @@ -2568,66 +2778,6 @@ def test_inplace_floor_divide(dtype): assert_allclose(ia, a) -def test_elemenwise_nin_nout(): - assert dpnp.abs.nin == 1 - assert dpnp.add.nin == 2 - - assert dpnp.abs.nout == 1 - assert dpnp.add.nout == 1 - - -def test_elemenwise_error(): - x = dpnp.array([1, 2, 3]) - out = dpnp.array([1, 2, 3]) - - with pytest.raises(NotImplementedError): - dpnp.abs(x, unknown_kwarg=1) - with pytest.raises(NotImplementedError): - dpnp.abs(x, where=False) - with pytest.raises(NotImplementedError): - dpnp.abs(x, subok=False) - with pytest.raises(TypeError): - dpnp.abs(1) - with pytest.raises(TypeError): - dpnp.abs([1, 2]) - with pytest.raises(TypeError): - dpnp.abs(x, out=out, dtype="f4") - with 
pytest.raises(ValueError): - dpnp.abs(x, order="H") - - with pytest.raises(NotImplementedError): - dpnp.add(x, x, unknown_kwarg=1) - with pytest.raises(NotImplementedError): - dpnp.add(x, x, where=False) - with pytest.raises(NotImplementedError): - dpnp.add(x, x, subok=False) - with pytest.raises(TypeError): - dpnp.add(1, 2) - with pytest.raises(TypeError): - dpnp.add([1, 2], [1, 2]) - with pytest.raises(TypeError): - dpnp.add(x, [1, 2]) - with pytest.raises(TypeError): - dpnp.add([1, 2], x) - with pytest.raises(TypeError): - dpnp.add(x, x, out=out, dtype="f4") - with pytest.raises(ValueError): - dpnp.add(x, x, order="H") - - -def test_elemenwise_order_none(): - x_np = numpy.array([1, 2, 3]) - x = dpnp.array([1, 2, 3]) - - result = dpnp.abs(x, order=None) - expected = numpy.abs(x_np, order=None) - assert_dtype_allclose(result, expected) - - result = dpnp.add(x, x, order=None) - expected = numpy.add(x_np, x_np, order=None) - assert_dtype_allclose(result, expected) - - def test_bitwise_1array_input(): x = dpnp.array([1, 2, 3]) x_np = numpy.array([1, 2, 3]) From 25f8cbaae05f98d2bf1517255dec874ce5192540 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 21:46:21 +0200 Subject: [PATCH 14/19] Add PR to the changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15f1840317a..cbc4840017b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The release drops support for Python 3.9, making Python 3.10 the minimum require * Added implementation of `dpnp.linalg.lu_solve` for batch inputs (SciPy-compatible) [#2619](https://github.com/IntelPython/dpnp/pull/2619) * Added `dpnp.exceptions` submodule to aggregate the generic exceptions used by dpnp [#2616](https://github.com/IntelPython/dpnp/pull/2616) * Added implementation of `dpnp.scipy.special.erfcx` [#2596](https://github.com/IntelPython/dpnp/pull/2596) +* Added implementation of `dpnp.frexp` [#2635](https://github.com/IntelPython/dpnp/pull/2635) ### Changed From 65f3d153afed6c9d0b8030b9b8a1fc7fcdd2dc43 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 27 Oct 2025 10:42:35 +0100 Subject: [PATCH 15/19] Add more tests to improve the coverage --- dpnp/dpnp_algo/dpnp_elementwise_common.py | 4 +- dpnp/tests/test_mathematical.py | 112 +++++++++++++++++++--- 2 files changed, 103 insertions(+), 13 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index 70f49c26125..ddbb52d9291 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -319,7 +319,7 @@ def __call__( if not isinstance(out, tuple): raise TypeError("'out' must be a tuple of arrays") - if len(out) != 2: + if len(out) != self.nout: raise ValueError( "'out' tuple must have exactly one entry per ufunc output" ) @@ -341,7 +341,7 @@ def __call__( res = dpnp.get_usm_ndarray(out[i]) if not res.flags.writable: - raise ValueError("provided output array is read-only") + raise ValueError("output array is read-only") if res.shape != x.shape: raise ValueError( diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index e8b852189e1..97be8f615d2 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -786,8 +786,8 @@ def test_strides_out(self, stride, dt): ) ia = dpnp.array(a) - out_mant = numpy.ones(8, dtype=dt) - out_exp = 2 * numpy.ones(8, dtype="i") + out_mant = numpy.ones_like(a) + out_exp = 2 * numpy.ones_like(a, dtype="i") iout_mant, iout_exp = 
dpnp.array(out_mant), dpnp.array(out_exp) res1, res2 = dpnp.frexp( @@ -802,11 +802,34 @@ def test_strides_out(self, stride, dt): assert_array_equal(iout_mant, out_mant) assert_array_equal(iout_exp, out_exp) - @pytest.mark.parametrize("xp", [numpy, dpnp]) - def test_out_wrong_type(self, xp): - a = xp.array(0.5) - with pytest.raises(TypeError, match="'out' must be a tuple of arrays"): - _ = xp.frexp(a, out=xp.empty(())) + @pytest.mark.parametrize("dt", get_float_dtypes()) + def test_out_overlap(self, dt): + a = numpy.ones(15, dtype=dt) + ia = dpnp.array(a) + + out_mant = numpy.ones_like(a) + out_exp = 2 * numpy.ones_like(a, dtype="i") + iout_mant, iout_exp = dpnp.array(out_mant), dpnp.array(out_exp) + + res1, res2 = dpnp.frexp(ia, out=(iout_mant, iout_exp)) + exp1, exp2 = numpy.frexp(a, out=(out_mant, out_exp)) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + + assert_array_equal(iout_mant, out_mant) + assert_array_equal(iout_exp, out_exp) + assert res1 is iout_mant + assert res2 is iout_exp + + @pytest.mark.parametrize("dt", get_float_dtypes()) + def test_empty(self, dt): + a = numpy.empty((), dtype=dt) + ia = dpnp.array(a) + + res1, res2 = dpnp.frexp(ia) + exp1, exp2 = numpy.frexp(a) + assert_array_equal(res1, exp1, strict=True) + assert_array_equal(res2, exp2, strict=True) @pytest.mark.parametrize("xp", [numpy, dpnp]) @pytest.mark.parametrize("dt", get_complex_dtypes()) @@ -2080,7 +2103,7 @@ def test_not_supported_kwargs(self, func, kwargs): @pytest.mark.parametrize("func", ["abs", "frexp", "add"]) @pytest.mark.parametrize("x", [1, [1, 2], numpy.ones(5)]) - def test_wrong_input(self, func, x): + def test_unary_wrong_input(self, func, x): fn = getattr(dpnp, func) args = [x] * fn.nin with pytest.raises(TypeError): @@ -2115,8 +2138,75 @@ def test_out_dtype(self, func): ): fn(*args, out=out, dtype="f4") + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unary_two_outs_out_ndarray(self, xp): + x = xp.array(0.5) + with pytest.raises(TypeError, match="'out' must be a tuple of arrays"): + _ = xp.frexp(x, out=xp.empty(())) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + @pytest.mark.parametrize("out", [(), (1,), (1, 2, 3)]) + def test_unary_two_outs_out_wrong_tuple_len(self, xp, out): + x = xp.array(0.5) + with pytest.raises( + ValueError, + match="'out' tuple must have exactly one entry per ufunc output", + ): + _ = xp.frexp(x, out=out) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unary_two_outs_out_mixed(self, xp): + x = xp.array(0.5) + with pytest.raises( + TypeError, + match="cannot specify 'out' as both a positional and keyword", + ): + _ = xp.frexp(x, xp.empty(()), out=(xp.empty(()), None)) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unary_two_outs_out_not_writable(self, xp): + x = xp.array(0.5) + out1 = xp.empty(()) + out1.flags["W"] = False + + with pytest.raises(ValueError, match="array is read-only"): + _ = xp.frexp(x, out1) + + out2 = xp.empty((), dtype="i") + out2.flags["W"] = False + with pytest.raises(ValueError, match="array is read-only"): + _ = xp.frexp(x, out=(None, out2)) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unary_two_outs_out_wrong_shape(self, xp): + x = xp.full(6, fill_value=0.5) + out1 = xp.empty(12) + with pytest.raises(ValueError): + _ = xp.frexp(x, out1) + + out2 = xp.empty((2, 3), dtype="i") + with pytest.raises(ValueError): + _ = xp.frexp(x, out=(None, out2)) + + def test_unary_two_outs_cfd_error(self): + x = dpnp.array(0.5, sycl_queue=dpctl.SyclQueue()) + out1 = dpnp.empty((), 
sycl_queue=dpctl.SyclQueue())
+        out2 = dpnp.empty((), sycl_queue=dpctl.SyclQueue())
+        with pytest.raises(
+            ExecutionPlacementError,
+            match="Input and output allocation queues are not compatible",
+        ):
+            _ = dpnp.frexp(x, out1)
+
+        with pytest.raises(
+            ExecutionPlacementError,
+            match="Input and output allocation queues are not compatible",
+        ):
+            _ = dpnp.frexp(x, out=(None, out2))
+
     @pytest.mark.parametrize("func", ["abs", "frexp", "add"])
-    def test_order_none(self, func):
+    @pytest.mark.parametrize("order", [None, "K", "A", "f", "c"])
+    def test_order(self, func, order):
         a = numpy.array([1, 2, 3])
         ia = dpnp.array(a)
 
@@ -2126,8 +2216,8 @@ def test_order_none(self, func):
         args = [a] * fn.nin
         iargs = [ia] * ifn.nin
 
-        result = ifn(*iargs, order=None)
-        expected = fn(*args, order=None)
+        result = ifn(*iargs, order=order)
+        expected = fn(*args, order=order)
         if fn.nout == 1:
             assert_dtype_allclose(result, expected)
         else:

From 73b5ea5d14821db681e32b1252631318ec863e4e Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 27 Oct 2025 13:51:13 +0100
Subject: [PATCH 16/19] Run test_strides_out only on Linux due to
 platform-dependent results returned by numpy.frexp

---
 dpnp/tests/test_mathematical.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index 97be8f615d2..5f235afd717 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -33,6 +33,7 @@
     has_support_aspect16,
     has_support_aspect64,
     is_intel_numpy,
+    is_win_platform,
     numpy_version,
 )
 from .third_party.cupy import testing
@@ -777,6 +778,10 @@ def test_out_all_dtypes(self, dt, out1_dt, out2_dt):
             assert res1 is iout1
             assert res2 is iout2
 
+    @pytest.mark.skipif(
+        is_win_platform(),
+        reason="numpy.frexp gives different answers for NAN/INF on Windows and Linux",
+    )
     @pytest.mark.parametrize("stride", [-4, -2, -1, 1, 2, 4])
     @pytest.mark.parametrize("dt", get_float_dtypes())
     def test_strides_out(self, stride, dt):

From f53cbcbd8a8525d48816c2c3c8e587f327908297 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 27 Oct 2025 16:11:24 +0100
Subject: [PATCH 17/19] Update test_empty to test exactly empty input array

---
 dpnp/tests/test_mathematical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index 5f235afd717..e1d73f36fc3 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -828,7 +828,7 @@ def test_out_overlap(self, dt):
 
     @pytest.mark.parametrize("dt", get_float_dtypes())
     def test_empty(self, dt):
-        a = numpy.empty((), dtype=dt)
+        a = numpy.empty(0, dtype=dt)
         ia = dpnp.array(a)
 
         res1, res2 = dpnp.frexp(ia)

From 281bae194072fe19d20d0d6a120b1e1d5d2323f0 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 27 Oct 2025 18:12:00 +0100
Subject: [PATCH 18/19] Fix TestFrexp::test_basic to work with unsigned dtype

---
 dpnp/tests/test_mathematical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index e1d73f36fc3..b72d9f2e02d 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -719,7 +719,7 @@ class TestFrexp:
 
     @pytest.mark.parametrize("dt", ALL_DTYPES_NO_COMPLEX)
     def test_basic(self, dt):
-        a = numpy.array([-2, 5, 1, 4, 3], dtype=dt)
+        a = get_abs_array([-2, 5, 1, 4, 3], dtype=dt)
         ia = dpnp.array(a)
 
         res1, res2 = dpnp.frexp(ia)

From d7401a582be555fc450948e4e6715457fdc77042 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 27 Oct 2025 18:51:50 +0100
Subject: [PATCH 19/19] Parametrize dtype of the out arrays in TestFrexp::test_out

---
 dpnp/tests/test_mathematical.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index b72d9f2e02d..30a7ff14f44 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -727,11 +727,12 @@ def test_basic(self, dt):
         assert_array_equal(res1, exp1)
         assert_array_equal(res2, exp2)
 
-    def test_out(self):
-        a = numpy.array(5.7)
+    @pytest.mark.parametrize("dt", get_float_dtypes())
+    def test_out(self, dt):
+        a = numpy.array(5.7, dtype=dt)
         ia = dpnp.array(a)
 
-        out1 = numpy.empty(())
+        out1 = numpy.empty((), dtype=dt)
         out2 = numpy.empty((), dtype=numpy.int32)
         iout1, iout2 = dpnp.array(out1), dpnp.array(out2)