From 89c3576d2b1a0ddff8ef4a4e451f82dc69374356 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 14:30:00 +0200 Subject: [PATCH 01/19] Extend _result_typeid() utility ufunc function to template the mapping table --- .../elementwise_functions_type_utils.cpp | 3 ++- .../elementwise_functions_type_utils.hpp | 8 +++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp index d90a63cf4c2..ed21c618069 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp @@ -78,7 +78,8 @@ py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) } } -int _result_typeid(int arg_typeid, const int *fn_output_id) +template +output_idT _result_typeid(int arg_typeid, const output_idT *fn_output_id) { if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { throw py::value_error("Input typeid " + std::to_string(arg_typeid) + diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp index 465f3f135f0..05903d5ba9e 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp @@ -36,15 +36,17 @@ // dpctl tensor headers #include "utils/type_dispatch.hpp" +namespace dpnp::extensions::py_internal::type_utils +{ namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace dpnp::extensions::py_internal::type_utils -{ /*! @brief Produce dtype from a type number */ extern py::dtype _dtype_from_typenum(td_ns::typenum_t); /*! 
@brief Lookup typeid of the result from typeid of * argument and the mapping table */ -extern int _result_typeid(int, const int *); +template +extern output_idT _result_typeid(int arg_typeid, + const output_idT *fn_output_id); } // namespace dpnp::extensions::py_internal::type_utils From 92baa1ea427340eaa92a71ec11ecf5f8c77f0986 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 14:32:03 +0200 Subject: [PATCH 02/19] Remove unnecessary import in ldexp.cpp --- .../backend/extensions/ufunc/elementwise_functions/ldexp.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp index 5cd3586f542..c8e9598cb1b 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/ldexp.cpp @@ -45,7 +45,6 @@ // dpctl tensor headers #include "kernels/elementwise_functions/common.hpp" -#include "kernels/elementwise_functions/maximum.hpp" #include "utils/type_dispatch.hpp" namespace dpnp::extensions::ufunc @@ -57,11 +56,7 @@ namespace td_ns = dpctl::tensor::type_dispatch; namespace impl { namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -namespace max_ns = dpctl::tensor::kernels::maximum; -// Supports the same types table as for maximum function in dpctl -// template -// using OutputType = max_ns::MaximumOutputType; template struct OutputType { From c086482636d8eba1142845635bb4a854a87e9ab1 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 14:33:00 +0200 Subject: [PATCH 03/19] Declare namespace in correct scope in fmod.cpp --- dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp index 7160f0795ea..5b83595b3f7 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/fmod.cpp @@ -51,11 +51,11 @@ namespace dpnp::extensions::ufunc { namespace py = pybind11; namespace py_int = dpnp::extensions::py_internal; +namespace td_ns = dpctl::tensor::type_dispatch; namespace impl { namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; -namespace td_ns = dpctl::tensor::type_dispatch; /** * @brief A factory to define pairs of supported types for which From fea147712a4ef7bccb882c60aa4006f96064feff Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 14:51:12 +0200 Subject: [PATCH 04/19] Add unary implementation for ufunc with two output arrays --- .../elementwise_functions.hpp | 294 ++++++++++++++++-- .../simplify_iteration_space.cpp | 31 +- 2 files changed, 290 insertions(+), 35 deletions(-) diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index 9dfe8cae976..ac592475459 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -28,8 +28,11 @@ #pragma once +#include #include #include +#include +#include #include @@ -43,20 +46,18 @@ // dpctl tensor headers #include "kernels/alignment.hpp" -// #include "kernels/dpctl_tensor_types.hpp" #include "utils/memory_overlap.hpp" #include "utils/offset_utils.hpp" #include "utils/output_validation.hpp" #include "utils/sycl_alloc_utils.hpp" 
#include "utils/type_dispatch.hpp" -namespace py = pybind11; -namespace td_ns = dpctl::tensor::type_dispatch; - static_assert(std::is_same_v); namespace dpnp::extensions::py_internal { +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::kernels::alignment_utils::is_aligned; using dpctl::tensor::kernels::alignment_utils::required_alignment; @@ -108,10 +109,10 @@ std::pair const py::ssize_t *src_shape = src.get_shape_raw(); const py::ssize_t *dst_shape = dst.get_shape_raw(); bool shapes_equal(true); - size_t src_nelems(1); + std::size_t src_nelems(1); for (int i = 0; i < src_nd; ++i) { - src_nelems *= static_cast(src_shape[i]); + src_nelems *= static_cast(src_shape[i]); shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]); } if (!shapes_equal) { @@ -277,6 +278,262 @@ py::object py_unary_ufunc_result_type(const py::dtype &input_dtype, } } +/** + * @brief Template implementing Python API for a unary elementwise function + * with two output arrays. + */ +template +std::pair + py_unary_two_outputs_ufunc(const dpctl::tensor::usm_ndarray &src, + const dpctl::tensor::usm_ndarray &dst1, + const dpctl::tensor::usm_ndarray &dst2, + sycl::queue &q, + const std::vector &depends, + // + const output_typesT &output_type_vec, + const contig_dispatchT &contig_dispatch_vector, + const strided_dispatchT &strided_dispatch_vector) +{ + int src_typenum = src.get_typenum(); + int dst1_typenum = dst1.get_typenum(); + int dst2_typenum = dst2.get_typenum(); + + const auto &array_types = td_ns::usm_ndarray_types(); + int src_typeid = array_types.typenum_to_lookup_id(src_typenum); + int dst1_typeid = array_types.typenum_to_lookup_id(dst1_typenum); + int dst2_typeid = array_types.typenum_to_lookup_id(dst2_typenum); + + std::pair func_output_typeids = output_type_vec[src_typeid]; + + // check that types are supported + if (dst1_typeid != func_output_typeids.first || + dst2_typeid != func_output_typeids.second) + { + throw py::value_error( + "One of destination arrays has unexpected elemental data type."); + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(q, {src, dst1, dst2})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst1); + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst2); + + // check that dimensions are the same + int src_nd = src.get_ndim(); + if (src_nd != dst1.get_ndim() || src_nd != dst2.get_ndim()) { + throw py::value_error("Array dimensions are not the same."); + } + + // check that shapes are the same + const py::ssize_t *src_shape = src.get_shape_raw(); + const py::ssize_t *dst1_shape = dst1.get_shape_raw(); + const py::ssize_t *dst2_shape = dst2.get_shape_raw(); + bool shapes_equal(true); + std::size_t src_nelems(1); + + for (int i = 0; i < src_nd; ++i) { + src_nelems *= static_cast(src_shape[i]); + shapes_equal = shapes_equal && (src_shape[i] == dst1_shape[i]) && + (src_shape[i] == dst2_shape[i]); + } + if (!shapes_equal) { + throw py::value_error("Array shapes are not the same."); + } + + // if nelems is zero, return + if (src_nelems == 0) { + return std::make_pair(sycl::event(), sycl::event()); + } + + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst1, + src_nelems); + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst2, + src_nelems); + + // check memory overlap + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + auto const 
&same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); + if ((overlap(src, dst1) && !same_logical_tensors(src, dst1)) || + (overlap(src, dst2) && !same_logical_tensors(src, dst2)) || + (overlap(dst1, dst2) && !same_logical_tensors(dst1, dst2))) + { + throw py::value_error("Arrays index overlapping segments of memory"); + } + + const char *src_data = src.get_data(); + char *dst1_data = dst1.get_data(); + char *dst2_data = dst2.get_data(); + + // handle contiguous inputs + bool is_src_c_contig = src.is_c_contiguous(); + bool is_src_f_contig = src.is_f_contiguous(); + + bool is_dst1_c_contig = dst1.is_c_contiguous(); + bool is_dst1_f_contig = dst1.is_f_contiguous(); + + bool is_dst2_c_contig = dst2.is_c_contiguous(); + bool is_dst2_f_contig = dst2.is_f_contiguous(); + + bool all_c_contig = + (is_src_c_contig && is_dst1_c_contig && is_dst2_c_contig); + bool all_f_contig = + (is_src_f_contig && is_dst1_f_contig && is_dst2_f_contig); + + if (all_c_contig || all_f_contig) { + auto contig_fn = contig_dispatch_vector[src_typeid]; + + if (contig_fn == nullptr) { + throw std::runtime_error( + "Contiguous implementation is missing for src_typeid=" + + std::to_string(src_typeid)); + } + + auto comp_ev = + contig_fn(q, src_nelems, src_data, dst1_data, dst2_data, depends); + sycl::event ht_ev = + dpctl::utils::keep_args_alive(q, {src, dst1, dst2}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + + // simplify iteration space + // if 1d with strides 1 - input is contig + // dispatch to strided + + auto const &src_strides = src.get_strides_vector(); + auto const &dst1_strides = dst1.get_strides_vector(); + auto const &dst2_strides = dst2.get_strides_vector(); + + using shT = std::vector; + shT simplified_shape; + shT simplified_src_strides; + shT simplified_dst1_strides; + shT simplified_dst2_strides; + py::ssize_t src_offset(0); + py::ssize_t dst1_offset(0); + py::ssize_t dst2_offset(0); + + int nd = src_nd; + const py::ssize_t *shape = src_shape; + + simplify_iteration_space_3( + nd, shape, src_strides, dst1_strides, dst2_strides, + // output + simplified_shape, simplified_src_strides, simplified_dst1_strides, + simplified_dst2_strides, src_offset, dst1_offset, dst2_offset); + + if (nd == 1 && simplified_src_strides[0] == 1 && + simplified_dst1_strides[0] == 1 && simplified_dst2_strides[0] == 1) + { + // Special case of contiguous data + auto contig_fn = contig_dispatch_vector[src_typeid]; + + if (contig_fn == nullptr) { + throw std::runtime_error( + "Contiguous implementation is missing for src_typeid=" + + std::to_string(src_typeid)); + } + + int src_elem_size = src.get_elemsize(); + int dst1_elem_size = dst1.get_elemsize(); + int dst2_elem_size = dst2.get_elemsize(); + auto comp_ev = + contig_fn(q, src_nelems, src_data + src_elem_size * src_offset, + dst1_data + dst1_elem_size * dst1_offset, + dst2_data + dst2_elem_size * dst2_offset, depends); + + sycl::event ht_ev = + dpctl::utils::keep_args_alive(q, {src, dst1, dst2}, {comp_ev}); + + return std::make_pair(ht_ev, comp_ev); + } + + // Strided implementation + auto strided_fn = strided_dispatch_vector[src_typeid]; + + if (strided_fn == nullptr) { + throw std::runtime_error( + "Strided implementation is missing for src_typeid=" + + std::to_string(src_typeid)); + } + + using dpctl::tensor::offset_utils::device_allocate_and_pack; + + std::vector host_tasks{}; + host_tasks.reserve(2); + + auto ptr_size_event_triple_ = device_allocate_and_pack( + q, host_tasks, simplified_shape, simplified_src_strides, + 
simplified_dst1_strides, simplified_dst2_strides); + auto shape_strides_owner = std::move(std::get<0>(ptr_size_event_triple_)); + const auto ©_shape_ev = std::get<2>(ptr_size_event_triple_); + const py::ssize_t *shape_strides = shape_strides_owner.get(); + + sycl::event strided_fn_ev = strided_fn( + q, src_nelems, nd, shape_strides, src_data, src_offset, dst1_data, + dst1_offset, dst2_data, dst2_offset, depends, {copy_shape_ev}); + + // async free of shape_strides temporary + sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free( + q, {strided_fn_ev}, shape_strides_owner); + + host_tasks.push_back(tmp_cleanup_ev); + + return std::make_pair( + dpctl::utils::keep_args_alive(q, {src, dst1, dst2}, host_tasks), + strided_fn_ev); +} + +/** + * @brief Template implementing Python API for querying of type support by + * a unary elementwise function with two output arrays. + */ +template +std::pair + py_unary_two_outputs_ufunc_result_type(const py::dtype &input_dtype, + const output_typesT &output_types) +{ + int tn = input_dtype.num(); // NumPy type numbers are the same as in dpctl + int src_typeid = -1; + + auto array_types = td_ns::usm_ndarray_types(); + + try { + src_typeid = array_types.typenum_to_lookup_id(tn); + } catch (const std::exception &e) { + throw py::value_error(e.what()); + } + + using type_utils::_result_typeid; + std::pair dst_typeids = _result_typeid(src_typeid, output_types); + int dst1_typeid = dst_typeids.first; + int dst2_typeid = dst_typeids.second; + + if (dst1_typeid < 0 || dst2_typeid < 0) { + auto res = py::none(); + auto py_res = py::cast(res); + return std::make_pair(py_res, py_res); + } + else { + using type_utils::_dtype_from_typenum; + + auto dst1_typenum_t = static_cast(dst1_typeid); + auto dst2_typenum_t = static_cast(dst2_typeid); + auto dt1 = _dtype_from_typenum(dst1_typenum_t); + auto dt2 = _dtype_from_typenum(dst2_typenum_t); + + return std::make_pair(py::cast(dt1), + py::cast(dt2)); + } +} + // ======================== Binary functions =========================== namespace @@ -347,10 +604,10 @@ std::pair py_binary_ufunc( const py::ssize_t *src2_shape = src2.get_shape_raw(); const py::ssize_t *dst_shape = dst.get_shape_raw(); bool shapes_equal(true); - size_t src_nelems(1); + std::size_t src_nelems(1); for (int i = 0; i < dst_nd; ++i) { - src_nelems *= static_cast(src1_shape[i]); + src_nelems *= static_cast(src1_shape[i]); shapes_equal = shapes_equal && (src1_shape[i] == dst_shape[i] && src2_shape[i] == dst_shape[i]); } @@ -456,7 +713,7 @@ std::pair py_binary_ufunc( std::initializer_list{0, 1}; static constexpr auto one_zero_strides = std::initializer_list{1, 0}; - constexpr py::ssize_t one{1}; + static constexpr py::ssize_t one{1}; // special case of C-contiguous matrix and a row if (isEqual(simplified_src2_strides, zero_one_strides) && isEqual(simplified_src1_strides, {simplified_shape[1], one}) && @@ -477,8 +734,8 @@ std::pair py_binary_ufunc( is_aligned( dst_data + dst_offset * dst_itemsize)) { - size_t n0 = simplified_shape[0]; - size_t n1 = simplified_shape[1]; + std::size_t n0 = simplified_shape[0]; + std::size_t n1 = simplified_shape[1]; sycl::event comp_ev = matrix_row_broadcast_fn( exec_q, host_tasks, n0, n1, src1_data, src1_offset, src2_data, src2_offset, dst_data, dst_offset, @@ -511,8 +768,8 @@ std::pair py_binary_ufunc( is_aligned( dst_data + dst_offset * dst_itemsize)) { - size_t n0 = simplified_shape[1]; - size_t n1 = simplified_shape[0]; + std::size_t n0 = simplified_shape[1]; + std::size_t n1 = simplified_shape[0]; 
sycl::event comp_ev = row_matrix_broadcast_fn( exec_q, host_tasks, n0, n1, src1_data, src1_offset, src2_data, src2_offset, dst_data, dst_offset, @@ -655,10 +912,10 @@ std::pair const py::ssize_t *rhs_shape = rhs.get_shape_raw(); const py::ssize_t *lhs_shape = lhs.get_shape_raw(); bool shapes_equal(true); - size_t rhs_nelems(1); + std::size_t rhs_nelems(1); for (int i = 0; i < lhs_nd; ++i) { - rhs_nelems *= static_cast(rhs_shape[i]); + rhs_nelems *= static_cast(rhs_shape[i]); shapes_equal = shapes_equal && (rhs_shape[i] == lhs_shape[i]); } if (!shapes_equal) { @@ -749,7 +1006,7 @@ std::pair if (nd == 2) { static constexpr auto one_zero_strides = std::initializer_list{1, 0}; - constexpr py::ssize_t one{1}; + static constexpr py::ssize_t one{1}; // special case of C-contiguous matrix and a row if (isEqual(simplified_rhs_strides, one_zero_strides) && isEqual(simplified_lhs_strides, {one, simplified_shape[0]})) @@ -758,8 +1015,8 @@ std::pair contig_row_matrix_broadcast_dispatch_table[rhs_typeid] [lhs_typeid]; if (row_matrix_broadcast_fn != nullptr) { - size_t n0 = simplified_shape[1]; - size_t n1 = simplified_shape[0]; + std::size_t n0 = simplified_shape[1]; + std::size_t n1 = simplified_shape[0]; sycl::event comp_ev = row_matrix_broadcast_fn( exec_q, host_tasks, n0, n1, rhs_data, rhs_offset, lhs_data, lhs_offset, depends); @@ -805,5 +1062,4 @@ std::pair dpctl::utils::keep_args_alive(exec_q, {rhs, lhs}, host_tasks), strided_fn_ev); } - } // namespace dpnp::extensions::py_internal diff --git a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp index 48fd2c4c425..7555f91d280 100644 --- a/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp +++ b/dpnp/backend/extensions/elementwise_functions/simplify_iteration_space.cpp @@ -26,8 +26,7 @@ // THE POSSIBILITY OF SUCH DAMAGE. 
//***************************************************************************** -#include "dpctl4pybind11.hpp" - +#include #include #include @@ -58,19 +57,19 @@ void simplify_iteration_space(int &nd, simplified_shape.reserve(nd); simplified_shape.insert(std::begin(simplified_shape), shape, shape + nd); - assert(simplified_shape.size() == static_cast(nd)); + assert(simplified_shape.size() == static_cast(nd)); simplified_src_strides.reserve(nd); simplified_src_strides.insert(std::end(simplified_src_strides), std::begin(src_strides), std::end(src_strides)); - assert(simplified_src_strides.size() == static_cast(nd)); + assert(simplified_src_strides.size() == static_cast(nd)); simplified_dst_strides.reserve(nd); simplified_dst_strides.insert(std::end(simplified_dst_strides), std::begin(dst_strides), std::end(dst_strides)); - assert(simplified_dst_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); int contracted_nd = st_ns::simplify_iteration_two_strides( nd, simplified_shape.data(), simplified_src_strides.data(), @@ -90,7 +89,7 @@ void simplify_iteration_space(int &nd, // Populate vectors simplified_shape.reserve(nd); simplified_shape.push_back(shape[0]); - assert(simplified_shape.size() == static_cast(nd)); + assert(simplified_shape.size() == static_cast(nd)); simplified_src_strides.reserve(nd); simplified_dst_strides.reserve(nd); @@ -108,8 +107,8 @@ void simplify_iteration_space(int &nd, simplified_dst_strides.push_back(dst_strides[0]); } - assert(simplified_src_strides.size() == static_cast(nd)); - assert(simplified_dst_strides.size() == static_cast(nd)); + assert(simplified_src_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); } } @@ -136,25 +135,25 @@ void simplify_iteration_space_3( // and improve access pattern simplified_shape.reserve(nd); simplified_shape.insert(std::end(simplified_shape), shape, shape + nd); - assert(simplified_shape.size() == static_cast(nd)); + assert(simplified_shape.size() == static_cast(nd)); simplified_src1_strides.reserve(nd); simplified_src1_strides.insert(std::end(simplified_src1_strides), std::begin(src1_strides), std::end(src1_strides)); - assert(simplified_src1_strides.size() == static_cast(nd)); + assert(simplified_src1_strides.size() == static_cast(nd)); simplified_src2_strides.reserve(nd); simplified_src2_strides.insert(std::end(simplified_src2_strides), std::begin(src2_strides), std::end(src2_strides)); - assert(simplified_src2_strides.size() == static_cast(nd)); + assert(simplified_src2_strides.size() == static_cast(nd)); simplified_dst_strides.reserve(nd); simplified_dst_strides.insert(std::end(simplified_dst_strides), std::begin(dst_strides), std::end(dst_strides)); - assert(simplified_dst_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); int contracted_nd = st_ns::simplify_iteration_three_strides( nd, simplified_shape.data(), simplified_src1_strides.data(), @@ -177,7 +176,7 @@ void simplify_iteration_space_3( // Populate vectors simplified_shape.reserve(nd); simplified_shape.push_back(shape[0]); - assert(simplified_shape.size() == static_cast(nd)); + assert(simplified_shape.size() == static_cast(nd)); simplified_src1_strides.reserve(nd); simplified_src2_strides.reserve(nd); @@ -200,9 +199,9 @@ void simplify_iteration_space_3( simplified_dst_strides.push_back(dst_strides[0]); } - assert(simplified_src1_strides.size() == static_cast(nd)); - assert(simplified_src2_strides.size() == static_cast(nd)); - 
assert(simplified_dst_strides.size() == static_cast(nd)); + assert(simplified_src1_strides.size() == static_cast(nd)); + assert(simplified_src2_strides.size() == static_cast(nd)); + assert(simplified_dst_strides.size() == static_cast(nd)); } } } // namespace dpnp::extensions::py_internal From 8be7efc684faa44b0a225a59aa79233a7a3a59d0 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 15:14:07 +0200 Subject: [PATCH 05/19] Move definition of _result_typeid to the header to avoid undefined reference issue --- .../elementwise_functions.hpp | 5 ++--- .../elementwise_functions_type_utils.cpp | 15 ++------------- .../elementwise_functions_type_utils.hpp | 11 +++++++++-- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp index ac592475459..8132f0dad82 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions.hpp @@ -62,6 +62,8 @@ namespace td_ns = dpctl::tensor::type_dispatch; using dpctl::tensor::kernels::alignment_utils::is_aligned; using dpctl::tensor::kernels::alignment_utils::required_alignment; +using type_utils::_result_typeid; + /*! @brief Template implementing Python API for unary elementwise functions */ template (res); @@ -511,7 +511,6 @@ std::pair throw py::value_error(e.what()); } - using type_utils::_result_typeid; std::pair dst_typeids = _result_typeid(src_typeid, output_types); int dst1_typeid = dst_typeids.first; int dst2_typeid = dst_typeids.second; diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp index ed21c618069..62f7584a3e0 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.cpp @@ -37,11 +37,11 @@ // dpctl tensor headers #include "utils/type_dispatch.hpp" +namespace dpnp::extensions::py_internal::type_utils +{ namespace py = pybind11; namespace td_ns = dpctl::tensor::type_dispatch; -namespace dpnp::extensions::py_internal::type_utils -{ py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) { switch (dst_typenum_t) { @@ -77,15 +77,4 @@ py::dtype _dtype_from_typenum(td_ns::typenum_t dst_typenum_t) throw py::value_error("Unrecognized dst_typeid"); } } - -template -output_idT _result_typeid(int arg_typeid, const output_idT *fn_output_id) -{ - if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { - throw py::value_error("Input typeid " + std::to_string(arg_typeid) + - " is outside of expected bounds."); - } - - return fn_output_id[arg_typeid]; -} } // namespace dpnp::extensions::py_internal::type_utils diff --git a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp index 05903d5ba9e..1bb6fedd702 100644 --- a/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp +++ b/dpnp/backend/extensions/elementwise_functions/elementwise_functions_type_utils.hpp @@ -47,6 +47,13 @@ extern py::dtype _dtype_from_typenum(td_ns::typenum_t); /*! 
@brief Lookup typeid of the result from typeid of * argument and the mapping table */ template -extern output_idT _result_typeid(int arg_typeid, - const output_idT *fn_output_id); +output_idT _result_typeid(int arg_typeid, const output_idT *fn_output_id) +{ + if (arg_typeid < 0 || arg_typeid >= td_ns::num_types) { + throw py::value_error("Input typeid " + std::to_string(arg_typeid) + + " is outside of expected bounds."); + } + + return fn_output_id[arg_typeid]; +} } // namespace dpnp::extensions::py_internal::type_utils From a46c76f79266eff32a6c1e7fb4122a2938a04777 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 15:39:56 +0200 Subject: [PATCH 06/19] Extend type_dispatch namespace with TypeMapTwoResultsEntry and DefaultTwoResultsEntry --- .../type_dispatch_building.hpp | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 dpnp/backend/extensions/elementwise_functions/type_dispatch_building.hpp diff --git a/dpnp/backend/extensions/elementwise_functions/type_dispatch_building.hpp b/dpnp/backend/extensions/elementwise_functions/type_dispatch_building.hpp new file mode 100644 index 00000000000..4813daf2bf6 --- /dev/null +++ b/dpnp/backend/extensions/elementwise_functions/type_dispatch_building.hpp @@ -0,0 +1,61 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// maxification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +#include "utils/type_dispatch.hpp" + +namespace dpnp::extensions::py_internal::type_dispatch +{ +/** + * Extends dpctl::tensor::type_dispatch::TypeMapResultEntry helper structure + * with support of the two result types. + */ +template +struct TypeMapTwoResultsEntry : std::bool_constant> +{ + using result_type1 = ResTy1; + using result_type2 = ResTy2; +}; + +/** + * Extends dpctl::tensor::type_dispatch::DefaultResultEntry helper structure + * with support of the two result types. 
+ */ +template +struct DefaultTwoResultsEntry : std::true_type +{ + using result_type1 = Ty; + using result_type2 = Ty; +}; +} // namespace dpnp::extensions::py_internal::type_dispatch From bf63ef4dc9a4f5db91301e413cc12b0ddd6382bc Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 16:17:09 +0200 Subject: [PATCH 07/19] Extend dpctl::tensor::kernels::elementwise_common namespace with support of two output arrays --- .../elementwise_functions/common.hpp | 480 ++++++++++++++++++ 1 file changed, 480 insertions(+) create mode 100644 dpnp/backend/extensions/elementwise_functions/common.hpp diff --git a/dpnp/backend/extensions/elementwise_functions/common.hpp b/dpnp/backend/extensions/elementwise_functions/common.hpp new file mode 100644 index 00000000000..406575cd00c --- /dev/null +++ b/dpnp/backend/extensions/elementwise_functions/common.hpp @@ -0,0 +1,480 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// maxification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include +#include +#include + +#include + +// dpctl tensor headers +#include "kernels/alignment.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "utils/sycl_utils.hpp" + +namespace dpnp::extensions::py_internal::elementwise_common +{ +using dpctl::tensor::kernels::alignment_utils:: + disabled_sg_loadstore_wrapper_krn; +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + +using dpctl::tensor::kernels::elementwise_common::select_lws; + +using dpctl::tensor::sycl_utils::sub_group_load; +using dpctl::tensor::sycl_utils::sub_group_store; + +/** + * @brief Functor for evaluation of a unary function with two output arrays on + * contiguous arrays. + * + * @note It extends UnaryContigFunctor from + * dpctl::tensor::kernels::elementwise_common namespace. 
+ */ +template +struct UnaryTwoOutputsContigFunctor +{ +private: + const argT *in = nullptr; + resT1 *out1 = nullptr; + resT2 *out2 = nullptr; + std::size_t nelems_; + +public: + UnaryTwoOutputsContigFunctor(const argT *inp, + resT1 *res1, + resT2 *res2, + const std::size_t n_elems) + : in(inp), out1(res1), out2(res2), nelems_(n_elems) + { + } + + void operator()(sycl::nd_item<1> ndit) const + { + static constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + UnaryTwoOutputsOpT op{}; + /* Each work-item processes vec_sz elements, contiguous in memory */ + /* NOTE: work-group size must be divisible by sub-group size */ + + if constexpr (enable_sg_loadstore && + UnaryTwoOutputsOpT::is_constant::value) { + // value of operator is known to be a known constant + constexpr resT1 const_val1 = UnaryTwoOutputsOpT::constant_value1; + constexpr resT2 const_val2 = UnaryTwoOutputsOpT::constant_value2; + + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + if (base + elems_per_wi * sgSize < nelems_) { + static constexpr sycl::vec res1_vec(const_val1); + static constexpr sycl::vec res2_vec(const_val2); +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto out1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out1[offset]); + auto out2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out2[offset]); + + sub_group_store(sg, res1_vec, out1_multi_ptr); + sub_group_store(sg, res2_vec, out2_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out1[k] = const_val1; + out2[k] = const_val2; + } + } + } + else if constexpr (enable_sg_loadstore && + UnaryTwoOutputsOpT::supports_sg_loadstore::value && + UnaryTwoOutputsOpT::supports_vec::value && + (vec_sz > 1)) + { + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + if (base + elems_per_wi * sgSize < nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in[offset]); + auto out1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out1[offset]); + auto out2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out2[offset]); + + const sycl::vec x = + sub_group_load(sg, in_multi_ptr); + sycl::vec res2_vec = {}; + const sycl::vec res1_vec = op(x, res2_vec); + sub_group_store(sg, res1_vec, out1_multi_ptr); + sub_group_store(sg, res2_vec, out2_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + // scalar call + out1[k] = op(in[k], out2[k]); + } + } + } + else if constexpr (enable_sg_loadstore && + UnaryTwoOutputsOpT::supports_sg_loadstore::value && + std::is_same_v) + { + // default: use scalar-value 
function + + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + if (base + elems_per_wi * sgSize < nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in[offset]); + auto out1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out1[offset]); + auto out2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out2[offset]); + + sycl::vec arg_vec = + sub_group_load(sg, in_multi_ptr); + sycl::vec res2_vec = {}; +#pragma unroll + for (std::uint32_t k = 0; k < vec_sz; ++k) { + arg_vec[k] = op(arg_vec[k], res2_vec[k]); + } + sub_group_store(sg, arg_vec, out1_multi_ptr); + sub_group_store(sg, res2_vec, out2_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out1[k] = op(in[k], out2[k]); + } + } + } + else if constexpr (enable_sg_loadstore && + UnaryTwoOutputsOpT::supports_sg_loadstore::value) + { + // default: use scalar-value function + + auto sg = ndit.get_sub_group(); + const std::uint16_t sgSize = sg.get_max_local_range()[0]; + const std::size_t base = + elems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) + + sg.get_group_id()[0] * sgSize); + + if (base + elems_per_wi * sgSize < nelems_) { +#pragma unroll + for (std::uint8_t it = 0; it < elems_per_wi; it += vec_sz) { + const std::size_t offset = base + it * sgSize; + auto in_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&in[offset]); + auto out1_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out1[offset]); + auto out2_multi_ptr = sycl::address_space_cast< + sycl::access::address_space::global_space, + sycl::access::decorated::yes>(&out2[offset]); + + const sycl::vec arg_vec = + sub_group_load(sg, in_multi_ptr); + sycl::vec res1_vec = {}; + sycl::vec res2_vec = {}; +#pragma unroll + for (std::uint8_t k = 0; k < vec_sz; ++k) { + res1_vec[k] = op(arg_vec[k], res2_vec[k]); + } + sub_group_store(sg, res1_vec, out1_multi_ptr); + sub_group_store(sg, res2_vec, out2_multi_ptr); + } + } + else { + const std::size_t lane_id = sg.get_local_id()[0]; + for (std::size_t k = base + lane_id; k < nelems_; k += sgSize) { + out1[k] = op(in[k], out2[k]); + } + } + } + else { + const std::uint16_t sgSize = + ndit.get_sub_group().get_local_range()[0]; + const std::size_t gid = ndit.get_global_linear_id(); + const std::uint16_t elems_per_sg = sgSize * elems_per_wi; + + const std::size_t start = + (gid / sgSize) * (elems_per_sg - sgSize) + gid; + const std::size_t end = std::min(nelems_, start + elems_per_sg); + for (std::size_t offset = start; offset < end; offset += sgSize) { + out1[offset] = op(in[offset], out2[offset]); + } + } + } +}; + +/** + * @brief Functor for evaluation of a unary function with two output arrays on + * strided data. + * + * @note It extends UnaryStridedFunctor from + * dpctl::tensor::kernels::elementwise_common namespace. 
+ */ +template +struct UnaryTwoOutputsStridedFunctor +{ +private: + const argT *inp_ = nullptr; + resT1 *res1_ = nullptr; + resT2 *res2_ = nullptr; + IndexerT inp_out_indexer_; + +public: + UnaryTwoOutputsStridedFunctor(const argT *inp_p, + resT1 *res1_p, + resT2 *res2_p, + const IndexerT &inp_out_indexer) + : inp_(inp_p), res1_(res1_p), res2_(res2_p), + inp_out_indexer_(inp_out_indexer) + { + } + + void operator()(sycl::id<1> wid) const + { + const auto &offsets_ = inp_out_indexer_(wid.get(0)); + const ssize_t &inp_offset = offsets_.get_first_offset(); + const ssize_t &res1_offset = offsets_.get_second_offset(); + const ssize_t &res2_offset = offsets_.get_third_offset(); + + UnaryTwoOutputsOpT op{}; + + res1_[res1_offset] = op(inp_[inp_offset], res2_[res2_offset]); + } +}; + +/** + * @brief Function to submit a kernel for unary functor with two output arrays + * on contiguous arrays. + * + * @note It extends unary_contig_impl from + * dpctl::tensor::kernels::elementwise_common namespace. + */ +template + class UnaryTwoOutputsType, + template + class UnaryTwoOutputsContigFunctorT, + template + class kernel_name, + std::uint8_t vec_sz = 4u, + std::uint8_t n_vecs = 2u> +sycl::event + unary_two_outputs_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg_p, + char *res1_p, + char *res2_p, + const std::vector &depends = {}) +{ + static constexpr std::uint8_t elems_per_wi = n_vecs * vec_sz; + const std::size_t n_work_items_needed = nelems / elems_per_wi; + const std::size_t lws = + select_lws(exec_q.get_device(), n_work_items_needed); + + const std::size_t n_groups = + ((nelems + lws * elems_per_wi - 1) / (lws * elems_per_wi)); + const auto gws_range = sycl::range<1>(n_groups * lws); + const auto lws_range = sycl::range<1>(lws); + + using resTy1 = typename UnaryTwoOutputsType::value_type1; + using resTy2 = typename UnaryTwoOutputsType::value_type2; + using BaseKernelName = kernel_name; + + const argTy *arg_tp = reinterpret_cast(arg_p); + resTy1 *res1_tp = reinterpret_cast(res1_p); + resTy2 *res2_tp = reinterpret_cast(res2_p); + + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + if (is_aligned(arg_p) && + is_aligned(res1_p) && + is_aligned(res2_p)) + { + static constexpr bool enable_sg_loadstore = true; + using KernelName = BaseKernelName; + using Impl = + UnaryTwoOutputsContigFunctorT; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + Impl(arg_tp, res1_tp, res2_tp, nelems)); + } + else { + static constexpr bool disable_sg_loadstore = false; + using KernelName = + disabled_sg_loadstore_wrapper_krn; + using Impl = + UnaryTwoOutputsContigFunctorT; + + cgh.parallel_for( + sycl::nd_range<1>(gws_range, lws_range), + Impl(arg_tp, res1_tp, res2_tp, nelems)); + } + }); + + return comp_ev; +} + +/** + * @brief Function to submit a kernel for unary functor with two output arrays + * on strided data. + * + * @note It extends unary_strided_impl from + * dpctl::tensor::kernels::elementwise_common namespace. 
+ */ +template + class UnaryTwoOutputsType, + template + class UnaryTwoOutputsStridedFunctorT, + template + class kernel_name> +sycl::event unary_two_outputs_strided_impl( + sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res1_p, + ssize_t res1_offset, + char *res2_p, + ssize_t res2_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.depends_on(additional_depends); + + using res1Ty = typename UnaryTwoOutputsType::value_type1; + using res2Ty = typename UnaryTwoOutputsType::value_type2; + using IndexerT = + typename dpctl::tensor::offset_utils::ThreeOffsets_StridedIndexer; + + const IndexerT indexer{nd, arg_offset, res1_offset, res2_offset, + shape_and_strides}; + + const argTy *arg_tp = reinterpret_cast(arg_p); + res1Ty *res1_tp = reinterpret_cast(res1_p); + res2Ty *res2_tp = reinterpret_cast(res2_p); + + using Impl = + UnaryTwoOutputsStridedFunctorT; + + cgh.parallel_for>( + {nelems}, Impl(arg_tp, res1_tp, res2_tp, indexer)); + }); + return comp_ev; +} + +// Typedefs for function pointers + +typedef sycl::event (*unary_two_outputs_contig_impl_fn_ptr_t)( + sycl::queue &, + std::size_t, + const char *, + char *, + char *, + const std::vector &); + +typedef sycl::event (*unary_two_outputs_strided_impl_fn_ptr_t)( + sycl::queue &, + std::size_t, + int, + const ssize_t *, + const char *, + ssize_t, + char *, + ssize_t, + char *, + ssize_t, + const std::vector &, + const std::vector &); + +} // namespace dpnp::extensions::py_internal::elementwise_common From 2b4f5273eca42d514addb796640a226c34ce22bb Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Wed, 22 Oct 2025 16:32:51 +0200 Subject: [PATCH 08/19] Add frexp implementation to ufunc extension --- dpnp/backend/extensions/ufunc/CMakeLists.txt | 1 + .../ufunc/elementwise_functions/common.cpp | 2 + .../ufunc/elementwise_functions/frexp.cpp | 261 ++++++++++++++++++ .../ufunc/elementwise_functions/frexp.hpp | 38 +++ .../kernels/elementwise_functions/frexp.hpp | 53 ++++ 5 files changed, 355 insertions(+) create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp create mode 100644 dpnp/backend/extensions/ufunc/elementwise_functions/frexp.hpp create mode 100644 dpnp/backend/kernels/elementwise_functions/frexp.hpp diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index 403eb144d77..62e8249b508 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -38,6 +38,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmax.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmin.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/fmod.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/frexp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/gcd.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/heaviside.cpp ${CMAKE_CURRENT_SOURCE_DIR}/elementwise_functions/i0.cpp diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp index 23283d94bed..f59f6c709a7 100644 --- a/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/common.cpp @@ -37,6 +37,7 @@ #include "fmax.hpp" #include "fmin.hpp" #include "fmod.hpp" +#include "frexp.hpp" 
#include "gcd.hpp" #include "heaviside.hpp" #include "i0.hpp" @@ -68,6 +69,7 @@ void init_elementwise_functions(py::module_ m) init_fmax(m); init_fmin(m); init_fmod(m); + init_frexp(m); init_gcd(m); init_heaviside(m); init_i0(m); diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp new file mode 100644 index 00000000000..5e5a8929012 --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.cpp @@ -0,0 +1,261 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// maxification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include +#include +#include +#include + +#include + +#include "dpctl4pybind11.hpp" + +#include "frexp.hpp" +#include "kernels/elementwise_functions/frexp.hpp" +#include "populate.hpp" + +// include a local copy of elementwise common header from dpctl tensor: +// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp +// TODO: replace by including dpctl header once available +#include "../../elementwise_functions/elementwise_functions.hpp" + +#include "../../elementwise_functions/common.hpp" +#include "../../elementwise_functions/type_dispatch_building.hpp" + +// utils extension header +#include "ext/common.hpp" + +// dpctl tensor headers +#include "kernels/elementwise_functions/common.hpp" +#include "utils/type_dispatch.hpp" + +namespace dpnp::extensions::ufunc +{ +namespace py = pybind11; +namespace py_int = dpnp::extensions::py_internal; + +namespace impl +{ +namespace ew_cmn_ns = dpnp::extensions::py_internal::elementwise_common; +namespace td_int_ns = py_int::type_dispatch; +namespace td_ns = dpctl::tensor::type_dispatch; + +using dpnp::kernels::frexp::FrexpFunctor; +using ext::common::init_dispatch_vector; + +template +struct FrexpOutputType +{ + using table_type = std::disjunction< // disjunction is C++17 + // feature, supported by DPC++ + td_int_ns:: + TypeMapTwoResultsEntry, + td_int_ns::TypeMapTwoResultsEntry, + td_int_ns::TypeMapTwoResultsEntry, + td_int_ns::DefaultTwoResultsEntry>; + using value_type1 = typename table_type::result_type1; + using value_type2 = typename table_type::result_type2; +}; + +// contiguous implementation + +template +using FrexpContigFunctor = + ew_cmn_ns::UnaryTwoOutputsContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +// strided implementation + +template +using FrexpStridedFunctor = ew_cmn_ns::UnaryTwoOutputsStridedFunctor< + argTy, + resTy1, + resTy2, + IndexerT, + FrexpFunctor>; + +template +class frexp_contig_kernel; + +template +sycl::event frexp_contig_impl(sycl::queue &exec_q, + size_t nelems, + const char *arg_p, + char *res1_p, + char *res2_p, + const std::vector &depends = {}) +{ + return ew_cmn_ns::unary_two_outputs_contig_impl< + argTy, FrexpOutputType, FrexpContigFunctor, frexp_contig_kernel>( + exec_q, nelems, arg_p, res1_p, res2_p, depends); +} + +template +struct FrexpContigFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type1, + void> || + std::is_same_v::value_type2, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = frexp_contig_impl; + return fn; + } + } +}; + +template +class frexp_strided_kernel; + +template +sycl::event + frexp_strided_impl(sycl::queue &exec_q, + size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res1_p, + ssize_t res1_offset, + char *res2_p, + ssize_t res2_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return ew_cmn_ns::unary_two_outputs_strided_impl< + argTy, FrexpOutputType, FrexpStridedFunctor, frexp_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res1_p, + res1_offset, res2_p, res2_offset, depends, additional_depends); +} + +template +struct FrexpStridedFactory +{ + fnT get() + { + if constexpr (std::is_same_v::value_type1, + void> || + std::is_same_v::value_type2, + void>) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = frexp_strided_impl; + return fn; + } + } +}; + +template +struct FrexpTypeMapFactory +{ + /*! 
@brief get typeid for output type of sycl::frexp(T x) */ + std::enable_if_t>::value, + std::pair> + get() + { + using rT1 = typename FrexpOutputType::value_type1; + using rT2 = typename FrexpOutputType::value_type2; + return std::make_pair(td_ns::GetTypeid{}.get(), + td_ns::GetTypeid{}.get()); + } +}; + +using ew_cmn_ns::unary_two_outputs_contig_impl_fn_ptr_t; +using ew_cmn_ns::unary_two_outputs_strided_impl_fn_ptr_t; + +static unary_two_outputs_contig_impl_fn_ptr_t + frexp_contig_dispatch_vector[td_ns::num_types]; +static std::pair frexp_output_typeid_vector[td_ns::num_types]; +static unary_two_outputs_strided_impl_fn_ptr_t + frexp_strided_dispatch_vector[td_ns::num_types]; + +void populate_frexp_dispatch_vectors(void) +{ + init_dispatch_vector(frexp_contig_dispatch_vector); + init_dispatch_vector(frexp_strided_dispatch_vector); + init_dispatch_vector, FrexpTypeMapFactory>( + frexp_output_typeid_vector); +}; + +// MACRO_POPULATE_DISPATCH_TABLES(ldexp); +} // namespace impl + +void init_frexp(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_frexp_dispatch_vectors(); + using impl::frexp_contig_dispatch_vector; + using impl::frexp_output_typeid_vector; + using impl::frexp_strided_dispatch_vector; + + auto frexp_pyapi = [&](const arrayT &src, const arrayT &dst1, + const arrayT &dst2, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_int::py_unary_two_outputs_ufunc( + src, dst1, dst2, exec_q, depends, frexp_output_typeid_vector, + frexp_contig_dispatch_vector, frexp_strided_dispatch_vector); + }; + m.def("_frexp", frexp_pyapi, "", py::arg("src"), py::arg("dst1"), + py::arg("dst2"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + auto frexp_result_type_pyapi = [&](const py::dtype &dtype) { + return py_int::py_unary_two_outputs_ufunc_result_type( + dtype, frexp_output_typeid_vector); + }; + m.def("_frexp_result_type", frexp_result_type_pyapi); + } +} +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.hpp b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.hpp new file mode 100644 index 00000000000..e87f63ce1cb --- /dev/null +++ b/dpnp/backend/extensions/ufunc/elementwise_functions/frexp.hpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +namespace py = pybind11; + +namespace dpnp::extensions::ufunc +{ +void init_frexp(py::module_ m); +} // namespace dpnp::extensions::ufunc diff --git a/dpnp/backend/kernels/elementwise_functions/frexp.hpp b/dpnp/backend/kernels/elementwise_functions/frexp.hpp new file mode 100644 index 00000000000..8ab800fef15 --- /dev/null +++ b/dpnp/backend/kernels/elementwise_functions/frexp.hpp @@ -0,0 +1,53 @@ +//***************************************************************************** +// Copyright (c) 2025, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+
+//*****************************************************************************
+
+#pragma once
+
+#include <sycl/sycl.hpp>
+
+namespace dpnp::kernels::frexp
+{
+template <typename argT, typename mantT, typename expT>
+struct FrexpFunctor
+{
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT1 constant_value1 = resT1{};
+    // constexpr resT2 constant_value2 = resT2{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argT and mantT, expT support subgroup store/load operation
+    using supports_sg_loadstore = typename std::true_type;
+
+    mantT operator()(const argT &in, expT &exp) const
+    {
+        return sycl::frexp(in, &exp);
+    }
+};
+} // namespace dpnp::kernels::frexp

From 6df1b8035d0a0ee9f696033182650007d41b4b02 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Thu, 23 Oct 2025 17:09:43 +0200
Subject: [PATCH 09/19] Add DPNPUnaryTwoOutputsFunc class for unary
 element-wise functions with two output arrays

---
 dpnp/dpnp_algo/dpnp_elementwise_common.py | 229 ++++++++++++++++++++++
 dpnp/dpnp_utils/dpnp_utils_common.py      |  29 ++-
 2 files changed, 255 insertions(+), 3 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
index c7393a79657..a6e29622c89 100644
--- a/dpnp/dpnp_algo/dpnp_elementwise_common.py
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -28,8 +28,10 @@
 # *****************************************************************************
 
 import dpctl.tensor as dpt
+import dpctl.tensor._copy_utils as dtc
 import dpctl.tensor._tensor_impl as dti
 import dpctl.tensor._type_utils as dtu
+import dpctl.utils as dpu
 import numpy
 from dpctl.tensor._elementwise_common import (
     BinaryElementwiseFunc,
@@ -39,6 +41,9 @@
 import dpnp
 import dpnp.backend.extensions.vm._vm_impl as vmi
 from dpnp.dpnp_array import dpnp_array
+from dpnp.dpnp_utils.dpnp_utils_common import (
+    find_buf_dtype_3out,
+)
 
 __all__ = [
     "DPNPI0",
@@ -50,6 +55,7 @@
     "DPNPRound",
     "DPNPSinc",
     "DPNPUnaryFunc",
+    "DPNPUnaryTwoOutputsFunc",
     "acceptance_fn_gcd_lcm",
     "acceptance_fn_negative",
     "acceptance_fn_positive",
@@ -102,6 +108,7 @@ class DPNPUnaryFunc(UnaryElementwiseFunc):
         The function is invoked when the argument of the unary function
         requires casting, e.g. the argument of `dpctl.tensor.log` is an
         array with integral data type.
+
     """
 
     def __init__(
@@ -197,6 +204,227 @@ def __call__(
         return dpnp_array._create_from_usm_ndarray(res_usm)
 
 
+class DPNPUnaryTwoOutputsFunc(UnaryElementwiseFunc):
+    """
+    Class that implements unary element-wise functions with two output arrays.
+
+    Parameters
+    ----------
+    name : {str}
+        Name of the unary function
+    result_type_resolver_fn : {callable}
+        Function that takes dtype of the input and returns the dtypes of
+        the results if the implementation function supports it, or
+        returns `None` otherwise.
+    unary_dp_impl_fn : {callable}
+        Data-parallel implementation function with signature
+        `impl_fn(src: usm_ndarray, dst1: usm_ndarray, dst2: usm_ndarray,
+        sycl_queue: SyclQueue, depends: Optional[List[SyclEvent]])`
+        where the `src` is the argument array, `dst1` and `dst2` are the
+        arrays to be populated with function values, effectively
+        evaluating `dst1, dst2 = func(src)`.
+        The `impl_fn` is expected to return a 2-tuple of `SyclEvent`s. 
+ The first event corresponds to data-management host tasks, + including lifetime management of argument Python objects to ensure + that their associated USM allocation is not freed before offloaded + computational tasks complete execution, while the second event + corresponds to computational tasks associated with function evaluation. + docs : {str} + Documentation string for the unary function. + + """ + + def __init__( + self, + name, + result_type_resolver_fn, + unary_dp_impl_fn, + docs, + ): + super().__init__( + name, + result_type_resolver_fn, + unary_dp_impl_fn, + docs, + ) + self.__name__ = "DPNPUnaryTwoOutputsFunc" + + @property + def nout(self): + """Returns the number of arguments treated as outputs.""" + return 2 + + def __call__( + self, + x, + out1=None, + out2=None, + /, + *, + out=(None, None), + where=True, + order="K", + dtype=None, + subok=True, + **kwargs, + ): + if kwargs: + raise NotImplementedError( + f"Requested function={self.name_} with kwargs={kwargs} " + "isn't currently supported." + ) + elif where is not True: + raise NotImplementedError( + f"Requested function={self.name_} with where={where} " + "isn't currently supported." + ) + elif dtype is not None: + raise NotImplementedError( + f"Requested function={self.name_} with dtype={dtype} " + "isn't currently supported." + ) + elif subok is not True: + raise NotImplementedError( + f"Requested function={self.name_} with subok={subok} " + "isn't currently supported." + ) + + x = dpnp.get_usm_ndarray(x) + exec_q = x.sycl_queue + + if order is None: + order = "K" + elif order in "afkcAFKC": + order = order.upper() + if order == "A": + order = "F" if x.flags.f_contiguous else "C" + else: + raise ValueError( + "order must be one of 'C', 'F', 'A', or 'K' " f"(got '{order}')" + ) + + buf_dt, res1_dt, res2_dt = find_buf_dtype_3out( + x.dtype, + self.result_type_resolver_fn_, + x.sycl_device, + ) + if res1_dt is None or res2_dt is None: + raise ValueError( + f"function '{self.name_}' does not support input type " + f"({x.dtype}), " + "and the input could not be safely coerced to any " + "supported types according to the casting rule ''safe''." + ) + + if not isinstance(out, tuple): + raise TypeError("'out' must be a tuple of arrays") + + if len(out) != 2: + raise ValueError( + "'out' tuple must have exactly one entry per ufunc output" + ) + + if not (out1 is None and out2 is None): + if all(res is None for res in out): + out = (out1, out2) + else: + raise TypeError( + "cannot specify 'out' as both a positional and keyword argument" + ) + + orig_out, out = list(out), list(out) + res_dts = [res1_dt, res2_dt] + + for i in range(2): + if out[i] is None: + continue + + res = dpnp.get_usm_ndarray(out[i]) + if not res.flags.writable: + raise ValueError("provided output array is read-only") + + if res.shape != x.shape: + raise ValueError( + "The shape of input and output arrays are inconsistent. " + f"Expected output shape is {x.shape}, got {res.shape}" + ) + + if dpu.get_execution_queue((exec_q, res.sycl_queue)) is None: + raise dpnp.exceptions.ExecutionPlacementError( + "Input and output allocation queues are not compatible" + ) + + res_dt = res_dts[i] + if res_dt != res.dtype: + # Allocate a temporary buffer with the required dtype + out[i] = dpt.empty_like(res, dtype=res_dt) + elif ( + buf_dt is None + and dti._array_overlap(x, res) + and not dti._same_logical_tensors(x, res) + ): + # Allocate a temporary buffer to avoid memory overlapping. 
+ # Note if `buf_dt` is not None, a temporary copy of `x` will be + # created, so the array overlap check isn't needed. + out[i] = dpt.empty_like(res) + + _manager = dpu.SequentialOrderManager[exec_q] + dep_evs = _manager.submitted_events + + # Cast input array to the supported type if needed + if buf_dt is not None: + if order == "K": + buf = dtc._empty_like_orderK(x, buf_dt) + else: + buf = dpt.empty_like(x, dtype=buf_dt, order=order) + + ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray( + src=x, dst=buf, sycl_queue=exec_q, depends=dep_evs + ) + _manager.add_event_pair(ht_copy_ev, copy_ev) + + x = buf + dep_evs = copy_ev + + # Allocate a buffer for the output arrays if needed + for i in range(2): + if out[i] is None: + res_dt = res_dts[i] + if order == "K": + out[i] = dtc._empty_like_orderK(x, res_dt) + else: + out[i] = dpt.empty_like(x, dtype=res_dt, order=order) + + # Call the unary function with input and output arrays + dep_evs = _manager.submitted_events + ht_unary_ev, unary_ev = self.get_implementation_function()( + x, + dpnp.get_usm_ndarray(out[0]), + dpnp.get_usm_ndarray(out[1]), + sycl_queue=exec_q, + depends=dep_evs, + ) + _manager.add_event_pair(ht_unary_ev, unary_ev) + + for i in range(2): + orig_res, res = orig_out[i], out[i] + if not (orig_res is None or orig_res is res): + # Copy the out data from temporary buffer to original memory + ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray( + src=res, + dst=dpnp.get_usm_ndarray(orig_res), + sycl_queue=exec_q, + depends=[unary_ev], + ) + _manager.add_event_pair(ht_copy_ev, copy_ev) + res = out[i] = orig_res + + if not isinstance(res, dpnp_array): + # Always return dpnp.ndarray + out[i] = dpnp_array._create_from_usm_ndarray(res) + return out + + class DPNPBinaryFunc(BinaryElementwiseFunc): """ Class that implements binary element-wise functions. @@ -262,6 +490,7 @@ class DPNPBinaryFunc(BinaryElementwiseFunc): sycl_dev - The :class:`dpctl.SyclDevice` where the function evaluation is carried out. One of `o1_dtype` and `o2_dtype` must be a ``dtype`` instance. 
+ """ def __init__( diff --git a/dpnp/dpnp_utils/dpnp_utils_common.py b/dpnp/dpnp_utils/dpnp_utils_common.py index 61d1940ebe5..2cf5973d1e8 100644 --- a/dpnp/dpnp_utils/dpnp_utils_common.py +++ b/dpnp/dpnp_utils/dpnp_utils_common.py @@ -29,12 +29,35 @@ from collections.abc import Iterable -from dpctl.tensor._type_utils import _can_cast +import dpctl.tensor._type_utils as dtu import dpnp from dpnp.dpnp_utils import map_dtype_to_device -__all__ = ["result_type_for_device", "to_supported_dtypes"] +__all__ = [ + "find_buf_dtype_3out", + "result_type_for_device", + "to_supported_dtypes", +] + + +def find_buf_dtype_3out(arg_dtype, query_fn, sycl_dev): + """Works as dpu._find_buf_dtype, but with two output arrays.""" + + res1_dt, res2_dt = query_fn(arg_dtype) + if res1_dt and res2_dt: + return None, res1_dt, res2_dt + + _fp16 = sycl_dev.has_aspect_fp16 + _fp64 = sycl_dev.has_aspect_fp64 + all_dts = dtu._all_data_types(_fp16, _fp64) + for buf_dt in all_dts: + if dtu._can_cast(arg_dtype, buf_dt, _fp16, _fp64): + res1_dt, res2_dt = query_fn(buf_dt) + if res1_dt and res2_dt: + return buf_dt, res1_dt, res2_dt + + return None, None, None def result_type_for_device(dtypes, device): @@ -55,7 +78,7 @@ def to_supported_dtypes(dtypes, supported_types, device): has_fp16 = device.has_aspect_fp16 def is_castable(dtype, stype): - return _can_cast(dtype, stype, has_fp16, has_fp64) + return dtu._can_cast(dtype, stype, has_fp16, has_fp64) if not isinstance(supported_types, Iterable): supported_types = (supported_types,) # pragma: no cover From ba43ffe20fe98758b3bde86ec089ef93fced10e9 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 17:11:23 +0200 Subject: [PATCH 10/19] Add python implementation --- dpnp/dpnp_iface_mathematical.py | 73 +++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 6fe165632ae..1f58d0c843c 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -73,6 +73,7 @@ DPNPRound, DPNPSinc, DPNPUnaryFunc, + DPNPUnaryTwoOutputsFunc, acceptance_fn_gcd_lcm, acceptance_fn_negative, acceptance_fn_positive, @@ -112,6 +113,7 @@ "fmax", "fmin", "fmod", + "frexp", "gcd", "gradient", "heaviside", @@ -2342,6 +2344,77 @@ def ediff1d(ary, to_end=None, to_begin=None): mkl_impl_fn="_fmod", ) + +_FREXP_DOCSTRING = """ +Decompose each element :math:`x_i` of the input array `x` into the mantissa and +the twos exponent. + +For full documentation refer to :obj:`numpy.frexp`. + +Parameters +---------- +x : {dpnp.ndarray, usm_ndarray} + Array of numbers to be decomposed, expected to have a real-valued + floating-point data type. +out1 : {None, dpnp.ndarray, usm_ndarray}, optional + Output array for the mantissa to populate. Array must have the same shape + as `x` and the expected data type. + + Default: ``None``. +out2 : {None, dpnp.ndarray, usm_ndarray}, optional + Output array for the exponent to populate. Array must have the same shape + as `x` and the expected data type. + + Default: ``None``. +order : {None, "C", "F", "A", "K"}, optional + Memory layout of the newly output array, if parameter `out` is ``None``. + + Default: ``"K"``. + +Returns +------- +mantissa : dpnp.ndarray + Floating values between -1 and 1. +exponent : dpnp.ndarray + Integer exponents of 2. + +Limitations +----------- +Parameters `where` and `subok` are supported with their default values. +Keyword argument `kwargs` is currently unsupported. 
+Otherwise ``NotImplementedError`` exception will be raised. + +See Also +-------- +:obj:`dpnp.ldexp` : Compute :math:`y = x1 * 2^{x2}`, inverse to + :obj:`dpnp.frexp`. + +Notes +----- +Complex dtypes are not supported, they will raise a ``TypeError``. + +Examples +-------- +>>> import dpnp as np +>>> x = np.arange(9) +>>> y1, y2 = np.frexp(x) +>>> y1 +array([0. , 0.5 , 0.5 , 0.75 , 0.5 , 0.625, 0.75 , 0.875, 0.5 ]) +>>> y2 +array([0, 1, 2, 2, 3, 3, 3, 3, 4], dtype=int32) +>>> y1 * 2**y2 +array([0., 1., 2., 3., 4., 5., 6., 7., 8.]) + +""" + +frexp = DPNPUnaryTwoOutputsFunc( + "_frexp", + ufi._frexp_result_type, + ufi._frexp, + _FREXP_DOCSTRING, +) + + _GCD_DOCSTRING = r""" Returns the greatest common divisor of :math:`\abs{x1}` and :math:`\abs{x2}`. diff --git a/pyproject.toml b/pyproject.toml index 68a54aa095e..c5e55863f88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,7 +154,7 @@ max-returns = 8 [tool.pylint.format] max-line-length = 80 -max-module-lines = 5000 +max-module-lines = 8000 [tool.pylint.imports] allow-wildcard-with-all = true From 2472ba03221dbdbbb5ae65645787d6981825e187 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 17:20:53 +0200 Subject: [PATCH 11/19] Enable umath and third party tests --- dpnp/tests/test_umath.py | 2 +- dpnp/tests/third_party/cupy/math_tests/test_floating.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dpnp/tests/test_umath.py b/dpnp/tests/test_umath.py index 6284950ae94..31b55a204a1 100644 --- a/dpnp/tests/test_umath.py +++ b/dpnp/tests/test_umath.py @@ -115,7 +115,7 @@ def test_umaths(test_cases): and not (vmi._is_available() and has_support_aspect64()) ): pytest.skip("dpctl-2031") - elif umath in ["divmod", "frexp"]: + elif umath in ["divmod"]: pytest.skip("Not implemented umath") elif umath == "modf": if args[0].dtype == dpnp.float16: diff --git a/dpnp/tests/third_party/cupy/math_tests/test_floating.py b/dpnp/tests/third_party/cupy/math_tests/test_floating.py index 3bfb4bb0043..d7affc54157 100644 --- a/dpnp/tests/third_party/cupy/math_tests/test_floating.py +++ b/dpnp/tests/third_party/cupy/math_tests/test_floating.py @@ -1,7 +1,8 @@ +from __future__ import annotations + import unittest import numpy -import pytest import dpnp as cupy from dpnp.tests.helper import has_support_aspect64 @@ -38,7 +39,6 @@ def test_ldexp(self, xp, ftype, itype): b = xp.array([-3, -2, -1, 0, 1, 2, 3], dtype=itype) return xp.ldexp(a, b) - @pytest.mark.skip("frexp() is not implemented yet") @testing.for_float_dtypes() def test_frexp(self, dtype): numpy_a = numpy.array( From 5de2f646ed6a9d725cf801b379f0e1030e3821ec Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 17:26:52 +0200 Subject: [PATCH 12/19] Add CFD tests --- dpnp/tests/test_sycl_queue.py | 24 ++++++++++++++++++++++++ dpnp/tests/test_usm_type.py | 13 +++++++++++++ 2 files changed, 37 insertions(+) diff --git a/dpnp/tests/test_sycl_queue.py b/dpnp/tests/test_sycl_queue.py index 3ed3ef17f96..85ae164b528 100644 --- a/dpnp/tests/test_sycl_queue.py +++ b/dpnp/tests/test_sycl_queue.py @@ -356,6 +356,30 @@ def test_1in_1out(func, data, device): pass +@pytest.mark.parametrize( + "func, data", + [ + pytest.param("frexp", numpy.arange(9)), + ], +) +@pytest.mark.parametrize("device", valid_dev, ids=dev_ids) +def test_1in_2out(func, data, device): + x = dpnp.array(data, device=device) + res1, res2 = getattr(dpnp, func)(x) + assert_sycl_queue_equal(res1.sycl_queue, x.sycl_queue) + assert_sycl_queue_equal(res2.sycl_queue, x.sycl_queue) + + out1 = 
dpnp.empty_like(res1) + out2 = dpnp.empty_like(res2) + try: + # some functions do not support out kwarg + getattr(dpnp, func)(x, out=(out1, out2)) + assert_sycl_queue_equal(out1.sycl_queue, x.sycl_queue) + assert_sycl_queue_equal(out2.sycl_queue, x.sycl_queue) + except TypeError: + pass + + @pytest.mark.parametrize( "func,data1,data2", [ diff --git a/dpnp/tests/test_usm_type.py b/dpnp/tests/test_usm_type.py index 97ceb0220e5..1c84b746128 100644 --- a/dpnp/tests/test_usm_type.py +++ b/dpnp/tests/test_usm_type.py @@ -649,6 +649,19 @@ def test_1in_1out(func, data, usm_type): assert x.usm_type == usm_type == res.usm_type +@pytest.mark.parametrize( + "func, data", + [ + pytest.param("frexp", numpy.arange(9)), + ], +) +@pytest.mark.parametrize("usm_type", list_of_usm_types) +def test_1in_2out(func, data, usm_type): + x = dpnp.array(data, usm_type=usm_type) + res1, res2 = getattr(dpnp, func)(x) + assert x.usm_type == usm_type == res1.usm_type == res2.usm_type + + @pytest.mark.parametrize( "func,data1,data2", [ From 52299392d51f8131ede3e629795c6248d59a0d45 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 21:43:01 +0200 Subject: [PATCH 13/19] Add tests to cover frexp and new class --- dpnp/dpnp_algo/dpnp_elementwise_common.py | 10 +- dpnp/tests/test_mathematical.py | 270 +++++++++++++++++----- 2 files changed, 218 insertions(+), 62 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index a6e29622c89..70f49c26125 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -179,7 +179,7 @@ def __call__( ) elif dtype is not None and out is not None: raise TypeError( - f"Requested function={self.name_} only takes `out` or `dtype`" + f"Requested function={self.name_} only takes `out` or `dtype` " "as an argument, but both were provided." ) @@ -356,6 +356,12 @@ def __call__( res_dt = res_dts[i] if res_dt != res.dtype: + if not dpnp.can_cast(res_dt, res.dtype, casting="same_kind"): + raise TypeError( + f"Cannot cast ufunc '{self.name_}' output {i + 1} from " + f"{res_dt} to {res.dtype} with casting rule 'same_kind'" + ) + # Allocate a temporary buffer with the required dtype out[i] = dpt.empty_like(res, dtype=res_dt) elif ( @@ -564,7 +570,7 @@ def __call__( ) elif dtype is not None and out is not None: raise TypeError( - f"Requested function={self.name_} only takes `out` or `dtype`" + f"Requested function={self.name_} only takes `out` or `dtype` " "as an argument, but both were provided." 
) diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index d0dbd44c853..e8b852189e1 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -710,6 +710,112 @@ def test_errors(self): assert_raises(ExecutionPlacementError, dpnp.ediff1d, ia, to_end=to_end) +class TestFrexp: + ALL_DTYPES = get_all_dtypes(no_none=True) + ALL_DTYPES_NO_COMPLEX = get_all_dtypes( + no_none=True, no_float16=False, no_complex=True + ) + + @pytest.mark.parametrize("dt", ALL_DTYPES_NO_COMPLEX) + def test_basic(self, dt): + a = numpy.array([-2, 5, 1, 4, 3], dtype=dt) + ia = dpnp.array(a) + + res1, res2 = dpnp.frexp(ia) + exp1, exp2 = numpy.frexp(a) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + + def test_out(self): + a = numpy.array(5.7) + ia = dpnp.array(a) + + out1 = numpy.empty(()) + out2 = numpy.empty((), dtype=numpy.int32) + iout1, iout2 = dpnp.array(out1), dpnp.array(out2) + + res1, res2 = dpnp.frexp(ia, iout1) + exp1, exp2 = numpy.frexp(a, out1) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + assert res1 is iout1 + + res1, res2 = dpnp.frexp(ia, None, iout2) + exp1, exp2 = numpy.frexp(a, None, out2) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + assert res2 is iout2 + + res1, res2 = dpnp.frexp(ia, iout1, iout2) + exp1, exp2 = numpy.frexp(a, out1, out2) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + assert res1 is iout1 + assert res2 is iout2 + + @pytest.mark.parametrize("dt", ALL_DTYPES_NO_COMPLEX) + @pytest.mark.parametrize("out1_dt", ALL_DTYPES) + @pytest.mark.parametrize("out2_dt", ALL_DTYPES) + def test_out_all_dtypes(self, dt, out1_dt, out2_dt): + a = numpy.ones(9, dtype=dt) + ia = dpnp.array(a) + + out1 = numpy.zeros(9, dtype=out1_dt) + out2 = numpy.zeros(9, dtype=out2_dt) + iout1, iout2 = dpnp.array(out1), dpnp.array(out2) + + try: + res1, res2 = dpnp.frexp(ia, out=(iout1, iout2)) + except TypeError: + # expect numpy to fail with the same reason + with pytest.raises(TypeError): + _ = numpy.frexp(a, out=(out1, out2)) + else: + exp1, exp2 = numpy.frexp(a, out=(out1, out2)) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + assert res1 is iout1 + assert res2 is iout2 + + @pytest.mark.parametrize("stride", [-4, -2, -1, 1, 2, 4]) + @pytest.mark.parametrize("dt", get_float_dtypes()) + def test_strides_out(self, stride, dt): + a = numpy.array( + [numpy.nan, numpy.nan, numpy.inf, -numpy.inf, 0.0, -0.0, 1.0, -1.0], + dtype=dt, + ) + ia = dpnp.array(a) + + out_mant = numpy.ones(8, dtype=dt) + out_exp = 2 * numpy.ones(8, dtype="i") + iout_mant, iout_exp = dpnp.array(out_mant), dpnp.array(out_exp) + + res1, res2 = dpnp.frexp( + ia[::stride], out=(iout_mant[::stride], iout_exp[::stride]) + ) + exp1, exp2 = numpy.frexp( + a[::stride], out=(out_mant[::stride], out_exp[::stride]) + ) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + + assert_array_equal(iout_mant, out_mant) + assert_array_equal(iout_exp, out_exp) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_out_wrong_type(self, xp): + a = xp.array(0.5) + with pytest.raises(TypeError, match="'out' must be a tuple of arrays"): + _ = xp.frexp(a, out=xp.empty(())) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + @pytest.mark.parametrize("dt", get_complex_dtypes()) + def test_complex_dtype(self, xp, dt): + a = xp.array([-2, 5, 1, 4, 3], dtype=dt) + with pytest.raises((TypeError, ValueError)): + _ = xp.frexp(a) + + class TestGradient: @pytest.mark.parametrize("dt", 
get_all_dtypes(no_none=True, no_bool=True)) def test_basic(self, dt): @@ -1925,6 +2031,110 @@ def test_ndim(self): assert_dtype_allclose(result, expected) +class TestUfunc: + @pytest.mark.parametrize( + "func, nin, nout", + [ + pytest.param("abs", 1, 1, id="DPNPUnaryFunc"), + pytest.param("frexp", 1, 2, id="DPNPUnaryTwoOutputsFunc"), + pytest.param("add", 2, 1, id="DPNPBinaryFunc"), + ], + ) + def test_nin_nout(self, func, nin, nout): + assert getattr(dpnp, func).nin == nin + assert getattr(dpnp, func).nout == nout + + @pytest.mark.parametrize( + "func, kwargs", + [ + pytest.param( + "abs", + {"unknown_kwarg": 1, "where": False, "subok": False}, + id="DPNPUnaryFunc", + ), + pytest.param( + "frexp", + { + "unknown_kwarg": 1, + "where": False, + "dtype": "?", + "subok": False, + }, + id="DPNPUnaryTwoOutputsFunc", + ), + pytest.param( + "add", + {"unknown_kwarg": 1, "where": False, "subok": False}, + id="DPNPBinaryFunc", + ), + ], + ) + def test_not_supported_kwargs(self, func, kwargs): + x = dpnp.array([1, 2, 3]) + + fn = getattr(dpnp, func) + args = [x] * fn.nin + for key, val in kwargs.items(): + with pytest.raises(NotImplementedError): + fn(*args, **{key: val}) + + @pytest.mark.parametrize("func", ["abs", "frexp", "add"]) + @pytest.mark.parametrize("x", [1, [1, 2], numpy.ones(5)]) + def test_wrong_input(self, func, x): + fn = getattr(dpnp, func) + args = [x] * fn.nin + with pytest.raises(TypeError): + fn(*args) + + @pytest.mark.parametrize("func", ["add"]) + def test_binary_wrong_input(self, func): + x = dpnp.array([1, 2, 3]) + with pytest.raises(TypeError): + getattr(dpnp, func)(x, [1, 2]) + with pytest.raises(TypeError): + getattr(dpnp, func)([1, 2], x) + + @pytest.mark.parametrize("func", ["abs", "frexp", "add"]) + def test_wrong_order(self, func): + x = dpnp.array([1, 2, 3]) + + fn = getattr(dpnp, func) + args = [x] * fn.nin + with pytest.raises(ValueError, match="order must be one of"): + fn(*args, order="H") + + @pytest.mark.parametrize("func", ["abs", "add"]) + def test_out_dtype(self, func): + x = dpnp.array([1, 2, 3]) + out = dpnp.array([1, 2, 3]) + + fn = getattr(dpnp, func) + args = [x] * fn.nin + with pytest.raises( + TypeError, match="only takes `out` or `dtype` as an argument" + ): + fn(*args, out=out, dtype="f4") + + @pytest.mark.parametrize("func", ["abs", "frexp", "add"]) + def test_order_none(self, func): + a = numpy.array([1, 2, 3]) + ia = dpnp.array(a) + + fn = getattr(numpy, func) + ifn = getattr(dpnp, func) + + args = [a] * fn.nin + iargs = [ia] * ifn.nin + + result = ifn(*iargs, order=None) + expected = fn(*args, order=None) + if fn.nout == 1: + assert_dtype_allclose(result, expected) + else: + for i in range(fn.nout): + assert_dtype_allclose(result[i], expected[i]) + + class TestUnwrap: @pytest.mark.parametrize("dt", get_float_dtypes()) def test_basic(self, dt): @@ -2568,66 +2778,6 @@ def test_inplace_floor_divide(dtype): assert_allclose(ia, a) -def test_elemenwise_nin_nout(): - assert dpnp.abs.nin == 1 - assert dpnp.add.nin == 2 - - assert dpnp.abs.nout == 1 - assert dpnp.add.nout == 1 - - -def test_elemenwise_error(): - x = dpnp.array([1, 2, 3]) - out = dpnp.array([1, 2, 3]) - - with pytest.raises(NotImplementedError): - dpnp.abs(x, unknown_kwarg=1) - with pytest.raises(NotImplementedError): - dpnp.abs(x, where=False) - with pytest.raises(NotImplementedError): - dpnp.abs(x, subok=False) - with pytest.raises(TypeError): - dpnp.abs(1) - with pytest.raises(TypeError): - dpnp.abs([1, 2]) - with pytest.raises(TypeError): - dpnp.abs(x, out=out, dtype="f4") - with 
pytest.raises(ValueError): - dpnp.abs(x, order="H") - - with pytest.raises(NotImplementedError): - dpnp.add(x, x, unknown_kwarg=1) - with pytest.raises(NotImplementedError): - dpnp.add(x, x, where=False) - with pytest.raises(NotImplementedError): - dpnp.add(x, x, subok=False) - with pytest.raises(TypeError): - dpnp.add(1, 2) - with pytest.raises(TypeError): - dpnp.add([1, 2], [1, 2]) - with pytest.raises(TypeError): - dpnp.add(x, [1, 2]) - with pytest.raises(TypeError): - dpnp.add([1, 2], x) - with pytest.raises(TypeError): - dpnp.add(x, x, out=out, dtype="f4") - with pytest.raises(ValueError): - dpnp.add(x, x, order="H") - - -def test_elemenwise_order_none(): - x_np = numpy.array([1, 2, 3]) - x = dpnp.array([1, 2, 3]) - - result = dpnp.abs(x, order=None) - expected = numpy.abs(x_np, order=None) - assert_dtype_allclose(result, expected) - - result = dpnp.add(x, x, order=None) - expected = numpy.add(x_np, x_np, order=None) - assert_dtype_allclose(result, expected) - - def test_bitwise_1array_input(): x = dpnp.array([1, 2, 3]) x_np = numpy.array([1, 2, 3]) From 25f8cbaae05f98d2bf1517255dec874ce5192540 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Thu, 23 Oct 2025 21:46:21 +0200 Subject: [PATCH 14/19] Add PR to the changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15f1840317a..cbc4840017b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ The release drops support for Python 3.9, making Python 3.10 the minimum require * Added implementation of `dpnp.linalg.lu_solve` for batch inputs (SciPy-compatible) [#2619](https://github.com/IntelPython/dpnp/pull/2619) * Added `dpnp.exceptions` submodule to aggregate the generic exceptions used by dpnp [#2616](https://github.com/IntelPython/dpnp/pull/2616) * Added implementation of `dpnp.scipy.special.erfcx` [#2596](https://github.com/IntelPython/dpnp/pull/2596) +* Added implementation of `dpnp.frexp` [#2635](https://github.com/IntelPython/dpnp/pull/2635) ### Changed From 65f3d153afed6c9d0b8030b9b8a1fc7fcdd2dc43 Mon Sep 17 00:00:00 2001 From: Anton Volkov Date: Mon, 27 Oct 2025 10:42:35 +0100 Subject: [PATCH 15/19] Add more tests to improve the coverage --- dpnp/dpnp_algo/dpnp_elementwise_common.py | 4 +- dpnp/tests/test_mathematical.py | 112 +++++++++++++++++++--- 2 files changed, 103 insertions(+), 13 deletions(-) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index 70f49c26125..ddbb52d9291 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -319,7 +319,7 @@ def __call__( if not isinstance(out, tuple): raise TypeError("'out' must be a tuple of arrays") - if len(out) != 2: + if len(out) != self.nout: raise ValueError( "'out' tuple must have exactly one entry per ufunc output" ) @@ -341,7 +341,7 @@ def __call__( res = dpnp.get_usm_ndarray(out[i]) if not res.flags.writable: - raise ValueError("provided output array is read-only") + raise ValueError("output array is read-only") if res.shape != x.shape: raise ValueError( diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index e8b852189e1..97be8f615d2 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -786,8 +786,8 @@ def test_strides_out(self, stride, dt): ) ia = dpnp.array(a) - out_mant = numpy.ones(8, dtype=dt) - out_exp = 2 * numpy.ones(8, dtype="i") + out_mant = numpy.ones_like(a) + out_exp = 2 * numpy.ones_like(a, dtype="i") iout_mant, iout_exp = 
dpnp.array(out_mant), dpnp.array(out_exp) res1, res2 = dpnp.frexp( @@ -802,11 +802,34 @@ def test_strides_out(self, stride, dt): assert_array_equal(iout_mant, out_mant) assert_array_equal(iout_exp, out_exp) - @pytest.mark.parametrize("xp", [numpy, dpnp]) - def test_out_wrong_type(self, xp): - a = xp.array(0.5) - with pytest.raises(TypeError, match="'out' must be a tuple of arrays"): - _ = xp.frexp(a, out=xp.empty(())) + @pytest.mark.parametrize("dt", get_float_dtypes()) + def test_out_overlap(self, dt): + a = numpy.ones(15, dtype=dt) + ia = dpnp.array(a) + + out_mant = numpy.ones_like(a) + out_exp = 2 * numpy.ones_like(a, dtype="i") + iout_mant, iout_exp = dpnp.array(out_mant), dpnp.array(out_exp) + + res1, res2 = dpnp.frexp(ia, out=(iout_mant, iout_exp)) + exp1, exp2 = numpy.frexp(a, out=(out_mant, out_exp)) + assert_array_equal(res1, exp1) + assert_array_equal(res2, exp2) + + assert_array_equal(iout_mant, out_mant) + assert_array_equal(iout_exp, out_exp) + assert res1 is iout_mant + assert res2 is iout_exp + + @pytest.mark.parametrize("dt", get_float_dtypes()) + def test_empty(self, dt): + a = numpy.empty((), dtype=dt) + ia = dpnp.array(a) + + res1, res2 = dpnp.frexp(ia) + exp1, exp2 = numpy.frexp(a) + assert_array_equal(res1, exp1, strict=True) + assert_array_equal(res2, exp2, strict=True) @pytest.mark.parametrize("xp", [numpy, dpnp]) @pytest.mark.parametrize("dt", get_complex_dtypes()) @@ -2080,7 +2103,7 @@ def test_not_supported_kwargs(self, func, kwargs): @pytest.mark.parametrize("func", ["abs", "frexp", "add"]) @pytest.mark.parametrize("x", [1, [1, 2], numpy.ones(5)]) - def test_wrong_input(self, func, x): + def test_unary_wrong_input(self, func, x): fn = getattr(dpnp, func) args = [x] * fn.nin with pytest.raises(TypeError): @@ -2115,8 +2138,75 @@ def test_out_dtype(self, func): ): fn(*args, out=out, dtype="f4") + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unary_two_outs_out_ndarray(self, xp): + x = xp.array(0.5) + with pytest.raises(TypeError, match="'out' must be a tuple of arrays"): + _ = xp.frexp(x, out=xp.empty(())) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + @pytest.mark.parametrize("out", [(), (1,), (1, 2, 3)]) + def test_unary_two_outs_out_wrong_tuple_len(self, xp, out): + x = xp.array(0.5) + with pytest.raises( + ValueError, + match="'out' tuple must have exactly one entry per ufunc output", + ): + _ = xp.frexp(x, out=out) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unary_two_outs_out_mixed(self, xp): + x = xp.array(0.5) + with pytest.raises( + TypeError, + match="cannot specify 'out' as both a positional and keyword", + ): + _ = xp.frexp(x, xp.empty(()), out=(xp.empty(()), None)) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unary_two_outs_out_not_writable(self, xp): + x = xp.array(0.5) + out1 = xp.empty(()) + out1.flags["W"] = False + + with pytest.raises(ValueError, match="array is read-only"): + _ = xp.frexp(x, out1) + + out2 = xp.empty((), dtype="i") + out2.flags["W"] = False + with pytest.raises(ValueError, match="array is read-only"): + _ = xp.frexp(x, out=(None, out2)) + + @pytest.mark.parametrize("xp", [numpy, dpnp]) + def test_unary_two_outs_out_wrong_shape(self, xp): + x = xp.full(6, fill_value=0.5) + out1 = xp.empty(12) + with pytest.raises(ValueError): + _ = xp.frexp(x, out1) + + out2 = xp.empty((2, 3), dtype="i") + with pytest.raises(ValueError): + _ = xp.frexp(x, out=(None, out2)) + + def test_unary_two_outs_cfd_error(self): + x = dpnp.array(0.5, sycl_queue=dpctl.SyclQueue()) + out1 = dpnp.empty((), 
sycl_queue=dpctl.SyclQueue())
+        out2 = dpnp.empty((), sycl_queue=dpctl.SyclQueue())
+        with pytest.raises(
+            ExecutionPlacementError,
+            match="Input and output allocation queues are not compatible",
+        ):
+            _ = dpnp.frexp(x, out1)
+
+        with pytest.raises(
+            ExecutionPlacementError,
+            match="Input and output allocation queues are not compatible",
+        ):
+            _ = dpnp.frexp(x, out=(None, out2))
+
     @pytest.mark.parametrize("func", ["abs", "frexp", "add"])
-    def test_order_none(self, func):
+    @pytest.mark.parametrize("order", [None, "K", "A", "f", "c"])
+    def test_order(self, func, order):
         a = numpy.array([1, 2, 3])
         ia = dpnp.array(a)
 
@@ -2126,8 +2216,8 @@ def test_order_none(self, func):
         args = [a] * fn.nin
         iargs = [ia] * ifn.nin
 
-        result = ifn(*iargs, order=None)
-        expected = fn(*args, order=None)
+        result = ifn(*iargs, order=order)
+        expected = fn(*args, order=order)
         if fn.nout == 1:
             assert_dtype_allclose(result, expected)
         else:

From 73b5ea5d14821db681e32b1252631318ec863e4e Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 27 Oct 2025 13:51:13 +0100
Subject: [PATCH 16/19] Run test_strides_out only on Linux due to
 platform-dependent results returned by numpy.frexp

---
 dpnp/tests/test_mathematical.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index 97be8f615d2..5f235afd717 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -33,6 +33,7 @@
     has_support_aspect16,
     has_support_aspect64,
     is_intel_numpy,
+    is_win_platform,
     numpy_version,
 )
 from .third_party.cupy import testing
@@ -777,6 +778,10 @@ def test_out_all_dtypes(self, dt, out1_dt, out2_dt):
             assert res1 is iout1
             assert res2 is iout2
 
+    @pytest.mark.skipif(
+        is_win_platform(),
+        reason="numpy.frexp gives different answers for NAN/INF on Windows and Linux",
+    )
     @pytest.mark.parametrize("stride", [-4, -2, -1, 1, 2, 4])
     @pytest.mark.parametrize("dt", get_float_dtypes())
     def test_strides_out(self, stride, dt):

From f53cbcbd8a8525d48816c2c3c8e587f327908297 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 27 Oct 2025 16:11:24 +0100
Subject: [PATCH 17/19] Update test_empty to test exactly empty input array

---
 dpnp/tests/test_mathematical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index 5f235afd717..e1d73f36fc3 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -828,7 +828,7 @@ def test_out_overlap(self, dt):
 
     @pytest.mark.parametrize("dt", get_float_dtypes())
     def test_empty(self, dt):
-        a = numpy.empty((), dtype=dt)
+        a = numpy.empty(0, dtype=dt)
         ia = dpnp.array(a)
 
         res1, res2 = dpnp.frexp(ia)

From 281bae194072fe19d20d0d6a120b1e1d5d2323f0 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 27 Oct 2025 18:12:00 +0100
Subject: [PATCH 18/19] Fix TestFrexp::test_basic to work with unsigned dtype

---
 dpnp/tests/test_mathematical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index e1d73f36fc3..b72d9f2e02d 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -719,7 +719,7 @@ class TestFrexp:
 
     @pytest.mark.parametrize("dt", ALL_DTYPES_NO_COMPLEX)
     def test_basic(self, dt):
-        a = numpy.array([-2, 5, 1, 4, 3], dtype=dt)
+        a = get_abs_array([-2, 5, 1, 4, 3], dtype=dt)
         ia = dpnp.array(a)
 
         res1, res2 = dpnp.frexp(ia)

From d7401a582be555fc450948e4e6715457fdc77042 Mon Sep 17 00:00:00 2001
From: Anton Volkov
Date: Mon, 27 Oct 2025 18:51:50 +0100
Subject: [PATCH 19/19] Parametrize dtype of the out arrays in TestFrexp::test_out

---
 dpnp/tests/test_mathematical.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index b72d9f2e02d..30a7ff14f44 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -727,11 +727,12 @@ def test_basic(self, dt):
         assert_array_equal(res1, exp1)
         assert_array_equal(res2, exp2)
 
-    def test_out(self):
-        a = numpy.array(5.7)
+    @pytest.mark.parametrize("dt", get_float_dtypes())
+    def test_out(self, dt):
+        a = numpy.array(5.7, dtype=dt)
         ia = dpnp.array(a)
 
-        out1 = numpy.empty(())
+        out1 = numpy.empty((), dtype=dt)
         out2 = numpy.empty((), dtype=numpy.int32)
         iout1, iout2 = dpnp.array(out1), dpnp.array(out2)