diff --git a/dpctl_ext/tensor/CMakeLists.txt b/dpctl_ext/tensor/CMakeLists.txt index b032dc34bdb..7e1170f4ebf 100644 --- a/dpctl_ext/tensor/CMakeLists.txt +++ b/dpctl_ext/tensor/CMakeLists.txt @@ -94,26 +94,26 @@ set(_elementwise_sources #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/copysign.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cos.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cosh.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/equal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/equal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/exp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/exp2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/expm1.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/floor_divide.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/floor_divide.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/floor.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/greater_equal.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/greater.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/hypot.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/greater_equal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/greater.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/hypot.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/imag.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/isfinite.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/isinf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/isnan.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/less_equal.cpp 
- #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/less.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/less_equal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/less.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log1p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log10.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logaddexp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logaddexp.cpp #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_and.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_not.cpp #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_or.cpp @@ -141,7 +141,7 @@ set(_elementwise_sources #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/subtract.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tanh.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/true_divide.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/true_divide.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/trunc.cpp ) set(_reduction_sources diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index 5172d426334..279e3a95fd0 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -75,18 +75,27 @@ conj, cos, cosh, + divide, + equal, exp, exp2, expm1, floor, + floor_divide, + greater, + greater_equal, + hypot, imag, isfinite, isinf, isnan, + less, + less_equal, log, log1p, log2, log10, + logaddexp, logical_not, negative, positive, @@ -203,8 +212,10 @@ "cumulative_prod", 
"cumulative_sum", "diff", + "divide", "empty", "empty_like", + "equal", "extract", "expand_dims", "eye", @@ -214,9 +225,13 @@ "finfo", "flip", "floor", + "floor_divide", "from_numpy", "full", "full_like", + "greater", + "greater_equal", + "hypot", "iinfo", "imag", "isfinite", @@ -224,8 +239,11 @@ "isdtype", "isin", "isnan", + "less", + "less_equal", "linspace", "log", + "logaddexp", "logical_not", "logsumexp", "log1p", diff --git a/dpctl_ext/tensor/_elementwise_funcs.py b/dpctl_ext/tensor/_elementwise_funcs.py index 08d59d8289a..17bdf94d9be 100644 --- a/dpctl_ext/tensor/_elementwise_funcs.py +++ b/dpctl_ext/tensor/_elementwise_funcs.py @@ -32,8 +32,10 @@ from ._elementwise_common import BinaryElementwiseFunc, UnaryElementwiseFunc from ._type_utils import ( + _acceptance_fn_divide, _acceptance_fn_negative, _acceptance_fn_reciprocal, + _resolve_weak_types_all_py_ints, ) # U01: ==== ABS (x) @@ -637,6 +639,78 @@ ) del _cosh_docstring +# B08: ==== DIVIDE (x1, x2) +_divide_docstring_ = r""" +divide(x1, x2, /, \*, out=None, order='K') + +Calculates the ratio for each element `x1_i` of the input array `x1` with +the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a floating-point data type. + x2 (usm_ndarray): + Second input array, also expected to have a floating-point data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the result of element-wise division. The data type + of the returned array is determined by the Type Promotion Rules. 
+""" + +divide = BinaryElementwiseFunc( + "divide", + ti._divide_result_type, + ti._divide, + _divide_docstring_, + binary_inplace_fn=ti._divide_inplace, + acceptance_fn=_acceptance_fn_divide, + weak_type_resolver=_resolve_weak_types_all_py_ints, +) +del _divide_docstring_ + +# B09: ==== EQUAL (x1, x2) +_equal_docstring_ = r""" +equal(x1, x2, /, \*, out=None, order='K') + +Calculates equality test results for each element `x1_i` of the input array `x1` +with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the result of element-wise equality comparison. + The returned array has a data type of `bool`. +""" + +equal = BinaryElementwiseFunc( + "equal", + ti._equal_result_type, + ti._equal, + _equal_docstring_, + weak_type_resolver=_resolve_weak_types_all_py_ints, +) +del _equal_docstring_ + # U13: ==== EXP (x) _exp_docstring = r""" exp(x, /, \*, out=None, order='K') @@ -664,6 +738,114 @@ exp = UnaryElementwiseFunc("exp", ti._exp_result_type, ti._exp, _exp_docstring) del _exp_docstring +# B10: ==== FLOOR_DIVIDE (x1, x2) +_floor_divide_docstring_ = r""" +floor_divide(x1, x2, /, \*, out=None, order='K') + +Calculates the ratio for each element `x1_i` of the input array `x1` with +the respective element `x2_i` of the input array `x2` to the greatest +integer-value number that is not greater than the division result. + +Args: + x1 (usm_ndarray): + First input array, expected to have a real-valued data type. + x2 (usm_ndarray): + Second input array, also expected to have a real-valued data type. 
+ out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the result of element-wise floor of division. + The data type of the returned array is determined by the Type + Promotion Rules. +""" + +floor_divide = BinaryElementwiseFunc( + "floor_divide", + ti._floor_divide_result_type, + ti._floor_divide, + _floor_divide_docstring_, + binary_inplace_fn=ti._floor_divide_inplace, +) +del _floor_divide_docstring_ + +# B11: ==== GREATER (x1, x2) +_greater_docstring_ = r""" +greater(x1, x2, /, \*, out=None, order='K') + +Computes the greater-than test results for each element `x1_i` of +the input array `x1` with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the result of element-wise greater-than comparison. + The returned array has a data type of `bool`. +""" + +greater = BinaryElementwiseFunc( + "greater", + ti._greater_result_type, + ti._greater, + _greater_docstring_, + weak_type_resolver=_resolve_weak_types_all_py_ints, +) +del _greater_docstring_ + +# B12: ==== GREATER_EQUAL (x1, x2) +_greater_equal_docstring_ = r""" +greater_equal(x1, x2, /, \*, out=None, order='K') + +Computes the greater-than or equal-to test results for each element `x1_i` of +the input array `x1` with the respective element `x2_i` of the input array `x2`. 
+ +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the result of element-wise greater-than or equal-to + comparison. + The returned array has a data type of `bool`. +""" + +greater_equal = BinaryElementwiseFunc( + "greater_equal", + ti._greater_equal_result_type, + ti._greater_equal, + _greater_equal_docstring_, + weak_type_resolver=_resolve_weak_types_all_py_ints, +) +del _greater_equal_docstring_ + # U14: ==== EXPM1 (x) _expm1_docstring = r""" expm1(x, /, \*, out=None, order='K') @@ -839,6 +1021,77 @@ ) del _isnan_docstring_ +# B13: ==== LESS (x1, x2) +_less_docstring_ = r""" +less(x1, x2, /, \*, out=None, order='K') + +Computes the less-than test results for each element `x1_i` of +the input array `x1` with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the result of element-wise less-than comparison. + The returned array has a data type of `bool`. 
+""" + +less = BinaryElementwiseFunc( + "less", + ti._less_result_type, + ti._less, + _less_docstring_, + weak_type_resolver=_resolve_weak_types_all_py_ints, +) +del _less_docstring_ + + +# B14: ==== LESS_EQUAL (x1, x2) +_less_equal_docstring_ = r""" +less_equal(x1, x2, /, \*, out=None, order='K') + +Computes the less-than or equal-to test results for each element `x1_i` of +the input array `x1` with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the result of element-wise less-than or equal-to + comparison. The returned array has a data type of `bool`. +""" + +less_equal = BinaryElementwiseFunc( + "less_equal", + ti._less_equal_result_type, + ti._less_equal, + _less_equal_docstring_, + weak_type_resolver=_resolve_weak_types_all_py_ints, +) +del _less_equal_docstring_ + # U20: ==== LOG (x) _log_docstring = r""" log(x, /, \*, out=None, order='K') @@ -953,6 +1206,43 @@ ) del _log10_docstring_ +# B15: ==== LOGADDEXP (x1, x2) +_logaddexp_docstring_ = r""" +logaddexp(x1, x2, /, \*, out=None, order='K') + +Calculates the natural logarithm of the sum of exponentials for each element +`x1_i` of the input array `x1` with the respective element `x2_i` of the input +array `x2`. + +This function calculates `log(exp(x1) + exp(x2))` more accurately for small +values of `x`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a real-valued floating-point data + type. + x2 (usm_ndarray): + Second input array, also expected to have a real-valued floating-point + data type. 
+ out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise results. The data type + of the returned array is determined by the Type Promotion Rules. +""" + +logaddexp = BinaryElementwiseFunc( + "logaddexp", ti._logaddexp_result_type, ti._logaddexp, _logaddexp_docstring_ +) +del _logaddexp_docstring_ + # U24: ==== LOGICAL_NOT (x) _logical_not_docstring = r""" logical_not(x, /, \*, out=None, order='K') @@ -1329,6 +1619,39 @@ ) del _trunc_docstring +# B24: ==== HYPOT (x1, x2) +_hypot_docstring_ = r""" +hypot(x1, x2, /, \*, out=None, order='K') + +Computes the square root of the sum of squares for each element `x1_i` of the +input array `x1` with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a real-valued floating-point data + type. + x2 (usm_ndarray): + Second input array, also expected to have a real-valued floating-point + data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise hypotenuse. The data type + of the returned array is determined by the Type Promotion Rules. 
+""" + +hypot = BinaryElementwiseFunc( + "hypot", ti._hypot_result_type, ti._hypot, _hypot_docstring_ +) +del _hypot_docstring_ + # U37: ==== CBRT (x) _cbrt_docstring_ = r""" cbrt(x, /, \*, out=None, order='K') diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp new file mode 100644 index 00000000000..e8884ed039a --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/equal.hpp @@ -0,0 +1,318 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of equality of +/// tensor elements. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include + +#include "sycl_complex.hpp" +#include "vec_size_util.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +namespace dpctl::tensor::kernels::equal +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct EqualFunctor +{ + static_assert(std::is_same_v); + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + using realT2 = typename argT2::value_type; + + return exprm_ns::complex(in1) == + exprm_ns::complex(in2); + } + else { + if constexpr 
(std::is_integral_v && + std::is_integral_v && + std::is_signed_v != std::is_signed_v) + { + if constexpr (std::is_signed_v && + !std::is_signed_v) { + return (in1 < 0) ? false : (static_cast(in1) == in2); + } + else { + if constexpr (!std::is_signed_v && + std::is_signed_v) { + return (in2 < 0) ? false + : (in1 == static_cast(in2)); + } + } + } + else { + return (in1 == in2); + } + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto tmp = (in1 == in2); + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using EqualContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using EqualStridedFunctor = + elementwise_common::BinaryStridedFunctor>; + +template +struct EqualOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct EqualContigHyperparameterSet +{ + using value_type = + typename 
std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class equal_contig_kernel; + +template +sycl::event equal_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using EqualHS = + hyperparam_detail::EqualContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = EqualHS::vec_sz; + static constexpr std::uint8_t n_vecs = EqualHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, EqualOutputType, EqualContigFunctor, + equal_contig_kernel, vec_sz, n_vecs>(exec_q, nelems, arg1_p, + arg1_offset, arg2_p, arg2_offset, + res_p, res_offset, depends); +} + +template +struct EqualContigFactory +{ + fnT get() + { + if constexpr (!EqualOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = equal_contig_impl; + return fn; + } + } +}; + +template +struct EqualTypeMapFactory +{ + /*! 
@brief get typeid for output type of operator()==(x, y), always bool */ + std::enable_if_t::value, int> get() + { + using rT = typename EqualOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class equal_strided_kernel; + +template +sycl::event + equal_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, EqualOutputType, EqualStridedFunctor, + equal_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct EqualStridedFactory +{ + fnT get() + { + if constexpr (!EqualOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = equal_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::equal diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp new file mode 100644 index 00000000000..1b529514bc9 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp @@ -0,0 +1,553 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of FLOOR_DIVIDE(x1, x2) +/// function. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +namespace dpctl::tensor::kernels::floor_divide +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct FloorDivideFunctor +{ + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (std::is_integral_v || std::is_integral_v) { + if (in2 == argT2(0)) { + return resT(0); + } + if constexpr (std::is_signed_v || std::is_signed_v) { + auto div = in1 / in2; + auto mod = in1 % in2; + auto corr = (mod != 0 && l_xor(mod < 0, in2 < 0)); + return (div - corr); + } + else { + return (in1 / in2); + } + } + else { + auto div = in1 / in2; + return (div == resT(0)) ? 
div : resT(sycl::floor(div)); + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + if constexpr (std::is_integral_v) { + sycl::vec res; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if (in2[i] == argT2(0)) { + res[i] = resT(0); + } + else { + res[i] = in1[i] / in2[i]; + if constexpr (std::is_signed_v) { + auto mod = in1[i] % in2[i]; + auto corr = (mod != 0 && l_xor(mod < 0, in2[i] < 0)); + res[i] -= corr; + } + } + } + return res; + } + else { + auto tmp = in1 / in2; + using tmpT = typename decltype(tmp)::element_type; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if (in2[i] != argT2(0)) { + tmp[i] = sycl::floor(tmp[i]); + } + } + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + return vec_cast(tmp); + } + } + } + +private: + bool l_xor(bool b1, bool b2) const + { + return (b1 != b2); + } +}; + +template +using FloorDivideContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + FloorDivideFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using FloorDivideStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + FloorDivideFunctor>; + +template +struct FloorDivideOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using 
vsu_ns::ContigHyperparameterSetDefault; + +template +struct FloorDivideContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class floor_divide_contig_kernel; + +template +sycl::event + floor_divide_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using FloorDivideHS = + hyperparam_detail::FloorDivideContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = FloorDivideHS::vec_sz; + static constexpr std::uint8_t n_vecs = FloorDivideHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, FloorDivideOutputType, FloorDivideContigFunctor, + floor_divide_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct FloorDivideContigFactory +{ + fnT get() + { + if constexpr (!FloorDivideOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = floor_divide_contig_impl; + return fn; + } + } +}; + +template +struct FloorDivideTypeMapFactory +{ + /*! 
@brief get typeid for output type of floor_divide(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename FloorDivideOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class floor_divide_strided_kernel; + +template +sycl::event floor_divide_strided_impl( + sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, FloorDivideOutputType, FloorDivideStridedFunctor, + floor_divide_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template +struct FloorDivideStridedFactory +{ + fnT get() + { + if constexpr (!FloorDivideOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = floor_divide_strided_impl; + return fn; + } + } +}; + +template +struct FloorDivideInplaceFunctor +{ + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + void operator()(resT &in1, const argT &in2) const + { + if constexpr (std::is_integral_v) { + if (in2 == argT(0)) { + in1 = 0; + return; + } + if constexpr (std::is_signed_v) { + auto tmp = in1; + in1 /= in2; + auto mod = tmp % in2; + auto corr = (mod != 0 && l_xor(mod < 0, in2 < 0)); + in1 -= corr; + } + else { + in1 /= in2; + } + } + else { + in1 /= in2; + if (in1 == resT(0)) { + return; + } + in1 = sycl::floor(in1); + } + } + + template + void operator()(sycl::vec &in1, + const sycl::vec &in2) const + { + if constexpr (std::is_integral_v) { +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if (in2[i] == argT(0)) { + in1[i] = 0; + } + else { + if constexpr (std::is_signed_v) { + auto tmp = in1[i]; + in1[i] /= in2[i]; + auto mod 
= tmp % in2[i]; + auto corr = (mod != 0 && l_xor(mod < 0, in2[i] < 0)); + in1[i] -= corr; + } + else { + in1[i] /= in2[i]; + } + } + } + } + else { + in1 /= in2; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if (in2[i] != argT(0)) { + in1[i] = sycl::floor(in1[i]); + } + } + } + } + +private: + bool l_xor(bool b1, bool b2) const + { + return (b1 != b2); + } +}; + +template +using FloorDivideInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + FloorDivideInplaceFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using FloorDivideInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + FloorDivideInplaceFunctor>; + +template +class floor_divide_inplace_contig_kernel; + +/* @brief Types supported by in-place floor division */ +template +struct FloorDivideInplaceTypePairSupport +{ + /* value if true a kernel for must be instantiated */ + static constexpr bool is_defined = std::disjunction< + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + // fall-through + td_ns::NotDefinedEntry>::is_defined; +}; + +template +struct FloorDivideInplaceTypeMapFactory +{ + /*! 
@brief get typeid for output type of x //= y */ + std::enable_if_t::value, int> get() + { + if constexpr (FloorDivideInplaceTypePairSupport::is_defined) + { + return td_ns::GetTypeid{}.get(); + } + else { + return td_ns::GetTypeid{}.get(); + } + } +}; + +template +sycl::event floor_divide_inplace_contig_impl( + sycl::queue &exec_q, + std::size_t nelems, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using FloorDivideHS = + hyperparam_detail::FloorDivideContigHyperparameterSet; + + static constexpr std::uint8_t vec_sz = FloorDivideHS::vec_sz; + static constexpr std::uint8_t n_vecs = FloorDivideHS::n_vecs; + + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, FloorDivideInplaceContigFunctor, + floor_divide_inplace_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends); +} + +template +struct FloorDivideInplaceContigFactory +{ + fnT get() + { + if constexpr (!FloorDivideInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = floor_divide_inplace_contig_impl; + return fn; + } + } +}; + +template +class floor_divide_inplace_strided_kernel; + +template +sycl::event floor_divide_inplace_strided_impl( + sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, FloorDivideInplaceStridedFunctor, + floor_divide_inplace_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct FloorDivideInplaceStridedFactory +{ + fnT get() + { + if constexpr (!FloorDivideInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = 
floor_divide_inplace_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::floor_divide diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp new file mode 100644 index 00000000000..9a738436284 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp @@ -0,0 +1,318 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of comparison of +/// tensor elements. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/math_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::greater +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct GreaterFunctor +{ + static_assert(std::is_same_v); + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (tu_ns::is_complex::value || + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + using dpctl::tensor::math_utils::greater_complex; + return greater_complex(in1, in2); + } + else { + if constexpr (std::is_integral_v && + 
std::is_integral_v && + std::is_signed_v != std::is_signed_v) + { + if constexpr (std::is_signed_v && + !std::is_signed_v) { + return (in1 < 0) ? false : (static_cast(in1) > in2); + } + else { + if constexpr (!std::is_signed_v && + std::is_signed_v) { + return (in2 < 0) ? true + : (in1 > static_cast(in2)); + } + } + } + else { + return (in1 > in2); + } + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + + auto tmp = (in1 > in2); + + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using GreaterContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using GreaterStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + GreaterFunctor>; + +template +struct GreaterOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct GreaterContigHyperparameterSet +{ + using 
value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class greater_contig_kernel; + +template +sycl::event greater_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using GreaterHS = + hyperparam_detail::GreaterContigHyperparameterSet; + + static constexpr std::uint8_t vec_sz = GreaterHS::vec_sz; + static constexpr std::uint8_t n_vecs = GreaterHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, GreaterOutputType, GreaterContigFunctor, + greater_contig_kernel, vec_sz, n_vecs>(exec_q, nelems, arg1_p, + arg1_offset, arg2_p, arg2_offset, + res_p, res_offset, depends); +} + +template +struct GreaterContigFactory +{ + fnT get() + { + if constexpr (!GreaterOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = greater_contig_impl; + return fn; + } + } +}; + +template +struct GreaterTypeMapFactory +{ + /*! 
@brief get typeid for output type of operator()>(x, y), always bool */ + std::enable_if_t::value, int> get() + { + using rT = typename GreaterOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class greater_strided_kernel; + +template +sycl::event + greater_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, GreaterOutputType, GreaterStridedFunctor, + greater_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct GreaterStridedFactory +{ + fnT get() + { + if constexpr (!GreaterOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = greater_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::greater diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp new file mode 100644 index 00000000000..76ec4f830c5 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp @@ -0,0 +1,318 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of comparison of +/// tensor elements. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/math_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::greater_equal +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct GreaterEqualFunctor +{ + static_assert(std::is_same_v); + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (tu_ns::is_complex::value || + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + using dpctl::tensor::math_utils::greater_equal_complex; + return greater_equal_complex(in1, in2); + } + else { + if constexpr (std::is_integral_v && + std::is_integral_v && + std::is_signed_v != std::is_signed_v) + { + if constexpr (std::is_signed_v && + !std::is_signed_v) { + return (in1 < 0) ? false : (static_cast(in1) >= in2); + } + else { + if constexpr (!std::is_signed_v && + std::is_signed_v) { + return (in2 < 0) ? 
true + : (in1 >= static_cast(in2)); + } + } + } + else { + return (in1 >= in2); + } + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + + auto tmp = (in1 >= in2); + + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using GreaterEqualContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + GreaterEqualFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using GreaterEqualStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + GreaterEqualFunctor>; + +template +struct GreaterEqualOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct GreaterEqualContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + 
+template +class greater_equal_contig_kernel; + +template +sycl::event + greater_equal_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using GreaterEqHS = + hyperparam_detail::GreaterEqualContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = GreaterEqHS::vec_sz; + static constexpr std::uint8_t n_vecs = GreaterEqHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, GreaterEqualOutputType, GreaterEqualContigFunctor, + greater_equal_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct GreaterEqualContigFactory +{ + fnT get() + { + if constexpr (!GreaterEqualOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = greater_equal_contig_impl; + return fn; + } + } +}; + +template +struct GreaterEqualTypeMapFactory +{ + /*! 
@brief get typeid for output type of operator()>(x, y), always bool */ + std::enable_if_t::value, int> get() + { + using rT = typename GreaterEqualOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class greater_equal_strided_kernel; + +template +sycl::event greater_equal_strided_impl( + sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, GreaterEqualOutputType, GreaterEqualStridedFunctor, + greater_equal_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template +struct GreaterEqualStridedFactory +{ + fnT get() + { + if constexpr (!GreaterEqualOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = greater_equal_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::greater_equal diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp new file mode 100644 index 00000000000..d0eff1b210c --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/hypot.hpp @@ -0,0 +1,249 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of HYPOT(x1, x2) +/// function. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::hypot +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct HypotFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + return sycl::hypot(in1, in2); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto res = sycl::hypot(in1, in2); + if constexpr (std::is_same_v) { + return res; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + res); + } + } +}; + +template +using HypotContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using HypotStridedFunctor = + elementwise_common::BinaryStridedFunctor>; + +template +struct HypotOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct HypotContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = 
value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class hypot_contig_kernel; + +template +sycl::event hypot_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using HypotHS = + hyperparam_detail::HypotContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = HypotHS::vec_sz; + static constexpr std::uint8_t n_vecs = HypotHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, HypotOutputType, HypotContigFunctor, + hypot_contig_kernel, vec_sz, n_vecs>(exec_q, nelems, arg1_p, + arg1_offset, arg2_p, arg2_offset, + res_p, res_offset, depends); +} + +template +struct HypotContigFactory +{ + fnT get() + { + if constexpr (!HypotOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = hypot_contig_impl; + return fn; + } + } +}; + +template +struct HypotTypeMapFactory +{ + /*! 
@brief get typeid for output type of sycl::hypot(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename HypotOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class hypot_strided_kernel; + +template +sycl::event + hypot_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, HypotOutputType, HypotStridedFunctor, + hypot_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct HypotStridedFactory +{ + fnT get() + { + if constexpr (!HypotOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = hypot_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::hypot diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/less.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/less.hpp new file mode 100644 index 00000000000..b4a9e605d0e --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/less.hpp @@ -0,0 +1,315 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of comparison of +/// tensor elements. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/math_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::less +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct LessFunctor +{ + static_assert(std::is_same_v); + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (tu_ns::is_complex::value || + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + using dpctl::tensor::math_utils::less_complex; + return less_complex(in1, in2); + } + else { + if constexpr (std::is_integral_v && + std::is_integral_v && + std::is_signed_v != std::is_signed_v) + { + if constexpr (std::is_signed_v && + !std::is_signed_v) { + return (in1 < 0) ? true : (static_cast(in1) < in2); + } + else { + if constexpr (!std::is_signed_v && + std::is_signed_v) { + return (in2 < 0) ? 
false + : (in1 < static_cast(in2)); + } + } + } + else { + return (in1 < in2); + } + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto tmp = (in1 < in2); + + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using LessContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using LessStridedFunctor = + elementwise_common::BinaryStridedFunctor>; + +template +struct LessOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct LessContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class less_contig_kernel; + +template +sycl::event less_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, 
+ ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using LessHS = + hyperparam_detail::LessContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = LessHS::vec_sz; + static constexpr std::uint8_t n_vecs = LessHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, LessOutputType, LessContigFunctor, less_contig_kernel, + vec_sz, n_vecs>(exec_q, nelems, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends); +} + +template +struct LessContigFactory +{ + fnT get() + { + if constexpr (!LessOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = less_contig_impl; + return fn; + } + } +}; + +template +struct LessTypeMapFactory +{ + /*! @brief get typeid for output type of operator()>(x, y), always bool */ + std::enable_if_t::value, int> get() + { + using rT = typename LessOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class less_strided_kernel; + +template +sycl::event + less_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, LessOutputType, LessStridedFunctor, + less_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct LessStridedFactory +{ + fnT get() + { + if constexpr (!LessOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = less_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::less diff --git 
a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp new file mode 100644 index 00000000000..391366f1fcd --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp @@ -0,0 +1,316 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of comparison of +/// tensor elements. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/math_utils.hpp" +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::less_equal +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct LessEqualFunctor +{ + static_assert(std::is_same_v); + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (tu_ns::is_complex::value || + tu_ns::is_complex::value) + { + static_assert(std::is_same_v); + using dpctl::tensor::math_utils::less_equal_complex; + return less_equal_complex(in1, in2); + } + else { + if constexpr (std::is_integral_v && + std::is_integral_v && + std::is_signed_v != std::is_signed_v) + { + if constexpr (std::is_signed_v && + !std::is_signed_v) { + return (in1 < 0) ? true : (static_cast(in1) <= in2); + } + else { + if constexpr (!std::is_signed_v && + std::is_signed_v) { + return (in2 < 0) ? 
false + : (in1 <= static_cast(in2)); + } + } + } + else { + return (in1 <= in2); + } + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + + auto tmp = (in1 <= in2); + + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using LessEqualContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + LessEqualFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using LessEqualStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + LessEqualFunctor>; + +template +struct LessEqualOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct LessEqualContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class 
less_equal_contig_kernel; + +template +sycl::event less_equal_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using LessEqHS = + hyperparam_detail::LessEqualContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = LessEqHS::vec_sz; + static constexpr std::uint8_t n_vecs = LessEqHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, LessEqualOutputType, LessEqualContigFunctor, + less_equal_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct LessEqualContigFactory +{ + fnT get() + { + if constexpr (!LessEqualOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = less_equal_contig_impl; + return fn; + } + } +}; + +template +struct LessEqualTypeMapFactory +{ + /*! @brief get typeid for output type of operator()>(x, y), always bool */ + std::enable_if_t::value, int> get() + { + using rT = typename LessEqualOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class less_equal_strided_kernel; + +template +sycl::event + less_equal_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, LessEqualOutputType, LessEqualStridedFunctor, + less_equal_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template +struct LessEqualStridedFactory +{ + fnT get() + { + if constexpr (!LessEqualOutputType::is_defined) { + fnT 
fn = nullptr; + return fn; + } + else { + fnT fn = less_equal_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::less_equal diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp index af93b089f0b..3a79950672d 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp @@ -48,7 +48,6 @@ #include "utils/math_utils.hpp" #include "utils/type_dispatch_building.hpp" -#include "utils/type_utils.hpp" #include "kernels/dpctl_tensor_types.hpp" @@ -56,10 +55,6 @@ namespace dpctl::tensor::kernels::logaddexp { using dpctl::tensor::ssize_t; namespace td_ns = dpctl::tensor::type_dispatch; -namespace tu_ns = dpctl::tensor::type_utils; - -using dpctl::tensor::type_utils::is_complex; -using dpctl::tensor::type_utils::vec_cast; template struct LogAddExpFunctor diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp new file mode 100644 index 00000000000..763cd9b2228 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/true_divide.hpp @@ -0,0 +1,668 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of DIVIDE(x1, x2) +/// function. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include + +#include "sycl_complex.hpp" +#include "vec_size_util.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +namespace dpctl::tensor::kernels::true_divide +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct TrueDivideFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + using realT2 = typename argT2::value_type; + + return exprm_ns::complex(in1) / + exprm_ns::complex(in2); + } + else if constexpr (tu_ns::is_complex::value && + !tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + + return exprm_ns::complex(in1) / in2; + } + else if constexpr (!tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT2 = typename argT2::value_type; + + return in1 / exprm_ns::complex(in2); + } + else { + return in1 / in2; + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto tmp = in1 / in2; + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using TrueDivideContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + TrueDivideFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + 
+template +using TrueDivideStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + TrueDivideFunctor>; + +template +struct TrueDivideOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + float, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + double, + std::complex>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct TrueDivideContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class true_divide_contig_kernel; + +template +sycl::event + true_divide_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using DivHS = + hyperparam_detail::TrueDivideContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = DivHS::vec_sz; + static constexpr std::uint8_t n_vecs = DivHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, TrueDivideOutputType, TrueDivideContigFunctor, + true_divide_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, 
arg2_offset, res_p, + res_offset, depends); +} + +template +struct TrueDivideContigFactory +{ + fnT get() + { + if constexpr (!TrueDivideOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = true_divide_contig_impl; + return fn; + } + } +}; + +template +struct TrueDivideTypeMapFactory +{ + /*! @brief get typeid for output type of divide(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename TrueDivideOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class true_divide_strided_kernel; + +template +sycl::event + true_divide_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, TrueDivideOutputType, TrueDivideStridedFunctor, + true_divide_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template +struct TrueDivideStridedFactory +{ + fnT get() + { + if constexpr (!TrueDivideOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = true_divide_strided_impl; + return fn; + } + } +}; + +template +using TrueDivideContigMatrixContigRowBroadcastingFunctor = + elementwise_common::BinaryContigMatrixContigRowBroadcastingFunctor< + argT1, + argT2, + resT, + TrueDivideFunctor>; + +template +using TrueDivideContigRowContigMatrixBroadcastingFunctor = + elementwise_common::BinaryContigRowContigMatrixBroadcastingFunctor< + argT1, + argT2, + resT, + TrueDivideFunctor>; + +template +class true_divide_matrix_row_broadcast_sg_krn; + +template +class true_divide_row_matrix_broadcast_sg_krn; + +template +sycl::event true_divide_contig_matrix_contig_row_broadcast_impl( 
+ sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + char *res_p, // typeless pointer to (n0, n1) result C-contig. matrix, + // res[i,j] = mat[i,j] / vec[j] + ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_matrix_contig_row_broadcast_impl< + argT1, argT2, resT, TrueDivideContigMatrixContigRowBroadcastingFunctor, + true_divide_matrix_row_broadcast_sg_krn>( + exec_q, host_tasks, n0, n1, mat_p, mat_offset, vec_p, vec_offset, res_p, + res_offset, depends); +} + +template +struct TrueDivideContigMatrixContigRowBroadcastFactory +{ + fnT get() + { + if constexpr (!TrueDivideOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + using resT = typename TrueDivideOutputType::value_type; + if constexpr (dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = + true_divide_contig_matrix_contig_row_broadcast_impl; + return fn; + } + } + } +}; + +template +sycl::event true_divide_contig_row_contig_matrix_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + const char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + char *res_p, // typeless pointer to (n0, n1) result C-contig. 
matrix, + // res[i,j] = vec[j] / mat[i,j] + ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_row_contig_matrix_broadcast_impl< + argT1, argT2, resT, TrueDivideContigRowContigMatrixBroadcastingFunctor, + true_divide_row_matrix_broadcast_sg_krn>( + exec_q, host_tasks, n0, n1, vec_p, vec_offset, mat_p, mat_offset, res_p, + res_offset, depends); +}; + +template +struct TrueDivideContigRowContigMatrixBroadcastFactory +{ + fnT get() + { + if constexpr (!TrueDivideOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + using resT = typename TrueDivideOutputType::value_type; + if constexpr (dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = + true_divide_contig_row_contig_matrix_broadcast_impl; + return fn; + } + } + } +}; + +template +struct TrueDivideInplaceFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + void operator()(resT &res, const argT &in) + { + if constexpr (tu_ns::is_complex::value) { + if constexpr (tu_ns::is_complex::value) { + using res_rT = typename resT::value_type; + using arg_rT = typename argT::value_type; + + auto res1 = exprm_ns::complex(res); + res1 /= exprm_ns::complex(in); + res = res1; + } + else { + using res_rT = typename resT::value_type; + + auto res1 = exprm_ns::complex(res); + res1 /= in; + res = res1; + } + } + else { + res /= in; + } + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) + { + res /= in; + } +}; + +/* @brief Types supported by in-place divide */ +template +struct TrueDivideInplaceTypePairSupport +{ + + /* value if true a kernel for must be instantiated */ + static constexpr bool is_defined = std::disjunction< + td_ns::TypePairDefinedEntry, + 
td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry>, + td_ns::TypePairDefinedEntry, + resTy, + std::complex>, + td_ns::TypePairDefinedEntry>, + td_ns::TypePairDefinedEntry, + resTy, + std::complex>, + // fall-through + td_ns::NotDefinedEntry>::is_defined; +}; + +template +struct TrueDivideInplaceTypeMapFactory +{ + /*! @brief get typeid for output type of divide(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + if constexpr (TrueDivideInplaceTypePairSupport::is_defined) + { + return td_ns::GetTypeid{}.get(); + } + else { + return td_ns::GetTypeid{}.get(); + } + } +}; + +template +using TrueDivideInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + TrueDivideInplaceFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using TrueDivideInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + TrueDivideInplaceFunctor>; + +template +class true_divide_inplace_contig_kernel; + +template +sycl::event true_divide_inplace_contig_impl( + sycl::queue &exec_q, + std::size_t nelems, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using DivHS = + hyperparam_detail::TrueDivideContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = DivHS::vec_sz; + static constexpr std::uint8_t n_vecs = DivHS::n_vecs; // was DivHS::vec_sz (copy-paste bug); n_vecs and vec_sz are distinct hyperparameters + + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, TrueDivideInplaceContigFunctor, + true_divide_inplace_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends); +} + +template +struct TrueDivideInplaceContigFactory +{ + fnT get() + { + if constexpr (!TrueDivideInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = true_divide_inplace_contig_impl; + return fn; + } + } +}; + +template +class true_divide_inplace_strided_kernel; + +template +sycl::event 
true_divide_inplace_strided_impl( + sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, TrueDivideInplaceStridedFunctor, + true_divide_inplace_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct TrueDivideInplaceStridedFactory +{ + fnT get() + { + if constexpr (!TrueDivideInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = true_divide_inplace_strided_impl; + return fn; + } + } +}; + +template +class true_divide_inplace_row_matrix_broadcast_sg_krn; + +template +using TrueDivideInplaceRowMatrixBroadcastingFunctor = + elementwise_common::BinaryInplaceRowMatrixBroadcastingFunctor< + argT, + resT, + TrueDivideInplaceFunctor>; + +template +sycl::event true_divide_inplace_row_matrix_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_row_matrix_broadcast_impl< + argT, resT, TrueDivideInplaceRowMatrixBroadcastingFunctor, + true_divide_inplace_row_matrix_broadcast_sg_krn>( + exec_q, host_tasks, n0, n1, vec_p, vec_offset, mat_p, mat_offset, + depends); +} + +template +struct TrueDivideInplaceRowMatrixBroadcastFactory +{ + fnT get() + { + if constexpr (!TrueDivideInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + if constexpr (dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value) + { + fnT fn = 
nullptr; + return fn; + } + else { + fnT fn = true_divide_inplace_row_matrix_broadcast_impl; + return fn; + } + } + } +}; + +} // namespace dpctl::tensor::kernels::true_divide diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp index e4e730a1da6..efee3ef529f 100644 --- a/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp @@ -57,26 +57,26 @@ // #include "copysign.hpp" #include "cos.hpp" #include "cosh.hpp" -// #include "equal.hpp" +#include "equal.hpp" #include "exp.hpp" #include "exp2.hpp" #include "expm1.hpp" #include "floor.hpp" -// #include "floor_divide.hpp" -// #include "greater.hpp" -// #include "greater_equal.hpp" -// #include "hypot.hpp" +#include "floor_divide.hpp" +#include "greater.hpp" +#include "greater_equal.hpp" +#include "hypot.hpp" #include "imag.hpp" #include "isfinite.hpp" #include "isinf.hpp" #include "isnan.hpp" -// #include "less.hpp" -// #include "less_equal.hpp" +#include "less.hpp" +#include "less_equal.hpp" #include "log.hpp" #include "log10.hpp" #include "log1p.hpp" #include "log2.hpp" -// #include "logaddexp.hpp" +#include "logaddexp.hpp" // #include "logical_and.hpp" #include "logical_not.hpp" // #include "logical_or.hpp" @@ -104,7 +104,7 @@ // #include "subtract.hpp" #include "tan.hpp" #include "tanh.hpp" -// #include "true_divide.hpp" +#include "true_divide.hpp" #include "trunc.hpp" namespace dpctl::tensor::py_internal @@ -137,27 +137,27 @@ void init_elementwise_functions(py::module_ m) // init_copysign(m); init_cos(m); init_cosh(m); - // init_divide(m); - // init_equal(m); + init_divide(m); + init_equal(m); init_exp(m); init_exp2(m); init_expm1(m); init_floor(m); - // init_floor_divide(m); - // init_greater(m); - // init_greater_equal(m); - // init_hypot(m); + init_floor_divide(m); + init_greater(m); + 
init_greater_equal(m); + init_hypot(m); init_imag(m); init_isfinite(m); init_isinf(m); init_isnan(m); - // init_less(m); - // init_less_equal(m); + init_less(m); + init_less_equal(m); init_log(m); init_log10(m); init_log1p(m); init_log2(m); - // init_logaddexp(m); + init_logaddexp(m); // init_logical_and(m); init_logical_not(m); // init_logical_or(m); diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/equal.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/equal.cpp new file mode 100644 index 00000000000..863501bea36 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/equal.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "equal.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/equal.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B09: ===== EQUAL (x1, x2) +namespace impl +{ +namespace equal_fn_ns = dpctl::tensor::kernels::equal; + +static binary_contig_impl_fn_ptr_t + equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + 
equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::EqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::EqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::EqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(equal_contig_dispatch_table); +}; + +} // namespace impl + +void init_equal(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_equal_dispatch_tables(); + using impl::equal_contig_dispatch_table; + using impl::equal_output_id_table; + using impl::equal_strided_dispatch_table; + + auto equal_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto equal_result_type_pyapi = 
[&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + equal_output_id_table); + }; + m.def("_equal", equal_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_equal_result_type", equal_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/equal.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/equal.hpp new file mode 100644 index 00000000000..23f37011145 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/equal.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_equal(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/floor_divide.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/floor_divide.cpp new file mode 100644 index 00000000000..af4635a0f50 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/floor_divide.cpp @@ -0,0 +1,205 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. 
+//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "floor_divide.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/floor_divide.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B10: ===== FLOOR_DIVIDE (x1, x2) +namespace impl +{ +namespace floor_divide_fn_ns = dpctl::tensor::kernels::floor_divide; + +static binary_contig_impl_fn_ptr_t + floor_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static int floor_divide_output_id_table[td_ns::num_types][td_ns::num_types]; +static int floor_divide_inplace_output_id_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + floor_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + floor_divide_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + floor_divide_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_floor_divide_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = floor_divide_fn_ns; + + // which input types are 
supported, and what is the type of the result + using fn_ns::FloorDivideTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(floor_divide_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::FloorDivideStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(floor_divide_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::FloorDivideContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(floor_divide_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::FloorDivideInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(floor_divide_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::FloorDivideInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(floor_divide_inplace_contig_dispatch_table); + + // which types are supported by the in-place kernels + using fn_ns::FloorDivideInplaceTypeMapFactory; + DispatchTableBuilder dtb6; + dtb6.populate_dispatch_table(floor_divide_inplace_output_id_table); +}; + +} // namespace impl + +void init_floor_divide(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_floor_divide_dispatch_tables(); + using impl::floor_divide_contig_dispatch_table; + using impl::floor_divide_output_id_table; + using impl::floor_divide_strided_dispatch_table; + + auto floor_divide_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, floor_divide_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + floor_divide_contig_dispatch_table, + // 
function pointers to handle operation on strided arrays (most + // general case) + floor_divide_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto floor_divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + floor_divide_output_id_table); + }; + m.def("_floor_divide", floor_divide_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_floor_divide_result_type", floor_divide_result_type_pyapi, ""); + + using impl::floor_divide_inplace_contig_dispatch_table; + using impl::floor_divide_inplace_output_id_table; + using impl::floor_divide_inplace_strided_dispatch_table; + + auto floor_divide_inplace_pyapi = [&](const arrayT &src, + const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, floor_divide_inplace_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + floor_divide_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + floor_divide_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_floor_divide_inplace", floor_divide_inplace_pyapi, "", + py::arg("lhs"), py::arg("rhs"), py::arg("sycl_queue"), 
+ py::arg("depends") = py::list()); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/floor_divide.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/floor_divide.hpp new file mode 100644 index 00000000000..17d493b5805 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/floor_divide.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_floor_divide(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater.cpp new file mode 100644 index 00000000000..f3cfaeae228 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. 
+//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "greater.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/greater.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B11: ===== GREATER (x1, x2) +namespace impl +{ +namespace greater_fn_ns = dpctl::tensor::kernels::greater; + +static binary_contig_impl_fn_ptr_t + greater_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int greater_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + greater_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_greater_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = greater_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::GreaterTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(greater_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::GreaterStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(greater_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::GreaterContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(greater_contig_dispatch_table); +}; + +} // namespace impl + +void init_greater(py::module_ m) +{ + using 
arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_greater_dispatch_tables(); + using impl::greater_contig_dispatch_table; + using impl::greater_output_id_table; + using impl::greater_strided_dispatch_table; + + auto greater_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, greater_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + greater_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + greater_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto greater_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + greater_output_id_table); + }; + m.def("_greater", greater_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_greater_result_type", greater_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater.hpp new file mode 100644 index 00000000000..c8c3caa5f1f --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 
2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_greater(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater_equal.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater_equal.cpp new file mode 100644 index 00000000000..ad9af91ce3d --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater_equal.cpp @@ -0,0 +1,146 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "greater_equal.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/greater_equal.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B12: ===== GREATER_EQUAL (x1, x2) +namespace impl +{ +namespace greater_equal_fn_ns = dpctl::tensor::kernels::greater_equal; + +static binary_contig_impl_fn_ptr_t + greater_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int 
greater_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + greater_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_greater_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = greater_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::GreaterEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(greater_equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::GreaterEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(greater_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::GreaterEqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(greater_equal_contig_dispatch_table); +}; + +} // namespace impl + +void init_greater_equal(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_greater_equal_dispatch_tables(); + using impl::greater_equal_contig_dispatch_table; + using impl::greater_equal_output_id_table; + using impl::greater_equal_strided_dispatch_table; + + auto greater_equal_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, greater_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + greater_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + greater_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, 
+ // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto greater_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + greater_equal_output_id_table); + }; + m.def("_greater_equal", greater_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_greater_equal_result_type", greater_equal_result_type_pyapi, + ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater_equal.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater_equal.hpp new file mode 100644 index 00000000000..0cf7f8e89bb --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/greater_equal.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_greater_equal(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/hypot.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/hypot.cpp new file mode 100644 index 00000000000..f4ce161f4cd --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/hypot.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. 
+//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "hypot.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/hypot.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B24: ===== HYPOT (x1, x2) +namespace impl +{ +namespace hypot_fn_ns = dpctl::tensor::kernels::hypot; + +static binary_contig_impl_fn_ptr_t + hypot_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int hypot_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + hypot_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_hypot_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = hypot_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::HypotTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(hypot_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::HypotStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(hypot_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::HypotContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(hypot_contig_dispatch_table); +}; + +} // namespace impl + +void init_hypot(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + 
using event_vecT = std::vector<sycl::event>; + { + impl::populate_hypot_dispatch_tables(); + using impl::hypot_contig_dispatch_table; + using impl::hypot_output_id_table; + using impl::hypot_strided_dispatch_table; + + auto hypot_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, hypot_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + hypot_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + hypot_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto hypot_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + hypot_output_id_table); + }; + m.def("_hypot", hypot_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_hypot_result_type", hypot_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/hypot.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/hypot.hpp new file mode 100644 index 00000000000..5bc73e717ad --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/hypot.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include <pybind11/pybind11.h> + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_hypot(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/less.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/less.cpp new file mode 100644 index 00000000000..d587ee71317 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/less.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "less.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/less.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B13: ===== LESS (x1, x2) +namespace impl +{ +namespace less_fn_ns = dpctl::tensor::kernels::less; + +static binary_contig_impl_fn_ptr_t less_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static int less_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + 
less_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_less_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = less_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LessTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(less_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LessStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(less_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LessContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(less_contig_dispatch_table); +}; + +} // namespace impl + +void init_less(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_less_dispatch_tables(); + using impl::less_contig_dispatch_table; + using impl::less_output_id_table; + using impl::less_strided_dispatch_table; + + auto less_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, less_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + less_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + less_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto less_result_type_pyapi = [&](const py::dtype 
&dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + less_output_id_table); + }; + m.def("_less", less_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_less_result_type", less_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/less.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/less.hpp new file mode 100644 index 00000000000..e08d84f380d --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/less.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include <pybind11/pybind11.h> + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_less(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/less_equal.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/less_equal.cpp new file mode 100644 index 00000000000..433969cead2 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/less_equal.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. 
+//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "less_equal.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/less_equal.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B14: ===== LESS_EQUAL (x1, x2) +namespace impl +{ +namespace less_equal_fn_ns = dpctl::tensor::kernels::less_equal; + +static binary_contig_impl_fn_ptr_t + less_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int less_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + less_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_less_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = less_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LessEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(less_equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LessEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(less_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LessEqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(less_equal_contig_dispatch_table); +}; + +} // namespace impl + 
+void init_less_equal(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_less_equal_dispatch_tables(); + using impl::less_equal_contig_dispatch_table; + using impl::less_equal_output_id_table; + using impl::less_equal_strided_dispatch_table; + + auto less_equal_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, less_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + less_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + less_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto less_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + less_equal_output_id_table); + }; + m.def("_less_equal", less_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_less_equal_result_type", less_equal_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/less_equal.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/less_equal.hpp new file mode 100644 index 00000000000..8eeb837a35a --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/less_equal.hpp @@ -0,0 +1,46 @@ 
+//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include <pybind11/pybind11.h> + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_less_equal(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/logaddexp.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logaddexp.cpp new file mode 100644 index 00000000000..71bc9cad403 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logaddexp.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "logaddexp.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/logaddexp.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B15: ===== LOGADDEXP (x1, x2) +namespace impl +{ +namespace logaddexp_fn_ns = dpctl::tensor::kernels::logaddexp; + +static binary_contig_impl_fn_ptr_t + logaddexp_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logaddexp_output_id_table[td_ns::num_types][td_ns::num_types]; + +static 
binary_strided_impl_fn_ptr_t + logaddexp_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logaddexp_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logaddexp_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogAddExpTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logaddexp_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogAddExpStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logaddexp_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogAddExpContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logaddexp_contig_dispatch_table); +}; + +} // namespace impl + +void init_logaddexp(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_logaddexp_dispatch_tables(); + using impl::logaddexp_contig_dispatch_table; + using impl::logaddexp_output_id_table; + using impl::logaddexp_strided_dispatch_table; + + auto logaddexp_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logaddexp_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logaddexp_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logaddexp_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + 
binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logaddexp_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logaddexp_output_id_table); + }; + m.def("_logaddexp", logaddexp_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logaddexp_result_type", logaddexp_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/logaddexp.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logaddexp.hpp new file mode 100644 index 00000000000..2c4efa7d0c5 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logaddexp.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include <pybind11/pybind11.h> + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_logaddexp(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/true_divide.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/true_divide.cpp new file mode 100644 index 00000000000..3b8ca7712ba --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/true_divide.cpp @@ -0,0 +1,498 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. 
+//===---------------------------------------------------------------------===// + +#include +#include +#include +#include +#include // for std::ignore +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "simplify_iteration_space.hpp" +#include "true_divide.hpp" + +#include "utils/memory_overlap.hpp" +#include "utils/offset_utils.hpp" +#include "utils/output_validation.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/true_divide.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B08: ===== DIVIDE (x1, x2) +namespace impl +{ +namespace true_divide_fn_ns = dpctl::tensor::kernels::true_divide; + +static binary_contig_impl_fn_ptr_t + true_divide_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int true_divide_output_id_table[td_ns::num_types][td_ns::num_types]; +static int true_divide_inplace_output_id_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + true_divide_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// divide(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + true_divide_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// 
divide(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + true_divide_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + true_divide_inplace_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + true_divide_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + true_divide_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_true_divide_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = true_divide_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::TrueDivideTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(true_divide_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::TrueDivideStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(true_divide_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::TrueDivideContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(true_divide_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::TrueDivideContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + TrueDivideContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + true_divide_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::TrueDivideContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + 
binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + TrueDivideContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + true_divide_contig_row_contig_matrix_broadcast_dispatch_table); + + // which types are supported by the in-place kernels + using fn_ns::TrueDivideInplaceTypeMapFactory; + DispatchTableBuilder dtb6; + dtb6.populate_dispatch_table(true_divide_inplace_output_id_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::TrueDivideInplaceStridedFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(true_divide_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::TrueDivideInplaceContigFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(true_divide_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::TrueDivideInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb9; + dtb9.populate_dispatch_table(true_divide_inplace_row_matrix_dispatch_table); +}; + +template +class divide_by_scalar_krn; + +typedef sycl::event (*divide_by_scalar_fn_ptr_t)( + sycl::queue &, + std::size_t, + int, + const ssize_t *, + const char *, + py::ssize_t, + const char *, + char *, + py::ssize_t, + const std::vector &); + +template +sycl::event divide_by_scalar(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + py::ssize_t arg_offset, + const char *scalar_ptr, + char *res_p, + py::ssize_t res_offset, + const std::vector &depends = {}) +{ + const scalarT sc_v = *reinterpret_cast(scalar_ptr); + + sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + + using BinOpT = + dpctl::tensor::kernels::true_divide::TrueDivideFunctor; + + auto op = BinOpT(); + + using IndexerT = + typename 
dpctl::tensor::offset_utils::TwoOffsets_StridedIndexer; + + const IndexerT two_offsets_indexer{nd, arg_offset, res_offset, + shape_and_strides}; + + const T *arg_tp = reinterpret_cast(arg_p); + T *res_tp = reinterpret_cast(res_p); + + cgh.parallel_for>( + {nelems}, [=](sycl::id<1> id) { + const auto &two_offsets_ = + two_offsets_indexer(static_cast(id.get(0))); + + const auto &arg_i = two_offsets_.get_first_offset(); + const auto &res_i = two_offsets_.get_second_offset(); + res_tp[res_i] = op(arg_tp[arg_i], sc_v); + }); + }); + return comp_ev; +} + +std::pair + py_divide_by_scalar(const dpctl::tensor::usm_ndarray &src, + double scalar, + const dpctl::tensor::usm_ndarray &dst, + sycl::queue &exec_q, + const std::vector &depends = {}) +{ + int src_typenum = src.get_typenum(); + int dst_typenum = dst.get_typenum(); + + auto array_types = td_ns::usm_ndarray_types(); + int src_typeid = array_types.typenum_to_lookup_id(src_typenum); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + if (src_typeid != dst_typeid) { + throw py::value_error( + "Destination array has unexpected elemental data type."); + } + + // check that queues are compatible + if (!dpctl::utils::queues_are_compatible(exec_q, {src, dst})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + dpctl::tensor::validation::CheckWritable::throw_if_not_writable(dst); + // check shapes, broadcasting is assumed done by caller + // check that dimensions are the same + int dst_nd = dst.get_ndim(); + if (dst_nd != src.get_ndim()) { + throw py::value_error("Array dimensions are not the same."); + } + + // check that shapes are the same + const py::ssize_t *src_shape = src.get_shape_raw(); + const py::ssize_t *dst_shape = dst.get_shape_raw(); + bool shapes_equal(true); + std::size_t src_nelems(1); + + for (int i = 0; i < dst_nd; ++i) { + src_nelems *= static_cast(src_shape[i]); + shapes_equal = shapes_equal && (src_shape[i] == dst_shape[i]); + } + if 
(!shapes_equal) { + throw py::value_error("Array shapes are not the same."); + } + + // if nelems is zero, return + if (src_nelems == 0) { + return std::make_pair(sycl::event(), sycl::event()); + } + + dpctl::tensor::validation::AmpleMemory::throw_if_not_ample(dst, src_nelems); + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + auto const &same_logical_tensors = + dpctl::tensor::overlap::SameLogicalTensors(); + if ((overlap(src, dst) && !same_logical_tensors(src, dst))) { + throw py::value_error("Arrays index overlapping segments of memory"); + } + + const char *src_data = src.get_data(); + char *dst_data = dst.get_data(); + + static constexpr int float16_typeid = + static_cast(td_ns::typenum_t::HALF); + static constexpr int float32_typeid = + static_cast(td_ns::typenum_t::FLOAT); + static constexpr int float64_typeid = + static_cast(td_ns::typenum_t::DOUBLE); + static constexpr int complex64_typeid = + static_cast(td_ns::typenum_t::CFLOAT); + static constexpr int complex128_typeid = + static_cast(td_ns::typenum_t::CDOUBLE); + + // statically pre-allocated memory for scalar + alignas(double) char scalar_alloc[sizeof(double)] = {0}; + + divide_by_scalar_fn_ptr_t fn; + // placement new into stack memory means no call to delete is necessary + switch (src_typeid) { + case float16_typeid: + { + fn = divide_by_scalar; + std::ignore = + new (scalar_alloc) sycl::half(static_cast(scalar)); + break; + } + case float32_typeid: + { + fn = divide_by_scalar; + std::ignore = new (scalar_alloc) float(scalar); + break; + } + case float64_typeid: + { + fn = divide_by_scalar; + std::ignore = new (scalar_alloc) double(scalar); + break; + } + case complex64_typeid: + { + fn = divide_by_scalar, float>; + std::ignore = new (scalar_alloc) float(scalar); + break; + } + case complex128_typeid: + { + fn = divide_by_scalar, double>; + std::ignore = new (scalar_alloc) double(scalar); + break; + } + default: + throw std::runtime_error("Implementation is missing for typeid=" + 
+ std::to_string(src_typeid)); + } + + // simplify strides + auto const &src_strides = src.get_strides_vector(); + auto const &dst_strides = dst.get_strides_vector(); + + using shT = std::vector; + shT simplified_shape; + shT simplified_src_strides; + shT simplified_dst_strides; + py::ssize_t src_offset(0); + py::ssize_t dst_offset(0); + + int nd = dst_nd; + const py::ssize_t *shape = src_shape; + + std::vector host_tasks{}; + dpctl::tensor::py_internal::simplify_iteration_space( + nd, shape, src_strides, dst_strides, + // outputs + simplified_shape, simplified_src_strides, simplified_dst_strides, + src_offset, dst_offset); + + if (nd == 0) { + // handle 0d array as 1d array with 1 element + static constexpr py::ssize_t one{1}; + simplified_shape.push_back(one); + simplified_src_strides.push_back(one); + simplified_dst_strides.push_back(one); + src_offset = 0; + dst_offset = 0; + } + + using dpctl::tensor::offset_utils::device_allocate_and_pack; + auto ptr_sz_event_triple_ = device_allocate_and_pack( + exec_q, host_tasks, simplified_shape, simplified_src_strides, + simplified_dst_strides); + auto shape_strides_owner = std::move(std::get<0>(ptr_sz_event_triple_)); + auto ©_metadata_ev = std::get<2>(ptr_sz_event_triple_); + + const py::ssize_t *shape_strides = shape_strides_owner.get(); + + std::vector all_deps; + all_deps.reserve(depends.size() + 1); + all_deps.resize(depends.size()); + std::copy(depends.begin(), depends.end(), all_deps.begin()); + all_deps.push_back(copy_metadata_ev); + + sycl::event div_ev = + fn(exec_q, src_nelems, nd, shape_strides, src_data, src_offset, + scalar_alloc, dst_data, dst_offset, all_deps); + + // async free of shape_strides temporary + sycl::event tmp_cleanup_ev = dpctl::tensor::alloc_utils::async_smart_free( + exec_q, {div_ev}, shape_strides_owner); + + host_tasks.push_back(tmp_cleanup_ev); + + return std::make_pair( + dpctl::utils::keep_args_alive(exec_q, {src, dst}, host_tasks), div_ev); +} + +} // namespace impl + +void 
init_divide(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_true_divide_dispatch_tables(); + using impl::true_divide_contig_dispatch_table; + using impl:: + true_divide_contig_matrix_contig_row_broadcast_dispatch_table; + using impl:: + true_divide_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::true_divide_output_id_table; + using impl::true_divide_strided_dispatch_table; + + auto divide_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, true_divide_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + true_divide_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + true_divide_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + true_divide_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + true_divide_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto divide_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + true_divide_output_id_table); + }; + m.def("_divide", divide_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_divide_result_type", divide_result_type_pyapi, ""); + + using impl::true_divide_inplace_contig_dispatch_table; + using impl::true_divide_inplace_output_id_table; + using impl::true_divide_inplace_row_matrix_dispatch_table; + using impl::true_divide_inplace_strided_dispatch_table; + + auto divide_inplace_pyapi = [&](const arrayT 
&src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, true_divide_inplace_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + true_divide_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + true_divide_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + true_divide_inplace_row_matrix_dispatch_table); + }; + m.def("_divide_inplace", divide_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + + using impl::py_divide_by_scalar; + m.def("_divide_by_scalar", &py_divide_by_scalar, "", py::arg("src"), + py::arg("scalar"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/true_divide.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/true_divide.hpp new file mode 100644 index 00000000000..941384beaf8 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/true_divide.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. 
+//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_divide(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 024cbe1cc75..a9ec3ebd0a8 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -661,8 +661,8 @@ def array_equiv(a1, a2): equal = DPNPBinaryFunc( "equal", - ti._equal_result_type, - ti._equal, + ti_ext._equal_result_type, + ti_ext._equal, _EQUAL_DOCSTRING, ) @@ -737,8 +737,8 @@ def array_equiv(a1, a2): greater = DPNPBinaryFunc( "greater", - ti._greater_result_type, - ti._greater, + ti_ext._greater_result_type, + ti_ext._greater, _GREATER_DOCSTRING, ) @@ -814,8 +814,8 @@ def array_equiv(a1, a2): greater_equal = DPNPBinaryFunc( "greater_equal", - ti._greater_equal_result_type, - ti._greater_equal, + ti_ext._greater_equal_result_type, + ti_ext._greater_equal, _GREATER_EQUAL_DOCSTRING, ) @@ -1750,8 +1750,8 @@ def isscalar(element): less = DPNPBinaryFunc( "less", - ti._less_result_type, - ti._less, + ti_ext._less_result_type, + ti_ext._less, _LESS_DOCSTRING, ) @@ -1826,8 +1826,8 @@ def isscalar(element): less_equal = DPNPBinaryFunc( "less_equal", - ti._less_equal_result_type, - ti._less_equal, + ti_ext._less_equal_result_type, + ti_ext._less_equal, _LESS_EQUAL_DOCSTRING, ) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index d1bdbdcfc96..54a17cec0c3 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -1558,12 +1558,12 @@ def diff(a, n=1, axis=-1, prepend=None, append=None): divide = DPNPBinaryFunc( "divide", - ti._divide_result_type, - ti._divide, + ti_ext._divide_result_type, + ti_ext._divide, _DIVIDE_DOCSTRING, mkl_fn_to_call="_mkl_div_to_call", mkl_impl_fn="_div", - binary_inplace_fn=ti._divide_inplace, + binary_inplace_fn=ti_ext._divide_inplace, 
acceptance_fn=dtu._acceptance_fn_divide, ) @@ -2139,10 +2139,10 @@ def ediff1d(ary, to_end=None, to_begin=None): floor_divide = DPNPBinaryFunc( "floor_divide", - ti._floor_divide_result_type, - ti._floor_divide, + ti_ext._floor_divide_result_type, + ti_ext._floor_divide, _FLOOR_DIVIDE_DOCSTRING, - binary_inplace_fn=ti._floor_divide_inplace, + binary_inplace_fn=ti_ext._floor_divide_inplace, ) diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py index 186ae47b095..906a20f1625 100644 --- a/dpnp/dpnp_iface_trigonometric.py +++ b/dpnp/dpnp_iface_trigonometric.py @@ -42,12 +42,10 @@ # pylint: disable=protected-access # pylint: disable=no-name-in-module -import dpctl.tensor._tensor_elementwise_impl as ti - # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor import dpctl_ext.tensor as dpt -import dpctl_ext.tensor._tensor_elementwise_impl as ti_ext +import dpctl_ext.tensor._tensor_elementwise_impl as ti import dpctl_ext.tensor._type_utils as dtu import dpnp import dpnp.backend.extensions.ufunc._ufunc_impl as ufi @@ -139,8 +137,8 @@ def _get_accumulation_res_dt(a, dtype): acos = DPNPUnaryFunc( "acos", - ti_ext._acos_result_type, - ti_ext._acos, + ti._acos_result_type, + ti._acos, _ACOS_DOCSTRING, mkl_fn_to_call="_mkl_acos_to_call", mkl_impl_fn="_acos", @@ -225,8 +223,8 @@ def _get_accumulation_res_dt(a, dtype): acosh = DPNPUnaryFunc( "acosh", - ti_ext._acosh_result_type, - ti_ext._acosh, + ti._acosh_result_type, + ti._acosh, _ACOSH_DOCSTRING, mkl_fn_to_call="_mkl_acosh_to_call", mkl_impl_fn="_acosh", @@ -311,8 +309,8 @@ def _get_accumulation_res_dt(a, dtype): asin = DPNPUnaryFunc( "asin", - ti_ext._asin_result_type, - ti_ext._asin, + ti._asin_result_type, + ti._asin, _ASIN_DOCSTRING, mkl_fn_to_call="_mkl_asin_to_call", mkl_impl_fn="_asin", @@ -395,8 +393,8 @@ def _get_accumulation_res_dt(a, dtype): asinh = DPNPUnaryFunc( "asinh", - ti_ext._asinh_result_type, - ti_ext._asinh, + ti._asinh_result_type, + ti._asinh, 
_ASINH_DOCSTRING, mkl_fn_to_call="_mkl_asinh_to_call", mkl_impl_fn="_asinh", @@ -481,8 +479,8 @@ def _get_accumulation_res_dt(a, dtype): atan = DPNPUnaryFunc( "atan", - ti_ext._atan_result_type, - ti_ext._atan, + ti._atan_result_type, + ti._atan, _ATAN_DOCSTRING, mkl_fn_to_call="_mkl_atan_to_call", mkl_impl_fn="_atan", @@ -572,8 +570,8 @@ def _get_accumulation_res_dt(a, dtype): atan2 = DPNPBinaryFunc( "atan2", - ti_ext._atan2_result_type, - ti_ext._atan2, + ti._atan2_result_type, + ti._atan2, _ATAN2_DOCSTRING, mkl_fn_to_call="_mkl_atan2_to_call", mkl_impl_fn="_atan2", @@ -656,8 +654,8 @@ def _get_accumulation_res_dt(a, dtype): atanh = DPNPUnaryFunc( "atanh", - ti_ext._atanh_result_type, - ti_ext._atanh, + ti._atanh_result_type, + ti._atanh, _ATANH_DOCSTRING, mkl_fn_to_call="_mkl_atanh_to_call", mkl_impl_fn="_atanh", @@ -718,8 +716,8 @@ def _get_accumulation_res_dt(a, dtype): cbrt = DPNPUnaryFunc( "cbrt", - ti_ext._cbrt_result_type, - ti_ext._cbrt, + ti._cbrt_result_type, + ti._cbrt, _CBRT_DOCSTRING, mkl_fn_to_call="_mkl_cbrt_to_call", mkl_impl_fn="_cbrt", @@ -777,8 +775,8 @@ def _get_accumulation_res_dt(a, dtype): cos = DPNPUnaryFunc( "cos", - ti_ext._cos_result_type, - ti_ext._cos, + ti._cos_result_type, + ti._cos, _COS_DOCSTRING, mkl_fn_to_call="_mkl_cos_to_call", mkl_impl_fn="_cos", @@ -841,8 +839,8 @@ def _get_accumulation_res_dt(a, dtype): cosh = DPNPUnaryFunc( "cosh", - ti_ext._cosh_result_type, - ti_ext._cosh, + ti._cosh_result_type, + ti._cosh, _COSH_DOCSTRING, mkl_fn_to_call="_mkl_cosh_to_call", mkl_impl_fn="_cosh", @@ -1127,8 +1125,8 @@ def cumlogsumexp( exp = DPNPUnaryFunc( "exp", - ti_ext._exp_result_type, - ti_ext._exp, + ti._exp_result_type, + ti._exp, _EXP_DOCSTRING, mkl_fn_to_call="_mkl_exp_to_call", mkl_impl_fn="_exp", @@ -1187,8 +1185,8 @@ def cumlogsumexp( exp2 = DPNPUnaryFunc( "exp2", - ti_ext._exp2_result_type, - ti_ext._exp2, + ti._exp2_result_type, + ti._exp2, _EXP2_DOCSTRING, mkl_fn_to_call="_mkl_exp2_to_call", mkl_impl_fn="_exp2", @@ 
-1259,8 +1257,8 @@ def cumlogsumexp( expm1 = DPNPUnaryFunc( "expm1", - ti_ext._expm1_result_type, - ti_ext._expm1, + ti._expm1_result_type, + ti._expm1, _EXPM1_DOCSTRING, mkl_fn_to_call="_mkl_expm1_to_call", mkl_impl_fn="_expm1", @@ -1416,8 +1414,8 @@ def cumlogsumexp( log = DPNPUnaryFunc( "log", - ti_ext._log_result_type, - ti_ext._log, + ti._log_result_type, + ti._log, _LOG_DOCSTRING, mkl_fn_to_call="_mkl_ln_to_call", mkl_impl_fn="_ln", @@ -1495,8 +1493,8 @@ def cumlogsumexp( log10 = DPNPUnaryFunc( "log10", - ti_ext._log10_result_type, - ti_ext._log10, + ti._log10_result_type, + ti._log10, _LOG10_DOCSTRING, mkl_fn_to_call="_mkl_log10_to_call", mkl_impl_fn="_log10", @@ -1580,8 +1578,8 @@ def cumlogsumexp( log1p = DPNPUnaryFunc( "log1p", - ti_ext._log1p_result_type, - ti_ext._log1p, + ti._log1p_result_type, + ti._log1p, _LOG1P_DOCSTRING, mkl_fn_to_call="_mkl_log1p_to_call", mkl_impl_fn="_log1p", @@ -1660,8 +1658,8 @@ def cumlogsumexp( log2 = DPNPUnaryFunc( "log2", - ti_ext._log2_result_type, - ti_ext._log2, + ti._log2_result_type, + ti._log2, _LOG2_DOCSTRING, mkl_fn_to_call="_mkl_log2_to_call", mkl_impl_fn="_log2", @@ -2107,8 +2105,8 @@ def logsumexp(x, /, *, axis=None, dtype=None, keepdims=False, out=None): reciprocal = DPNPUnaryFunc( "reciprocal", - ti_ext._reciprocal_result_type, - ti_ext._reciprocal, + ti._reciprocal_result_type, + ti._reciprocal, _RECIPROCAL_DOCSTRING, mkl_fn_to_call="_mkl_inv_to_call", mkl_impl_fn="_inv", @@ -2252,8 +2250,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None): rsqrt = DPNPUnaryFunc( "rsqrt", - ti_ext._rsqrt_result_type, - ti_ext._rsqrt, + ti._rsqrt_result_type, + ti._rsqrt, _RSQRT_DOCSTRING, ) @@ -2309,8 +2307,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None): sin = DPNPUnaryFunc( "sin", - ti_ext._sin_result_type, - ti_ext._sin, + ti._sin_result_type, + ti._sin, _SIN_DOCSTRING, mkl_fn_to_call="_mkl_sin_to_call", mkl_impl_fn="_sin", @@ -2372,8 +2370,8 @@ def 
reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None): sinh = DPNPUnaryFunc( "sinh", - ti_ext._sinh_result_type, - ti_ext._sinh, + ti._sinh_result_type, + ti._sinh, _SINH_DOCSTRING, mkl_fn_to_call="_mkl_sinh_to_call", mkl_impl_fn="_sinh", @@ -2449,8 +2447,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None): sqrt = DPNPUnaryFunc( "sqrt", - ti_ext._sqrt_result_type, - ti_ext._sqrt, + ti._sqrt_result_type, + ti._sqrt, _SQRT_DOCSTRING, mkl_fn_to_call="_mkl_sqrt_to_call", mkl_impl_fn="_sqrt", @@ -2508,8 +2506,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None): square = DPNPUnaryFunc( "square", - ti_ext._square_result_type, - ti_ext._square, + ti._square_result_type, + ti._square, _SQUARE_DOCSTRING, mkl_fn_to_call="_mkl_sqr_to_call", mkl_impl_fn="_sqr", @@ -2567,8 +2565,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None): tan = DPNPUnaryFunc( "tan", - ti_ext._tan_result_type, - ti_ext._tan, + ti._tan_result_type, + ti._tan, _TAN_DOCSTRING, mkl_fn_to_call="_mkl_tan_to_call", mkl_impl_fn="_tan", @@ -2632,8 +2630,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None): tanh = DPNPUnaryFunc( "tanh", - ti_ext._tanh_result_type, - ti_ext._tanh, + ti._tanh_result_type, + ti._tanh, _TANH_DOCSTRING, mkl_fn_to_call="_mkl_tanh_to_call", mkl_impl_fn="_tanh",