diff --git a/dpctl_ext/tensor/CMakeLists.txt b/dpctl_ext/tensor/CMakeLists.txt index 7e1170f4ebf..6f286a8d719 100644 --- a/dpctl_ext/tensor/CMakeLists.txt +++ b/dpctl_ext/tensor/CMakeLists.txt @@ -91,7 +91,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cbrt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/ceil.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/conj.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/copysign.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/copysign.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cos.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cosh.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/equal.cpp @@ -114,22 +114,22 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/log10.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logaddexp.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_and.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_and.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_not.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_or.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_xor.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/maximum.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/minimum.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/multiply.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_or.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/logical_xor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/maximum.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/minimum.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/multiply.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/negative.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/nextafter.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/not_equal.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/nextafter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/not_equal.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/positive.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/pow.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/pow.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/proj.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/real.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/reciprocal.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/remainder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/remainder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/round.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/rsqrt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sign.cpp @@ -138,7 +138,7 @@ set(_elementwise_sources ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sinh.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/square.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/subtract.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/subtract.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tanh.cpp ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/true_divide.cpp diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index 279e3a95fd0..71ef714c642 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -73,6 +73,7 @@ cbrt, ceil, conj, + copysign, cos, cosh, divide, @@ -96,12 +97,22 @@ log2, log10, logaddexp, + logical_and, logical_not, + logical_or, + logical_xor, + maximum, + minimum, + multiply, negative, + nextafter, + not_equal, positive, + pow, proj, real, reciprocal, + remainder, round, rsqrt, sign, @@ -110,6 +121,7 @@ sinh, sqrt, square, + subtract, tan, tanh, trunc, @@ -204,6 +216,7 @@ "concat", "conj", "copy", + "copysign", "cos", "cosh", "count_nonzero", @@ -244,24 +257,33 @@ "linspace", "log", "logaddexp", + "logical_and", "logical_not", + "logical_or", + "logical_xor", "logsumexp", "log1p", "log2", "log10", "max", + "maximum", "meshgrid", "min", + "minimum", "moveaxis", + "multiply", "permute_dims", "matmul", "matrix_transpose", "negative", + "nextafter", "nonzero", + "not_equal", "ones", "ones_like", "place", "positive", + "pow", "prod", "proj", "put", @@ -269,6 +291,7 @@ "real", "reciprocal", "reduce_hypot", + "remainder", "repeat", "reshape", "result_type", @@ -285,6 +308,7 @@ "square", "squeeze", "stack", + "subtract", "sum", "swapaxes", "take", diff --git a/dpctl_ext/tensor/_elementwise_funcs.py b/dpctl_ext/tensor/_elementwise_funcs.py index 17bdf94d9be..6442ef0b459 100644 --- a/dpctl_ext/tensor/_elementwise_funcs.py +++ b/dpctl_ext/tensor/_elementwise_funcs.py @@ -35,6 +35,7 @@ _acceptance_fn_divide, _acceptance_fn_negative, _acceptance_fn_reciprocal, + _acceptance_fn_subtract, _resolve_weak_types_all_py_ints, ) @@ -1243,6 +1244,102 @@ ) del _logaddexp_docstring_ +# B16: ==== LOGICAL_AND (x1, x2) +_logical_and_docstring_ = r""" +logical_and(x1, x2, /, \*, out=None, order='K') + +Computes the logical AND for each element `x1_i` of the input array `x1` with +the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise logical AND results. +""" +logical_and = BinaryElementwiseFunc( + "logical_and", + ti._logical_and_result_type, + ti._logical_and, + _logical_and_docstring_, +) +del _logical_and_docstring_ + +# B17: ==== LOGICAL_OR (x1, x2) +_logical_or_docstring_ = r""" +logical_or(x1, x2, /, \*, out=None, order='K') + +Computes the logical OR for each element `x1_i` of the input array `x1` +with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise logical OR results. +""" +logical_or = BinaryElementwiseFunc( + "logical_or", + ti._logical_or_result_type, + ti._logical_or, + _logical_or_docstring_, +) +del _logical_or_docstring_ + +# B18: ==== LOGICAL_XOR (x1, x2) +_logical_xor_docstring_ = r""" +logical_xor(x1, x2, /, \*, out=None, order='K') + +Computes the logical XOR for each element `x1_i` of the input array `x1` +with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise logical XOR results. +""" +logical_xor = BinaryElementwiseFunc( + "logical_xor", + ti._logical_xor_result_type, + ti._logical_xor, + _logical_xor_docstring_, +) +del _logical_xor_docstring_ + # U24: ==== LOGICAL_NOT (x) _logical_not_docstring = r""" logical_not(x, /, \*, out=None, order='K') @@ -1272,6 +1369,106 @@ ) del _logical_not_docstring +# B26: ==== MAXIMUM (x1, x2) +_maximum_docstring_ = r""" +maximum(x1, x2, /, \*, out=None, order='K') + +Compares two input arrays `x1` and `x2` and returns a new array containing the +element-wise maxima. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise maxima. The data type of + the returned array is determined by the Type Promotion Rules. +""" +maximum = BinaryElementwiseFunc( + "maximum", + ti._maximum_result_type, + ti._maximum, + _maximum_docstring_, +) +del _maximum_docstring_ + +# B27: ==== MINIMUM (x1, x2) +_minimum_docstring_ = r""" +minimum(x1, x2, /, \*, out=None, order='K') + +Compares two input arrays `x1` and `x2` and returns a new array containing the +element-wise minima. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise minima. The data type of + the returned array is determined by the Type Promotion Rules. +""" +minimum = BinaryElementwiseFunc( + "minimum", + ti._minimum_result_type, + ti._minimum, + _minimum_docstring_, +) +del _minimum_docstring_ + +# B19: ==== MULTIPLY (x1, x2) +_multiply_docstring_ = r""" +multiply(x1, x2, /, \*, out=None, order='K') + +Calculates the product for each element `x1_i` of the input array `x1` with the +respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. May have any data type. + x2 (usm_ndarray): + Second input array. May have any data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise products. The data type of + the returned array is determined by the Type Promotion Rules. +""" +multiply = BinaryElementwiseFunc( + "multiply", + ti._multiply_result_type, + ti._multiply, + _multiply_docstring_, + binary_inplace_fn=ti._multiply_inplace, +) +del _multiply_docstring_ + # U25: ==== NEGATIVE (x) _negative_docstring_ = r""" negative(x, /, \*, out=None, order='K') @@ -1302,6 +1499,77 @@ ) del _negative_docstring_ +# B28: ==== NEXTAFTER (x1, x2) +_nextafter_docstring_ = r""" +nextafter(x1, x2, /, \*, out=None, order='K') + +Calculates the next floating-point value after element `x1_i` of the input +array `x1` toward the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a real-valued floating-point data + type. + x2 (usm_ndarray): + Second input array, expected to have a real-valued floating-point data + type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise next representable values of `x1` + in the direction of `x2`. The data type of the returned array is + determined by the Type Promotion Rules. +""" +nextafter = BinaryElementwiseFunc( + "nextafter", + ti._nextafter_result_type, + ti._nextafter, + _nextafter_docstring_, +) +del _nextafter_docstring_ + +# B20: ==== NOT_EQUAL (x1, x2) +_not_equal_docstring_ = r""" +not_equal(x1, x2, /, \*, out=None, order='K') + +Calculates inequality test results for each element `x1_i` of the +input array `x1` with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array. + x2 (usm_ndarray): + Second input array. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the result of element-wise inequality comparison. + The returned array has a data type of `bool`. +""" + +not_equal = BinaryElementwiseFunc( + "not_equal", + ti._not_equal_result_type, + ti._not_equal, + _not_equal_docstring_, + weak_type_resolver=_resolve_weak_types_all_py_ints, +) +del _not_equal_docstring_ + # U26: ==== POSITIVE (x) _positive_docstring_ = r""" positive(x, /, \*, out=None, order='K') @@ -1328,6 +1596,40 @@ ) del _positive_docstring_ +# B21: ==== POW (x1, x2) +_pow_docstring_ = r""" +pow(x1, x2, /, \*, out=None, order='K') + +Calculates `x1_i` raised to `x2_i` for each element `x1_i` of the input array +`x1` with the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a numeric data type. + x2 (usm_ndarray): + Second input array, also expected to have a numeric data type. + out (usm_ndarray): + Output array to populate. Array must have the correct + shape and the expected data type. + order ("C","F","A","K", optional): memory layout of the new + output array, if parameter `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the bases in `x1` raised to the exponents in `x2` + element-wise. The data type of the returned array is determined by the + Type Promotion Rules. +""" +pow = BinaryElementwiseFunc( + "pow", + ti._pow_result_type, + ti._pow, + _pow_docstring_, + binary_inplace_fn=ti._pow_inplace, +) +del _pow_docstring_ + # U27: ==== REAL (x) _real_docstring = r""" real(x, /, \*, out=None, order='K') @@ -1359,6 +1661,43 @@ ) del _real_docstring +# B22: ==== REMAINDER (x1, x2) +_remainder_docstring_ = r""" +remainder(x1, x2, /, \*, out=None, order='K') + +Calculates the remainder of division for each element `x1_i` of the input array +`x1` with the respective element `x2_i` of the input array `x2`. + +This function is equivalent to the Python modulus operator. + +Args: + x1 (usm_ndarray): + First input array, expected to have a real-valued data type. + x2 (usm_ndarray): + Second input array, also expected to have a real-valued data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise remainders. Each remainder has the + same sign as respective element `x2_i`. The data type of the returned + array is determined by the Type Promotion Rules. +""" +remainder = BinaryElementwiseFunc( + "remainder", + ti._remainder_result_type, + ti._remainder, + _remainder_docstring_, + binary_inplace_fn=ti._remainder_inplace, +) +del _remainder_docstring_ + # U28: ==== ROUND (x) _round_docstring = r""" round(x, /, \*, out=None, order='K') @@ -1534,6 +1873,41 @@ ) del _sqrt_docstring_ +# B23: ==== SUBTRACT (x1, x2) +_subtract_docstring_ = r""" +subtract(x1, x2, /, \*, out=None, order='K') + +Calculates the difference between each element `x1_i` of the input +array `x1` and the respective element `x2_i` of the input array `x2`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a numeric data type. + x2 (usm_ndarray): + Second input array, also expected to have a numeric data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise differences. The data type + of the returned array is determined by the Type Promotion Rules. +""" +subtract = BinaryElementwiseFunc( + "subtract", + ti._subtract_result_type, + ti._subtract, + _subtract_docstring_, + binary_inplace_fn=ti._subtract_inplace, + acceptance_fn=_acceptance_fn_subtract, +) +del _subtract_docstring_ + # U34: ==== TAN (x) _tan_docstring = r""" tan(x, /, \*, out=None, order='K') @@ -1710,6 +2084,41 @@ ) del _exp2_docstring_ +# B25: ==== COPYSIGN (x1, x2) +_copysign_docstring_ = r""" +copysign(x1, x2, /, \*, out=None, order='K') + +Composes a floating-point value with the magnitude of `x1_i` and the sign of +`x2_i` for each element of input arrays `x1` and `x2`. + +Args: + x1 (usm_ndarray): + First input array, expected to have a real-valued floating-point data + type. + x2 (usm_ndarray): + Second input array, also expected to have a real-valued floating-point + data type. + out (Union[usm_ndarray, None], optional): + Output array to populate. + Array have the correct shape and the expected data type. + order ("C","F","A","K", optional): + Memory layout of the new output array, if parameter + `out` is ``None``. + Default: "K". + +Returns: + usm_ndarray: + An array containing the element-wise results. The data type + of the returned array is determined by the Type Promotion Rules. +""" +copysign = BinaryElementwiseFunc( + "copysign", + ti._copysign_result_type, + ti._copysign, + _copysign_docstring_, +) +del _copysign_docstring_ + # U39: ==== RSQRT (x) _rsqrt_docstring_ = r""" rsqrt(x, /, \*, out=None, order='K') diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp new file mode 100644 index 00000000000..473c8f6236c --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/copysign.hpp @@ -0,0 +1,247 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of COPYSIGN(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::copysign +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct CopysignFunctor +{ + + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + return sycl::copysign(in1, in2); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto tmp = sycl::copysign(in1, in2); + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using CopysignContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using CopysignStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + CopysignFunctor>; + +template +struct CopysignOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct CopysignContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class copysign_contig_kernel; + +template +sycl::event copysign_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using CopySignHS = + hyperparam_detail::CopysignContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = CopySignHS::vec_sz; + static constexpr std::uint8_t n_vecs = CopySignHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, CopysignOutputType, CopysignContigFunctor, + copysign_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct CopysignContigFactory +{ + fnT get() + { + if constexpr (!CopysignOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = copysign_contig_impl; + return fn; + } + } +}; + +template +struct CopysignTypeMapFactory +{ + /*! @brief get typeid for output type of divide(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename CopysignOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class copysign_strided_kernel; + +template +sycl::event + copysign_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, CopysignOutputType, CopysignStridedFunctor, + copysign_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct CopysignStridedFactory +{ + fnT get() + { + if constexpr (!CopysignOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = copysign_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::copysign diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp new file mode 100644 index 00000000000..8149c2e2eae --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logical_and.hpp @@ -0,0 +1,291 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of comparison of +/// tensor elements. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::logical_and +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct LogicalAndFunctor +{ + static_assert(std::is_same_v); + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + using tu_ns::convert_impl; + + return (convert_impl(in1) && + convert_impl(in2)); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + + auto tmp = (in1 && in2); + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using LogicalAndContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + LogicalAndFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using LogicalAndStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + LogicalAndFunctor>; + +template +struct LogicalAndOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct LogicalAndContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class logical_and_contig_kernel; + +template +sycl::event + logical_and_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using LogicalAndHS = + hyperparam_detail::LogicalAndContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = LogicalAndHS::vec_sz; + static constexpr std::uint8_t n_vecs = LogicalAndHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, LogicalAndOutputType, LogicalAndContigFunctor, + logical_and_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct LogicalAndContigFactory +{ + fnT get() + { + if constexpr (!LogicalAndOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = logical_and_contig_impl; + return fn; + } + } +}; + +template +struct LogicalAndTypeMapFactory +{ + /*! @brief get typeid for output type of operator()>(x, y), always bool + */ + std::enable_if_t::value, int> get() + { + using rT = typename LogicalAndOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class logical_and_strided_kernel; + +template +sycl::event + logical_and_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, LogicalAndOutputType, LogicalAndStridedFunctor, + logical_and_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template +struct LogicalAndStridedFactory +{ + fnT get() + { + if constexpr (!LogicalAndOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = logical_and_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::logical_and diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp new file mode 100644 index 00000000000..e2e609b487e --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logical_or.hpp @@ -0,0 +1,290 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of comparison of +/// tensor elements. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::logical_or +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct LogicalOrFunctor +{ + static_assert(std::is_same_v); + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + using tu_ns::convert_impl; + + return (convert_impl(in1) || + convert_impl(in2)); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + + auto tmp = (in1 || in2); + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using LogicalOrContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + LogicalOrFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using LogicalOrStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + LogicalOrFunctor>; + +template +struct LogicalOrOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct LogicalOrContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class logical_or_contig_kernel; + +template +sycl::event logical_or_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using LogicalOrHS = + hyperparam_detail::LogicalOrContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = LogicalOrHS::vec_sz; + static constexpr std::uint8_t n_vecs = LogicalOrHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, LogicalOrOutputType, LogicalOrContigFunctor, + logical_or_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct LogicalOrContigFactory +{ + fnT get() + { + if constexpr (!LogicalOrOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = logical_or_contig_impl; + return fn; + } + } +}; + +template +struct LogicalOrTypeMapFactory +{ + /*! @brief get typeid for output type of operator()>(x, y), always bool + */ + std::enable_if_t::value, int> get() + { + using rT = typename LogicalOrOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class logical_or_strided_kernel; + +template +sycl::event + logical_or_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, LogicalOrOutputType, LogicalOrStridedFunctor, + logical_or_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template +struct LogicalOrStridedFactory +{ + fnT get() + { + if constexpr (!LogicalOrOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = logical_or_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::logical_or diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp new file mode 100644 index 00000000000..8f725e090b1 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logical_xor.hpp @@ -0,0 +1,292 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of comparison of +/// tensor elements. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::logical_xor +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct LogicalXorFunctor +{ + static_assert(std::is_same_v); + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + using tu_ns::convert_impl; + + return (convert_impl(in1) != + convert_impl(in2)); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + using tu_ns::vec_cast; + auto tmp1 = vec_cast(in1); + auto tmp2 = vec_cast(in2); + + auto tmp = (tmp1 != tmp2); + if constexpr (std::is_same_v) { + return tmp; + } + else { + return vec_cast( + tmp); + } + } +}; + +template +using LogicalXorContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + LogicalXorFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using LogicalXorStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + LogicalXorFunctor>; + +template +struct LogicalXorOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct LogicalXorContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class logical_xor_contig_kernel; + +template +sycl::event + logical_xor_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using LogicalXorHS = + hyperparam_detail::LogicalXorContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = LogicalXorHS::vec_sz; + static constexpr std::uint8_t n_vecs = LogicalXorHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, LogicalXorOutputType, LogicalXorContigFunctor, + logical_xor_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct LogicalXorContigFactory +{ + fnT get() + { + if constexpr (!LogicalXorOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = logical_xor_contig_impl; + return fn; + } + } +}; + +template +struct LogicalXorTypeMapFactory +{ + /*! @brief get typeid for output type of operator()>(x, y), always bool + */ + std::enable_if_t::value, int> get() + { + using rT = typename LogicalXorOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class logical_xor_strided_kernel; + +template +sycl::event + logical_xor_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, LogicalXorOutputType, LogicalXorStridedFunctor, + logical_xor_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template +struct LogicalXorStridedFactory +{ + fnT get() + { + if constexpr (!LogicalXorOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = logical_xor_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::logical_xor diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp index f204b664004..af6f95863e6 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp @@ -25,6 +25,8 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** +// +//===---------------------------------------------------------------------===// /// /// \file /// This file defines kernels for elementwise evaluation of MAXIMUM(x1, x2) @@ -52,6 +54,7 @@ namespace dpctl::tensor::kernels::maximum { + using dpctl::tensor::ssize_t; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp index d18577a5cf4..5541e850485 100644 --- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp @@ -25,6 +25,8 @@ // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF // THE POSSIBILITY OF SUCH DAMAGE. //***************************************************************************** +// +//===---------------------------------------------------------------------===// /// /// \file /// This file defines kernels for elementwise evaluation of MINIMUM(x1, x2) @@ -51,6 +53,7 @@ namespace dpctl::tensor::kernels::minimum { + using dpctl::tensor::ssize_t; namespace td_ns = dpctl::tensor::type_dispatch; namespace tu_ns = dpctl::tensor::type_utils; diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp new file mode 100644 index 00000000000..587a05106ea --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/multiply.hpp @@ -0,0 +1,648 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of MUL(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include "sycl_complex.hpp" +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace dpctl::tensor::kernels::multiply +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct MultiplyFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + using realT2 = typename argT2::value_type; + + return exprm_ns::complex(in1) * + exprm_ns::complex(in2); + } + else { + return in1 * in2; + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto tmp = in1 * in2; + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using MultiplyContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using MultiplyStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + MultiplyFunctor>; + +template +struct MultiplyOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct MultiplyContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class multiply_contig_kernel; + +template +sycl::event multiply_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using MulHS = + hyperparam_detail::MultiplyContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = MulHS::vec_sz; + static constexpr std::uint8_t n_vecs = MulHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, MultiplyOutputType, MultiplyContigFunctor, + multiply_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct MultiplyContigFactory +{ + fnT get() + { + if constexpr (!MultiplyOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = multiply_contig_impl; + return fn; + } + } +}; + +template +struct MultiplyTypeMapFactory +{ + /*! @brief get typeid for output type of multiply(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename MultiplyOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class multiply_strided_kernel; + +template +sycl::event + multiply_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, MultiplyOutputType, MultiplyStridedFunctor, + multiply_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct MultiplyStridedFactory +{ + fnT get() + { + if constexpr (!MultiplyOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = multiply_strided_impl; + return fn; + } + } +}; + +template +class multiply_matrix_row_broadcast_sg_krn; + +template +using MultiplyContigMatrixContigRowBroadcastingFunctor = + elementwise_common::BinaryContigMatrixContigRowBroadcastingFunctor< + argT1, + argT2, + resT, + MultiplyFunctor>; + +template +sycl::event multiply_contig_matrix_contig_row_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + char *res_p, // typeless pointer to (n0, n1) result C-contig. matrix, + // res[i,j] = mat[i,j] * vec[j] + ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_matrix_contig_row_broadcast_impl< + argT1, argT2, resT, MultiplyContigMatrixContigRowBroadcastingFunctor, + multiply_matrix_row_broadcast_sg_krn>(exec_q, host_tasks, n0, n1, mat_p, + mat_offset, vec_p, vec_offset, + res_p, res_offset, depends); +} + +template +struct MultiplyContigMatrixContigRowBroadcastFactory +{ + fnT get() + { + if constexpr (!MultiplyOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + using resT = typename MultiplyOutputType::value_type; + if constexpr (dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = + multiply_contig_matrix_contig_row_broadcast_impl; + return fn; + } + } + } +}; + +template +sycl::event multiply_contig_row_contig_matrix_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + const char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + char *res_p, // typeless pointer to (n0, n1) result C-contig. matrix, + // res[i,j] = mat[i,j] * vec[j] + ssize_t res_offset, + const std::vector &depends = {}) +{ + return multiply_contig_matrix_contig_row_broadcast_impl( + exec_q, host_tasks, n0, n1, mat_p, mat_offset, vec_p, vec_offset, res_p, + res_offset, depends); +}; + +template +struct MultiplyContigRowContigMatrixBroadcastFactory +{ + fnT get() + { + if constexpr (!MultiplyOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + using resT = typename MultiplyOutputType::value_type; + if constexpr (dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = + multiply_contig_row_contig_matrix_broadcast_impl; + return fn; + } + } + } +}; + +template +struct MultiplyInplaceFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + void operator()(resT &res, const argT &in) + { + res *= in; + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) + { + res *= in; + } +}; + +template +using MultiplyInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + MultiplyInplaceFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using MultiplyInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + MultiplyInplaceFunctor>; + +template +class multiply_inplace_contig_kernel; + +/* @brief Types supported by in-place multiplication */ +template +struct MultiplyInplaceTypePairSupport +{ + /* value if true a kernel for must be instantiated */ + static constexpr bool is_defined = std::disjunction< + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + resTy, + std::complex>, + td_ns::TypePairDefinedEntry, + resTy, + std::complex>, + // fall-through + td_ns::NotDefinedEntry>::is_defined; +}; + +template +struct MultiplyInplaceTypeMapFactory +{ + /*! @brief get typeid for output type of x *= y */ + std::enable_if_t::value, int> get() + { + if constexpr (MultiplyInplaceTypePairSupport::is_defined) { + return td_ns::GetTypeid{}.get(); + } + else { + return td_ns::GetTypeid{}.get(); + } + } +}; + +template +sycl::event + multiply_inplace_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using MulHS = + hyperparam_detail::MultiplyContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = MulHS::vec_sz; + static constexpr std::uint8_t n_vecs = MulHS::n_vecs; + + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, MultiplyInplaceContigFunctor, + multiply_inplace_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends); +} + +template +struct MultiplyInplaceContigFactory +{ + fnT get() + { + if constexpr (!MultiplyInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = multiply_inplace_contig_impl; + return fn; + } + } +}; + +template +class multiply_inplace_strided_kernel; + +template +sycl::event multiply_inplace_strided_impl( + sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, MultiplyInplaceStridedFunctor, + multiply_inplace_strided_kernel>(exec_q, nelems, nd, shape_and_strides, + arg_p, arg_offset, res_p, res_offset, + depends, additional_depends); +} + +template +struct MultiplyInplaceStridedFactory +{ + fnT get() + { + if constexpr (!MultiplyInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = multiply_inplace_strided_impl; + return fn; + } + } +}; + +template +class multiply_inplace_row_matrix_broadcast_sg_krn; + +template +using MultiplyInplaceRowMatrixBroadcastingFunctor = + elementwise_common::BinaryInplaceRowMatrixBroadcastingFunctor< + argT, + resT, + MultiplyInplaceFunctor>; + +template +sycl::event multiply_inplace_row_matrix_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_row_matrix_broadcast_impl< + argT, resT, MultiplyInplaceRowMatrixBroadcastingFunctor, + multiply_inplace_row_matrix_broadcast_sg_krn>( + exec_q, host_tasks, n0, n1, vec_p, vec_offset, mat_p, mat_offset, + depends); +} + +template +struct MultiplyInplaceRowMatrixBroadcastFactory +{ + fnT get() + { + if constexpr (!MultiplyInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + if constexpr (dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = multiply_inplace_row_matrix_broadcast_impl; + return fn; + } + } + } +}; + +} // namespace dpctl::tensor::kernels::multiply diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/nextafter.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/nextafter.hpp new file mode 100644 index 00000000000..a703892a760 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/nextafter.hpp @@ -0,0 +1,248 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of NEXTAFTER(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::nextafter +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct NextafterFunctor +{ + + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + return sycl::nextafter(in1, in2); + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto res = sycl::nextafter(in1, in2); + if constexpr (std::is_same_v) { + return res; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + res); + } + } +}; + +template +using NextafterContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + NextafterFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using NextafterStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + NextafterFunctor>; + +template +struct NextafterOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct NextafterContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class nextafter_contig_kernel; + +template +sycl::event nextafter_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using NextafterHS = + hyperparam_detail::NextafterContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = NextafterHS::vec_sz; + static constexpr std::uint8_t n_vecs = NextafterHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, NextafterOutputType, NextafterContigFunctor, + nextafter_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct NextafterContigFactory +{ + fnT get() + { + if constexpr (!NextafterOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = nextafter_contig_impl; + return fn; + } + } +}; + +template +struct NextafterTypeMapFactory +{ + /*! @brief get typeid for output type of std::nextafter(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename NextafterOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class nextafter_strided_kernel; + +template +sycl::event + nextafter_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, NextafterOutputType, NextafterStridedFunctor, + nextafter_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct NextafterStridedFactory +{ + fnT get() + { + if constexpr (!NextafterOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = nextafter_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::nextafter diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp new file mode 100644 index 00000000000..224e3fbe5b7 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/not_equal.hpp @@ -0,0 +1,304 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of inequality of +/// tensor elements. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" + +namespace dpctl::tensor::kernels::not_equal +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct NotEqualFunctor +{ + static_assert(std::is_same_v); + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::conjunction< + std::is_same, + std::negation, + tu_ns::is_complex>>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (std::is_integral_v && std::is_integral_v && + std::is_signed_v != std::is_signed_v) + { + if constexpr (std::is_signed_v && !std::is_signed_v) { + return (in1 < 0) ? true : (static_cast(in1) != in2); + } + else { + if constexpr (!std::is_signed_v && + std::is_signed_v) { + return (in2 < 0) ? true : (in1 != static_cast(in2)); + } + } + } + else { + return (in1 != in2); + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto tmp = (in1 != in2); + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using NotEqualContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using NotEqualStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + NotEqualFunctor>; + +template +struct NotEqualOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns:: + BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + bool>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct NotEqualContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class not_equal_contig_kernel; + +template +sycl::event not_equal_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using NotEqHS = + hyperparam_detail::NotEqualContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = NotEqHS::vec_sz; + static constexpr std::uint8_t n_vecs = NotEqHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, NotEqualOutputType, NotEqualContigFunctor, + not_equal_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct NotEqualContigFactory +{ + fnT get() + { + if constexpr (!NotEqualOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = not_equal_contig_impl; + return fn; + } + } +}; + +template +struct NotEqualTypeMapFactory +{ + /*! @brief get typeid for output type of operator()!=(x, y), always bool */ + std::enable_if_t::value, int> get() + { + using rT = typename NotEqualOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class not_equal_strided_kernel; + +template +sycl::event + not_equal_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, NotEqualOutputType, NotEqualStridedFunctor, + not_equal_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct NotEqualStridedFactory +{ + fnT get() + { + if constexpr (!NotEqualOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = not_equal_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::not_equal diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp new file mode 100644 index 00000000000..46489f45985 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/pow.hpp @@ -0,0 +1,602 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of POW(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include + +#include "sycl_complex.hpp" +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace dpctl::tensor::kernels::pow +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct PowFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (std::is_integral_v || std::is_integral_v) { + auto tmp1 = in1; + auto tmp2 = in2; + if constexpr (std::is_signed_v) { + if (tmp2 < 0) { + // invalid; return 0 + return resT(0); + } + } + resT res = 1; + if (tmp1 == 1 || tmp2 == 0) { + return res; + } + while (tmp2 > 0) { + if (tmp2 & 1) { + res *= tmp1; + } + tmp2 >>= 1; + tmp1 *= tmp1; + } + return res; + } + else if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using realT1 = typename argT1::value_type; + using realT2 = typename argT2::value_type; + + return exprm_ns::pow(exprm_ns::complex(in1), + exprm_ns::complex(in2)); + } + else { + return sycl::pow(in1, in2); + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + if constexpr (std::is_integral_v || std::is_integral_v) { + sycl::vec res; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + auto tmp1 = in1[i]; + auto tmp2 = in2[i]; + if constexpr (std::is_signed_v) { + if (tmp2 < 0) { + // invalid; yield 0 + res[i] = 0; + continue; + } + } + resT res_tmp = 1; + if (tmp1 == 1 || tmp2 == 0) { + res[i] = res_tmp; + continue; + } + while (tmp2 > 0) { + if (tmp2 & 1) { + res_tmp *= tmp1; + } + tmp2 >>= 1; + tmp1 *= tmp1; + } + res[i] = res_tmp; + } + return res; + } + else { + auto res = sycl::pow(in1, in2); + if constexpr (std::is_same_v) + { + return res; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast(res); + } + } + } +}; + +template +using PowContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using PowStridedFunctor = + elementwise_common::BinaryStridedFunctor>; + +template +struct PowOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct PowContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class pow_contig_kernel; + +template +sycl::event pow_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using PowHS = hyperparam_detail::PowContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = PowHS::vec_sz; + static constexpr std::uint8_t n_vecs = PowHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, PowOutputType, PowContigFunctor, pow_contig_kernel, + vec_sz, n_vecs>(exec_q, nelems, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends); +} + +template +struct PowContigFactory +{ + fnT get() + { + if constexpr (!PowOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = pow_contig_impl; + return fn; + } + } +}; + +template +struct PowTypeMapFactory +{ + /*! @brief get typeid for output type of std::pow(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename PowOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class pow_strided_kernel; + +template +sycl::event pow_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, PowOutputType, PowStridedFunctor, pow_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg1_p, arg1_offset, arg2_p, + arg2_offset, res_p, res_offset, depends, additional_depends); +} + +template +struct PowStridedFactory +{ + fnT get() + { + if constexpr (!PowOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = pow_strided_impl; + return fn; + } + } +}; + +template +struct PowInplaceFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + void operator()(resT &res, const argT &in) + { + if constexpr (std::is_integral_v || std::is_integral_v) { + auto tmp1 = res; + auto tmp2 = in; + if constexpr (std::is_signed_v) { + if (tmp2 < 0) { + // invalid; return 0 + res = 0; + return; + } + } + if (tmp1 == 1) { + return; + } + if (tmp2 == 0) { + res = 1; + return; + } + resT res_tmp = 1; + while (tmp2 > 0) { + if (tmp2 & 1) { + res_tmp *= tmp1; + } + tmp2 >>= 1; + tmp1 *= tmp1; + } + res = res_tmp; + } + else if constexpr (tu_ns::is_complex::value && + tu_ns::is_complex::value) + { + using r_resT = typename resT::value_type; + using r_argT = typename argT::value_type; + + res = exprm_ns::pow(exprm_ns::complex(res), + exprm_ns::complex(in)); + } + else { + res = sycl::pow(res, in); + } + return; + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) + { + if constexpr (std::is_integral_v || std::is_integral_v) { +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + auto tmp1 = res[i]; + auto tmp2 = in[i]; + if constexpr (std::is_signed_v) { + if (tmp2 < 0) { + // invalid; return 0 + res[i] = 0; + continue; + } + } + if (tmp1 == 1) { + continue; + } + if (tmp2 == 0) { + res[i] = 1; + continue; + } + resT res_tmp = 1; + while (tmp2 > 0) { + if (tmp2 & 1) { + res_tmp *= tmp1; + } + tmp2 >>= 1; + tmp1 *= tmp1; + } + res[i] = res_tmp; + } + } + else { + res = sycl::pow(res, in); + } + } +}; + +template +using PowInplaceContigFunctor = elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + PowInplaceFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using PowInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + PowInplaceFunctor>; + +template +class pow_inplace_contig_kernel; + +/* @brief Types supported by in-place pow */ +template +struct PowInplaceTypePairSupport +{ + /* value if true a kernel for must be instantiated */ + static constexpr bool is_defined = std::disjunction< + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + resTy, + std::complex>, + td_ns::TypePairDefinedEntry, + resTy, + std::complex>, + // fall-through + td_ns::NotDefinedEntry>::is_defined; +}; + +template +struct PowInplaceTypeMapFactory +{ + /*! @brief get typeid for output type of x **= y */ + std::enable_if_t::value, int> get() + { + if constexpr (PowInplaceTypePairSupport::is_defined) { + return td_ns::GetTypeid{}.get(); + } + else { + return td_ns::GetTypeid{}.get(); + } + } +}; + +template +sycl::event + pow_inplace_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using PowHS = hyperparam_detail::PowContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = PowHS::vec_sz; + static constexpr std::uint8_t n_vecs = PowHS::n_vecs; + + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, PowInplaceContigFunctor, pow_inplace_contig_kernel, + vec_sz, n_vecs>(exec_q, nelems, arg_p, arg_offset, res_p, res_offset, + depends); +} + +template +struct PowInplaceContigFactory +{ + fnT get() + { + if constexpr (!PowInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = pow_inplace_contig_impl; + return fn; + } + } +}; + +template +class pow_inplace_strided_kernel; + +template +sycl::event + pow_inplace_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, PowInplaceStridedFunctor, pow_inplace_strided_kernel>( + exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct PowInplaceStridedFactory +{ + fnT get() + { + if constexpr (!PowInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = pow_inplace_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::pow diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp new file mode 100644 index 00000000000..d7583286210 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/remainder.hpp @@ -0,0 +1,577 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of the +/// modulo of tensor elements. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include + +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace dpctl::tensor::kernels::remainder +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct RemainderFunctor +{ + static_assert(std::is_same_v); + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + if constexpr (std::is_integral_v || std::is_integral_v) { + if (in2 == argT2(0)) { + return resT(0); + } + if constexpr (std::is_signed_v || std::is_signed_v) { + auto out = (in1 % in2); + if (out != 0 && l_xor(in1 < 0, in2 < 0)) { + out += in2; + } + return out; + } + else { + return (in1 % in2); + } + } + else { + auto rem = sycl::fmod(in1, in2); + if (rem) { + if (l_xor(in2 < 0, rem < 0)) { + rem += in2; + } + } + else { + rem = sycl::copysign(resT(0), in2); + } + return rem; + } + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + if constexpr (std::is_integral_v || std::is_integral_v) { + sycl::vec rem; +#pragma unroll + for (auto i = 0; i < vec_sz; ++i) { + if (in2[i] == argT2(0)) { + rem[i] = resT(0); + } + else { + rem[i] = in1[i] % in2[i]; + if constexpr (std::is_signed_v || + std::is_signed_v) { + if (rem[i] != 0 && l_xor(in1[i] < 0, in2[i] < 0)) { + rem[i] += in2[i]; + } + } + } + } + return rem; + } + else { + auto rem = sycl::fmod(in1, in2); + using remT = typename decltype(rem)::element_type; +#pragma unroll + for (auto i = 0; i < vec_sz; ++i) { + if (rem[i]) { + if (l_xor(in2[i] < 0, rem[i] < 0)) { + rem[i] += in2[i]; + } + } + else { + rem[i] = sycl::copysign(remT(0), in2[i]); + } + } + if constexpr (std::is_same_v) { + return rem; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast(rem); + } + } + } + +private: + bool l_xor(bool b1, bool b2) const + { + return (b1 != b2); + } +}; + +template +using RemainderContigFunctor = elementwise_common::BinaryContigFunctor< + argT1, + argT2, + resT, + RemainderFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using RemainderStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + RemainderFunctor>; + +template +struct RemainderOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct RemainderContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class remainder_contig_kernel; + +template +sycl::event remainder_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using RemHS = + hyperparam_detail::RemainderContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = RemHS::vec_sz; + static constexpr std::uint8_t n_vecs = RemHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, RemainderOutputType, RemainderContigFunctor, + remainder_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct RemainderContigFactory +{ + fnT get() + { + if constexpr (!RemainderOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = remainder_contig_impl; + return fn; + } + } +}; + +template +struct RemainderTypeMapFactory +{ + /*! @brief get typeid for output type of remainder(T x, T y) */ + std::enable_if_t::value, int> get() + { + using rT = typename RemainderOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class remainder_strided_kernel; + +template +sycl::event + remainder_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, RemainderOutputType, RemainderStridedFunctor, + remainder_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct RemainderStridedFactory +{ + fnT get() + { + if constexpr (!RemainderOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = remainder_strided_impl; + return fn; + } + } +}; + +template +struct RemainderInplaceFunctor +{ + + using supports_sg_loadstore = std::true_type; + using supports_vec = std::true_type; + + // functor is only well-defined when argT and resT are the same + static_assert(std::is_same_v); + + void operator()(resT &res, const argT &in) + { + if constexpr (std::is_integral_v || std::is_integral_v) { + if (in == argT(0)) { + res = 0; + return; + } + if constexpr (std::is_signed_v || std::is_signed_v) { + auto tmp = res; + res %= in; + if (res != resT(0) && l_xor(tmp < 0, in < 0)) { + res += in; + } + } + else { + res %= in; + } + } + else { + res = sycl::fmod(res, in); + if (res) { + if (l_xor(in < 0, res < 0)) { + res += in; + } + } + else { + res = sycl::copysign(resT(0), in); + } + } + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) + { + if constexpr (std::is_integral_v || std::is_integral_v) { +#pragma unroll + for (auto i = 0; i < vec_sz; ++i) { + if (in[i] == argT(0)) { + res[i] = 0; + } + else { + auto rem = res[i] % in[i]; + if constexpr (std::is_signed_v || + std::is_signed_v) { + if (rem != 0 && l_xor(res[i] < 0, in[i] < 0)) { + rem += in[i]; + } + } + res[i] = rem; + } + } + } + else { + res = sycl::fmod(res, in); +#pragma unroll + for (auto i = 0; i < vec_sz; ++i) { + if (res[i]) { + if (l_xor(in[i] < 0, res[i] < 0)) { + res[i] += in[i]; + } + } + else { + res[i] = sycl::copysign(resT(0), in[i]); + } + } + } + } + +private: + bool l_xor(bool b1, bool b2) const + { + return (b1 != b2); + } +}; + +template +using RemainderInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + RemainderInplaceFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using RemainderInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + RemainderInplaceFunctor>; + +template +class remainder_inplace_contig_kernel; + +/* @brief Types supported by in-place remainder */ +template +struct RemainderInplaceTypePairSupport +{ + /* value if true a kernel for must be instantiated */ + static constexpr bool is_defined = std::disjunction< + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + // fall-through + td_ns::NotDefinedEntry>::is_defined; +}; + +template +struct RemainderInplaceTypeMapFactory +{ + /*! @brief get typeid for output type of x %= y */ + std::enable_if_t::value, int> get() + { + if constexpr (RemainderInplaceTypePairSupport::is_defined) { + return td_ns::GetTypeid{}.get(); + } + else { + return td_ns::GetTypeid{}.get(); + } + } +}; + +template +sycl::event + remainder_inplace_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using RemHS = + hyperparam_detail::RemainderContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = RemHS::vec_sz; + static constexpr std::uint8_t n_vecs = RemHS::n_vecs; + + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, RemainderInplaceContigFunctor, + remainder_inplace_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends); +} + +template +struct RemainderInplaceContigFactory +{ + fnT get() + { + if constexpr (!RemainderInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = remainder_inplace_contig_impl; + return fn; + } + } +}; + +template +class remainder_inplace_strided_kernel; + +template +sycl::event remainder_inplace_strided_impl( + sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, RemainderInplaceStridedFunctor, + remainder_inplace_strided_kernel>(exec_q, nelems, nd, shape_and_strides, + arg_p, arg_offset, res_p, res_offset, + depends, additional_depends); +} + +template +struct RemainderInplaceStridedFactory +{ + fnT get() + { + if constexpr (!RemainderInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = remainder_inplace_strided_impl; + return fn; + } + } +}; + +} // namespace dpctl::tensor::kernels::remainder diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp new file mode 100644 index 00000000000..e43abdbe009 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/subtract.hpp @@ -0,0 +1,646 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines kernels for elementwise evaluation of DIVIDE(x1, x2) +/// function. +//===---------------------------------------------------------------------===// + +#pragma once +#include +#include +#include +#include +#include +#include + +#include "vec_size_util.hpp" + +#include "utils/offset_utils.hpp" +#include "utils/type_dispatch_building.hpp" +#include "utils/type_utils.hpp" + +#include "kernels/dpctl_tensor_types.hpp" +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" + +namespace dpctl::tensor::kernels::subtract +{ + +using dpctl::tensor::ssize_t; +namespace td_ns = dpctl::tensor::type_dispatch; +namespace tu_ns = dpctl::tensor::type_utils; + +template +struct SubtractFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + resT operator()(const argT1 &in1, const argT2 &in2) const + { + return in1 - in2; + } + + template + sycl::vec + operator()(const sycl::vec &in1, + const sycl::vec &in2) const + { + auto tmp = in1 - in2; + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + + return vec_cast( + tmp); + } + } +}; + +template +using SubtractContigFunctor = + elementwise_common::BinaryContigFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using SubtractStridedFunctor = elementwise_common::BinaryStridedFunctor< + argT1, + argT2, + resT, + IndexerT, + SubtractFunctor>; + +template +struct SubtractOutputType +{ + using value_type = typename std::disjunction< + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::BinaryTypeMapResultEntry, + T2, + std::complex, + std::complex>, + td_ns::DefaultResultEntry>::result_type; + + static constexpr bool is_defined = !std::is_same_v; +}; + +namespace hyperparam_detail +{ + +namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils; + +using vsu_ns::BinaryContigHyperparameterSetEntry; +using vsu_ns::ContigHyperparameterSetDefault; + +template +struct SubtractContigHyperparameterSet +{ + using value_type = + typename std::disjunction>; + + constexpr static auto vec_sz = value_type::vec_sz; + constexpr static auto n_vecs = value_type::n_vecs; +}; + +} // end of namespace hyperparam_detail + +template +class subtract_contig_kernel; + +template +sycl::event subtract_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using SubHS = + hyperparam_detail::SubtractContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = SubHS::vec_sz; + static constexpr std::uint8_t n_vecs = SubHS::n_vecs; + + return elementwise_common::binary_contig_impl< + argTy1, argTy2, SubtractOutputType, SubtractContigFunctor, + subtract_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg1_p, arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends); +} + +template +struct SubtractContigFactory +{ + fnT get() + { + if constexpr (!SubtractOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = subtract_contig_impl; + return fn; + } + } +}; + +template +struct SubtractTypeMapFactory +{ + /*! @brief get typeid for output type of divide(T1 x, T2 y) */ + std::enable_if_t::value, int> get() + { + using rT = typename SubtractOutputType::value_type; + return td_ns::GetTypeid{}.get(); + } +}; + +template +class subtract_strided_kernel; + +template +sycl::event + subtract_strided_impl(sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg1_p, + ssize_t arg1_offset, + const char *arg2_p, + ssize_t arg2_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_strided_impl< + argTy1, argTy2, SubtractOutputType, SubtractStridedFunctor, + subtract_strided_kernel>(exec_q, nelems, nd, shape_and_strides, arg1_p, + arg1_offset, arg2_p, arg2_offset, res_p, + res_offset, depends, additional_depends); +} + +template +struct SubtractStridedFactory +{ + fnT get() + { + if constexpr (!SubtractOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = subtract_strided_impl; + return fn; + } + } +}; + +template +using SubtractContigMatrixContigRowBroadcastingFunctor = + elementwise_common::BinaryContigMatrixContigRowBroadcastingFunctor< + argT1, + argT2, + resT, + SubtractFunctor>; + +template +using SubtractContigRowContigMatrixBroadcastingFunctor = + elementwise_common::BinaryContigRowContigMatrixBroadcastingFunctor< + argT1, + argT2, + resT, + SubtractFunctor>; + +template +class subtract_matrix_row_broadcast_sg_krn; + +template +class subtract_row_matrix_broadcast_sg_krn; + +template +sycl::event subtract_contig_matrix_contig_row_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + char *res_p, // typeless pointer to (n0, n1) result C-contig. matrix, + // res[i,j] = mat[i,j] - vec[j] + ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_matrix_contig_row_broadcast_impl< + argT1, argT2, resT, SubtractContigMatrixContigRowBroadcastingFunctor, + subtract_matrix_row_broadcast_sg_krn>(exec_q, host_tasks, n0, n1, mat_p, + mat_offset, vec_p, vec_offset, + res_p, res_offset, depends); +} + +template +struct SubtractContigMatrixContigRowBroadcastFactory +{ + fnT get() + { + if constexpr (!SubtractOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + using resT = typename SubtractOutputType::value_type; + if constexpr (dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = + subtract_contig_matrix_contig_row_broadcast_impl; + return fn; + } + } + } +}; + +template +sycl::event subtract_contig_row_contig_matrix_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + const char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + char *res_p, // typeless pointer to (n0, n1) result C-contig. matrix, + // res[i,j] = op(vec[j], mat[i,j]) + ssize_t res_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_contig_row_contig_matrix_broadcast_impl< + argT1, argT2, resT, SubtractContigRowContigMatrixBroadcastingFunctor, + subtract_row_matrix_broadcast_sg_krn>(exec_q, host_tasks, n0, n1, vec_p, + vec_offset, mat_p, mat_offset, + res_p, res_offset, depends); +} + +template +struct SubtractContigRowContigMatrixBroadcastFactory +{ + fnT get() + { + if constexpr (!SubtractOutputType::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + using resT = typename SubtractOutputType::value_type; + if constexpr (dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = + subtract_contig_row_contig_matrix_broadcast_impl; + return fn; + } + } + } +}; + +template +struct SubtractInplaceFunctor +{ + + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; + + void operator()(resT &res, const argT &in) + { + res -= in; + } + + template + void operator()(sycl::vec &res, + const sycl::vec &in) + { + res -= in; + } +}; + +template +using SubtractInplaceContigFunctor = + elementwise_common::BinaryInplaceContigFunctor< + argT, + resT, + SubtractInplaceFunctor, + vec_sz, + n_vecs, + enable_sg_loadstore>; + +template +using SubtractInplaceStridedFunctor = + elementwise_common::BinaryInplaceStridedFunctor< + argT, + resT, + IndexerT, + SubtractInplaceFunctor>; + +template +class subtract_inplace_contig_kernel; + +/* @brief Types supported by in-place subtraction */ +template +struct SubtractInplaceTypePairSupport +{ + /* value if true a kernel for must be instantiated */ + static constexpr bool is_defined = std::disjunction< + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + td_ns::TypePairDefinedEntry, + resTy, + std::complex>, + td_ns::TypePairDefinedEntry, + resTy, + std::complex>, + // fall-through + td_ns::NotDefinedEntry>::is_defined; +}; + +template +struct SubtractInplaceTypeMapFactory +{ + /*! @brief get typeid for output type of x -= y */ + std::enable_if_t::value, int> get() + { + if constexpr (SubtractInplaceTypePairSupport::is_defined) { + return td_ns::GetTypeid{}.get(); + } + else { + return td_ns::GetTypeid{}.get(); + } + } +}; + +template +sycl::event + subtract_inplace_contig_impl(sycl::queue &exec_q, + std::size_t nelems, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends = {}) +{ + using SubHS = + hyperparam_detail::SubtractContigHyperparameterSet; + static constexpr std::uint8_t vec_sz = SubHS::vec_sz; + static constexpr std::uint8_t n_vecs = SubHS::n_vecs; + + return elementwise_common::binary_inplace_contig_impl< + argTy, resTy, SubtractInplaceContigFunctor, + subtract_inplace_contig_kernel, vec_sz, n_vecs>( + exec_q, nelems, arg_p, arg_offset, res_p, res_offset, depends); +} + +template +struct SubtractInplaceContigFactory +{ + fnT get() + { + if constexpr (!SubtractInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = subtract_inplace_contig_impl; + return fn; + } + } +}; + +template +class subtract_inplace_strided_kernel; + +template +sycl::event subtract_inplace_strided_impl( + sycl::queue &exec_q, + std::size_t nelems, + int nd, + const ssize_t *shape_and_strides, + const char *arg_p, + ssize_t arg_offset, + char *res_p, + ssize_t res_offset, + const std::vector &depends, + const std::vector &additional_depends) +{ + return elementwise_common::binary_inplace_strided_impl< + argTy, resTy, SubtractInplaceStridedFunctor, + subtract_inplace_strided_kernel>(exec_q, nelems, nd, shape_and_strides, + arg_p, arg_offset, res_p, res_offset, + depends, additional_depends); +} + +template +struct SubtractInplaceStridedFactory +{ + fnT get() + { + if constexpr (!SubtractInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = subtract_inplace_strided_impl; + return fn; + } + } +}; + +template +class subtract_inplace_row_matrix_broadcast_sg_krn; + +template +using SubtractInplaceRowMatrixBroadcastingFunctor = + elementwise_common::BinaryInplaceRowMatrixBroadcastingFunctor< + argT, + resT, + SubtractInplaceFunctor>; + +template +sycl::event subtract_inplace_row_matrix_broadcast_impl( + sycl::queue &exec_q, + std::vector &host_tasks, + std::size_t n0, + std::size_t n1, + const char *vec_p, // typeless pointer to (n1,) contiguous row + ssize_t vec_offset, + char *mat_p, // typeless pointer to (n0, n1) C-contiguous matrix + ssize_t mat_offset, + const std::vector &depends = {}) +{ + return elementwise_common::binary_inplace_row_matrix_broadcast_impl< + argT, resT, SubtractInplaceRowMatrixBroadcastingFunctor, + subtract_inplace_row_matrix_broadcast_sg_krn>( + exec_q, host_tasks, n0, n1, vec_p, vec_offset, mat_p, mat_offset, + depends); +} + +template +struct SubtractInplaceRowMatrixBroadcastFactory +{ + fnT get() + { + if constexpr (!SubtractInplaceTypePairSupport::is_defined) { + fnT fn = nullptr; + return fn; + } + else { + if constexpr (dpctl::tensor::type_utils::is_complex::value || + dpctl::tensor::type_utils::is_complex::value) + { + fnT fn = nullptr; + return fn; + } + else { + fnT fn = subtract_inplace_row_matrix_broadcast_impl; + return fn; + } + } + } +}; + +} // namespace dpctl::tensor::kernels::subtract diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/copysign.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/copysign.cpp new file mode 100644 index 00000000000..dea66b6c940 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/copysign.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "copysign.hpp" +#include "elementwise_functions.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/copysign.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B25: ===== COPYSIGN (x1, x2) +namespace impl +{ +namespace copysign_fn_ns = dpctl::tensor::kernels::copysign; + +static binary_contig_impl_fn_ptr_t + copysign_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int copysign_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + copysign_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_copysign_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = copysign_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::CopysignTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(copysign_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::CopysignStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(copysign_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::CopysignContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(copysign_contig_dispatch_table); +}; + +} // namespace impl + +void init_copysign(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_copysign_dispatch_tables(); + using impl::copysign_contig_dispatch_table; + using impl::copysign_output_id_table; + using impl::copysign_strided_dispatch_table; + + auto copysign_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, copysign_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + copysign_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + copysign_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto copysign_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + copysign_output_id_table); + }; + m.def("_copysign", copysign_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_copysign_result_type", copysign_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/copysign.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/copysign.hpp new file mode 100644 index 00000000000..875443d792c --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/copysign.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_copysign(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp index efee3ef529f..dc09318d66a 100644 --- a/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp @@ -54,7 +54,7 @@ #include "cbrt.hpp" #include "ceil.hpp" #include "conj.hpp" -// #include "copysign.hpp" +#include "copysign.hpp" #include "cos.hpp" #include "cosh.hpp" #include "equal.hpp" @@ -77,22 +77,22 @@ #include "log1p.hpp" #include "log2.hpp" #include "logaddexp.hpp" -// #include "logical_and.hpp" +#include "logical_and.hpp" #include "logical_not.hpp" -// #include "logical_or.hpp" -// #include "logical_xor.hpp" -// #include "maximum.hpp" -// #include "minimum.hpp" -// #include "multiply.hpp" +#include "logical_or.hpp" +#include "logical_xor.hpp" +#include "maximum.hpp" +#include "minimum.hpp" +#include "multiply.hpp" #include "negative.hpp" -// #include "nextafter.hpp" -// #include "not_equal.hpp" +#include "nextafter.hpp" +#include "not_equal.hpp" #include "positive.hpp" -// #include "pow.hpp" +#include "pow.hpp" #include "proj.hpp" #include "real.hpp" #include "reciprocal.hpp" -// #include "remainder.hpp" +#include "remainder.hpp" #include "round.hpp" #include "rsqrt.hpp" #include "sign.hpp" @@ -101,7 +101,7 @@ #include "sinh.hpp" #include "sqrt.hpp" #include "square.hpp" -// #include "subtract.hpp" +#include "subtract.hpp" #include "tan.hpp" #include "tanh.hpp" #include "true_divide.hpp" @@ -134,7 +134,7 @@ void init_elementwise_functions(py::module_ m) init_cbrt(m); init_ceil(m); init_conj(m); - // init_copysign(m); + init_copysign(m); init_cos(m); init_cosh(m); init_divide(m); @@ -158,22 +158,22 @@ void init_elementwise_functions(py::module_ m) init_log1p(m); init_log2(m); init_logaddexp(m); - // init_logical_and(m); + init_logical_and(m); init_logical_not(m); - // init_logical_or(m); - // init_logical_xor(m); - // init_maximum(m); - // init_minimum(m); - // init_multiply(m); - // init_nextafter(m); + init_logical_or(m); + init_logical_xor(m); + init_maximum(m); + init_minimum(m); + init_multiply(m); + init_nextafter(m); init_negative(m); - // init_not_equal(m); + init_not_equal(m); init_positive(m); - // init_pow(m); + init_pow(m); init_proj(m); init_real(m); init_reciprocal(m); - // init_remainder(m); + init_remainder(m); init_round(m); init_rsqrt(m); init_sign(m); @@ -182,7 +182,7 @@ void init_elementwise_functions(py::module_ m) init_sinh(m); init_sqrt(m); init_square(m); - // init_subtract(m); + init_subtract(m); init_tan(m); init_tanh(m); init_trunc(m); diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_and.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_and.cpp new file mode 100644 index 00000000000..7b7d21301e4 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_and.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "logical_and.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/logical_and.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B16: ===== LOGICAL_AND (x1, x2) +namespace impl +{ +namespace logical_and_fn_ns = dpctl::tensor::kernels::logical_and; + +static binary_contig_impl_fn_ptr_t + logical_and_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logical_and_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logical_and_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logical_and_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logical_and_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogicalAndTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logical_and_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogicalAndStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logical_and_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogicalAndContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logical_and_contig_dispatch_table); +}; + +} // namespace impl + +void init_logical_and(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_logical_and_dispatch_tables(); + using impl::logical_and_contig_dispatch_table; + using impl::logical_and_output_id_table; + using impl::logical_and_strided_dispatch_table; + + auto logical_and_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_and_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_and_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_and_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logical_and_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_and_output_id_table); + }; + m.def("_logical_and", logical_and_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logical_and_result_type", logical_and_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_and.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_and.hpp new file mode 100644 index 00000000000..c22a98f2414 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_and.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_logical_and(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_or.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_or.cpp new file mode 100644 index 00000000000..270b257a80e --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_or.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "logical_or.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/logical_or.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B17: ===== LOGICAL_OR (x1, x2) +namespace impl +{ +namespace logical_or_fn_ns = dpctl::tensor::kernels::logical_or; + +static binary_contig_impl_fn_ptr_t + logical_or_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logical_or_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logical_or_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logical_or_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logical_or_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogicalOrTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logical_or_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogicalOrStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logical_or_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogicalOrContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logical_or_contig_dispatch_table); +}; + +} // namespace impl + +void init_logical_or(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_logical_or_dispatch_tables(); + using impl::logical_or_contig_dispatch_table; + using impl::logical_or_output_id_table; + using impl::logical_or_strided_dispatch_table; + + auto logical_or_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_or_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_or_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_or_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logical_or_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_or_output_id_table); + }; + m.def("_logical_or", logical_or_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logical_or_result_type", logical_or_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_or.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_or.hpp new file mode 100644 index 00000000000..11e83fe8ced --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_or.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_logical_or(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_xor.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_xor.cpp new file mode 100644 index 00000000000..d49b52cfc42 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_xor.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "logical_xor.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/logical_xor.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B18: ===== LOGICAL_XOR (x1, x2) +namespace impl +{ +namespace logical_xor_fn_ns = dpctl::tensor::kernels::logical_xor; + +static binary_contig_impl_fn_ptr_t + logical_xor_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int logical_xor_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + logical_xor_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_logical_xor_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = logical_xor_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::LogicalXorTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(logical_xor_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::LogicalXorStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(logical_xor_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::LogicalXorContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(logical_xor_contig_dispatch_table); +}; + +} // namespace impl + +void init_logical_xor(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_logical_xor_dispatch_tables(); + using impl::logical_xor_contig_dispatch_table; + using impl::logical_xor_output_id_table; + using impl::logical_xor_strided_dispatch_table; + + auto logical_xor_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, logical_xor_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + logical_xor_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + logical_xor_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto logical_xor_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + logical_xor_output_id_table); + }; + m.def("_logical_xor", logical_xor_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_logical_xor_result_type", logical_xor_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_xor.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_xor.hpp new file mode 100644 index 00000000000..24c16324912 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/logical_xor.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_logical_xor(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/maximum.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/maximum.cpp new file mode 100644 index 00000000000..dc88f2b9ad8 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/maximum.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "maximum.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/maximum.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B26: ===== MAXIMUM (x1, x2) +namespace impl +{ +namespace maximum_fn_ns = dpctl::tensor::kernels::maximum; + +static binary_contig_impl_fn_ptr_t + maximum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int maximum_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + maximum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_maximum_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = maximum_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MaximumTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(maximum_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MaximumStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(maximum_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MaximumContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(maximum_contig_dispatch_table); +}; + +} // namespace impl + +void init_maximum(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_maximum_dispatch_tables(); + using impl::maximum_contig_dispatch_table; + using impl::maximum_output_id_table; + using impl::maximum_strided_dispatch_table; + + auto maximum_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, maximum_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + maximum_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + maximum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto maximum_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + maximum_output_id_table); + }; + m.def("_maximum", maximum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_maximum_result_type", maximum_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/maximum.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/maximum.hpp new file mode 100644 index 00000000000..1f8fc027ac1 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/maximum.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_maximum(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/minimum.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/minimum.cpp new file mode 100644 index 00000000000..ef48b765860 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/minimum.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "minimum.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/minimum.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B27: ===== MINIMUM (x1, x2) +namespace impl +{ +namespace minimum_fn_ns = dpctl::tensor::kernels::minimum; + +static binary_contig_impl_fn_ptr_t + minimum_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int minimum_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + minimum_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_minimum_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = minimum_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MinimumTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(minimum_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MinimumStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(minimum_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MinimumContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(minimum_contig_dispatch_table); +}; + +} // namespace impl + +void init_minimum(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_minimum_dispatch_tables(); + using impl::minimum_contig_dispatch_table; + using impl::minimum_output_id_table; + using impl::minimum_strided_dispatch_table; + + auto minimum_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, minimum_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + minimum_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + minimum_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto minimum_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + minimum_output_id_table); + }; + m.def("_minimum", minimum_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_minimum_result_type", minimum_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/minimum.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/minimum.hpp new file mode 100644 index 00000000000..be2e18a9b37 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/minimum.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_minimum(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/multiply.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/multiply.cpp new file mode 100644 index 00000000000..49611aaaf38 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/multiply.cpp @@ -0,0 +1,243 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "multiply.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/multiply.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B19: ===== MULTIPLY (x1, x2) +namespace impl +{ + +namespace multiply_fn_ns = dpctl::tensor::kernels::multiply; + +static binary_contig_impl_fn_ptr_t + multiply_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static int multiply_output_id_table[td_ns::num_types][td_ns::num_types]; +static int multiply_inplace_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + multiply_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// mul(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + multiply_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// mul(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + multiply_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + multiply_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + multiply_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + multiply_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_multiply_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = multiply_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::MultiplyTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(multiply_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::MultiplyStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(multiply_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::MultiplyContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(multiply_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::MultiplyContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + MultiplyContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + multiply_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::MultiplyContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + MultiplyContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + multiply_contig_row_contig_matrix_broadcast_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::MultiplyInplaceStridedFactory; + DispatchTableBuilder + dtb6; + dtb6.populate_dispatch_table(multiply_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::MultiplyInplaceContigFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(multiply_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::MultiplyInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(multiply_inplace_row_matrix_dispatch_table); + + // which types are supported by the in-place kernels + using fn_ns::MultiplyInplaceTypeMapFactory; + DispatchTableBuilder dtb9; + dtb9.populate_dispatch_table(multiply_inplace_output_id_table); +}; + +} // namespace impl + +void init_multiply(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_multiply_dispatch_tables(); + using impl::multiply_contig_dispatch_table; + using impl::multiply_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::multiply_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::multiply_output_id_table; + using impl::multiply_strided_dispatch_table; + + auto multiply_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, multiply_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + multiply_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + multiply_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + multiply_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + multiply_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto multiply_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + multiply_output_id_table); + }; + m.def("_multiply", multiply_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_multiply_result_type", multiply_result_type_pyapi, ""); + + using impl::multiply_inplace_contig_dispatch_table; + using impl::multiply_inplace_output_id_table; + using impl::multiply_inplace_row_matrix_dispatch_table; + using impl::multiply_inplace_strided_dispatch_table; + + auto multiply_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, multiply_inplace_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + multiply_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + multiply_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + multiply_inplace_row_matrix_dispatch_table); + }; + m.def("_multiply_inplace", multiply_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/multiply.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/multiply.hpp new file mode 100644 index 00000000000..a4ed946a850 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/multiply.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_multiply(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/nextafter.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/nextafter.cpp new file mode 100644 index 00000000000..2fd706f667e --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/nextafter.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "nextafter.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/nextafter.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B28: ===== NEXTAFTER (x1, x2) +namespace impl +{ +namespace nextafter_fn_ns = dpctl::tensor::kernels::nextafter; + +static binary_contig_impl_fn_ptr_t + nextafter_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int nextafter_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + nextafter_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_nextafter_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = nextafter_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::NextafterTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(nextafter_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::NextafterStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(nextafter_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::NextafterContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(nextafter_contig_dispatch_table); +}; + +} // namespace impl + +void init_nextafter(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_nextafter_dispatch_tables(); + using impl::nextafter_contig_dispatch_table; + using impl::nextafter_output_id_table; + using impl::nextafter_strided_dispatch_table; + + auto nextafter_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, nextafter_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + nextafter_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + nextafter_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto nextafter_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + nextafter_output_id_table); + }; + m.def("_nextafter", nextafter_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_nextafter_result_type", nextafter_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/nextafter.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/nextafter.hpp new file mode 100644 index 00000000000..76ad701d401 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/nextafter.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_nextafter(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/not_equal.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/not_equal.cpp new file mode 100644 index 00000000000..967bfda4e8d --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/not_equal.cpp @@ -0,0 +1,145 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "not_equal.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/not_equal.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +// B20: ===== NOT_EQUAL (x1, x2) +namespace impl +{ +namespace not_equal_fn_ns = dpctl::tensor::kernels::not_equal; + +static binary_contig_impl_fn_ptr_t + not_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static int not_equal_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + not_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_not_equal_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = not_equal_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::NotEqualTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(not_equal_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::NotEqualStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(not_equal_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::NotEqualContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(not_equal_contig_dispatch_table); +}; + +} // namespace impl + +void init_not_equal(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_not_equal_dispatch_tables(); + using impl::not_equal_contig_dispatch_table; + using impl::not_equal_output_id_table; + using impl::not_equal_strided_dispatch_table; + + auto not_equal_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, not_equal_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + not_equal_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + not_equal_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto not_equal_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + not_equal_output_id_table); + }; + m.def("_not_equal", not_equal_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_not_equal_result_type", not_equal_result_type_pyapi, ""); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/not_equal.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/not_equal.hpp new file mode 100644 index 00000000000..c6c99bb793b --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/not_equal.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_not_equal(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/pow.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/pow.cpp new file mode 100644 index 00000000000..cfb0649e317 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/pow.cpp @@ -0,0 +1,202 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "pow.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/pow.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B21: ===== POW (x1, x2) +namespace impl +{ + +namespace pow_fn_ns = dpctl::tensor::kernels::pow; + +static binary_contig_impl_fn_ptr_t pow_contig_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +static int pow_output_id_table[td_ns::num_types][td_ns::num_types]; +static int pow_inplace_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + pow_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + pow_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + pow_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +void populate_pow_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = pow_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::PowTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(pow_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::PowStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(pow_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::PowContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(pow_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::PowInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(pow_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::PowInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(pow_inplace_contig_dispatch_table); + + // which types are supported by the in-place kernels + using fn_ns::PowInplaceTypeMapFactory; + DispatchTableBuilder dtb6; + dtb6.populate_dispatch_table(pow_inplace_output_id_table); +}; + +} // namespace impl + +void init_pow(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_pow_dispatch_tables(); + using impl::pow_contig_dispatch_table; + using impl::pow_output_id_table; + using impl::pow_strided_dispatch_table; + + auto pow_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, pow_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + pow_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + pow_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto pow_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + pow_output_id_table); + }; + m.def("_pow", pow_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_pow_result_type", pow_result_type_pyapi, ""); + + using impl::pow_inplace_contig_dispatch_table; + using impl::pow_inplace_output_id_table; + using impl::pow_inplace_strided_dispatch_table; + + auto pow_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, pow_inplace_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + pow_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + pow_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_pow_inplace", pow_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/pow.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/pow.hpp new file mode 100644 index 00000000000..197a23b80d8 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/pow.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_pow(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/remainder.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/remainder.cpp new file mode 100644 index 00000000000..a4a8c7fbd6e --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/remainder.cpp @@ -0,0 +1,204 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "remainder.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/remainder.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B22: ===== REMAINDER (x1, x2) +namespace impl +{ + +namespace remainder_fn_ns = dpctl::tensor::kernels::remainder; + +static binary_contig_impl_fn_ptr_t + remainder_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static int remainder_output_id_table[td_ns::num_types][td_ns::num_types]; +static int remainder_inplace_output_id_table[td_ns::num_types] + [td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + remainder_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + remainder_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + remainder_inplace_strided_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_remainder_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = remainder_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::RemainderTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(remainder_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::RemainderStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(remainder_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::RemainderContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(remainder_contig_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::RemainderInplaceStridedFactory; + DispatchTableBuilder + dtb4; + dtb4.populate_dispatch_table(remainder_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::RemainderInplaceContigFactory; + DispatchTableBuilder + dtb5; + dtb5.populate_dispatch_table(remainder_inplace_contig_dispatch_table); + + // which types are supported by the in-place kernels + using fn_ns::RemainderInplaceTypeMapFactory; + DispatchTableBuilder dtb6; + dtb6.populate_dispatch_table(remainder_inplace_output_id_table); +} + +} // namespace impl + +void init_remainder(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_remainder_dispatch_tables(); + using impl::remainder_contig_dispatch_table; + using impl::remainder_output_id_table; + using impl::remainder_strided_dispatch_table; + + auto remainder_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, remainder_output_id_table, + // function pointers to handle operation on contiguous arrays + // (pointers may be nullptr) + remainder_contig_dispatch_table, + // function pointers to handle operation on strided arrays (most + // general case) + remainder_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{}, + // function pointers to handle operation of c-contig matrix and + // c-contig row with broadcasting (may be nullptr) + td_ns::NullPtrTable< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{}); + }; + auto remainder_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + remainder_output_id_table); + }; + m.def("_remainder", remainder_pyapi, "", py::arg("src1"), + py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_remainder_result_type", remainder_result_type_pyapi, ""); + + using impl::remainder_inplace_contig_dispatch_table; + using impl::remainder_inplace_output_id_table; + using impl::remainder_inplace_strided_dispatch_table; + + auto remainder_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, remainder_inplace_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + remainder_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + remainder_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + td_ns::NullPtrTable< + binary_inplace_row_matrix_broadcast_impl_fn_ptr_t>{}); + }; + m.def("_remainder_inplace", remainder_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/remainder.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/remainder.hpp new file mode 100644 index 00000000000..c00bdc9e0e6 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/remainder.hpp @@ -0,0 +1,46 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl::tensor::py_internal +{ + +extern void init_remainder(py::module_ m); + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/subtract.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/subtract.cpp new file mode 100644 index 00000000000..bcd15c2b0b5 --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/subtract.cpp @@ -0,0 +1,242 @@ +//***************************************************************************** +// Copyright (c) 2026, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// - Neither the name of the copyright holder nor the names of its contributors +// may be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** +// +//===---------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_elementwise_impl +/// extension, specifically functions for elementwise operations. +//===---------------------------------------------------------------------===// + +#include + +#include + +#include "dpnp4pybind11.hpp" +#include +#include +#include + +#include "elementwise_functions.hpp" +#include "subtract.hpp" +#include "utils/type_dispatch.hpp" + +#include "kernels/elementwise_functions/common.hpp" +#include "kernels/elementwise_functions/common_inplace.hpp" +#include "kernels/elementwise_functions/subtract.hpp" + +namespace dpctl::tensor::py_internal +{ + +namespace py = pybind11; +namespace td_ns = dpctl::tensor::type_dispatch; + +namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common; +using ew_cmn_ns::binary_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_strided_impl_fn_ptr_t; + +using ew_cmn_ns::binary_inplace_contig_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_row_matrix_broadcast_impl_fn_ptr_t; +using ew_cmn_ns::binary_inplace_strided_impl_fn_ptr_t; + +// B23: ===== SUBTRACT (x1, x2) +namespace impl +{ +namespace subtract_fn_ns = dpctl::tensor::kernels::subtract; + +static binary_contig_impl_fn_ptr_t + subtract_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; + +static int subtract_output_id_table[td_ns::num_types][td_ns::num_types]; +static int subtract_inplace_output_id_table[td_ns::num_types][td_ns::num_types]; + +static binary_strided_impl_fn_ptr_t + subtract_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; + +// sub(matrix, row) +static binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t + subtract_contig_matrix_contig_row_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +// sub(row, matrix) +static binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t + subtract_contig_row_contig_matrix_broadcast_dispatch_table + [td_ns::num_types][td_ns::num_types]; + +static binary_inplace_contig_impl_fn_ptr_t + subtract_inplace_contig_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_strided_impl_fn_ptr_t + subtract_inplace_strided_dispatch_table[td_ns::num_types][td_ns::num_types]; +static binary_inplace_row_matrix_broadcast_impl_fn_ptr_t + subtract_inplace_row_matrix_dispatch_table[td_ns::num_types] + [td_ns::num_types]; + +void populate_subtract_dispatch_tables(void) +{ + using namespace td_ns; + namespace fn_ns = subtract_fn_ns; + + // which input types are supported, and what is the type of the result + using fn_ns::SubtractTypeMapFactory; + DispatchTableBuilder dtb1; + dtb1.populate_dispatch_table(subtract_output_id_table); + + // function pointers for operation on general strided arrays + using fn_ns::SubtractStridedFactory; + DispatchTableBuilder + dtb2; + dtb2.populate_dispatch_table(subtract_strided_dispatch_table); + + // function pointers for operation on contiguous inputs and output + using fn_ns::SubtractContigFactory; + DispatchTableBuilder + dtb3; + dtb3.populate_dispatch_table(subtract_contig_dispatch_table); + + // function pointers for operation on contiguous matrix, contiguous row + // with contiguous matrix output + using fn_ns::SubtractContigMatrixContigRowBroadcastFactory; + DispatchTableBuilder< + binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t, + SubtractContigMatrixContigRowBroadcastFactory, num_types> + dtb4; + dtb4.populate_dispatch_table( + subtract_contig_matrix_contig_row_broadcast_dispatch_table); + + // function pointers for operation on contiguous row, contiguous matrix + // with contiguous matrix output + using fn_ns::SubtractContigRowContigMatrixBroadcastFactory; + DispatchTableBuilder< + binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t, + SubtractContigRowContigMatrixBroadcastFactory, num_types> + dtb5; + dtb5.populate_dispatch_table( + subtract_contig_row_contig_matrix_broadcast_dispatch_table); + + // function pointers for inplace operation on general strided arrays + using fn_ns::SubtractInplaceStridedFactory; + DispatchTableBuilder + dtb6; + dtb6.populate_dispatch_table(subtract_inplace_strided_dispatch_table); + + // function pointers for inplace operation on contiguous inputs and output + using fn_ns::SubtractInplaceContigFactory; + DispatchTableBuilder + dtb7; + dtb7.populate_dispatch_table(subtract_inplace_contig_dispatch_table); + + // function pointers for inplace operation on contiguous matrix + // and contiguous row + using fn_ns::SubtractInplaceRowMatrixBroadcastFactory; + DispatchTableBuilder + dtb8; + dtb8.populate_dispatch_table(subtract_inplace_row_matrix_dispatch_table); + + // which types are supported by the in-place kernels + using fn_ns::SubtractInplaceTypeMapFactory; + DispatchTableBuilder dtb9; + dtb9.populate_dispatch_table(subtract_inplace_output_id_table); +}; + +} // namespace impl + +void init_subtract(py::module_ m) +{ + using arrayT = dpctl::tensor::usm_ndarray; + using event_vecT = std::vector; + { + impl::populate_subtract_dispatch_tables(); + using impl::subtract_contig_dispatch_table; + using impl::subtract_contig_matrix_contig_row_broadcast_dispatch_table; + using impl::subtract_contig_row_contig_matrix_broadcast_dispatch_table; + using impl::subtract_output_id_table; + using impl::subtract_strided_dispatch_table; + + auto subtract_pyapi = [&](const arrayT &src1, const arrayT &src2, + const arrayT &dst, sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_ufunc( + src1, src2, dst, exec_q, depends, subtract_output_id_table, + // function pointers to handle operation on contiguous + // arrays (pointers may be nullptr) + subtract_contig_dispatch_table, + // function pointers to handle operation on strided arrays + // (most general case) + subtract_strided_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + subtract_contig_matrix_contig_row_broadcast_dispatch_table, + // function pointers to handle operation of c-contig matrix + // and c-contig row with broadcasting (may be nullptr) + subtract_contig_row_contig_matrix_broadcast_dispatch_table); + }; + auto subtract_result_type_pyapi = [&](const py::dtype &dtype1, + const py::dtype &dtype2) { + return py_binary_ufunc_result_type(dtype1, dtype2, + subtract_output_id_table); + }; + m.def("_subtract", subtract_pyapi, "", py::arg("src1"), py::arg("src2"), + py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("_subtract_result_type", subtract_result_type_pyapi, ""); + + using impl::subtract_inplace_contig_dispatch_table; + using impl::subtract_inplace_output_id_table; + using impl::subtract_inplace_row_matrix_dispatch_table; + using impl::subtract_inplace_strided_dispatch_table; + + auto subtract_inplace_pyapi = [&](const arrayT &src, const arrayT &dst, + sycl::queue &exec_q, + const event_vecT &depends = {}) { + return py_binary_inplace_ufunc( + src, dst, exec_q, depends, subtract_inplace_output_id_table, + // function pointers to handle inplace operation on + // contiguous arrays (pointers may be nullptr) + subtract_inplace_contig_dispatch_table, + // function pointers to handle inplace operation on strided + // arrays (most general case) + subtract_inplace_strided_dispatch_table, + // function pointers to handle inplace operation on + // c-contig matrix with c-contig row with broadcasting + // (may be nullptr) + subtract_inplace_row_matrix_dispatch_table); + }; + m.def("_subtract_inplace", subtract_inplace_pyapi, "", py::arg("lhs"), + py::arg("rhs"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + } +} + +} // namespace dpctl::tensor::py_internal diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/subtract.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/subtract.hpp new file mode 100644 index 00000000000..89cdfd6d0ea --- /dev/null +++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/subtract.hpp @@ -0,0 +1,42 @@ +//===----------- Implementation of _tensor_impl module ---------*-C++-*-/===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines functions of dpctl.tensor._tensor_impl extensions, +/// specifically functions for elementwise operations. +//===----------------------------------------------------------------------===// + +#pragma once +#include + +namespace py = pybind11; + +namespace dpctl +{ +namespace tensor +{ +namespace py_internal +{ + +extern void init_subtract(py::module_ m); + +} // namespace py_internal +} // namespace tensor +} // namespace dpctl diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index a9ec3ebd0a8..711ecaf9722 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -43,15 +43,13 @@ # pylint: disable=duplicate-code # pylint: disable=no-name-in-module - -import dpctl.tensor._tensor_elementwise_impl as ti import dpctl.utils as dpu import numpy # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor import dpctl_ext.tensor as dpt -import dpctl_ext.tensor._tensor_elementwise_impl as ti_ext +import dpctl_ext.tensor._tensor_elementwise_impl as ti import dpnp import dpnp.backend.extensions.ufunc._ufunc_impl as ufi from dpnp.dpnp_algo.dpnp_elementwise_common import DPNPBinaryFunc, DPNPUnaryFunc @@ -661,8 +659,8 @@ def array_equiv(a1, a2): equal = DPNPBinaryFunc( "equal", - ti_ext._equal_result_type, - ti_ext._equal, + ti._equal_result_type, + ti._equal, _EQUAL_DOCSTRING, ) @@ -737,8 +735,8 @@ def array_equiv(a1, a2): greater = DPNPBinaryFunc( "greater", - ti_ext._greater_result_type, - ti_ext._greater, + ti._greater_result_type, + ti._greater, _GREATER_DOCSTRING, ) @@ -814,8 +812,8 @@ def array_equiv(a1, a2): greater_equal = DPNPBinaryFunc( "greater_equal", - ti_ext._greater_equal_result_type, - ti_ext._greater_equal, + ti._greater_equal_result_type, + ti._greater_equal, _GREATER_EQUAL_DOCSTRING, ) @@ -1095,8 +1093,8 @@ def iscomplexobj(x): isfinite = DPNPUnaryFunc( "isfinite", - ti_ext._isfinite_result_type, - ti_ext._isfinite, + ti._isfinite_result_type, + ti._isfinite, _ISFINITE_DOCSTRING, ) @@ -1338,8 +1336,8 @@ def isin( isinf = DPNPUnaryFunc( "isinf", - ti_ext._isinf_result_type, - ti_ext._isinf, + ti._isinf_result_type, + ti._isinf, _ISINF_DOCSTRING, ) @@ -1396,8 +1394,8 @@ def isin( isnan = DPNPUnaryFunc( "isnan", - ti_ext._isnan_result_type, - ti_ext._isnan, + ti._isnan_result_type, + ti._isnan, _ISNAN_DOCSTRING, ) @@ -1750,8 +1748,8 @@ def isscalar(element): less = DPNPBinaryFunc( "less", - ti_ext._less_result_type, - ti_ext._less, + ti._less_result_type, + ti._less, _LESS_DOCSTRING, ) @@ -1826,8 +1824,8 @@ def isscalar(element): less_equal = DPNPBinaryFunc( "less_equal", - ti_ext._less_equal_result_type, - ti_ext._less_equal, + ti._less_equal_result_type, + ti._less_equal, _LESS_EQUAL_DOCSTRING, ) @@ -1969,8 +1967,8 @@ def isscalar(element): logical_not = DPNPUnaryFunc( "logical_not", - ti_ext._logical_not_result_type, - ti_ext._logical_not, + ti._logical_not_result_type, + ti._logical_not, _LOGICAL_NOT_DOCSTRING, ) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 54a17cec0c3..89bc0868160 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -47,14 +47,13 @@ import builtins import warnings -import dpctl.tensor._tensor_elementwise_impl as ti import dpctl.utils as dpu import numpy # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor import dpctl_ext.tensor as dpt -import dpctl_ext.tensor._tensor_elementwise_impl as ti_ext +import dpctl_ext.tensor._tensor_elementwise_impl as ti import dpctl_ext.tensor._type_utils as dtu import dpnp import dpnp.backend.extensions.ufunc._ufunc_impl as ufi @@ -385,8 +384,8 @@ def _validate_interp_param(param, name, exec_q, usm_type, dtype=None): abs = DPNPUnaryFunc( "abs", - ti_ext._abs_result_type, - ti_ext._abs, + ti._abs_result_type, + ti._abs, _ABS_DOCSTRING, mkl_fn_to_call="_mkl_abs_to_call", mkl_impl_fn="_abs", @@ -469,8 +468,8 @@ def _validate_interp_param(param, name, exec_q, usm_type, dtype=None): add = DPNPBinaryFunc( "add", - ti_ext._add_result_type, - ti_ext._add, + ti._add_result_type, + ti._add, _ADD_DOCSTRING, mkl_fn_to_call="_mkl_add_to_call", mkl_impl_fn="_add", @@ -541,8 +540,8 @@ def _validate_interp_param(param, name, exec_q, usm_type, dtype=None): angle = DPNPAngle( "angle", - ti_ext._angle_result_type, - ti_ext._angle, + ti._angle_result_type, + ti._angle, _ANGLE_DOCSTRING, mkl_fn_to_call="_mkl_arg_to_call", mkl_impl_fn="_arg", @@ -647,8 +646,8 @@ def around(x, /, decimals=0, out=None): ceil = DPNPUnaryFunc( "ceil", - ti_ext._ceil_result_type, - ti_ext._ceil, + ti._ceil_result_type, + ti._ceil, _CEIL_DOCSTRING, mkl_fn_to_call="_mkl_ceil_to_call", mkl_impl_fn="_ceil", @@ -782,8 +781,8 @@ def clip(a, /, min=None, max=None, *, out=None, order="K", **kwargs): conj = DPNPUnaryFunc( "conj", - ti_ext._conj_result_type, - ti_ext._conj, + ti._conj_result_type, + ti._conj, _CONJ_DOCSTRING, mkl_fn_to_call="_mkl_conj_to_call", mkl_impl_fn="_conj", @@ -1558,12 +1557,12 @@ def diff(a, n=1, axis=-1, prepend=None, append=None): divide = DPNPBinaryFunc( "divide", - ti_ext._divide_result_type, - ti_ext._divide, + ti._divide_result_type, + ti._divide, _DIVIDE_DOCSTRING, mkl_fn_to_call="_mkl_div_to_call", mkl_impl_fn="_div", - binary_inplace_fn=ti_ext._divide_inplace, + binary_inplace_fn=ti._divide_inplace, acceptance_fn=dtu._acceptance_fn_divide, ) @@ -2057,8 +2056,8 @@ def ediff1d(ary, to_end=None, to_begin=None): floor = DPNPUnaryFunc( "floor", - ti_ext._floor_result_type, - ti_ext._floor, + ti._floor_result_type, + ti._floor, _FLOOR_DOCSTRING, mkl_fn_to_call="_mkl_floor_to_call", mkl_impl_fn="_floor", @@ -2139,10 +2138,10 @@ def ediff1d(ary, to_end=None, to_begin=None): floor_divide = DPNPBinaryFunc( "floor_divide", - ti_ext._floor_divide_result_type, - ti_ext._floor_divide, + ti._floor_divide_result_type, + ti._floor_divide, _FLOOR_DIVIDE_DOCSTRING, - binary_inplace_fn=ti_ext._floor_divide_inplace, + binary_inplace_fn=ti._floor_divide_inplace, ) @@ -2941,8 +2940,8 @@ def gradient(f, *varargs, axis=None, edge_order=1): imag = DPNPImag( "imag", - ti_ext._imag_result_type, - ti_ext._imag, + ti._imag_result_type, + ti._imag, _IMAG_DOCSTRING, ) @@ -3852,8 +3851,8 @@ def _check_nan_inf(val, val_dt): negative = DPNPUnaryFunc( "negative", - ti_ext._negative_result_type, - ti_ext._negative, + ti._negative_result_type, + ti._negative, _NEGATIVE_DOCSTRING, acceptance_fn=acceptance_fn_negative, ) @@ -3988,8 +3987,8 @@ def _check_nan_inf(val, val_dt): positive = DPNPUnaryFunc( "positive", - ti_ext._positive_result_type, - ti_ext._positive, + ti._positive_result_type, + ti._positive, _POSITIVE_DOCSTRING, acceptance_fn=acceptance_fn_positive, ) @@ -4250,8 +4249,8 @@ def prod( proj = DPNPUnaryFunc( "proj", - ti_ext._proj_result_type, - ti_ext._proj, + ti._proj_result_type, + ti._proj, _PROJ_DOCSTRING, ) @@ -4313,8 +4312,8 @@ def prod( real = DPNPReal( "real", - ti_ext._real_result_type, - ti_ext._real, + ti._real_result_type, + ti._real, _REAL_DOCSTRING, ) @@ -4596,8 +4595,8 @@ def real_if_close(a, tol=100): round = DPNPRound( "round", - ti_ext._round_result_type, - ti_ext._round, + ti._round_result_type, + ti._round, _ROUND_DOCSTRING, mkl_fn_to_call="_mkl_round_to_call", mkl_impl_fn="_round", @@ -4668,8 +4667,8 @@ def real_if_close(a, tol=100): sign = DPNPUnaryFunc( "sign", - ti_ext._sign_result_type, - ti_ext._sign, + ti._sign_result_type, + ti._sign, _SIGN_DOCSTRING, acceptance_fn=acceptance_fn_sign, ) @@ -4730,8 +4729,8 @@ def real_if_close(a, tol=100): signbit = DPNPUnaryFunc( "signbit", - ti_ext._signbit_result_type, - ti_ext._signbit, + ti._signbit_result_type, + ti._signbit, _SIGNBIT_DOCSTRING, ) @@ -5229,8 +5228,8 @@ def trapezoid(y, x=None, dx=1.0, axis=-1): trunc = DPNPUnaryFunc( "trunc", - ti_ext._trunc_result_type, - ti_ext._trunc, + ti._trunc_result_type, + ti._trunc, _TRUNC_DOCSTRING, mkl_fn_to_call="_mkl_trunc_to_call", mkl_impl_fn="_trunc",