diff --git a/dpctl_ext/tensor/CMakeLists.txt b/dpctl_ext/tensor/CMakeLists.txt
index 261204223ddd..ef3565f9827e 100644
--- a/dpctl_ext/tensor/CMakeLists.txt
+++ b/dpctl_ext/tensor/CMakeLists.txt
@@ -88,7 +88,7 @@ set(_elementwise_sources
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_or.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_right_shift.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/bitwise_xor.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cbrt.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cbrt.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/ceil.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/conj.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/copysign.cpp
@@ -96,7 +96,7 @@ set(_elementwise_sources
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/cosh.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/equal.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/exp.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/exp2.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/exp2.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/expm1.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/floor_divide.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/floor.cpp
@@ -126,23 +126,23 @@ set(_elementwise_sources
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/not_equal.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/positive.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/pow.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/proj.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/real.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/reciprocal.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/proj.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/real.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/reciprocal.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/remainder.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/round.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/rsqrt.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sign.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/signbit.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sin.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sinh.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/square.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/round.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/rsqrt.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sign.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/signbit.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sin.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sinh.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/sqrt.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/square.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/subtract.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tan.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tanh.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tan.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/tanh.cpp
     #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/true_divide.cpp
-    #${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/trunc.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/elementwise_functions/trunc.cpp
 )
 set(_reduction_sources
     ${CMAKE_CURRENT_SOURCE_DIR}/libtensor/source/reductions/reduction_common.cpp
diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py
index a0dfe97c1eca..70352687c5d6 100644
--- a/dpctl_ext/tensor/__init__.py
+++ b/dpctl_ext/tensor/__init__.py
@@ -27,16 +27,16 @@
 # *****************************************************************************
 
 
-from dpctl.tensor._search_functions import where
-
-from dpctl_ext.tensor._copy_utils import (
+from ._accumulation import cumulative_logsumexp, cumulative_prod, cumulative_sum
+from ._clip import clip
+from ._copy_utils import (
     asnumpy,
     astype,
     copy,
     from_numpy,
     to_numpy,
 )
-from dpctl_ext.tensor._ctors import (
+from ._ctors import (
     arange,
     asarray,
     empty,
@@ -53,36 +53,6 @@
     zeros,
     zeros_like,
 )
-from dpctl_ext.tensor._indexing_functions import (
-    extract,
-    nonzero,
-    place,
-    put,
-    put_along_axis,
-    take,
-    take_along_axis,
-)
-from dpctl_ext.tensor._manipulation_functions import (
-    broadcast_arrays,
-    broadcast_to,
-    concat,
-    expand_dims,
-    flip,
-    moveaxis,
-    permute_dims,
-    repeat,
-    roll,
-    squeeze,
-    stack,
-    swapaxes,
-    tile,
-    unstack,
-)
-from dpctl_ext.tensor._reshape import reshape
-from dpctl_ext.tensor._utility_functions import all, any, diff
-
-from ._accumulation import cumulative_logsumexp, cumulative_prod, cumulative_sum
-from ._clip import clip
 from ._elementwise_funcs import (
     abs,
     acos,
@@ -93,11 +63,13 @@
     atan,
     atanh,
     bitwise_invert,
+    cbrt,
     ceil,
     conj,
     cos,
     cosh,
     exp,
+    exp2,
     expm1,
     floor,
     imag,
@@ -111,6 +83,45 @@
     logical_not,
     negative,
     positive,
+    proj,
+    real,
+    reciprocal,
+    round,
+    rsqrt,
+    sign,
+    signbit,
+    sin,
+    sinh,
+    sqrt,
+    square,
+    tan,
+    tanh,
+    trunc,
+)
+from ._indexing_functions import (
+    extract,
+    nonzero,
+    place,
+    put,
+    put_along_axis,
+    take,
+    take_along_axis,
+)
+from ._manipulation_functions import (
+    broadcast_arrays,
+    broadcast_to,
+    concat,
+    expand_dims,
+    flip,
+    moveaxis,
+    permute_dims,
+    repeat,
+    roll,
+    squeeze,
+    stack,
+    swapaxes,
+    tile,
+    unstack,
 )
 from ._reduction import (
     argmax,
@@ -123,6 +134,8 @@
     reduce_hypot,
     sum,
 )
+from ._reshape import reshape
+from ._search_functions import where
 from ._searchsorted import searchsorted
 from ._set_functions import (
     isin,
@@ -133,6 +146,7 @@
 )
 from ._sorting import argsort, sort, top_k
 from ._type_utils import can_cast, finfo, iinfo, isdtype, result_type
+from ._utility_functions import all, any, diff
 
 __all__ = [
     "abs",
@@ -156,6 +170,7 @@
     "broadcast_arrays",
     "broadcast_to",
     "can_cast",
+    "cbrt",
     "ceil",
     "concat",
     "conj",
@@ -174,6 +189,7 @@
     "expand_dims",
     "eye",
     "exp",
+    "exp2",
     "expm1",
     "finfo",
     "flip",
@@ -207,26 +223,40 @@
     "place",
     "positive",
     "prod",
+    "proj",
     "put",
     "put_along_axis",
+    "real",
+    "reciprocal",
     "reduce_hypot",
     "repeat",
     "reshape",
     "result_type",
     "roll",
+    "round",
+    "rsqrt",
     "searchsorted",
+    "sign",
+    "signbit",
+    "sin",
+    "sinh",
     "sort",
+    "sqrt",
+    "square",
     "squeeze",
     "stack",
     "sum",
     "swapaxes",
     "take",
     "take_along_axis",
+    "tan",
+    "tanh",
     "tile",
     "top_k",
     "to_numpy",
     "tril",
     "triu",
+    "trunc",
     "unique_all",
     "unique_counts",
     "unique_inverse",
diff --git a/dpctl_ext/tensor/_accumulation.py b/dpctl_ext/tensor/_accumulation.py
index 17596e5647fc..2dfe9656e198 100644
--- a/dpctl_ext/tensor/_accumulation.py
+++ b/dpctl_ext/tensor/_accumulation.py
@@ -35,14 +35,14 @@
 import dpctl_ext.tensor as dpt_ext
 import dpctl_ext.tensor._tensor_accumulation_impl as tai
 import dpctl_ext.tensor._tensor_impl as ti
-from dpctl_ext.tensor._type_utils import (
+
+from ._numpy_helper import normalize_axis_index
+from ._type_utils import (
     _default_accumulation_dtype,
     _default_accumulation_dtype_fp_types,
     _to_device_supported_dtype,
 )
 
-from ._numpy_helper import normalize_axis_index
-
 
 def _accumulate_common(
     x,
diff --git a/dpctl_ext/tensor/_clip.py b/dpctl_ext/tensor/_clip.py
index 9fc42abc0d8b..ef07269c4ea0 100644
--- a/dpctl_ext/tensor/_clip.py
+++ b/dpctl_ext/tensor/_clip.py
@@ -35,14 +35,13 @@
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
 import dpctl_ext.tensor._tensor_impl as ti
-from dpctl_ext.tensor._copy_utils import (
+
+from ._copy_utils import (
     _empty_like_orderK,
     _empty_like_pair_orderK,
     _empty_like_triple_orderK,
 )
-from dpctl_ext.tensor._manipulation_functions import _broadcast_shape_impl
-from dpctl_ext.tensor._type_utils import _can_cast
-
+from ._manipulation_functions import _broadcast_shape_impl
 from ._scalar_utils import (
     _get_dtype,
     _get_queue_usm_type,
@@ -50,6 +49,7 @@
     _validate_dtype,
 )
 from ._type_utils import (
+    _can_cast,
     _resolve_one_strong_one_weak_types,
     _resolve_one_strong_two_weak_types,
 )
diff --git a/dpctl_ext/tensor/_copy_utils.py b/dpctl_ext/tensor/_copy_utils.py
index 878dabc581d2..37879997b788 100644
--- a/dpctl_ext/tensor/_copy_utils.py
+++ b/dpctl_ext/tensor/_copy_utils.py
@@ -42,9 +42,9 @@
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
 import dpctl_ext.tensor._tensor_impl as ti
-from dpctl_ext.tensor._type_utils import _dtype_supported_by_device_impl
 
 from ._numpy_helper import normalize_axis_index
+from ._type_utils import _dtype_supported_by_device_impl
 
 __doc__ = (
     "Implementation module for copy- and cast- operations on "
@@ -299,7 +299,7 @@ def _prepare_indices_arrays(inds, q, usm_type):
     inds = tuple(
         map(
             lambda ind: (
-                ind if ind.dtype == ind_dt else dpt.astype(ind, ind_dt)
+                ind if ind.dtype == ind_dt else dpt_ext.astype(ind, ind_dt)
             ),
             inds,
         )
diff --git a/dpctl_ext/tensor/_ctors.py b/dpctl_ext/tensor/_ctors.py
index 532802c0c519..0b7650873fe3 100644
--- a/dpctl_ext/tensor/_ctors.py
+++ b/dpctl_ext/tensor/_ctors.py
@@ -42,7 +42,8 @@
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
 import dpctl_ext.tensor._tensor_impl as ti
-from dpctl_ext.tensor._copy_utils import (
+
+from ._copy_utils import (
     _empty_like_orderK,
     _from_numpy_empty_like_orderK,
 )
@@ -1440,7 +1441,7 @@ def linspace(
     )
     _manager.add_event_pair(hev, la_ev)
 
-    return res if int_dt is None else dpt.astype(res, int_dt)
+    return res if int_dt is None else dpt_ext.astype(res, int_dt)
 
 
 def meshgrid(*arrays, indexing="xy"):
diff --git a/dpctl_ext/tensor/_elementwise_funcs.py b/dpctl_ext/tensor/_elementwise_funcs.py
index b57074ae9784..ae0ef8aa3496 100644
--- a/dpctl_ext/tensor/_elementwise_funcs.py
+++ b/dpctl_ext/tensor/_elementwise_funcs.py
@@ -33,6 +33,7 @@
 from ._elementwise_common import UnaryElementwiseFunc
 from ._type_utils import (
     _acceptance_fn_negative,
+    _acceptance_fn_reciprocal,
 )
 
 # U01: ==== ABS    (x)
@@ -782,6 +783,473 @@
 )
 del _positive_docstring_
 
+# U27: ==== REAL        (x)
+_real_docstring = r"""
+real(x, /, \*, out=None, order='K')
+
+Computes real part of each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array. May have any data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise real component of input.
+        If the input is a real-valued data type, the returned array has
+        the same data type. If the input is a complex floating-point
+        data type, the returned array has a floating-point data type
+        with the same floating-point precision as complex input.
+"""
+
+real = UnaryElementwiseFunc(
+    "real", ti._real_result_type, ti._real, _real_docstring
+)
+del _real_docstring
+
+# U28: ==== ROUND       (x)
+_round_docstring = r"""
+round(x, /, \*, out=None, order='K')
+
+Rounds each element `x_i` of the input array `x` to
+the nearest integer-valued number.
+
+When two integers are equally close to `x_i`, the result is the nearest even
+integer to `x_i`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a numeric data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise rounded values.
+"""
+
+round = UnaryElementwiseFunc(
+    "round", ti._round_result_type, ti._round, _round_docstring
+)
+del _round_docstring
+
+# U29: ==== SIGN        (x)
+_sign_docstring = r"""
+sign(x, /, \*, out=None, order='K')
+
+Computes an indication of the sign of each element `x_i` of input array `x`
+using the signum function.
+
+The signum function returns `-1` if `x_i` is less than `0`,
+`0` if `x_i` is equal to `0`, and `1` if `x_i` is greater than `0`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a numeric data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise result of the signum function. The
+        data type of the returned array is determined by the Type Promotion
+        Rules.
+"""
+
+sign = UnaryElementwiseFunc(
+    "sign", ti._sign_result_type, ti._sign, _sign_docstring
+)
+del _sign_docstring
+
+# U30: ==== SIN         (x)
+_sin_docstring = r"""
+sin(x, /, \*, out=None, order='K')
+
+Computes sine for each element `x_i` of input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a real-valued floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise sine. The data type of the
+        returned array is determined by the Type Promotion Rules.
+"""
+
+sin = UnaryElementwiseFunc("sin", ti._sin_result_type, ti._sin, _sin_docstring)
+del _sin_docstring
+
+# U31: ==== SINH        (x)
+_sinh_docstring = r"""
+sinh(x, /, \*, out=None, order='K')
+
+Computes hyperbolic sine for each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise hyperbolic sine. The data type
+        of the returned array is determined by the Type Promotion Rules.
+"""
+
+sinh = UnaryElementwiseFunc(
+    "sinh", ti._sinh_result_type, ti._sinh, _sinh_docstring
+)
+del _sinh_docstring
+
+# U32: ==== SQUARE      (x)
+_square_docstring_ = r"""
+square(x, /, \*, out=None, order='K')
+
+Squares each element `x_i` of input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array. May have any data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise squares of `x`. The data type of
+        the returned array is determined by the Type Promotion Rules.
+"""
+
+square = UnaryElementwiseFunc(
+    "square", ti._square_result_type, ti._square, _square_docstring_
+)
+del _square_docstring_
+
+# U33: ==== SQRT        (x)
+_sqrt_docstring_ = r"""
+sqrt(x, /, \*, out=None, order='K')
+
+Computes the positive square-root for each element `x_i` of input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise positive square-roots of `x`. The
+        data type of the returned array is determined by the Type Promotion
+        Rules.
+"""
+
+sqrt = UnaryElementwiseFunc(
+    "sqrt", ti._sqrt_result_type, ti._sqrt, _sqrt_docstring_
+)
+del _sqrt_docstring_
+
+# U34: ==== TAN         (x)
+_tan_docstring = r"""
+tan(x, /, \*, out=None, order='K')
+
+Computes tangent for each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise tangent. The data type
+        of the returned array is determined by the Type Promotion Rules.
+"""
+
+tan = UnaryElementwiseFunc("tan", ti._tan_result_type, ti._tan, _tan_docstring)
+del _tan_docstring
+
+# U35: ==== TANH        (x)
+_tanh_docstring = r"""
+tanh(x, /, \*, out=None, order='K')
+
+Computes hyperbolic tangent for each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise hyperbolic tangent. The data type
+        of the returned array is determined by the Type Promotion Rules.
+"""
+
+tanh = UnaryElementwiseFunc(
+    "tanh", ti._tanh_result_type, ti._tanh, _tanh_docstring
+)
+del _tanh_docstring
+
+# U36: ==== TRUNC       (x)
+_trunc_docstring = r"""
+trunc(x, /, \*, out=None, order='K')
+
+Returns the truncated value for each element `x_i` for input array `x`.
+
+The truncated value of the scalar `x` is the nearest integer i which is
+closer to zero than `x` is. In short, the fractional part of the
+signed number `x` is discarded.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a boolean or real-valued data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise truncated values. The data type
+        of the returned array is determined by the Type Promotion Rules.
+"""
+trunc = UnaryElementwiseFunc(
+    "trunc", ti._trunc_result_type, ti._trunc, _trunc_docstring
+)
+del _trunc_docstring
+
+# U37: ==== CBRT        (x)
+_cbrt_docstring_ = r"""
+cbrt(x, /, \*, out=None, order='K')
+
+Computes the cube-root for each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a real-valued floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise cube-root.
+        The data type of the returned array is determined by
+        the Type Promotion Rules.
+"""
+
+cbrt = UnaryElementwiseFunc(
+    "cbrt", ti._cbrt_result_type, ti._cbrt, _cbrt_docstring_
+)
+del _cbrt_docstring_
+
+# U38: ==== EXP2        (x)
+_exp2_docstring_ = r"""
+exp2(x, /, \*, out=None, order='K')
+
+Computes the base-2 exponential for each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise base-2 exponentials.
+        The data type of the returned array is determined by
+        the Type Promotion Rules.
+"""
+
+exp2 = UnaryElementwiseFunc(
+    "exp2", ti._exp2_result_type, ti._exp2, _exp2_docstring_
+)
+del _exp2_docstring_
+
+# U39: ==== RSQRT        (x)
+_rsqrt_docstring_ = r"""
+rsqrt(x, /, \*, out=None, order='K')
+
+Computes the reciprocal square-root for each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a real-valued floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise reciprocal square-root.
+        The returned array has a floating-point data type determined by
+        the Type Promotion Rules.
+"""
+
+rsqrt = UnaryElementwiseFunc(
+    "rsqrt", ti._rsqrt_result_type, ti._rsqrt, _rsqrt_docstring_
+)
+del _rsqrt_docstring_
+
+# U40: ==== PROJ        (x)
+_proj_docstring = r"""
+proj(x, /, \*, out=None, order='K')
+
+Computes projection of each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a complex data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise projection.
+"""
+
+proj = UnaryElementwiseFunc(
+    "proj", ti._proj_result_type, ti._proj, _proj_docstring
+)
+del _proj_docstring
+
+# U41: ==== SIGNBIT        (x)
+_signbit_docstring = r"""
+signbit(x, /, \*, out=None, order='K')
+
+Computes an indication of whether the sign bit of each element `x_i` of
+input array `x` is set.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a real-valued floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise signbit results. The returned array
+        must have a data type of `bool`.
+"""
+
+signbit = UnaryElementwiseFunc(
+    "signbit", ti._signbit_result_type, ti._signbit, _signbit_docstring
+)
+del _signbit_docstring
+
+# U42: ==== RECIPROCAL        (x)
+_reciprocal_docstring = r"""
+reciprocal(x, /, \*, out=None, order='K')
+
+Computes the reciprocal of each element `x_i` for input array `x`.
+
+Args:
+    x (usm_ndarray):
+        Input array, expected to have a floating-point data type.
+    out (Union[usm_ndarray, None], optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter
+        `out` is ``None``.
+        Default: "K".
+
+Returns:
+    usm_ndarray:
+        An array containing the element-wise reciprocals.
+        The returned array has a floating-point data type determined
+        by the Type Promotion Rules.
+"""
+
+reciprocal = UnaryElementwiseFunc(
+    "reciprocal",
+    ti._reciprocal_result_type,
+    ti._reciprocal,
+    _reciprocal_docstring,
+    acceptance_fn=_acceptance_fn_reciprocal,
+)
+del _reciprocal_docstring
+
 # U43: ==== ANGLE        (x)
 _angle_docstring = r"""
 angle(x, /, \*, out=None, order='K')
diff --git a/dpctl_ext/tensor/_indexing_functions.py b/dpctl_ext/tensor/_indexing_functions.py
index 91ffc759a920..5b4eb1aaf7a2 100644
--- a/dpctl_ext/tensor/_indexing_functions.py
+++ b/dpctl_ext/tensor/_indexing_functions.py
@@ -190,7 +190,7 @@ def place(arr, mask, vals):
     if vals.dtype == arr.dtype:
         rhs = vals
     else:
-        rhs = dpt.astype(vals, arr.dtype)
+        rhs = dpt_ext.astype(vals, arr.dtype)
     hev, pl_ev = ti._place(
         dst=arr,
         cumsum=cumsum,
diff --git a/dpctl_ext/tensor/_reshape.py b/dpctl_ext/tensor/_reshape.py
index b7b6b068bfd0..23cf47a83568 100644
--- a/dpctl_ext/tensor/_reshape.py
+++ b/dpctl_ext/tensor/_reshape.py
@@ -31,16 +31,17 @@
 import dpctl.tensor as dpt
 import dpctl.utils
 import numpy as np
-from dpctl.tensor._tensor_impl import (
-    _copy_usm_ndarray_for_reshape,
-    _ravel_multi_index,
-    _unravel_index,
-)
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
 
+from ._tensor_impl import (
+    _copy_usm_ndarray_for_reshape,
+    _ravel_multi_index,
+    _unravel_index,
+)
+
 __doc__ = "Implementation module for :func:`dpctl.tensor.reshape`."
 
 
diff --git a/dpctl_ext/tensor/_search_functions.py b/dpctl_ext/tensor/_search_functions.py
index 26100b0479f7..285a02b42bb8 100644
--- a/dpctl_ext/tensor/_search_functions.py
+++ b/dpctl_ext/tensor/_search_functions.py
@@ -34,9 +34,9 @@
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
 import dpctl_ext.tensor._tensor_impl as ti
-from dpctl_ext.tensor._manipulation_functions import _broadcast_shape_impl
 
 from ._copy_utils import _empty_like_orderK, _empty_like_triple_orderK
+from ._manipulation_functions import _broadcast_shape_impl
 from ._scalar_utils import (
     _get_dtype,
     _get_queue_usm_type,
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/clip.hpp b/dpctl_ext/tensor/libtensor/include/kernels/clip.hpp
index 7ce50b2b62bc..58a86a8f82d6 100644
--- a/dpctl_ext/tensor/libtensor/include/kernels/clip.hpp
+++ b/dpctl_ext/tensor/libtensor/include/kernels/clip.hpp
@@ -35,10 +35,10 @@
 #pragma once
 #include <algorithm>
 #include <cmath>
-#include <complex>
 #include <cstddef>
 #include <cstdint>
 #include <type_traits>
+#include <vector>
 
 #include <sycl/sycl.hpp>
 
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp b/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp
index 27074cd2d246..67f2502067ca 100644
--- a/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp
+++ b/dpctl_ext/tensor/libtensor/include/kernels/constructors.hpp
@@ -33,9 +33,10 @@
 //===----------------------------------------------------------------------===//
 
 #pragma once
+
 #include <array>
-#include <complex>
 #include <cstddef>
+#include <type_traits>
 #include <vector>
 
 #include <sycl/sycl.hpp>
@@ -130,7 +131,7 @@ sycl::event lin_space_step_impl(sycl::queue &exec_q,
 }
 
 // Constructor to populate tensor with linear sequence defined by
-// start and and data
+// start and data
 
 template <typename Ty, typename wTy>
 class LinearSequenceAffineFunctor
@@ -191,7 +192,7 @@ class LinearSequenceAffineFunctor
  *
  * @param exec_q  Sycl queue to which kernel is submitted for execution.
  * @param nelems  Length of the sequence.
- * @param start_v Stating value of the sequence.
+ * @param start_v Starting value of the sequence.
  * @param end_v   End-value of the sequence.
  * @param include_endpoint  Whether the end-value is included in the sequence.
  * @param array_data Kernel accessible USM pointer to the start of array to be
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp
new file mode 100644
index 000000000000..072ee2b153ba
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/cbrt.hpp
@@ -0,0 +1,209 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of CBRT(x)
+/// function that computes a cube root.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+
+namespace dpctl::tensor::kernels::cbrt
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+template <typename argT, typename resT>
+struct CbrtFunctor
+{
+
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argT and resT support subgroup store/load operations
+    using supports_sg_loadstore = typename std::true_type;
+
+    resT operator()(const argT &in) const
+    {
+        return sycl::cbrt(in);
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using CbrtContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           CbrtFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using CbrtStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, CbrtFunctor<argTy, resTy>>;
+
+template <typename T>
+struct CbrtOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float, float>,
+        td_ns::TypeMapResultEntry<T, double, double>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+template <typename argTy>
+struct CbrtContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class cbrt_contig_kernel;
+
+template <typename argTy>
+sycl::event cbrt_contig_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             const char *arg_p,
+                             char *res_p,
+                             const std::vector<sycl::event> &depends = {})
+{
+    using CbrtHS = hyperparam_detail::CbrtContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vec_sz = CbrtHS::vec_sz;
+    static constexpr std::uint8_t n_vecs = CbrtHS::n_vecs;
+
+    return elementwise_common::unary_contig_impl<
+        argTy, CbrtOutputType, CbrtContigFunctor, cbrt_contig_kernel, vec_sz,
+        n_vecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct CbrtContigFactory
+{
+    fnT get()
+    {
+        if constexpr (!CbrtOutputType<T>::is_defined) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = cbrt_contig_impl<T>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct CbrtTypeMapFactory
+{
+    /*! @brief get typeid for output type of sycl::cbrt(T x) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename CbrtOutputType<T>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class cbrt_strided_kernel;
+
+template <typename argTy>
+sycl::event
+    cbrt_strided_impl(sycl::queue &exec_q,
+                      std::size_t nelems,
+                      int nd,
+                      const ssize_t *shape_and_strides,
+                      const char *arg_p,
+                      ssize_t arg_offset,
+                      char *res_p,
+                      ssize_t res_offset,
+                      const std::vector<sycl::event> &depends,
+                      const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, CbrtOutputType, CbrtStridedFunctor, cbrt_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct CbrtStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (!CbrtOutputType<T>::is_defined) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = cbrt_strided_impl<T>;
+            return fn;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::cbrt
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/common.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/common.hpp
index d19930b722a9..e83426df8aa9 100644
--- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/common.hpp
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/common.hpp
@@ -33,11 +33,8 @@
 #pragma once
 
 #include <algorithm>
-#include <cmath>
-#include <complex>
 #include <cstddef>
 #include <cstdint>
-#include <limits>
 #include <type_traits>
 #include <vector>
 
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp
new file mode 100644
index 000000000000..39a5a4906a27
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/exp2.hpp
@@ -0,0 +1,271 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of EXP2(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::exp2
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct Exp2Functor
+{
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argT and resT support subgroup store/load operations
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in) const
+    {
+        if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type;
+
+            const argT tmp = in * sycl::log(realT(2));
+
+            static constexpr realT q_nan =
+                std::numeric_limits<realT>::quiet_NaN();
+
+            const realT x = std::real(tmp);
+            const realT y = std::imag(tmp);
+            if (std::isfinite(x)) {
+                if (std::isfinite(y)) {
+                    return exprm_ns::exp(exprm_ns::complex<realT>(tmp));
+                }
+                else {
+                    return resT{q_nan, q_nan};
+                }
+            }
+            else if (std::isnan(x)) {
+                /* x is nan */
+                if (y == realT(0)) {
+                    return resT{in};
+                }
+                else {
+                    return resT{x, q_nan};
+                }
+            }
+            else {
+                if (!sycl::signbit(x)) { /* x is +inf */
+                    if (y == realT(0)) {
+                        return resT{x, y};
+                    }
+                    else if (std::isfinite(y)) {
+                        return resT{x * sycl::cos(y), x * sycl::sin(y)};
+                    }
+                    else {
+                        /* x = +inf, y = +-inf || nan */
+                        return resT{x, q_nan};
+                    }
+                }
+                else { /* x is -inf */
+                    if (std::isfinite(y)) {
+                        realT exp_x = sycl::exp(x);
+                        return resT{exp_x * sycl::cos(y), exp_x * sycl::sin(y)};
+                    }
+                    else {
+                        /* x = -inf, y = +-inf || nan */
+                        return resT{0, 0};
+                    }
+                }
+            }
+        }
+        else {
+            return sycl::exp2(in);
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using Exp2ContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           Exp2Functor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using Exp2StridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, Exp2Functor<argTy, resTy>>;
+
+template <typename T>
+struct Exp2OutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+template <typename argTy>
+struct Exp2ContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class exp2_contig_kernel;
+
+template <typename argTy>
+sycl::event exp2_contig_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             const char *arg_p,
+                             char *res_p,
+                             const std::vector<sycl::event> &depends = {})
+{
+    using Exp2HS = hyperparam_detail::Exp2ContigHyperparameterSet<argTy>;
+
+    static constexpr std::uint8_t vec_sz = Exp2HS::vec_sz;
+    static constexpr std::uint8_t n_vecs = Exp2HS::n_vecs;
+
+    return elementwise_common::unary_contig_impl<
+        argTy, Exp2OutputType, Exp2ContigFunctor, exp2_contig_kernel, vec_sz,
+        n_vecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct Exp2ContigFactory
+{
+    fnT get()
+    {
+        if constexpr (!Exp2OutputType<T>::is_defined) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = exp2_contig_impl<T>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct Exp2TypeMapFactory
+{
+    /*! @brief get typeid for output type of sycl::exp2(T x) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename Exp2OutputType<T>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class exp2_strided_kernel;
+
+template <typename argTy>
+sycl::event
+    exp2_strided_impl(sycl::queue &exec_q,
+                      std::size_t nelems,
+                      int nd,
+                      const ssize_t *shape_and_strides,
+                      const char *arg_p,
+                      ssize_t arg_offset,
+                      char *res_p,
+                      ssize_t res_offset,
+                      const std::vector<sycl::event> &depends,
+                      const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, Exp2OutputType, Exp2StridedFunctor, exp2_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct Exp2StridedFactory
+{
+    fnT get()
+    {
+        if constexpr (!Exp2OutputType<T>::is_defined) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = exp2_strided_impl<T>;
+            return fn;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::exp2
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp
index 93d91904a7f9..af93b089f0b2 100644
--- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/logaddexp.hpp
@@ -47,7 +47,6 @@
 #include "vec_size_util.hpp"
 
 #include "utils/math_utils.hpp"
-#include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
 
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp
index 067ccd84f059..f204b6640042 100644
--- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/maximum.hpp
@@ -45,7 +45,6 @@
 #include "vec_size_util.hpp"
 
 #include "utils/math_utils.hpp"
-#include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
 
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp
index a38945f89a25..d18577a5cf4e 100644
--- a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/minimum.hpp
@@ -44,7 +44,6 @@
 #include "vec_size_util.hpp"
 
 #include "utils/math_utils.hpp"
-#include "utils/offset_utils.hpp"
 #include "utils/type_dispatch_building.hpp"
 #include "utils/type_utils.hpp"
 
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp
new file mode 100644
index 000000000000..039da657cfd2
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/proj.hpp
@@ -0,0 +1,239 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of PROJ(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cmath>
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::proj
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct ProjFunctor
+{
+
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argT and resT support subgroup store/load operations
+    using supports_sg_loadstore = typename std::false_type;
+
+    resT operator()(const argT &in) const
+    {
+        using realT = typename argT::value_type;
+        const realT x = std::real(in);
+        const realT y = std::imag(in);
+
+        if (std::isinf(x)) {
+            return value_at_infinity(y);
+        }
+        else if (std::isinf(y)) {
+            return value_at_infinity(y);
+        }
+        else {
+            return in;
+        }
+    }
+
+private:
+    template <typename T>
+    std::complex<T> value_at_infinity(const T &y) const
+    {
+        const T res_im = sycl::copysign(T(0), y);
+        return std::complex<T>{std::numeric_limits<T>::infinity(), res_im};
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using ProjContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           ProjFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using ProjStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, ProjFunctor<argTy, resTy>>;
+
+template <typename T>
+struct ProjOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+template <typename argTy>
+struct ProjContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class proj_contig_kernel;
+
+template <typename argTy>
+sycl::event proj_contig_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             const char *arg_p,
+                             char *res_p,
+                             const std::vector<sycl::event> &depends = {})
+{
+    using ProjHS = hyperparam_detail::ProjContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vec_sz = ProjHS::vec_sz;
+    static constexpr std::uint8_t n_vecs = ProjHS::n_vecs;
+
+    return elementwise_common::unary_contig_impl<
+        argTy, ProjOutputType, ProjContigFunctor, proj_contig_kernel, vec_sz,
+        n_vecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct ProjContigFactory
+{
+    fnT get()
+    {
+        if constexpr (!ProjOutputType<T>::is_defined) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            if constexpr (std::is_same_v<T, std::complex<double>>) {
+                fnT fn = proj_contig_impl<T>;
+                return fn;
+            }
+            else {
+                fnT fn = proj_contig_impl<T>;
+                return fn;
+            }
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct ProjTypeMapFactory
+{
+    /*! @brief get typeid for output type of std::proj(T x) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename ProjOutputType<T>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class proj_strided_kernel;
+
+template <typename argTy>
+sycl::event
+    proj_strided_impl(sycl::queue &exec_q,
+                      std::size_t nelems,
+                      int nd,
+                      const ssize_t *shape_and_strides,
+                      const char *arg_p,
+                      ssize_t arg_offset,
+                      char *res_p,
+                      ssize_t res_offset,
+                      const std::vector<sycl::event> &depends,
+                      const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, ProjOutputType, ProjStridedFunctor, proj_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct ProjStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (!ProjOutputType<T>::is_defined) {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = proj_strided_impl<T>;
+            return fn;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::proj
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/real.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/real.hpp
new file mode 100644
index 000000000000..d21a9e6baa7d
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/real.hpp
@@ -0,0 +1,231 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of REAL(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::real
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+using dpctl::tensor::type_utils::is_complex_v;
+
+template <typename argT, typename resT>
+struct RealFunctor
+{
+    // Scalar REAL(x): std::real of a complex argument; any other argument
+    // is returned unchanged.
+    // the result is never a compile-time constant for any argT
+    using is_constant = std::false_type;
+    // the functor provides no sycl::vec overload
+    using supports_vec = std::false_type;
+    // sub-group load/store only when neither argT nor resT is complex
+    using supports_sg_loadstore =
+        std::negation<std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in) const
+    {
+        if constexpr (!is_complex_v<argT>) {
+            // pass-through branch: argument and result types must agree
+            static_assert(std::is_same_v<resT, argT>);
+            return in;
+        }
+        else {
+            return std::real(in);
+        }
+    }
+};
+
+// RealFunctor plugged into the shared
+// elementwise_common::UnaryContigFunctor template.
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using RealContigFunctor = elementwise_common::UnaryContigFunctor<
+    argTy, resTy, RealFunctor<argTy, resTy>,
+    vec_sz, n_vecs, enable_sg_loadstore>;
+
+// RealFunctor plugged into elementwise_common::UnaryStridedFunctor.
+template <typename argTy, typename resTy, typename IndexerT>
+using RealStridedFunctor =
+    elementwise_common::UnaryStridedFunctor<argTy, resTy, IndexerT,
+                                            RealFunctor<argTy, resTy>>;
+
+template <typename T>
+struct RealOutputType
+{
+    // result type of REAL for T; is_defined is false iff this is void
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, bool>,
+        td_ns::TypeMapResultEntry<T, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::uint16_t>,
+        td_ns::TypeMapResultEntry<T, std::uint32_t>,
+        td_ns::TypeMapResultEntry<T, std::uint64_t>,
+        td_ns::TypeMapResultEntry<T, std::int8_t>,
+        td_ns::TypeMapResultEntry<T, std::int16_t>,
+        td_ns::TypeMapResultEntry<T, std::int32_t>,
+        td_ns::TypeMapResultEntry<T, std::int64_t>,
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>, float>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>, double>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+    static constexpr bool is_defined = !std::is_same<value_type, void>::value;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+// vec_sz / n_vecs for the contiguous REAL kernel. Only the
+// ContigHyperparameterSetDefault<4u, 2u> entry is listed, whatever argTy is.
+template <typename argTy>
+struct RealContigHyperparameterSet
+{
+    using value_type = std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+    static constexpr auto vec_sz = value_type::vec_sz;
+    static constexpr auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+// Declared only; used as a template argument of unary_contig_impl below.
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class real_contig_kernel;
+
+// Contiguous REAL: looks up vec_sz/n_vecs for argTy and forwards the call to
+// elementwise_common::unary_contig_impl, returning the event it yields.
+template <typename argTy>
+sycl::event real_contig_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             const char *arg_p,
+                             char *res_p,
+                             const std::vector<sycl::event> &depends = {})
+{
+    using Params = hyperparam_detail::RealContigHyperparameterSet<argTy>;
+    return elementwise_common::unary_contig_impl<
+        argTy, RealOutputType, RealContigFunctor, real_contig_kernel,
+        Params::vec_sz, Params::n_vecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct RealContigFactory
+{
+    // Returns the contiguous REAL implementation for T, or nullptr when
+    // RealOutputType<T>::is_defined is false.
+    fnT get()
+    {
+        if constexpr (RealOutputType<T>::is_defined) {
+            return real_contig_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct RealTypeMapFactory
+{
+    /*! @brief type id of RealOutputType<T>::value_type; fnT must be int */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resultT = typename RealOutputType<T>::value_type;
+        return td_ns::GetTypeid<resultT>{}.get();
+    }
+};
+
+// Declared only; used as a template argument of unary_strided_impl below.
+template <typename T1, typename T2, typename T3>
+class real_strided_kernel;
+
+// Strided REAL entry point: hands all arguments, unchanged, to
+// elementwise_common::unary_strided_impl specialized with RealOutputType,
+// RealStridedFunctor and real_strided_kernel, and returns its sycl::event.
+template <typename argTy>
+sycl::event real_strided_impl(
+    sycl::queue &exec_q, std::size_t nelems, int nd,
+    const ssize_t *shape_and_strides,
+    const char *arg_p, ssize_t arg_offset,
+    char *res_p, ssize_t res_offset,
+    const std::vector<sycl::event> &depends,
+    const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, RealOutputType, RealStridedFunctor, real_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct RealStridedFactory
+{
+    // Returns the strided REAL implementation for T, or nullptr when
+    // RealOutputType<T>::is_defined is false.
+    fnT get()
+    {
+        if constexpr (RealOutputType<T>::is_defined) {
+            return real_strided_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::real
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/reciprocal.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/reciprocal.hpp
new file mode 100644
index 000000000000..f26f4043c9ab
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/reciprocal.hpp
@@ -0,0 +1,229 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of RECIPROCAL(x)
+/// function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::reciprocal
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct ReciprocalFunctor
+{
+    // Scalar RECIPROCAL(x) = 1 / x.
+    // the result is never a compile-time constant for any argT
+    using is_constant = std::false_type;
+    // the functor provides no sycl::vec overload
+    using supports_vec = std::false_type;
+    // sub-group load/store only when neither argT nor resT is complex
+    using supports_sg_loadstore =
+        std::negation<std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in) const
+    {
+        if constexpr (!is_complex<argT>::value) {
+            return argT(1) / in;
+        }
+        else {
+            // the complex division is carried out on exprm_ns::complex;
+            // the quotient is returned as resT
+            using realT = typename argT::value_type;
+            const exprm_ns::complex<realT> z(in);
+            return realT(1) / z;
+        }
+    }
+};
+
+// ReciprocalFunctor plugged into the shared
+// elementwise_common::UnaryContigFunctor template.
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using ReciprocalContigFunctor = elementwise_common::UnaryContigFunctor<
+    argTy, resTy, ReciprocalFunctor<argTy, resTy>,
+    vec_sz, n_vecs, enable_sg_loadstore>;
+
+// ReciprocalFunctor plugged into the shared
+// elementwise_common::UnaryStridedFunctor template.
+template <typename argTy, typename resTy, typename IndexerT>
+using ReciprocalStridedFunctor = elementwise_common::UnaryStridedFunctor<
+    argTy,
+    resTy,
+    IndexerT,
+    ReciprocalFunctor<argTy, resTy>>;
+
+template <typename T>
+struct ReciprocalOutputType
+{
+    // result type of RECIPROCAL for T; is_defined is false iff this is void
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+    static constexpr bool is_defined = !std::is_same<value_type, void>::value;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+// vec_sz / n_vecs for the contiguous RECIPROCAL kernel. Only the
+// ContigHyperparameterSetDefault<4u, 2u> entry is listed, whatever argTy is.
+template <typename argTy>
+struct ReciprocalContigHyperparameterSet
+{
+    using value_type = std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+    static constexpr auto vec_sz = value_type::vec_sz;
+    static constexpr auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+// Declared only; used as a template argument of unary_contig_impl below.
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class reciprocal_contig_kernel;
+
+// Contiguous RECIPROCAL: looks up vec_sz/n_vecs for argTy and forwards the
+// call to elementwise_common::unary_contig_impl, returning its event.
+template <typename argTy>
+sycl::event reciprocal_contig_impl(sycl::queue &exec_q,
+                                   std::size_t nelems,
+                                   const char *arg_p,
+                                   char *res_p,
+                                   const std::vector<sycl::event> &depends = {})
+{
+    using Params = hyperparam_detail::ReciprocalContigHyperparameterSet<argTy>;
+    return elementwise_common::unary_contig_impl<
+        argTy, ReciprocalOutputType, ReciprocalContigFunctor,
+        reciprocal_contig_kernel, Params::vec_sz, Params::n_vecs>(
+        exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct ReciprocalContigFactory
+{
+    // Returns the contiguous RECIPROCAL implementation for T, or nullptr
+    // when ReciprocalOutputType<T>::is_defined is false.
+    fnT get()
+    {
+        if constexpr (ReciprocalOutputType<T>::is_defined) {
+            return reciprocal_contig_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct ReciprocalTypeMapFactory
+{
+    /*! @brief type id of the result type of 1 / x; fnT must be int */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resultT = typename ReciprocalOutputType<T>::value_type;
+        return td_ns::GetTypeid<resultT>{}.get();
+    }
+};
+
+// Declared only; used as a template argument of unary_strided_impl below.
+template <typename T1, typename T2, typename T3>
+class reciprocal_strided_kernel;
+
+// Strided RECIPROCAL entry point: hands all arguments, unchanged, to
+// elementwise_common::unary_strided_impl specialized with the RECIPROCAL
+// output-type map, strided functor and kernel type; returns its sycl::event.
+template <typename argTy>
+sycl::event reciprocal_strided_impl(
+    sycl::queue &exec_q, std::size_t nelems, int nd,
+    const ssize_t *shape_and_strides,
+    const char *arg_p, ssize_t arg_offset,
+    char *res_p, ssize_t res_offset,
+    const std::vector<sycl::event> &depends,
+    const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, ReciprocalOutputType, ReciprocalStridedFunctor,
+        reciprocal_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct ReciprocalStridedFactory
+{
+    // Returns the strided RECIPROCAL implementation for T, or nullptr
+    // when ReciprocalOutputType<T>::is_defined is false.
+    fnT get()
+    {
+        if constexpr (ReciprocalOutputType<T>::is_defined) {
+            return reciprocal_strided_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::reciprocal
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/round.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/round.hpp
new file mode 100644
index 000000000000..b20166a4d505
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/round.hpp
@@ -0,0 +1,241 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of ROUND(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::round
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct RoundFunctor
+{
+    // Scalar ROUND(x): integers pass through unchanged, floating-point
+    // values go through sycl::rint, complex values are rounded per component.
+    // the result is never a compile-time constant for any argT
+    using is_constant = std::false_type;
+    // the functor provides no sycl::vec overload
+    using supports_vec = std::false_type;
+    // sub-group load/store only when neither argT nor resT is complex
+    using supports_sg_loadstore =
+        std::negation<std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in) const
+    {
+        if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type;
+            // real and imaginary parts are rounded independently
+            const realT re = to_nearest<realT>(std::real(in));
+            const realT im = to_nearest<realT>(std::imag(in));
+            return resT{re, im};
+        }
+        else if constexpr (std::is_integral_v<argT>) {
+            return in;
+        }
+        else {
+            return to_nearest<argT>(in);
+        }
+    }
+
+private:
+    template <typename T>
+    T to_nearest(const T &value) const
+    {
+        return sycl::rint(value);
+    }
+};
+
+// RoundFunctor plugged into the shared
+// elementwise_common::UnaryContigFunctor template.
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using RoundContigFunctor = elementwise_common::UnaryContigFunctor<
+    argTy, resTy, RoundFunctor<argTy, resTy>,
+    vec_sz, n_vecs, enable_sg_loadstore>;
+
+// RoundFunctor plugged into elementwise_common::UnaryStridedFunctor.
+template <typename argTy, typename resTy, typename IndexerT>
+using RoundStridedFunctor =
+    elementwise_common::UnaryStridedFunctor<argTy, resTy, IndexerT,
+                                            RoundFunctor<argTy, resTy>>;
+
+template <typename T>
+struct RoundOutputType
+{
+    // result type of ROUND for T; is_defined is false iff this is void
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::uint16_t>,
+        td_ns::TypeMapResultEntry<T, std::uint32_t>,
+        td_ns::TypeMapResultEntry<T, std::uint64_t>,
+        td_ns::TypeMapResultEntry<T, std::int8_t>,
+        td_ns::TypeMapResultEntry<T, std::int16_t>,
+        td_ns::TypeMapResultEntry<T, std::int32_t>,
+        td_ns::TypeMapResultEntry<T, std::int64_t>,
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+    static constexpr bool is_defined = !std::is_same<value_type, void>::value;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+// vec_sz / n_vecs for the contiguous ROUND kernel. Only the
+// ContigHyperparameterSetDefault<4u, 2u> entry is listed, whatever argTy is.
+template <typename argTy>
+struct RoundContigHyperparameterSet
+{
+    using value_type = std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+    static constexpr auto vec_sz = value_type::vec_sz;
+    static constexpr auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+// Declared only; used as a template argument of unary_contig_impl below.
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class round_contig_kernel;
+
+// Contiguous ROUND: looks up vec_sz/n_vecs for argTy and forwards the call
+// to elementwise_common::unary_contig_impl, returning the event it yields.
+template <typename argTy>
+sycl::event round_contig_impl(sycl::queue &exec_q,
+                              std::size_t nelems,
+                              const char *arg_p,
+                              char *res_p,
+                              const std::vector<sycl::event> &depends = {})
+{
+    using Params = hyperparam_detail::RoundContigHyperparameterSet<argTy>;
+    return elementwise_common::unary_contig_impl<
+        argTy, RoundOutputType, RoundContigFunctor, round_contig_kernel,
+        Params::vec_sz, Params::n_vecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct RoundContigFactory
+{
+    // Returns the contiguous ROUND implementation for T, or nullptr when
+    // RoundOutputType<T>::is_defined is false.
+    fnT get()
+    {
+        if constexpr (RoundOutputType<T>::is_defined) {
+            return round_contig_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct RoundTypeMapFactory
+{
+    /*! @brief type id of RoundOutputType<T>::value_type; fnT must be int */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resultT = typename RoundOutputType<T>::value_type;
+        return td_ns::GetTypeid<resultT>{}.get();
+    }
+};
+
+// Declared only; used as a template argument of unary_strided_impl below.
+template <typename T1, typename T2, typename T3>
+class round_strided_kernel;
+
+// Strided ROUND entry point: hands all arguments, unchanged, to
+// elementwise_common::unary_strided_impl specialized with RoundOutputType,
+// RoundStridedFunctor and round_strided_kernel, and returns its sycl::event.
+template <typename argTy>
+sycl::event round_strided_impl(
+    sycl::queue &exec_q, std::size_t nelems, int nd,
+    const ssize_t *shape_and_strides,
+    const char *arg_p, ssize_t arg_offset,
+    char *res_p, ssize_t res_offset,
+    const std::vector<sycl::event> &depends,
+    const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, RoundOutputType, RoundStridedFunctor, round_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct RoundStridedFactory
+{
+    // Returns the strided ROUND implementation for T, or nullptr when
+    // RoundOutputType<T>::is_defined is false.
+    fnT get()
+    {
+        if constexpr (RoundOutputType<T>::is_defined) {
+            return round_strided_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::round
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp
new file mode 100644
index 000000000000..0228aecdca67
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/rsqrt.hpp
@@ -0,0 +1,209 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of RSQRT(x)
+/// function that computes the reciprocal square root.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+
+namespace dpctl::tensor::kernels::rsqrt
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+template <typename argT, typename resT>
+struct RsqrtFunctor
+{
+    // Scalar RSQRT(x): the argument is handed to sycl::rsqrt and the
+    // value it returns is converted to resT.
+    // the result is never a compile-time constant for any argT
+    using is_constant = std::false_type;
+    // the functor provides no sycl::vec overload
+    using supports_vec = std::false_type;
+    // sub-group load/store is unconditionally flagged as supported
+    using supports_sg_loadstore = std::true_type;
+
+    resT operator()(const argT &in) const
+    {
+        const resT out = sycl::rsqrt(in);
+        return out;
+    }
+};
+
+// RsqrtFunctor plugged into the shared
+// elementwise_common::UnaryContigFunctor template.
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using RsqrtContigFunctor = elementwise_common::UnaryContigFunctor<
+    argTy, resTy, RsqrtFunctor<argTy, resTy>,
+    vec_sz, n_vecs, enable_sg_loadstore>;
+
+// RsqrtFunctor plugged into elementwise_common::UnaryStridedFunctor.
+template <typename argTy, typename resTy, typename IndexerT>
+using RsqrtStridedFunctor =
+    elementwise_common::UnaryStridedFunctor<argTy, resTy, IndexerT,
+                                            RsqrtFunctor<argTy, resTy>>;
+
+template <typename T>
+struct RsqrtOutputType
+{
+    // result type of RSQRT for T; is_defined is false iff this is void
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float, float>,
+        td_ns::TypeMapResultEntry<T, double, double>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+    static constexpr bool is_defined = !std::is_same<value_type, void>::value;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+// vec_sz / n_vecs for the contiguous RSQRT kernel. Only the
+// ContigHyperparameterSetDefault<4u, 2u> entry is listed, whatever argTy is.
+template <typename argTy>
+struct RsqrtContigHyperparameterSet
+{
+    using value_type = std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+    static constexpr auto vec_sz = value_type::vec_sz;
+    static constexpr auto n_vecs = value_type::n_vecs;
+};
+
+} // namespace hyperparam_detail
+
+// Declared only; used as a template argument of unary_contig_impl below.
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class rsqrt_contig_kernel;
+
+// Contiguous RSQRT: looks up vec_sz/n_vecs for argTy and forwards the call
+// to elementwise_common::unary_contig_impl, returning the event it yields.
+template <typename argTy>
+sycl::event rsqrt_contig_impl(sycl::queue &exec_q,
+                              std::size_t nelems,
+                              const char *arg_p,
+                              char *res_p,
+                              const std::vector<sycl::event> &depends = {})
+{
+    using Params = hyperparam_detail::RsqrtContigHyperparameterSet<argTy>;
+    return elementwise_common::unary_contig_impl<
+        argTy, RsqrtOutputType, RsqrtContigFunctor, rsqrt_contig_kernel,
+        Params::vec_sz, Params::n_vecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct RsqrtContigFactory
+{
+    // Returns the contiguous RSQRT implementation for T, or nullptr when
+    // RsqrtOutputType<T>::is_defined is false.
+    fnT get()
+    {
+        if constexpr (RsqrtOutputType<T>::is_defined) {
+            return rsqrt_contig_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct RsqrtTypeMapFactory
+{
+    /*! @brief type id of RsqrtOutputType<T>::value_type; fnT must be int */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resultT = typename RsqrtOutputType<T>::value_type;
+        return td_ns::GetTypeid<resultT>{}.get();
+    }
+};
+
+// Declared only; used as a template argument of unary_strided_impl below.
+template <typename T1, typename T2, typename T3>
+class rsqrt_strided_kernel;
+
+// Strided RSQRT entry point: hands all arguments, unchanged, to
+// elementwise_common::unary_strided_impl specialized with RsqrtOutputType,
+// RsqrtStridedFunctor and rsqrt_strided_kernel, and returns its sycl::event.
+template <typename argTy>
+sycl::event rsqrt_strided_impl(
+    sycl::queue &exec_q, std::size_t nelems, int nd,
+    const ssize_t *shape_and_strides,
+    const char *arg_p, ssize_t arg_offset,
+    char *res_p, ssize_t res_offset,
+    const std::vector<sycl::event> &depends,
+    const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, RsqrtOutputType, RsqrtStridedFunctor, rsqrt_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct RsqrtStridedFactory
+{
+    // Returns the strided RSQRT implementation for T, or nullptr when
+    // RsqrtOutputType<T>::is_defined is false.
+    fnT get()
+    {
+        if constexpr (RsqrtOutputType<T>::is_defined) {
+            return rsqrt_strided_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::rsqrt
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp
new file mode 100644
index 000000000000..ceb3d1320f9c
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sign.hpp
@@ -0,0 +1,258 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of SIGN(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cmath>
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "cabs_impl.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::sign
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+using dpctl::tensor::type_utils::vec_cast;
+
+template <typename argT, typename resT>
+struct SignFunctor
+{
+    static_assert(std::is_same_v<resT, argT>);
+    // SIGN(x) is not a constant function for any argT
+    using is_constant = typename std::false_type;
+    // no sycl::vec overload of operator() is provided
+    using supports_vec = typename std::false_type;
+    // sub-group load/store is declared usable unless argT or resT is complex
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    /*! @brief 0 or 1 for unsigned integers; -1, 0 or 1 for signed integers and
+     *  reals (NaN stays NaN); 0 for complex zero, otherwise z / cabs(z) */
+    resT operator()(const argT &in) const
+    {
+        if constexpr (std::is_integral_v<argT>) {
+            if constexpr (std::is_unsigned_v<argT>) {
+                return resT(0 < in);
+            }
+            else {
+                return sign_impl<argT>(in);
+            }
+        }
+        else if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type;
+            if (in == argT(0)) {
+                return resT(0);
+            }
+            // non-zero complex input is divided by detail::cabs(in)
+            auto z = exprm_ns::complex<realT>(in);
+            return (z / detail::cabs(in));
+        }
+        else {
+            // NaN compares false both ways, so it is handled explicitly
+            if (std::isnan(in)) {
+                return std::numeric_limits<resT>::quiet_NaN();
+            }
+            return sign_impl<argT>(in);
+        }
+    }
+
+private:
+    /*! @brief (0 < v) - (v < 0), converted to T */
+    template <typename T>
+    T sign_impl(const T &v) const
+    {
+        return (T(0) < v) - (v < T(0));
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_size = 4u,
+          std::uint8_t num_vecs = 2u,
+          bool use_sg_loadstore = true>
+using SignContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           SignFunctor<argTy, resTy>,
+                                           vec_size,
+                                           num_vecs,
+                                           use_sg_loadstore>;
+
+template <typename T>
+struct SignOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::uint16_t>,
+        td_ns::TypeMapResultEntry<T, std::uint32_t>,
+        td_ns::TypeMapResultEntry<T, std::uint64_t>,
+        td_ns::TypeMapResultEntry<T, std::int8_t>,
+        td_ns::TypeMapResultEntry<T, std::int16_t>,
+        td_ns::TypeMapResultEntry<T, std::int32_t>,
+        td_ns::TypeMapResultEntry<T, std::int64_t>,
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+    // a void value_type (presumably: no entry matched T) means "not defined"
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+template <typename argTy>
+struct SignContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+    // only the <4u, 2u> default is listed, so argTy does not affect these
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class sign_contig_kernel;
+
+/*! @brief Contiguous SIGN for argTy: delegates to unary_contig_impl */
+template <typename argTy>
+sycl::event sign_contig_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             const char *arg_p,
+                             char *res_p,
+                             const std::vector<sycl::event> &depends = {})
+{
+    using HParams = hyperparam_detail::SignContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vec_width = HParams::vec_sz;
+    static constexpr std::uint8_t vec_count = HParams::n_vecs;
+    return elementwise_common::unary_contig_impl<
+        argTy, SignOutputType, SignContigFunctor, sign_contig_kernel,
+        vec_width, vec_count>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct SignContigFactory
+{
+    /*! @brief sign_contig_impl<T>, or nullptr when SIGN is undefined for T */
+    fnT get()
+    {
+        if constexpr (SignOutputType<T>::is_defined) {
+            fnT impl_ptr = sign_contig_impl<T>;
+            return impl_ptr;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct SignTypeMapFactory
+{
+    /*! @brief typeid of the result type that SIGN produces for input T */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resTy = typename SignOutputType<T>::value_type;
+        return td_ns::GetTypeid<resTy>{}.get();
+    }
+};
+
+template <typename argT, typename resT, typename IndexerTy>
+using SignStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argT, resT, IndexerTy, SignFunctor<argT, resT>>;
+
+template <typename T1, typename T2, typename T3>
+class sign_strided_kernel;
+// Tag type (declared only) passed to unary_strided_impl by the function below.
+template <typename argTy>
+sycl::event
+    sign_strided_impl(sycl::queue &exec_q,
+                      std::size_t nelems,
+                      int nd,
+                      const ssize_t *shape_and_strides,
+                      const char *arg_p,
+                      ssize_t arg_offset,
+                      char *res_p,
+                      ssize_t res_offset,
+                      const std::vector<sycl::event> &depends,
+                      const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, SignOutputType, SignStridedFunctor, sign_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct SignStridedFactory
+{
+    /*! @brief sign_strided_impl<T>, or nullptr when SIGN is undefined for T */
+    fnT get()
+    {
+        if constexpr (SignOutputType<T>::is_defined) {
+            fnT impl_ptr = sign_strided_impl<T>;
+            return impl_ptr;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::sign
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp
new file mode 100644
index 000000000000..d67120633efd
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/signbit.hpp
@@ -0,0 +1,223 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of SIGNBIT(x)
+/// function that tests whether the sign bit of the tensor element is set.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::signbit
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+using dpctl::tensor::type_utils::vec_cast;
+
+template <typename argT, typename resT>
+struct SignbitFunctor
+{
+    static_assert(std::is_same_v<resT, bool>);
+    // traits presumably read by the elementwise_common functor wrappers
+    using is_constant = std::false_type;
+    static constexpr resT constant_value = false;
+    using supports_vec = std::true_type;
+    using supports_sg_loadstore = std::true_type;
+
+    /*! @brief scalar overload: std::signbit of the input element */
+    resT operator()(const argT &in) const
+    {
+        return std::signbit(in);
+    }
+
+    /*! @brief sycl::vec overload: sycl::signbit, converted through vec_cast */
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT, vec_sz> &in) const
+    {
+        const auto sign_mask = sycl::signbit(in);
+        using maskT = std::remove_cv_t<decltype(sign_mask)>;
+        using maskElemT = typename maskT::element_type;
+        return vec_cast<resT, maskElemT, vec_sz>(sign_mask);
+    }
+};
+
+template <typename argTy,
+          typename resTy = bool,
+          std::uint8_t vec_size = 4u,
+          std::uint8_t num_vecs = 2u,
+          bool use_sg_loadstore = true>
+using SignbitContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           SignbitFunctor<argTy, resTy>,
+                                           vec_size,
+                                           num_vecs,
+                                           use_sg_loadstore>;
+
+template <typename argT, typename resT, typename IndexerTy>
+using SignbitStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argT, resT, IndexerTy, SignbitFunctor<argT, resT>>;
+
+template <typename argTy>
+struct SignbitOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<argTy, sycl::half, bool>,
+        td_ns::TypeMapResultEntry<argTy, float, bool>,
+        td_ns::TypeMapResultEntry<argTy, double, bool>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+    // a void value_type (presumably: no entry matched argTy) means "not defined"
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+template <typename argTy>
+struct SignbitContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+    // only the <4u, 2u> default is listed, so argTy does not affect these
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class signbit_contig_kernel;
+
+/*! @brief Contiguous SIGNBIT for argTy: delegates to unary_contig_impl */
+template <typename argTy>
+sycl::event signbit_contig_impl(sycl::queue &exec_q,
+                                std::size_t nelems,
+                                const char *arg_p,
+                                char *res_p,
+                                const std::vector<sycl::event> &depends = {})
+{
+    using HParams = hyperparam_detail::SignbitContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vec_width = HParams::vec_sz;
+    static constexpr std::uint8_t vec_count = HParams::n_vecs;
+    return elementwise_common::unary_contig_impl<
+        argTy, SignbitOutputType, SignbitContigFunctor, signbit_contig_kernel,
+        vec_width, vec_count>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct SignbitContigFactory
+{
+    /*! @brief signbit_contig_impl<T>, or nullptr when SIGNBIT is undefined */
+    fnT get()
+    {
+        if constexpr (SignbitOutputType<T>::is_defined) {
+            fnT impl_ptr = signbit_contig_impl<T>;
+            return impl_ptr;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct SignbitTypeMapFactory
+{
+    /*! @brief get typeid for output type of sycl::signbit(T x) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename SignbitOutputType<T>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class signbit_strided_kernel;
+// Tag type (declared only) passed to unary_strided_impl by the function below.
+template <typename argTy>
+sycl::event
+    signbit_strided_impl(sycl::queue &exec_q,
+                         std::size_t nelems,
+                         int nd,
+                         const ssize_t *shape_and_strides,
+                         const char *arg_p,
+                         ssize_t arg_offset,
+                         char *res_p,
+                         ssize_t res_offset,
+                         const std::vector<sycl::event> &depends,
+                         const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<argTy, SignbitOutputType,
+                                                  SignbitStridedFunctor,
+                                                  signbit_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct SignbitStridedFactory
+{
+    /*! @brief signbit_strided_impl<T>, or nullptr when SIGNBIT is undefined */
+    fnT get()
+    {
+        if constexpr (SignbitOutputType<T>::is_defined) {
+            fnT impl_ptr = signbit_strided_impl<T>;
+            return impl_ptr;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::signbit
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp
new file mode 100644
index 000000000000..d1e3caa9effe
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sin.hpp
@@ -0,0 +1,333 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of SIN(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cmath>
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::sin
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct SinFunctor
+{
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argT and resT support sub-group store/load operations
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in) const
+    {
+        if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type;
+
+            static constexpr realT q_nan =
+                std::numeric_limits<realT>::quiet_NaN();
+
+            realT const &in_re = std::real(in);
+            realT const &in_im = std::imag(in);
+
+            const bool in_re_finite = std::isfinite(in_re);
+            const bool in_im_finite = std::isfinite(in_im);
+            /*
+             * Handle the nearly-non-exceptional cases where
+             * real and imaginary parts of input are finite.
+             */
+            if (in_re_finite && in_im_finite) {
+                resT res =
+                    exprm_ns::sin(exprm_ns::complex<realT>(in)); // sin(in);
+                if (in_re == realT(0)) {
+                    res.real(sycl::copysign(realT(0), in_re));
+                }
+                return res;
+            }
+
+            /*
+             * since sin(in) = -I * sinh(I * in), for special cases,
+             * we calculate real and imaginary parts of z = sinh(I * in) and
+             * then return { imag(z) , -real(z) } which is sin(in).
+             */
+            const realT x = -in_im;
+            const realT y = in_re;
+            const bool xfinite = in_im_finite;
+            const bool yfinite = in_re_finite;
+            /*
+             * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN.
+             * The sign of 0 in the result is unspecified.  Choice = normally
+             * the same as dNaN.
+             *
+             * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN).
+             * The sign of 0 in the result is unspecified.  Choice = normally
+             * the same as d(NaN).
+             */
+            if (x == realT(0) && !yfinite) {
+                const realT sinh_im = q_nan;
+                const realT sinh_re = sycl::copysign(realT(0), x * sinh_im);
+                return resT{sinh_im, -sinh_re};
+            }
+
+            /*
+             * sinh(+-Inf +- I 0) = +-Inf + I +-0.
+             *
+             * sinh(NaN +- I 0)   = d(NaN) + I +-0.
+             */
+            if (y == realT(0) && !xfinite) {
+                if (std::isnan(x)) {
+                    const realT sinh_re = x;
+                    const realT sinh_im = y;
+                    return resT{sinh_im, -sinh_re};
+                }
+                const realT sinh_re = x;
+                const realT sinh_im = sycl::copysign(realT(0), y);
+                return resT{sinh_im, -sinh_re};
+            }
+
+            /*
+             * sinh(x +- I Inf) = dNaN + I dNaN.
+             *
+             * sinh(x + I NaN) = d(NaN) + I d(NaN).
+             */
+            if (xfinite && !yfinite) {
+                const realT sinh_re = q_nan;
+                const realT sinh_im = x * sinh_re;
+                return resT{sinh_im, -sinh_re};
+            }
+
+            /*
+             * sinh(+-Inf + I NaN)  = +-Inf + I d(NaN).
+             * The sign of Inf in the result is unspecified.  Choice = normally
+             * the same as d(NaN).
+             *
+             * sinh(+-Inf +- I Inf) = +Inf + I dNaN.
+             * The sign of Inf in the result is unspecified.
+             * Choice = always negative here (sinh_re = -x * x), so that the
+             * imaginary part of sin, which is -sinh_re, is positive.
+             *
+             * sinh(+-Inf + I y)   = +-Inf cos(y) + I Inf sin(y)
+             */
+            if (std::isinf(x)) {
+                if (!yfinite) {
+                    const realT sinh_re = -x * x;
+                    const realT sinh_im = x * (y - y);
+                    return resT{sinh_im, -sinh_re};
+                }
+                const realT sinh_re = x * sycl::cos(y);
+                const realT sinh_im =
+                    std::numeric_limits<realT>::infinity() * sycl::sin(y);
+                return resT{sinh_im, -sinh_re};
+            }
+
+            /*
+             * sinh(NaN + I NaN)  = d(NaN) + I d(NaN).
+             *
+             * sinh(NaN +- I Inf) = d(NaN) + I d(NaN).
+             *
+             * sinh(NaN + I y)    = d(NaN) + I d(NaN).
+             */
+            const realT y_m_y = (y - y);
+            const realT sinh_re = (x * x) * y_m_y;
+            const realT sinh_im = (x + x) * y_m_y;
+            return resT{sinh_im, -sinh_re};
+        }
+        else {
+            static_assert(std::is_same_v<argT, resT>);
+            if (in == 0) {
+                return in;
+            }
+            return sycl::sin(in);
+        }
+    }
+};
+
+template <typename argT,
+          typename resT = argT,
+          std::uint8_t vec_size = 4u,
+          std::uint8_t num_vecs = 2u,
+          bool use_sg_loadstore = true>
+using SinContigFunctor =
+    elementwise_common::UnaryContigFunctor<argT,
+                                           resT,
+                                           SinFunctor<argT, resT>,
+                                           vec_size,
+                                           num_vecs,
+                                           use_sg_loadstore>;
+
+template <typename argT, typename resT, typename IndexerTy>
+using SinStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argT, resT, IndexerTy, SinFunctor<argT, resT>>;
+
+template <typename T>
+struct SinOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+    // a void value_type (presumably: no entry matched T) means "not defined"
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+
+template <typename argTy>
+struct SinContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+    // only the <4u, 2u> default is listed, so argTy does not affect these
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class sin_contig_kernel;
+
+/*! @brief Contiguous SIN for argTy: delegates to unary_contig_impl */
+template <typename argTy>
+sycl::event sin_contig_impl(sycl::queue &exec_q,
+                            std::size_t nelems,
+                            const char *arg_p,
+                            char *res_p,
+                            const std::vector<sycl::event> &depends = {})
+{
+    using HParams = hyperparam_detail::SinContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vec_width = HParams::vec_sz;
+    static constexpr std::uint8_t vec_count = HParams::n_vecs;
+    return elementwise_common::unary_contig_impl<
+        argTy, SinOutputType, SinContigFunctor, sin_contig_kernel,
+        vec_width, vec_count>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+template <typename fnT, typename T>
+struct SinContigFactory
+{
+    /*! @brief sin_contig_impl<T>, or nullptr when SIN is undefined for T */
+    fnT get()
+    {
+        if constexpr (SinOutputType<T>::is_defined) {
+            fnT impl_ptr = sin_contig_impl<T>;
+            return impl_ptr;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct SinTypeMapFactory
+{
+    /*! @brief typeid of the result type that SIN produces for input T */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resTy = typename SinOutputType<T>::value_type;
+        return td_ns::GetTypeid<resTy>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class sin_strided_kernel;
+// Tag type (declared only) passed to unary_strided_impl by the function below.
+template <typename argTy>
+sycl::event sin_strided_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             int nd,
+                             const ssize_t *shape_and_strides,
+                             const char *arg_p,
+                             ssize_t arg_offset,
+                             char *res_p,
+                             ssize_t res_offset,
+                             const std::vector<sycl::event> &depends,
+                             const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, SinOutputType, SinStridedFunctor, sin_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+template <typename fnT, typename T>
+struct SinStridedFactory
+{
+    /*! @brief sin_strided_impl<T>, or nullptr when SIN is undefined for T */
+    fnT get()
+    {
+        if constexpr (SinOutputType<T>::is_defined) {
+            fnT impl_ptr = sin_strided_impl<T>;
+            return impl_ptr;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::sin
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp
new file mode 100644
index 000000000000..f81a2730fd17
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sinh.hpp
@@ -0,0 +1,302 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of SINH(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cmath>
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::sinh
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct SinhFunctor
+{
+
+    // sinh is not a constant function for any argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // no sycl::vec overload of operator() is provided
+    using supports_vec = typename std::false_type;
+    // subgroup store/load is supported unless argT or resT is complex
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in) const
+    {
+        if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type;
+
+            const realT x = std::real(in);
+            const realT y = std::imag(in);
+
+            const bool xfinite = std::isfinite(x);
+            const bool yfinite = std::isfinite(y);
+
+            /*
+             * Handle the nearly-non-exceptional cases where
+             * real and imaginary parts of input are finite.
+             */
+            if (xfinite && yfinite) {
+                return exprm_ns::sinh(exprm_ns::complex<realT>(in));
+            }
+            /*
+             * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN.
+             * The sign of 0 in the result is unspecified.  Choice = normally
+             * the same as dNaN.
+             *
+             * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN).
+             * The sign of 0 in the result is unspecified.  Choice = normally
+             * the same as d(NaN).
+             */
+            if (x == realT(0) && !yfinite) {
+                const realT res_re = sycl::copysign(realT(0), x * (y - y));
+                return resT{res_re, y - y}; // y - y is NaN for Inf or NaN y
+            }
+
+            /*
+             * sinh(+-Inf +- I 0) = +-Inf + I +-0.
+             *
+             * sinh(NaN +- I 0)   = d(NaN) + I +-0.
+             */
+            if (y == realT(0) && !xfinite) {
+                if (std::isnan(x)) {
+                    return resT{x, y}; // input is returned unchanged
+                }
+                const realT res_im = sycl::copysign(realT(0), y);
+                return resT{x, res_im};
+            }
+
+            /*
+             * sinh(x +- I Inf) = dNaN + I dNaN.
+             *
+             * sinh(x + I NaN) = d(NaN) + I d(NaN).
+             */
+            if (xfinite && !yfinite) {
+                return resT{y - y, x * (y - y)}; // both parts are NaN
+            }
+
+            /*
+             * sinh(+-Inf + I NaN)  = +-Inf + I d(NaN).
+             * The sign of Inf in the result is unspecified.  Choice = normally
+             * the same as d(NaN).
+             *
+             * sinh(+-Inf +- I Inf) = +Inf + I dNaN.
+             * The sign of Inf in the result is unspecified.  Choice = always +.
+             *
+             * sinh(+-Inf + I y)   = +-Inf cos(y) + I Inf sin(y)
+             */
+            if (!xfinite && !std::isnan(x)) {
+                if (!yfinite) {
+                    return resT{x * x, x * (y - y)}; // (+Inf, NaN)
+                }
+                return resT{x * sycl::cos(y),
+                            std::numeric_limits<realT>::infinity() *
+                                sycl::sin(y)};
+            }
+
+            /*
+             * sinh(NaN + I NaN)  = d(NaN) + I d(NaN).
+             *
+             * sinh(NaN +- I Inf) = d(NaN) + I d(NaN).
+             *
+             * sinh(NaN + I y)    = d(NaN) + I d(NaN).
+             */
+            return resT{(x * x) * (y - y), (x + x) * (y - y)}; // x is NaN
+        }
+        else {
+            static_assert(std::is_floating_point_v<argT> ||
+                          std::is_same_v<argT, sycl::half>);
+            return sycl::sinh(in); // real argument: defer to SYCL built-in
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using SinhContigFunctor = // UnaryContigFunctor bound to SinhFunctor
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           SinhFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using SinhStridedFunctor = elementwise_common:: // bound to SinhFunctor
+    UnaryStridedFunctor<argTy, resTy, IndexerT, SinhFunctor<argTy, resTy>>;
+
+template <typename T>
+struct SinhOutputType // is_defined is false when value_type is void
+{
+    using value_type = typename std::disjunction< // first true entry wins
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry; // unused in this namespace
+
+template <typename argTy>
+struct SinhContigHyperparameterSet // same parameters for every argTy
+{
+    using value_type = // only the default entry <4u, 2u> is listed
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class sinh_contig_kernel; // declaration only; used as a template argument
+
+template <typename argTy> // thin wrapper around unary_contig_impl
+sycl::event sinh_contig_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             const char *arg_p,
+                             char *res_p,
+                             const std::vector<sycl::event> &depends = {})
+{
+    using SinhHS = hyperparam_detail::SinhContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vec_sz = SinhHS::vec_sz;
+    static constexpr std::uint8_t n_vecs = SinhHS::n_vecs;
+
+    return elementwise_common::unary_contig_impl<
+        argTy, SinhOutputType, SinhContigFunctor, sinh_contig_kernel, vec_sz,
+        n_vecs>(exec_q, nelems, arg_p, res_p, depends); // args unchanged
+}
+
+template <typename fnT, typename T>
+struct SinhContigFactory // yields sinh_contig_impl<T> or nullptr
+{
+    fnT get()
+    {
+        if constexpr (!SinhOutputType<T>::is_defined) {
+            fnT fn = nullptr; // T has no entry in SinhOutputType
+            return fn;
+        }
+        else {
+            fnT fn = sinh_contig_impl<T>; // instantiated only in this branch
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct SinhTypeMapFactory // get() is only well-formed when fnT is int
+{
+    /*! @brief get typeid of SinhOutputType<T>::value_type (sinh result) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename SinhOutputType<T>::value_type; // void if undefined
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class sinh_strided_kernel; // declaration only; used as a template argument
+
+template <typename argTy> // thin wrapper around unary_strided_impl
+sycl::event
+    sinh_strided_impl(sycl::queue &exec_q,
+                      std::size_t nelems,
+                      int nd,
+                      const ssize_t *shape_and_strides,
+                      const char *arg_p,
+                      ssize_t arg_offset,
+                      char *res_p,
+                      ssize_t res_offset,
+                      const std::vector<sycl::event> &depends,
+                      const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, SinhOutputType, SinhStridedFunctor, sinh_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends); // arguments passed unchanged
+}
+
+template <typename fnT, typename T>
+struct SinhStridedFactory // yields sinh_strided_impl<T> or nullptr
+{
+    fnT get()
+    {
+        if constexpr (!SinhOutputType<T>::is_defined) {
+            fnT fn = nullptr; // T has no entry in SinhOutputType
+            return fn;
+        }
+        else {
+            fnT fn = sinh_strided_impl<T>; // instantiated only in this branch
+            return fn;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::sinh
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp
new file mode 100644
index 000000000000..db092ca29595
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/sqrt.hpp
@@ -0,0 +1,224 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of SQRT(x)
+/// function that computes a square root.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::sqrt
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct SqrtFunctor
+{
+
+    // sqrt is not a constant function for any argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // no sycl::vec overload of operator() is provided
+    using supports_vec = typename std::false_type;
+    // subgroup store/load is supported unless argT or resT is complex
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in) const
+    {
+        if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type; // component type
+            return exprm_ns::sqrt(exprm_ns::complex<realT>(in));
+        }
+        else {
+            return sycl::sqrt(in); // real argument: defer to SYCL built-in
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using SqrtContigFunctor = // UnaryContigFunctor bound to SqrtFunctor
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           SqrtFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using SqrtStridedFunctor = elementwise_common:: // bound to SqrtFunctor
+    UnaryStridedFunctor<argTy, resTy, IndexerT, SqrtFunctor<argTy, resTy>>;
+
+template <typename T>
+struct SqrtOutputType // is_defined is false when value_type is void
+{
+    using value_type = typename std::disjunction< // first true entry wins
+        td_ns::TypeMapResultEntry<T, sycl::half, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float, float>,
+        td_ns::TypeMapResultEntry<T, double, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>, std::complex<float>>,
+        td_ns::
+            TypeMapResultEntry<T, std::complex<double>, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry; // unused in this namespace
+
+template <typename argTy>
+struct SqrtContigHyperparameterSet // same parameters for every argTy
+{
+    using value_type = // only the default entry <4u, 2u> is listed
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class sqrt_contig_kernel; // declaration only; used as a template argument
+
+template <typename argTy> // thin wrapper around unary_contig_impl
+sycl::event sqrt_contig_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             const char *arg_p,
+                             char *res_p,
+                             const std::vector<sycl::event> &depends = {})
+{
+    using SqrtHS = hyperparam_detail::SqrtContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vec_sz = SqrtHS::vec_sz;
+    static constexpr std::uint8_t n_vecs = SqrtHS::n_vecs;
+
+    return elementwise_common::unary_contig_impl<
+        argTy, SqrtOutputType, SqrtContigFunctor, sqrt_contig_kernel, vec_sz,
+        n_vecs>(exec_q, nelems, arg_p, res_p, depends); // args unchanged
+}
+
+template <typename fnT, typename T>
+struct SqrtContigFactory // yields sqrt_contig_impl<T> or nullptr
+{
+    fnT get()
+    {
+        if constexpr (!SqrtOutputType<T>::is_defined) {
+            fnT fn = nullptr; // T has no entry in SqrtOutputType
+            return fn;
+        }
+        else {
+            fnT fn = sqrt_contig_impl<T>; // instantiated only in this branch
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct SqrtTypeMapFactory // get() is only well-formed when fnT is int
+{
+    /*! @brief get typeid of SqrtOutputType<T>::value_type (sqrt result) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename SqrtOutputType<T>::value_type; // void if undefined
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class sqrt_strided_kernel; // declaration only; used as a template argument
+
+template <typename argTy> // thin wrapper around unary_strided_impl
+sycl::event
+    sqrt_strided_impl(sycl::queue &exec_q,
+                      std::size_t nelems,
+                      int nd,
+                      const ssize_t *shape_and_strides,
+                      const char *arg_p,
+                      ssize_t arg_offset,
+                      char *res_p,
+                      ssize_t res_offset,
+                      const std::vector<sycl::event> &depends,
+                      const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, SqrtOutputType, SqrtStridedFunctor, sqrt_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends); // arguments passed unchanged
+}
+
+template <typename fnT, typename T>
+struct SqrtStridedFactory // yields sqrt_strided_impl<T> or nullptr
+{
+    fnT get()
+    {
+        if constexpr (!SqrtOutputType<T>::is_defined) {
+            fnT fn = nullptr; // T has no entry in SqrtOutputType
+            return fn;
+        }
+        else {
+            fnT fn = sqrt_strided_impl<T>; // instantiated only in this branch
+            return fn;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::sqrt
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/square.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/square.hpp
new file mode 100644
index 000000000000..de3007acfbea
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/square.hpp
@@ -0,0 +1,251 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of SQUARE(x)
+///
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::square
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+using dpctl::tensor::type_utils::vec_cast;
+
+template <typename argT, typename resT>
+struct SquareFunctor
+{
+
+    // square is not a constant function for any argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // sycl::vec overload is supported unless argT or resT is complex
+    using supports_vec = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+    // subgroup store/load is supported unless argT or resT is complex
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+
+    resT operator()(const argT &in) const
+    {
+        if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type;
+
+            auto z = exprm_ns::complex<realT>(in); // convert before multiplying
+
+            return z * z;
+        }
+        else {
+            return in * in;
+        }
+    }
+
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT, vec_sz> &in) const
+    {
+        auto const &res_vec = in * in; // temporary kept alive by the reference
+        using deducedT = typename std::remove_cv_t<
+            std::remove_reference_t<decltype(res_vec)>>::element_type;
+        if constexpr (std::is_same_v<resT, deducedT>) {
+            return res_vec;
+        }
+        else { // product element type differs from resT: convert
+            return vec_cast<resT, deducedT, vec_sz>(res_vec);
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using SquareContigFunctor = // UnaryContigFunctor bound to SquareFunctor
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           SquareFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+
+template <typename argTy, typename resTy, typename IndexerT>
+using SquareStridedFunctor = elementwise_common:: // bound to SquareFunctor
+    UnaryStridedFunctor<argTy, resTy, IndexerT, SquareFunctor<argTy, resTy>>;
+
+template <typename T>
+struct SquareOutputType // is_defined is false when value_type is void
+{
+    using value_type = typename std::disjunction< // first true entry wins
+        td_ns::TypeMapResultEntry<T, bool, std::int8_t>, // bool with int8_t
+        td_ns::TypeMapResultEntry<T, std::uint8_t>,
+        td_ns::TypeMapResultEntry<T, std::uint16_t>,
+        td_ns::TypeMapResultEntry<T, std::uint32_t>,
+        td_ns::TypeMapResultEntry<T, std::uint64_t>,
+        td_ns::TypeMapResultEntry<T, std::int8_t>,
+        td_ns::TypeMapResultEntry<T, std::int16_t>,
+        td_ns::TypeMapResultEntry<T, std::int32_t>,
+        td_ns::TypeMapResultEntry<T, std::int64_t>,
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry; // unused in this namespace
+
+template <typename argTy>
+struct SquareContigHyperparameterSet // same parameters for every argTy
+{
+    using value_type = // only the default entry <4u, 2u> is listed
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class square_contig_kernel; // declaration only; used as a template argument
+
+template <typename argTy> // thin wrapper around unary_contig_impl
+sycl::event square_contig_impl(sycl::queue &exec_q,
+                               std::size_t nelems,
+                               const char *arg_p,
+                               char *res_p,
+                               const std::vector<sycl::event> &depends = {})
+{
+    using SquareHS = hyperparam_detail::SquareContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vec_sz = SquareHS::vec_sz;
+    static constexpr std::uint8_t n_vecs = SquareHS::n_vecs;
+
+    return elementwise_common::unary_contig_impl<
+        argTy, SquareOutputType, SquareContigFunctor, square_contig_kernel,
+        vec_sz, n_vecs>(exec_q, nelems, arg_p, res_p, depends); // unchanged
+}
+
+template <typename fnT, typename T>
+struct SquareContigFactory // yields square_contig_impl<T> or nullptr
+{
+    fnT get()
+    {
+        if constexpr (!SquareOutputType<T>::is_defined) {
+            fnT fn = nullptr; // T has no entry in SquareOutputType
+            return fn;
+        }
+        else {
+            fnT fn = square_contig_impl<T>; // instantiated only in this branch
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct SquareTypeMapFactory // get() is only well-formed when fnT is int
+{
+    /*! @brief get typeid of SquareOutputType<T>::value_type (x * x result) */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename SquareOutputType<T>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class square_strided_kernel; // declaration only; used as a template argument
+
+template <typename argTy> // thin wrapper around unary_strided_impl
+sycl::event
+    square_strided_impl(sycl::queue &exec_q,
+                        std::size_t nelems,
+                        int nd,
+                        const ssize_t *shape_and_strides,
+                        const char *arg_p,
+                        ssize_t arg_offset,
+                        char *res_p,
+                        ssize_t res_offset,
+                        const std::vector<sycl::event> &depends,
+                        const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, SquareOutputType, SquareStridedFunctor, square_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends); // arguments passed unchanged
+}
+
+template <typename fnT, typename T>
+struct SquareStridedFactory // yields square_strided_impl<T> or nullptr
+{
+    fnT get()
+    {
+        if constexpr (!SquareOutputType<T>::is_defined) {
+            fnT fn = nullptr; // T has no entry in SquareOutputType
+            return fn;
+        }
+        else {
+            fnT fn = square_strided_impl<T>; // instantiated only in this branch
+            return fn;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::square
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp
new file mode 100644
index 000000000000..2db2a6b5fbf8
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/tan.hpp
@@ -0,0 +1,276 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of TAN(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cmath>
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::tan
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct TanFunctor
+{
+
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argTy and resTy support subgroup store/load operation
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+    // tan(in); non-finite complex inputs are special-cased below
+    resT operator()(const argT &in) const
+    {
+        if constexpr (is_complex<argT>::value) {
+
+            using realT = typename argT::value_type;
+
+            static constexpr realT q_nan =
+                std::numeric_limits<realT>::quiet_NaN();
+            /*
+             * since tan(in) = -I * tanh(I * in), for special cases,
+             * we calculate real and imaginary parts of z = tanh(I * in) and
+             * return { imag(z) , -real(z) } which is tan(in).
+             */
+            const realT x = -std::imag(in);
+            const realT y = std::real(in);
+            /*
+             * tanh(NaN + i 0) = NaN + i 0
+             *
+             * tanh(NaN + i y) = NaN + i NaN        for y != 0
+             *
+             * The imaginary part has the sign of x*sin(2*y), but there's no
+             * special effort to get this right.
+             *
+             * tanh(+-Inf +- i Inf) = +-1 +- 0
+             *
+             * tanh(+-Inf + i y) = +-1 + 0 sin(2y)        for y finite
+             *
+             * The sign of the imaginary part is unspecified.  This special
+             * case is only needed to avoid a spurious invalid exception when
+             * y is infinite.
+             */
+            if (!std::isfinite(x)) {
+                if (std::isnan(x)) {
+                    const realT tanh_re = x;
+                    const realT tanh_im = (y == realT(0) ? y : x * y);
+                    return resT{tanh_im, -tanh_re};
+                }
+                const realT tanh_re = sycl::copysign(realT(1), x);
+                const realT tanh_im = sycl::copysign(
+                    realT(0), std::isinf(y) ? y : sycl::sin(y) * sycl::cos(y));
+                return resT{tanh_im, -tanh_re};
+            }
+            /*
+             * tanh(x + i NaN) = NaN + i NaN for non-zero x
+             * tanh(x +- i Inf) = NaN + i NaN for non-zero x
+             * tanh(+-0 + i NaN) = tanh(+-0 +- i Inf) = +-0 + i NaN,
+             * so real(z) = x and the imaginary part of tan(in) is -x
+             */
+            if (!std::isfinite(y)) {
+                if (x == realT(0)) {
+                    return resT{q_nan, -x};
+                }
+                return resT{q_nan, q_nan};
+            }
+            /* ordinary cases */
+            return exprm_ns::tan(exprm_ns::complex<realT>(in)); // tan(in);
+        }
+        else {
+            static_assert(std::is_floating_point_v<argT> ||
+                          std::is_same_v<argT, sycl::half>);
+            return sycl::tan(in);
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using TanContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           TanFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+// Strided variant: wraps TanFunctor in elementwise_common::UnaryStridedFunctor
+template <typename argTy, typename resTy, typename IndexerT>
+using TanStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, TanFunctor<argTy, resTy>>;
+
+template <typename T>
+struct TanOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+    // false when value_type is void; the factories below then yield nullptr
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+/*! @brief vec_sz / n_vecs for tan_contig_impl; only the default is listed */
+template <typename argTy>
+struct TanContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class tan_contig_kernel;
+/*! @brief Forwards to unary_contig_impl with the tan hyperparameter set */
+template <typename argTy>
+sycl::event tan_contig_impl(sycl::queue &exec_q,
+                            std::size_t nelems,
+                            const char *arg_p,
+                            char *res_p,
+                            const std::vector<sycl::event> &depends = {})
+{
+    using HSetT = hyperparam_detail::TanContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vsize = HSetT::vec_sz;
+    static constexpr std::uint8_t nvecs = HSetT::n_vecs;
+    // hyperparameters go last in the unary_contig_impl template argument list
+    return elementwise_common::unary_contig_impl<
+        argTy, TanOutputType, TanContigFunctor, tan_contig_kernel, vsize,
+        nvecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+/*! @brief Yields tan_contig_impl<T> if TanOutputType<T>::is_defined,
+ *  and nullptr otherwise */
+template <typename fnT, typename T>
+struct TanContigFactory
+{
+    fnT get()
+    {
+        if constexpr (TanOutputType<T>::is_defined) {
+            return tan_contig_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct TanTypeMapFactory
+{
+    /*! @brief Returns the type id of the result type of sycl::tan(T x) */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resTy = typename TanOutputType<T>::value_type;
+        return td_ns::GetTypeid<resTy>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class tan_strided_kernel;
+/*! @brief Forwards all arguments to elementwise_common::unary_strided_impl */
+template <typename argTy>
+sycl::event tan_strided_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             int nd,
+                             const ssize_t *shape_and_strides,
+                             const char *arg_p,
+                             ssize_t arg_offset,
+                             char *res_p,
+                             ssize_t res_offset,
+                             const std::vector<sycl::event> &depends,
+                             const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, TanOutputType, TanStridedFunctor, tan_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+/*! @brief Yields tan_strided_impl<T> if TanOutputType<T>::is_defined,
+ *  and nullptr otherwise */
+template <typename fnT, typename T>
+struct TanStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (TanOutputType<T>::is_defined) {
+            return tan_strided_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::tan
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp
new file mode 100644
index 000000000000..dde16128fb1a
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/tanh.hpp
@@ -0,0 +1,270 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of TANH(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cmath>
+#include <complex>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "sycl_complex.hpp"
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::tanh
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct TanhFunctor
+{
+
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argTy and resTy support subgroup store/load operation
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+    // tanh(in); non-finite complex inputs are special-cased below
+    resT operator()(const argT &in) const
+    {
+        if constexpr (is_complex<argT>::value) {
+            using realT = typename argT::value_type;
+
+            static constexpr realT q_nan =
+                std::numeric_limits<realT>::quiet_NaN();
+
+            const realT x = std::real(in);
+            const realT y = std::imag(in);
+            /*
+             * tanh(NaN + i 0) = NaN + i 0
+             *
+             * tanh(NaN + i y) = NaN + i NaN        for y != 0
+             *
+             * The imaginary part has the sign of x*sin(2*y), but there's no
+             * special effort to get this right.
+             *
+             * tanh(+-Inf +- i Inf) = +-1 +- 0
+             *
+             * tanh(+-Inf + i y) = +-1 + 0 sin(2y)        for y finite
+             *
+             * The sign of the imaginary part is unspecified.  This special
+             * case is only needed to avoid a spurious invalid exception when
+             * y is infinite.
+             */
+            if (!std::isfinite(x)) {
+                if (std::isnan(x)) {
+                    return resT{q_nan, (y == realT(0) ? y : q_nan)};
+                }
+                const realT res_re = sycl::copysign(realT(1), x);
+                const realT res_im = sycl::copysign(
+                    realT(0), std::isinf(y) ? y : sycl::sin(y) * sycl::cos(y));
+                return resT{res_re, res_im};
+            }
+            /*
+             * tanh(x + i NaN) = NaN + i NaN for non-zero x
+             * tanh(x +- i Inf) = NaN + i NaN for non-zero x
+             * tanh(0 + i NaN) = 0 + i NaN
+             * tanh(0 +- i Inf) = 0 + i NaN
+             */
+            if (!std::isfinite(y)) {
+                if (x == realT(0)) {
+                    return resT{x, q_nan};
+                }
+                return resT{q_nan, q_nan};
+            }
+            /* ordinary cases */
+            return exprm_ns::tanh(exprm_ns::complex<realT>(in)); // tanh(in);
+        }
+        else {
+            static_assert(std::is_floating_point_v<argT> ||
+                          std::is_same_v<argT, sycl::half>);
+            return sycl::tanh(in);
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using TanhContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           TanhFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+// Strided variant: wraps TanhFunctor in elementwise_common::UnaryStridedFunctor
+template <typename argTy, typename resTy, typename IndexerT>
+using TanhStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, TanhFunctor<argTy, resTy>>;
+
+template <typename T>
+struct TanhOutputType
+{
+    using value_type = typename std::disjunction<
+        td_ns::TypeMapResultEntry<T, sycl::half>,
+        td_ns::TypeMapResultEntry<T, float>,
+        td_ns::TypeMapResultEntry<T, double>,
+        td_ns::TypeMapResultEntry<T, std::complex<float>>,
+        td_ns::TypeMapResultEntry<T, std::complex<double>>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+    // false when value_type is void; the factories below then yield nullptr
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+/*! @brief vec_sz / n_vecs for tanh_contig_impl; only the default is listed */
+template <typename argTy>
+struct TanhContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class tanh_contig_kernel;
+/*! @brief Forwards to unary_contig_impl with the tanh hyperparameter set */
+template <typename argTy>
+sycl::event tanh_contig_impl(sycl::queue &exec_q,
+                             std::size_t nelems,
+                             const char *arg_p,
+                             char *res_p,
+                             const std::vector<sycl::event> &depends = {})
+{
+    using HSetT = hyperparam_detail::TanhContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vsize = HSetT::vec_sz;
+    static constexpr std::uint8_t nvecs = HSetT::n_vecs;
+    // hyperparameters go last in the unary_contig_impl template argument list
+    return elementwise_common::unary_contig_impl<
+        argTy, TanhOutputType, TanhContigFunctor, tanh_contig_kernel, vsize,
+        nvecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+/*! @brief Yields tanh_contig_impl<T> if TanhOutputType<T>::is_defined,
+ *  and nullptr otherwise */
+template <typename fnT, typename T>
+struct TanhContigFactory
+{
+    fnT get()
+    {
+        if constexpr (TanhOutputType<T>::is_defined) {
+            return tanh_contig_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct TanhTypeMapFactory
+{
+    /*! @brief Returns the type id of the result type of sycl::tanh(T x) */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resTy = typename TanhOutputType<T>::value_type;
+        return td_ns::GetTypeid<resTy>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class tanh_strided_kernel;
+/*! @brief Forwards all arguments to elementwise_common::unary_strided_impl */
+template <typename argTy>
+sycl::event
+    tanh_strided_impl(sycl::queue &exec_q,
+                      std::size_t nelems,
+                      int nd,
+                      const ssize_t *shape_and_strides,
+                      const char *arg_p,
+                      ssize_t arg_offset,
+                      char *res_p,
+                      ssize_t res_offset,
+                      const std::vector<sycl::event> &depends,
+                      const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, TanhOutputType, TanhStridedFunctor, tanh_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+/*! @brief Yields tanh_strided_impl<T> if TanhOutputType<T>::is_defined,
+ *  and nullptr otherwise */
+template <typename fnT, typename T>
+struct TanhStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (TanhOutputType<T>::is_defined) {
+            return tanh_strided_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::tanh
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp
new file mode 100644
index 000000000000..6fae9c4f27e5
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/include/kernels/elementwise_functions/trunc.hpp
@@ -0,0 +1,226 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of TRUNC(x) function.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "vec_size_util.hpp"
+
+#include "kernels/dpctl_tensor_types.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+#include "utils/type_dispatch_building.hpp"
+#include "utils/type_utils.hpp"
+
+namespace dpctl::tensor::kernels::trunc
+{
+
+using dpctl::tensor::ssize_t;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT>
+struct TruncFunctor
+{
+
+    // is function constant for given argT
+    using is_constant = typename std::false_type;
+    // constant value, if constant
+    // constexpr resT constant_value = resT{};
+    // is function defined for sycl::vec
+    using supports_vec = typename std::false_type;
+    // do both argTy and resTy support subgroup store/load operation
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
+    // integral inputs are returned unchanged; others go through sycl::trunc
+    resT operator()(const argT &in) const
+    {
+        if constexpr (std::is_integral_v<argT>) {
+            return in;
+        }
+        else {
+            return sycl::trunc(in);
+        }
+    }
+};
+
+template <typename argTy,
+          typename resTy = argTy,
+          std::uint8_t vec_sz = 4u,
+          std::uint8_t n_vecs = 2u,
+          bool enable_sg_loadstore = true>
+using TruncContigFunctor =
+    elementwise_common::UnaryContigFunctor<argTy,
+                                           resTy,
+                                           TruncFunctor<argTy, resTy>,
+                                           vec_sz,
+                                           n_vecs,
+                                           enable_sg_loadstore>;
+// Strided variant: wraps TruncFunctor in elementwise_common::UnaryStridedFunctor
+template <typename argTy, typename resTy, typename IndexerT>
+using TruncStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, TruncFunctor<argTy, resTy>>;
+
+template <typename T>
+struct TruncOutputType
+{
+    using value_type =
+        typename std::disjunction<td_ns::TypeMapResultEntry<T, bool>,
+                                  td_ns::TypeMapResultEntry<T, std::uint8_t>,
+                                  td_ns::TypeMapResultEntry<T, std::uint16_t>,
+                                  td_ns::TypeMapResultEntry<T, std::uint32_t>,
+                                  td_ns::TypeMapResultEntry<T, std::uint64_t>,
+                                  td_ns::TypeMapResultEntry<T, std::int8_t>,
+                                  td_ns::TypeMapResultEntry<T, std::int16_t>,
+                                  td_ns::TypeMapResultEntry<T, std::int32_t>,
+                                  td_ns::TypeMapResultEntry<T, std::int64_t>,
+                                  td_ns::TypeMapResultEntry<T, sycl::half>,
+                                  td_ns::TypeMapResultEntry<T, float>,
+                                  td_ns::TypeMapResultEntry<T, double>,
+                                  td_ns::DefaultResultEntry<void>>::result_type;
+    // false when value_type is void; the factories below then yield nullptr
+    static constexpr bool is_defined = !std::is_same_v<value_type, void>;
+};
+
+namespace hyperparam_detail
+{
+
+namespace vsu_ns = dpctl::tensor::kernels::vec_size_utils;
+
+using vsu_ns::ContigHyperparameterSetDefault;
+using vsu_ns::UnaryContigHyperparameterSetEntry;
+/*! @brief vec_sz / n_vecs for trunc_contig_impl; only the default is listed */
+template <typename argTy>
+struct TruncContigHyperparameterSet
+{
+    using value_type =
+        typename std::disjunction<ContigHyperparameterSetDefault<4u, 2u>>;
+
+    constexpr static auto vec_sz = value_type::vec_sz;
+    constexpr static auto n_vecs = value_type::n_vecs;
+};
+
+} // end of namespace hyperparam_detail
+
+template <typename T1, typename T2, std::uint8_t vec_sz, std::uint8_t n_vecs>
+class trunc_contig_kernel;
+/*! @brief Forwards to unary_contig_impl with the trunc hyperparameter set */
+template <typename argTy>
+sycl::event trunc_contig_impl(sycl::queue &exec_q,
+                              std::size_t nelems,
+                              const char *arg_p,
+                              char *res_p,
+                              const std::vector<sycl::event> &depends = {})
+{
+    using HSetT = hyperparam_detail::TruncContigHyperparameterSet<argTy>;
+    static constexpr std::uint8_t vsize = HSetT::vec_sz;
+    static constexpr std::uint8_t nvecs = HSetT::n_vecs;
+    // hyperparameters go last in the unary_contig_impl template argument list
+    return elementwise_common::unary_contig_impl<
+        argTy, TruncOutputType, TruncContigFunctor, trunc_contig_kernel, vsize,
+        nvecs>(exec_q, nelems, arg_p, res_p, depends);
+}
+
+/*! @brief Yields trunc_contig_impl<T> if TruncOutputType<T>::is_defined,
+ *  and nullptr otherwise */
+template <typename fnT, typename T>
+struct TruncContigFactory
+{
+    fnT get()
+    {
+        if constexpr (TruncOutputType<T>::is_defined) {
+            return trunc_contig_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+template <typename fnT, typename T>
+struct TruncTypeMapFactory
+{
+    /*! @brief Returns the type id of the result type of sycl::trunc(T x) */
+    std::enable_if_t<std::is_same_v<fnT, int>, int> get()
+    {
+        using resTy = typename TruncOutputType<T>::value_type;
+        return td_ns::GetTypeid<resTy>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename T3>
+class trunc_strided_kernel;
+/*! @brief Forwards all arguments to elementwise_common::unary_strided_impl */
+template <typename argTy>
+sycl::event
+    trunc_strided_impl(sycl::queue &exec_q,
+                       std::size_t nelems,
+                       int nd,
+                       const ssize_t *shape_and_strides,
+                       const char *arg_p,
+                       ssize_t arg_offset,
+                       char *res_p,
+                       ssize_t res_offset,
+                       const std::vector<sycl::event> &depends,
+                       const std::vector<sycl::event> &additional_depends)
+{
+    return elementwise_common::unary_strided_impl<
+        argTy, TruncOutputType, TruncStridedFunctor, trunc_strided_kernel>(
+        exec_q, nelems, nd, shape_and_strides, arg_p, arg_offset, res_p,
+        res_offset, depends, additional_depends);
+}
+
+/*! @brief Yields trunc_strided_impl<T> if TruncOutputType<T>::is_defined,
+ *  and nullptr otherwise */
+template <typename fnT, typename T>
+struct TruncStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (TruncOutputType<T>::is_defined) {
+            return trunc_strided_impl<T>;
+        }
+        else {
+            return nullptr;
+        }
+    }
+};
+
+} // namespace dpctl::tensor::kernels::trunc
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp b/dpctl_ext/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp
index 7be2b3ea8591..f6d2f0175ce8 100644
--- a/dpctl_ext/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp
+++ b/dpctl_ext/tensor/libtensor/include/kernels/integer_advanced_indexing.hpp
@@ -33,7 +33,7 @@
 //===----------------------------------------------------------------------===//
 
 #pragma once
-#include <complex>
+
 #include <cstddef>
 #include <type_traits>
 #include <vector>
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/repeat.hpp b/dpctl_ext/tensor/libtensor/include/kernels/repeat.hpp
index aab9a709f010..83a520adb538 100644
--- a/dpctl_ext/tensor/libtensor/include/kernels/repeat.hpp
+++ b/dpctl_ext/tensor/libtensor/include/kernels/repeat.hpp
@@ -33,7 +33,6 @@
 //===----------------------------------------------------------------------===//
 
 #pragma once
-#include <complex>
 #include <cstddef>
 #include <cstdint>
 #include <vector>
@@ -42,7 +41,6 @@
 
 #include "dpctl_tensor_types.hpp"
 #include "utils/offset_utils.hpp"
-#include "utils/type_utils.hpp"
 
 namespace dpctl::tensor::kernels::repeat
 {
diff --git a/dpctl_ext/tensor/libtensor/include/kernels/where.hpp b/dpctl_ext/tensor/libtensor/include/kernels/where.hpp
index b92a3a76c9c1..454e1e61fa0d 100644
--- a/dpctl_ext/tensor/libtensor/include/kernels/where.hpp
+++ b/dpctl_ext/tensor/libtensor/include/kernels/where.hpp
@@ -34,7 +34,6 @@
 
 #pragma once
 #include <algorithm>
-#include <complex>
 #include <cstddef>
 #include <cstdint>
 #include <vector>
diff --git a/dpctl_ext/tensor/libtensor/source/accumulators.cpp b/dpctl_ext/tensor/libtensor/source/accumulators.cpp
index 82913010755a..c6ab96418d47 100644
--- a/dpctl_ext/tensor/libtensor/source/accumulators.cpp
+++ b/dpctl_ext/tensor/libtensor/source/accumulators.cpp
@@ -32,17 +32,18 @@
 /// This file defines functions of dpctl.tensor._tensor_impl extensions
 //===----------------------------------------------------------------------===//
 
-#include <algorithm>
 #include <cstddef>
 #include <cstdint>
 #include <stdexcept>
+#include <string>
+#include <tuple>
+#include <utility>
 #include <vector>
 
 #include <sycl/sycl.hpp>
 
 #include "dpnp4pybind11.hpp"
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
 
 #include "kernels/accumulators.hpp"
 #include "simplify_iteration_space.hpp"
@@ -196,8 +197,8 @@ std::size_t py_mask_positions(const dpctl::tensor::usm_ndarray &mask,
     int mask_nd = mask.get_ndim();
     int nd = mask_nd;
 
-    dpctl::tensor::py_internal::compact_iteration_space(
-        nd, shape, strides_vector, compact_shape, compact_strides);
+    compact_iteration_space(nd, shape, strides_vector, compact_shape,
+                            compact_strides);
 
     // Strided implementation
     auto strided_fn =
@@ -351,8 +352,8 @@ std::size_t py_cumsum_1d(const dpctl::tensor::usm_ndarray &src,
     int src_nd = src.get_ndim();
     int nd = src_nd;
 
-    dpctl::tensor::py_internal::compact_iteration_space(
-        nd, shape, strides_vector, compact_shape, compact_strides);
+    compact_iteration_space(nd, shape, strides_vector, compact_shape,
+                            compact_strides);
 
     // Strided implementation
     auto strided_fn = cumsum_1d_strided_dispatch_vector[src_typeid];
diff --git a/dpctl_ext/tensor/libtensor/source/accumulators.hpp b/dpctl_ext/tensor/libtensor/source/accumulators.hpp
index 42503093789b..e400aad2dceb 100644
--- a/dpctl_ext/tensor/libtensor/source/accumulators.hpp
+++ b/dpctl_ext/tensor/libtensor/source/accumulators.hpp
@@ -39,7 +39,6 @@
 #include <sycl/sycl.hpp>
 
 #include "dpnp4pybind11.hpp"
-#include <pybind11/pybind11.h>
 
 namespace dpctl::tensor::py_internal
 {
diff --git a/dpctl_ext/tensor/libtensor/source/accumulators/accumulate_over_axis.hpp b/dpctl_ext/tensor/libtensor/source/accumulators/accumulate_over_axis.hpp
index 712ab180ef37..4dd00620a260 100644
--- a/dpctl_ext/tensor/libtensor/source/accumulators/accumulate_over_axis.hpp
+++ b/dpctl_ext/tensor/libtensor/source/accumulators/accumulate_over_axis.hpp
@@ -35,9 +35,12 @@
 
 #pragma once
 
+#include <algorithm>
 #include <cstddef>
-#include <cstdint>
+#include <exception>
+#include <iterator>
 #include <stdexcept>
+#include <tuple>
 #include <type_traits>
 #include <utility>
 #include <vector>
diff --git a/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_logsumexp.cpp b/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_logsumexp.cpp
index 572c93c0066e..f1ad170caa58 100644
--- a/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_logsumexp.cpp
+++ b/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_logsumexp.cpp
@@ -311,7 +311,6 @@ void init_cumulative_logsumexp(py::module_ m)
                                   int trailing_dims_to_accumulate,
                                   const arrayT &dst, sycl::queue &exec_q,
                                   const event_vecT &depends = {}) {
-        using dpctl::tensor::py_internal::py_accumulate_over_axis;
         return py_accumulate_over_axis(src, trailing_dims_to_accumulate, dst,
                                        exec_q, depends,
                                        cumlogsumexp_strided_dispatch_table,
@@ -326,8 +325,6 @@ void init_cumulative_logsumexp(py::module_ m)
     auto cumlogsumexp_include_initial_pyapi =
         [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q,
             const event_vecT &depends = {}) {
-            using dpctl::tensor::py_internal::
-                py_accumulate_final_axis_include_initial;
             return py_accumulate_final_axis_include_initial(
                 src, dst, exec_q, depends,
                 cumlogsumexp_include_initial_strided_dispatch_table,
diff --git a/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_prod.cpp b/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_prod.cpp
index 07c802c8cffa..9a9961441d35 100644
--- a/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_prod.cpp
+++ b/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_prod.cpp
@@ -322,7 +322,6 @@ void init_cumulative_prod(py::module_ m)
     auto cumprod_pyapi = [&](const arrayT &src, int trailing_dims_to_accumulate,
                              const arrayT &dst, sycl::queue &exec_q,
                              const event_vecT &depends = {}) {
-        using dpctl::tensor::py_internal::py_accumulate_over_axis;
         return py_accumulate_over_axis(
             src, trailing_dims_to_accumulate, dst, exec_q, depends,
             cumprod_strided_dispatch_table, cumprod_1d_contig_dispatch_table);
@@ -336,8 +335,6 @@ void init_cumulative_prod(py::module_ m)
     auto cumprod_include_initial_pyapi =
         [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q,
             const event_vecT &depends = {}) {
-            using dpctl::tensor::py_internal::
-                py_accumulate_final_axis_include_initial;
             return py_accumulate_final_axis_include_initial(
                 src, dst, exec_q, depends,
                 cumprod_include_initial_strided_dispatch_table,
diff --git a/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_sum.cpp b/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_sum.cpp
index 3c422bfd0998..3a0ed6cf3ab5 100644
--- a/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_sum.cpp
+++ b/dpctl_ext/tensor/libtensor/source/accumulators/cumulative_sum.cpp
@@ -320,7 +320,6 @@ void init_cumulative_sum(py::module_ m)
     auto cumsum_pyapi = [&](const arrayT &src, int trailing_dims_to_accumulate,
                             const arrayT &dst, sycl::queue &exec_q,
                             const event_vecT &depends = {}) {
-        using dpctl::tensor::py_internal::py_accumulate_over_axis;
         return py_accumulate_over_axis(
             src, trailing_dims_to_accumulate, dst, exec_q, depends,
             cumsum_strided_dispatch_table, cumsum_1d_contig_dispatch_table);
@@ -334,8 +333,6 @@ void init_cumulative_sum(py::module_ m)
     auto cumsum_include_initial_pyapi =
         [&](const arrayT &src, const arrayT &dst, sycl::queue &exec_q,
             const event_vecT &depends = {}) {
-            using dpctl::tensor::py_internal::
-                py_accumulate_final_axis_include_initial;
             return py_accumulate_final_axis_include_initial(
                 src, dst, exec_q, depends,
                 cumsum_include_initial_strided_dispatch_table,
diff --git a/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp b/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp
index a78cb1750b81..4c46e1e2fec8 100644
--- a/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp
+++ b/dpctl_ext/tensor/libtensor/source/boolean_advanced_indexing.cpp
@@ -336,21 +336,19 @@ std::pair<sycl::event, sycl::event>
         shT masked_src_shape;
         shT ortho_src_strides;
         shT masked_src_strides;
-        dpctl::tensor::py_internal::split_iteration_space(
-            src_shape_vec, src_strides_vec, axis_start, axis_end,
-            ortho_src_shape,
-            masked_src_shape, // 4 vectors modified
-            ortho_src_strides, masked_src_strides);
+        split_iteration_space(src_shape_vec, src_strides_vec, axis_start,
+                              axis_end, ortho_src_shape,
+                              masked_src_shape, // 4 vectors modified
+                              ortho_src_strides, masked_src_strides);
 
         shT ortho_dst_shape;
         shT masked_dst_shape;
         shT ortho_dst_strides;
         shT masked_dst_strides;
-        dpctl::tensor::py_internal::split_iteration_space(
-            dst_shape_vec, dst_strides_vec, axis_start, axis_start + 1,
-            ortho_dst_shape,
-            masked_dst_shape, // 4 vectors modified
-            ortho_dst_strides, masked_dst_strides);
+        split_iteration_space(dst_shape_vec, dst_strides_vec, axis_start,
+                              axis_start + 1, ortho_dst_shape,
+                              masked_dst_shape, // 4 vectors modified
+                              ortho_dst_strides, masked_dst_strides);
 
         assert(ortho_src_shape.size() == static_cast<std::size_t>(ortho_nd));
         assert(ortho_dst_shape.size() == static_cast<std::size_t>(ortho_nd));
@@ -366,7 +364,7 @@ std::pair<sycl::event, sycl::event>
         py::ssize_t ortho_src_offset(0);
         py::ssize_t ortho_dst_offset(0);
 
-        dpctl::tensor::py_internal::simplify_iteration_space(
+        simplify_iteration_space(
             ortho_nd, _shape, ortho_src_strides, ortho_dst_strides,
             // output
             simplified_ortho_shape, simplified_ortho_src_strides,
@@ -646,21 +644,19 @@ std::pair<sycl::event, sycl::event>
         shT masked_dst_shape;
         shT ortho_dst_strides;
         shT masked_dst_strides;
-        dpctl::tensor::py_internal::split_iteration_space(
-            dst_shape_vec, dst_strides_vec, axis_start, axis_end,
-            ortho_dst_shape,
-            masked_dst_shape, // 4 vectors modified
-            ortho_dst_strides, masked_dst_strides);
+        split_iteration_space(dst_shape_vec, dst_strides_vec, axis_start,
+                              axis_end, ortho_dst_shape,
+                              masked_dst_shape, // 4 vectors modified
+                              ortho_dst_strides, masked_dst_strides);
 
         shT ortho_rhs_shape;
         shT masked_rhs_shape;
         shT ortho_rhs_strides;
         shT masked_rhs_strides;
-        dpctl::tensor::py_internal::split_iteration_space(
-            rhs_shape_vec, rhs_strides_vec, axis_start, axis_start + 1,
-            ortho_rhs_shape,
-            masked_rhs_shape, // 4 vectors modified
-            ortho_rhs_strides, masked_rhs_strides);
+        split_iteration_space(rhs_shape_vec, rhs_strides_vec, axis_start,
+                              axis_start + 1, ortho_rhs_shape,
+                              masked_rhs_shape, // 4 vectors modified
+                              ortho_rhs_strides, masked_rhs_strides);
 
         assert(ortho_dst_shape.size() == static_cast<std::size_t>(ortho_nd));
         assert(ortho_rhs_shape.size() == static_cast<std::size_t>(ortho_nd));
@@ -676,7 +672,7 @@ std::pair<sycl::event, sycl::event>
         py::ssize_t ortho_dst_offset(0);
         py::ssize_t ortho_rhs_offset(0);
 
-        dpctl::tensor::py_internal::simplify_iteration_space(
+        simplify_iteration_space(
             ortho_nd, _shape, ortho_dst_strides, ortho_rhs_strides,
             simplified_ortho_shape, simplified_ortho_dst_strides,
             simplified_ortho_rhs_strides, ortho_dst_offset, ortho_rhs_offset);
diff --git a/dpctl_ext/tensor/libtensor/source/clip.cpp b/dpctl_ext/tensor/libtensor/source/clip.cpp
index 1414689bc4b7..3e1c5e8cd262 100644
--- a/dpctl_ext/tensor/libtensor/source/clip.cpp
+++ b/dpctl_ext/tensor/libtensor/source/clip.cpp
@@ -33,18 +33,16 @@
 /// dpctl.tensor.clip
 //===---------------------------------------------------------------------===//
 
-#include <complex>
+#include <cassert>
 #include <cstddef>
-#include <cstdint>
+#include <tuple>
 #include <utility>
 #include <vector>
 
 #include <sycl/sycl.hpp>
 
 #include "dpnp4pybind11.hpp"
-#include <pybind11/complex.h>
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
 
 #include "clip.hpp"
 #include "kernels/clip.hpp"
@@ -218,7 +216,7 @@ std::pair<sycl::event, sycl::event>
     py::ssize_t max_offset(0);
     py::ssize_t dst_offset(0);
 
-    dpctl::tensor::py_internal::simplify_iteration_space_4(
+    simplify_iteration_space_4(
         nd, src_shape, src_strides, min_strides, max_strides, dst_strides,
         // outputs
         simplified_shape, simplified_src_strides, simplified_min_strides,
diff --git a/dpctl_ext/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp b/dpctl_ext/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp
index 3d20be02f885..9ea49ae1d88b 100644
--- a/dpctl_ext/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp
+++ b/dpctl_ext/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp
@@ -188,11 +188,10 @@ std::pair<sycl::event, sycl::event> copy_usm_ndarray_into_usm_ndarray(
     const py::ssize_t *shape = src_shape;
 
     // nd, simplified_* and *_offset are modified by reference
-    dpctl::tensor::py_internal::simplify_iteration_space(
-        nd, shape, src_strides, dst_strides,
-        // output
-        simplified_shape, simplified_src_strides, simplified_dst_strides,
-        src_offset, dst_offset);
+    simplify_iteration_space(nd, shape, src_strides, dst_strides,
+                             // output
+                             simplified_shape, simplified_src_strides,
+                             simplified_dst_strides, src_offset, dst_offset);
 
     if (nd < 2) {
         if (nd == 1) {
diff --git a/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp b/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp
index bbee24c95d4d..5d78862651fc 100644
--- a/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp
+++ b/dpctl_ext/tensor/libtensor/source/copy_as_contig.cpp
@@ -225,11 +225,11 @@ std::pair<sycl::event, sycl::event>
     int nd = src_nd;
 
     // nd, simplified_* and *_offset are modified by reference
-    dpctl::tensor::py_internal::simplify_iteration_space(
-        nd, src_shape_vec.data(), src_strides_vec, dst.get_strides_vector(),
-        // output
-        simplified_shape, simplified_src_strides, simplified_dst_strides,
-        src_offset, dst_offset);
+    simplify_iteration_space(nd, src_shape_vec.data(), src_strides_vec,
+                             dst.get_strides_vector(),
+                             // output
+                             simplified_shape, simplified_src_strides,
+                             simplified_dst_strides, src_offset, dst_offset);
 
     if (!((0 == src_offset) && (0 == dst_offset))) {
         throw std::runtime_error(
@@ -359,11 +359,11 @@ std::pair<sycl::event, sycl::event>
     int nd = src_nd;
 
     // nd, simplified_* and *_offset are modified by reference
-    dpctl::tensor::py_internal::simplify_iteration_space(
-        nd, src_shape_vec.data(), src_strides_vec, dst.get_strides_vector(),
-        // output
-        simplified_shape, simplified_src_strides, simplified_dst_strides,
-        src_offset, dst_offset);
+    simplify_iteration_space(nd, src_shape_vec.data(), src_strides_vec,
+                             dst.get_strides_vector(),
+                             // output
+                             simplified_shape, simplified_src_strides,
+                             simplified_dst_strides, src_offset, dst_offset);
 
     if (!((0 == src_offset) && (0 == dst_offset))) {
         throw std::runtime_error(
@@ -521,12 +521,11 @@ std::pair<sycl::event, sycl::event>
     int nd = static_cast<int>(batch_shape_vec.size());
 
     // nd, simplified_* and *_offset are modified by reference
-    dpctl::tensor::py_internal::simplify_iteration_space(
-        nd, batch_shape_vec.data(), src_batch_strides_vec,
-        dst_batch_strides_vec,
-        // output
-        simplified_shape, simplified_src_strides, simplified_dst_strides,
-        src_offset, dst_offset);
+    simplify_iteration_space(nd, batch_shape_vec.data(), src_batch_strides_vec,
+                             dst_batch_strides_vec,
+                             // output
+                             simplified_shape, simplified_src_strides,
+                             simplified_dst_strides, src_offset, dst_offset);
 
     if (!((0 == src_offset) && (0 == dst_offset))) {
         throw std::runtime_error(
@@ -714,12 +713,11 @@ std::pair<sycl::event, sycl::event>
     int nd = static_cast<int>(batch_shape_vec.size());
 
     // nd, simplified_* and *_offset are modified by reference
-    dpctl::tensor::py_internal::simplify_iteration_space(
-        nd, batch_shape_vec.data(), src_batch_strides_vec,
-        dst_batch_strides_vec,
-        // output
-        simplified_shape, simplified_src_strides, simplified_dst_strides,
-        src_offset, dst_offset);
+    simplify_iteration_space(nd, batch_shape_vec.data(), src_batch_strides_vec,
+                             dst_batch_strides_vec,
+                             // output
+                             simplified_shape, simplified_src_strides,
+                             simplified_dst_strides, src_offset, dst_offset);
 
     if (!((0 == src_offset) && (0 == dst_offset))) {
         throw std::runtime_error(
diff --git a/dpctl_ext/tensor/libtensor/source/copy_for_roll.cpp b/dpctl_ext/tensor/libtensor/source/copy_for_roll.cpp
index a187b2247677..7742c1c96a4e 100644
--- a/dpctl_ext/tensor/libtensor/source/copy_for_roll.cpp
+++ b/dpctl_ext/tensor/libtensor/source/copy_for_roll.cpp
@@ -197,11 +197,10 @@ std::pair<sycl::event, sycl::event>
     const py::ssize_t *shape = src_shape_ptr;
 
     // nd, simplified_* and *_offset are modified by reference
-    dpctl::tensor::py_internal::simplify_iteration_space(
-        nd, shape, src_strides, dst_strides,
-        // output
-        simplified_shape, simplified_src_strides, simplified_dst_strides,
-        src_offset, dst_offset);
+    simplify_iteration_space(nd, shape, src_strides, dst_strides,
+                             // output
+                             simplified_shape, simplified_src_strides,
+                             simplified_dst_strides, src_offset, dst_offset);
 
     if (nd == 1 && simplified_src_strides[0] == 1 &&
         simplified_dst_strides[0] == 1) {
diff --git a/dpctl_ext/tensor/libtensor/source/device_support_queries.cpp b/dpctl_ext/tensor/libtensor/source/device_support_queries.cpp
index 97a8ba83831e..3cc0952c2080 100644
--- a/dpctl_ext/tensor/libtensor/source/device_support_queries.cpp
+++ b/dpctl_ext/tensor/libtensor/source/device_support_queries.cpp
@@ -36,7 +36,7 @@
 
 #include "dpnp4pybind11.hpp"
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
+
 #include <sycl/sycl.hpp>
 
 namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/cbrt.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/cbrt.cpp
new file mode 100644
index 000000000000..3b8e1e85e9ff
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/cbrt.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "cbrt.hpp"
+#include "elementwise_functions.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/cbrt.hpp"
+#include "kernels/elementwise_functions/common.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U37: ==== CBRT   (x)
+namespace impl
+{
+
+namespace cbrt_fn_ns = dpctl::tensor::kernels::cbrt;
+
+static unary_contig_impl_fn_ptr_t cbrt_contig_dispatch_vector[td_ns::num_types];
+static int cbrt_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    cbrt_strided_dispatch_vector[td_ns::num_types];
+
+// Fill the cbrt contig/strided/output-typeid tables declared above.
+void populate_cbrt_dispatch_vectors(void)
+{
+    using namespace td_ns;
+
+    using cbrt_fn_ns::CbrtContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, CbrtContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(cbrt_contig_dispatch_vector);
+
+    using cbrt_fn_ns::CbrtStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, CbrtStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(cbrt_strided_dispatch_vector);
+
+    using cbrt_fn_ns::CbrtTypeMapFactory;
+    DispatchVectorBuilder<int, CbrtTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(cbrt_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Build the cbrt dispatch tables, then expose _cbrt and _cbrt_result_type.
+void init_cbrt(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    impl::populate_cbrt_dispatch_vectors();
+    using impl::cbrt_contig_dispatch_vector;
+    using impl::cbrt_output_typeid_vector;
+    using impl::cbrt_strided_dispatch_vector;
+
+    auto cbrt_pyapi = [&](const arrayT &src, const arrayT &dst,
+                          sycl::queue &exec_q,
+                          const event_vecT &depends = {}) {
+        return py_unary_ufunc(
+            src, dst, exec_q, depends, cbrt_output_typeid_vector,
+            cbrt_contig_dispatch_vector, cbrt_strided_dispatch_vector);
+    };
+    m.def("_cbrt", cbrt_pyapi, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    auto cbrt_result_type_pyapi = [&](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(dtype, cbrt_output_typeid_vector);
+    };
+    m.def("_cbrt_result_type", cbrt_result_type_pyapi);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/cbrt.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/cbrt.hpp
new file mode 100644
index 000000000000..0d52f48420f1
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/cbrt.hpp
@@ -0,0 +1,48 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares the initializer of the cbrt bindings for the
+/// dpctl.tensor._tensor_elementwise_impl extension.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+
+/// Registers the `_cbrt` and `_cbrt_result_type` bindings on module `m`;
+/// defined in cbrt.cpp.
+extern void init_cbrt(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp
index 0a0c02f7ed31..144e39be252f 100644
--- a/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/elementwise_common.cpp
@@ -51,7 +51,7 @@
 // #include "bitwise_or.hpp"
 // #include "bitwise_right_shift.hpp"
 // #include "bitwise_xor.hpp"
-// #include "cbrt.hpp"
+#include "cbrt.hpp"
 #include "ceil.hpp"
 #include "conj.hpp"
 // #include "copysign.hpp"
@@ -59,7 +59,7 @@
 #include "cosh.hpp"
 // #include "equal.hpp"
 #include "exp.hpp"
-// #include "exp2.hpp"
+#include "exp2.hpp"
 #include "expm1.hpp"
 #include "floor.hpp"
 // #include "floor_divide.hpp"
@@ -89,23 +89,23 @@
 // #include "not_equal.hpp"
 #include "positive.hpp"
 // #include "pow.hpp"
-// #include "proj.hpp"
-// #include "real.hpp"
-// #include "reciprocal.hpp"
+#include "proj.hpp"
+#include "real.hpp"
+#include "reciprocal.hpp"
 // #include "remainder.hpp"
-// #include "round.hpp"
-// #include "rsqrt.hpp"
-// #include "sign.hpp"
-// #include "signbit.hpp"
-// #include "sin.hpp"
-// #include "sinh.hpp"
-// #include "sqrt.hpp"
-// #include "square.hpp"
+#include "round.hpp"
+#include "rsqrt.hpp"
+#include "sign.hpp"
+#include "signbit.hpp"
+#include "sin.hpp"
+#include "sinh.hpp"
+#include "sqrt.hpp"
+#include "square.hpp"
 // #include "subtract.hpp"
-// #include "tan.hpp"
-// #include "tanh.hpp"
+#include "tan.hpp"
+#include "tanh.hpp"
 // #include "true_divide.hpp"
-// #include "trunc.hpp"
+#include "trunc.hpp"
 
 namespace dpctl::tensor::py_internal
 {
@@ -131,7 +131,7 @@ void init_elementwise_functions(py::module_ m)
     // init_bitwise_or(m);
     // init_bitwise_right_shift(m);
     // init_bitwise_xor(m);
-    // init_cbrt(m);
+    init_cbrt(m);
     init_ceil(m);
     init_conj(m);
     // init_copysign(m);
@@ -140,7 +140,7 @@ void init_elementwise_functions(py::module_ m)
     // init_divide(m);
     // init_equal(m);
     init_exp(m);
-    // init_exp2(m);
+    init_exp2(m);
     init_expm1(m);
     init_floor(m);
     // init_floor_divide(m);
@@ -170,22 +170,22 @@ void init_elementwise_functions(py::module_ m)
     // init_not_equal(m);
     init_positive(m);
     // init_pow(m);
-    // init_proj(m);
-    // init_real(m);
-    // init_reciprocal(m);
+    init_proj(m);
+    init_real(m);
+    init_reciprocal(m);
     // init_remainder(m);
-    // init_round(m);
-    // init_rsqrt(m);
-    // init_sign(m);
-    // init_signbit(m);
-    // init_sin(m);
-    // init_sinh(m);
-    // init_sqrt(m);
-    // init_square(m);
+    init_round(m);
+    init_rsqrt(m);
+    init_sign(m);
+    init_signbit(m);
+    init_sin(m);
+    init_sinh(m);
+    init_sqrt(m);
+    init_square(m);
     // init_subtract(m);
-    // init_tan(m);
-    // init_tanh(m);
-    // init_trunc(m);
+    init_tan(m);
+    init_tanh(m);
+    init_trunc(m);
 }
 
 } // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/exp2.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/exp2.cpp
new file mode 100644
index 000000000000..5e77d25e7b0c
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/exp2.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "exp2.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/exp2.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U38: ==== EXP2   (x)
+namespace impl
+{
+
+namespace exp2_fn_ns = dpctl::tensor::kernels::exp2;
+
+static unary_contig_impl_fn_ptr_t exp2_contig_dispatch_vector[td_ns::num_types];
+static int exp2_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    exp2_strided_dispatch_vector[td_ns::num_types];
+
+// Fill the exp2 contig/strided/output-typeid tables declared above.
+void populate_exp2_dispatch_vectors(void)
+{
+    using namespace td_ns;
+
+    using exp2_fn_ns::Exp2ContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, Exp2ContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(exp2_contig_dispatch_vector);
+
+    using exp2_fn_ns::Exp2StridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, Exp2StridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(exp2_strided_dispatch_vector);
+
+    using exp2_fn_ns::Exp2TypeMapFactory;
+    DispatchVectorBuilder<int, Exp2TypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(exp2_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Build the exp2 dispatch tables, then expose _exp2 and _exp2_result_type.
+void init_exp2(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    impl::populate_exp2_dispatch_vectors();
+    using impl::exp2_contig_dispatch_vector;
+    using impl::exp2_output_typeid_vector;
+    using impl::exp2_strided_dispatch_vector;
+
+    auto exp2_pyapi = [&](const arrayT &src, const arrayT &dst,
+                          sycl::queue &exec_q,
+                          const event_vecT &depends = {}) {
+        return py_unary_ufunc(
+            src, dst, exec_q, depends, exp2_output_typeid_vector,
+            exp2_contig_dispatch_vector, exp2_strided_dispatch_vector);
+    };
+    m.def("_exp2", exp2_pyapi, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    auto exp2_result_type_pyapi = [&](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(dtype, exp2_output_typeid_vector);
+    };
+    m.def("_exp2_result_type", exp2_result_type_pyapi);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/exp2.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/exp2.hpp
new file mode 100644
index 000000000000..f9f315d14383
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/exp2.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+
+extern void init_exp2(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/proj.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/proj.cpp
new file mode 100644
index 000000000000..fddc5030f665
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/proj.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "proj.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/proj.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U40: ==== PROJ   (x)
+namespace impl
+{
+
+namespace proj_fn_ns = dpctl::tensor::kernels::proj;
+
+static unary_contig_impl_fn_ptr_t proj_contig_dispatch_vector[td_ns::num_types];
+static int proj_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    proj_strided_dispatch_vector[td_ns::num_types];
+
+// Fill the three proj dispatch tables from the factories in proj_fn_ns.
+void populate_proj_dispatch_vectors(void)
+{
+    using namespace td_ns;
+    // proj_contig_dispatch_vector
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t,
+                          proj_fn_ns::ProjContigFactory, num_types>
+        contig_dvb;
+    contig_dvb.populate_dispatch_vector(proj_contig_dispatch_vector);
+
+    // proj_strided_dispatch_vector
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t,
+                          proj_fn_ns::ProjStridedFactory, num_types>
+        strided_dvb;
+    strided_dvb.populate_dispatch_vector(proj_strided_dispatch_vector);
+
+    // proj_output_typeid_vector
+    DispatchVectorBuilder<int, proj_fn_ns::ProjTypeMapFactory, num_types>
+        typemap_dvb;
+    typemap_dvb.populate_dispatch_vector(proj_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Populate the proj dispatch tables, then register `_proj` and
+// `_proj_result_type` on module `m`.
+void init_proj(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    impl::populate_proj_dispatch_vectors();
+
+    auto run_proj = [&](const arrayT &src, const arrayT &dst,
+                        sycl::queue &exec_q,
+                        const event_vecT &depends = {}) {
+        return py_unary_ufunc(src, dst, exec_q, depends,
+                              impl::proj_output_typeid_vector,
+                              impl::proj_contig_dispatch_vector,
+                              impl::proj_strided_dispatch_vector);
+    };
+    m.def("_proj", run_proj, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    auto proj_result_type = [&](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(
+            dtype, impl::proj_output_typeid_vector);
+    };
+    m.def("_proj_result_type", proj_result_type);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/proj.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/proj.hpp
new file mode 100644
index 000000000000..3cdc0e8271b0
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/proj.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+
+extern void init_proj(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/real.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/real.cpp
new file mode 100644
index 000000000000..de3ec82260aa
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/real.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "real.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/real.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U27: ==== REAL   (x)
+namespace impl
+{
+
+namespace real_fn_ns = dpctl::tensor::kernels::real;
+
+static unary_contig_impl_fn_ptr_t real_contig_dispatch_vector[td_ns::num_types];
+static int real_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    real_strided_dispatch_vector[td_ns::num_types];
+
+// Fill the three real dispatch tables from the factories in real_fn_ns.
+void populate_real_dispatch_vectors(void)
+{
+    using namespace td_ns;
+    // real_contig_dispatch_vector
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t,
+                          real_fn_ns::RealContigFactory, num_types>
+        contig_dvb;
+    contig_dvb.populate_dispatch_vector(real_contig_dispatch_vector);
+
+    // real_strided_dispatch_vector
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t,
+                          real_fn_ns::RealStridedFactory, num_types>
+        strided_dvb;
+    strided_dvb.populate_dispatch_vector(real_strided_dispatch_vector);
+
+    // real_output_typeid_vector
+    DispatchVectorBuilder<int, real_fn_ns::RealTypeMapFactory, num_types>
+        typemap_dvb;
+    typemap_dvb.populate_dispatch_vector(real_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Populate the real dispatch tables, then register `_real` and
+// `_real_result_type` on module `m`.
+void init_real(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    impl::populate_real_dispatch_vectors();
+
+    auto run_real = [&](const arrayT &src, const arrayT &dst,
+                        sycl::queue &exec_q,
+                        const event_vecT &depends = {}) {
+        return py_unary_ufunc(src, dst, exec_q, depends,
+                              impl::real_output_typeid_vector,
+                              impl::real_contig_dispatch_vector,
+                              impl::real_strided_dispatch_vector);
+    };
+    m.def("_real", run_real, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    auto real_result_type = [&](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(
+            dtype, impl::real_output_typeid_vector);
+    };
+    m.def("_real_result_type", real_result_type);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/real.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/real.hpp
new file mode 100644
index 000000000000..81f4743e823b
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/real.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+
+extern void init_real(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/reciprocal.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/reciprocal.cpp
new file mode 100644
index 000000000000..81ce427ade92
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/reciprocal.cpp
@@ -0,0 +1,128 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "reciprocal.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/reciprocal.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U42: ==== RECIPROCAL   (x)
+namespace impl
+{
+
+namespace reciprocal_fn_ns = dpctl::tensor::kernels::reciprocal;
+
+static unary_contig_impl_fn_ptr_t
+    reciprocal_contig_dispatch_vector[td_ns::num_types];
+static int reciprocal_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    reciprocal_strided_dispatch_vector[td_ns::num_types];
+
+// Fill the reciprocal dispatch tables from factories in reciprocal_fn_ns.
+void populate_reciprocal_dispatch_vectors(void)
+{
+    using namespace td_ns;
+
+    // reciprocal_contig_dispatch_vector
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t,
+                          reciprocal_fn_ns::ReciprocalContigFactory, num_types>
+        contig_dvb;
+    contig_dvb.populate_dispatch_vector(reciprocal_contig_dispatch_vector);
+    // reciprocal_strided_dispatch_vector
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t,
+                          reciprocal_fn_ns::ReciprocalStridedFactory, num_types>
+        strided_dvb;
+    strided_dvb.populate_dispatch_vector(reciprocal_strided_dispatch_vector);
+    // reciprocal_output_typeid_vector
+    DispatchVectorBuilder<int, reciprocal_fn_ns::ReciprocalTypeMapFactory,
+                          num_types>
+        typemap_dvb;
+    typemap_dvb.populate_dispatch_vector(reciprocal_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Populate the reciprocal dispatch tables, then register `_reciprocal` and
+// `_reciprocal_result_type` on module `m`.
+void init_reciprocal(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    // fill the tables that the bindings below read from
+    impl::populate_reciprocal_dispatch_vectors();
+
+    // binding: _reciprocal(src, dst, sycl_queue, depends)
+    auto run_reciprocal = [&](const arrayT &src, const arrayT &dst,
+                              sycl::queue &exec_q,
+                              const event_vecT &depends = {}) {
+        return py_unary_ufunc(src, dst, exec_q, depends,
+                              impl::reciprocal_output_typeid_vector,
+                              impl::reciprocal_contig_dispatch_vector,
+                              impl::reciprocal_strided_dispatch_vector);
+    };
+    m.def("_reciprocal", run_reciprocal, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    // binding: _reciprocal_result_type(dtype)
+    auto reciprocal_result_type = [&](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(
+            dtype, impl::reciprocal_output_typeid_vector);
+    };
+    m.def("_reciprocal_result_type", reciprocal_result_type);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/reciprocal.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/reciprocal.hpp
new file mode 100644
index 000000000000..1d2156f3464e
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/reciprocal.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+
+extern void init_reciprocal(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/round.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/round.cpp
new file mode 100644
index 000000000000..d906ecdf07af
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/round.cpp
@@ -0,0 +1,125 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "round.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/round.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U28: ==== ROUND   (x)
+namespace impl
+{
+
+namespace round_fn_ns = dpctl::tensor::kernels::round;
+
+static unary_contig_impl_fn_ptr_t
+    round_contig_dispatch_vector[td_ns::num_types];
+static int round_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    round_strided_dispatch_vector[td_ns::num_types];
+
+// Fill the three round dispatch tables from the factories in round_fn_ns.
+void populate_round_dispatch_vectors(void)
+{
+    using namespace td_ns;
+    // round_contig_dispatch_vector
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t,
+                          round_fn_ns::RoundContigFactory, num_types>
+        contig_dvb;
+    contig_dvb.populate_dispatch_vector(round_contig_dispatch_vector);
+
+    // round_strided_dispatch_vector
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t,
+                          round_fn_ns::RoundStridedFactory, num_types>
+        strided_dvb;
+    strided_dvb.populate_dispatch_vector(round_strided_dispatch_vector);
+
+    // round_output_typeid_vector
+    DispatchVectorBuilder<int, round_fn_ns::RoundTypeMapFactory, num_types>
+        typemap_dvb;
+    typemap_dvb.populate_dispatch_vector(round_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Populate the round dispatch tables, then register `_round` and
+// `_round_result_type` on module `m`.
+void init_round(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    // fill the tables that the bindings below read from
+    impl::populate_round_dispatch_vectors();
+
+    auto run_round = [&](const arrayT &src, const arrayT &dst,
+                         sycl::queue &exec_q,
+                         const event_vecT &depends = {}) {
+        return py_unary_ufunc(src, dst, exec_q, depends,
+                              impl::round_output_typeid_vector,
+                              impl::round_contig_dispatch_vector,
+                              impl::round_strided_dispatch_vector);
+    };
+    m.def("_round", run_round, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    auto round_result_type = [&](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(
+            dtype, impl::round_output_typeid_vector);
+    };
+    m.def("_round_result_type", round_result_type);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/round.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/round.hpp
new file mode 100644
index 000000000000..ca56e110eec5
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/round.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+/// Registers `_round` and `_round_result_type` on the module `m`.
+void init_round(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/rsqrt.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/rsqrt.cpp
new file mode 100644
index 000000000000..61a5c8bb94d5
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/rsqrt.cpp
@@ -0,0 +1,126 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "rsqrt.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/rsqrt.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U39: ==== RSQRT   (x)
+namespace impl
+{
+
+namespace rsqrt_fn_ns = dpctl::tensor::kernels::rsqrt;
+
+// File-local tables with td_ns::num_types entries each; filled below.
+static unary_contig_impl_fn_ptr_t
+    rsqrt_contig_dispatch_vector[td_ns::num_types];
+static int rsqrt_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    rsqrt_strided_dispatch_vector[td_ns::num_types];
+
+// Fills the contiguous, strided and output type-id tables for rsqrt.
+void populate_rsqrt_dispatch_vectors()
+{
+    using namespace td_ns;
+
+    using rsqrt_fn_ns::RsqrtContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, RsqrtContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(rsqrt_contig_dispatch_vector);
+
+    using rsqrt_fn_ns::RsqrtStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, RsqrtStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(rsqrt_strided_dispatch_vector);
+
+    using rsqrt_fn_ns::RsqrtTypeMapFactory;
+    DispatchVectorBuilder<int, RsqrtTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(rsqrt_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Binds `_rsqrt` and `_rsqrt_result_type` on `m` after filling the tables.
+void init_rsqrt(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    impl::populate_rsqrt_dispatch_vectors();
+    using impl::rsqrt_contig_dispatch_vector;
+    using impl::rsqrt_output_typeid_vector;
+    using impl::rsqrt_strided_dispatch_vector;
+
+    // The tables are namespace-scope objects, so the lambdas need no capture.
+    auto rsqrt_pyapi = [](const arrayT &src, const arrayT &dst,
+                          sycl::queue &exec_q,
+                          const event_vecT &depends = {}) {
+        return py_unary_ufunc(
+            src, dst, exec_q, depends, rsqrt_output_typeid_vector,
+            rsqrt_contig_dispatch_vector, rsqrt_strided_dispatch_vector);
+    };
+    m.def("_rsqrt", rsqrt_pyapi, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    auto rsqrt_result_type_pyapi = [](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(dtype, rsqrt_output_typeid_vector);
+    };
+    m.def("_rsqrt_result_type", rsqrt_result_type_pyapi);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/rsqrt.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/rsqrt.hpp
new file mode 100644
index 000000000000..4ba740a31777
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/rsqrt.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+/// Registers `_rsqrt` and `_rsqrt_result_type` on the module `m`.
+void init_rsqrt(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/sign.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sign.cpp
new file mode 100644
index 000000000000..deb5360bcdd1
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sign.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "sign.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/sign.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U29: ==== SIGN   (x)
+namespace impl
+{
+
+namespace sign_fn_ns = dpctl::tensor::kernels::sign;
+
+// File-local tables with td_ns::num_types entries each; filled below.
+static unary_contig_impl_fn_ptr_t sign_contig_dispatch_vector[td_ns::num_types];
+static int sign_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    sign_strided_dispatch_vector[td_ns::num_types];
+
+// Fills the contiguous, strided and output type-id tables for sign.
+void populate_sign_dispatch_vectors()
+{
+    using namespace td_ns;
+
+    using sign_fn_ns::SignContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, SignContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(sign_contig_dispatch_vector);
+
+    using sign_fn_ns::SignStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, SignStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(sign_strided_dispatch_vector);
+
+    using sign_fn_ns::SignTypeMapFactory;
+    DispatchVectorBuilder<int, SignTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(sign_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Binds `_sign` and `_sign_result_type` on `m` after filling the tables.
+void init_sign(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+    impl::populate_sign_dispatch_vectors();
+    using impl::sign_contig_dispatch_vector;
+    using impl::sign_output_typeid_vector;
+    using impl::sign_strided_dispatch_vector;
+
+    // The tables are namespace-scope objects, so the lambdas need no capture.
+    auto sign_pyapi = [](const arrayT &src, const arrayT &dst,
+                         sycl::queue &exec_q,
+                         const event_vecT &depends = {}) {
+        return py_unary_ufunc(
+            src, dst, exec_q, depends, sign_output_typeid_vector,
+            sign_contig_dispatch_vector, sign_strided_dispatch_vector);
+    };
+    m.def("_sign", sign_pyapi, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    auto sign_result_type_pyapi = [](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(dtype, sign_output_typeid_vector);
+    };
+    m.def("_sign_result_type", sign_result_type_pyapi);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/sign.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sign.hpp
new file mode 100644
index 000000000000..19686ada3dbf
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sign.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+/// Registers `_sign` and `_sign_result_type` on the module `m`.
+void init_sign(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/signbit.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/signbit.cpp
new file mode 100644
index 000000000000..3ed9eba46ea1
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/signbit.cpp
@@ -0,0 +1,127 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "signbit.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/signbit.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U41: ==== SIGNBIT   (x)
+namespace impl
+{
+
+namespace signbit_fn_ns = dpctl::tensor::kernels::signbit;
+
+// File-local tables with td_ns::num_types entries each; filled below.
+static unary_contig_impl_fn_ptr_t
+    signbit_contig_dispatch_vector[td_ns::num_types];
+static int signbit_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    signbit_strided_dispatch_vector[td_ns::num_types];
+
+// Fills the contiguous, strided and output type-id tables for signbit.
+void populate_signbit_dispatch_vectors()
+{
+    using namespace td_ns;
+
+    using signbit_fn_ns::SignbitContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, SignbitContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(signbit_contig_dispatch_vector);
+
+    using signbit_fn_ns::SignbitStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, SignbitStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(signbit_strided_dispatch_vector);
+
+    using signbit_fn_ns::SignbitTypeMapFactory;
+    DispatchVectorBuilder<int, SignbitTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(signbit_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Binds `_signbit` and `_signbit_result_type` on `m` after filling the tables.
+void init_signbit(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+
+    impl::populate_signbit_dispatch_vectors();
+    using impl::signbit_contig_dispatch_vector;
+    using impl::signbit_output_typeid_vector;
+    using impl::signbit_strided_dispatch_vector;
+
+    // The tables are namespace-scope objects, so the lambdas need no capture.
+    auto signbit_pyapi = [](const arrayT &src, const arrayT &dst,
+                            sycl::queue &exec_q,
+                            const event_vecT &depends = {}) {
+        return py_unary_ufunc(src, dst, exec_q, depends,
+                              signbit_output_typeid_vector,
+                              signbit_contig_dispatch_vector,
+                              signbit_strided_dispatch_vector);
+    };
+    m.def("_signbit", signbit_pyapi, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    auto signbit_result_type_pyapi = [](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(dtype, signbit_output_typeid_vector);
+    };
+    m.def("_signbit_result_type", signbit_result_type_pyapi);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/signbit.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/signbit.hpp
new file mode 100644
index 000000000000..292386b174fc
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/signbit.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+/// Registers `_signbit` and `_signbit_result_type` on the module `m`.
+void init_signbit(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/sin.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sin.cpp
new file mode 100644
index 000000000000..7f8d0e79a42c
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sin.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "sin.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/sin.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U30: ==== SIN   (x)
+namespace impl
+{
+
+namespace sin_fn_ns = dpctl::tensor::kernels::sin;
+
+// File-local tables with td_ns::num_types entries each; filled below.
+static unary_contig_impl_fn_ptr_t sin_contig_dispatch_vector[td_ns::num_types];
+static int sin_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    sin_strided_dispatch_vector[td_ns::num_types];
+
+// Fills the contiguous, strided and output type-id tables for sin.
+void populate_sin_dispatch_vectors()
+{
+    using namespace td_ns;
+
+    using sin_fn_ns::SinContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, SinContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(sin_contig_dispatch_vector);
+
+    using sin_fn_ns::SinStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, SinStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(sin_strided_dispatch_vector);
+
+    using sin_fn_ns::SinTypeMapFactory;
+    DispatchVectorBuilder<int, SinTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(sin_output_typeid_vector);
+}
+
+} // namespace impl
+
+// Binds `_sin` and `_sin_result_type` on `m` after filling the tables.
+void init_sin(py::module_ m)
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+    impl::populate_sin_dispatch_vectors();
+    using impl::sin_contig_dispatch_vector;
+    using impl::sin_output_typeid_vector;
+    using impl::sin_strided_dispatch_vector;
+
+    // The tables are namespace-scope objects, so the lambdas need no capture.
+    auto sin_pyapi = [](const arrayT &src, const arrayT &dst,
+                        sycl::queue &exec_q,
+                        const event_vecT &depends = {}) {
+        return py_unary_ufunc(
+            src, dst, exec_q, depends, sin_output_typeid_vector,
+            sin_contig_dispatch_vector, sin_strided_dispatch_vector);
+    };
+    m.def("_sin", sin_pyapi, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    auto sin_result_type_pyapi = [](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(dtype, sin_output_typeid_vector);
+    };
+    m.def("_sin_result_type", sin_result_type_pyapi);
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/sin.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sin.hpp
new file mode 100644
index 000000000000..a4b3da08b7fc
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sin.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+// Registers the `_sin` and `_sin_result_type` bindings on module `m`.
+extern void init_sin(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/sinh.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sinh.cpp
new file mode 100644
index 000000000000..d63335fa5408
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sinh.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "sinh.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/sinh.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U31: ==== SINH   (x)
+namespace impl
+{
+
+namespace sinh_fn_ns = dpctl::tensor::kernels::sinh;
+// File-local sinh tables with td_ns::num_types entries each (filled below).
+static unary_contig_impl_fn_ptr_t sinh_contig_dispatch_vector[td_ns::num_types];
+static int sinh_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    sinh_strided_dispatch_vector[td_ns::num_types];
+
+void populate_sinh_dispatch_vectors(void) // fills the three sinh tables
+{
+    using namespace td_ns;
+    namespace fn_ns = sinh_fn_ns;
+    // Table of unary_contig_impl_fn_ptr_t, built with SinhContigFactory.
+    using fn_ns::SinhContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, SinhContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(sinh_contig_dispatch_vector);
+    // Table of unary_strided_impl_fn_ptr_t, built with SinhStridedFactory.
+    using fn_ns::SinhStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, SinhStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(sinh_strided_dispatch_vector);
+    // Table of int output type ids, built with SinhTypeMapFactory.
+    using fn_ns::SinhTypeMapFactory;
+    DispatchVectorBuilder<int, SinhTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(sinh_output_typeid_vector);
+}
+
+} // namespace impl
+
+void init_sinh(py::module_ m) // binds _sinh and _sinh_result_type on m
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+    {
+        impl::populate_sinh_dispatch_vectors(); // fill tables before binding
+        using impl::sinh_contig_dispatch_vector;
+        using impl::sinh_output_typeid_vector;
+        using impl::sinh_strided_dispatch_vector;
+        // No captures: the callables only use the namespace-scope tables.
+        auto sinh_pyapi = [](const arrayT &src, const arrayT &dst,
+                             sycl::queue &exec_q,
+                             const event_vecT &depends = {}) {
+            return py_unary_ufunc(
+                src, dst, exec_q, depends, sinh_output_typeid_vector,
+                sinh_contig_dispatch_vector, sinh_strided_dispatch_vector);
+        };
+        m.def("_sinh", sinh_pyapi, "", py::arg("src"), py::arg("dst"),
+              py::arg("sycl_queue"), py::arg("depends") = py::list());
+        // `_sinh_result_type(dtype)` forwards to py_unary_ufunc_result_type.
+        auto sinh_result_type_pyapi = [](const py::dtype &dtype) {
+            return py_unary_ufunc_result_type(dtype, sinh_output_typeid_vector);
+        };
+        m.def("_sinh_result_type", sinh_result_type_pyapi);
+    }
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/sinh.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sinh.hpp
new file mode 100644
index 000000000000..4a0d90d24c8c
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sinh.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+// Registers the `_sinh` and `_sinh_result_type` bindings on module `m`.
+extern void init_sinh(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/sqrt.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sqrt.cpp
new file mode 100644
index 000000000000..f5483f5a05a9
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sqrt.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "sqrt.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/sqrt.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U33: ==== SQRT   (x)
+namespace impl
+{
+
+namespace sqrt_fn_ns = dpctl::tensor::kernels::sqrt;
+// File-local sqrt tables with td_ns::num_types entries each (filled below).
+static unary_contig_impl_fn_ptr_t sqrt_contig_dispatch_vector[td_ns::num_types];
+static int sqrt_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    sqrt_strided_dispatch_vector[td_ns::num_types];
+
+void populate_sqrt_dispatch_vectors(void) // fills the three sqrt tables
+{
+    using namespace td_ns;
+    namespace fn_ns = sqrt_fn_ns;
+    // Table of unary_contig_impl_fn_ptr_t, built with SqrtContigFactory.
+    using fn_ns::SqrtContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, SqrtContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(sqrt_contig_dispatch_vector);
+    // Table of unary_strided_impl_fn_ptr_t, built with SqrtStridedFactory.
+    using fn_ns::SqrtStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, SqrtStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(sqrt_strided_dispatch_vector);
+    // Table of int output type ids, built with SqrtTypeMapFactory.
+    using fn_ns::SqrtTypeMapFactory;
+    DispatchVectorBuilder<int, SqrtTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(sqrt_output_typeid_vector);
+}
+
+} // namespace impl
+
+void init_sqrt(py::module_ m) // binds _sqrt and _sqrt_result_type on m
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+    {
+        impl::populate_sqrt_dispatch_vectors(); // fill tables before binding
+        using impl::sqrt_contig_dispatch_vector;
+        using impl::sqrt_output_typeid_vector;
+        using impl::sqrt_strided_dispatch_vector;
+        // No captures: the callables only use the namespace-scope tables.
+        auto sqrt_pyapi = [](const arrayT &src, const arrayT &dst,
+                             sycl::queue &exec_q,
+                             const event_vecT &depends = {}) {
+            return py_unary_ufunc(
+                src, dst, exec_q, depends, sqrt_output_typeid_vector,
+                sqrt_contig_dispatch_vector, sqrt_strided_dispatch_vector);
+        };
+        m.def("_sqrt", sqrt_pyapi, "", py::arg("src"), py::arg("dst"),
+              py::arg("sycl_queue"), py::arg("depends") = py::list());
+        // `_sqrt_result_type(dtype)` forwards to py_unary_ufunc_result_type.
+        auto sqrt_result_type_pyapi = [](const py::dtype &dtype) {
+            return py_unary_ufunc_result_type(dtype, sqrt_output_typeid_vector);
+        };
+        m.def("_sqrt_result_type", sqrt_result_type_pyapi);
+    }
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/sqrt.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sqrt.hpp
new file mode 100644
index 000000000000..e8f7014c1afc
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/sqrt.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+// Registers the `_sqrt` and `_sqrt_result_type` bindings on module `m`.
+extern void init_sqrt(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/square.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/square.cpp
new file mode 100644
index 000000000000..b7116bc38bfc
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/square.cpp
@@ -0,0 +1,126 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "square.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/square.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U32: ==== SQUARE   (x)
+namespace impl
+{
+
+namespace square_fn_ns = dpctl::tensor::kernels::square;
+// File-local square tables with td_ns::num_types entries each (filled below).
+static unary_contig_impl_fn_ptr_t
+    square_contig_dispatch_vector[td_ns::num_types];
+static int square_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    square_strided_dispatch_vector[td_ns::num_types];
+
+void populate_square_dispatch_vectors(void) // fills the three square tables
+{
+    using namespace td_ns;
+    namespace fn_ns = square_fn_ns;
+    // Table of unary_contig_impl_fn_ptr_t, built with SquareContigFactory.
+    using fn_ns::SquareContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, SquareContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(square_contig_dispatch_vector);
+    // Table of unary_strided_impl_fn_ptr_t, built with SquareStridedFactory.
+    using fn_ns::SquareStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, SquareStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(square_strided_dispatch_vector);
+    // Table of int output type ids, built with SquareTypeMapFactory.
+    using fn_ns::SquareTypeMapFactory;
+    DispatchVectorBuilder<int, SquareTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(square_output_typeid_vector);
+}
+
+} // namespace impl
+
+void init_square(py::module_ m) // binds _square and _square_result_type
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+    {
+        impl::populate_square_dispatch_vectors(); // fill tables before binding
+        using impl::square_contig_dispatch_vector;
+        using impl::square_output_typeid_vector;
+        using impl::square_strided_dispatch_vector;
+        // No captures: the callables only use the namespace-scope tables.
+        auto square_pyapi = [](const arrayT &src, const arrayT &dst,
+                               sycl::queue &exec_q,
+                               const event_vecT &depends = {}) {
+            return py_unary_ufunc(
+                src, dst, exec_q, depends, square_output_typeid_vector,
+                square_contig_dispatch_vector, square_strided_dispatch_vector);
+        };
+        m.def("_square", square_pyapi, "", py::arg("src"), py::arg("dst"),
+              py::arg("sycl_queue"), py::arg("depends") = py::list());
+        // `_square_result_type(dtype)` forwards to py_unary_ufunc_result_type.
+        auto square_result_type_pyapi = [](const py::dtype &dtype) {
+            return py_unary_ufunc_result_type(dtype,
+                                              square_output_typeid_vector);
+        };
+        m.def("_square_result_type", square_result_type_pyapi);
+    }
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/square.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/square.hpp
new file mode 100644
index 000000000000..3f23f184499c
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/square.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+// Registers the `_square` and `_square_result_type` bindings on module `m`.
+extern void init_square(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/tan.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/tan.cpp
new file mode 100644
index 000000000000..d8e8116bafeb
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/tan.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "tan.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/tan.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U34: ==== TAN   (x)
+namespace impl
+{
+
+namespace tan_fn_ns = dpctl::tensor::kernels::tan;
+// File-local tan tables with td_ns::num_types entries each (filled below).
+static unary_contig_impl_fn_ptr_t tan_contig_dispatch_vector[td_ns::num_types];
+static int tan_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    tan_strided_dispatch_vector[td_ns::num_types];
+
+void populate_tan_dispatch_vectors(void) // fills the three tan tables
+{
+    using namespace td_ns;
+    namespace fn_ns = tan_fn_ns;
+    // Table of unary_contig_impl_fn_ptr_t, built with TanContigFactory.
+    using fn_ns::TanContigFactory;
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t, TanContigFactory,
+                          num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(tan_contig_dispatch_vector);
+    // Table of unary_strided_impl_fn_ptr_t, built with TanStridedFactory.
+    using fn_ns::TanStridedFactory;
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t, TanStridedFactory,
+                          num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(tan_strided_dispatch_vector);
+    // Table of int output type ids, built with TanTypeMapFactory.
+    using fn_ns::TanTypeMapFactory;
+    DispatchVectorBuilder<int, TanTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(tan_output_typeid_vector);
+}
+
+} // namespace impl
+
+void init_tan(py::module_ m) // binds _tan and _tan_result_type on m
+{
+    using arrayT = dpctl::tensor::usm_ndarray;
+    using event_vecT = std::vector<sycl::event>;
+    {
+        impl::populate_tan_dispatch_vectors(); // fill tables before binding
+        using impl::tan_contig_dispatch_vector;
+        using impl::tan_output_typeid_vector;
+        using impl::tan_strided_dispatch_vector;
+        // No captures: the callables only use the namespace-scope tables.
+        auto tan_pyapi = [](const arrayT &src, const arrayT &dst,
+                            sycl::queue &exec_q,
+                            const event_vecT &depends = {}) {
+            return py_unary_ufunc(
+                src, dst, exec_q, depends, tan_output_typeid_vector,
+                tan_contig_dispatch_vector, tan_strided_dispatch_vector);
+        };
+        m.def("_tan", tan_pyapi, "", py::arg("src"), py::arg("dst"),
+              py::arg("sycl_queue"), py::arg("depends") = py::list());
+        // `_tan_result_type(dtype)` forwards to py_unary_ufunc_result_type.
+        auto tan_result_type_pyapi = [](const py::dtype &dtype) {
+            return py_unary_ufunc_result_type(dtype, tan_output_typeid_vector);
+        };
+        m.def("_tan_result_type", tan_result_type_pyapi);
+    }
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/tan.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/tan.hpp
new file mode 100644
index 000000000000..b0818a9a85c2
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/tan.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+/// Adds `_tan` and `_tan_result_type` to module `m` (defined in tan.cpp).
+extern void init_tan(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/tanh.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/tanh.cpp
new file mode 100644
index 000000000000..c6546730a52a
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/tanh.cpp
@@ -0,0 +1,124 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "tanh.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/tanh.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U35: ==== TANH   (x)
+namespace impl
+{
+
+namespace tanh_fn_ns = dpctl::tensor::kernels::tanh;
+
+static unary_contig_impl_fn_ptr_t tanh_contig_dispatch_vector[td_ns::num_types];
+static int tanh_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    tanh_strided_dispatch_vector[td_ns::num_types];
+
+void populate_tanh_dispatch_vectors(void)
+{
+    // Fills the three tables declared above; called from init_tanh().
+    using namespace td_ns;
+    namespace fn_ns = tanh_fn_ns;
+    // Contiguous-variant function pointers (unary_contig_impl_fn_ptr_t).
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t,
+                          fn_ns::TanhContigFactory, num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(tanh_contig_dispatch_vector);
+
+    // Strided-variant function pointers (unary_strided_impl_fn_ptr_t).
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t,
+                          fn_ns::TanhStridedFactory, num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(tanh_strided_dispatch_vector);
+
+    // Integer table that init_tanh() hands to both Python bindings.
+    DispatchVectorBuilder<int, fn_ns::TanhTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(tanh_output_typeid_vector);
+}
+
+} // namespace impl
+
+void init_tanh(py::module_ m)
+{
+    // Populate the dispatch tables that the bindings below read when called.
+    impl::populate_tanh_dispatch_vectors();
+
+    using usm_array_t = dpctl::tensor::usm_ndarray;
+    using events_t = std::vector<sycl::event>;
+    using impl::tanh_contig_dispatch_vector;
+    using impl::tanh_output_typeid_vector;
+    using impl::tanh_strided_dispatch_vector;
+
+    // _tanh(src, dst, sycl_queue, depends=[]): forwards to py_unary_ufunc.
+    auto run_tanh = [&](const usm_array_t &src, const usm_array_t &dst,
+                        sycl::queue &exec_q, const events_t &depends = {}) {
+        return py_unary_ufunc(
+            src, dst, exec_q, depends, tanh_output_typeid_vector,
+            tanh_contig_dispatch_vector, tanh_strided_dispatch_vector);
+    };
+    m.def("_tanh", run_tanh, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    // _tanh_result_type(dtype): forwards to py_unary_ufunc_result_type.
+    m.def("_tanh_result_type", [&](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(dtype, tanh_output_typeid_vector);
+    });
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/tanh.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/tanh.hpp
new file mode 100644
index 000000000000..d29c924d5e73
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/tanh.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+/// Adds `_tanh` and `_tanh_result_type` to module `m` (defined in tanh.cpp).
+extern void init_tanh(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/trunc.cpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/trunc.cpp
new file mode 100644
index 000000000000..9014dc3800ba
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/trunc.cpp
@@ -0,0 +1,126 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#include <vector>
+
+#include <sycl/sycl.hpp>
+
+#include "dpnp4pybind11.hpp"
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include "elementwise_functions.hpp"
+#include "trunc.hpp"
+#include "utils/type_dispatch.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include "kernels/elementwise_functions/trunc.hpp"
+
+namespace dpctl::tensor::py_internal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+
+namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
+using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
+using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
+
+// U36: ==== TRUNC   (x)
+namespace impl
+{
+
+namespace trunc_fn_ns = dpctl::tensor::kernels::trunc;
+
+static unary_contig_impl_fn_ptr_t
+    trunc_contig_dispatch_vector[td_ns::num_types];
+static int trunc_output_typeid_vector[td_ns::num_types];
+static unary_strided_impl_fn_ptr_t
+    trunc_strided_dispatch_vector[td_ns::num_types];
+
+void populate_trunc_dispatch_vectors(void)
+{
+    // Fills the three tables declared above; called from init_trunc().
+    using namespace td_ns;
+    namespace fn_ns = trunc_fn_ns;
+    // Contiguous-variant function pointers (unary_contig_impl_fn_ptr_t).
+    DispatchVectorBuilder<unary_contig_impl_fn_ptr_t,
+                          fn_ns::TruncContigFactory, num_types>
+        dvb1;
+    dvb1.populate_dispatch_vector(trunc_contig_dispatch_vector);
+
+    // Strided-variant function pointers (unary_strided_impl_fn_ptr_t).
+    DispatchVectorBuilder<unary_strided_impl_fn_ptr_t,
+                          fn_ns::TruncStridedFactory, num_types>
+        dvb2;
+    dvb2.populate_dispatch_vector(trunc_strided_dispatch_vector);
+
+    // Integer table that init_trunc() hands to both Python bindings.
+    DispatchVectorBuilder<int, fn_ns::TruncTypeMapFactory, num_types> dvb3;
+    dvb3.populate_dispatch_vector(trunc_output_typeid_vector);
+}
+
+} // namespace impl
+
+void init_trunc(py::module_ m)
+{
+    // Populate the dispatch tables that the bindings below read when called.
+    impl::populate_trunc_dispatch_vectors();
+
+    using usm_array_t = dpctl::tensor::usm_ndarray;
+    using events_t = std::vector<sycl::event>;
+    using impl::trunc_contig_dispatch_vector;
+    using impl::trunc_output_typeid_vector;
+    using impl::trunc_strided_dispatch_vector;
+
+    // _trunc(src, dst, sycl_queue, depends=[]): forwards to py_unary_ufunc
+    // together with the three trunc tables.
+    auto run_trunc = [&](const usm_array_t &src, const usm_array_t &dst,
+                         sycl::queue &exec_q, const events_t &depends = {}) {
+        return py_unary_ufunc(
+            src, dst, exec_q, depends, trunc_output_typeid_vector,
+            trunc_contig_dispatch_vector, trunc_strided_dispatch_vector);
+    };
+    m.def("_trunc", run_trunc, "", py::arg("src"), py::arg("dst"),
+          py::arg("sycl_queue"), py::arg("depends") = py::list());
+
+    // _trunc_result_type(dtype): forwards to py_unary_ufunc_result_type.
+    m.def("_trunc_result_type", [&](const py::dtype &dtype) {
+        return py_unary_ufunc_result_type(dtype, trunc_output_typeid_vector);
+    });
+}
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/elementwise_functions/trunc.hpp b/dpctl_ext/tensor/libtensor/source/elementwise_functions/trunc.hpp
new file mode 100644
index 000000000000..79ed6b5ded14
--- /dev/null
+++ b/dpctl_ext/tensor/libtensor/source/elementwise_functions/trunc.hpp
@@ -0,0 +1,46 @@
+//*****************************************************************************
+// Copyright (c) 2026, Intel Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// - Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+// - Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+// - Neither the name of the copyright holder nor the names of its contributors
+//   may be used to endorse or promote products derived from this software
+//   without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+// THE POSSIBILITY OF SUCH DAMAGE.
+//*****************************************************************************
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines functions of dpctl.tensor._tensor_elementwise_impl
+/// extension, specifically functions for elementwise operations.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+namespace dpctl::tensor::py_internal
+{
+/// Adds `_trunc` and `_trunc_result_type` to module `m` (see trunc.cpp).
+extern void init_trunc(py::module_ m);
+
+} // namespace dpctl::tensor::py_internal
diff --git a/dpctl_ext/tensor/libtensor/source/full_ctor.cpp b/dpctl_ext/tensor/libtensor/source/full_ctor.cpp
index aef57836666e..dfe1d25b769c 100644
--- a/dpctl_ext/tensor/libtensor/source/full_ctor.cpp
+++ b/dpctl_ext/tensor/libtensor/source/full_ctor.cpp
@@ -32,7 +32,6 @@
 /// This file defines functions of dpctl.tensor._tensor_impl extensions
 //===--------------------------------------------------------------------===//
 
-#include <complex>
 #include <cstddef>
 #include <cstdint>
 #include <tuple>
@@ -42,11 +41,13 @@
 #include <sycl/sycl.hpp>
 
 #include "dpnp4pybind11.hpp"
-#include <pybind11/complex.h>
+#include <pybind11/complex.h> // py::cast<std::complex<T>>
 #include <pybind11/pybind11.h>
 
 #include "kernels/constructors.hpp"
+#include "utils/offset_utils.hpp"
 #include "utils/output_validation.hpp"
+#include "utils/sycl_alloc_utils.hpp"
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
 
diff --git a/dpctl_ext/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl_ext/tensor/libtensor/source/integer_advanced_indexing.cpp
index 925cc2e895ed..c6021bdfd2d1 100644
--- a/dpctl_ext/tensor/libtensor/source/integer_advanced_indexing.cpp
+++ b/dpctl_ext/tensor/libtensor/source/integer_advanced_indexing.cpp
@@ -34,7 +34,6 @@
 //===----------------------------------------------------------------------===//
 
 #include <algorithm>
-#include <complex>
 #include <cstddef>
 #include <cstdint>
 #include <iterator>
@@ -47,9 +46,7 @@
 #include <sycl/sycl.hpp>
 
 #include "dpnp4pybind11.hpp"
-#include <pybind11/complex.h>
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
 
 #include "kernels/integer_advanced_indexing.hpp"
 #include "utils/memory_overlap.hpp"
diff --git a/dpctl_ext/tensor/libtensor/source/linear_sequences.cpp b/dpctl_ext/tensor/libtensor/source/linear_sequences.cpp
index 5204f24b3724..9a7bf2dbcc0f 100644
--- a/dpctl_ext/tensor/libtensor/source/linear_sequences.cpp
+++ b/dpctl_ext/tensor/libtensor/source/linear_sequences.cpp
@@ -32,7 +32,6 @@
 /// This file defines functions of dpctl.tensor._tensor_impl extensions
 //===---------------------------------------------------------------------===//
 
-#include <complex>
 #include <cstddef>
 #include <utility>
 #include <vector>
@@ -40,13 +39,12 @@
 #include <sycl/sycl.hpp>
 
 #include "dpnp4pybind11.hpp"
-#include <pybind11/complex.h>
+#include <pybind11/complex.h> // py::cast<std::complex<T>>
 #include <pybind11/pybind11.h>
 
 #include "kernels/constructors.hpp"
 #include "utils/output_validation.hpp"
 #include "utils/type_dispatch.hpp"
-#include "utils/type_utils.hpp"
 
 #include "linear_sequences.hpp"
 
diff --git a/dpctl_ext/tensor/libtensor/source/repeat.cpp b/dpctl_ext/tensor/libtensor/source/repeat.cpp
index 4bba1d35a08e..919f51f9a4d1 100644
--- a/dpctl_ext/tensor/libtensor/source/repeat.cpp
+++ b/dpctl_ext/tensor/libtensor/source/repeat.cpp
@@ -33,8 +33,9 @@
 //===--------------------------------------------------------------------===//
 
 #include <algorithm>
+#include <cassert>
 #include <cstddef>
-#include <cstdint>
+#include <tuple>
 #include <utility>
 #include <vector>
 
@@ -42,7 +43,6 @@
 
 #include "dpnp4pybind11.hpp"
 #include <pybind11/pybind11.h>
-#include <pybind11/stl.h>
 
 #include "kernels/repeat.hpp"
 #include "utils/memory_overlap.hpp"
@@ -285,17 +285,17 @@ std::pair<sycl::event, sycl::event>
         shT orthog_src_strides;
         shT axis_src_shape;
         shT axis_src_stride;
-        dpctl::tensor::py_internal::split_iteration_space(
-            src_shape_vec, src_strides_vec, axis, axis + 1, orthog_src_shape,
-            axis_src_shape, orthog_src_strides, axis_src_stride);
+        split_iteration_space(src_shape_vec, src_strides_vec, axis, axis + 1,
+                              orthog_src_shape, axis_src_shape,
+                              orthog_src_strides, axis_src_stride);
 
         shT orthog_dst_shape;
         shT orthog_dst_strides;
         shT axis_dst_shape;
         shT axis_dst_stride;
-        dpctl::tensor::py_internal::split_iteration_space(
-            dst_shape_vec, dst_strides_vec, axis, axis + 1, orthog_dst_shape,
-            axis_dst_shape, orthog_dst_strides, axis_dst_stride);
+        split_iteration_space(dst_shape_vec, dst_strides_vec, axis, axis + 1,
+                              orthog_dst_shape, axis_dst_shape,
+                              orthog_dst_strides, axis_dst_stride);
 
         assert(orthog_src_shape.size() == static_cast<std::size_t>(orthog_nd));
         assert(orthog_dst_shape.size() == static_cast<std::size_t>(orthog_nd));
@@ -310,7 +310,7 @@ std::pair<sycl::event, sycl::event>
 
         py::ssize_t orthog_src_offset(0);
         py::ssize_t orthog_dst_offset(0);
-        dpctl::tensor::py_internal::simplify_iteration_space(
+        simplify_iteration_space(
             orthog_nd, _shape, orthog_src_strides, orthog_dst_strides,
             // output
             simplified_orthog_shape, simplified_orthog_src_strides,
@@ -641,17 +641,17 @@ std::pair<sycl::event, sycl::event>
         shT orthog_src_strides;
         shT axis_src_shape;
         shT axis_src_stride;
-        dpctl::tensor::py_internal::split_iteration_space(
-            src_shape_vec, src_strides_vec, axis, axis + 1, orthog_src_shape,
-            axis_src_shape, orthog_src_strides, axis_src_stride);
+        split_iteration_space(src_shape_vec, src_strides_vec, axis, axis + 1,
+                              orthog_src_shape, axis_src_shape,
+                              orthog_src_strides, axis_src_stride);
 
         shT orthog_dst_shape;
         shT orthog_dst_strides;
         shT axis_dst_shape;
         shT axis_dst_stride;
-        dpctl::tensor::py_internal::split_iteration_space(
-            dst_shape_vec, dst_strides_vec, axis, axis + 1, orthog_dst_shape,
-            axis_dst_shape, orthog_dst_strides, axis_dst_stride);
+        split_iteration_space(dst_shape_vec, dst_strides_vec, axis, axis + 1,
+                              orthog_dst_shape, axis_dst_shape,
+                              orthog_dst_strides, axis_dst_stride);
 
         assert(orthog_src_shape.size() == static_cast<std::size_t>(orthog_nd));
         assert(orthog_dst_shape.size() == static_cast<std::size_t>(orthog_nd));
@@ -667,7 +667,7 @@ std::pair<sycl::event, sycl::event>
         py::ssize_t orthog_src_offset(0);
         py::ssize_t orthog_dst_offset(0);
 
-        dpctl::tensor::py_internal::simplify_iteration_space(
+        simplify_iteration_space(
             orthog_nd, _shape, orthog_src_strides, orthog_dst_strides,
             // output
             simplified_orthog_shape, simplified_orthog_src_strides,
diff --git a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp
index 7bed4df01d29..cdd6e43ed9c5 100644
--- a/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp
+++ b/dpctl_ext/tensor/libtensor/source/tensor_ctors.cpp
@@ -72,9 +72,6 @@ static_assert(std::is_same_v<py::ssize_t, dpctl::tensor::ssize_t>);
 namespace
 {
 
-using dpctl::tensor::c_contiguous_strides;
-using dpctl::tensor::f_contiguous_strides;
-
 using dpctl::tensor::overlap::MemoryOverlap;
 using dpctl::tensor::overlap::SameLogicalTensors;
 
diff --git a/dpctl_ext/tensor/libtensor/source/where.cpp b/dpctl_ext/tensor/libtensor/source/where.cpp
index 1afdbf45c66b..46c52cf83b34 100644
--- a/dpctl_ext/tensor/libtensor/source/where.cpp
+++ b/dpctl_ext/tensor/libtensor/source/where.cpp
@@ -33,16 +33,15 @@
 /// dpctl.tensor.where
 //===---------------------------------------------------------------------===//
 
-#include <complex>
+#include <cassert>
 #include <cstddef>
-#include <cstdint>
+#include <tuple>
 #include <utility>
 #include <vector>
 
 #include <sycl/sycl.hpp>
 
 #include "dpnp4pybind11.hpp"
-#include <pybind11/complex.h>
 #include <pybind11/pybind11.h>
 
 #include "kernels/where.hpp"
@@ -201,7 +200,7 @@ std::pair<sycl::event, sycl::event>
     py::ssize_t x2_offset(0);
     py::ssize_t dst_offset(0);
 
-    dpctl::tensor::py_internal::simplify_iteration_space_4(
+    simplify_iteration_space_4(
         nd, x1_shape, cond_strides, x1_strides, x2_strides, dst_strides,
         // outputs
         simplified_shape, simplified_cond_strides, simplified_x1_strides,
diff --git a/dpctl_ext/tensor/libtensor/source/zeros_ctor.cpp b/dpctl_ext/tensor/libtensor/source/zeros_ctor.cpp
index 2eb05e49f382..b9a2e01bea4a 100644
--- a/dpctl_ext/tensor/libtensor/source/zeros_ctor.cpp
+++ b/dpctl_ext/tensor/libtensor/source/zeros_ctor.cpp
@@ -32,7 +32,6 @@
 /// This file defines functions of dpctl.tensor._tensor_impl extensions
 //===--------------------------------------------------------------------===//
 
-#include <complex>
 #include <cstddef>
 #include <stdexcept>
 #include <utility>
@@ -41,7 +40,6 @@
 #include <sycl/sycl.hpp>
 
 #include "dpnp4pybind11.hpp"
-#include <pybind11/complex.h>
 #include <pybind11/pybind11.h>
 
 #include "utils/output_validation.hpp"
diff --git a/dpctl_ext/tensor/libtensor/source/zeros_ctor.hpp b/dpctl_ext/tensor/libtensor/source/zeros_ctor.hpp
index 51a1903a0f36..d104e37f5533 100644
--- a/dpctl_ext/tensor/libtensor/source/zeros_ctor.hpp
+++ b/dpctl_ext/tensor/libtensor/source/zeros_ctor.hpp
@@ -39,7 +39,6 @@
 #include <sycl/sycl.hpp>
 
 #include "dpnp4pybind11.hpp"
-#include <pybind11/pybind11.h>
 
 namespace dpctl::tensor::py_internal
 {
diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
index b3e0c74c228e..d7eeccf78489 100644
--- a/dpnp/dpnp_algo/dpnp_elementwise_common.py
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -30,7 +30,6 @@
 from functools import wraps
 
 import dpctl.tensor as dpt
-import dpctl.tensor._copy_utils as dtc
 import dpctl.tensor._type_utils as dtu
 import dpctl.utils as dpu
 import numpy
@@ -48,6 +47,7 @@
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor._copy_utils as dtc
 import dpctl_ext.tensor._tensor_impl as dti
 import dpnp
 import dpnp.backend.extensions.vm._vm_impl as vmi
diff --git a/dpnp/dpnp_algo/dpnp_fill.py b/dpnp/dpnp_algo/dpnp_fill.py
index 7e0a70f25ff9..c9ae58a114a9 100644
--- a/dpnp/dpnp_algo/dpnp_fill.py
+++ b/dpnp/dpnp_algo/dpnp_fill.py
@@ -29,12 +29,12 @@
 from numbers import Number
 
 import dpctl.utils as dpu
-from dpctl.tensor._ctors import _cast_fill_val
 
 # TODO: revert to `from dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt
 import dpnp
+from dpctl_ext.tensor._ctors import _cast_fill_val
 from dpctl_ext.tensor._tensor_impl import (
     _copy_usm_ndarray_into_usm_ndarray,
     _full_usm_ndarray,
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 0d05ef8d49d5..6418302d6e7b 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -38,13 +38,13 @@
 import warnings
 
 import dpctl.tensor as dpt
-from dpctl.tensor._numpy_helper import AxisError
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
 import dpctl_ext.tensor._type_utils as dtu
 import dpnp
+from dpctl_ext.tensor._numpy_helper import AxisError
 
 from . import memory as dpm
 
diff --git a/dpnp/dpnp_iface_functional.py b/dpnp/dpnp_iface_functional.py
index 1985eced2e71..797d8a736276 100644
--- a/dpnp/dpnp_iface_functional.py
+++ b/dpnp/dpnp_iface_functional.py
@@ -41,13 +41,15 @@
 
 # pylint: disable=protected-access
 
-from dpctl.tensor._numpy_helper import (
+import dpnp
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import (
     normalize_axis_index,
     normalize_axis_tuple,
 )
 
-import dpnp
-
 # pylint: disable=no-name-in-module
 from dpnp.dpnp_utils import get_usm_allocations
 
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 7c9e4957c32b..b5afd9523d67 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -47,16 +47,16 @@
 import dpctl
 import dpctl.tensor as dpt
 import numpy
-from dpctl.tensor._numpy_helper import (
-    AxisError,
-    normalize_axis_index,
-    normalize_axis_tuple,
-)
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
 import dpnp
+from dpctl_ext.tensor._numpy_helper import (
+    AxisError,
+    normalize_axis_index,
+    normalize_axis_tuple,
+)
 
 from .dpnp_array import dpnp_array
 
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 49e65fa10151..c84b61dad4bf 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -50,10 +50,6 @@
 import dpctl.tensor._tensor_elementwise_impl as ti
 import dpctl.utils as dpu
 import numpy
-from dpctl.tensor._numpy_helper import (
-    normalize_axis_index,
-    normalize_axis_tuple,
-)
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
@@ -62,6 +58,10 @@
 import dpctl_ext.tensor._type_utils as dtu
 import dpnp
 import dpnp.backend.extensions.ufunc._ufunc_impl as ufi
+from dpctl_ext.tensor._numpy_helper import (
+    normalize_axis_index,
+    normalize_axis_tuple,
+)
 
 from .dpnp_algo.dpnp_elementwise_common import (
     DPNPI0,
@@ -4250,8 +4250,8 @@ def prod(
 
 proj = DPNPUnaryFunc(
     "proj",
-    ti._proj_result_type,
-    ti._proj,
+    ti_ext._proj_result_type,
+    ti_ext._proj,
     _PROJ_DOCSTRING,
 )
 
@@ -4313,8 +4313,8 @@ def prod(
 
 real = DPNPReal(
     "real",
-    ti._real_result_type,
-    ti._real,
+    ti_ext._real_result_type,
+    ti_ext._real,
     _REAL_DOCSTRING,
 )
 
@@ -4596,8 +4596,8 @@ def real_if_close(a, tol=100):
 
 round = DPNPRound(
     "round",
-    ti._round_result_type,
-    ti._round,
+    ti_ext._round_result_type,
+    ti_ext._round,
     _ROUND_DOCSTRING,
     mkl_fn_to_call="_mkl_round_to_call",
     mkl_impl_fn="_round",
@@ -4668,8 +4668,8 @@ def real_if_close(a, tol=100):
 
 sign = DPNPUnaryFunc(
     "sign",
-    ti._sign_result_type,
-    ti._sign,
+    ti_ext._sign_result_type,
+    ti_ext._sign,
     _SIGN_DOCSTRING,
     acceptance_fn=acceptance_fn_sign,
 )
@@ -4730,8 +4730,8 @@ def real_if_close(a, tol=100):
 
 signbit = DPNPUnaryFunc(
     "signbit",
-    ti._signbit_result_type,
-    ti._signbit,
+    ti_ext._signbit_result_type,
+    ti_ext._signbit,
     _SIGNBIT_DOCSTRING,
 )
 
@@ -5229,8 +5229,8 @@ def trapezoid(y, x=None, dx=1.0, axis=-1):
 
 trunc = DPNPUnaryFunc(
     "trunc",
-    ti._trunc_result_type,
-    ti._trunc,
+    ti_ext._trunc_result_type,
+    ti_ext._trunc,
     _TRUNC_DOCSTRING,
     mkl_fn_to_call="_mkl_trunc_to_call",
     mkl_impl_fn="_trunc",
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
index 97c6aab14058..75fe215837b9 100644
--- a/dpnp/dpnp_iface_statistics.py
+++ b/dpnp/dpnp_iface_statistics.py
@@ -45,7 +45,6 @@
 import dpctl.tensor._tensor_elementwise_impl as ti
 import dpctl.utils as dpu
 import numpy
-from dpctl.tensor._numpy_helper import normalize_axis_index
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
@@ -54,6 +53,7 @@
 
 # pylint: disable=no-name-in-module
 import dpnp.backend.extensions.statistics._statistics_impl as statistics_ext
+from dpctl_ext.tensor._numpy_helper import normalize_axis_index
 from dpnp.dpnp_utils.dpnp_utils_common import (
     result_type_for_device,
     to_supported_dtypes,
diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py
index d459a3392311..6deab3a8876c 100644
--- a/dpnp/dpnp_iface_trigonometric.py
+++ b/dpnp/dpnp_iface_trigonometric.py
@@ -718,8 +718,8 @@ def _get_accumulation_res_dt(a, dtype):
 
 cbrt = DPNPUnaryFunc(
     "cbrt",
-    ti._cbrt_result_type,
-    ti._cbrt,
+    ti_ext._cbrt_result_type,
+    ti_ext._cbrt,
     _CBRT_DOCSTRING,
     mkl_fn_to_call="_mkl_cbrt_to_call",
     mkl_impl_fn="_cbrt",
@@ -1187,8 +1187,8 @@ def cumlogsumexp(
 
 exp2 = DPNPUnaryFunc(
     "exp2",
-    ti._exp2_result_type,
-    ti._exp2,
+    ti_ext._exp2_result_type,
+    ti_ext._exp2,
     _EXP2_DOCSTRING,
     mkl_fn_to_call="_mkl_exp2_to_call",
     mkl_impl_fn="_exp2",
@@ -2107,8 +2107,8 @@ def logsumexp(x, /, *, axis=None, dtype=None, keepdims=False, out=None):
 
 reciprocal = DPNPUnaryFunc(
     "reciprocal",
-    ti._reciprocal_result_type,
-    ti._reciprocal,
+    ti_ext._reciprocal_result_type,
+    ti_ext._reciprocal,
     _RECIPROCAL_DOCSTRING,
     mkl_fn_to_call="_mkl_inv_to_call",
     mkl_impl_fn="_inv",
@@ -2252,8 +2252,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None):
 
 rsqrt = DPNPUnaryFunc(
     "rsqrt",
-    ti._rsqrt_result_type,
-    ti._rsqrt,
+    ti_ext._rsqrt_result_type,
+    ti_ext._rsqrt,
     _RSQRT_DOCSTRING,
 )
 
@@ -2309,8 +2309,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None):
 
 sin = DPNPUnaryFunc(
     "sin",
-    ti._sin_result_type,
-    ti._sin,
+    ti_ext._sin_result_type,
+    ti_ext._sin,
     _SIN_DOCSTRING,
     mkl_fn_to_call="_mkl_sin_to_call",
     mkl_impl_fn="_sin",
@@ -2372,8 +2372,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None):
 
 sinh = DPNPUnaryFunc(
     "sinh",
-    ti._sinh_result_type,
-    ti._sinh,
+    ti_ext._sinh_result_type,
+    ti_ext._sinh,
     _SINH_DOCSTRING,
     mkl_fn_to_call="_mkl_sinh_to_call",
     mkl_impl_fn="_sinh",
@@ -2449,8 +2449,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None):
 
 sqrt = DPNPUnaryFunc(
     "sqrt",
-    ti._sqrt_result_type,
-    ti._sqrt,
+    ti_ext._sqrt_result_type,
+    ti_ext._sqrt,
     _SQRT_DOCSTRING,
     mkl_fn_to_call="_mkl_sqrt_to_call",
     mkl_impl_fn="_sqrt",
@@ -2508,8 +2508,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None):
 
 square = DPNPUnaryFunc(
     "square",
-    ti._square_result_type,
-    ti._square,
+    ti_ext._square_result_type,
+    ti_ext._square,
     _SQUARE_DOCSTRING,
     mkl_fn_to_call="_mkl_sqr_to_call",
     mkl_impl_fn="_sqr",
@@ -2567,8 +2567,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None):
 
 tan = DPNPUnaryFunc(
     "tan",
-    ti._tan_result_type,
-    ti._tan,
+    ti_ext._tan_result_type,
+    ti_ext._tan,
     _TAN_DOCSTRING,
     mkl_fn_to_call="_mkl_tan_to_call",
     mkl_impl_fn="_tan",
@@ -2632,8 +2632,8 @@ def reduce_hypot(x, /, *, axis=None, dtype=None, keepdims=False, out=None):
 
 tanh = DPNPUnaryFunc(
     "tanh",
-    ti._tanh_result_type,
-    ti._tanh,
+    ti_ext._tanh_result_type,
+    ti_ext._tanh,
     _TANH_DOCSTRING,
     mkl_fn_to_call="_mkl_tanh_to_call",
     mkl_impl_fn="_tanh",
diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
index 9ad97742ee18..b01f57eaecdd 100644
--- a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
+++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py
@@ -30,11 +30,6 @@
 import dpctl.tensor as dpt
 import dpctl.utils as dpu
 import numpy
-from dpctl.tensor._numpy_helper import (
-    AxisError,
-    normalize_axis_index,
-    normalize_axis_tuple,
-)
 from dpctl.utils import ExecutionPlacementError
 
 # pylint: disable=no-name-in-module
@@ -43,6 +38,11 @@
 import dpctl_ext.tensor._tensor_impl as ti
 import dpnp
 import dpnp.backend.extensions.blas._blas_impl as bi
+from dpctl_ext.tensor._numpy_helper import (
+    AxisError,
+    normalize_axis_index,
+    normalize_axis_tuple,
+)
 from dpnp.dpnp_array import dpnp_array
 from dpnp.dpnp_utils import get_usm_allocations
 
diff --git a/dpnp/dpnp_utils/dpnp_utils_statistics.py b/dpnp/dpnp_utils/dpnp_utils_statistics.py
index 3a3bc04a31af..ec67b619a13f 100644
--- a/dpnp/dpnp_utils/dpnp_utils_statistics.py
+++ b/dpnp/dpnp_utils/dpnp_utils_statistics.py
@@ -30,10 +30,13 @@
 
 import dpctl
 import dpctl.tensor as dpt
-from dpctl.tensor._numpy_helper import normalize_axis_tuple
 from dpctl.utils import ExecutionPlacementError
 
 import dpnp
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import normalize_axis_tuple
 from dpnp.dpnp_array import dpnp_array
 
 __all__ = ["dpnp_cov", "dpnp_median"]
diff --git a/dpnp/fft/dpnp_utils_fft.py b/dpnp/fft/dpnp_utils_fft.py
index e3f35a0201ec..20d0dcd0cff2 100644
--- a/dpnp/fft/dpnp_utils_fft.py
+++ b/dpnp/fft/dpnp_utils_fft.py
@@ -44,10 +44,6 @@
 import dpctl
 import dpctl.utils as dpu
 import numpy
-from dpctl.tensor._numpy_helper import (
-    normalize_axis_index,
-    normalize_axis_tuple,
-)
 from dpctl.utils import ExecutionPlacementError
 
 # TODO: revert to `import dpctl.tensor...`
@@ -55,6 +51,10 @@
 import dpctl_ext.tensor._tensor_impl as ti
 import dpnp
 import dpnp.backend.extensions.fft._fft_impl as fi
+from dpctl_ext.tensor._numpy_helper import (
+    normalize_axis_index,
+    normalize_axis_tuple,
+)
 
 from ..dpnp_array import dpnp_array
 from ..dpnp_utils import map_dtype_to_device
diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py
index 6959565ecf17..f4e0f96da5e6 100644
--- a/dpnp/linalg/dpnp_iface_linalg.py
+++ b/dpnp/linalg/dpnp_iface_linalg.py
@@ -45,9 +45,12 @@
 from typing import NamedTuple
 
 import numpy
-from dpctl.tensor._numpy_helper import normalize_axis_tuple
 
 import dpnp
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import normalize_axis_tuple
 from dpnp.backend.extensions.lapack._lapack_impl import LinAlgError
 
 from .dpnp_utils_linalg import (
diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py
index c6897e7b0614..28e11f6188c5 100644
--- a/dpnp/linalg/dpnp_utils_linalg.py
+++ b/dpnp/linalg/dpnp_utils_linalg.py
@@ -44,7 +44,6 @@
 
 import dpctl.utils as dpu
 import numpy
-from dpctl.tensor._numpy_helper import normalize_axis_index
 from numpy import prod
 
 # pylint: disable=no-name-in-module
@@ -53,6 +52,7 @@
 import dpctl_ext.tensor._tensor_impl as ti
 import dpnp
 import dpnp.backend.extensions.lapack._lapack_impl as li
+from dpctl_ext.tensor._numpy_helper import normalize_axis_index
 from dpnp.dpnp_utils import get_usm_allocations
 
 
diff --git a/dpnp/tests/test_arraycreation.py b/dpnp/tests/test_arraycreation.py
index 698e22b9f873..8d89f2a42ca8 100644
--- a/dpnp/tests/test_arraycreation.py
+++ b/dpnp/tests/test_arraycreation.py
@@ -2,7 +2,6 @@
 from math import prod
 
 import dpctl
-import dpctl.tensor as dpt
 import numpy
 import pytest
 from numpy.testing import (
@@ -15,7 +14,7 @@
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
 import dpnp
 
 from .helper import (
@@ -668,7 +667,7 @@ def test_tri_default_dtype():
         5,
         numpy.array(1),
         dpnp.array(2),
-        dpt_ext.asarray(3),
+        dpt.asarray(3),
     ],
     ids=[
         "-3",
@@ -682,7 +681,7 @@ def test_tri_default_dtype():
         "5",
         "np.array(1)",
         "dpnp.array(2)",
-        "dpt_ext.asarray(3)",
+        "dpt.asarray(3)",
     ],
 )
 @pytest.mark.parametrize(
@@ -725,7 +724,7 @@ def test_tril(m, k, dtype):
         5,
         numpy.array(1),
         dpnp.array(2),
-        dpt_ext.asarray(3),
+        dpt.asarray(3),
     ],
     ids=[
         "-3",
@@ -739,7 +738,7 @@ def test_tril(m, k, dtype):
         "5",
         "np.array(1)",
         "dpnp.array(2)",
-        "dpt_ext.asarray(3)",
+        "dpt.asarray(3)",
     ],
 )
 @pytest.mark.parametrize(
@@ -972,7 +971,7 @@ def test_ones_like(array, dtype, order):
     ],
 )
 def test_dpctl_tensor_input(func, args):
-    x0 = dpt_ext.reshape(dpt_ext.arange(9), (3, 3))
+    x0 = dpt.reshape(dpt.arange(9), (3, 3))
     new_args = [eval(val, {"x0": x0}) for val in args]
     X = getattr(dpt, func)(*new_args)
     Y = getattr(dpnp, func)(*new_args)
diff --git a/dpnp/tests/test_arraymanipulation.py b/dpnp/tests/test_arraymanipulation.py
index 7d5d2efeebb1..f7df6387caf6 100644
--- a/dpnp/tests/test_arraymanipulation.py
+++ b/dpnp/tests/test_arraymanipulation.py
@@ -1,6 +1,5 @@
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 from numpy.testing import assert_array_equal, assert_equal, assert_raises
 
 # TODO: revert to `import dpctl.tensor...`
@@ -8,6 +7,10 @@
 import dpctl_ext.tensor as dpt
 import dpnp
 
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
+
 from .helper import get_all_dtypes, get_float_complex_dtypes
 from .third_party.cupy import testing
 
diff --git a/dpnp/tests/test_counting.py b/dpnp/tests/test_counting.py
index 762abd58b687..9210e7c1b3dd 100644
--- a/dpnp/tests/test_counting.py
+++ b/dpnp/tests/test_counting.py
@@ -1,6 +1,5 @@
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 from numpy.testing import (
     assert_allclose,
     assert_equal,
@@ -9,6 +8,10 @@
 
 import dpnp
 
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
+
 from .helper import (
     get_all_dtypes,
     get_float_dtypes,
diff --git a/dpnp/tests/test_flipping.py b/dpnp/tests/test_flipping.py
index cc84242f4557..cd55846e3668 100644
--- a/dpnp/tests/test_flipping.py
+++ b/dpnp/tests/test_flipping.py
@@ -2,13 +2,16 @@
 
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 from numpy.testing import (
     assert_equal,
 )
 
 import dpnp
 
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
+
 from .helper import (
     get_all_dtypes,
 )
diff --git a/dpnp/tests/test_linalg.py b/dpnp/tests/test_linalg.py
index b9673d21a161..dfd6e21c2a95 100644
--- a/dpnp/tests/test_linalg.py
+++ b/dpnp/tests/test_linalg.py
@@ -3,7 +3,6 @@
 import dpctl
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 from dpctl.utils import ExecutionPlacementError
 from numpy.testing import (
     assert_allclose,
@@ -18,6 +17,10 @@
 import dpctl_ext.tensor as dpt
 import dpnp
 
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
+
 from .helper import (
     assert_dtype_allclose,
     generate_random_numpy_array,
diff --git a/dpnp/tests/test_manipulation.py b/dpnp/tests/test_manipulation.py
index 8095a0daa858..d30c08a65f1e 100644
--- a/dpnp/tests/test_manipulation.py
+++ b/dpnp/tests/test_manipulation.py
@@ -2,7 +2,6 @@
 
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 from numpy.testing import (
     assert_array_equal,
     assert_equal,
@@ -14,6 +13,10 @@
 import dpctl_ext.tensor as dpt
 import dpnp
 
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
+
 from .helper import (
     assert_dtype_allclose,
     get_all_dtypes,
diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index 841494bde1ed..c03787790280 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -2,10 +2,6 @@
 import dpctl.tensor as dpt
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import (
-    AxisError,
-    normalize_axis_index,
-)
 from dpctl.utils import ExecutionPlacementError
 from numpy.testing import (
     assert_allclose,
@@ -19,6 +15,13 @@
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
 import dpnp
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import (
+    AxisError,
+    normalize_axis_index,
+)
 from dpnp.dpnp_array import dpnp_array
 from dpnp.dpnp_utils import map_dtype_to_device
 
diff --git a/dpnp/tests/test_product.py b/dpnp/tests/test_product.py
index afe767a5e5d9..9c2bc54e30b5 100644
--- a/dpnp/tests/test_product.py
+++ b/dpnp/tests/test_product.py
@@ -1,11 +1,14 @@
 import dpctl
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 from dpctl.utils import ExecutionPlacementError
 from numpy.testing import assert_allclose, assert_array_equal, assert_raises
 
 import dpnp
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
 from dpnp.dpnp_utils import map_dtype_to_device
 
 from .helper import (
diff --git a/dpnp/tests/test_search.py b/dpnp/tests/test_search.py
index 05bc56b11d0b..36e0032ccff1 100644
--- a/dpnp/tests/test_search.py
+++ b/dpnp/tests/test_search.py
@@ -1,11 +1,10 @@
-import dpctl.tensor as dpt
 import numpy
 import pytest
 from numpy.testing import assert_array_equal, assert_equal, assert_raises
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
 import dpnp
 
 from .helper import (
@@ -39,7 +38,7 @@ def test_out(self, func):
         assert_array_equal(result, expected)
 
         # out is usm_ndarray
-        dpt_out = dpt_ext.empty(expected.shape, dtype=expected.dtype)
+        dpt_out = dpt.empty(expected.shape, dtype=expected.dtype)
         result = getattr(dpnp, func)(ia, axis=0, out=dpt_out)
         assert dpt_out is result.get_array()
         assert_array_equal(result, expected)
diff --git a/dpnp/tests/test_sort.py b/dpnp/tests/test_sort.py
index 5e883c575f85..73eac4064892 100644
--- a/dpnp/tests/test_sort.py
+++ b/dpnp/tests/test_sort.py
@@ -1,10 +1,13 @@
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 from numpy.testing import assert_array_equal, assert_equal, assert_raises
 
 import dpnp
 
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
+
 from .helper import (
     assert_dtype_allclose,
     generate_random_numpy_array,
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py b/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py
index 95d753c90473..085261317ead 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_ndarray.py
@@ -6,13 +6,16 @@
 import dpctl
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 
 # from cupy_backends.cuda.api import driver
 # from cupy_backends.cuda.api import runtime
 # from cupy_backends.cuda import stream as stream_module
 import dpnp as cupy
 
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
+
 # from cupy import _util
 # from cupy import _core
 # from cupy import cuda
diff --git a/dpnp/tests/third_party/cupy/lib_tests/test_shape_base.py b/dpnp/tests/third_party/cupy/lib_tests/test_shape_base.py
index c241824fa81d..a1309f3ed83d 100644
--- a/dpnp/tests/third_party/cupy/lib_tests/test_shape_base.py
+++ b/dpnp/tests/third_party/cupy/lib_tests/test_shape_base.py
@@ -2,9 +2,12 @@
 
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 
 import dpnp as cupy
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
 from dpnp.tests.helper import has_support_aspect64
 from dpnp.tests.third_party.cupy import testing
 
diff --git a/dpnp/tests/third_party/cupy/manipulation_tests/test_dims.py b/dpnp/tests/third_party/cupy/manipulation_tests/test_dims.py
index 7355d07e1d9b..8944a6b944c9 100644
--- a/dpnp/tests/third_party/cupy/manipulation_tests/test_dims.py
+++ b/dpnp/tests/third_party/cupy/manipulation_tests/test_dims.py
@@ -2,9 +2,12 @@
 
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 
 import dpnp as cupy
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
 from dpnp.tests.third_party.cupy import testing
 
 
diff --git a/dpnp/tests/third_party/cupy/manipulation_tests/test_transpose.py b/dpnp/tests/third_party/cupy/manipulation_tests/test_transpose.py
index 7e7a62dce52a..0f6bed1c2ced 100644
--- a/dpnp/tests/third_party/cupy/manipulation_tests/test_transpose.py
+++ b/dpnp/tests/third_party/cupy/manipulation_tests/test_transpose.py
@@ -2,9 +2,12 @@
 
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 
 import dpnp as cupy
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
 from dpnp.tests.third_party.cupy import testing
 
 
diff --git a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py
index b8f98456a13a..cb7200c1b13b 100644
--- a/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py
+++ b/dpnp/tests/third_party/cupy/math_tests/test_sumprod.py
@@ -2,9 +2,12 @@
 
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 
 import dpnp as cupy
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
 from dpnp.tests.helper import (
     has_support_aspect16,
     has_support_aspect64,
diff --git a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py
index 7e0eade13254..8359ba580a25 100644
--- a/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py
+++ b/dpnp/tests/third_party/cupy/sorting_tests/test_sort.py
@@ -4,9 +4,12 @@
 
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 
 import dpnp as cupy
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
 from dpnp.tests.helper import has_support_aspect64
 from dpnp.tests.third_party.cupy import testing
 
diff --git a/dpnp/tests/third_party/cupy/statistics_tests/test_meanvar.py b/dpnp/tests/third_party/cupy/statistics_tests/test_meanvar.py
index bf5d37df2fba..d355d18985f2 100644
--- a/dpnp/tests/third_party/cupy/statistics_tests/test_meanvar.py
+++ b/dpnp/tests/third_party/cupy/statistics_tests/test_meanvar.py
@@ -2,9 +2,12 @@
 
 import numpy
 import pytest
-from dpctl.tensor._numpy_helper import AxisError
 
 import dpnp as cupy
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
 from dpnp.tests.helper import has_support_aspect16, has_support_aspect64
 from dpnp.tests.third_party.cupy import testing
 
diff --git a/dpnp/tests/third_party/cupy/testing/_loops.py b/dpnp/tests/third_party/cupy/testing/_loops.py
index 63cd09147c4b..66c243a3d7f7 100644
--- a/dpnp/tests/third_party/cupy/testing/_loops.py
+++ b/dpnp/tests/third_party/cupy/testing/_loops.py
@@ -10,9 +10,12 @@
 import numpy
 import pytest
 from dpctl import select_default_device
-from dpctl.tensor._numpy_helper import AxisError
 
 import dpnp as cupy
+
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._numpy_helper import AxisError
 from dpnp.tests import config
 from dpnp.tests.third_party.cupy.testing import _array, _parameterized
 from dpnp.tests.third_party.cupy.testing._pytest_impl import is_available