diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py
index 71ef714c642..7a6923169c1 100644
--- a/dpctl_ext/tensor/__init__.py
+++ b/dpctl_ext/tensor/__init__.py
@@ -179,6 +179,8 @@
     unique_values,
 )
 from ._sorting import argsort, sort, top_k
+from ._statistical_functions import mean, std, var
+from ._testing import allclose
 from ._type_utils import can_cast, finfo, iinfo, isdtype, result_type
 from ._utility_functions import all, any, diff
 
@@ -188,6 +190,7 @@
     "acosh",
     "add",
     "all",
+    "allclose",
     "angle",
     "any",
     "arange",
@@ -267,6 +270,7 @@
     "log10",
     "max",
     "maximum",
+    "mean",
     "meshgrid",
     "min",
     "minimum",
@@ -308,6 +312,7 @@
     "square",
     "squeeze",
     "stack",
+    "std",
     "subtract",
     "sum",
     "swapaxes",
@@ -327,6 +332,7 @@
     "unique_inverse",
     "unique_values",
     "unstack",
+    "var",
     "vecdot",
     "where",
     "zeros",
diff --git a/dpctl_ext/tensor/_clip.py b/dpctl_ext/tensor/_clip.py
index ef07269c4ea..c21d601966b 100644
--- a/dpctl_ext/tensor/_clip.py
+++ b/dpctl_ext/tensor/_clip.py
@@ -28,12 +28,12 @@
 
 import dpctl
 import dpctl.tensor as dpt
-import dpctl.tensor._tensor_elementwise_impl as tei
 from dpctl.utils import ExecutionPlacementError, SequentialOrderManager
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor._tensor_elementwise_impl as tei
 import dpctl_ext.tensor._tensor_impl as ti
 
 from ._copy_utils import (
diff --git a/dpctl_ext/tensor/_ctors.py b/dpctl_ext/tensor/_ctors.py
index 0b7650873fe..21c3d007718 100644
--- a/dpctl_ext/tensor/_ctors.py
+++ b/dpctl_ext/tensor/_ctors.py
@@ -361,7 +361,7 @@ def _copy_through_host_walker(seq_o, usm_res):
             )
             is None
         ):
-            usm_res[...] = dpt.asnumpy(seq_o).copy()
+            usm_res[...] = dpt_ext.asnumpy(seq_o).copy()
             return
         else:
             usm_res[...] = seq_o
diff --git a/dpctl_ext/tensor/_reduction.py b/dpctl_ext/tensor/_reduction.py
index b8fdcf4f37e..2daf07b81d8 100644
--- a/dpctl_ext/tensor/_reduction.py
+++ b/dpctl_ext/tensor/_reduction.py
@@ -506,7 +506,7 @@ def count_nonzero(x, /, *, axis=None, keepdims=False, out=None):
             type.
     """
     if x.dtype != dpt.bool:
-        x = dpt.astype(x, dpt.bool, copy=False)
+        x = dpt_ext.astype(x, dpt.bool, copy=False)
     return sum(
         x,
         axis=axis,
diff --git a/dpctl_ext/tensor/_set_functions.py b/dpctl_ext/tensor/_set_functions.py
index 93f81f044fd..2672e082d18 100644
--- a/dpctl_ext/tensor/_set_functions.py
+++ b/dpctl_ext/tensor/_set_functions.py
@@ -30,11 +30,11 @@
 
 import dpctl.tensor as dpt
 import dpctl.utils as du
-from dpctl.tensor._tensor_elementwise_impl import _not_equal, _subtract
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
 import dpctl_ext.tensor as dpt_ext
+from dpctl_ext.tensor._tensor_elementwise_impl import _not_equal, _subtract
 
 from ._copy_utils import _empty_like_orderK
 from ._scalar_utils import (
diff --git a/dpctl_ext/tensor/_statistical_functions.py b/dpctl_ext/tensor/_statistical_functions.py
new file mode 100644
index 00000000000..5513dfa7a65
--- /dev/null
+++ b/dpctl_ext/tensor/_statistical_functions.py
@@ -0,0 +1,384 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+
+import dpctl.tensor as dpt
+import dpctl.utils as du
+
+# TODO: revert to `import dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor._tensor_elementwise_impl as tei
+import dpctl_ext.tensor._tensor_impl as ti
+import dpctl_ext.tensor._tensor_reductions_impl as tri
+
+from ._numpy_helper import normalize_axis_tuple
+
+
+def _var_impl(x, axis, correction, keepdims):  # -> (variance, [last event])
+    nd = x.ndim
+    if axis is None:
+        axis = tuple(range(nd))
+    if not isinstance(axis, (tuple, list)):
+        axis = (axis,)
+    axis = normalize_axis_tuple(axis, nd, "axis")
+    perm = []
+    nelems = 1  # number of input elements folded into each output value
+    for i in range(nd):
+        if i not in axis:
+            perm.append(i)
+        else:
+            nelems *= x.shape[i]
+    red_nd = len(axis)
+    perm = perm + list(axis)  # kept axes first, reduced axes last
+    q = x.sycl_queue
+    inp_dt = x.dtype
+    res_dt = (
+        inp_dt
+        if inp_dt.kind == "f"
+        else dpt.dtype(ti.default_device_fp_type(q))
+    )
+    res_usm_type = x.usm_type
+    # input that is not real floating-point is first cast into a res_dt copy
+    _manager = du.SequentialOrderManager[q]
+    dep_evs = _manager.submitted_events
+    if inp_dt != res_dt:
+        buf = dpt_ext.empty_like(x, dtype=res_dt)
+        ht_e_buf, c_e1 = ti._copy_usm_ndarray_into_usm_ndarray(
+            src=x, dst=buf, sycl_queue=q, depends=dep_evs
+        )
+        _manager.add_event_pair(ht_e_buf, c_e1)
+    else:
+        buf = x  # already real floating-point; used as is (never written to)
+    # calculate mean: sum over the trailing (reduced) axes, then divide
+    buf2 = dpt_ext.permute_dims(buf, perm)
+    res_shape = buf2.shape[: nd - red_nd]
+    # mean keeps reduced axes as singleton dims for later broadcasting
+    if red_nd == 0:  # no axes to reduce: the "mean" is a copy of buf
+        mean_ary = dpt_ext.empty_like(buf)
+        dep_evs = _manager.submitted_events
+        ht_e1, c_e2 = ti._copy_usm_ndarray_into_usm_ndarray(
+            src=buf, dst=mean_ary, sycl_queue=q, depends=dep_evs
+        )
+        _manager.add_event_pair(ht_e1, c_e2)
+    else:
+        mean_ary = dpt_ext.empty(
+            res_shape,
+            dtype=res_dt,
+            usm_type=res_usm_type,
+            sycl_queue=q,
+        )
+        dep_evs = _manager.submitted_events
+        ht_e1, r_e1 = tri._sum_over_axis(
+            src=buf2,
+            trailing_dims_to_reduce=red_nd,
+            dst=mean_ary,
+            sycl_queue=q,
+            depends=dep_evs,
+        )
+        _manager.add_event_pair(ht_e1, r_e1)
+        # append size-1 dims, then undo `perm` to restore the input axis order
+        mean_ary_shape = res_shape + (1,) * red_nd
+        inv_perm = sorted(range(nd), key=lambda d: perm[d])
+        mean_ary = dpt_ext.permute_dims(
+            dpt_ext.reshape(mean_ary, mean_ary_shape), inv_perm
+        )
+    # divide in-place to get mean
+    mean_ary_shape = mean_ary.shape
+
+    dep_evs = _manager.submitted_events
+    ht_e2, d_e1 = tei._divide_by_scalar(
+        src=mean_ary, scalar=nelems, dst=mean_ary, sycl_queue=q, depends=dep_evs
+    )
+    _manager.add_event_pair(ht_e2, d_e1)
+
+    # subtract mean from (possibly cast) input array to get deviations
+    dev_ary = dpt_ext.empty_like(buf)
+    if mean_ary_shape != buf.shape:
+        mean_ary = dpt_ext.broadcast_to(mean_ary, buf.shape)
+    ht_e4, su_e = tei._subtract(
+        src1=buf, src2=mean_ary, dst=dev_ary, sycl_queue=q, depends=[d_e1]
+    )
+    _manager.add_event_pair(ht_e4, su_e)
+    # square deviations in place; waits on the subtraction event
+    ht_e5, sq_e = tei._square(
+        src=dev_ary, dst=dev_ary, sycl_queue=q, depends=[su_e]
+    )
+    _manager.add_event_pair(ht_e5, sq_e)
+
+    # take sum of squared deviations over the same trailing (reduced) axes
+    dev_ary2 = dpt_ext.permute_dims(dev_ary, perm)
+    if red_nd == 0:
+        res = dev_ary  # nothing to sum; squared deviations are the result
+    else:
+        res = dpt_ext.empty(
+            res_shape,
+            dtype=res_dt,
+            usm_type=res_usm_type,
+            sycl_queue=q,
+        )
+        ht_e6, r_e2 = tri._sum_over_axis(
+            src=dev_ary2,
+            trailing_dims_to_reduce=red_nd,
+            dst=res,
+            sycl_queue=q,
+            depends=[sq_e],
+        )
+        _manager.add_event_pair(ht_e6, r_e2)
+        # keepdims: re-insert the reduced axes as size-1 dims in input order
+        if keepdims:
+            res_shape = res_shape + (1,) * red_nd
+            inv_perm = sorted(range(nd), key=lambda d: perm[d])
+            res = dpt_ext.permute_dims(
+                dpt_ext.reshape(res, res_shape), inv_perm
+            )
+    res_shape = res.shape
+    # when nelems - correction <= 0, divide by nan so that the result is nan
+    div = max(nelems - correction, 0)
+    if not div:
+        div = dpt.nan
+    dep_evs = _manager.submitted_events
+    ht_e7, d_e2 = tei._divide_by_scalar(
+        src=res, scalar=div, dst=res, sycl_queue=q, depends=dep_evs
+    )
+    _manager.add_event_pair(ht_e7, d_e2)
+    return res, [d_e2]  # event of the final division, for chaining by callers
+
+
+def mean(x, axis=None, keepdims=False):
+    """mean(x, axis=None, keepdims=False)
+
+    Calculates the arithmetic mean of elements in the input array `x`.
+
+    Args:
+        x (usm_ndarray):
+            input array.
+        axis (Optional[int, Tuple[int, ...]]):
+            axis or axes along which the arithmetic means must be computed. If
+            a tuple of unique integers, the means are computed over multiple
+            axes. If `None`, the mean is computed over the entire array.
+            Default: `None`.
+        keepdims (Optional[bool]):
+            if `True`, the reduced axes (dimensions) are included in the result
+            as singleton dimensions, so that the returned array remains
+            compatible with the input array according to Array Broadcasting
+            rules. Otherwise, if `False`, the reduced axes are not included in
+            the returned array. Default: `False`.
+    Returns:
+        usm_ndarray:
+            an array containing the arithmetic means. If the mean was computed
+            over the entire array, a zero-dimensional array is returned.
+
+            If `x` has a real or complex floating-point data type, the
+            returned array will have the same data type as `x`.
+            If `x` has a boolean or integral data type, the returned array
+            will have the default floating point data type for the device
+            where input array `x` is allocated.
+    """
+    if not isinstance(x, dpt.usm_ndarray):
+        raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")
+    nd = x.ndim
+    if axis is None:
+        axis = tuple(range(nd))
+    if not isinstance(axis, (tuple, list)):
+        axis = (axis,)
+    axis = normalize_axis_tuple(axis, nd, "axis")
+    perm = []
+    nelems = 1  # number of input elements averaged into each output value
+    for i in range(nd):
+        if i not in axis:
+            perm.append(i)
+        else:
+            nelems *= x.shape[i]
+    sum_nd = len(axis)
+    perm = perm + list(axis)  # kept axes first, reduced axes last
+    arr2 = dpt_ext.permute_dims(x, perm)
+    res_shape = arr2.shape[: nd - sum_nd]
+    q = x.sycl_queue
+    inp_dt = x.dtype
+    res_dt = (
+        x.dtype
+        if x.dtype.kind in "fc"
+        else dpt.dtype(ti.default_device_fp_type(q))
+    )
+    res_usm_type = x.usm_type
+    if sum_nd == 0:  # no axes to reduce: the result is a (cast) copy of x
+        return dpt_ext.astype(x, res_dt, copy=True)
+    # sum directly into res_dt when supported; otherwise cast a copy first
+    _manager = du.SequentialOrderManager[q]
+    dep_evs = _manager.submitted_events
+    if tri._sum_over_axis_dtype_supported(inp_dt, res_dt, res_usm_type, q):
+        res = dpt_ext.empty(
+            res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
+        )
+        ht_e1, r_e = tri._sum_over_axis(
+            src=arr2,
+            trailing_dims_to_reduce=sum_nd,
+            dst=res,
+            sycl_queue=q,
+            depends=dep_evs,
+        )
+        _manager.add_event_pair(ht_e1, r_e)
+    else:
+        tmp = dpt_ext.empty(
+            arr2.shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
+        )
+        ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray(
+            src=arr2, dst=tmp, sycl_queue=q, depends=dep_evs
+        )
+        _manager.add_event_pair(ht_e_cpy, cpy_e)
+        res = dpt_ext.empty(
+            res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
+        )
+        ht_e_red, r_e = tri._sum_over_axis(
+            src=tmp,
+            trailing_dims_to_reduce=sum_nd,
+            dst=res,
+            sycl_queue=q,
+            depends=[cpy_e],  # the reduction must wait for the cast copy
+        )
+        _manager.add_event_pair(ht_e_red, r_e)
+    # keepdims: re-insert the reduced axes as size-1 dims in input order
+    if keepdims:
+        res_shape = res_shape + (1,) * sum_nd
+        inv_perm = sorted(range(nd), key=lambda d: perm[d])
+        res = dpt_ext.permute_dims(dpt_ext.reshape(res, res_shape), inv_perm)
+    # turn the sums into means: divide in place by the reduced element count
+    dep_evs = _manager.submitted_events
+    ht_e2, div_e = tei._divide_by_scalar(
+        src=res, scalar=nelems, dst=res, sycl_queue=q, depends=dep_evs
+    )
+    _manager.add_event_pair(ht_e2, div_e)
+    return res
+
+
+def var(x, axis=None, correction=0.0, keepdims=False):
+    """var(x, axis=None, correction=0.0, keepdims=False)
+
+    Calculates the variance of elements in the input array `x`.
+
+    Args:
+        x (usm_ndarray):
+            input array.
+        axis (Optional[int, Tuple[int, ...]]):
+            axis or axes along which the variances must be computed. If a tuple
+            of unique integers, the variances are computed over multiple axes.
+            If `None`, the variance is computed over the entire array.
+            Default: `None`.
+        correction (Optional[float, int]):
+            degrees of freedom adjustment. The divisor used in calculating the
+            variance is `N - correction`, where `N` corresponds to the total
+            number of elements over which the variance is calculated.
+            Default: `0.0`.
+        keepdims (Optional[bool]):
+            if `True`, the reduced axes (dimensions) are included in the result
+            as singleton dimensions, so that the returned array remains
+            compatible with the input array according to Array Broadcasting
+            rules. Otherwise, if `False`, the reduced axes are not included in
+            the returned array. Default: `False`.
+    Returns:
+        usm_ndarray:
+            an array containing the variances. If the variance was computed
+            over the entire array, a zero-dimensional array is returned.
+
+            If `x` has a real-valued floating-point data type, the returned
+            array will have the same data type as `x`.
+            If `x` has a boolean or integral data type, the returned array
+            will have the default floating point data type for the device
+            where input array `x` is allocated.
+    """
+    if not isinstance(x, dpt.usm_ndarray):
+        raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")
+    # the message must report the type of `correction`, not the type of `x`
+    if not isinstance(correction, (int, float)):
+        raise TypeError(
+            "Expected a Python integer or float for `correction`, got "
+            f"{type(correction)}"
+        )
+
+    if x.dtype.kind == "c":
+        raise ValueError("`var` does not support complex types")
+
+    res, _ = _var_impl(x, axis, correction, keepdims)  # event list unused
+    return res
+
+
+def std(x, axis=None, correction=0.0, keepdims=False):
+    """std(x, axis=None, correction=0.0, keepdims=False)
+
+    Calculates the standard deviation of elements in the input array `x`.
+
+    Args:
+        x (usm_ndarray):
+            input array.
+        axis (Optional[int, Tuple[int, ...]]):
+            axis or axes along which the standard deviations must be computed.
+            If a tuple of unique integers, the standard deviations are computed
+            over multiple axes. If `None`, the standard deviation is computed
+            over the entire array. Default: `None`.
+        correction (Optional[float, int]):
+            degrees of freedom adjustment. The divisor used in calculating the
+            standard deviation is `N - correction`, where `N` corresponds to the
+            total number of elements over which the standard deviation is
+            calculated. Default: `0.0`.
+        keepdims (Optional[bool]):
+            if `True`, the reduced axes (dimensions) are included in the result
+            as singleton dimensions, so that the returned array remains
+            compatible with the input array according to Array Broadcasting
+            rules. Otherwise, if `False`, the reduced axes are not included in
+            the returned array. Default: `False`.
+    Returns:
+        usm_ndarray:
+            an array containing the standard deviations. If the standard
+            deviation was computed over the entire array, a zero-dimensional
+            array is returned.
+
+            If `x` has a real-valued floating-point data type, the returned
+            array will have the same data type as `x`.
+            If `x` has a boolean or integral data type, the returned array
+            will have the default floating point data type for the device
+            where input array `x` is allocated.
+    """
+    if not isinstance(x, dpt.usm_ndarray):
+        raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")
+    # the message must report the type of `correction`, not the type of `x`
+    if not isinstance(correction, (int, float)):
+        raise TypeError(
+            "Expected a Python integer or float for `correction`, "
+            f"got {type(correction)}"
+        )
+
+    if x.dtype.kind == "c":
+        raise ValueError("`std` does not support complex types")
+
+    exec_q = x.sycl_queue
+    _manager = du.SequentialOrderManager[exec_q]
+    res, deps = _var_impl(x, axis, correction, keepdims)  # variance first
+    ht_ev, sqrt_ev = tei._sqrt(
+        src=res, dst=res, sycl_queue=exec_q, depends=deps
+    )
+    _manager.add_event_pair(ht_ev, sqrt_ev)
+    return res
diff --git a/dpctl_ext/tensor/_testing.py b/dpctl_ext/tensor/_testing.py
new file mode 100644
index 00000000000..5c7e9be0e2e
--- /dev/null
+++ b/dpctl_ext/tensor/_testing.py
@@ -0,0 +1,175 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import dpctl.tensor as dpt
+import dpctl.utils as du
+import numpy as np
+
+# TODO: revert to `import dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+import dpctl_ext.tensor as dpt_ext
+
+from ._manipulation_functions import _broadcast_shape_impl
+from ._type_utils import _to_device_supported_dtype
+
+
+def _allclose_complex_fp(z1, z2, atol, rtol, equal_nan):
+    """Tolerance test of complex arrays, applied to real and imaginary parts."""
+    z1r = dpt.real(z1)
+    z1i = dpt.imag(z1)
+    z2r = dpt.real(z2)
+    z2i = dpt.imag(z2)
+    if equal_nan:
+        check1 = dpt_ext.all(
+            dpt_ext.isnan(z1r) == dpt_ext.isnan(z2r)
+        ) and dpt_ext.all(dpt_ext.isnan(z1i) == dpt_ext.isnan(z2i))
+    else:
+        check1 = (
+            dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z1r)))
+            and dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z1i)))
+        ) and (
+            dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z2r)))
+            and dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z2i)))
+        )
+    if not check1:
+        return check1
+    mr = dpt_ext.isinf(z1r)
+    mi = dpt_ext.isinf(z1i)
+    check2 = dpt_ext.all(mr == dpt_ext.isinf(z2r)) and dpt_ext.all(
+        mi == dpt_ext.isinf(z2i)
+    )
+    if not check2:
+        return check2
+    check3 = dpt_ext.all(z1r[mr] == z2r[mr]) and dpt_ext.all(z1i[mi] == z2i[mi])
+    if not check3:
+        return check3
+    mr = dpt_ext.isfinite(z1r)
+    mi = dpt_ext.isfinite(z1i)
+    mv1, mv2 = z1r[mr], z2r[mr]
+    # non-strict `<=`, as for the imaginary part: equal values must pass
+    # even when atol == rtol == 0
+    check4 = dpt_ext.all(
+        dpt_ext.abs(mv1 - mv2)
+        <= dpt_ext.maximum(
+            atol, rtol * dpt_ext.maximum(dpt_ext.abs(mv1), dpt_ext.abs(mv2))
+        )
+    )
+    if not check4:
+        return check4
+    mv1, mv2 = z1i[mi], z2i[mi]
+    check5 = dpt_ext.all(
+        dpt_ext.abs(mv1 - mv2)
+        <= dpt_ext.maximum(
+            atol, rtol * dpt_ext.maximum(dpt_ext.abs(mv1), dpt_ext.abs(mv2))
+        )
+    )
+    return check5
+
+
+def _allclose_real_fp(r1, r2, atol, rtol, equal_nan):
+    # Stage 1: NaN placement must agree, or NaNs must be absent altogether.
+    if equal_nan:
+        nans_ok = dpt_ext.all(dpt_ext.isnan(r1) == dpt_ext.isnan(r2))
+    else:
+        no_nan_lhs = dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(r1)))
+        # `and` short-circuits, so r2 is only scanned when r1 is NaN-free
+        nans_ok = no_nan_lhs and dpt_ext.logical_not(
+            dpt_ext.any(dpt_ext.isnan(r2))
+        )
+    if not nans_ok:
+        return nans_ok
+    # Stage 2: infinities must sit at the same positions with equal values.
+    inf_mask = dpt_ext.isinf(r1)
+    same_inf_pos = dpt_ext.all(inf_mask == dpt_ext.isinf(r2))
+    if not same_inf_pos:
+        return same_inf_pos
+    same_inf_val = dpt_ext.all(r1[inf_mask] == r2[inf_mask])
+    if not same_inf_val:
+        return same_inf_val
+    # Stage 3: finite entries are compared against the mixed tolerance.
+    finite_mask = dpt_ext.isfinite(r1)
+    lhs, rhs = r1[finite_mask], r2[finite_mask]
+    scale = dpt_ext.maximum(dpt_ext.abs(lhs), dpt_ext.abs(rhs))
+    tol = dpt_ext.maximum(atol, rtol * scale)
+    return dpt_ext.all(dpt_ext.abs(lhs - rhs) <= tol)
+
+
+def _allclose_others(r1, r2):
+    return dpt_ext.all(r1 == r2)  # exact element-wise equality, no tolerance
+
+
+def allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False):
+    """allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False)
+
+    Checks whether two arrays agree element-wise within tolerances, after
+    promotion to a common data type and broadcasting to a common shape.
+    Non-floating-point data must match exactly; otherwise the test is:
+
+           abs(a - b) <= max(atol, rtol * max(abs(a), abs(b)))
+    """
+    # both operands must be usm_ndarray instances (a1 is validated first)
+    for arr in (a1, a2):
+        if not isinstance(arr, dpt.usm_ndarray):
+            raise TypeError(
+                f"Expected dpctl.tensor.usm_ndarray type, got {type(arr)}."
+            )
+
+    # tolerances are coerced to Python floats before the sign check
+    atol, rtol = float(atol), float(rtol)
+    if any(tol < 0.0 for tol in (atol, rtol)):
+        raise ValueError(
+            "Absolute and relative tolerances must be non-negative"
+        )
+    equal_nan = bool(equal_nan)
+
+    # a single execution queue must be inferable from both arrays
+    exec_q = du.get_execution_queue((a1.sycl_queue, a2.sycl_queue))
+    if exec_q is None:
+        raise du.ExecutionPlacementError(
+            "Execution placement can not be unambiguously inferred "
+            "from input arguments."
+        )
+
+    # shape that both operands are broadcast to before comparison
+    common_shape = _broadcast_shape_impl([a1.shape, a2.shape])
+    lhs, rhs = a1, a2
+    if lhs.dtype != rhs.dtype:
+        # cast both operands to a promoted dtype that the device supports
+        promoted = np.promote_types(lhs.dtype, rhs.dtype)
+        common_dt = _to_device_supported_dtype(promoted, exec_q.sycl_device)
+        lhs = dpt_ext.astype(lhs, common_dt)
+        rhs = dpt_ext.astype(rhs, common_dt)
+    lhs = dpt_ext.broadcast_to(lhs, common_shape)
+    rhs = dpt_ext.broadcast_to(rhs, common_shape)
+
+    # dispatch on the kind of the common data type
+    kind = lhs.dtype.kind
+    if kind == "f":
+        return _allclose_real_fp(lhs, rhs, atol, rtol, equal_nan)
+    if kind == "c":
+        return _allclose_complex_fp(lhs, rhs, atol, rtol, equal_nan)
+    return _allclose_others(lhs, rhs)
diff --git a/dpctl_ext/tensor/_utility_functions.py b/dpctl_ext/tensor/_utility_functions.py
index a122ac3d6ce..821f0954017 100644
--- a/dpctl_ext/tensor/_utility_functions.py
+++ b/dpctl_ext/tensor/_utility_functions.py
@@ -489,7 +489,7 @@ def diff(x, /, *, axis=-1, n=1, prepend=None, append=None):
         slice(None) if i != axis else slice(None, -1) for i in range(x_nd)
     )
 
-    diff_op = dpt.not_equal if x.dtype == dpt.bool else dpt.subtract
+    diff_op = dpt_ext.not_equal if x.dtype == dpt.bool else dpt_ext.subtract
     if n > 1:
         arr_tmp0 = diff_op(arr[sl0], arr[sl1])
         arr_tmp1 = diff_op(arr_tmp0[sl0], arr_tmp0[sl1])
diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py
index 75fe215837b..1d89d14c8df 100644
--- a/dpnp/dpnp_iface_statistics.py
+++ b/dpnp/dpnp_iface_statistics.py
@@ -41,14 +41,13 @@
 
 import math
 
-import dpctl.tensor as dpt
-import dpctl.tensor._tensor_elementwise_impl as ti
 import dpctl.utils as dpu
 import numpy
 
 # TODO: revert to `import dpctl.tensor...`
 # when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
+import dpctl_ext.tensor._tensor_elementwise_impl as ti
 import dpnp
 
 # pylint: disable=no-name-in-module
@@ -1118,7 +1117,7 @@ def max(a, axis=None, out=None, keepdims=False, initial=None, where=True):
     return dpnp_wrap_reduction_call(
         usm_a,
         out,
-        dpt_ext.max,
+        dpt.max,
         a.dtype,
         axis=axis,
         keepdims=keepdims,
@@ -1207,7 +1206,7 @@ def mean(a, /, axis=None, dtype=None, out=None, keepdims=False, *, where=True):
     usm_a = dpnp.get_usm_ndarray(a)
     usm_res = dpt.mean(usm_a, axis=axis, keepdims=keepdims)
     if dtype is not None:
-        usm_res = dpt_ext.astype(usm_res, dtype)
+        usm_res = dpt.astype(usm_res, dtype)
 
     return dpnp.get_result_array(usm_res, out, casting="unsafe")
 
@@ -1395,7 +1394,7 @@ def min(a, axis=None, out=None, keepdims=False, initial=None, where=True):
     return dpnp_wrap_reduction_call(
         usm_a,
         out,
-        dpt_ext.min,
+        dpt.min,
         a.dtype,
         axis=axis,
         keepdims=keepdims,