diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index d2ac90621aaa..eb66c91dc8c2 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -37,7 +37,7 @@ jobs:
actions: write
runs-on: ${{ matrix.os }}
- timeout-minutes: 60
+ timeout-minutes: 80
defaults:
run:
diff --git a/.gitignore b/.gitignore
index 0cfebe53f623..f8ed987fa0d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,4 @@ core
# TODO: revert to `dpctl/`
# when dpnp fully migrates dpctl/tensor
dpctl_ext/**/*.cpython*.so
+dpctl_ext/include/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c7bb7f650dac..489283f45a43 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -344,5 +344,14 @@ if(DEFINED SKBUILD)
set(_ignore_me ${SKBUILD})
endif()
-add_subdirectory(dpnp)
+# TODO: Replace `${CMAKE_BINARY_DIR}` with a dedicated public include root
+# for dpctl_ext C-API headers
+# Unlike dpctl which exposes C-API from `dpctl/apis/include`,
+# dpctl_ext currently relies on generated headers in the build tree.
+# `${CMAKE_BINARY_DIR}` is a temporary workaround.
+
+add_library(DpctlExtCAPI INTERFACE)
+target_include_directories(DpctlExtCAPI INTERFACE ${CMAKE_BINARY_DIR})
+
add_subdirectory(dpctl_ext)
+add_subdirectory(dpnp)
diff --git a/dpctl_ext/CMakeLists.txt b/dpctl_ext/CMakeLists.txt
index e58693091422..a5524e8bb3db 100644
--- a/dpctl_ext/CMakeLists.txt
+++ b/dpctl_ext/CMakeLists.txt
@@ -112,8 +112,89 @@ else()
endif()
# at build time create include/ directory and copy header files over
-# set(DPCTL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
+set(DPCTL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
set(CMAKE_INSTALL_RPATH "$ORIGIN")
+function(build_dpctl_ext _trgt _src _dest)
+ set(options SYCL)
+ cmake_parse_arguments(BUILD_DPCTL_EXT "${options}" "RELATIVE_PATH" "" ${ARGN})
+ add_cython_target(${_trgt} ${_src} CXX OUTPUT_VAR _generated_src)
+ set(_cythonize_trgt "${_trgt}_cythonize_pyx")
+ python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src})
+ if(BUILD_DPCTL_EXT_SYCL)
+ add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
+ target_compile_options(${_trgt} PRIVATE -fno-sycl-id-queries-fit-in-int)
+ target_link_options(${_trgt} PRIVATE -fsycl-device-code-split=per_kernel)
+ if(DPCTL_OFFLOAD_COMPRESS)
+ target_link_options(${_trgt} PRIVATE --offload-compress)
+ endif()
+ if(_dpctl_sycl_targets)
+ # make fat binary
+ target_compile_options(
+ ${_trgt}
+ PRIVATE ${_dpctl_sycl_target_compile_options}
+ )
+ target_link_options(${_trgt} PRIVATE ${_dpctl_sycl_target_link_options})
+ endif()
+ endif()
+ target_link_libraries(${_trgt} PRIVATE Python::NumPy)
+ if(DPCTL_GENERATE_COVERAGE)
+ target_compile_definitions(${_trgt} PRIVATE CYTHON_TRACE=1 CYTHON_TRACE_NOGIL=1)
+ if(BUILD_DPCTL_EXT_SYCL)
+ target_compile_options(${_trgt} PRIVATE -fno-sycl-use-footer)
+ endif()
+ endif()
+ # Dpctl
+ target_include_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR})
+ target_link_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR}/..)
+ target_link_libraries(${_trgt} PRIVATE DPCTLSyclInterface)
+ set(_linker_options "LINKER:${DPCTL_LDFLAGS}")
+ target_link_options(${_trgt} PRIVATE ${_linker_options})
+ get_filename_component(_name_wle ${_generated_src} NAME_WLE)
+ get_filename_component(_generated_src_dir ${_generated_src} DIRECTORY)
+ set(_generated_public_h "${_generated_src_dir}/${_name_wle}.h")
+ set(_generated_api_h "${_generated_src_dir}/${_name_wle}_api.h")
+
+ # TODO: create separate folder inside build folder that contains only
+ # headers related to this target and appropriate folder structure to
+ # eliminate shadow dependencies
+ get_filename_component(_generated_src_dir_dir ${_generated_src_dir} DIRECTORY)
+ # TODO: do not set directory if we did not generate header
+ target_include_directories(${_trgt} INTERFACE ${_generated_src_dir_dir})
+ set(_rpath_value "$ORIGIN")
+ if(BUILD_DPCTL_EXT_RELATIVE_PATH)
+ set(_rpath_value "${_rpath_value}/${BUILD_DPCTL_EXT_RELATIVE_PATH}")
+ endif()
+ if(DPCTL_WITH_REDIST)
+ set(_rpath_value "${_rpath_value}:${_rpath_value}/../../..")
+ endif()
+ set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH ${_rpath_value})
+
+ install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest})
+ install(
+ FILES ${_generated_api_h}
+ DESTINATION ${CMAKE_INSTALL_PREFIX}/dpctl_ext/include/${_dest}
+ OPTIONAL
+ )
+ install(
+ FILES ${_generated_public_h}
+ DESTINATION ${CMAKE_INSTALL_PREFIX}/dpctl_ext/include/${_dest}
+ OPTIONAL
+ )
+ if(DPCTL_GENERATE_COVERAGE)
+ get_filename_component(_original_src_dir ${_src} DIRECTORY)
+ file(RELATIVE_PATH _rel_dir ${CMAKE_SOURCE_DIR} ${_original_src_dir})
+ install(FILES ${_generated_src} DESTINATION ${CMAKE_INSTALL_PREFIX}/${_rel_dir})
+ endif()
+
+ # Create target with headers only, because python is managing all the
+ # library imports at runtime
+ set(_trgt_headers ${_trgt}_headers)
+ add_library(${_trgt_headers} INTERFACE)
+ add_dependencies(${_trgt_headers} ${_trgt})
+ get_target_property(_trgt_headers_dir ${_trgt} INTERFACE_INCLUDE_DIRECTORIES)
+ target_include_directories(${_trgt_headers} INTERFACE ${_trgt_headers_dir})
+endfunction()
+
add_subdirectory(tensor)
diff --git a/dpctl_ext/tensor/CMakeLists.txt b/dpctl_ext/tensor/CMakeLists.txt
index 6f286a8d7198..8df593b0838d 100644
--- a/dpctl_ext/tensor/CMakeLists.txt
+++ b/dpctl_ext/tensor/CMakeLists.txt
@@ -29,6 +29,15 @@
find_package(Python COMPONENTS Development.Module)
+file(GLOB _cython_sources *.pyx)
+foreach(_cy_file ${_cython_sources})
+ get_filename_component(_trgt ${_cy_file} NAME_WLE)
+ build_dpctl_ext(${_trgt} ${_cy_file} "dpctl_ext/tensor" RELATIVE_PATH "..")
+ target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
+ # target_link_libraries(DpctlCAPI INTERFACE ${_trgt}_headers)
+ target_link_libraries(DpctlExtCAPI INTERFACE ${_trgt}_headers)
+endforeach()
+
if(WIN32)
if(${CMAKE_VERSION} VERSION_LESS "3.27")
# this is a work-around for target_link_options inserting option after -link option, cause
@@ -338,6 +347,7 @@ foreach(python_module_name ${_py_trgts})
# dpctl4pybind11.hpp. It will allow to simplify dependency tree
# NOTE: dpctl C-API is resolved at runtime via Python
# target_link_libraries(${python_module_name} PRIVATE DpctlCAPI)
+ target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI)
if(DPNP_WITH_REDIST)
set_target_properties(
${python_module_name}
diff --git a/dpctl_ext/tensor/__init__.pxd b/dpctl_ext/tensor/__init__.pxd
new file mode 100644
index 000000000000..a4bcecfec1d1
--- /dev/null
+++ b/dpctl_ext/tensor/__init__.pxd
@@ -0,0 +1,36 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+""" This file declares the extension types and functions for the Cython API
+ implemented in _usmarray.pyx file.
+"""
+
+# distutils: language = c++
+# cython: language_level=3
+
+from ._usmarray cimport *
diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py
index 7a6923169c1f..03980e194fd0 100644
--- a/dpctl_ext/tensor/__init__.py
+++ b/dpctl_ext/tensor/__init__.py
@@ -28,7 +28,9 @@
from ._accumulation import cumulative_logsumexp, cumulative_prod, cumulative_sum
+from ._array_api import __array_api_version__, __array_namespace_info__
from ._clip import clip
+from ._constants import e, inf, nan, newaxis, pi
from ._copy_utils import (
asnumpy,
astype,
@@ -53,6 +55,29 @@
zeros,
zeros_like,
)
+from ._data_types import (
+ bool,
+ complex64,
+ complex128,
+ dtype,
+ float16,
+ float32,
+ float64,
+ int8,
+ int16,
+ int32,
+ int64,
+ uint8,
+ uint16,
+ uint32,
+ uint64,
+)
+from ._device import Device
+from ._dldevice_conversions import (
+ dldevice_to_sycl_device,
+ sycl_device_to_dldevice,
+)
+from ._dlpack import from_dlpack
from ._elementwise_funcs import (
abs,
acos,
@@ -157,6 +182,13 @@
tile,
unstack,
)
+from ._print import (
+ get_print_options,
+ print_options,
+ set_print_options,
+ usm_ndarray_repr,
+ usm_ndarray_str,
+)
from ._reduction import (
argmax,
argmin,
@@ -168,6 +200,12 @@
reduce_hypot,
sum,
)
+
+# isort: off
+# placed here to avoid circular import
+from ._usmarray import DLDeviceType, usm_ndarray
+
+# isort: on
from ._reshape import reshape
from ._search_functions import where
from ._searchsorted import searchsorted
@@ -185,6 +223,32 @@
from ._utility_functions import all, any, diff
__all__ = [
+ "Device",
+ "DLDeviceType",
+ "usm_ndarray",
+ # data types
+ "bool",
+ "dtype",
+ "int8",
+ "uint8",
+ "int16",
+ "uint16",
+ "int32",
+ "uint32",
+ "int64",
+ "uint64",
+ "float16",
+ "float32",
+ "float64",
+ "complex64",
+ "complex128",
+ # constants
+ "e",
+ "inf",
+ "nan",
+ "newaxis",
+ "pi",
+ # functions
"abs",
"acos",
"acosh",
@@ -229,6 +293,7 @@
"cumulative_sum",
"diff",
"divide",
+ "dldevice_to_sycl_device",
"empty",
"empty_like",
"equal",
@@ -242,9 +307,11 @@
"flip",
"floor",
"floor_divide",
+ "from_dlpack",
"from_numpy",
"full",
"full_like",
+ "get_print_options",
"greater",
"greater_equal",
"hypot",
@@ -288,6 +355,7 @@
"place",
"positive",
"pow",
+ "print_options",
"prod",
"proj",
"put",
@@ -303,6 +371,7 @@
"round",
"rsqrt",
"searchsorted",
+ "set_print_options",
"sign",
"signbit",
"sin",
@@ -316,6 +385,7 @@
"subtract",
"sum",
"swapaxes",
+ "sycl_device_to_dldevice",
"take",
"take_along_axis",
"tan",
@@ -332,9 +402,13 @@
"unique_inverse",
"unique_values",
"unstack",
+ "usm_ndarray_repr",
+ "usm_ndarray_str",
"var",
"vecdot",
"where",
"zeros",
"zeros_like",
+ "__array_api_version__",
+ "__array_namespace_info__",
]
diff --git a/dpctl_ext/tensor/_accumulation.py b/dpctl_ext/tensor/_accumulation.py
index 2dfe9656e198..8628628f3bf8 100644
--- a/dpctl_ext/tensor/_accumulation.py
+++ b/dpctl_ext/tensor/_accumulation.py
@@ -27,12 +27,11 @@
# *****************************************************************************
import dpctl
-import dpctl.tensor as dpt
from dpctl.utils import ExecutionPlacementError, SequentialOrderManager
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_accumulation_impl as tai
import dpctl_ext.tensor._tensor_impl as ti
@@ -82,7 +81,7 @@ def _accumulate_common(
perm = [i for i in range(nd) if i != axis] + [
axis,
]
- arr = dpt_ext.permute_dims(x, perm)
+ arr = dpt.permute_dims(x, perm)
q = x.sycl_queue
inp_dt = x.dtype
res_usm_type = x.usm_type
@@ -130,16 +129,16 @@ def _accumulate_common(
)
# permute out array dims if necessary
if a1 != nd:
- out = dpt_ext.permute_dims(out, perm)
+ out = dpt.permute_dims(out, perm)
orig_out = out
if ti._array_overlap(x, out) and implemented_types:
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_sh, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
)
if a1 != nd:
- out = dpt_ext.permute_dims(out, perm)
+ out = dpt.permute_dims(out, perm)
_manager = SequentialOrderManager[q]
depends = _manager.submitted_events
@@ -166,7 +165,7 @@ def _accumulate_common(
out = orig_out
else:
if _dtype_supported(res_dt, res_dt):
- tmp = dpt_ext.empty(
+ tmp = dpt.empty(
arr.shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray(
@@ -191,18 +190,18 @@ def _accumulate_common(
_manager.add_event_pair(ht_e, acc_ev)
else:
buf_dt = _default_accumulation_type_fn(inp_dt, q)
- tmp = dpt_ext.empty(
+ tmp = dpt.empty(
arr.shape, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray(
src=arr, dst=tmp, sycl_queue=q, depends=depends
)
_manager.add_event_pair(ht_e_cpy, cpy_e)
- tmp_res = dpt_ext.empty(
+ tmp_res = dpt.empty(
res_sh, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q
)
if a1 != nd:
- tmp_res = dpt_ext.permute_dims(tmp_res, perm)
+ tmp_res = dpt.permute_dims(tmp_res, perm)
if not include_initial:
ht_e, acc_ev = _accumulate_fn(
src=tmp,
@@ -225,10 +224,10 @@ def _accumulate_common(
_manager.add_event_pair(ht_e_cpy2, cpy_e2)
if appended_axis:
- out = dpt_ext.squeeze(out)
+ out = dpt.squeeze(out)
if a1 != nd:
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- out = dpt_ext.permute_dims(out, inv_perm)
+ out = dpt.permute_dims(out, inv_perm)
return out
diff --git a/dpctl_ext/tensor/_array_api.py b/dpctl_ext/tensor/_array_api.py
new file mode 100644
index 000000000000..09f71bc1bdd3
--- /dev/null
+++ b/dpctl_ext/tensor/_array_api.py
@@ -0,0 +1,256 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import dpctl
+
+# TODO: revert to `import dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+import dpctl_ext.tensor as dpt
+
+from ._tensor_impl import (
+ default_device_complex_type,
+ default_device_fp_type,
+ default_device_index_type,
+ default_device_int_type,
+)
+
+
+def _isdtype_impl(dtype, kind):
+ if isinstance(kind, str):
+ if kind == "bool":
+ return dtype.kind == "b"
+ elif kind == "signed integer":
+ return dtype.kind == "i"
+ elif kind == "unsigned integer":
+ return dtype.kind == "u"
+ elif kind == "integral":
+ return dtype.kind in "iu"
+ elif kind == "real floating":
+ return dtype.kind == "f"
+ elif kind == "complex floating":
+ return dtype.kind == "c"
+ elif kind == "numeric":
+ return dtype.kind in "iufc"
+ else:
+ raise ValueError(f"Unrecognized data type kind: {kind}")
+
+ elif isinstance(kind, tuple):
+ return any(_isdtype_impl(dtype, k) for k in kind)
+ else:
+ raise TypeError(f"Unsupported type for dtype kind: {type(kind)}")
+
+
+def _get_device_impl(d):
+ if d is None:
+ return dpctl.select_default_device()
+ elif isinstance(d, dpctl.SyclDevice):
+ return d
+ elif isinstance(d, (dpt.Device, dpctl.SyclQueue)):
+ return d.sycl_device
+ else:
+ try:
+ return dpctl.SyclDevice(d)
+ except TypeError:
+ raise TypeError(f"Unsupported type for device argument: {type(d)}")
+
+
+__array_api_version__ = "2024.12"
+
+
+class Info:
+ """namespace returned by ``__array_namespace_info__()``"""
+
+ def __init__(self):
+ self._capabilities = {
+ "boolean indexing": True,
+ "data-dependent shapes": True,
+ "max dimensions": None,
+ }
+ self._all_dtypes = {
+ "bool": dpt.bool,
+ "float32": dpt.float32,
+ "float64": dpt.float64,
+ "complex64": dpt.complex64,
+ "complex128": dpt.complex128,
+ "int8": dpt.int8,
+ "int16": dpt.int16,
+ "int32": dpt.int32,
+ "int64": dpt.int64,
+ "uint8": dpt.uint8,
+ "uint16": dpt.uint16,
+ "uint32": dpt.uint32,
+ "uint64": dpt.uint64,
+ }
+
+ def capabilities(self):
+ """
+ capabilities()
+
+ Returns a dictionary of ``dpctl``'s capabilities.
+
+ The dictionary contains the following keys:
+ ``"boolean indexing"``:
+ boolean indicating ``dpctl``'s support of boolean indexing.
+ Value: ``True``
+ ``"data-dependent shapes"``:
+ boolean indicating ``dpctl``'s support of data-dependent shapes.
+ Value: ``True``
+ ``max dimensions``:
+ integer indicating the maximum array dimension supported by ``dpctl``.
+ Value: ``None``
+
+ Returns:
+ dict:
+ dictionary of ``dpctl``'s capabilities
+ """
+ return self._capabilities.copy()
+
+ def default_device(self):
+ """
+ default_device()
+
+ Returns the default SYCL device.
+ """
+ return dpctl.select_default_device()
+
+ def default_dtypes(self, *, device=None):
+ """
+ default_dtypes(*, device=None)
+
+ Returns a dictionary of default data types for ``device``.
+
+ Args:
+ device (Optional[:class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue`, :class:`dpctl.tensor.Device`, str]):
+ array API concept of device used in getting default data types.
+ ``device`` can be ``None`` (in which case the default device
+ is used), an instance of :class:`dpctl.SyclDevice`, an instance
+ of :class:`dpctl.SyclQueue`, a :class:`dpctl.tensor.Device`
+ object returned by :attr:`dpctl.tensor.usm_ndarray.device`, or
+ a filter selector string.
+ Default: ``None``.
+
+ Returns:
+ dict:
+ a dictionary of default data types for ``device``:
+
+ - ``"real floating"``: dtype
+ - ``"complex floating"``: dtype
+ - ``"integral"``: dtype
+ - ``"indexing"``: dtype
+ """
+ device = _get_device_impl(device)
+ return {
+ "real floating": dpt.dtype(default_device_fp_type(device)),
+ "complex floating": dpt.dtype(default_device_complex_type(device)),
+ "integral": dpt.dtype(default_device_int_type(device)),
+ "indexing": dpt.dtype(default_device_index_type(device)),
+ }
+
+ def dtypes(self, *, device=None, kind=None):
+ """
+ dtypes(*, device=None, kind=None)
+
+ Returns a dictionary of all Array API data types of a specified
+ ``kind`` supported by ``device``.
+
+ This dictionary only includes data types supported by the
+ `Python Array API <https://data-apis.org/array-api/latest/>`_
+ specification.
+
+ Args:
+ device (Optional[:class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue`, :class:`dpctl.tensor.Device`, str]):
+ array API concept of device used in getting default data types.
+ ``device`` can be ``None`` (in which case the default device is
+ used), an instance of :class:`dpctl.SyclDevice`, an instance of
+ :class:`dpctl.SyclQueue`, a :class:`dpctl.tensor.Device`
+ object returned by :attr:`dpctl.tensor.usm_ndarray.device`, or
+ a filter selector string.
+ Default: ``None``.
+
+ kind (Optional[str, Tuple[str, ...]]):
+ data type kind.
+
+ - if ``kind`` is ``None``, returns a dictionary of all data
+ types supported by `device`
+ - if ``kind`` is a string, returns a dictionary containing the
+ data types belonging to the data type kind specified.
+
+ Supports:
+
+ * ``"bool"``
+ * ``"signed integer"``
+ * ``"unsigned integer"``
+ * ``"integral"``
+ * ``"real floating"``
+ * ``"complex floating"``
+ * ``"numeric"``
+
+ - if ``kind`` is a tuple, the tuple represents a union of
+ ``kind`` strings, and returns a dictionary containing data
+ types corresponding to the specified union.
+
+ Default: ``None``.
+
+ Returns:
+ dict:
+ a dictionary of the supported data types of the specified
+ ``kind``
+ """
+ device = _get_device_impl(device)
+ _fp64 = device.has_aspect_fp64
+ if kind is None:
+ return {
+ key: val
+ for key, val in self._all_dtypes.items()
+ if _fp64 or (key != "float64" and key != "complex128")
+ }
+ else:
+ return {
+ key: val
+ for key, val in self._all_dtypes.items()
+ if (_fp64 or (key != "float64" and key != "complex128"))
+ and _isdtype_impl(val, kind)
+ }
+
+ def devices(self):
+ """
+ devices()
+
+ Returns a list of supported devices.
+ """
+ return dpctl.get_devices()
+
+
+def __array_namespace_info__():
+ """
+ __array_namespace_info__()
+
+ Returns a namespace with Array API namespace inspection utilities.
+
+ """
+ return Info()
diff --git a/dpctl_ext/tensor/_clip.py b/dpctl_ext/tensor/_clip.py
index c21d601966bd..8071f13bee19 100644
--- a/dpctl_ext/tensor/_clip.py
+++ b/dpctl_ext/tensor/_clip.py
@@ -27,12 +27,11 @@
# *****************************************************************************
import dpctl
-import dpctl.tensor as dpt
from dpctl.utils import ExecutionPlacementError, SequentialOrderManager
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_elementwise_impl as tei
import dpctl_ext.tensor._tensor_impl as ti
@@ -163,7 +162,7 @@ def _clip_none(x, val, out, order, _binary_fn):
if ti._array_overlap(x, out):
if not ti._same_logical_tensors(x, out):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if isinstance(val, dpt.usm_ndarray):
if (
@@ -171,12 +170,12 @@ def _clip_none(x, val, out, order, _binary_fn):
and not ti._same_logical_tensors(val, out)
and val_dtype == res_dt
):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if isinstance(val, dpt.usm_ndarray):
val_ary = val
else:
- val_ary = dpt_ext.asarray(val, dtype=val_dtype, sycl_queue=exec_q)
+ val_ary = dpt.asarray(val, dtype=val_dtype, sycl_queue=exec_q)
if order == "A":
order = (
@@ -197,7 +196,7 @@ def _clip_none(x, val, out, order, _binary_fn):
x, val_ary, res_dt, res_shape, res_usm_type, exec_q
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -205,9 +204,9 @@ def _clip_none(x, val, out, order, _binary_fn):
order=order,
)
if x_shape != res_shape:
- x = dpt_ext.broadcast_to(x, res_shape)
+ x = dpt.broadcast_to(x, res_shape)
if val_ary.shape != res_shape:
- val_ary = dpt_ext.broadcast_to(val_ary, res_shape)
+ val_ary = dpt.broadcast_to(val_ary, res_shape)
_manager = SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
ht_binary_ev, binary_ev = _binary_fn(
@@ -229,7 +228,7 @@ def _clip_none(x, val, out, order, _binary_fn):
if order == "K":
buf = _empty_like_orderK(val_ary, res_dt)
else:
- buf = dpt_ext.empty_like(val_ary, dtype=res_dt, order=order)
+ buf = dpt.empty_like(val_ary, dtype=res_dt, order=order)
_manager = SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
@@ -242,7 +241,7 @@ def _clip_none(x, val, out, order, _binary_fn):
x, buf, res_dt, res_shape, res_usm_type, exec_q
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -251,8 +250,8 @@ def _clip_none(x, val, out, order, _binary_fn):
)
if x_shape != res_shape:
- x = dpt_ext.broadcast_to(x, res_shape)
- buf = dpt_ext.broadcast_to(buf, res_shape)
+ x = dpt.broadcast_to(x, res_shape)
+ buf = dpt.broadcast_to(buf, res_shape)
ht_binary_ev, binary_ev = _binary_fn(
src1=x,
src2=buf,
@@ -313,9 +312,9 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
if order not in ["K", "C", "F", "A"]:
order = "K"
if x.dtype.kind in "iu":
- if isinstance(min, int) and min <= dpt_ext.iinfo(x.dtype).min:
+ if isinstance(min, int) and min <= dpt.iinfo(x.dtype).min:
min = None
- if isinstance(max, int) and max >= dpt_ext.iinfo(x.dtype).max:
+ if isinstance(max, int) and max >= dpt.iinfo(x.dtype).max:
max = None
if min is None and max is None:
exec_q = x.sycl_queue
@@ -353,14 +352,14 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
if ti._array_overlap(x, out):
if not ti._same_logical_tensors(x, out):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
else:
return out
else:
if order == "K":
out = _empty_like_orderK(x, x.dtype)
else:
- out = dpt_ext.empty_like(x, order=order)
+ out = dpt.empty_like(x, order=order)
_manager = SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
@@ -519,7 +518,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
if ti._array_overlap(x, out):
if not ti._same_logical_tensors(x, out):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if isinstance(min, dpt.usm_ndarray):
if (
@@ -527,7 +526,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
and not ti._same_logical_tensors(min, out)
and buf1_dt is None
):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if isinstance(max, dpt.usm_ndarray):
if (
@@ -535,16 +534,16 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
and not ti._same_logical_tensors(max, out)
and buf2_dt is None
):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if isinstance(min, dpt.usm_ndarray):
a_min = min
else:
- a_min = dpt_ext.asarray(min, dtype=min_dtype, sycl_queue=exec_q)
+ a_min = dpt.asarray(min, dtype=min_dtype, sycl_queue=exec_q)
if isinstance(max, dpt.usm_ndarray):
a_max = max
else:
- a_max = dpt_ext.asarray(max, dtype=max_dtype, sycl_queue=exec_q)
+ a_max = dpt.asarray(max, dtype=max_dtype, sycl_queue=exec_q)
if order == "A":
order = (
@@ -572,7 +571,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
exec_q,
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -580,11 +579,11 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
order=order,
)
if x_shape != res_shape:
- x = dpt_ext.broadcast_to(x, res_shape)
+ x = dpt.broadcast_to(x, res_shape)
if a_min.shape != res_shape:
- a_min = dpt_ext.broadcast_to(a_min, res_shape)
+ a_min = dpt.broadcast_to(a_min, res_shape)
if a_max.shape != res_shape:
- a_max = dpt_ext.broadcast_to(a_max, res_shape)
+ a_max = dpt.broadcast_to(a_max, res_shape)
_manager = SequentialOrderManager[exec_q]
dep_ev = _manager.submitted_events
ht_binary_ev, binary_ev = ti._clip(
@@ -612,7 +611,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
if order == "K":
buf2 = _empty_like_orderK(a_max, buf2_dt)
else:
- buf2 = dpt_ext.empty_like(a_max, dtype=buf2_dt, order=order)
+ buf2 = dpt.empty_like(a_max, dtype=buf2_dt, order=order)
_manager = SequentialOrderManager[exec_q]
dep_ev = _manager.submitted_events
ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
@@ -631,7 +630,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
exec_q,
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -639,10 +638,10 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
order=order,
)
- x = dpt_ext.broadcast_to(x, res_shape)
+ x = dpt.broadcast_to(x, res_shape)
if a_min.shape != res_shape:
- a_min = dpt_ext.broadcast_to(a_min, res_shape)
- buf2 = dpt_ext.broadcast_to(buf2, res_shape)
+ a_min = dpt.broadcast_to(a_min, res_shape)
+ buf2 = dpt.broadcast_to(buf2, res_shape)
ht_binary_ev, binary_ev = ti._clip(
src=x,
min=a_min,
@@ -668,7 +667,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
if order == "K":
buf1 = _empty_like_orderK(a_min, buf1_dt)
else:
- buf1 = dpt_ext.empty_like(a_min, dtype=buf1_dt, order=order)
+ buf1 = dpt.empty_like(a_min, dtype=buf1_dt, order=order)
_manager = SequentialOrderManager[exec_q]
dep_ev = _manager.submitted_events
ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
@@ -687,7 +686,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
exec_q,
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -695,10 +694,10 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
order=order,
)
- x = dpt_ext.broadcast_to(x, res_shape)
- buf1 = dpt_ext.broadcast_to(buf1, res_shape)
+ x = dpt.broadcast_to(x, res_shape)
+ buf1 = dpt.broadcast_to(buf1, res_shape)
if a_max.shape != res_shape:
- a_max = dpt_ext.broadcast_to(a_max, res_shape)
+ a_max = dpt.broadcast_to(a_max, res_shape)
ht_binary_ev, binary_ev = ti._clip(
src=x,
min=buf1,
@@ -736,7 +735,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
if order == "K":
buf1 = _empty_like_orderK(a_min, buf1_dt)
else:
- buf1 = dpt_ext.empty_like(a_min, dtype=buf1_dt, order=order)
+ buf1 = dpt.empty_like(a_min, dtype=buf1_dt, order=order)
_manager = SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
@@ -747,7 +746,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
if order == "K":
buf2 = _empty_like_orderK(a_max, buf2_dt)
else:
- buf2 = dpt_ext.empty_like(a_max, dtype=buf2_dt, order=order)
+ buf2 = dpt.empty_like(a_max, dtype=buf2_dt, order=order)
ht_copy2_ev, copy2_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=a_max, dst=buf2, sycl_queue=exec_q, depends=dep_evs
)
@@ -758,7 +757,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
x, buf1, buf2, res_dt, res_shape, res_usm_type, exec_q
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -766,9 +765,9 @@ def clip(x, /, min=None, max=None, out=None, order="K"):
order=order,
)
- x = dpt_ext.broadcast_to(x, res_shape)
- buf1 = dpt_ext.broadcast_to(buf1, res_shape)
- buf2 = dpt_ext.broadcast_to(buf2, res_shape)
+ x = dpt.broadcast_to(x, res_shape)
+ buf1 = dpt.broadcast_to(buf1, res_shape)
+ buf2 = dpt.broadcast_to(buf2, res_shape)
ht_, clip_ev = ti._clip(
src=x,
min=buf1,
diff --git a/dpctl_ext/tensor/_constants.py b/dpctl_ext/tensor/_constants.py
new file mode 100644
index 000000000000..4c134bd9d375
--- /dev/null
+++ b/dpctl_ext/tensor/_constants.py
@@ -0,0 +1,36 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import numpy as np
+
+newaxis = None
+
+pi = np.pi
+e = np.e
+nan = np.nan
+inf = np.inf
diff --git a/dpctl_ext/tensor/_copy_utils.py b/dpctl_ext/tensor/_copy_utils.py
index 37879997b788..b056511ac33b 100644
--- a/dpctl_ext/tensor/_copy_utils.py
+++ b/dpctl_ext/tensor/_copy_utils.py
@@ -32,17 +32,16 @@
import dpctl
import dpctl.memory as dpm
-import dpctl.tensor as dpt
import dpctl.utils
import numpy as np
-from dpctl.tensor._data_types import _get_dtype
-from dpctl.tensor._device import normalize_queue_device
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
+from ._data_types import _get_dtype
+from ._device import normalize_queue_device
from ._numpy_helper import normalize_axis_index
from ._type_utils import _dtype_supported_by_device_impl
@@ -91,7 +90,7 @@ def _copy_from_numpy(np_ary, usm_type="device", sycl_queue=None):
)
else:
Xusm_dtype = dt
- Xusm = dpt_ext.empty(
+ Xusm = dpt.empty(
Xnp.shape, dtype=Xusm_dtype, usm_type=usm_type, sycl_queue=sycl_queue
)
_copy_from_numpy_into(Xusm, Xnp)
@@ -159,7 +158,7 @@ def _extract_impl(ary, ary_mask, axis=0):
elif isinstance(ary_mask, np.ndarray):
dst_usm_type = ary.usm_type
exec_q = ary.sycl_queue
- ary_mask = dpt_ext.asarray(
+ ary_mask = dpt.asarray(
ary_mask, usm_type=dst_usm_type, sycl_queue=exec_q
)
else:
@@ -176,7 +175,7 @@ def _extract_impl(ary, ary_mask, axis=0):
)
mask_nelems = ary_mask.size
cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64
- cumsum = dpt_ext.empty(mask_nelems, dtype=cumsum_dt, device=ary_mask.device)
+ cumsum = dpt.empty(mask_nelems, dtype=cumsum_dt, device=ary_mask.device)
exec_q = cumsum.sycl_queue
_manager = dpctl.utils.SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
@@ -184,7 +183,7 @@ def _extract_impl(ary, ary_mask, axis=0):
ary_mask, cumsum, sycl_queue=exec_q, depends=dep_evs
)
dst_shape = ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :]
- dst = dpt_ext.empty(
+ dst = dpt.empty(
dst_shape, dtype=ary.dtype, usm_type=dst_usm_type, device=ary.device
)
if dst.size == 0:
@@ -247,7 +246,7 @@ def _nonzero_impl(ary):
usm_type = ary.usm_type
mask_nelems = ary.size
cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64
- cumsum = dpt_ext.empty(
+ cumsum = dpt.empty(
mask_nelems, dtype=cumsum_dt, sycl_queue=exec_q, order="C"
)
_manager = dpctl.utils.SequentialOrderManager[exec_q]
@@ -256,7 +255,7 @@ def _nonzero_impl(ary):
ary, cumsum, sycl_queue=exec_q, depends=dep_evs
)
indexes_dt = ti.default_device_index_type(exec_q.sycl_device)
- indexes = dpt_ext.empty(
+ indexes = dpt.empty(
(ary.ndim, mask_count),
dtype=indexes_dt,
usm_type=usm_type,
@@ -284,14 +283,14 @@ def _prepare_indices_arrays(inds, q, usm_type):
lambda ind: (
ind
if isinstance(ind, dpt.usm_ndarray)
- else dpt_ext.asarray(ind, usm_type=usm_type, sycl_queue=q)
+ else dpt.asarray(ind, usm_type=usm_type, sycl_queue=q)
),
inds,
)
)
# promote to a common integral type if possible
- ind_dt = dpt_ext.result_type(*inds)
+ ind_dt = dpt.result_type(*inds)
if ind_dt.kind not in "ui":
raise ValueError(
"cannot safely promote indices to an integer data type"
@@ -299,18 +298,122 @@ def _prepare_indices_arrays(inds, q, usm_type):
inds = tuple(
map(
lambda ind: (
- ind if ind.dtype == ind_dt else dpt_ext.astype(ind, ind_dt)
+ ind if ind.dtype == ind_dt else dpt.astype(ind, ind_dt)
),
inds,
)
)
# broadcast
- inds = dpt_ext.broadcast_arrays(*inds)
+ inds = dpt.broadcast_arrays(*inds)
return inds
+def _place_impl(ary, ary_mask, vals, axis=0):
+ """
+ Put values of vals into ary by applying mask starting from slot
+ dimension axis.
+ """
+ if not isinstance(ary, dpt.usm_ndarray):
+ raise TypeError(
+ f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}"
+ )
+ if isinstance(ary_mask, dpt.usm_ndarray):
+ exec_q = dpctl.utils.get_execution_queue(
+ (
+ ary.sycl_queue,
+ ary_mask.sycl_queue,
+ )
+ )
+ coerced_usm_type = dpctl.utils.get_coerced_usm_type(
+ (
+ ary.usm_type,
+ ary_mask.usm_type,
+ )
+ )
+ if exec_q is None:
+ raise dpctl.utils.ExecutionPlacementError(
+ "arrays have different associated queues. "
+ "Use `y.to_device(x.device)` to migrate."
+ )
+ elif isinstance(ary_mask, np.ndarray):
+ exec_q = ary.sycl_queue
+ coerced_usm_type = ary.usm_type
+ ary_mask = dpt.asarray(
+ ary_mask, usm_type=coerced_usm_type, sycl_queue=exec_q
+ )
+ else:
+ raise TypeError(
+ "Expecting type dpctl.tensor.usm_ndarray or numpy.ndarray, got "
+ f"{type(ary_mask)}"
+ )
+ if exec_q is not None:
+ if not isinstance(vals, dpt.usm_ndarray):
+ vals = dpt.asarray(
+ vals,
+ dtype=ary.dtype,
+ usm_type=coerced_usm_type,
+ sycl_queue=exec_q,
+ )
+ else:
+ exec_q = dpctl.utils.get_execution_queue((exec_q, vals.sycl_queue))
+ coerced_usm_type = dpctl.utils.get_coerced_usm_type(
+ (
+ coerced_usm_type,
+ vals.usm_type,
+ )
+ )
+ if exec_q is None:
+ raise dpctl.utils.ExecutionPlacementError(
+ "arrays have different associated queues. "
+ "Use `Y.to_device(X.device)` to migrate."
+ )
+ ary_nd = ary.ndim
+ pp = normalize_axis_index(operator.index(axis), ary_nd)
+ mask_nd = ary_mask.ndim
+ if pp < 0 or pp + mask_nd > ary_nd:
+ raise ValueError(
+ "Parameter p is inconsistent with input array dimensions"
+ )
+ mask_nelems = ary_mask.size
+ cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64
+ cumsum = dpt.empty(
+ mask_nelems,
+ dtype=cumsum_dt,
+ usm_type=coerced_usm_type,
+ device=ary_mask.device,
+ )
+ exec_q = cumsum.sycl_queue
+ _manager = dpctl.utils.SequentialOrderManager[exec_q]
+ dep_ev = _manager.submitted_events
+ mask_count = ti.mask_positions(
+ ary_mask, cumsum, sycl_queue=exec_q, depends=dep_ev
+ )
+ expected_vals_shape = (
+ ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :]
+ )
+ if vals.dtype == ary.dtype:
+ rhs = vals
+ else:
+ rhs = dpt.astype(vals, ary.dtype)
+ rhs = dpt.broadcast_to(rhs, expected_vals_shape)
+ if mask_nelems == 0:
+ return
+ dep_ev = _manager.submitted_events
+ hev, pl_ev = ti._place(
+ dst=ary,
+ cumsum=cumsum,
+ axis_start=pp,
+ axis_end=pp + mask_nd,
+ rhs=rhs,
+ sycl_queue=exec_q,
+ depends=dep_ev,
+ )
+ _manager.add_event_pair(hev, pl_ev)
+ return
+
+
def _put_multi_index(ary, inds, p, vals, mode=0):
if not isinstance(ary, dpt.usm_ndarray):
raise TypeError(
@@ -332,7 +435,7 @@ def _put_multi_index(ary, inds, p, vals, mode=0):
if exec_q is not None:
if not isinstance(vals, dpt.usm_ndarray):
- vals = dpt_ext.asarray(
+ vals = dpt.asarray(
vals,
dtype=ary.dtype,
usm_type=coerced_usm_type,
@@ -367,8 +470,8 @@ def _put_multi_index(ary, inds, p, vals, mode=0):
if vals.dtype == ary.dtype:
rhs = vals
else:
- rhs = dpt_ext.astype(vals, ary.dtype)
- rhs = dpt_ext.broadcast_to(rhs, expected_vals_shape)
+ rhs = dpt.astype(vals, ary.dtype)
+ rhs = dpt.broadcast_to(rhs, expected_vals_shape)
_manager = dpctl.utils.SequentialOrderManager[exec_q]
dep_ev = _manager.submitted_events
hev, put_ev = ti._put(
@@ -418,7 +521,7 @@ def _take_multi_index(ary, inds, p, mode=0):
if 0 in ary_sh[p:p_end] and ind0.size != 0:
raise IndexError("cannot take non-empty indices from an empty axis")
res_shape = ary_sh[:p] + ind0.shape + ary_sh[p_end:]
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape, dtype=ary.dtype, usm_type=res_usm_type, sycl_queue=exec_q
)
_manager = dpctl.utils.SequentialOrderManager[exec_q]
@@ -681,9 +784,7 @@ def _make_empty_like_orderK(x, dt, usm_type, dev):
inv_perm = sorted(range(x.ndim), key=lambda i: perm[i])
sh = x.shape
sh_sorted = tuple(sh[i] for i in perm)
- R = dpt_ext.empty(
- sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C"
- )
+ R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C")
if min(st) < 0:
st_sorted = [st[i] for i in perm]
sl = tuple(
@@ -695,7 +796,7 @@ def _make_empty_like_orderK(x, dt, usm_type, dev):
for i in range(x.ndim)
)
R = R[sl]
- return dpt_ext.permute_dims(R, inv_perm)
+ return dpt.permute_dims(R, inv_perm)
def _empty_like_orderK(x, dt, usm_type=None, dev=None):
@@ -714,11 +815,11 @@ def _empty_like_orderK(x, dt, usm_type=None, dev=None):
dev = x.device
fl = x.flags
if fl["C"] or x.size <= 1:
- return dpt_ext.empty_like(
+ return dpt.empty_like(
x, dtype=dt, usm_type=usm_type, device=dev, order="C"
)
elif fl["F"]:
- return dpt_ext.empty_like(
+ return dpt.empty_like(
x, dtype=dt, usm_type=usm_type, device=dev, order="F"
)
return _make_empty_like_orderK(x, dt, usm_type, dev)
@@ -736,11 +837,11 @@ def _from_numpy_empty_like_orderK(x, dt, usm_type, dev):
raise TypeError(f"Expected numpy.ndarray, got {type(x)}")
fl = x.flags
if fl["C"] or x.size <= 1:
- return dpt_ext.empty(
+ return dpt.empty(
x.shape, dtype=dt, usm_type=usm_type, device=dev, order="C"
)
elif fl["F"]:
- return dpt_ext.empty(
+ return dpt.empty(
x.shape, dtype=dt, usm_type=usm_type, device=dev, order="F"
)
return _make_empty_like_orderK(x, dt, usm_type, dev)
@@ -760,11 +861,11 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev):
fl1 = X1.flags
fl2 = X2.flags
if fl1["C"] or fl2["C"]:
- return dpt_ext.empty(
+ return dpt.empty(
res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C"
)
if fl1["F"] and fl2["F"]:
- return dpt_ext.empty(
+ return dpt.empty(
res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F"
)
st1 = list(X1.strides)
@@ -787,9 +888,7 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev):
st2_sorted = [st2[i] for i in perm]
sh = res_shape
sh_sorted = tuple(sh[i] for i in perm)
- R = dpt_ext.empty(
- sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C"
- )
+ R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C")
if max(min(st1_sorted), min(st2_sorted)) < 0:
sl = tuple(
(
@@ -800,7 +899,7 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev):
for i in range(nd1)
)
R = R[sl]
- return dpt_ext.permute_dims(R, inv_perm)
+ return dpt.permute_dims(R, inv_perm)
def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev):
@@ -827,11 +926,11 @@ def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev):
fl2 = X2.flags
fl3 = X3.flags
if fl1["C"] or fl2["C"] or fl3["C"]:
- return dpt_ext.empty(
+ return dpt.empty(
res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C"
)
if fl1["F"] and fl2["F"] and fl3["F"]:
- return dpt_ext.empty(
+ return dpt.empty(
res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F"
)
st1 = list(X1.strides)
@@ -859,9 +958,7 @@ def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev):
st3_sorted = [st3[i] for i in perm]
sh = res_shape
sh_sorted = tuple(sh[i] for i in perm)
- R = dpt_ext.empty(
- sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C"
- )
+ R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C")
if max(min(st1_sorted), min(st2_sorted), min(st3_sorted)) < 0:
sl = tuple(
(
@@ -876,7 +973,7 @@ def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev):
for i in range(nd1)
)
R = R[sl]
- return dpt_ext.permute_dims(R, inv_perm)
+ return dpt.permute_dims(R, inv_perm)
def copy(usm_ary, /, *, order="K"):
@@ -1019,7 +1116,7 @@ def astype(
else:
target_dtype = _get_dtype(newdtype, usm_ary.sycl_queue)
- if not dpt_ext.can_cast(ary_dtype, target_dtype, casting=casting):
+ if not dpt.can_cast(ary_dtype, target_dtype, casting=casting):
raise TypeError(
f"Can not cast from {ary_dtype} to {newdtype} "
f"according to rule {casting}."
diff --git a/dpctl_ext/tensor/_ctors.py b/dpctl_ext/tensor/_ctors.py
index 21c3d0077189..d249efa8a602 100644
--- a/dpctl_ext/tensor/_ctors.py
+++ b/dpctl_ext/tensor/_ctors.py
@@ -31,17 +31,16 @@
import dpctl
import dpctl.memory as dpm
-import dpctl.tensor as dpt
import dpctl.utils
import numpy as np
-from dpctl.tensor._data_types import _get_dtype
-from dpctl.tensor._device import normalize_queue_device
-from dpctl.tensor._usmarray import _is_object_with_buffer_protocol
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
+from dpctl_ext.tensor._data_types import _get_dtype
+from dpctl_ext.tensor._device import normalize_queue_device
+from dpctl_ext.tensor._usmarray import _is_object_with_buffer_protocol
from ._copy_utils import (
_empty_like_orderK,
@@ -182,7 +181,7 @@ def _asarray_from_seq(
if order in "KA":
order = "C"
if isinstance(exec_q, dpctl.SyclQueue):
- res = dpt_ext.empty(
+ res = dpt.empty(
seq_shape,
dtype=dtype,
usm_type=usm_type,
@@ -193,7 +192,7 @@ def _asarray_from_seq(
_device_copy_walker(seq_obj, res, _manager)
return res
else:
- res = dpt_ext.empty(
+ res = dpt.empty(
seq_shape,
dtype=dtype,
usm_type=usm_type,
@@ -312,7 +311,7 @@ def _asarray_from_usm_ndarray(
)
_manager.add_event_pair(hev, cpy_ev)
else:
- tmp = dpt_ext.asnumpy(usm_ndary)
+ tmp = dpt.asnumpy(usm_ndary)
res[...] = tmp
return res
@@ -361,7 +360,7 @@ def _copy_through_host_walker(seq_o, usm_res):
)
is None
):
- usm_res[...] = dpt_ext.asnumpy(seq_o).copy()
+ usm_res[...] = dpt.asnumpy(seq_o).copy()
return
else:
usm_res[...] = seq_o
@@ -381,7 +380,7 @@ def _copy_through_host_walker(seq_o, usm_res):
)
is None
):
- usm_res[...] = dpt_ext.asnumpy(usm_ar).copy()
+ usm_res[...] = dpt.asnumpy(usm_ar).copy()
else:
usm_res[...] = usm_ar
return
@@ -1092,7 +1091,7 @@ def eye(
n_cols = n_rows if n_cols is None else operator.index(n_cols)
k = operator.index(k)
if k >= n_cols or -k >= n_rows:
- return dpt_ext.zeros(
+ return dpt.zeros(
(n_rows, n_cols),
dtype=dtype,
order=order,
@@ -1194,14 +1193,14 @@ def full(
sycl_queue = normalize_queue_device(
sycl_queue=sycl_queue, device=device
)
- X = dpt_ext.asarray(
+ X = dpt.asarray(
fill_value,
dtype=dtype,
order=order,
usm_type=usm_type,
sycl_queue=sycl_queue,
)
- return dpt_ext.copy(dpt_ext.broadcast_to(X, shape), order=order)
+ return dpt.copy(dpt.broadcast_to(X, shape), order=order)
else:
_validate_fill_value(fill_value)
@@ -1301,14 +1300,14 @@ def full_like(
if order == "K":
_ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device)
if isinstance(fill_value, (dpt.usm_ndarray, np.ndarray, tuple, list)):
- X = dpt_ext.asarray(
+ X = dpt.asarray(
fill_value,
dtype=dtype,
order=order,
usm_type=usm_type,
sycl_queue=sycl_queue,
)
- X = dpt_ext.broadcast_to(X, sh)
+ X = dpt.broadcast_to(X, sh)
res = _empty_like_orderK(x, dtype, usm_type, sycl_queue)
_manager = dpctl.utils.SequentialOrderManager[sycl_queue]
# order copy after tasks populating X
@@ -1434,14 +1433,14 @@ def linspace(
start = float(start)
stop = float(stop)
- res = dpt_ext.empty(num, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue)
+ res = dpt.empty(num, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue)
_manager = dpctl.utils.SequentialOrderManager[sycl_queue]
hev, la_ev = ti._linspace_affine(
start, stop, dst=res, include_endpoint=endpoint, sycl_queue=sycl_queue
)
_manager.add_event_pair(hev, la_ev)
- return res if int_dt is None else dpt_ext.astype(res, int_dt)
+ return res if int_dt is None else dpt.astype(res, int_dt)
def meshgrid(*arrays, indexing="xy"):
@@ -1506,15 +1505,15 @@ def meshgrid(*arrays, indexing="xy"):
res = []
if n > 1 and indexing == "xy":
- res.append(dpt_ext.reshape(arrays[0], (1, -1) + sh[2:], copy=True))
- res.append(dpt_ext.reshape(arrays[1], sh, copy=True))
+ res.append(dpt.reshape(arrays[0], (1, -1) + sh[2:], copy=True))
+ res.append(dpt.reshape(arrays[1], sh, copy=True))
arrays, sh = arrays[2:], sh[-2:] + sh[:-2]
for array in arrays:
- res.append(dpt_ext.reshape(array, sh, copy=True))
+ res.append(dpt.reshape(array, sh, copy=True))
sh = sh[-1:] + sh[:-1]
- output = dpt_ext.broadcast_arrays(*res)
+ output = dpt.broadcast_arrays(*res)
return output
@@ -1707,7 +1706,7 @@ def tril(x, /, *, k=0):
q = x.sycl_queue
if k >= shape[nd - 1] - 1:
- res = dpt_ext.empty(
+ res = dpt.empty(
x.shape,
dtype=x.dtype,
order=order,
@@ -1721,7 +1720,7 @@ def tril(x, /, *, k=0):
)
_manager.add_event_pair(hev, cpy_ev)
elif k < -shape[nd - 2]:
- res = dpt_ext.zeros(
+ res = dpt.zeros(
x.shape,
dtype=x.dtype,
order=order,
@@ -1729,7 +1728,7 @@ def tril(x, /, *, k=0):
sycl_queue=q,
)
else:
- res = dpt_ext.empty(
+ res = dpt.empty(
x.shape,
dtype=x.dtype,
order=order,
@@ -1785,7 +1784,7 @@ def triu(x, /, *, k=0):
q = x.sycl_queue
if k > shape[nd - 1]:
- res = dpt_ext.zeros(
+ res = dpt.zeros(
x.shape,
dtype=x.dtype,
order=order,
@@ -1793,7 +1792,7 @@ def triu(x, /, *, k=0):
sycl_queue=q,
)
elif k <= -shape[nd - 2] + 1:
- res = dpt_ext.empty(
+ res = dpt.empty(
x.shape,
dtype=x.dtype,
order=order,
@@ -1807,7 +1806,7 @@ def triu(x, /, *, k=0):
)
_manager.add_event_pair(hev, cpy_ev)
else:
- res = dpt_ext.empty(
+ res = dpt.empty(
x.shape,
dtype=x.dtype,
order=order,
diff --git a/dpctl_ext/tensor/_data_types.py b/dpctl_ext/tensor/_data_types.py
new file mode 100644
index 000000000000..faf30ffdabd0
--- /dev/null
+++ b/dpctl_ext/tensor/_data_types.py
@@ -0,0 +1,104 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+from numpy import bool_ as np_bool_
+from numpy import complexfloating as np_complexfloating
+from numpy import dtype
+from numpy import floating as np_floating
+from numpy import integer as np_integer
+from numpy import issubdtype as np_issubdtype
+
+from ._tensor_impl import (
+ default_device_bool_type as ti_default_device_bool_type,
+)
+from ._tensor_impl import (
+ default_device_complex_type as ti_default_device_complex_type,
+)
+from ._tensor_impl import default_device_fp_type as ti_default_device_fp_type
+from ._tensor_impl import default_device_int_type as ti_default_device_int_type
+
+bool = dtype("bool")
+int8 = dtype("int8")
+int16 = dtype("int16")
+int32 = dtype("int32")
+int64 = dtype("int64")
+uint8 = dtype("uint8")
+uint16 = dtype("uint16")
+uint32 = dtype("uint32")
+uint64 = dtype("uint64")
+float16 = dtype("float16")
+float32 = dtype("float32")
+float64 = dtype("float64")
+complex64 = dtype("complex64")
+complex128 = dtype("complex128")
+
+
+def _get_dtype(inp_dt, sycl_obj, ref_type=None):
+ """
+ Type inference utility to construct data type
+ object with defaults based on reference type.
+
+ _get_dtype is used by dpctl.tensor.asarray
+ to infer data type of the output array from the
+ input sequence.
+ """
+ if inp_dt is None:
+ if ref_type in [None, float] or np_issubdtype(ref_type, np_floating):
+ fp_dt = ti_default_device_fp_type(sycl_obj)
+ return dtype(fp_dt)
+ if ref_type in [bool, np_bool_]:
+ bool_dt = ti_default_device_bool_type(sycl_obj)
+ return dtype(bool_dt)
+ if ref_type is int or np_issubdtype(ref_type, np_integer):
+ int_dt = ti_default_device_int_type(sycl_obj)
+ return dtype(int_dt)
+ if ref_type is complex or np_issubdtype(ref_type, np_complexfloating):
+ cfp_dt = ti_default_device_complex_type(sycl_obj)
+ return dtype(cfp_dt)
+ raise TypeError(f"Reference type {ref_type} not recognized.")
+ return dtype(inp_dt)
+
+
+__all__ = [
+ "dtype",
+ "_get_dtype",
+ "bool",
+ "int8",
+ "uint8",
+ "int16",
+ "uint16",
+ "int32",
+ "uint32",
+ "int64",
+ "uint64",
+ "float16",
+ "float32",
+ "float64",
+ "complex64",
+ "complex128",
+]
diff --git a/dpctl_ext/tensor/_device.py b/dpctl_ext/tensor/_device.py
new file mode 100644
index 000000000000..8d763bc721e3
--- /dev/null
+++ b/dpctl_ext/tensor/_device.py
@@ -0,0 +1,195 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+import dpctl
+from dpctl._sycl_device_factory import _cached_default_device
+from dpctl._sycl_queue_manager import get_device_cached_queue
+
+__doc__ = "Implementation of array API mandated Device class"
+
+
+class Device:
+ """
+ An object representing Data-API concept of device.
+
+ This is a wrapper around :class:`dpctl.SyclQueue` with custom
+ formatting. The class does not have public constructor,
+ but a class method :meth:`dpctl.tensor.Device.create_device` to construct
+ it from `device` keyword argument in Array-API functions.
+
+ Instance can be queried for ``sycl_queue``, ``sycl_context``,
+ or ``sycl_device``.
+ """
+
+ __device_queue_map__ = {}
+ sycl_queue_ = None
+
+ def __new__(cls, *args, **kwargs):
+ raise TypeError("No public constructor")
+
+ @classmethod
+ def create_device(cls, device=None):
+ """Device.create_device(device=None)
+
+ Creates instance of Device from argument.
+
+ Args:
+ device:
+ Device specification, i.e. `None`, :class:`.Device`,
+ :class:`dpctl.SyclQueue`, or a :class:`dpctl.SyclDevice`
+ corresponding to a root SYCL device.
+ Raises:
+ ValueError: if an instance of :class:`dpctl.SyclDevice` corresponding
+ to a sub-device was specified as the argument
+ SyclQueueCreationError: if :class:`dpctl.SyclQueue` could not be
+ created from the argument
+ """
+ dev = device
+ obj = super().__new__(cls)
+ if isinstance(dev, Device):
+ obj.sycl_queue_ = dev.sycl_queue
+ elif isinstance(dev, dpctl.SyclQueue):
+ obj.sycl_queue_ = dev
+ elif isinstance(dev, dpctl.SyclDevice):
+ par = dev.parent_device
+ if par is None:
+ obj.sycl_queue_ = get_device_cached_queue(dev)
+ else:
+ raise ValueError(
+ f"Using non-root device {dev} to specify offloading "
+ "target is ambiguous. Please use dpctl.SyclQueue "
+ "targeting this device"
+ )
+ else:
+ if dev is None:
+ _dev = _cached_default_device()
+ else:
+ _dev = dpctl.SyclDevice(dev)
+ obj.sycl_queue_ = get_device_cached_queue(_dev)
+ return obj
+
+ @property
+ def sycl_queue(self):
+ """:class:`dpctl.SyclQueue` used to offload to this :class:`.Device`."""
+ return self.sycl_queue_
+
+ @property
+ def sycl_context(self):
+ """:class:`dpctl.SyclContext` associated with this :class:`.Device`."""
+ return self.sycl_queue_.sycl_context
+
+ @property
+ def sycl_device(self):
+ """:class:`dpctl.SyclDevice` targeted by this :class:`.Device`."""
+ return self.sycl_queue_.sycl_device
+
+ def __repr__(self):
+ try:
+ sd = self.sycl_device
+ except AttributeError as exc:
+ raise ValueError(
+ f"Instance of {self.__class__} is not initialized"
+ ) from exc
+ try:
+ fs = sd.filter_string
+ return f"Device({fs})"
+ except TypeError:
+ # This is a sub-device
+ return repr(self.sycl_queue)
+
+ def print_device_info(self):
+ """Outputs information about targeted SYCL device"""
+ self.sycl_device.print_device_info()
+
+ def wait(self):
+ """Call ``wait`` method of the underlying ``sycl_queue``."""
+ self.sycl_queue_.wait()
+
+ def __eq__(self, other):
+ """Equality comparison based on underlying ``sycl_queue``."""
+ if isinstance(other, Device):
+ return self.sycl_queue.__eq__(other.sycl_queue)
+ elif isinstance(other, dpctl.SyclQueue):
+ return self.sycl_queue.__eq__(other)
+ return False
+
+ def __hash__(self):
+ """Compute object's hash value."""
+ return self.sycl_queue.__hash__()
+
+
+def normalize_queue_device(sycl_queue=None, device=None):
+ """normalize_queue_device(sycl_queue=None, device=None)
+
+ Utility to process exclusive keyword arguments 'device'
+ and 'sycl_queue' in functions of `dpctl.tensor`.
+
+ Args:
+ sycl_queue (:class:`dpctl.SyclQueue`, optional):
+ explicitly indicates where USM allocation is done
+ and the population code (if any) is executed.
+ Value `None` is interpreted as get the SYCL queue
+ from `device` keyword, or use default queue.
+ Default: None
+ device (string, :class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue`,
+ :class:`dpctl.tensor.Device`, optional):
+ array-API keyword indicating non-partitioned SYCL device
+ where array is allocated.
+
+ Returns:
+ :class:`dpctl.SyclQueue` object implied by either of provided
+ keywords. If both are None, `dpctl.SyclQueue()` is returned.
+ If both are specified and imply the same queue, `sycl_queue`
+ is returned.
+
+ Raises:
+ TypeError: if argument is not of the expected type, or keywords
+ imply incompatible queues.
+ """
+ q = sycl_queue
+ d = device
+ if q is None:
+ d = Device.create_device(d)
+ return d.sycl_queue
+ if not isinstance(q, dpctl.SyclQueue):
+ raise TypeError(f"Expected dpctl.SyclQueue, got {type(q)}")
+ if d is None:
+ return q
+ d = Device.create_device(d)
+ qq = dpctl.utils.get_execution_queue(
+ (
+ q,
+ d.sycl_queue,
+ )
+ )
+ if qq is None:
+ raise TypeError(
+ "sycl_queue and device keywords can not be both specified"
+ )
+ return qq
diff --git a/dpctl_ext/tensor/_dldevice_conversions.py b/dpctl_ext/tensor/_dldevice_conversions.py
new file mode 100644
index 000000000000..595a280689a5
--- /dev/null
+++ b/dpctl_ext/tensor/_dldevice_conversions.py
@@ -0,0 +1,52 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+from dpctl._sycl_device import SyclDevice
+
+from ._usmarray import DLDeviceType
+
+
+def dldevice_to_sycl_device(dl_dev: tuple):
+ if isinstance(dl_dev, tuple):
+ if len(dl_dev) != 2:
+ raise ValueError("dldevice tuple must have length 2")
+ else:
+ raise TypeError(
+ f"dl_dev is expected to be a 2-tuple, got " f"{type(dl_dev)}"
+ )
+ if dl_dev[0] != DLDeviceType.kDLOneAPI:
+ raise ValueError("dldevice type must be kDLOneAPI")
+ return SyclDevice(str(dl_dev[1]))
+
+
+def sycl_device_to_dldevice(dev: SyclDevice):
+ if not isinstance(dev, SyclDevice):
+ raise TypeError(
+ "dev is expected to be a SyclDevice, got " f"{type(dev)}"
+ )
+ return (DLDeviceType.kDLOneAPI, dev.get_device_id())
diff --git a/dpctl_ext/tensor/_dlpack.pxd b/dpctl_ext/tensor/_dlpack.pxd
new file mode 100644
index 000000000000..75378bfa7a92
--- /dev/null
+++ b/dpctl_ext/tensor/_dlpack.pxd
@@ -0,0 +1,73 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# distutils: language = c++
+# cython: language_level=3
+# cython: linetrace=True
+
+cdef extern from "numpy/npy_no_deprecated_api.h":
+ pass
+from dpctl._sycl_device cimport SyclDevice
+from numpy cimport ndarray
+
+from ._usmarray cimport usm_ndarray
+
+
+cdef extern from "dlpack/dlpack.h" nogil:
+ int device_CPU "kDLCPU"
+ int device_CUDA "kDLCUDA"
+ int device_CUDAHost "kDLCUDAHost"
+ int device_CUDAManaged "kDLCUDAManaged"
+ int device_DLROCM "kDLROCM"
+ int device_ROCMHost "kDLROCMHost"
+ int device_OpenCL "kDLOpenCL"
+ int device_Vulkan "kDLVulkan"
+ int device_Metal "kDLMetal"
+ int device_VPI "kDLVPI"
+ int device_OneAPI "kDLOneAPI"
+ int device_WebGPU "kDLWebGPU"
+ int device_Hexagon "kDLHexagon"
+ int device_MAIA "kDLMAIA"
+ int device_Trn "kDLTrn"
+
+cpdef object to_dlpack_capsule(usm_ndarray array) except +
+cpdef object to_dlpack_versioned_capsule(
+ usm_ndarray array, bint copied
+) except +
+cpdef object numpy_to_dlpack_versioned_capsule(
+ ndarray array, bint copied
+) except +
+cpdef object from_dlpack_capsule(object dltensor) except +
+
+cdef class DLPackCreationError(Exception):
+ """
+    A DLPackCreationError exception is raised when constructing
+ DLPack capsule from `usm_ndarray` based on a USM allocation
+ on a partitioned SYCL device.
+ """
+ pass
diff --git a/dpctl_ext/tensor/_dlpack.pyx b/dpctl_ext/tensor/_dlpack.pyx
new file mode 100644
index 000000000000..fde4415b7425
--- /dev/null
+++ b/dpctl_ext/tensor/_dlpack.pyx
@@ -0,0 +1,1245 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# distutils: language = c++
+# cython: language_level=3
+# cython: linetrace=True
+
+cdef extern from "numpy/npy_no_deprecated_api.h":
+ pass
+
+cimport cpython
+cimport dpctl as c_dpctl
+cimport dpctl.memory as c_dpmem
+from dpctl._backend cimport (
+ DPCTLDevice_Delete,
+ DPCTLDevice_GetParentDevice,
+ DPCTLSyclDeviceRef,
+ DPCTLSyclUSMRef,
+)
+from dpctl._sycl_queue_manager cimport get_device_cached_queue
+from libc cimport stdlib
+from libc.stdint cimport int64_t, uint8_t, uint16_t, uint32_t, uint64_t
+from numpy cimport ndarray
+
+from ._usmarray cimport (
+ USM_ARRAY_C_CONTIGUOUS,
+ USM_ARRAY_F_CONTIGUOUS,
+ USM_ARRAY_WRITABLE,
+ usm_ndarray,
+)
+
+import ctypes
+
+import dpctl
+import dpctl.memory as dpmem
+import numpy as np
+
+from ._device import Device
+
+
+cdef extern from "dlpack/dlpack.h" nogil:
+ cdef int DLPACK_MAJOR_VERSION
+
+ cdef int DLPACK_MINOR_VERSION
+
+ cdef int DLPACK_FLAG_BITMASK_READ_ONLY
+
+ cdef int DLPACK_FLAG_BITMASK_IS_COPIED
+
+ ctypedef struct DLPackVersion:
+ uint32_t major
+ uint32_t minor
+
+ cdef enum DLDeviceType:
+ kDLCPU
+ kDLCUDA
+ kDLCUDAHost
+ kDLCUDAManaged
+ kDLROCM
+ kDLROCMHost
+ kDLOpenCL
+ kDLVulkan
+ kDLMetal
+ kDLVPI
+ kDLOneAPI
+ kDLWebGPU
+ kDLHexagon
+ kDLMAIA
+ kDLTrn
+
+ ctypedef struct DLDevice:
+ DLDeviceType device_type
+ int device_id
+
+ cdef enum DLDataTypeCode:
+ kDLInt
+ kDLUInt
+ kDLFloat
+ kDLBfloat
+ kDLComplex
+ kDLBool
+ kDLFloat8_e3m4
+ kDLFloat8_e4m3
+ kDLFloat8_e4m3b11fnuz
+ kDLFloat8_e4m3fn
+ kDLFloat8_e4m3fnuz
+ kDLFloat8_e5m2
+ kDLFloat8_e5m2fnuz
+ kDLFloat8_e8m0fnu
+ kDLFloat6_e2m3fn
+ kDLFloat6_e3m2fn
+ kDLFloat4_e2m1fn
+
+ ctypedef struct DLDataType:
+ uint8_t code
+ uint8_t bits
+ uint16_t lanes
+
+ ctypedef struct DLTensor:
+ void *data
+ DLDevice device
+ int ndim
+ DLDataType dtype
+ int64_t *shape
+ int64_t *strides
+ uint64_t byte_offset
+
+ ctypedef struct DLManagedTensor:
+ DLTensor dl_tensor
+ void *manager_ctx
+ void (*deleter)(DLManagedTensor *) # noqa: E211
+
+ ctypedef struct DLManagedTensorVersioned:
+ DLPackVersion version
+ void *manager_ctx
+ void (*deleter)(DLManagedTensorVersioned *) # noqa: E211
+ uint64_t flags
+ DLTensor dl_tensor
+
+
+def get_build_dlpack_version():
+ """
+ Returns a tuple of integers representing the `major` and `minor`
+ version of DLPack :module:`dpctl.tensor` was built with.
+ This tuple can be passed as the `max_version` argument to
+    `__dlpack__` to guarantee :module:`dpctl.tensor` can properly
+    consume the capsule.
+
+ Returns:
+ Tuple[int, int]
+ A tuple of integers representing the `major` and `minor`
+ version of DLPack used to build :module:`dpctl.tensor`.
+ """
+ return (DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION)
+
+
+cdef void _pycapsule_deleter(object dlt_capsule) noexcept:
+ cdef DLManagedTensor *dlm_tensor = NULL
+ if cpython.PyCapsule_IsValid(dlt_capsule, "dltensor"):
+ dlm_tensor = cpython.PyCapsule_GetPointer(
+ dlt_capsule, "dltensor")
+ dlm_tensor.deleter(dlm_tensor)
+
+
+cdef void _managed_tensor_deleter(
+ DLManagedTensor *dlm_tensor
+) noexcept with gil:
+ if dlm_tensor is not NULL:
+ # we only delete shape, because we make single allocation to
+ # accommodate both shape and strides if strides are needed
+ stdlib.free(dlm_tensor.dl_tensor.shape)
+ cpython.Py_DECREF(dlm_tensor.manager_ctx)
+ dlm_tensor.manager_ctx = NULL
+ stdlib.free(dlm_tensor)
+
+
+cdef void _pycapsule_versioned_deleter(object dlt_capsule) noexcept:
+ cdef DLManagedTensorVersioned *dlmv_tensor = NULL
+ if cpython.PyCapsule_IsValid(dlt_capsule, "dltensor_versioned"):
+ dlmv_tensor = cpython.PyCapsule_GetPointer(
+ dlt_capsule, "dltensor_versioned")
+ dlmv_tensor.deleter(dlmv_tensor)
+
+
+cdef void _managed_tensor_versioned_deleter(
+ DLManagedTensorVersioned *dlmv_tensor
+) noexcept with gil:
+ if dlmv_tensor is not NULL:
+ # we only delete shape, because we make single allocation to
+ # accommodate both shape and strides if strides are needed
+ stdlib.free(dlmv_tensor.dl_tensor.shape)
+ cpython.Py_DECREF(dlmv_tensor.manager_ctx)
+ dlmv_tensor.manager_ctx = NULL
+ stdlib.free(dlmv_tensor)
+
+
+cdef object _get_default_context(c_dpctl.SyclDevice dev):
+ try:
+ default_context = dev.sycl_platform.default_context
+ except RuntimeError:
+ # RT does not support default_context
+ default_context = None
+
+ return default_context
+
+cdef int get_array_dlpack_device_id(
+ usm_ndarray usm_ary
+) except -1:
+ """Finds ordinal number of the parent of device where array
+ was allocated.
+ """
+ cdef c_dpctl.SyclQueue ary_sycl_queue
+ cdef c_dpctl.SyclDevice ary_sycl_device
+ cdef DPCTLSyclDeviceRef pDRef = NULL
+ cdef int device_id = -1
+
+ ary_sycl_queue = usm_ary.get_sycl_queue()
+ ary_sycl_device = ary_sycl_queue.get_sycl_device()
+
+ default_context = _get_default_context(ary_sycl_device)
+ if default_context is None:
+ # check that ary_sycl_device is a non-partitioned device
+ pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref())
+ if pDRef is not NULL:
+ DPCTLDevice_Delete(pDRef)
+ raise DLPackCreationError(
+ "to_dlpack_capsule: DLPack can only export arrays allocated "
+ "on non-partitioned SYCL devices on platforms where "
+ "default_context oneAPI extension is not supported."
+ )
+ else:
+ if not usm_ary.sycl_context == default_context:
+ raise DLPackCreationError(
+ "to_dlpack_capsule: DLPack can only export arrays based on USM "
+ "allocations bound to a default platform SYCL context"
+ )
+ device_id = ary_sycl_device.get_device_id()
+
+ if device_id < 0:
+ raise DLPackCreationError(
+ "get_array_dlpack_device_id: failed to determine device_id"
+ )
+
+ return device_id
+
+
+cpdef to_dlpack_capsule(usm_ndarray usm_ary):
+ """
+ to_dlpack_capsule(usm_ary)
+
+ Constructs named Python capsule object referencing
+ instance of ``DLManagedTensor`` from
+ :class:`dpctl.tensor.usm_ndarray` instance.
+
+ Args:
+ usm_ary: An instance of :class:`dpctl.tensor.usm_ndarray`
+ Returns:
+ A new capsule with name ``"dltensor"`` that contains
+ a pointer to ``DLManagedTensor`` struct.
+ Raises:
+        DLPackCreationError: when array cannot be represented as
+ DLPack tensor. This may happen when array was allocated
+ on a partitioned sycl device, or its USM allocation is
+ not bound to the platform default SYCL context.
+        MemoryError: when host allocation needed for ``DLManagedTensor``
+ did not succeed.
+ ValueError: when array elements data type could not be represented
+ in ``DLManagedTensor``.
+ """
+ cdef DLManagedTensor *dlm_tensor = NULL
+ cdef DLTensor *dl_tensor = NULL
+ cdef int nd = usm_ary.get_ndim()
+ cdef char *data_ptr = usm_ary.get_data()
+ cdef Py_ssize_t *shape_ptr = NULL
+ cdef Py_ssize_t *strides_ptr = NULL
+ cdef int64_t *shape_strides_ptr = NULL
+ cdef int i = 0
+ cdef int device_id = -1
+ cdef int flags = 0
+ cdef Py_ssize_t element_offset = 0
+ cdef Py_ssize_t byte_offset = 0
+ cdef Py_ssize_t si = 1
+
+ ary_base = usm_ary.get_base()
+
+ device_id = get_array_dlpack_device_id(usm_ary)
+
+ dlm_tensor = stdlib.malloc(
+ sizeof(DLManagedTensor))
+ if dlm_tensor is NULL:
+ raise MemoryError(
+ "to_dlpack_capsule: Could not allocate memory for DLManagedTensor"
+ )
+ if nd > 0:
+ shape_strides_ptr = stdlib.malloc((sizeof(int64_t) * 2) * nd)
+ if shape_strides_ptr is NULL:
+ stdlib.free(dlm_tensor)
+ raise MemoryError(
+ "to_dlpack_capsule: Could not allocate memory for shape/strides"
+ )
+ shape_ptr = usm_ary.get_shape()
+ for i in range(nd):
+ shape_strides_ptr[i] = shape_ptr[i]
+ strides_ptr = usm_ary.get_strides()
+ flags = usm_ary.flags_
+ if strides_ptr:
+ for i in range(nd):
+ shape_strides_ptr[nd + i] = strides_ptr[i]
+ else:
+ if flags & USM_ARRAY_C_CONTIGUOUS:
+ si = 1
+ for i in range(nd - 1, -1, -1):
+ shape_strides_ptr[nd + i] = si
+ si = si * shape_ptr[i]
+ elif flags & USM_ARRAY_F_CONTIGUOUS:
+ si = 1
+ for i in range(0, nd):
+ shape_strides_ptr[nd + i] = si
+ si = si * shape_ptr[i]
+ else:
+ stdlib.free(shape_strides_ptr)
+ stdlib.free(dlm_tensor)
+ raise BufferError(
+ "to_dlpack_capsule: Invalid array encountered "
+ "when building strides"
+ )
+
+ strides_ptr = &shape_strides_ptr[nd]
+
+ ary_dt = usm_ary.dtype
+ ary_dtk = ary_dt.kind
+ element_offset = usm_ary.get_offset()
+ byte_offset = element_offset * (ary_dt.itemsize)
+
+ dl_tensor = &dlm_tensor.dl_tensor
+ dl_tensor.data = (data_ptr - byte_offset)
+ dl_tensor.ndim = nd
+ dl_tensor.byte_offset = byte_offset
+ dl_tensor.shape = &shape_strides_ptr[0] if nd > 0 else NULL
+ dl_tensor.strides = &shape_strides_ptr[nd] if nd > 0 else NULL
+ dl_tensor.device.device_type = kDLOneAPI
+ dl_tensor.device.device_id = device_id
+ dl_tensor.dtype.lanes = 1
+ dl_tensor.dtype.bits = (ary_dt.itemsize * 8)
+ if (ary_dtk == "b"):
+ dl_tensor.dtype.code = kDLBool
+ elif (ary_dtk == "u"):
+ dl_tensor.dtype.code = kDLUInt
+ elif (ary_dtk == "i"):
+ dl_tensor.dtype.code = kDLInt
+ elif (ary_dtk == "f"):
+ dl_tensor.dtype.code = kDLFloat
+ elif (ary_dtk == "c"):
+ dl_tensor.dtype.code = kDLComplex
+ else:
+ stdlib.free(shape_strides_ptr)
+ stdlib.free(dlm_tensor)
+ raise ValueError("Unrecognized array data type")
+
+ dlm_tensor.manager_ctx = ary_base
+ cpython.Py_INCREF(ary_base)
+ dlm_tensor.deleter = _managed_tensor_deleter
+
+ return cpython.PyCapsule_New(dlm_tensor, "dltensor", _pycapsule_deleter)
+
+
+cpdef to_dlpack_versioned_capsule(usm_ndarray usm_ary, bint copied):
+ """
+ to_dlpack_versioned_capsule(usm_ary, copied)
+
+ Constructs named Python capsule object referencing
+ instance of ``DLManagedTensorVersioned`` from
+ :class:`dpctl.tensor.usm_ndarray` instance.
+
+ Args:
+ usm_ary: An instance of :class:`dpctl.tensor.usm_ndarray`
+ copied: A bint representing whether the data was previously
+ copied in order to set the flags with the is-copied
+ bitmask.
+ Returns:
+ A new capsule with name ``"dltensor_versioned"`` that
+ contains a pointer to ``DLManagedTensorVersioned`` struct.
+ Raises:
+        DLPackCreationError: when array cannot be represented as
+ DLPack tensor. This may happen when array was allocated
+ on a partitioned sycl device, or its USM allocation is
+ not bound to the platform default SYCL context.
+        MemoryError: when host allocation needed for
+ ``DLManagedTensorVersioned`` did not succeed.
+ ValueError: when array elements data type could not be represented
+ in ``DLManagedTensorVersioned``.
+ """
+ cdef DLManagedTensorVersioned *dlmv_tensor = NULL
+ cdef DLTensor *dl_tensor = NULL
+ cdef uint32_t dlmv_flags = 0
+ cdef int nd = usm_ary.get_ndim()
+ cdef char *data_ptr = usm_ary.get_data()
+ cdef Py_ssize_t *shape_ptr = NULL
+ cdef Py_ssize_t *strides_ptr = NULL
+ cdef int64_t *shape_strides_ptr = NULL
+ cdef int i = 0
+ cdef int device_id = -1
+ cdef int flags = 0
+ cdef Py_ssize_t element_offset = 0
+ cdef Py_ssize_t byte_offset = 0
+ cdef Py_ssize_t si = 1
+
+ ary_base = usm_ary.get_base()
+
+ # Find ordinal number of the parent device
+ device_id = get_array_dlpack_device_id(usm_ary)
+
+ dlmv_tensor = stdlib.malloc(
+ sizeof(DLManagedTensorVersioned))
+ if dlmv_tensor is NULL:
+ raise MemoryError(
+ "to_dlpack_versioned_capsule: Could not allocate memory "
+ "for DLManagedTensorVersioned"
+ )
+ if nd > 0:
+ shape_strides_ptr = stdlib.malloc((sizeof(int64_t) * 2) * nd)
+ if shape_strides_ptr is NULL:
+ stdlib.free(dlmv_tensor)
+ raise MemoryError(
+ "to_dlpack_versioned_capsule: Could not allocate memory "
+ "for shape/strides"
+ )
+ # this can be a separate function for handling shapes and strides
+ shape_ptr = usm_ary.get_shape()
+ for i in range(nd):
+ shape_strides_ptr[i] = shape_ptr[i]
+ strides_ptr = usm_ary.get_strides()
+ flags = usm_ary.flags_
+ if strides_ptr:
+ for i in range(nd):
+ shape_strides_ptr[nd + i] = strides_ptr[i]
+ else:
+ if flags & USM_ARRAY_C_CONTIGUOUS:
+ si = 1
+ for i in range(nd - 1, -1, -1):
+ shape_strides_ptr[nd + i] = si
+ si = si * shape_ptr[i]
+ elif flags & USM_ARRAY_F_CONTIGUOUS:
+ si = 1
+ for i in range(0, nd):
+ shape_strides_ptr[nd + i] = si
+ si = si * shape_ptr[i]
+ else:
+ stdlib.free(shape_strides_ptr)
+ stdlib.free(dlmv_tensor)
+ raise BufferError(
+ "to_dlpack_versioned_capsule: Invalid array encountered "
+ "when building strides"
+ )
+
+ strides_ptr = &shape_strides_ptr[nd]
+
+ # this can all be a function for building the dl_tensor
+ # object (separate from dlm/dlmv)
+ ary_dt = usm_ary.dtype
+ ary_dtk = ary_dt.kind
+ element_offset = usm_ary.get_offset()
+ byte_offset = element_offset * (ary_dt.itemsize)
+
+ dl_tensor = &dlmv_tensor.dl_tensor
+ dl_tensor.data = (data_ptr - byte_offset)
+ dl_tensor.ndim = nd
+ dl_tensor.byte_offset = byte_offset
+ dl_tensor.shape = &shape_strides_ptr[0] if nd > 0 else NULL
+ dl_tensor.strides = &shape_strides_ptr[nd] if nd > 0 else NULL
+ dl_tensor.device.device_type = kDLOneAPI
+ dl_tensor.device.device_id = device_id
+ dl_tensor.dtype.lanes = 1
+ dl_tensor.dtype.bits = (ary_dt.itemsize * 8)
+ if (ary_dtk == "b"):
+ dl_tensor.dtype.code = kDLBool
+ elif (ary_dtk == "u"):
+ dl_tensor.dtype.code = kDLUInt
+ elif (ary_dtk == "i"):
+ dl_tensor.dtype.code = kDLInt
+ elif (ary_dtk == "f"):
+ dl_tensor.dtype.code = kDLFloat
+ elif (ary_dtk == "c"):
+ dl_tensor.dtype.code = kDLComplex
+ else:
+ stdlib.free(shape_strides_ptr)
+ stdlib.free(dlmv_tensor)
+ raise ValueError("Unrecognized array data type")
+
+ # set flags down here
+ if copied:
+ dlmv_flags |= DLPACK_FLAG_BITMASK_IS_COPIED
+ if not (flags & USM_ARRAY_WRITABLE):
+ dlmv_flags |= DLPACK_FLAG_BITMASK_READ_ONLY
+ dlmv_tensor.flags = dlmv_flags
+
+ dlmv_tensor.version.major = DLPACK_MAJOR_VERSION
+ dlmv_tensor.version.minor = DLPACK_MINOR_VERSION
+
+ dlmv_tensor.manager_ctx = ary_base
+ cpython.Py_INCREF(ary_base)
+ dlmv_tensor.deleter = _managed_tensor_versioned_deleter
+
+ return cpython.PyCapsule_New(
+ dlmv_tensor, "dltensor_versioned", _pycapsule_versioned_deleter
+ )
+
+
+cpdef numpy_to_dlpack_versioned_capsule(ndarray npy_ary, bint copied):
+ """
+    numpy_to_dlpack_versioned_capsule(npy_ary, copied)
+
+ Constructs named Python capsule object referencing
+ instance of ``DLManagedTensorVersioned`` from
+ :class:`numpy.ndarray` instance.
+
+ Args:
+ npy_ary: An instance of :class:`numpy.ndarray`
+ copied: A bint representing whether the data was previously
+ copied in order to set the flags with the is-copied
+ bitmask.
+ Returns:
+ A new capsule with name ``"dltensor_versioned"`` that
+ contains a pointer to ``DLManagedTensorVersioned`` struct.
+ Raises:
+        DLPackCreationError: when array cannot be represented as
+            a DLPack tensor.
+        MemoryError: when host allocation needed for
+ ``DLManagedTensorVersioned`` did not succeed.
+ ValueError: when array elements data type could not be represented
+ in ``DLManagedTensorVersioned``.
+ """
+ cdef DLManagedTensorVersioned *dlmv_tensor = NULL
+ cdef DLTensor *dl_tensor = NULL
+ cdef uint32_t dlmv_flags = 0
+ cdef int nd = npy_ary.ndim
+ cdef int64_t *shape_strides_ptr = NULL
+ cdef int i = 0
+ cdef Py_ssize_t byte_offset = 0
+ cdef int itemsize = npy_ary.itemsize
+
+ dlmv_tensor = stdlib.malloc(
+ sizeof(DLManagedTensorVersioned))
+ if dlmv_tensor is NULL:
+ raise MemoryError(
+ "numpy_to_dlpack_versioned_capsule: Could not allocate memory "
+ "for DLManagedTensorVersioned"
+ )
+
+ shape = npy_ary.ctypes.shape_as(ctypes.c_int64)
+ strides = npy_ary.ctypes.strides_as(ctypes.c_int64)
+ if nd > 0:
+ if npy_ary.size != 1:
+ for i in range(nd):
+ if shape[i] != 1 and strides[i] % itemsize != 0:
+ stdlib.free(dlmv_tensor)
+ raise BufferError(
+ "numpy_to_dlpack_versioned_capsule: DLPack cannot "
+ "encode an array if strides are not a multiple of "
+ "itemsize"
+ )
+ shape_strides_ptr = stdlib.malloc((sizeof(int64_t) * 2) * nd)
+ if shape_strides_ptr is NULL:
+ stdlib.free(dlmv_tensor)
+ raise MemoryError(
+ "numpy_to_dlpack_versioned_capsule: Could not allocate memory "
+ "for shape/strides"
+ )
+ for i in range(nd):
+ shape_strides_ptr[i] = shape[i]
+ shape_strides_ptr[nd + i] = strides[i] // itemsize
+
+ writable_flag = npy_ary.flags["W"]
+
+ ary_dt = npy_ary.dtype
+ ary_dtk = ary_dt.kind
+
+ dl_tensor = &dlmv_tensor.dl_tensor
+ dl_tensor.data = npy_ary.data
+ dl_tensor.ndim = nd
+ dl_tensor.byte_offset = byte_offset
+ dl_tensor.shape = &shape_strides_ptr[0] if nd > 0 else NULL
+ dl_tensor.strides = &shape_strides_ptr[nd] if nd > 0 else NULL
+ dl_tensor.device.device_type = kDLCPU
+ dl_tensor.device.device_id = 0
+ dl_tensor.dtype.lanes = 1
+ dl_tensor.dtype.bits = (ary_dt.itemsize * 8)
+ if (ary_dtk == "b"):
+ dl_tensor.dtype.code = kDLBool
+ elif (ary_dtk == "u"):
+ dl_tensor.dtype.code = kDLUInt
+ elif (ary_dtk == "i"):
+ dl_tensor.dtype.code = kDLInt
+ elif (ary_dtk == "f" and ary_dt.itemsize <= 8):
+ dl_tensor.dtype.code = kDLFloat
+ elif (ary_dtk == "c" and ary_dt.itemsize <= 16):
+ dl_tensor.dtype.code = kDLComplex
+ else:
+ stdlib.free(shape_strides_ptr)
+ stdlib.free(dlmv_tensor)
+ raise ValueError("Unrecognized array data type")
+
+ # set flags down here
+ if copied:
+ dlmv_flags |= DLPACK_FLAG_BITMASK_IS_COPIED
+ if not writable_flag:
+ dlmv_flags |= DLPACK_FLAG_BITMASK_READ_ONLY
+ dlmv_tensor.flags = dlmv_flags
+
+ dlmv_tensor.version.major = DLPACK_MAJOR_VERSION
+ dlmv_tensor.version.minor = DLPACK_MINOR_VERSION
+
+ dlmv_tensor.manager_ctx = npy_ary
+ cpython.Py_INCREF(npy_ary)
+ dlmv_tensor.deleter = _managed_tensor_versioned_deleter
+
+ return cpython.PyCapsule_New(
+ dlmv_tensor, "dltensor_versioned", _pycapsule_versioned_deleter
+ )
+
+
+cdef class _DLManagedTensorOwner:
+ """
+ Helper class managing the lifetime of the DLManagedTensor struct
+ transferred from a 'dlpack' capsule.
+ """
+ cdef DLManagedTensor * dlm_tensor
+
+ def __cinit__(self):
+ self.dlm_tensor = NULL
+
+ def __dealloc__(self):
+ if self.dlm_tensor:
+ self.dlm_tensor.deleter(self.dlm_tensor)
+ self.dlm_tensor = NULL
+
+ @staticmethod
+ cdef _DLManagedTensorOwner _create(DLManagedTensor *dlm_tensor_src):
+ cdef _DLManagedTensorOwner res
+ res = _DLManagedTensorOwner.__new__(_DLManagedTensorOwner)
+ res.dlm_tensor = dlm_tensor_src
+ return res
+
+
+cdef class _DLManagedTensorVersionedOwner:
+ """
+ Helper class managing the lifetime of the DLManagedTensorVersioned
+ struct transferred from a 'dlpack_versioned' capsule.
+ """
+ cdef DLManagedTensorVersioned * dlmv_tensor
+
+ def __cinit__(self):
+ self.dlmv_tensor = NULL
+
+ def __dealloc__(self):
+ if self.dlmv_tensor:
+ self.dlmv_tensor.deleter(self.dlmv_tensor)
+ self.dlmv_tensor = NULL
+
+ @staticmethod
+ cdef _DLManagedTensorVersionedOwner _create(
+ DLManagedTensorVersioned *dlmv_tensor_src
+ ):
+ cdef _DLManagedTensorVersionedOwner res
+ res = _DLManagedTensorVersionedOwner.__new__(
+ _DLManagedTensorVersionedOwner
+ )
+ res.dlmv_tensor = dlmv_tensor_src
+ return res
+
+
+cdef dict _numpy_array_interface_from_dl_tensor(DLTensor *dlt, bint ro_flag):
+ """Constructs a NumPy `__array_interface__` dictionary from a DLTensor."""
+ cdef int itemsize = 0
+
+ if dlt.dtype.lanes != 1:
+ raise BufferError(
+ "Can not import DLPack tensor with lanes != 1"
+ )
+ itemsize = dlt.dtype.bits // 8
+ shape = list()
+ if (dlt.strides is NULL):
+ strides = None
+ for dim in range(dlt.ndim):
+ shape.append(dlt.shape[dim])
+ else:
+ strides = list()
+ for dim in range(dlt.ndim):
+ shape.append(dlt.shape[dim])
+ # convert to byte-strides
+ strides.append(dlt.strides[dim] * itemsize)
+ strides = tuple(strides)
+ shape = tuple(shape)
+ if (dlt.dtype.code == kDLUInt):
+ ary_dt = "u" + str(itemsize)
+ elif (dlt.dtype.code == kDLInt):
+ ary_dt = "i" + str(itemsize)
+ elif (dlt.dtype.code == kDLFloat):
+ ary_dt = "f" + str(itemsize)
+ elif (dlt.dtype.code == kDLComplex):
+ ary_dt = "c" + str(itemsize)
+ elif (dlt.dtype.code == kDLBool):
+ ary_dt = "b" + str(itemsize)
+ else:
+ raise BufferError(
+ "Can not import DLPack tensor with type code {}.".format(
+ dlt.dtype.code
+ )
+ )
+ typestr = "|" + ary_dt
+ return dict(
+ version=3,
+ shape=shape,
+ strides=strides,
+ data=( dlt.data, True if ro_flag else False),
+ offset=dlt.byte_offset,
+ typestr=typestr,
+ )
+
+
+class _numpy_array_interface_wrapper:
+ """
+ Class that wraps a Python capsule and dictionary for consumption by NumPy.
+
+ Implementation taken from
+ https://github.com/dmlc/dlpack/blob/main/apps/numpy_dlpack/dlpack/to_numpy.py
+
+ Args:
+ array_interface:
+ A dictionary describing the underlying memory. Formatted
+ to match `numpy.ndarray.__array_interface__`.
+
+ pycapsule:
+ A Python capsule wrapping the dlpack tensor that will be
+ converted to numpy.
+ """
+
+ def __init__(self, array_interface, memory_owner) -> None:
+ self.__array_interface__ = array_interface
+ self._memory_owner = memory_owner
+
+
+cdef bint _is_kdlcpu_device(DLDevice *dev):
+ "Check if DLTensor.DLDevice denotes (kDLCPU, 0)"
+ return (dev[0].device_type == kDLCPU) and (dev[0].device_id == 0)
+
+
+cpdef object from_dlpack_capsule(object py_caps):
+ """
+ from_dlpack_capsule(py_caps)
+
+ Reconstructs instance of :class:`dpctl.tensor.usm_ndarray` from
+ named Python capsule object referencing instance of ``DLManagedTensor``
+ without copy. The instance forms a view in the memory of the tensor.
+
+ Args:
+ caps:
+ Python capsule with name ``"dltensor"`` expected to reference
+ an instance of ``DLManagedTensor`` struct.
+ Returns:
+ Instance of :class:`dpctl.tensor.usm_ndarray` with a view into
+ memory of the tensor. Capsule is renamed to ``"used_dltensor"``
+ upon success.
+ Raises:
+ TypeError:
+ if argument is not a ``"dltensor"`` capsule.
+ ValueError:
+ if argument is ``"used_dltensor"`` capsule
+ BufferError:
+ if the USM pointer is not bound to the reconstructed
+ sycl context, or the DLPack's device_type is not supported
+ by :mod:`dpctl`.
+ """
+ cdef DLManagedTensorVersioned *dlmv_tensor = NULL
+ cdef DLManagedTensor *dlm_tensor = NULL
+ cdef DLTensor *dl_tensor = NULL
+ cdef int versioned = 0
+ cdef int readonly = 0
+ cdef bytes usm_type
+ cdef size_t sz = 1
+ cdef size_t alloc_sz = 1
+ cdef int i
+ cdef int device_id = -1
+ cdef int element_bytesize = 0
+ cdef Py_ssize_t offset_min = 0
+ cdef Py_ssize_t offset_max = 0
+ cdef char *mem_ptr = NULL
+ cdef Py_ssize_t mem_ptr_delta = 0
+ cdef Py_ssize_t element_offset = 0
+ cdef int64_t stride_i = -1
+ cdef int64_t shape_i = -1
+
+ if cpython.PyCapsule_IsValid(py_caps, "dltensor"):
+ dlm_tensor = cpython.PyCapsule_GetPointer(
+ py_caps, "dltensor")
+ dl_tensor = &dlm_tensor.dl_tensor
+ elif cpython.PyCapsule_IsValid(py_caps, "dltensor_versioned"):
+ dlmv_tensor = cpython.PyCapsule_GetPointer(
+ py_caps, "dltensor_versioned")
+ if dlmv_tensor.version.major > DLPACK_MAJOR_VERSION:
+ raise BufferError(
+ "Can not import DLPack tensor with major version "
+ f"greater than {DLPACK_MAJOR_VERSION}"
+ )
+ versioned = 1
+ readonly = (dlmv_tensor.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0
+ dl_tensor = &dlmv_tensor.dl_tensor
+ elif (
+ cpython.PyCapsule_IsValid(py_caps, "used_dltensor")
+ or cpython.PyCapsule_IsValid(py_caps, "used_dltensor_versioned")
+ ):
+ raise ValueError(
+ "A DLPack tensor object can not be consumed multiple times"
+ )
+ else:
+ raise TypeError(
+ "`from_dlpack_capsule` expects a Python 'dltensor' capsule"
+ )
+
+ # Verify that we can work with this device
+ if dl_tensor.device.device_type == kDLOneAPI:
+ device_id = dl_tensor.device.device_id
+ root_device = dpctl.SyclDevice(str(device_id))
+ try:
+ default_context = root_device.sycl_platform.default_context
+ except RuntimeError:
+ default_context = get_device_cached_queue(root_device).sycl_context
+ if dl_tensor.data is NULL:
+ usm_type = b"device"
+ q = get_device_cached_queue((default_context, root_device,))
+ else:
+ usm_type = c_dpmem._Memory.get_pointer_type(
+ dl_tensor.data,
+ default_context)
+ if usm_type == b"unknown":
+ raise BufferError(
+ "Data pointer in DLPack is not bound to default sycl "
+ f"context of device '{device_id}', translated to "
+ f"{root_device.filter_string}"
+ )
+ alloc_device = c_dpmem._Memory.get_pointer_device(
+ dl_tensor.data,
+ default_context
+ )
+ q = get_device_cached_queue((default_context, alloc_device,))
+ if dl_tensor.dtype.bits % 8:
+ raise BufferError(
+ "Can not import DLPack tensor whose element's "
+ "bitsize is not a multiple of 8"
+ )
+ if dl_tensor.dtype.lanes != 1:
+ raise BufferError(
+ "Can not import DLPack tensor with lanes != 1"
+ )
+ if dl_tensor.ndim > 0:
+ offset_min = 0
+ offset_max = 0
+ for i in range(dl_tensor.ndim):
+ stride_i = dl_tensor.strides[i]
+ shape_i = dl_tensor.shape[i]
+ if shape_i > 1:
+ shape_i -= 1
+ if stride_i > 0:
+ offset_max = offset_max + stride_i * shape_i
+ else:
+ offset_min = offset_min + stride_i * shape_i
+ sz = offset_max - offset_min + 1
+ if sz == 0:
+ sz = 1
+
+ element_bytesize = (dl_tensor.dtype.bits // 8)
+ sz = sz * element_bytesize
+ element_offset = dl_tensor.byte_offset // element_bytesize
+
+ # transfer ownership
+ if not versioned:
+ dlm_holder = _DLManagedTensorOwner._create(dlm_tensor)
+ cpython.PyCapsule_SetName(py_caps, "used_dltensor")
+ else:
+ dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor)
+ cpython.PyCapsule_SetName(py_caps, "used_dltensor_versioned")
+
+ if dl_tensor.data is NULL:
+ usm_mem = dpmem.MemoryUSMDevice(sz, q)
+ else:
+ mem_ptr_delta = dl_tensor.byte_offset - (
+ element_offset * element_bytesize
+ )
+ mem_ptr = dl_tensor.data
+ alloc_sz = dl_tensor.byte_offset + (
+ (offset_max + 1) * element_bytesize)
+ tmp = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+ mem_ptr,
+ max(alloc_sz, element_bytesize),
+ (q).get_queue_ref(),
+ memory_owner=dlmv_holder if versioned else dlm_holder
+ )
+ if mem_ptr_delta == 0:
+ usm_mem = tmp
+ else:
+ alloc_sz = dl_tensor.byte_offset + (
+ (offset_max * element_bytesize + mem_ptr_delta))
+ usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+ (
+ mem_ptr + (element_bytesize - mem_ptr_delta)
+ ),
+ max(alloc_sz, element_bytesize),
+ (q).get_queue_ref(),
+ memory_owner=tmp
+ )
+
+ py_shape = list()
+ if (dl_tensor.shape is not NULL):
+ for i in range(dl_tensor.ndim):
+ py_shape.append(dl_tensor.shape[i])
+ if (dl_tensor.strides is not NULL):
+ py_strides = list()
+ for i in range(dl_tensor.ndim):
+ py_strides.append(dl_tensor.strides[i])
+ else:
+ py_strides = None
+ if (dl_tensor.dtype.code == kDLUInt):
+ ary_dt = np.dtype("u" + str(element_bytesize))
+ elif (dl_tensor.dtype.code == kDLInt):
+ ary_dt = np.dtype("i" + str(element_bytesize))
+ elif (dl_tensor.dtype.code == kDLFloat):
+ ary_dt = np.dtype("f" + str(element_bytesize))
+ elif (dl_tensor.dtype.code == kDLComplex):
+ ary_dt = np.dtype("c" + str(element_bytesize))
+ elif (dl_tensor.dtype.code == kDLBool):
+ ary_dt = np.dtype("?")
+ else:
+ raise BufferError(
+ "Can not import DLPack tensor with type code {}.".format(
+ dl_tensor.dtype.code
+ )
+ )
+ res_ary = usm_ndarray(
+ py_shape,
+ dtype=ary_dt,
+ buffer=usm_mem,
+ strides=py_strides,
+ offset=element_offset
+ )
+ if readonly:
+ res_ary.flags_ = (res_ary.flags_ & ~USM_ARRAY_WRITABLE)
+ return res_ary
+ elif _is_kdlcpu_device(&dl_tensor.device):
+ ary_iface = _numpy_array_interface_from_dl_tensor(dl_tensor, readonly)
+ if not versioned:
+ dlm_holder = _DLManagedTensorOwner._create(dlm_tensor)
+ cpython.PyCapsule_SetName(py_caps, "used_dltensor")
+ return np.ctypeslib.as_array(
+ _numpy_array_interface_wrapper(ary_iface, dlm_holder)
+ )
+ else:
+ dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor)
+ cpython.PyCapsule_SetName(py_caps, "used_dltensor_versioned")
+ return np.ctypeslib.as_array(
+ _numpy_array_interface_wrapper(ary_iface, dlmv_holder)
+ )
+ else:
+ raise BufferError(
+ "The DLPack tensor resides on unsupported device."
+ )
+
+cdef usm_ndarray _to_usm_ary_from_host_blob(object host_blob, dev : Device):
+ q = dev.sycl_queue
+ np_ary = np.asarray(host_blob)
+ dt = np_ary.dtype
+ if dt.char in "dD" and q.sycl_device.has_aspect_fp64 is False:
+ Xusm_dtype = (
+ "float32" if dt.char == "d" else "complex64"
+ )
+ else:
+ Xusm_dtype = dt
+ usm_mem = dpmem.MemoryUSMDevice(np_ary.nbytes, queue=q)
+ usm_ary = usm_ndarray(np_ary.shape, dtype=Xusm_dtype, buffer=usm_mem)
+ usm_mem.copy_from_host(np.reshape(np_ary.view(dtype="u1"), -1))
+ return usm_ary
+
+
+# only cdef to make it private
+cdef object _create_device(object device, object dl_device):
+ if isinstance(device, Device):
+ return device
+ elif isinstance(device, dpctl.SyclDevice):
+ return Device.create_device(device)
+ else:
+ root_device = dpctl.SyclDevice(str(dl_device[1]))
+ return Device.create_device(root_device)
+
+
+def from_dlpack(x, /, *, device=None, copy=None):
+ """from_dlpack(x, /, *, device=None, copy=None)
+
+ Constructs :class:`dpctl.tensor.usm_ndarray` or :class:`numpy.ndarray`
+ instance from a Python object ``x`` that implements ``__dlpack__`` protocol.
+
+ Args:
+ x (object):
+ A Python object representing an array that supports
+ ``__dlpack__`` protocol.
+ device (
+ Optional[str, :class:`dpctl.SyclDevice`,
+ :class:`dpctl.SyclQueue`,
+ :class:`dpctl.tensor.Device`,
+ tuple([:class:`enum.IntEnum`, int])])
+ ):
+ Device where the output array is to be placed. ``device`` keyword
+ values can be:
+
+ * ``None``
+ The data remains on the same device.
+ * oneAPI filter selector string
+            SYCL device selected by :ref:`filter selector string
+            <filter_selector_string>`.
+ * :class:`dpctl.SyclDevice`
+ explicit SYCL device that must correspond to
+ a non-partitioned SYCL device.
+ * :class:`dpctl.SyclQueue`
+ implies SYCL device targeted by the SYCL queue.
+ * :class:`dpctl.tensor.Device`
+ implies SYCL device `device.sycl_queue`. The `Device` object
+ is obtained via :attr:`dpctl.tensor.usm_ndarray.device`.
+ * ``(device_type, device_id)``
+ 2-tuple matching the format of the output of the
+ ``__dlpack_device__`` method: an integer enumerator representing
+ the device type followed by an integer representing the index of
+ the device. The only supported :class:`dpctl.tensor.DLDeviceType`
+ device types are ``"kDLCPU"`` and ``"kDLOneAPI"``.
+
+ Default: ``None``.
+
+ copy (bool, optional)
+ Boolean indicating whether or not to copy the input.
+
+ * If ``copy`` is ``True``, the input will always be
+ copied.
+ * If ``False``, a ``BufferError`` will be raised if a
+ copy is deemed necessary.
+ * If ``None``, a copy will be made only if deemed
+ necessary, otherwise, the existing memory buffer will
+ be reused.
+
+ Default: ``None``.
+
+ Returns:
+ Alternative[usm_ndarray, numpy.ndarray]:
+ An array containing the data in ``x``. When ``copy`` is
+ ``None`` or ``False``, this may be a view into the original
+ memory.
+
+ The type of the returned object
+ depends on where the data backing up input object ``x`` resides.
+ If it resides in a USM allocation on a SYCL device, the
+ type :class:`dpctl.tensor.usm_ndarray` is returned, otherwise if it
+ resides on ``"kDLCPU"`` device the type is :class:`numpy.ndarray`,
+ and otherwise an exception is raised.
+
+ .. note::
+
+ If the return type is :class:`dpctl.tensor.usm_ndarray`, the
+ associated SYCL queue is derived from the ``device`` keyword.
+ When ``device`` keyword value has type :class:`dpctl.SyclQueue`,
+ the explicit queue instance is used, when ``device`` keyword
+ value has type :class:`dpctl.tensor.Device`, the
+ ``device.sycl_queue`` is used. In all other cases, the cached
+ SYCL queue corresponding to the implied SYCL device is used.
+
+ Raises:
+ TypeError:
+ if ``x`` does not implement ``__dlpack__`` method
+ ValueError:
+ if data of the input object resides on an unsupported device
+
+ See https://dmlc.github.io/dlpack/latest/ for more details.
+
+ :Example:
+
+ .. code-block:: python
+
+ import dpctl
+ import dpctl.tensor as dpt
+
+ class Container:
+ "Helper class implementing `__dlpack__` protocol"
+ def __init__(self, array):
+ self._array = array
+
+ def __dlpack__(self, stream=None):
+ return self._array.__dlpack__(stream=stream)
+
+ def __dlpack_device__(self):
+ return self._array.__dlpack_device__()
+
+ C = Container(dpt.linspace(0, 100, num=20, dtype="int16"))
+ # create usm_ndarray view
+ X = dpt.from_dlpack(C)
+ # migrate content of the container to device of type kDLCPU
+ Y = dpt.from_dlpack(C, device=(dpt.DLDeviceType.kDLCPU, 0))
+
+ """
+ dlpack_attr = getattr(x, "__dlpack__", None)
+ dlpack_dev_attr = getattr(x, "__dlpack_device__", None)
+ if not callable(dlpack_attr) or not callable(dlpack_dev_attr):
+ raise TypeError(
+ f"The argument of type {type(x)} does not implement "
+ "`__dlpack__` and `__dlpack_device__` methods."
+ )
+ # device is converted to a dlpack_device if necessary
+ dl_device = None
+ if device:
+ if isinstance(device, tuple):
+ dl_device = device
+ if len(dl_device) != 2:
+ raise ValueError(
+ "Argument `device` specified as a tuple must have length 2"
+ )
+ else:
+ if not isinstance(device, dpctl.SyclDevice):
+ device = Device.create_device(device)
+ d = device.sycl_device
+ else:
+ d = device
+ dl_device = (device_OneAPI, d.get_device_id())
+ if dl_device is not None:
+ if (dl_device[0] not in [device_OneAPI, device_CPU]):
+ raise ValueError(
+ f"Argument `device`={device} is not supported."
+ )
+ got_type_error = False
+ got_buffer_error = False
+ got_other_error = False
+ saved_exception = None
+ # First DLPack version supporting dl_device, and copy
+ requested_ver = (1, 0)
+ cpu_dev = (device_CPU, 0)
+ try:
+ # setting max_version to minimal version that supports
+ # dl_device/copy keywords
+ dlpack_capsule = dlpack_attr(
+ max_version=requested_ver,
+ dl_device=dl_device,
+ copy=copy
+ )
+ except TypeError:
+ # exporter does not support max_version keyword
+ got_type_error = True
+ except (BufferError, NotImplementedError, ValueError) as e:
+ # Either dl_device, or copy cannot be satisfied
+ got_buffer_error = True
+ saved_exception = e
+ except Exception as e:
+ got_other_error = True
+ saved_exception = e
+ else:
+ # execution did not raise exceptions
+ return from_dlpack_capsule(dlpack_capsule)
+ finally:
+ if got_type_error:
+ # max_version/dl_device, copy keywords are not supported
+ # by __dlpack__
+ x_dldev = dlpack_dev_attr()
+ if (dl_device is None) or (dl_device == x_dldev):
+ dlpack_capsule = dlpack_attr()
+ return from_dlpack_capsule(dlpack_capsule)
+ # must copy via host
+ if copy is False:
+ raise BufferError(
+ "Importing data via DLPack requires copying, but "
+ "copy=False was provided"
+ )
+ # when max_version/dl_device/copy are not supported
+ # we can only support importing to OneAPI devices
+ # from host, or from another oneAPI device
+ is_supported_x_dldev = (
+ x_dldev == cpu_dev or
+ (x_dldev[0] == device_OneAPI)
+ )
+ is_supported_dl_device = (
+ dl_device == cpu_dev or
+ dl_device[0] == device_OneAPI
+ )
+ if is_supported_x_dldev and is_supported_dl_device:
+ dlpack_capsule = dlpack_attr()
+ blob = from_dlpack_capsule(dlpack_capsule)
+ else:
+ raise BufferError(
+ f"Can not import to requested device {dl_device}"
+ )
+ dev = _create_device(device, dl_device)
+ if x_dldev == cpu_dev and dl_device == cpu_dev:
+ # both source and destination are CPU
+ return blob
+ elif x_dldev == cpu_dev:
+ # source is CPU, destination is oneAPI
+ return _to_usm_ary_from_host_blob(blob, dev)
+ elif dl_device == cpu_dev:
+ # source is oneAPI, destination is CPU
+ cpu_caps = blob.__dlpack__(
+ max_version=get_build_dlpack_version(),
+ dl_device=cpu_dev
+ )
+ return from_dlpack_capsule(cpu_caps)
+ else:
+ # TODO: revert to `import dpctl.tensor`
+ # when dpnp fully migrates dpctl/tensor
+ import dpctl_ext.tensor as dpt
+ return dpt.asarray(blob, device=dev)
+ elif got_buffer_error:
+ # we are here, because dlpack_attr could not deal with requested
+ # dl_device, or copying was required
+ if copy is False:
+ raise BufferError(
+ "Importing data via DLPack requires copying, but "
+ "copy=False was provided"
+ )
+ if dl_device is None:
+ raise saved_exception
+ # must copy via host
+ if dl_device[0] != device_OneAPI:
+ raise BufferError(
+ f"Can not import to requested device {dl_device}"
+ )
+ x_dldev = dlpack_dev_attr()
+ if x_dldev == cpu_dev:
+ dlpack_capsule = dlpack_attr()
+ host_blob = from_dlpack_capsule(dlpack_capsule)
+ else:
+ dlpack_capsule = dlpack_attr(
+ max_version=requested_ver,
+ dl_device=cpu_dev,
+ copy=copy
+ )
+ host_blob = from_dlpack_capsule(dlpack_capsule)
+ dev = _create_device(device, dl_device)
+ return _to_usm_ary_from_host_blob(host_blob, dev)
+ elif got_other_error:
+ raise saved_exception
diff --git a/dpctl_ext/tensor/_elementwise_common.py b/dpctl_ext/tensor/_elementwise_common.py
index 7fd9dabf9614..ffe849db9cad 100644
--- a/dpctl_ext/tensor/_elementwise_common.py
+++ b/dpctl_ext/tensor/_elementwise_common.py
@@ -27,12 +27,11 @@
# *****************************************************************************
import dpctl
-import dpctl.tensor as dpt
from dpctl.utils import ExecutionPlacementError, SequentialOrderManager
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
from ._copy_utils import _empty_like_orderK, _empty_like_pair_orderK
@@ -233,7 +232,7 @@ def __call__(self, x, /, *, out=None, order="K"):
# Allocate a temporary buffer to avoid memory overlapping.
# Note if `buf_dt` is not None, a temporary copy of `x` will be
# created, so the array overlap check isn't needed.
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if (
dpctl.utils.get_execution_queue((x.sycl_queue, out.sycl_queue))
@@ -252,7 +251,7 @@ def __call__(self, x, /, *, out=None, order="K"):
else:
if order == "A":
order = "F" if x.flags.f_contiguous else "C"
- out = dpt_ext.empty_like(x, dtype=res_dt, order=order)
+ out = dpt.empty_like(x, dtype=res_dt, order=order)
dep_evs = _manager.submitted_events
ht_unary_ev, unary_ev = self.unary_fn_(
@@ -275,7 +274,7 @@ def __call__(self, x, /, *, out=None, order="K"):
else:
if order == "A":
order = "F" if x.flags.f_contiguous else "C"
- buf = dpt_ext.empty_like(x, dtype=buf_dt, order=order)
+ buf = dpt.empty_like(x, dtype=buf_dt, order=order)
dep_evs = _manager.submitted_events
ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
@@ -286,7 +285,7 @@ def __call__(self, x, /, *, out=None, order="K"):
if order == "K":
out = _empty_like_orderK(buf, res_dt)
else:
- out = dpt_ext.empty_like(buf, dtype=res_dt, order=order)
+ out = dpt.empty_like(buf, dtype=res_dt, order=order)
ht, uf_ev = self.unary_fn_(
buf, out, sycl_queue=exec_q, depends=[copy_ev]
@@ -597,7 +596,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
if isinstance(o1, dpt.usm_ndarray):
if ti._array_overlap(o1, out) and buf1_dt is None:
if not ti._same_logical_tensors(o1, out):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
elif self.binary_inplace_fn_ is not None:
# if there is a dedicated in-place kernel
# it can be called here, otherwise continues
@@ -610,12 +609,12 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
):
buf2_dt = o2_dtype
else:
- src2 = dpt_ext.asarray(
+ src2 = dpt.asarray(
o2, dtype=o2_dtype, sycl_queue=exec_q
)
if buf2_dt is None:
if src2.shape != res_shape:
- src2 = dpt_ext.broadcast_to(src2, res_shape)
+ src2 = dpt.broadcast_to(src2, res_shape)
dep_evs = _manager.submitted_events
ht_, comp_ev = self.binary_inplace_fn_(
lhs=o1,
@@ -625,7 +624,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
)
_manager.add_event_pair(ht_, comp_ev)
else:
- buf2 = dpt_ext.empty_like(src2, dtype=buf2_dt)
+ buf2 = dpt.empty_like(src2, dtype=buf2_dt)
dep_evs = _manager.submitted_events
(
ht_copy_ev,
@@ -638,7 +637,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
)
_manager.add_event_pair(ht_copy_ev, copy_ev)
- buf2 = dpt_ext.broadcast_to(buf2, res_shape)
+ buf2 = dpt.broadcast_to(buf2, res_shape)
ht_, bf_ev = self.binary_inplace_fn_(
lhs=o1,
rhs=buf2,
@@ -657,16 +656,16 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
):
# should not reach if out is reallocated
# after being checked against o1
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if isinstance(o1, dpt.usm_ndarray):
src1 = o1
else:
- src1 = dpt_ext.asarray(o1, dtype=o1_dtype, sycl_queue=exec_q)
+ src1 = dpt.asarray(o1, dtype=o1_dtype, sycl_queue=exec_q)
if isinstance(o2, dpt.usm_ndarray):
src2 = o2
else:
- src2 = dpt_ext.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q)
+ src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q)
if order == "A":
order = (
@@ -688,7 +687,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
src1, src2, res_dt, res_shape, res_usm_type, exec_q
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -696,9 +695,9 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
order=order,
)
if src1.shape != res_shape:
- src1 = dpt_ext.broadcast_to(src1, res_shape)
+ src1 = dpt.broadcast_to(src1, res_shape)
if src2.shape != res_shape:
- src2 = dpt_ext.broadcast_to(src2, res_shape)
+ src2 = dpt.broadcast_to(src2, res_shape)
deps_ev = _manager.submitted_events
ht_binary_ev, binary_ev = self.binary_fn_(
src1=src1,
@@ -723,7 +722,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
if order == "K":
buf2 = _empty_like_orderK(src2, buf2_dt)
else:
- buf2 = dpt_ext.empty_like(src2, dtype=buf2_dt, order=order)
+ buf2 = dpt.empty_like(src2, dtype=buf2_dt, order=order)
dep_evs = _manager.submitted_events
ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=src2, dst=buf2, sycl_queue=exec_q, depends=dep_evs
@@ -735,7 +734,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
src1, buf2, res_dt, res_shape, res_usm_type, exec_q
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -744,8 +743,8 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
)
if src1.shape != res_shape:
- src1 = dpt_ext.broadcast_to(src1, res_shape)
- buf2 = dpt_ext.broadcast_to(buf2, res_shape)
+ src1 = dpt.broadcast_to(src1, res_shape)
+ buf2 = dpt.broadcast_to(buf2, res_shape)
ht_binary_ev, binary_ev = self.binary_fn_(
src1=src1,
src2=buf2,
@@ -769,7 +768,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
if order == "K":
buf1 = _empty_like_orderK(src1, buf1_dt)
else:
- buf1 = dpt_ext.empty_like(src1, dtype=buf1_dt, order=order)
+ buf1 = dpt.empty_like(src1, dtype=buf1_dt, order=order)
dep_evs = _manager.submitted_events
ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=src1, dst=buf1, sycl_queue=exec_q, depends=dep_evs
@@ -781,7 +780,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
buf1, src2, res_dt, res_shape, res_usm_type, exec_q
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -789,9 +788,9 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
order=order,
)
- buf1 = dpt_ext.broadcast_to(buf1, res_shape)
+ buf1 = dpt.broadcast_to(buf1, res_shape)
if src2.shape != res_shape:
- src2 = dpt_ext.broadcast_to(src2, res_shape)
+ src2 = dpt.broadcast_to(src2, res_shape)
ht_binary_ev, binary_ev = self.binary_fn_(
src1=buf1,
src2=src2,
@@ -820,7 +819,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
if order == "K":
buf1 = _empty_like_orderK(src1, buf1_dt)
else:
- buf1 = dpt_ext.empty_like(src1, dtype=buf1_dt, order=order)
+ buf1 = dpt.empty_like(src1, dtype=buf1_dt, order=order)
dep_evs = _manager.submitted_events
ht_copy1_ev, copy1_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=src1, dst=buf1, sycl_queue=exec_q, depends=dep_evs
@@ -829,7 +828,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
if order == "K":
buf2 = _empty_like_orderK(src2, buf2_dt)
else:
- buf2 = dpt_ext.empty_like(src2, dtype=buf2_dt, order=order)
+ buf2 = dpt.empty_like(src2, dtype=buf2_dt, order=order)
ht_copy2_ev, copy2_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=src2, dst=buf2, sycl_queue=exec_q, depends=dep_evs
)
@@ -840,7 +839,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
buf1, buf2, res_dt, res_shape, res_usm_type, exec_q
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -848,8 +847,8 @@ def __call__(self, o1, o2, /, *, out=None, order="K"):
order=order,
)
- buf1 = dpt_ext.broadcast_to(buf1, res_shape)
- buf2 = dpt_ext.broadcast_to(buf2, res_shape)
+ buf1 = dpt.broadcast_to(buf1, res_shape)
+ buf2 = dpt.broadcast_to(buf2, res_shape)
ht_, bf_ev = self.binary_fn_(
src1=buf1,
src2=buf2,
@@ -960,10 +959,10 @@ def _inplace_op(self, o1, o2):
):
buf_dt = o2_dtype
else:
- src2 = dpt_ext.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q)
+ src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q)
if buf_dt is None:
if src2.shape != res_shape:
- src2 = dpt_ext.broadcast_to(src2, res_shape)
+ src2 = dpt.broadcast_to(src2, res_shape)
dep_evs = _manager.submitted_events
ht_, comp_ev = self.binary_inplace_fn_(
lhs=o1,
@@ -973,7 +972,7 @@ def _inplace_op(self, o1, o2):
)
_manager.add_event_pair(ht_, comp_ev)
else:
- buf = dpt_ext.empty_like(src2, dtype=buf_dt)
+ buf = dpt.empty_like(src2, dtype=buf_dt)
dep_evs = _manager.submitted_events
(
ht_copy_ev,
@@ -986,7 +985,7 @@ def _inplace_op(self, o1, o2):
)
_manager.add_event_pair(ht_copy_ev, copy_ev)
- buf = dpt_ext.broadcast_to(buf, res_shape)
+ buf = dpt.broadcast_to(buf, res_shape)
ht_, bf_ev = self.binary_inplace_fn_(
lhs=o1,
rhs=buf,
diff --git a/dpctl_ext/tensor/_flags.pyx b/dpctl_ext/tensor/_flags.pyx
new file mode 100644
index 000000000000..322d52bd56c7
--- /dev/null
+++ b/dpctl_ext/tensor/_flags.pyx
@@ -0,0 +1,175 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# distutils: language = c++
+# cython: language_level=3
+# cython: linetrace=True
+
+from libcpp cimport bool as cpp_bool
+
+from ._usmarray cimport (
+ USM_ARRAY_C_CONTIGUOUS,
+ USM_ARRAY_F_CONTIGUOUS,
+ USM_ARRAY_WRITABLE,
+ usm_ndarray,
+)
+
+
+cdef cpp_bool _check_bit(int flag, int mask):
+ return (flag & mask) == mask
+
+
+cdef class Flags:
+ """
+ Helper class to query the flags of a :class:`dpctl.tensor.usm_ndarray`
+ instance, which describe how the instance interfaces with its underlying
+ memory.
+ """
+ cdef int flags_
+ cdef usm_ndarray arr_
+
+ def __cinit__(self, usm_ndarray arr, int flags):
+ self.arr_ = arr
+ self.flags_ = flags
+
+ @property
+ def flags(self):
+ """
+ Integer representation of the memory layout flags of
+ :class:`dpctl.tensor.usm_ndarray` instance.
+ """
+ return self.flags_
+
+ @property
+ def c_contiguous(self):
+ """
+ True if the memory layout of the
+ :class:`dpctl.tensor.usm_ndarray` instance is C-contiguous.
+ """
+ return _check_bit(self.flags_, USM_ARRAY_C_CONTIGUOUS)
+
+ @property
+ def f_contiguous(self):
+ """
+ True if the memory layout of the
+ :class:`dpctl.tensor.usm_ndarray` instance is F-contiguous.
+ """
+ return _check_bit(self.flags_, USM_ARRAY_F_CONTIGUOUS)
+
+ @property
+ def writable(self):
+ """
+ True if :class:`dpctl.tensor.usm_ndarray` instance is writable.
+ """
+ return _check_bit(self.flags_, USM_ARRAY_WRITABLE)
+
+ @writable.setter
+ def writable(self, new_val):
+ if not isinstance(new_val, bool):
+ raise TypeError("Expecting a boolean value")
+ self.arr_._set_writable_flag(new_val)
+
+ @property
+ def fc(self):
+ """
+ True if the memory layout of the :class:`dpctl.tensor.usm_ndarray`
+ instance is C-contiguous and F-contiguous.
+ """
+ return (
+ _check_bit(self.flags_, USM_ARRAY_C_CONTIGUOUS)
+ and _check_bit(self.flags_, USM_ARRAY_F_CONTIGUOUS)
+ )
+
+ @property
+ def forc(self):
+ """
+ True if the memory layout of the :class:`dpctl.tensor.usm_ndarray`
+ instance is C-contiguous or F-contiguous.
+ """
+ return (
+ _check_bit(self.flags_, USM_ARRAY_C_CONTIGUOUS)
+ or _check_bit(self.flags_, USM_ARRAY_F_CONTIGUOUS)
+ )
+
+ @property
+ def fnc(self):
+ """
+ True if the memory layout of the :class:`dpctl.tensor.usm_ndarray`
+ instance is F-contiguous and not C-contiguous.
+ """
+ return (
+ _check_bit(self.flags_, USM_ARRAY_F_CONTIGUOUS)
+ and not _check_bit(self.flags_, USM_ARRAY_C_CONTIGUOUS)
+ )
+
+ @property
+ def contiguous(self):
+ """
+ True if the memory layout of the :class:`dpctl.tensor.usm_ndarray`
+        instance is C-contiguous or F-contiguous.
+        Equivalent to `forc`.
+ """
+ return self.forc
+
+ def __getitem__(self, name):
+ if name in ["C_CONTIGUOUS", "C"]:
+ return self.c_contiguous
+ elif name in ["F_CONTIGUOUS", "F"]:
+ return self.f_contiguous
+ elif name in ["WRITABLE", "W"]:
+ return self.writable
+ elif name == "FC":
+ return self.fc
+ elif name == "FNC":
+ return self.fnc
+ elif name in ["FORC", "CONTIGUOUS"]:
+ return self.forc
+
+ def __setitem__(self, name, val):
+ if name in ["WRITABLE", "W"]:
+ self.writable = val
+ else:
+ raise ValueError(
+ "Only writable ('W' or 'WRITABLE') flag can be set"
+ )
+
+ def __repr__(self):
+ out = []
+ for name in "C_CONTIGUOUS", "F_CONTIGUOUS", "WRITABLE":
+ out.append(" {} : {}".format(name, self[name]))
+ return "\n".join(out)
+
+ def __eq__(self, other):
+ cdef Flags other_
+ if isinstance(other, self.__class__):
+ other_ = other
+ return self.flags_ == other_.flags_
+ elif isinstance(other, int):
+ return self.flags_ == other
+ else:
+ return False
diff --git a/dpctl_ext/tensor/_indexing_functions.py b/dpctl_ext/tensor/_indexing_functions.py
index 5b4eb1aaf7a2..08db81c1b166 100644
--- a/dpctl_ext/tensor/_indexing_functions.py
+++ b/dpctl_ext/tensor/_indexing_functions.py
@@ -29,12 +29,11 @@
import operator
import dpctl
-import dpctl.tensor as dpt
import dpctl.utils
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
from ._copy_utils import (
@@ -57,7 +56,7 @@ def _get_indexing_mode(name):
def _range(sh_i, i, nd, q, usm_t, dt):
- ind = dpt_ext.arange(sh_i, dtype=dt, usm_type=usm_t, sycl_queue=q)
+ ind = dpt.arange(sh_i, dtype=dt, usm_type=usm_t, sycl_queue=q)
ind.shape = tuple(sh_i if i == j else 1 for j in range(nd))
return ind
@@ -177,7 +176,7 @@ def place(arr, mask, vals):
raise dpctl.utils.ExecutionPlacementError
if arr.shape != mask.shape or vals.ndim != 1:
raise ValueError("Array sizes are not as required")
- cumsum = dpt_ext.empty(mask.size, dtype="i8", sycl_queue=exec_q)
+ cumsum = dpt.empty(mask.size, dtype="i8", sycl_queue=exec_q)
_manager = dpctl.utils.SequentialOrderManager[exec_q]
deps_ev = _manager.submitted_events
nz_count = ti.mask_positions(
@@ -190,7 +189,7 @@ def place(arr, mask, vals):
if vals.dtype == arr.dtype:
rhs = vals
else:
- rhs = dpt_ext.astype(vals, arr.dtype)
+ rhs = dpt.astype(vals, arr.dtype)
hev, pl_ev = ti._place(
dst=arr,
cumsum=cumsum,
@@ -329,7 +328,7 @@ def put_vec_duplicates(vec, ind, vals):
val_shape = indices.shape
if not isinstance(vals, dpt.usm_ndarray):
- vals = dpt_ext.asarray(
+ vals = dpt.asarray(
vals, dtype=x.dtype, usm_type=vals_usm_type, sycl_queue=exec_q
)
# choose to throw here for consistency with `place`
@@ -340,8 +339,8 @@ def put_vec_duplicates(vec, ind, vals):
if vals.dtype == x.dtype:
rhs = vals
else:
- rhs = dpt_ext.astype(vals, x.dtype)
- rhs = dpt_ext.broadcast_to(rhs, val_shape)
+ rhs = dpt.astype(vals, x.dtype)
+ rhs = dpt.broadcast_to(rhs, val_shape)
_manager = dpctl.utils.SequentialOrderManager[exec_q]
deps_ev = _manager.submitted_events
@@ -540,9 +539,9 @@ def take(x, indices, /, *, axis=None, out=None, mode="wrap"):
"Input and output allocation queues are not compatible"
)
if ti._array_overlap(x, out):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape, dtype=dt, usm_type=res_usm_type, sycl_queue=exec_q
)
diff --git a/dpctl_ext/tensor/_linear_algebra_functions.py b/dpctl_ext/tensor/_linear_algebra_functions.py
index 973050f93ac1..6dfb30e881b2 100644
--- a/dpctl_ext/tensor/_linear_algebra_functions.py
+++ b/dpctl_ext/tensor/_linear_algebra_functions.py
@@ -29,11 +29,11 @@
import operator
import dpctl
-import dpctl.tensor as dpt
from dpctl.utils import ExecutionPlacementError, SequentialOrderManager
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_elementwise_impl as tei
import dpctl_ext.tensor._tensor_impl as ti
import dpctl_ext.tensor._tensor_linalg_impl as tli
diff --git a/dpctl_ext/tensor/_manipulation_functions.py b/dpctl_ext/tensor/_manipulation_functions.py
index e2d55c533bc0..33817dd0aa2e 100644
--- a/dpctl_ext/tensor/_manipulation_functions.py
+++ b/dpctl_ext/tensor/_manipulation_functions.py
@@ -30,13 +30,12 @@
import operator
import dpctl
-import dpctl.tensor as dpt
import dpctl.utils as dputils
import numpy as np
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
from ._numpy_helper import normalize_axis_index, normalize_axis_tuple
@@ -174,7 +173,7 @@ def _concat_axis_None(arrays):
res_shape = 0
for array in arrays:
res_shape += array.size
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q
)
@@ -185,7 +184,7 @@ def _concat_axis_None(arrays):
fill_end = fill_start + array.size
if array.flags.c_contiguous:
hev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
- src=dpt_ext.reshape(array, -1),
+ src=dpt.reshape(array, -1),
dst=res[fill_start:fill_end],
sycl_queue=exec_q,
depends=deps,
@@ -196,7 +195,7 @@ def _concat_axis_None(arrays):
# _copy_usm_ndarray_for_reshape requires src and dst to have
# the same data type
if not array.dtype == res_dtype:
- src2_ = dpt_ext.empty_like(src_, dtype=res_dtype)
+ src2_ = dpt.empty_like(src_, dtype=res_dtype)
ht_copy_ev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=src_, dst=src2_, sycl_queue=exec_q, depends=deps
)
@@ -334,7 +333,7 @@ def concat(arrays, /, *, axis=0):
X0_shape[i] if i != axis else res_shape_axis for i in range(X0.ndim)
)
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q
)
@@ -402,7 +401,7 @@ def expand_dims(X, /, *, axis=0):
shape_it = iter(X.shape)
shape = tuple(1 if ax in axis else next(shape_it) for ax in range(out_ndim))
- return dpt_ext.reshape(X, shape)
+ return dpt.reshape(X, shape)
def flip(X, /, *, axis=None):
@@ -485,7 +484,7 @@ def moveaxis(X, source, destination, /):
for src, dst in sorted(zip(destination, source)):
ind.insert(src, dst)
- return dpt_ext.permute_dims(X, tuple(ind))
+ return dpt.permute_dims(X, tuple(ind))
def permute_dims(X, /, axes):
@@ -602,7 +601,7 @@ def repeat(x, repeats, /, *, axis=None):
)
)
dpctl.utils.validate_usm_type(usm_type, allow_none=False)
- if not dpt_ext.can_cast(repeats.dtype, dpt.int64, casting="same_kind"):
+ if not dpt.can_cast(repeats.dtype, dpt.int64, casting="same_kind"):
raise TypeError(
f"'repeats' data type {repeats.dtype} cannot be cast to "
"'int64' according to the casting rule ''safe.''"
@@ -624,7 +623,7 @@ def repeat(x, repeats, /, *, axis=None):
"'repeats' array must be broadcastable to the size of "
"the repeated axis"
)
- if not dpt_ext.all(repeats >= 0):
+ if not dpt.all(repeats >= 0):
raise ValueError("'repeats' elements must be positive")
elif isinstance(repeats, (tuple, list, range)):
@@ -643,10 +642,10 @@ def repeat(x, repeats, /, *, axis=None):
"`repeats` sequence must have the same length as the "
"repeated axis"
)
- repeats = dpt_ext.asarray(
+ repeats = dpt.asarray(
repeats, dtype=dpt.int64, usm_type=usm_type, sycl_queue=exec_q
)
- if not dpt_ext.all(repeats >= 0):
+ if not dpt.all(repeats >= 0):
raise ValueError("`repeats` elements must be positive")
else:
raise TypeError(
@@ -662,7 +661,7 @@ def repeat(x, repeats, /, *, axis=None):
res_shape = x_shape[:axis] + (res_axis_size,) + x_shape[axis + 1 :]
else:
res_shape = (res_axis_size,)
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape, dtype=x.dtype, usm_type=usm_type, sycl_queue=exec_q
)
if res_axis_size > 0:
@@ -677,7 +676,7 @@ def repeat(x, repeats, /, *, axis=None):
_manager.add_event_pair(ht_rep_ev, rep_ev)
else:
if repeats.dtype != dpt.int64:
- rep_buf = dpt_ext.empty(
+ rep_buf = dpt.empty(
repeats.shape,
dtype=dpt.int64,
usm_type=usm_type,
@@ -687,7 +686,7 @@ def repeat(x, repeats, /, *, axis=None):
src=repeats, dst=rep_buf, sycl_queue=exec_q, depends=dep_evs
)
_manager.add_event_pair(ht_copy_ev, copy_ev)
- cumsum = dpt_ext.empty(
+ cumsum = dpt.empty(
(axis_size,),
dtype=dpt.int64,
usm_type=usm_type,
@@ -703,7 +702,7 @@ def repeat(x, repeats, /, *, axis=None):
)
else:
res_shape = (res_axis_size,)
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape,
dtype=x.dtype,
usm_type=usm_type,
@@ -720,7 +719,7 @@ def repeat(x, repeats, /, *, axis=None):
)
_manager.add_event_pair(ht_rep_ev, rep_ev)
else:
- cumsum = dpt_ext.empty(
+ cumsum = dpt.empty(
(axis_size,),
dtype=dpt.int64,
usm_type=usm_type,
@@ -735,7 +734,7 @@ def repeat(x, repeats, /, *, axis=None):
)
else:
res_shape = (res_axis_size,)
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape,
dtype=x.dtype,
usm_type=usm_type,
@@ -792,7 +791,7 @@ def roll(x, /, shift, *, axis=None):
_manager = dputils.SequentialOrderManager[exec_q]
if axis is None:
shift = operator.index(shift)
- res = dpt_ext.empty(
+ res = dpt.empty(
x.shape, dtype=x.dtype, usm_type=x.usm_type, sycl_queue=exec_q
)
sz = operator.index(x.size)
@@ -819,7 +818,7 @@ def roll(x, /, shift, *, axis=None):
n_i = operator.index(shape[ax])
shifted = shifts[ax] + operator.index(sh)
shifts[ax] = (shifted % n_i) if n_i > 0 else 0
- res = dpt_ext.empty(
+ res = dpt.empty(
x.shape, dtype=x.dtype, usm_type=x.usm_type, sycl_queue=exec_q
)
dep_evs = _manager.submitted_events
@@ -872,7 +871,7 @@ def squeeze(X, /, axis=None):
if new_shape == X.shape:
return X
else:
- return dpt_ext.reshape(X, new_shape)
+ return dpt.reshape(X, new_shape)
def stack(arrays, /, *, axis=0):
@@ -917,7 +916,7 @@ def stack(arrays, /, *, axis=0):
for i in range(res_ndim)
)
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q
)
@@ -971,7 +970,7 @@ def swapaxes(X, axis1, axis2):
ind = list(range(0, X.ndim))
ind[axis1] = axis2
ind[axis2] = axis1
- return dpt_ext.permute_dims(X, tuple(ind))
+ return dpt.permute_dims(X, tuple(ind))
def unstack(X, /, *, axis=0):
@@ -998,7 +997,7 @@ def unstack(X, /, *, axis=0):
raise TypeError(f"Expected usm_ndarray type, got {type(X)}.")
axis = normalize_axis_index(axis, X.ndim)
- Y = dpt_ext.moveaxis(X, axis, 0)
+ Y = dpt.moveaxis(X, axis, 0)
return tuple(Y[i] for i in range(Y.shape[0]))
@@ -1049,11 +1048,11 @@ def tile(x, repetitions, /):
if rep_dims < x_dims:
repetitions = (x_dims - rep_dims) * (1,) + repetitions
elif x_dims < rep_dims:
- x = dpt_ext.reshape(x, (rep_dims - x_dims) * (1,) + x.shape)
+ x = dpt.reshape(x, (rep_dims - x_dims) * (1,) + x.shape)
res_shape = tuple(map(lambda sh, rep: sh * rep, x.shape, repetitions))
# case of empty input
if x.size == 0:
- return dpt_ext.empty(
+ return dpt.empty(
res_shape,
dtype=x.dtype,
usm_type=x.usm_type,
@@ -1061,7 +1060,7 @@ def tile(x, repetitions, /):
)
in_sh = x.shape
if res_shape == in_sh:
- return dpt_ext.copy(x)
+ return dpt.copy(x)
expanded_sh = []
broadcast_sh = []
out_sz = 1
@@ -1082,12 +1081,12 @@ def tile(x, repetitions, /):
exec_q = x.sycl_queue
xdt = x.dtype
xut = x.usm_type
- res = dpt_ext.empty((out_sz,), dtype=xdt, usm_type=xut, sycl_queue=exec_q)
+ res = dpt.empty((out_sz,), dtype=xdt, usm_type=xut, sycl_queue=exec_q)
# no need to copy data for empty output
if out_sz > 0:
- x = dpt_ext.broadcast_to(
+ x = dpt.broadcast_to(
# this reshape should never copy
- dpt_ext.reshape(x, expanded_sh),
+ dpt.reshape(x, expanded_sh),
broadcast_sh,
)
# copy broadcast input into flat array
@@ -1097,4 +1096,4 @@ def tile(x, repetitions, /):
src=x, dst=res, sycl_queue=exec_q, depends=dep_evs
)
_manager.add_event_pair(hev, cp_ev)
- return dpt_ext.reshape(res, res_shape)
+ return dpt.reshape(res, res_shape)
diff --git a/dpctl_ext/tensor/_print.py b/dpctl_ext/tensor/_print.py
new file mode 100644
index 000000000000..5385eadb2537
--- /dev/null
+++ b/dpctl_ext/tensor/_print.py
@@ -0,0 +1,503 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import contextlib
+import itertools
+import operator
+
+import dpctl
+import dpctl.utils
+import numpy as np
+
+# TODO: revert to `import dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+import dpctl_ext.tensor as dpt
+import dpctl_ext.tensor._tensor_impl as ti
+
+__doc__ = "Print functions for :class:`dpctl.tensor.usm_ndarray`."
+
+_print_options = {
+ "linewidth": 75,
+ "edgeitems": 3,
+ "threshold": 1000,
+ "precision": 8,
+ "floatmode": "maxprec",
+ "suppress": False,
+ "nanstr": "nan",
+ "infstr": "inf",
+ "sign": "-",
+}
+
+
+def _move_to_next_line(string, s, line_width, prefix):
+ """Move string to next line if it doesn't fit in the current line."""
+ bottom_len = len(s) - (s.rfind("\n") + 1)
+ next_line = bottom_len + len(string) + 1 > line_width
+ string = ",\n" + " " * len(prefix) + string if next_line else ", " + string
+
+ return string
+
+
+def _options_dict(
+ linewidth=None,
+ edgeitems=None,
+ threshold=None,
+ precision=None,
+ floatmode=None,
+ suppress=None,
+ nanstr=None,
+ infstr=None,
+ sign=None,
+ numpy=False,
+):
+ if numpy:
+ numpy_options = np.get_printoptions()
+ options = {k: numpy_options[k] for k in _print_options.keys()}
+ else:
+ options = _print_options.copy()
+
+ if suppress:
+ options["suppress"] = True
+
+ local = dict(locals().items())
+ for int_arg in ["linewidth", "precision", "threshold", "edgeitems"]:
+ val = local[int_arg]
+ if val is not None:
+ options[int_arg] = operator.index(val)
+
+ for str_arg in ["nanstr", "infstr"]:
+ val = local[str_arg]
+ if val is not None:
+ if not isinstance(val, str):
+ raise TypeError(
+ "`{}` ".format(str_arg) + "must be of `string` type."
+ )
+ options[str_arg] = val
+
+ signs = ["-", "+", " "]
+ if sign is not None:
+ if sign not in signs:
+ raise ValueError(
+ "`sign` must be one of"
+ + ", ".join("`{}`".format(s) for s in signs)
+ )
+ options["sign"] = sign
+
+ floatmodes = ["fixed", "unique", "maxprec", "maxprec_equal"]
+ if floatmode is not None:
+ if floatmode not in floatmodes:
+ raise ValueError(
+ "`floatmode` must be one of"
+ + ", ".join("`{}`".format(m) for m in floatmodes)
+ )
+ options["floatmode"] = floatmode
+
+ return options
+
+
+def set_print_options(
+ linewidth=None,
+ edgeitems=None,
+ threshold=None,
+ precision=None,
+ floatmode=None,
+ suppress=None,
+ nanstr=None,
+ infstr=None,
+ sign=None,
+ numpy=False,
+):
+ """
+ set_print_options(linewidth=None, edgeitems=None, threshold=None,
+ precision=None, floatmode=None, suppress=None,
+ nanstr=None, infstr=None, sign=None, numpy=False)
+
+ Set options for printing :class:`dpctl.tensor.usm_ndarray` class.
+
+ Args:
+ linewidth (int, optional):
+ Number of characters printed per line.
+ Raises `TypeError` if linewidth is not an integer.
+ Default: `75`.
+ edgeitems (int, optional):
+ Number of elements at the beginning and end
+ when the printed array is abbreviated.
+ Raises `TypeError` if edgeitems is not an integer.
+ Default: `3`.
+ threshold (int, optional):
+ Number of elements that triggers array abbreviation.
+ Raises `TypeError` if threshold is not an integer.
+ Default: `1000`.
+ precision (int or None, optional):
+ Number of digits printed for floating point numbers.
+ Raises `TypeError` if precision is not an integer.
+ Default: `8`.
+ floatmode (str, optional):
+ Controls how floating point numbers are interpreted.
+            `"fixed"`:
+ Always prints exactly `precision` digits.
+ `"unique"`:
+ Ignores precision, prints the number of
+ digits necessary to uniquely specify each number.
+ `"maxprec"`:
+ Prints `precision` digits or fewer,
+ if fewer will uniquely represent a number.
+ `"maxprec_equal"`:
+ Prints an equal number of digits
+ for each number. This number is `precision` digits
+ or fewer, if fewer will uniquely represent each number.
+ Raises `ValueError` if floatmode is not one of
+ `fixed`, `unique`, `maxprec`, or `maxprec_equal`.
+            Default: "maxprec"
+ suppress (bool, optional):
+            If `True`, numbers equal to zero in the current precision
+ will print as zero.
+ Default: `False`.
+ nanstr (str, optional):
+ String used to represent nan.
+ Raises `TypeError` if nanstr is not a string.
+ Default: `"nan"`.
+ infstr (str, optional):
+ String used to represent infinity.
+ Raises `TypeError` if infstr is not a string.
+ Default: `"inf"`.
+ sign (str, optional):
+ Controls the sign of floating point numbers.
+ `"-"`:
+ Omit the sign of positive numbers.
+ `"+"`:
+ Always print the sign of positive numbers.
+ `" "`:
+ Always print a whitespace in place of the
+ sign of positive numbers.
+ Raises `ValueError` if sign is not one of
+ `"-"`, `"+"`, or `" "`.
+ Default: `"-"`.
+        numpy (bool, optional): If `True`, then before other specified print
+ options are set, a dictionary of Numpy's print options
+ will be used to initialize dpctl's print options.
+ Default: "False"
+ """
+ options = _options_dict(
+ linewidth=linewidth,
+ edgeitems=edgeitems,
+ threshold=threshold,
+ precision=precision,
+ floatmode=floatmode,
+ suppress=suppress,
+ nanstr=nanstr,
+ infstr=infstr,
+ sign=sign,
+ numpy=numpy,
+ )
+ _print_options.update(options)
+
+
+def get_print_options():
+ """get_print_options()
+
+ Returns a copy of current options for printing
+ :class:`dpctl.tensor.usm_ndarray` class.
+
+ Returns:
+ dict: dictionary with array
+ printing option settings.
+
+ Options:
+ - "linewidth" : int, default 75
+ - "edgeitems" : int, default 3
+ - "threshold" : int, default 1000
+ - "precision" : int, default 8
+        - "floatmode" : str, default "maxprec"
+ - "suppress" : bool, default False
+ - "nanstr" : str, default "nan"
+ - "infstr" : str, default "inf"
+ - "sign" : str, default "-"
+ """
+ return _print_options.copy()
+
+
+@contextlib.contextmanager
+def print_options(*args, **kwargs):
+ """
+ Context manager for print options.
+
+ Set print options for the scope of a `with` block.
+ `as` yields dictionary of print options.
+ """
+ options = dpt.get_print_options()
+ try:
+ dpt.set_print_options(*args, **kwargs)
+ yield dpt.get_print_options()
+ finally:
+ dpt.set_print_options(**options)
+
+
+def _nd_corners(arr_in, edge_items):
+ _shape = arr_in.shape
+ max_shape = 2 * edge_items + 1
+ if max(_shape) <= max_shape:
+ return dpt.asnumpy(arr_in)
+ res_shape = tuple(
+ max_shape if _shape[i] > max_shape else _shape[i]
+ for i in range(arr_in.ndim)
+ )
+
+ exec_q = arr_in.sycl_queue
+ arr_out = dpt.empty(
+ res_shape,
+ dtype=arr_in.dtype,
+ usm_type=arr_in.usm_type,
+ sycl_queue=exec_q,
+ )
+
+ blocks = []
+ for i in range(len(_shape)):
+ if _shape[i] > max_shape:
+ blocks.append(
+ (
+ np.s_[:edge_items],
+ np.s_[-edge_items:],
+ )
+ )
+ else:
+ blocks.append((np.s_[:],))
+
+ _manager = dpctl.utils.SequentialOrderManager[exec_q]
+ dep_evs = _manager.submitted_events
+ hev_list = []
+ for slc in itertools.product(*blocks):
+ hev, _ = ti._copy_usm_ndarray_into_usm_ndarray(
+ src=arr_in[slc],
+ dst=arr_out[slc],
+ sycl_queue=exec_q,
+ depends=dep_evs,
+ )
+ hev_list.append(hev)
+
+ dpctl.SyclEvent.wait_for(hev_list)
+ return dpt.asnumpy(arr_out)
+
+
+def usm_ndarray_str(
+ x,
+ line_width=None,
+ edge_items=None,
+ threshold=None,
+ precision=None,
+ floatmode=None,
+ suppress=None,
+ sign=None,
+ numpy=False,
+ separator=" ",
+ prefix="",
+ suffix="",
+):
+ """
+    usm_ndarray_str(x, line_width=None, edge_items=None, threshold=None,
+ precision=None, floatmode=None, suppress=None,
+ sign=None, numpy=False, separator=" ", prefix="",
+ suffix="")
+
+ Returns a string representing the elements of a
+ :class:`dpctl.tensor.usm_ndarray`.
+
+ Args:
+ x (usm_ndarray):
+ Input array.
+ line_width (int, optional):
+ Number of characters printed per line.
+ Raises `TypeError` if line_width is not an integer.
+ Default: `75`.
+        edge_items (int, optional):
+ Number of elements at the beginning and end
+ when the printed array is abbreviated.
+ Raises `TypeError` if edgeitems is not an integer.
+ Default: `3`.
+ threshold (int, optional):
+ Number of elements that triggers array abbreviation.
+ Raises `TypeError` if threshold is not an integer.
+ Default: `1000`.
+ precision (int or None, optional):
+ Number of digits printed for floating point numbers.
+ Raises `TypeError` if precision is not an integer.
+ Default: `8`.
+ floatmode (str, optional):
+ Controls how floating point numbers are interpreted.
+            `"fixed"`:
+ Always prints exactly `precision` digits.
+ `"unique"`:
+ Ignores precision, prints the number of
+ digits necessary to uniquely specify each number.
+ `"maxprec"`:
+ Prints `precision` digits or fewer,
+ if fewer will uniquely represent a number.
+ `"maxprec_equal"`:
+ Prints an equal number of digits for each number.
+ This number is `precision` digits or fewer,
+ if fewer will uniquely represent each number.
+ Raises `ValueError` if floatmode is not one of
+ `fixed`, `unique`, `maxprec`, or `maxprec_equal`.
+            Default: "maxprec"
+ suppress (bool, optional):
+            If `True`, numbers equal to zero in the current precision
+ will print as zero.
+ Default: `False`.
+ sign (str, optional):
+ Controls the sign of floating point numbers.
+ `"-"`:
+ Omit the sign of positive numbers.
+ `"+"`:
+ Always print the sign of positive numbers.
+ `" "`:
+ Always print a whitespace in place of the
+ sign of positive numbers.
+ Raises `ValueError` if sign is not one of
+ `"-"`, `"+"`, or `" "`.
+ Default: `"-"`.
+ numpy (bool, optional):
+            If `True`, then before other specified print
+ options are set, a dictionary of Numpy's print options
+ will be used to initialize dpctl's print options.
+ Default: "False"
+ separator (str, optional):
+ String inserted between elements of the array string.
+ Default: " "
+ prefix (str, optional):
+ String used to determine spacing to the left of the array string.
+ Default: ""
+ suffix (str, optional):
+ String that determines length of the last line of the array string.
+ Default: ""
+
+ Returns:
+ str: string representation of input array.
+ """
+ if not isinstance(x, dpt.usm_ndarray):
+ raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")
+
+ options = get_print_options()
+ options.update(
+ _options_dict(
+ linewidth=line_width,
+ edgeitems=edge_items,
+ threshold=threshold,
+ precision=precision,
+ floatmode=floatmode,
+ suppress=suppress,
+ sign=sign,
+ numpy=numpy,
+ )
+ )
+
+ threshold = options["threshold"]
+ edge_items = options["edgeitems"]
+
+ if x.size > threshold:
+ data = _nd_corners(x, edge_items)
+ options["threshold"] = 0
+ else:
+ data = dpt.asnumpy(x)
+ with np.printoptions(**options):
+ s = np.array2string(
+ data, separator=separator, prefix=prefix, suffix=suffix
+ )
+ return s
+
+
+def usm_ndarray_repr(
+ x, line_width=None, precision=None, suppress=None, prefix="usm_ndarray"
+):
+ """
+ usm_ndarray_repr(x, line_width=None, precision=None,
+                     suppress=None, prefix="usm_ndarray")
+
+ Returns a formatted string representing the elements
+ of a :class:`dpctl.tensor.usm_ndarray` and its data type,
+ if not a default type.
+
+ Args:
+ x (usm_ndarray): Input array.
+ line_width (int, optional): Number of characters printed per line.
+ Raises `TypeError` if line_width is not an integer.
+ Default: `75`.
+ precision (int or None, optional): Number of digits printed for
+ floating point numbers.
+ Raises `TypeError` if precision is not an integer.
+ Default: `8`.
+        suppress (bool, optional): If `True`, numbers equal to zero
+ in the current precision will print as zero.
+ Default: `False`.
+ prefix (str, optional): String inserted at the start of the array
+ string.
+            Default: "usm_ndarray"
+
+ Returns:
+ str: formatted string representing the input array
+ """
+ if not isinstance(x, dpt.usm_ndarray):
+ raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}")
+
+ if line_width is None:
+ line_width = _print_options["linewidth"]
+
+ show_dtype = x.dtype not in [
+ dpt.bool,
+ dpt.int64,
+ dpt.float64,
+ dpt.complex128,
+ ]
+
+ prefix = prefix + "("
+ suffix = ")"
+
+ s = usm_ndarray_str(
+ x,
+ line_width=line_width,
+ precision=precision,
+ suppress=suppress,
+ separator=", ",
+ prefix=prefix,
+ suffix=suffix,
+ )
+
+ if show_dtype or x.size == 0:
+ dtype_str = f"dtype={x.dtype.name}"
+ dtype_str = _move_to_next_line(dtype_str, s, line_width, prefix)
+ else:
+ dtype_str = ""
+
+ options = get_print_options()
+ threshold = options["threshold"]
+ if (x.size == 0 and x.shape != (0,)) or x.size > threshold:
+ shape_str = f"shape={x.shape}"
+ shape_str = _move_to_next_line(shape_str, s, line_width, prefix)
+ else:
+ shape_str = ""
+
+ return prefix + s + shape_str + dtype_str + suffix
diff --git a/dpctl_ext/tensor/_reduction.py b/dpctl_ext/tensor/_reduction.py
index 2daf07b81d85..79e620605f07 100644
--- a/dpctl_ext/tensor/_reduction.py
+++ b/dpctl_ext/tensor/_reduction.py
@@ -27,12 +27,11 @@
# *****************************************************************************
import dpctl
-import dpctl.tensor as dpt
from dpctl.utils import ExecutionPlacementError, SequentialOrderManager
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
import dpctl_ext.tensor._tensor_reductions_impl as tri
@@ -58,7 +57,7 @@ def _comparison_over_axis(x, axis, keepdims, out, _reduction_fn):
axis = (axis,)
axis = normalize_axis_tuple(axis, nd, "axis")
perm = [i for i in range(nd) if i not in axis] + list(axis)
- x_tmp = dpt_ext.permute_dims(x, perm)
+ x_tmp = dpt.permute_dims(x, perm)
red_nd = len(axis)
if any([x_tmp.shape[i] == 0 for i in range(-red_nd, 0)]):
raise ValueError("reduction cannot be performed over zero-size axes")
@@ -96,12 +95,12 @@ def _comparison_over_axis(x, axis, keepdims, out, _reduction_fn):
"Input and output allocation queues are not compatible"
)
if keepdims:
- out = dpt_ext.squeeze(out, axis=axis)
+ out = dpt.squeeze(out, axis=axis)
orig_out = out
if ti._array_overlap(x, out):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=exec_q
)
@@ -138,7 +137,7 @@ def _comparison_over_axis(x, axis, keepdims, out, _reduction_fn):
if keepdims:
res_shape = res_shape + (1,) * red_nd
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- out = dpt_ext.permute_dims(dpt_ext.reshape(out, res_shape), inv_perm)
+ out = dpt.permute_dims(dpt.reshape(out, res_shape), inv_perm)
return out
@@ -164,7 +163,7 @@ def _reduction_over_axis(
axis = (axis,)
axis = normalize_axis_tuple(axis, nd, "axis")
perm = [i for i in range(nd) if i not in axis] + list(axis)
- arr = dpt_ext.permute_dims(x, perm)
+ arr = dpt.permute_dims(x, perm)
red_nd = len(axis)
res_shape = arr.shape[: nd - red_nd]
q = x.sycl_queue
@@ -212,12 +211,12 @@ def _reduction_over_axis(
"Input and output allocation queues are not compatible"
)
if keepdims:
- out = dpt_ext.squeeze(out, axis=axis)
+ out = dpt.squeeze(out, axis=axis)
orig_out = out
if ti._array_overlap(x, out) and implemented_types:
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
)
@@ -253,7 +252,7 @@ def _reduction_over_axis(
out = orig_out
else:
if _dtype_supported(res_dt, res_dt, res_usm_type, q):
- tmp = dpt_ext.empty(
+ tmp = dpt.empty(
arr.shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray(
@@ -270,14 +269,14 @@ def _reduction_over_axis(
_manager.add_event_pair(ht_e_red, red_ev)
else:
buf_dt = _default_reduction_type_fn(inp_dt, q)
- tmp = dpt_ext.empty(
+ tmp = dpt.empty(
arr.shape, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray(
src=arr, dst=tmp, sycl_queue=q, depends=dep_evs
)
_manager.add_event_pair(ht_e_cpy, cpy_e)
- tmp_res = dpt_ext.empty(
+ tmp_res = dpt.empty(
res_shape, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e_red, r_e = _reduction_fn(
@@ -296,7 +295,7 @@ def _reduction_over_axis(
if keepdims:
res_shape = res_shape + (1,) * red_nd
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- out = dpt_ext.permute_dims(dpt_ext.reshape(out, res_shape), inv_perm)
+ out = dpt.permute_dims(dpt.reshape(out, res_shape), inv_perm)
return out
@@ -320,7 +319,7 @@ def _search_over_axis(x, axis, keepdims, out, _reduction_fn):
)
axis = normalize_axis_tuple(axis, nd, "axis")
perm = [i for i in range(nd) if i not in axis] + list(axis)
- x_tmp = dpt_ext.permute_dims(x, perm)
+ x_tmp = dpt.permute_dims(x, perm)
axis = normalize_axis_tuple(axis, nd, "axis")
red_nd = len(axis)
if any([x_tmp.shape[i] == 0 for i in range(-red_nd, 0)]):
@@ -359,12 +358,12 @@ def _search_over_axis(x, axis, keepdims, out, _reduction_fn):
"Input and output allocation queues are not compatible"
)
if keepdims:
- out = dpt_ext.squeeze(out, axis=axis)
+ out = dpt.squeeze(out, axis=axis)
orig_out = out
if ti._array_overlap(x, out) and red_nd > 0:
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=exec_q
)
@@ -395,7 +394,7 @@ def _search_over_axis(x, axis, keepdims, out, _reduction_fn):
if keepdims:
res_shape = res_shape + (1,) * red_nd
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- out = dpt_ext.permute_dims(dpt_ext.reshape(out, res_shape), inv_perm)
+ out = dpt.permute_dims(dpt.reshape(out, res_shape), inv_perm)
return out
@@ -506,7 +505,7 @@ def count_nonzero(x, /, *, axis=None, keepdims=False, out=None):
type.
"""
if x.dtype != dpt.bool:
- x = dpt_ext.astype(x, dpt.bool, copy=False)
+ x = dpt.astype(x, dpt.bool, copy=False)
return sum(
x,
axis=axis,
diff --git a/dpctl_ext/tensor/_reshape.py b/dpctl_ext/tensor/_reshape.py
index 23cf47a83568..7ecdace4fc42 100644
--- a/dpctl_ext/tensor/_reshape.py
+++ b/dpctl_ext/tensor/_reshape.py
@@ -28,13 +28,12 @@
import operator
-import dpctl.tensor as dpt
import dpctl.utils
import numpy as np
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
from ._tensor_impl import (
_copy_usm_ndarray_for_reshape,
@@ -189,7 +188,7 @@ def reshape(X, /, shape, *, order="C", copy=None):
src=X, dst=flat_res, sycl_queue=copy_q, depends=dep_evs
)
else:
- X_t = dpt_ext.permute_dims(X, range(X.ndim - 1, -1, -1))
+ X_t = dpt.permute_dims(X, range(X.ndim - 1, -1, -1))
hev, r_e = _copy_usm_ndarray_for_reshape(
src=X_t, dst=flat_res, sycl_queue=copy_q, depends=dep_evs
)
diff --git a/dpctl_ext/tensor/_scalar_utils.py b/dpctl_ext/tensor/_scalar_utils.py
index 3ab92b42ad00..832121aea857 100644
--- a/dpctl_ext/tensor/_scalar_utils.py
+++ b/dpctl_ext/tensor/_scalar_utils.py
@@ -29,13 +29,14 @@
import numbers
import dpctl.memory as dpm
-import dpctl.tensor as dpt
import numpy as np
-from dpctl.tensor._usmarray import _is_object_with_buffer_protocol as _is_buffer
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
+from dpctl_ext.tensor._usmarray import (
+ _is_object_with_buffer_protocol as _is_buffer,
+)
from ._type_utils import (
WeakBooleanType,
@@ -63,7 +64,7 @@ def _get_dtype(o, dev):
if isinstance(o, dpt.usm_ndarray):
return o.dtype
if hasattr(o, "__sycl_usm_array_interface__"):
- return dpt_ext.asarray(o).dtype
+ return dpt.asarray(o).dtype
if _is_buffer(o):
host_dt = np.array(o).dtype
dev_dt = _to_device_supported_dtype(host_dt, dev)
diff --git a/dpctl_ext/tensor/_search_functions.py b/dpctl_ext/tensor/_search_functions.py
index 285a02b42bb8..aae185b64e2b 100644
--- a/dpctl_ext/tensor/_search_functions.py
+++ b/dpctl_ext/tensor/_search_functions.py
@@ -27,12 +27,11 @@
# *****************************************************************************
import dpctl
-import dpctl.tensor as dpt
from dpctl.utils import ExecutionPlacementError, SequentialOrderManager
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
from ._copy_utils import _empty_like_orderK, _empty_like_triple_orderK
@@ -111,7 +110,7 @@ def _resolve_two_weak_types(o1_dtype, o2_dtype, dev):
def _where_result_type(dt1, dt2, dev):
- res_dtype = dpt_ext.result_type(dt1, dt2)
+ res_dtype = dpt.result_type(dt1, dt2)
fp16 = dev.has_aspect_fp16
fp64 = dev.has_aspect_fp64
@@ -291,7 +290,7 @@ def where(condition, x1, x2, /, *, order="K", out=None):
if ti._array_overlap(condition, out) and not ti._same_logical_tensors(
condition, out
):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if isinstance(x1, dpt.usm_ndarray):
if (
@@ -299,7 +298,7 @@ def where(condition, x1, x2, /, *, order="K", out=None):
and not ti._same_logical_tensors(x1, out)
and x1_dtype == out_dtype
):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if isinstance(x2, dpt.usm_ndarray):
if (
@@ -307,7 +306,7 @@ def where(condition, x1, x2, /, *, order="K", out=None):
and not ti._same_logical_tensors(x2, out)
and x2_dtype == out_dtype
):
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
if order == "A":
order = (
@@ -323,9 +322,9 @@ def where(condition, x1, x2, /, *, order="K", out=None):
else "C"
)
if not isinstance(x1, dpt.usm_ndarray):
- x1 = dpt_ext.asarray(x1, dtype=x1_dtype, sycl_queue=exec_q)
+ x1 = dpt.asarray(x1, dtype=x1_dtype, sycl_queue=exec_q)
if not isinstance(x2, dpt.usm_ndarray):
- x2 = dpt_ext.asarray(x2, dtype=x2_dtype, sycl_queue=exec_q)
+ x2 = dpt.asarray(x2, dtype=x2_dtype, sycl_queue=exec_q)
if condition.size == 0:
if out is not None:
@@ -342,7 +341,7 @@ def where(condition, x1, x2, /, *, order="K", out=None):
exec_q,
)
else:
- return dpt_ext.empty(
+ return dpt.empty(
res_shape,
dtype=out_dtype,
order=order,
@@ -356,7 +355,7 @@ def where(condition, x1, x2, /, *, order="K", out=None):
if order == "K":
_x1 = _empty_like_orderK(x1, out_dtype)
else:
- _x1 = dpt_ext.empty_like(x1, dtype=out_dtype, order=order)
+ _x1 = dpt.empty_like(x1, dtype=out_dtype, order=order)
ht_copy1_ev, copy1_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=x1, dst=_x1, sycl_queue=exec_q, depends=dep_evs
)
@@ -367,7 +366,7 @@ def where(condition, x1, x2, /, *, order="K", out=None):
if order == "K":
_x2 = _empty_like_orderK(x2, out_dtype)
else:
- _x2 = dpt_ext.empty_like(x2, dtype=out_dtype, order=order)
+ _x2 = dpt.empty_like(x2, dtype=out_dtype, order=order)
ht_copy2_ev, copy2_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=x2, dst=_x2, sycl_queue=exec_q, depends=dep_evs
)
@@ -380,7 +379,7 @@ def where(condition, x1, x2, /, *, order="K", out=None):
condition, x1, x2, out_dtype, res_shape, out_usm_type, exec_q
)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
res_shape,
dtype=out_dtype,
order=order,
@@ -389,11 +388,11 @@ def where(condition, x1, x2, /, *, order="K", out=None):
)
if condition_shape != res_shape:
- condition = dpt_ext.broadcast_to(condition, res_shape)
+ condition = dpt.broadcast_to(condition, res_shape)
if x1_shape != res_shape:
- x1 = dpt_ext.broadcast_to(x1, res_shape)
+ x1 = dpt.broadcast_to(x1, res_shape)
if x2_shape != res_shape:
- x2 = dpt_ext.broadcast_to(x2, res_shape)
+ x2 = dpt.broadcast_to(x2, res_shape)
dep_evs = _manager.submitted_events
hev, where_ev = ti._where(
diff --git a/dpctl_ext/tensor/_searchsorted.py b/dpctl_ext/tensor/_searchsorted.py
index 2d4807fb0d0c..4c680a49b07b 100644
--- a/dpctl_ext/tensor/_searchsorted.py
+++ b/dpctl_ext/tensor/_searchsorted.py
@@ -32,10 +32,6 @@
import dpctl
import dpctl.utils as du
-# TODO: revert to `from ._usmarray import...`
-# when dpnp fully migrates dpctl/tensor
-from dpctl.tensor._usmarray import usm_ndarray
-
from ._copy_utils import _empty_like_orderK
from ._ctors import empty
from ._tensor_impl import _copy_usm_ndarray_into_usm_ndarray as ti_copy
@@ -46,6 +42,10 @@
from ._tensor_sorting_impl import _searchsorted_left, _searchsorted_right
from ._type_utils import isdtype, result_type
+# TODO: revert to `from ._usmarray import...`
+# when dpnp fully migrates dpctl/tensor
+from ._usmarray import usm_ndarray
+
def searchsorted(
x1: usm_ndarray,
diff --git a/dpctl_ext/tensor/_set_functions.py b/dpctl_ext/tensor/_set_functions.py
index 2672e082d18e..29e4914ad63b 100644
--- a/dpctl_ext/tensor/_set_functions.py
+++ b/dpctl_ext/tensor/_set_functions.py
@@ -28,12 +28,11 @@
from typing import NamedTuple, Optional, Union
-import dpctl.tensor as dpt
import dpctl.utils as du
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
from dpctl_ext.tensor._tensor_elementwise_impl import _not_equal, _subtract
from ._copy_utils import _empty_like_orderK
@@ -112,10 +111,10 @@ def unique_values(x: dpt.usm_ndarray) -> dpt.usm_ndarray:
if x.ndim == 1:
fx = x
else:
- fx = dpt_ext.reshape(x, (x.size,), order="C")
+ fx = dpt.reshape(x, (x.size,), order="C")
if fx.size == 0:
return fx
- s = dpt_ext.empty_like(fx, order="C")
+ s = dpt.empty_like(fx, order="C")
_manager = du.SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
if fx.flags.c_contiguous:
@@ -128,7 +127,7 @@ def unique_values(x: dpt.usm_ndarray) -> dpt.usm_ndarray:
)
_manager.add_event_pair(ht_ev, sort_ev)
else:
- tmp = dpt_ext.empty_like(fx, order="C")
+ tmp = dpt.empty_like(fx, order="C")
ht_ev, copy_ev = _copy_usm_ndarray_into_usm_ndarray(
src=fx, dst=tmp, sycl_queue=exec_q, depends=dep_evs
)
@@ -141,7 +140,7 @@ def unique_values(x: dpt.usm_ndarray) -> dpt.usm_ndarray:
depends=[copy_ev],
)
_manager.add_event_pair(ht_ev, sort_ev)
- unique_mask = dpt_ext.empty(fx.shape, dtype="?", sycl_queue=exec_q)
+ unique_mask = dpt.empty(fx.shape, dtype="?", sycl_queue=exec_q)
ht_ev, uneq_ev = _not_equal(
src1=s[:-1],
src2=s[1:],
@@ -155,14 +154,14 @@ def unique_values(x: dpt.usm_ndarray) -> dpt.usm_ndarray:
fill_value=True, dst=unique_mask[0], sycl_queue=exec_q
)
_manager.add_event_pair(ht_ev, one_ev)
- cumsum = dpt_ext.empty(s.shape, dtype=dpt.int64, sycl_queue=exec_q)
+ cumsum = dpt.empty(s.shape, dtype=dpt.int64, sycl_queue=exec_q)
# synchronizing call
n_uniques = mask_positions(
unique_mask, cumsum, sycl_queue=exec_q, depends=[one_ev, uneq_ev]
)
if n_uniques == fx.size:
return s
- unique_vals = dpt_ext.empty(
+ unique_vals = dpt.empty(
n_uniques, dtype=x.dtype, usm_type=x.usm_type, sycl_queue=exec_q
)
ht_ev, ex_e = _extract(
@@ -206,11 +205,11 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult:
if x.ndim == 1:
fx = x
else:
- fx = dpt_ext.reshape(x, (x.size,), order="C")
+ fx = dpt.reshape(x, (x.size,), order="C")
ind_dt = default_device_index_type(exec_q)
if fx.size == 0:
- return UniqueCountsResult(fx, dpt_ext.empty_like(fx, dtype=ind_dt))
- s = dpt_ext.empty_like(fx, order="C")
+ return UniqueCountsResult(fx, dpt.empty_like(fx, dtype=ind_dt))
+ s = dpt.empty_like(fx, order="C")
_manager = du.SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
@@ -224,7 +223,7 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult:
)
_manager.add_event_pair(ht_ev, sort_ev)
else:
- tmp = dpt_ext.empty_like(fx, order="C")
+ tmp = dpt.empty_like(fx, order="C")
ht_ev, copy_ev = _copy_usm_ndarray_into_usm_ndarray(
src=fx, dst=tmp, sycl_queue=exec_q, depends=dep_evs
)
@@ -237,7 +236,7 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult:
depends=[copy_ev],
)
_manager.add_event_pair(ht_ev, sort_ev)
- unique_mask = dpt_ext.empty(s.shape, dtype="?", sycl_queue=exec_q)
+ unique_mask = dpt.empty(s.shape, dtype="?", sycl_queue=exec_q)
ht_ev, uneq_ev = _not_equal(
src1=s[:-1],
src2=s[1:],
@@ -251,9 +250,7 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult:
fill_value=True, dst=unique_mask[0], sycl_queue=exec_q
)
_manager.add_event_pair(ht_ev, one_ev)
- cumsum = dpt_ext.empty(
- unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q
- )
+ cumsum = dpt.empty(unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q)
# synchronizing call
n_uniques = mask_positions(
unique_mask, cumsum, sycl_queue=exec_q, depends=[one_ev, uneq_ev]
@@ -261,11 +258,11 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult:
if n_uniques == fx.size:
return UniqueCountsResult(
s,
- dpt_ext.ones(
+ dpt.ones(
n_uniques, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q
),
)
- unique_vals = dpt_ext.empty(
+ unique_vals = dpt.empty(
n_uniques, dtype=x.dtype, usm_type=x_usm_type, sycl_queue=exec_q
)
# populate unique values
@@ -278,10 +275,10 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult:
sycl_queue=exec_q,
)
_manager.add_event_pair(ht_ev, ex_e)
- unique_counts = dpt_ext.empty(
+ unique_counts = dpt.empty(
n_uniques + 1, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q
)
- idx = dpt_ext.empty(x.size, dtype=ind_dt, sycl_queue=exec_q)
+ idx = dpt.empty(x.size, dtype=ind_dt, sycl_queue=exec_q)
# writing into new allocation, no dependency
ht_ev, id_ev = _linspace_step(start=0, dt=1, dst=idx, sycl_queue=exec_q)
_manager.add_event_pair(ht_ev, id_ev)
@@ -300,7 +297,7 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult:
x.size, dst=unique_counts[-1], sycl_queue=exec_q
)
_manager.add_event_pair(ht_ev, set_ev)
- _counts = dpt_ext.empty_like(unique_counts[1:])
+ _counts = dpt.empty_like(unique_counts[1:])
ht_ev, sub_ev = _subtract(
src1=unique_counts[1:],
src2=unique_counts[:-1],
@@ -342,11 +339,11 @@ def unique_inverse(x):
if x.ndim == 1:
fx = x
else:
- fx = dpt_ext.reshape(x, (x.size,), order="C")
- sorting_ids = dpt_ext.empty_like(fx, dtype=ind_dt, order="C")
- unsorting_ids = dpt_ext.empty_like(sorting_ids, dtype=ind_dt, order="C")
+ fx = dpt.reshape(x, (x.size,), order="C")
+ sorting_ids = dpt.empty_like(fx, dtype=ind_dt, order="C")
+ unsorting_ids = dpt.empty_like(sorting_ids, dtype=ind_dt, order="C")
if fx.size == 0:
- return UniqueInverseResult(fx, dpt_ext.reshape(unsorting_ids, x.shape))
+ return UniqueInverseResult(fx, dpt.reshape(unsorting_ids, x.shape))
_manager = du.SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
@@ -360,7 +357,7 @@ def unique_inverse(x):
)
_manager.add_event_pair(ht_ev, sort_ev)
else:
- tmp = dpt_ext.empty_like(fx, order="C")
+ tmp = dpt.empty_like(fx, order="C")
ht_ev, copy_ev = _copy_usm_ndarray_into_usm_ndarray(
src=fx, dst=tmp, sycl_queue=exec_q, depends=dep_evs
)
@@ -381,7 +378,7 @@ def unique_inverse(x):
depends=[sort_ev],
)
_manager.add_event_pair(ht_ev, argsort_ev)
- s = dpt_ext.empty_like(fx)
+ s = dpt.empty_like(fx)
# s = fx[sorting_ids]
ht_ev, take_ev = _take(
src=fx,
@@ -393,7 +390,7 @@ def unique_inverse(x):
depends=[sort_ev],
)
_manager.add_event_pair(ht_ev, take_ev)
- unique_mask = dpt_ext.empty(fx.shape, dtype="?", sycl_queue=exec_q)
+ unique_mask = dpt.empty(fx.shape, dtype="?", sycl_queue=exec_q)
ht_ev, uneq_ev = _not_equal(
src1=s[:-1],
src2=s[1:],
@@ -407,16 +404,14 @@ def unique_inverse(x):
fill_value=True, dst=unique_mask[0], sycl_queue=exec_q
)
_manager.add_event_pair(ht_ev, one_ev)
- cumsum = dpt_ext.empty(
- unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q
- )
+ cumsum = dpt.empty(unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q)
# synchronizing call
n_uniques = mask_positions(
unique_mask, cumsum, sycl_queue=exec_q, depends=[uneq_ev, one_ev]
)
if n_uniques == fx.size:
- return UniqueInverseResult(s, dpt_ext.reshape(unsorting_ids, x.shape))
- unique_vals = dpt_ext.empty(
+ return UniqueInverseResult(s, dpt.reshape(unsorting_ids, x.shape))
+ unique_vals = dpt.empty(
n_uniques, dtype=x.dtype, usm_type=x_usm_type, sycl_queue=exec_q
)
ht_ev, uv_ev = _extract(
@@ -428,10 +423,10 @@ def unique_inverse(x):
sycl_queue=exec_q,
)
_manager.add_event_pair(ht_ev, uv_ev)
- cum_unique_counts = dpt_ext.empty(
+ cum_unique_counts = dpt.empty(
n_uniques + 1, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q
)
- idx = dpt_ext.empty(x.size, dtype=ind_dt, sycl_queue=exec_q)
+ idx = dpt.empty(x.size, dtype=ind_dt, sycl_queue=exec_q)
ht_ev, id_ev = _linspace_step(start=0, dt=1, dst=idx, sycl_queue=exec_q)
_manager.add_event_pair(ht_ev, id_ev)
ht_ev, extr_ev = _extract(
@@ -448,7 +443,7 @@ def unique_inverse(x):
x.size, dst=cum_unique_counts[-1], sycl_queue=exec_q
)
_manager.add_event_pair(ht_ev, set_ev)
- _counts = dpt_ext.empty_like(cum_unique_counts[1:])
+ _counts = dpt.empty_like(cum_unique_counts[1:])
ht_ev, sub_ev = _subtract(
src1=cum_unique_counts[1:],
src2=cum_unique_counts[:-1],
@@ -458,7 +453,7 @@ def unique_inverse(x):
)
_manager.add_event_pair(ht_ev, sub_ev)
- inv = dpt_ext.empty_like(x, dtype=ind_dt, order="C")
+ inv = dpt.empty_like(x, dtype=ind_dt, order="C")
ht_ev, ssl_ev = _searchsorted_left(
hay=unique_vals,
needles=x,
@@ -513,17 +508,17 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult:
if x.ndim == 1:
fx = x
else:
- fx = dpt_ext.reshape(x, (x.size,), order="C")
- sorting_ids = dpt_ext.empty_like(fx, dtype=ind_dt, order="C")
- unsorting_ids = dpt_ext.empty_like(sorting_ids, dtype=ind_dt, order="C")
+ fx = dpt.reshape(x, (x.size,), order="C")
+ sorting_ids = dpt.empty_like(fx, dtype=ind_dt, order="C")
+ unsorting_ids = dpt.empty_like(sorting_ids, dtype=ind_dt, order="C")
if fx.size == 0:
# original array contains no data
# so it can be safely returned as values
return UniqueAllResult(
fx,
sorting_ids,
- dpt_ext.reshape(unsorting_ids, x.shape),
- dpt_ext.empty_like(fx, dtype=ind_dt),
+ dpt.reshape(unsorting_ids, x.shape),
+ dpt.empty_like(fx, dtype=ind_dt),
)
_manager = du.SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
@@ -537,7 +532,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult:
)
_manager.add_event_pair(ht_ev, sort_ev)
else:
- tmp = dpt_ext.empty_like(fx, order="C")
+ tmp = dpt.empty_like(fx, order="C")
ht_ev, copy_ev = _copy_usm_ndarray_into_usm_ndarray(
src=fx, dst=tmp, sycl_queue=exec_q, depends=dep_evs
)
@@ -558,7 +553,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult:
depends=[sort_ev],
)
_manager.add_event_pair(ht_ev, args_ev)
- s = dpt_ext.empty_like(fx)
+ s = dpt.empty_like(fx)
# s = fx[sorting_ids]
ht_ev, take_ev = _take(
src=fx,
@@ -570,7 +565,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult:
depends=[sort_ev],
)
_manager.add_event_pair(ht_ev, take_ev)
- unique_mask = dpt_ext.empty(fx.shape, dtype="?", sycl_queue=exec_q)
+ unique_mask = dpt.empty(fx.shape, dtype="?", sycl_queue=exec_q)
ht_ev, uneq_ev = _not_equal(
src1=s[:-1],
src2=s[1:],
@@ -583,24 +578,22 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult:
fill_value=True, dst=unique_mask[0], sycl_queue=exec_q
)
_manager.add_event_pair(ht_ev, one_ev)
- cumsum = dpt_ext.empty(
- unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q
- )
+ cumsum = dpt.empty(unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q)
# synchronizing call
n_uniques = mask_positions(
unique_mask, cumsum, sycl_queue=exec_q, depends=[uneq_ev, one_ev]
)
if n_uniques == fx.size:
- _counts = dpt_ext.ones(
+ _counts = dpt.ones(
n_uniques, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q
)
return UniqueAllResult(
s,
sorting_ids,
- dpt_ext.reshape(unsorting_ids, x.shape),
+ dpt.reshape(unsorting_ids, x.shape),
_counts,
)
- unique_vals = dpt_ext.empty(
+ unique_vals = dpt.empty(
n_uniques, dtype=x.dtype, usm_type=x_usm_type, sycl_queue=exec_q
)
ht_ev, uv_ev = _extract(
@@ -612,10 +605,10 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult:
sycl_queue=exec_q,
)
_manager.add_event_pair(ht_ev, uv_ev)
- cum_unique_counts = dpt_ext.empty(
+ cum_unique_counts = dpt.empty(
n_uniques + 1, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q
)
- idx = dpt_ext.empty(x.size, dtype=ind_dt, sycl_queue=exec_q)
+ idx = dpt.empty(x.size, dtype=ind_dt, sycl_queue=exec_q)
ht_ev, id_ev = _linspace_step(start=0, dt=1, dst=idx, sycl_queue=exec_q)
_manager.add_event_pair(ht_ev, id_ev)
ht_ev, extr_ev = _extract(
@@ -632,7 +625,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult:
x.size, dst=cum_unique_counts[-1], sycl_queue=exec_q
)
_manager.add_event_pair(ht_ev, set_ev)
- _counts = dpt_ext.empty_like(cum_unique_counts[1:])
+ _counts = dpt.empty_like(cum_unique_counts[1:])
ht_ev, sub_ev = _subtract(
src1=cum_unique_counts[1:],
src2=cum_unique_counts[:-1],
@@ -642,7 +635,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult:
)
_manager.add_event_pair(ht_ev, sub_ev)
- inv = dpt_ext.empty_like(x, dtype=ind_dt, order="C")
+ inv = dpt.empty_like(x, dtype=ind_dt, order="C")
ht_ev, ssl_ev = _searchsorted_left(
hay=unique_vals,
needles=x,
@@ -734,26 +727,26 @@ def isin(
x_sh = _get_shape(x)
if isinstance(test_elements, dpt.usm_ndarray) and test_elements.size == 0:
if invert:
- return dpt_ext.ones(
+ return dpt.ones(
x_sh, dtype=dpt.bool, usm_type=res_usm_type, sycl_queue=exec_q
)
else:
- return dpt_ext.zeros(
+ return dpt.zeros(
x_sh, dtype=dpt.bool, usm_type=res_usm_type, sycl_queue=exec_q
)
dt1, dt2 = _resolve_weak_types_all_py_ints(x_dt, test_dt, sycl_dev)
- dt = _to_device_supported_dtype(dpt_ext.result_type(dt1, dt2), sycl_dev)
+ dt = _to_device_supported_dtype(dpt.result_type(dt1, dt2), sycl_dev)
if not isinstance(x, dpt.usm_ndarray):
- x_arr = dpt_ext.asarray(
+ x_arr = dpt.asarray(
x, dtype=dt1, usm_type=res_usm_type, sycl_queue=exec_q
)
else:
x_arr = x
if not isinstance(test_elements, dpt.usm_ndarray):
- test_arr = dpt_ext.asarray(
+ test_arr = dpt.asarray(
test_elements, dtype=dt2, usm_type=res_usm_type, sycl_queue=exec_q
)
else:
@@ -773,7 +766,7 @@ def isin(
if test_dt != dt:
# copy into C-contiguous memory, because the array will be flattened
- test_buf = dpt_ext.empty_like(
+ test_buf = dpt.empty_like(
test_arr, dtype=dt, order="C", usm_type=res_usm_type
)
ht_ev, ev = _copy_usm_ndarray_into_usm_ndarray(
@@ -783,10 +776,10 @@ def isin(
else:
test_buf = test_arr
- test_buf = dpt_ext.reshape(test_buf, -1)
- test_buf = dpt_ext.sort(test_buf)
+ test_buf = dpt.reshape(test_buf, -1)
+ test_buf = dpt.sort(test_buf)
- dst = dpt_ext.empty_like(
+ dst = dpt.empty_like(
x_buf, dtype=dpt.bool, usm_type=res_usm_type, order="C"
)
diff --git a/dpctl_ext/tensor/_slicing.pxi b/dpctl_ext/tensor/_slicing.pxi
new file mode 100644
index 000000000000..86db56013e23
--- /dev/null
+++ b/dpctl_ext/tensor/_slicing.pxi
@@ -0,0 +1,383 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import numbers
+from operator import index
+from cpython.buffer cimport PyObject_CheckBuffer
+from numpy import ndarray
+
+
+cdef bint _is_buffer(object o):
+ return PyObject_CheckBuffer(o)
+
+
+cdef Py_ssize_t _slice_len(
+ Py_ssize_t sl_start,
+ Py_ssize_t sl_stop,
+ Py_ssize_t sl_step
+):
+ """
+ Compute len(range(sl_start, sl_stop, sl_step))
+ """
+ if sl_start == sl_stop:
+ return 0
+ if sl_step > 0:
+ if sl_start > sl_stop:
+ return 0
+ # 1 + argmax k such that sl_start + sl_step*k < sl_stop
+ return 1 + ((sl_stop - sl_start - 1) // sl_step)
+ else:
+ if sl_start < sl_stop:
+ return 0
+ return 1 + ((sl_stop - sl_start + 1) // sl_step)
+
+
+cdef bint _is_integral(object x) except *:
+ """Gives True if x is an integral slice spec"""
+ if isinstance(x, (ndarray, usm_ndarray)):
+ if x.ndim > 0:
+ return False
+ if x.dtype.kind not in "ui":
+ return False
+ return True
+ if isinstance(x, bool):
+ return False
+ if isinstance(x, int):
+ return True
+ if _is_buffer(x):
+ mbuf = memoryview(x)
+ if mbuf.ndim == 0:
+ f = mbuf.format
+ return f in "bBhHiIlLqQ"
+ else:
+ return False
+ if callable(getattr(x, "__index__", None)):
+ try:
+ index(x)
+ except (TypeError, ValueError):
+ return False
+ return True
+ return False
+
+
+cdef bint _is_boolean(object x) except *:
+ """Gives True if x is a boolean slice spec"""
+ if isinstance(x, (ndarray, usm_ndarray)):
+ if x.ndim > 0:
+ return False
+ if x.dtype.kind not in "b":
+ return False
+ return True
+ if isinstance(x, bool):
+ return True
+ if isinstance(x, (int, float, complex)):
+ return False
+ if _is_buffer(x):
+ mbuf = memoryview(x)
+ if mbuf.ndim == 0:
+ f = mbuf.format
+ return f in "?"
+ else:
+ return False
+ if callable(getattr(x, "__bool__", None)):
+ try:
+ x.__bool__()
+ except (TypeError, ValueError):
+ return False
+ return True
+ return False
+
+
+def _basic_slice_meta(ind, shape : tuple, strides : tuple, offset : int):
+ """
+ Give basic slicing index `ind` and array layout information produce
+ a 5-tuple (resulting_shape, resulting_strides, resulting_offset,
+ advanced_ind, resulting_advanced_ind_pos)
+ used to construct a view into underlying array over which advanced
+ indexing, if any, is to be performed.
+
+ Raises IndexError for invalid index `ind`.
+ """
+ _no_advanced_ind = tuple()
+ _no_advanced_pos = -1
+ if ind is Ellipsis:
+ return (shape, strides, offset, _no_advanced_ind, _no_advanced_pos)
+ elif ind is None:
+ return (
+ (1,) + shape,
+ (0,) + strides,
+ offset,
+ _no_advanced_ind,
+ _no_advanced_pos,
+ )
+ elif isinstance(ind, slice):
+ sl_start, sl_stop, sl_step = ind.indices(shape[0])
+ sh0 = _slice_len(sl_start, sl_stop, sl_step)
+ str0 = sl_step * strides[0]
+ new_strides = (
+ strides if (sl_step == 1 or sh0 == 0) else (str0,) + strides[1:]
+ )
+ new_shape = (sh0, ) + shape[1:]
+ is_empty = any(sh_i == 0 for sh_i in new_shape)
+ new_offset = offset if is_empty else offset + sl_start * strides[0]
+ return (
+ new_shape,
+ new_strides,
+ new_offset,
+ _no_advanced_ind,
+ _no_advanced_pos,
+ )
+ elif _is_boolean(ind):
+ if ind:
+ return (
+ (1,) + shape,
+ (0,) + strides,
+ offset,
+ _no_advanced_ind,
+ _no_advanced_pos,
+ )
+ else:
+ return (
+ (0,) + shape,
+ (0,) + strides,
+ offset,
+ _no_advanced_ind,
+ _no_advanced_pos,
+ )
+ elif _is_integral(ind):
+ ind = index(ind)
+ new_shape = shape[1:]
+ new_strides = strides[1:]
+ is_empty = any(sh_i == 0 for sh_i in new_shape)
+ if 0 <= ind < shape[0]:
+ new_offset = offset if is_empty else offset + ind * strides[0]
+ return (
+ new_shape,
+ new_strides,
+ new_offset,
+ _no_advanced_ind,
+ _no_advanced_pos,
+ )
+ elif -shape[0] <= ind < 0:
+ new_offset = (
+ offset if is_empty else offset + (shape[0] + ind) * strides[0]
+ )
+ return (
+ new_shape,
+ new_strides,
+ new_offset,
+ _no_advanced_ind,
+ _no_advanced_pos,
+ )
+ else:
+ raise IndexError(
+ "Index {0} is out of range for axes 0 with "
+ "size {1}".format(ind, shape[0]))
+ elif isinstance(ind, (ndarray, usm_ndarray)):
+ return (shape, strides, offset, (ind,), 0)
+ elif isinstance(ind, tuple):
+ axes_referenced = 0
+ ellipses_count = 0
+ newaxis_count = 0
+ explicit_index = 0
+ seen_arrays_yet = False
+ array_streak_started = False
+ array_streak_interrupted = False
+ for i in ind:
+ if i is None:
+ newaxis_count += 1
+ if array_streak_started:
+ array_streak_interrupted = True
+ elif i is Ellipsis:
+ ellipses_count += 1
+ if array_streak_started:
+ array_streak_interrupted = True
+ elif isinstance(i, slice):
+ axes_referenced += 1
+ if array_streak_started:
+ array_streak_interrupted = True
+ elif _is_boolean(i):
+ newaxis_count += 1
+ if array_streak_started:
+ array_streak_interrupted = True
+ elif _is_integral(i):
+ axes_referenced += 1
+ if not array_streak_started and array_streak_interrupted:
+ explicit_index += 1
+ elif isinstance(i, (ndarray, usm_ndarray)):
+ if not seen_arrays_yet:
+ seen_arrays_yet = True
+ array_streak_started = True
+ array_streak_interrupted = False
+ if array_streak_interrupted:
+ raise IndexError(
+ "Advanced indexing array specs may not be "
+ "separated by basic slicing specs."
+ )
+ dt_k = i.dtype.kind
+ if dt_k == "b" and i.ndim > 0:
+ axes_referenced += i.ndim
+ elif dt_k in "ui" and i.ndim > 0:
+ axes_referenced += 1
+ else:
+ raise IndexError(
+ "arrays used as indices must be of integer "
+ "(or boolean) type"
+ )
+ else:
+ raise IndexError(
+ "Only integers, slices (`:`), ellipsis (`...`), "
+ "dpctl.tensor.newaxis (`None`) and integer and "
+ "boolean arrays are valid indices."
+ )
+ if ellipses_count > 1:
+ raise IndexError(
+ "an index can only have a single ellipsis ('...')")
+ if axes_referenced > len(shape):
+ raise IndexError(
+ "too many indices for an array, array is "
+ "{0}-dimensional, but {1} were indexed".format(
+ len(shape), axes_referenced))
+ if ellipses_count:
+ ellipses_count = len(shape) - axes_referenced
+ new_shape_len = (newaxis_count + ellipses_count
+ + axes_referenced - explicit_index)
+ new_shape = list()
+ new_strides = list()
+ new_advanced_ind = list()
+ k = 0
+ new_advanced_start_pos = -1
+ advanced_start_pos_set = False
+ new_offset = offset
+ is_empty = False
+ array_streak = False
+ for i in range(len(ind)):
+ ind_i = ind[i]
+ if (ind_i is Ellipsis):
+ k_new = k + ellipses_count
+ new_shape.extend(shape[k:k_new])
+ new_strides.extend(strides[k:k_new])
+ if any(dim == 0 for dim in shape[k:k_new]):
+ is_empty = True
+ new_offset = offset
+ k = k_new
+ if array_streak:
+ array_streak = False
+ elif ind_i is None:
+ new_shape.append(1)
+ new_strides.append(0)
+ if array_streak:
+ array_streak = False
+ elif isinstance(ind_i, slice):
+ k_new = k + 1
+ sl_start, sl_stop, sl_step = ind_i.indices(shape[k])
+ sh_i = _slice_len(sl_start, sl_stop, sl_step)
+ str_i = (1 if sh_i == 0 else sl_step) * strides[k]
+ new_shape.append(sh_i)
+ new_strides.append(str_i)
+ if sh_i > 0 and not is_empty:
+ new_offset = new_offset + sl_start * strides[k]
+ if sh_i == 0:
+ is_empty = True
+ new_offset = offset
+ k = k_new
+ if array_streak:
+ array_streak = False
+ elif _is_boolean(ind_i):
+ new_shape.append(1 if ind_i else 0)
+ new_strides.append(0)
+ if array_streak:
+ array_streak = False
+ elif _is_integral(ind_i):
+ if array_streak:
+ if not isinstance(ind_i, (ndarray, usm_ndarray)):
+ ind_i = index(ind_i)
+ # integer will be converted to an array,
+ # still raise if OOB
+ if not (
+ 0 <= ind_i < shape[k] or -shape[k] <= ind_i < 0
+ ):
+ raise IndexError(
+ "Index {0} is out of range for axes "
+ "{1} with size {2}".format(ind_i, k, shape[k])
+ )
+ new_advanced_ind.append(ind_i)
+ k_new = k + 1
+ new_shape.extend(shape[k:k_new])
+ new_strides.extend(strides[k:k_new])
+ k = k_new
+ else:
+ ind_i = index(ind_i)
+ if 0 <= ind_i < shape[k]:
+ k_new = k + 1
+ if not is_empty:
+ new_offset = new_offset + ind_i * strides[k]
+ k = k_new
+ elif -shape[k] <= ind_i < 0:
+ k_new = k + 1
+ if not is_empty:
+ new_offset = (
+ new_offset + (shape[k] + ind_i) * strides[k]
+ )
+ k = k_new
+ else:
+ raise IndexError(
+ "Index {0} is out of range for axes "
+ "{1} with size {2}".format(ind_i, k, shape[k])
+ )
+ elif isinstance(ind_i, (ndarray, usm_ndarray)):
+ if not array_streak:
+ array_streak = True
+ if not advanced_start_pos_set:
+ new_advanced_start_pos = len(new_shape)
+ advanced_start_pos_set = True
+ new_advanced_ind.append(ind_i)
+ dt_k = ind_i.dtype.kind
+ if dt_k == "b":
+ k_new = k + ind_i.ndim
+ else:
+ k_new = k + 1
+ new_shape.extend(shape[k:k_new])
+ new_strides.extend(strides[k:k_new])
+ k = k_new
+ new_shape.extend(shape[k:])
+ new_strides.extend(strides[k:])
+ new_shape_len += len(shape) - k
+ return (
+ tuple(new_shape),
+ tuple(new_strides),
+ new_offset,
+ tuple(new_advanced_ind),
+ new_advanced_start_pos
+ )
+ else:
+ raise IndexError(
+ "Only integers, slices (`:`), ellipsis (`...`), "
+ "dpctl.tensor.newaxis (`None`) and integer and "
+ "boolean arrays are valid indices."
+ )
diff --git a/dpctl_ext/tensor/_sorting.py b/dpctl_ext/tensor/_sorting.py
index 24693a408889..42cd9e1b44be 100644
--- a/dpctl_ext/tensor/_sorting.py
+++ b/dpctl_ext/tensor/_sorting.py
@@ -29,12 +29,11 @@
import operator
from typing import NamedTuple
-import dpctl.tensor as dpt
import dpctl.utils as du
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
from ._numpy_helper import normalize_axis_index
@@ -98,7 +97,7 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None):
nd = x.ndim
if nd == 0:
axis = normalize_axis_index(axis, ndim=1, msg_prefix="axis")
- return dpt_ext.copy(x, order="C")
+ return dpt.copy(x, order="C")
else:
axis = normalize_axis_index(axis, ndim=nd, msg_prefix="axis")
a1 = axis + 1
@@ -109,7 +108,7 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None):
perm = [i for i in range(nd) if i != axis] + [
axis,
]
- arr = dpt_ext.permute_dims(x, perm)
+ arr = dpt.permute_dims(x, perm)
if kind is None:
kind = "stable"
if not isinstance(kind, str) or kind not in [
@@ -138,7 +137,7 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None):
_manager = du.SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
if arr.flags.c_contiguous:
- res = dpt_ext.empty_like(arr, order="C")
+ res = dpt.empty_like(arr, order="C")
ht_ev, impl_ev = impl_fn(
src=arr,
trailing_dims_to_sort=1,
@@ -148,12 +147,12 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None):
)
_manager.add_event_pair(ht_ev, impl_ev)
else:
- tmp = dpt_ext.empty_like(arr, order="C")
+ tmp = dpt.empty_like(arr, order="C")
ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=arr, dst=tmp, sycl_queue=exec_q, depends=dep_evs
)
_manager.add_event_pair(ht_ev, copy_ev)
- res = dpt_ext.empty_like(arr, order="C")
+ res = dpt.empty_like(arr, order="C")
ht_ev, impl_ev = impl_fn(
src=tmp,
trailing_dims_to_sort=1,
@@ -164,7 +163,7 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None):
_manager.add_event_pair(ht_ev, impl_ev)
if a1 != nd:
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- res = dpt_ext.permute_dims(res, inv_perm)
+ res = dpt.permute_dims(res, inv_perm)
return res
@@ -214,7 +213,7 @@ def argsort(x, axis=-1, descending=False, stable=True, kind=None):
nd = x.ndim
if nd == 0:
axis = normalize_axis_index(axis, ndim=1, msg_prefix="axis")
- return dpt_ext.zeros_like(
+ return dpt.zeros_like(
x, dtype=ti.default_device_index_type(x.sycl_queue), order="C"
)
else:
@@ -227,7 +226,7 @@ def argsort(x, axis=-1, descending=False, stable=True, kind=None):
perm = [i for i in range(nd) if i != axis] + [
axis,
]
- arr = dpt_ext.permute_dims(x, perm)
+ arr = dpt.permute_dims(x, perm)
if kind is None:
kind = "stable"
if not isinstance(kind, str) or kind not in [
@@ -257,7 +256,7 @@ def argsort(x, axis=-1, descending=False, stable=True, kind=None):
dep_evs = _manager.submitted_events
index_dt = ti.default_device_index_type(exec_q)
if arr.flags.c_contiguous:
- res = dpt_ext.empty_like(arr, dtype=index_dt, order="C")
+ res = dpt.empty_like(arr, dtype=index_dt, order="C")
ht_ev, impl_ev = impl_fn(
src=arr,
trailing_dims_to_sort=1,
@@ -267,12 +266,12 @@ def argsort(x, axis=-1, descending=False, stable=True, kind=None):
)
_manager.add_event_pair(ht_ev, impl_ev)
else:
- tmp = dpt_ext.empty_like(arr, order="C")
+ tmp = dpt.empty_like(arr, order="C")
ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=arr, dst=tmp, sycl_queue=exec_q, depends=dep_evs
)
_manager.add_event_pair(ht_ev, copy_ev)
- res = dpt_ext.empty_like(arr, dtype=index_dt, order="C")
+ res = dpt.empty_like(arr, dtype=index_dt, order="C")
ht_ev, impl_ev = impl_fn(
src=tmp,
trailing_dims_to_sort=1,
@@ -283,7 +282,7 @@ def argsort(x, axis=-1, descending=False, stable=True, kind=None):
_manager.add_event_pair(ht_ev, impl_ev)
if a1 != nd:
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- res = dpt_ext.permute_dims(res, inv_perm)
+ res = dpt.permute_dims(res, inv_perm)
return res
@@ -354,8 +353,8 @@ def top_k(x, k, /, *, axis=None, mode="largest"):
if k > 1:
raise ValueError(f"`k`={k} is out of bounds 1")
return TopKResult(
- dpt_ext.copy(x, order="C"),
- dpt_ext.zeros_like(
+ dpt.copy(x, order="C"),
+ dpt.zeros_like(
x, dtype=ti.default_device_index_type(x.sycl_queue)
),
)
@@ -373,7 +372,7 @@ def top_k(x, k, /, *, axis=None, mode="largest"):
perm = [i for i in range(nd) if i != axis] + [
axis,
]
- arr = dpt_ext.permute_dims(x, perm)
+ arr = dpt.permute_dims(x, perm)
n_search_dims = 1
res_sh = arr.shape[: nd - 1] + (k,)
@@ -386,14 +385,14 @@ def top_k(x, k, /, *, axis=None, mode="largest"):
res_usm_type = arr.usm_type
if arr.flags.c_contiguous:
- vals = dpt_ext.empty(
+ vals = dpt.empty(
res_sh,
dtype=arr.dtype,
usm_type=res_usm_type,
order="C",
sycl_queue=exec_q,
)
- inds = dpt_ext.empty(
+ inds = dpt.empty(
res_sh,
dtype=ti.default_device_index_type(exec_q),
usm_type=res_usm_type,
@@ -412,19 +411,19 @@ def top_k(x, k, /, *, axis=None, mode="largest"):
)
_manager.add_event_pair(ht_ev, impl_ev)
else:
- tmp = dpt_ext.empty_like(arr, order="C")
+ tmp = dpt.empty_like(arr, order="C")
ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray(
src=arr, dst=tmp, sycl_queue=exec_q, depends=dep_evs
)
_manager.add_event_pair(ht_ev, copy_ev)
- vals = dpt_ext.empty(
+ vals = dpt.empty(
res_sh,
dtype=arr.dtype,
usm_type=res_usm_type,
order="C",
sycl_queue=exec_q,
)
- inds = dpt_ext.empty(
+ inds = dpt.empty(
res_sh,
dtype=ti.default_device_index_type(exec_q),
usm_type=res_usm_type,
@@ -444,7 +443,7 @@ def top_k(x, k, /, *, axis=None, mode="largest"):
_manager.add_event_pair(ht_ev, impl_ev)
if axis is not None and a1 != nd:
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- vals = dpt_ext.permute_dims(vals, inv_perm)
- inds = dpt_ext.permute_dims(inds, inv_perm)
+ vals = dpt.permute_dims(vals, inv_perm)
+ inds = dpt.permute_dims(inds, inv_perm)
return TopKResult(vals, inds)
diff --git a/dpctl_ext/tensor/_statistical_functions.py b/dpctl_ext/tensor/_statistical_functions.py
index 5513dfa7a65f..c1544b84c6a7 100644
--- a/dpctl_ext/tensor/_statistical_functions.py
+++ b/dpctl_ext/tensor/_statistical_functions.py
@@ -25,12 +25,11 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
-import dpctl.tensor as dpt
import dpctl.utils as du
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_elementwise_impl as tei
import dpctl_ext.tensor._tensor_impl as ti
import dpctl_ext.tensor._tensor_reductions_impl as tri
@@ -66,7 +65,7 @@ def _var_impl(x, axis, correction, keepdims):
_manager = du.SequentialOrderManager[q]
dep_evs = _manager.submitted_events
if inp_dt != res_dt:
- buf = dpt_ext.empty_like(x, dtype=res_dt)
+ buf = dpt.empty_like(x, dtype=res_dt)
ht_e_buf, c_e1 = ti._copy_usm_ndarray_into_usm_ndarray(
src=x, dst=buf, sycl_queue=q, depends=dep_evs
)
@@ -74,18 +73,18 @@ def _var_impl(x, axis, correction, keepdims):
else:
buf = x
# calculate mean
- buf2 = dpt_ext.permute_dims(buf, perm)
+ buf2 = dpt.permute_dims(buf, perm)
res_shape = buf2.shape[: nd - red_nd]
# use keepdims=True path for later broadcasting
if red_nd == 0:
- mean_ary = dpt_ext.empty_like(buf)
+ mean_ary = dpt.empty_like(buf)
dep_evs = _manager.submitted_events
ht_e1, c_e2 = ti._copy_usm_ndarray_into_usm_ndarray(
src=buf, dst=mean_ary, sycl_queue=q, depends=dep_evs
)
_manager.add_event_pair(ht_e1, c_e2)
else:
- mean_ary = dpt_ext.empty(
+ mean_ary = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -103,8 +102,8 @@ def _var_impl(x, axis, correction, keepdims):
mean_ary_shape = res_shape + (1,) * red_nd
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- mean_ary = dpt_ext.permute_dims(
- dpt_ext.reshape(mean_ary, mean_ary_shape), inv_perm
+ mean_ary = dpt.permute_dims(
+ dpt.reshape(mean_ary, mean_ary_shape), inv_perm
)
# divide in-place to get mean
mean_ary_shape = mean_ary.shape
@@ -116,9 +115,9 @@ def _var_impl(x, axis, correction, keepdims):
_manager.add_event_pair(ht_e2, d_e1)
# subtract mean from original array to get deviations
- dev_ary = dpt_ext.empty_like(buf)
+ dev_ary = dpt.empty_like(buf)
if mean_ary_shape != buf.shape:
- mean_ary = dpt_ext.broadcast_to(mean_ary, buf.shape)
+ mean_ary = dpt.broadcast_to(mean_ary, buf.shape)
ht_e4, su_e = tei._subtract(
src1=buf, src2=mean_ary, dst=dev_ary, sycl_queue=q, depends=[d_e1]
)
@@ -130,11 +129,11 @@ def _var_impl(x, axis, correction, keepdims):
_manager.add_event_pair(ht_e5, sq_e)
# take sum of squared deviations
- dev_ary2 = dpt_ext.permute_dims(dev_ary, perm)
+ dev_ary2 = dpt.permute_dims(dev_ary, perm)
if red_nd == 0:
res = dev_ary
else:
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape,
dtype=res_dt,
usm_type=res_usm_type,
@@ -152,9 +151,7 @@ def _var_impl(x, axis, correction, keepdims):
if keepdims:
res_shape = res_shape + (1,) * red_nd
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- res = dpt_ext.permute_dims(
- dpt_ext.reshape(res, res_shape), inv_perm
- )
+ res = dpt.permute_dims(dpt.reshape(res, res_shape), inv_perm)
res_shape = res.shape
# when nelems - correction <= 0, yield nans
div = max(nelems - correction, 0)
@@ -215,7 +212,7 @@ def mean(x, axis=None, keepdims=False):
nelems *= x.shape[i]
sum_nd = len(axis)
perm = perm + list(axis)
- arr2 = dpt_ext.permute_dims(x, perm)
+ arr2 = dpt.permute_dims(x, perm)
res_shape = arr2.shape[: nd - sum_nd]
q = x.sycl_queue
inp_dt = x.dtype
@@ -226,12 +223,12 @@ def mean(x, axis=None, keepdims=False):
)
res_usm_type = x.usm_type
if sum_nd == 0:
- return dpt_ext.astype(x, res_dt, copy=True)
+ return dpt.astype(x, res_dt, copy=True)
_manager = du.SequentialOrderManager[q]
dep_evs = _manager.submitted_events
if tri._sum_over_axis_dtype_supported(inp_dt, res_dt, res_usm_type, q):
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e1, r_e = tri._sum_over_axis(
@@ -243,14 +240,14 @@ def mean(x, axis=None, keepdims=False):
)
_manager.add_event_pair(ht_e1, r_e)
else:
- tmp = dpt_ext.empty(
+ tmp = dpt.empty(
arr2.shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray(
src=arr2, dst=tmp, sycl_queue=q, depends=dep_evs
)
_manager.add_event_pair(ht_e_cpy, cpy_e)
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e_red, r_e = tri._sum_over_axis(
@@ -265,7 +262,7 @@ def mean(x, axis=None, keepdims=False):
if keepdims:
res_shape = res_shape + (1,) * sum_nd
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- res = dpt_ext.permute_dims(dpt_ext.reshape(res, res_shape), inv_perm)
+ res = dpt.permute_dims(dpt.reshape(res, res_shape), inv_perm)
dep_evs = _manager.submitted_events
ht_e2, div_e = tei._divide_by_scalar(
diff --git a/dpctl_ext/tensor/_stride_utils.pxi b/dpctl_ext/tensor/_stride_utils.pxi
new file mode 100644
index 000000000000..3caf8dd8fd1f
--- /dev/null
+++ b/dpctl_ext/tensor/_stride_utils.pxi
@@ -0,0 +1,314 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# distutils: language = c++
+# cython: language_level=3
+
+from cpython.mem cimport PyMem_Malloc
+from cpython.ref cimport Py_INCREF
+from cpython.tuple cimport PyTuple_New, PyTuple_SetItem
+
+
+cdef int ERROR_MALLOC = 1
+cdef int ERROR_INTERNAL = -1
+cdef int ERROR_INCORRECT_ORDER = 2
+cdef int ERROR_UNEXPECTED_STRIDES = 3
+
+cdef int USM_ARRAY_C_CONTIGUOUS = 1
+cdef int USM_ARRAY_F_CONTIGUOUS = 2
+cdef int USM_ARRAY_WRITABLE = 4
+
+
+cdef Py_ssize_t shape_to_elem_count(int nd, Py_ssize_t *shape_arr):
+ """
+ Computes number of elements in an array.
+ """
+ cdef Py_ssize_t count = 1
+ for i in range(nd):
+ count *= shape_arr[i]
+ return count
+
+
+cdef int _from_input_shape_strides(
+ int nd, object shape, object strides, int itemsize, char order,
+ Py_ssize_t **shape_ptr, Py_ssize_t **strides_ptr,
+ Py_ssize_t *nelems, Py_ssize_t *min_disp, Py_ssize_t *max_disp,
+ int *contig
+):
+ """
+ Arguments: nd, shape, strides, itemsize, order
+ Modifies:
+ shape_ptr - pointer to C array for shape values
+ stride_ptr - pointer to C array for strides values
+ nelems - Number of elements in array
+ min_disp = min( dot(strides, index), index for shape)
+ max_disp = max( dot(strides, index), index for shape)
+ contig = enumeration for array contiguity
+ Returns: 0 on success, error code otherwise.
+ On success pointers point to allocated arrays,
+ Otherwise they are set to NULL
+ """
+ cdef int i
+ cdef int j
+ cdef bint all_incr = 1
+ cdef bint all_decr = 1
+ cdef bint strides_inspected = 0
+ cdef Py_ssize_t elem_count = 1
+ cdef Py_ssize_t min_shift = 0
+ cdef Py_ssize_t max_shift = 0
+ cdef Py_ssize_t str_i
+ cdef Py_ssize_t* shape_arr
+ cdef Py_ssize_t* strides_arr
+
+ if (int(order) not in [ord("C"), ord("F"), ord("c"), ord("f")]):
+ return ERROR_INCORRECT_ORDER
+
+ # 0-d array
+ if (nd == 0):
+ contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
+ nelems[0] = 1
+ min_disp[0] = 0
+ max_disp[0] = 0
+ shape_ptr[0] = (0)
+ strides_ptr[0] = (0)
+ return 0
+
+ shape_arr = PyMem_Malloc(nd * sizeof(Py_ssize_t))
+ if (not shape_arr):
+ return ERROR_MALLOC
+ shape_ptr[0] = shape_arr
+ for i in range(0, nd):
+ shape_arr[i] = shape[i]
+ elem_count *= shape_arr[i]
+ if elem_count == 0:
+ contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
+ nelems[0] = 1
+ min_disp[0] = 0
+ max_disp[0] = 0
+ if strides is None:
+ strides_ptr[0] = (0)
+ else:
+ strides_arr = PyMem_Malloc(nd * sizeof(Py_ssize_t))
+ if (not strides_arr):
+ PyMem_Free(shape_ptr[0])
+ shape_ptr[0] = (0)
+ return ERROR_MALLOC
+ strides_ptr[0] = strides_arr
+ for i in range(0, nd):
+ strides_arr[i] = strides[i]
+ return 0
+ nelems[0] = elem_count
+ if (strides is None):
+ # no need to allocate and populate strides
+ if order == ord("C") or order == ord("c"):
+ contig[0] = USM_ARRAY_C_CONTIGUOUS
+ else:
+ contig[0] = USM_ARRAY_F_CONTIGUOUS
+ if nd == 1:
+ contig[0] = USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS
+ else:
+ j = 0
+ for i in range(nd):
+ if shape_arr[i] > 1:
+ j = j + 1
+ if j < 2:
+ contig[0] = USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS
+ min_disp[0] = 0
+ max_disp[0] = (elem_count - 1)
+ strides_ptr[0] = (0)
+ return 0
+ elif ((isinstance(strides, (list, tuple)) or hasattr(strides, "tolist"))
+ and len(strides) == nd):
+ strides_arr = PyMem_Malloc(nd * sizeof(Py_ssize_t))
+ if (not strides_arr):
+ PyMem_Free(shape_ptr[0])
+ shape_ptr[0] = (0)
+ return ERROR_MALLOC
+ strides_ptr[0] = strides_arr
+ for i in range(0, nd):
+ str_i = strides[i]
+ strides_arr[i] = str_i
+ if str_i > 0:
+ max_shift += str_i * (shape_arr[i] - 1)
+ else:
+ min_shift += str_i * (shape_arr[i] - 1)
+ min_disp[0] = min_shift
+ max_disp[0] = max_shift
+ if max_shift == min_shift + (elem_count - 1):
+ if elem_count == 1:
+ contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
+ return 0
+ if nd == 1:
+ if strides_arr[0] == 1:
+ contig[0] = USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS
+ else:
+ contig[0] = 0
+ return 0
+ i = 0
+ while i < nd:
+ if shape_arr[i] == 1:
+ i = i + 1
+ continue
+ j = i + 1
+ while (j < nd and shape_arr[j] == 1):
+ j = j + 1
+ if j < nd:
+ strides_inspected = 1
+ if all_incr:
+ all_incr = (
+ (strides_arr[i] > 0) and
+ (strides_arr[j] > 0) and
+ (strides_arr[i] <= strides_arr[j])
+ )
+ if all_decr:
+ all_decr = (
+ (strides_arr[i] > 0) and
+ (strides_arr[j] > 0) and
+ (strides_arr[i] >= strides_arr[j])
+ )
+ i = j
+ else:
+ if not strides_inspected:
+ # all dimensions have size 1 except
+ # dimension 'i'. Array is both C and F
+ # contiguous
+ strides_inspected = 1
+ all_incr = (strides_arr[i] == 1)
+ all_decr = all_incr
+ break
+ # should only set contig flags on actually obtained
+ # values, rather than default values
+ all_incr = all_incr and strides_inspected
+ all_decr = all_decr and strides_inspected
+ if all_incr and all_decr:
+ contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
+ elif all_incr:
+ contig[0] = USM_ARRAY_F_CONTIGUOUS
+ elif all_decr:
+ contig[0] = USM_ARRAY_C_CONTIGUOUS
+ else:
+ contig[0] = 0
+ return 0
+ else:
+ contig[0] = 0 # non-contiguous
+ return 0
+ else:
+ PyMem_Free(shape_ptr[0])
+ shape_ptr[0] = (0)
+ return ERROR_UNEXPECTED_STRIDES
+ # return ERROR_INTERNAL
+
+
+cdef object _make_int_tuple(int nd, const Py_ssize_t *ary):
+ """
+ Makes Python tuple from C array
+ """
+ cdef tuple res
+ cdef object tmp
+ if (ary):
+ res = PyTuple_New(nd)
+ for i in range(nd):
+ tmp = ary[i]
+ Py_INCREF(tmp) # SetItem steals the reference
+ PyTuple_SetItem(res, i, tmp)
+ return res
+ else:
+ return None
+
+
+cdef object _make_reversed_int_tuple(int nd, const Py_ssize_t *ary):
+ """
+ Makes Python reversed tuple from C array
+ """
+ cdef tuple res
+ cdef object tmp
+ cdef int i
+ cdef int nd_1
+ if (ary):
+ res = PyTuple_New(nd)
+ nd_1 = nd - 1
+ for i in range(nd):
+ tmp = ary[i]
+ Py_INCREF(tmp) # SetItem steals the reference
+ PyTuple_SetItem(res, nd_1 - i, tmp)
+ return res
+ else:
+ return None
+
+
+cdef object _c_contig_strides(int nd, Py_ssize_t *shape):
+ """
+ Makes Python tuple for strides of C-contiguous array
+ """
+ cdef tuple cc_strides = PyTuple_New(nd)
+ cdef object si = 1
+ cdef int i
+ cdef int nd_1 = nd - 1
+ for i in range(0, nd):
+ Py_INCREF(si) # SetItem steals the reference
+ PyTuple_SetItem(cc_strides, nd_1 - i, si)
+ si = si * shape[nd_1 - i]
+ return cc_strides
+
+
+cdef object _f_contig_strides(int nd, Py_ssize_t *shape):
+ """
+ Makes Python tuple for strides of F-contiguous array
+ """
+ cdef tuple fc_strides = PyTuple_New(nd)
+ cdef object si = 1
+ for i in range(0, nd):
+ Py_INCREF(si) # SetItem steals the reference
+ PyTuple_SetItem(fc_strides, i, si)
+ si = si * shape[i]
+ return fc_strides
+
+cdef object _swap_last_two(tuple t):
+ """
+ Swap last two elements of a tuple
+ """
+ cdef int nd = len(t)
+ cdef tuple res
+ cdef int i
+ cdef object tmp
+ if (nd < 2):
+ return t
+ res = PyTuple_New(nd)
+ # copy all elements except the last two
+ for i in range(0, nd-2):
+ tmp = t[i]
+ Py_INCREF(tmp) # SetItem steals the reference
+ PyTuple_SetItem(res, i, tmp)
+ # swap the last two elements
+ tmp = t[nd-1]
+ Py_INCREF(tmp) # SetItem steals
+ PyTuple_SetItem(res, nd - 2, tmp)
+ tmp = t[nd-2]
+ Py_INCREF(tmp) # SetItem steals
+ PyTuple_SetItem(res, nd - 1, tmp)
+ return res
diff --git a/dpctl_ext/tensor/_testing.py b/dpctl_ext/tensor/_testing.py
index 5c7e9be0e2e3..4c9f5ebac9a4 100644
--- a/dpctl_ext/tensor/_testing.py
+++ b/dpctl_ext/tensor/_testing.py
@@ -26,13 +26,12 @@
# THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************
-import dpctl.tensor as dpt
import dpctl.utils as du
import numpy as np
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
from ._manipulation_functions import _broadcast_shape_impl
from ._type_utils import _to_device_supported_dtype
@@ -44,82 +43,74 @@ def _allclose_complex_fp(z1, z2, atol, rtol, equal_nan):
z2r = dpt.real(z2)
z2i = dpt.imag(z2)
if equal_nan:
- check1 = dpt_ext.all(
- dpt_ext.isnan(z1r) == dpt_ext.isnan(z2r)
- ) and dpt_ext.all(dpt_ext.isnan(z1i) == dpt_ext.isnan(z2i))
+ check1 = dpt.all(dpt.isnan(z1r) == dpt.isnan(z2r)) and dpt.all(
+ dpt.isnan(z1i) == dpt.isnan(z2i)
+ )
else:
check1 = (
- dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z1r)))
- and dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z1i)))
+ dpt.logical_not(dpt.any(dpt.isnan(z1r)))
+ and dpt.logical_not(dpt.any(dpt.isnan(z1i)))
) and (
- dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z2r)))
- and dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z2i)))
+ dpt.logical_not(dpt.any(dpt.isnan(z2r)))
+ and dpt.logical_not(dpt.any(dpt.isnan(z2i)))
)
if not check1:
return check1
- mr = dpt_ext.isinf(z1r)
- mi = dpt_ext.isinf(z1i)
- check2 = dpt_ext.all(mr == dpt_ext.isinf(z2r)) and dpt_ext.all(
- mi == dpt_ext.isinf(z2i)
- )
+ mr = dpt.isinf(z1r)
+ mi = dpt.isinf(z1i)
+ check2 = dpt.all(mr == dpt.isinf(z2r)) and dpt.all(mi == dpt.isinf(z2i))
if not check2:
return check2
- check3 = dpt_ext.all(z1r[mr] == z2r[mr]) and dpt_ext.all(z1i[mi] == z2i[mi])
+ check3 = dpt.all(z1r[mr] == z2r[mr]) and dpt.all(z1i[mi] == z2i[mi])
if not check3:
return check3
- mr = dpt_ext.isfinite(z1r)
- mi = dpt_ext.isfinite(z1i)
+ mr = dpt.isfinite(z1r)
+ mi = dpt.isfinite(z1i)
mv1 = z1r[mr]
mv2 = z2r[mr]
- check4 = dpt_ext.all(
- dpt_ext.abs(mv1 - mv2)
- < dpt_ext.maximum(
- atol, rtol * dpt_ext.maximum(dpt_ext.abs(mv1), dpt_ext.abs(mv2))
- )
+ check4 = dpt.all(
+ dpt.abs(mv1 - mv2)
+ < dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)))
)
if not check4:
return check4
mv1 = z1i[mi]
mv2 = z2i[mi]
- check5 = dpt_ext.all(
- dpt_ext.abs(mv1 - mv2)
- <= dpt_ext.maximum(
- atol, rtol * dpt_ext.maximum(dpt_ext.abs(mv1), dpt_ext.abs(mv2))
- )
+ check5 = dpt.all(
+ dpt.abs(mv1 - mv2)
+ <= dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)))
)
return check5
def _allclose_real_fp(r1, r2, atol, rtol, equal_nan):
if equal_nan:
- check1 = dpt_ext.all(dpt_ext.isnan(r1) == dpt_ext.isnan(r2))
+ check1 = dpt.all(dpt.isnan(r1) == dpt.isnan(r2))
else:
- check1 = dpt_ext.logical_not(
- dpt_ext.any(dpt_ext.isnan(r1))
- ) and dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(r2)))
+ check1 = dpt.logical_not(dpt.any(dpt.isnan(r1))) and dpt.logical_not(
+ dpt.any(dpt.isnan(r2))
+ )
if not check1:
return check1
- mr = dpt_ext.isinf(r1)
- check2 = dpt_ext.all(mr == dpt_ext.isinf(r2))
+ mr = dpt.isinf(r1)
+ check2 = dpt.all(mr == dpt.isinf(r2))
if not check2:
return check2
- check3 = dpt_ext.all(r1[mr] == r2[mr])
+ check3 = dpt.all(r1[mr] == r2[mr])
if not check3:
return check3
- m = dpt_ext.isfinite(r1)
+ m = dpt.isfinite(r1)
mv1 = r1[m]
mv2 = r2[m]
- check4 = dpt_ext.all(
- dpt_ext.abs(mv1 - mv2)
- <= dpt_ext.maximum(
- atol, rtol * dpt_ext.maximum(dpt_ext.abs(mv1), dpt_ext.abs(mv2))
- )
+ check4 = dpt.all(
+ dpt.abs(mv1 - mv2)
+ <= dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2)))
)
return check4
def _allclose_others(r1, r2):
- return dpt_ext.all(r1 == r2)
+ return dpt.all(r1 == r2)
def allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False):
@@ -160,11 +151,11 @@ def allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False):
else:
res_dt = np.promote_types(b1.dtype, b2.dtype)
res_dt = _to_device_supported_dtype(res_dt, exec_q.sycl_device)
- b1 = dpt_ext.astype(b1, res_dt)
- b2 = dpt_ext.astype(b2, res_dt)
+ b1 = dpt.astype(b1, res_dt)
+ b2 = dpt.astype(b2, res_dt)
- b1 = dpt_ext.broadcast_to(b1, res_sh)
- b2 = dpt_ext.broadcast_to(b2, res_sh)
+ b1 = dpt.broadcast_to(b1, res_sh)
+ b2 = dpt.broadcast_to(b2, res_sh)
k = b1.dtype.kind
if k == "c":
diff --git a/dpctl_ext/tensor/_type_utils.py b/dpctl_ext/tensor/_type_utils.py
index 1e386e15dfa3..8c15053cb4c1 100644
--- a/dpctl_ext/tensor/_type_utils.py
+++ b/dpctl_ext/tensor/_type_utils.py
@@ -28,12 +28,11 @@
from __future__ import annotations
-import dpctl.tensor as dpt
import numpy as np
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
@@ -450,7 +449,7 @@ def _resolve_weak_types_all_py_ints(o1_dtype, o2_dtype, dev):
o1_dtype, WeakIntegralType
):
o1_val = o1_dtype.get()
- o2_iinfo = dpt_ext.iinfo(o2_dtype)
+ o2_iinfo = dpt.iinfo(o2_dtype)
if (o1_val < o2_iinfo.min) or (o1_val > o2_iinfo.max):
return dpt.dtype(np.min_scalar_type(o1_val)), o2_dtype
return o2_dtype, o2_dtype
@@ -473,7 +472,7 @@ def _resolve_weak_types_all_py_ints(o1_dtype, o2_dtype, dev):
o2_dtype, WeakIntegralType
):
o2_val = o2_dtype.get()
- o1_iinfo = dpt_ext.iinfo(o1_dtype)
+ o1_iinfo = dpt.iinfo(o1_dtype)
if (o2_val < o1_iinfo.min) or (o2_val > o1_iinfo.max):
return o1_dtype, dpt.dtype(np.min_scalar_type(o2_val))
return o1_dtype, o1_dtype
@@ -936,8 +935,8 @@ def _default_accumulation_dtype(inp_dt, q):
res_dt = inp_dt
elif inp_kind in "u":
res_dt = dpt.dtype(ti.default_device_uint_type(q))
- res_ii = dpt_ext.iinfo(res_dt)
- inp_ii = dpt_ext.iinfo(inp_dt)
+ res_ii = dpt.iinfo(res_dt)
+ inp_ii = dpt.iinfo(inp_dt)
if inp_ii.min >= res_ii.min and inp_ii.max <= res_ii.max:
pass
else:
@@ -956,7 +955,7 @@ def _default_accumulation_dtype_fp_types(inp_dt, q):
inp_kind = inp_dt.kind
if inp_kind in "biu":
res_dt = dpt.dtype(ti.default_device_fp_type(q))
- can_cast_v = dpt_ext.can_cast(inp_dt, res_dt)
+ can_cast_v = dpt.can_cast(inp_dt, res_dt)
if not can_cast_v:
_fp64 = q.sycl_device.has_aspect_fp64
res_dt = dpt.float64 if _fp64 else dpt.float32
diff --git a/dpctl_ext/tensor/_types.pxi b/dpctl_ext/tensor/_types.pxi
new file mode 100644
index 000000000000..090750658f4b
--- /dev/null
+++ b/dpctl_ext/tensor/_types.pxi
@@ -0,0 +1,169 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# these typenum values are aligned to values in NumPy
+cdef:
+ int UAR_BOOL = 0 # pragma: no cover
+ int UAR_BYTE = 1 # pragma: no cover
+ int UAR_UBYTE = 2 # pragma: no cover
+ int UAR_SHORT = 3 # pragma: no cover
+ int UAR_USHORT = 4 # pragma: no cover
+ int UAR_INT = 5 # pragma: no cover
+ int UAR_UINT = 6 # pragma: no cover
+ int UAR_LONG = 7 # pragma: no cover
+ int UAR_ULONG = 8 # pragma: no cover
+ int UAR_LONGLONG = 9 # pragma: no cover
+ int UAR_ULONGLONG = 10 # pragma: no cover
+ int UAR_FLOAT = 11 # pragma: no cover
+ int UAR_DOUBLE = 12 # pragma: no cover
+ int UAR_CFLOAT = 14 # pragma: no cover
+ int UAR_CDOUBLE = 15 # pragma: no cover
+ int UAR_TYPE_SENTINEL = 17 # pragma: no cover
+ int UAR_HALF = 23 # pragma: no cover
+
+cdef int type_bytesize(int typenum):
+ """
+ NPY_BOOL=0 : 1
+ NPY_BYTE=1 : 1
+ NPY_UBYTE=2 : 1
+ NPY_SHORT=3 : 2
+ NPY_USHORT=4 : 2
+ NPY_INT=5 : sizeof(int)
+ NPY_UINT=6 : sizeof(unsigned int)
+ NPY_LONG=7 : sizeof(long)
+ NPY_ULONG=8 : sizeof(unsigned long)
+ NPY_LONGLONG=9 : 8
+ NPY_ULONGLONG=10 : 8
+ NPY_FLOAT=11 : 4
+ NPY_DOUBLE=12 : 8
+ NPY_LONGDOUBLE=13 : N/A
+ NPY_CFLOAT=14 : 8
+ NPY_CDOUBLE=15 : 16
+ NPY_CLONGDOUBLE=16 : N/A
+ NPY_HALF=23 : 2
+ """
+ cdef int *type_to_bytesize = [
+ 1,
+ sizeof(char),
+ sizeof(unsigned char),
+ sizeof(short),
+ sizeof(unsigned short),
+ sizeof(int),
+ sizeof(unsigned int),
+ sizeof(long),
+ sizeof(unsigned long),
+ sizeof(long long),
+ sizeof(unsigned long long),
+ sizeof(float),
+ sizeof(double), -1,
+ sizeof(float complex),
+ sizeof(double complex), -1]
+
+ if typenum < 0: # pragma: no cover
+ return -1
+ if typenum > 16:
+ if typenum == 23:
+ return 2
+ return -1
+
+ return type_to_bytesize[typenum]
+
+
+cdef str _make_typestr(int typenum):
+ """
+ Make typestring from type number
+ """
+ cdef type_to_str = ["|b", "|i", "|u", "|i", "|u",
+ "|i", "|u", "|i", "|u", "|i", "|u",
+ "|f", "|f", "", "|c", "|c", ""]
+
+ if (typenum < 0): # pragma: no cover
+ return ""
+ if (typenum > 16):
+ if (typenum == 23):
+ return "|f2"
+ return "" # pragma: no cover
+
+ return type_to_str[typenum] + str(type_bytesize(typenum))
+
+
+cdef int typenum_from_format(str s):
+ """
+ Internal utility to convert string describing type format
+
+ Format is [<|=>][biufc]#
+ Shortcuts for formats are i, u, d, D
+ """
+ if not s:
+ return -1
+ try:
+ dt = np.dtype(s)
+ except Exception:
+ return -1
+ if (dt.byteorder == ">"):
+ return -2
+ return dt.num
+
+
+cdef int descr_to_typenum(object dtype):
+ """
+ Returns typenum for argument dtype that has attribute descr,
+ assumed numpy.dtype
+ """
+ obj = getattr(dtype, "descr")
+ if (not isinstance(obj, list) or len(obj) != 1):
+ return -1 # token for ValueError
+ obj = obj[0]
+ if (
+ not isinstance(obj, tuple) or len(obj) != 2 or obj[0]
+ ): # pragma: no cover
+ return -1
+ obj = obj[1]
+ if not isinstance(obj, str): # pragma: no cover
+ return -1
+ return typenum_from_format(obj)
+
+
+cdef int dtype_to_typenum(dtype):
+ if isinstance(dtype, str):
+ return typenum_from_format(dtype)
+ elif isinstance(dtype, bytes):
+ return typenum_from_format(dtype.decode("UTF-8"))
+ elif hasattr(dtype, "descr"):
+ return descr_to_typenum(dtype)
+ else:
+ try:
+ dt = np.dtype(dtype)
+ except TypeError:
+ return -3
+ except Exception: # pragma: no cover
+ return -1
+ if hasattr(dt, "descr"):
+ return descr_to_typenum(dt)
+ else: # pragma: no cover
+ return -3 # token for TypeError
diff --git a/dpctl_ext/tensor/_usmarray.pxd b/dpctl_ext/tensor/_usmarray.pxd
new file mode 100644
index 000000000000..ccb8f4c796b7
--- /dev/null
+++ b/dpctl_ext/tensor/_usmarray.pxd
@@ -0,0 +1,88 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# distutils: language = c++
+# cython: language_level=3
+
+cimport dpctl
+
+
+cdef public api int USM_ARRAY_C_CONTIGUOUS
+cdef public api int USM_ARRAY_F_CONTIGUOUS
+cdef public api int USM_ARRAY_WRITABLE
+
+cdef public api int UAR_BOOL
+cdef public api int UAR_BYTE
+cdef public api int UAR_UBYTE
+cdef public api int UAR_SHORT
+cdef public api int UAR_USHORT
+cdef public api int UAR_INT
+cdef public api int UAR_UINT
+cdef public api int UAR_LONG
+cdef public api int UAR_ULONG
+cdef public api int UAR_LONGLONG
+cdef public api int UAR_ULONGLONG
+cdef public api int UAR_FLOAT
+cdef public api int UAR_DOUBLE
+cdef public api int UAR_CFLOAT
+cdef public api int UAR_CDOUBLE
+cdef public api int UAR_TYPE_SENTINEL
+cdef public api int UAR_HALF
+
+
+cdef api class usm_ndarray [object PyUSMArrayObject, type PyUSMArrayType]:
+ # data fields
+ cdef char* data_
+ cdef int nd_
+ cdef Py_ssize_t *shape_
+ cdef Py_ssize_t *strides_
+ cdef int typenum_
+ cdef int flags_
+ cdef object base_
+ cdef object array_namespace_
+ # make usm_ndarray weak-referenceable
+ cdef object __weakref__
+
+ cdef void _reset(usm_ndarray self)
+ cdef void _cleanup(usm_ndarray self)
+ cdef Py_ssize_t get_offset(usm_ndarray self) except *
+
+ cdef char* get_data(self)
+ cdef int get_ndim(self)
+ cdef Py_ssize_t * get_shape(self)
+ cdef Py_ssize_t * get_strides(self)
+ cdef int get_typenum(self)
+ cdef int get_itemsize(self)
+ cdef int get_flags(self)
+ cdef object get_base(self)
+ cdef dpctl.DPCTLSyclQueueRef get_queue_ref(self) except *
+ cdef dpctl.SyclQueue get_sycl_queue(self)
+
+ cdef _set_writable_flag(self, int)
+
+ cdef __cythonbufferdefaults__ = {"mode": "strided"}
diff --git a/dpctl_ext/tensor/_usmarray.pyx b/dpctl_ext/tensor/_usmarray.pyx
new file mode 100644
index 000000000000..f5bca9b1635d
--- /dev/null
+++ b/dpctl_ext/tensor/_usmarray.pyx
@@ -0,0 +1,1986 @@
+# *****************************************************************************
+# Copyright (c) 2026, Intel Corporation
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# - Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+# - Neither the name of the copyright holder nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# distutils: language = c++
+# cython: language_level=3
+# cython: linetrace=True
+
+import dpctl
+import dpctl.memory as dpmem
+import numpy as np
+
+from dpctl._backend cimport DPCTLSyclUSMRef
+from dpctl._sycl_device_factory cimport _cached_default_device
+
+# TODO: remove it when dpnp fully migrates dpctl/tensor
+import dpctl_ext
+
+from ._data_types import bool as dpt_bool
+from ._device import Device
+from ._print import usm_ndarray_repr, usm_ndarray_str
+
+cimport dpctl as c_dpctl
+cimport dpctl.memory as c_dpmem
+from cpython.mem cimport PyMem_Free
+from cpython.tuple cimport PyTuple_New, PyTuple_SetItem
+
+from . cimport _dlpack as c_dlpack
+
+from enum import IntEnum
+
+from . import _flags
+from ._dlpack import get_build_dlpack_version
+from ._tensor_impl import default_device_fp_type
+
+include "_stride_utils.pxi"
+include "_types.pxi"
+include "_slicing.pxi"
+
+
+class DLDeviceType(IntEnum):
+ """
+ An :class:`enum.IntEnum` for the types of DLDevices supported by the DLPack
+ protocol.
+
+ ``kDLCPU``:
+ CPU (host) device
+ ``kDLCUDA``:
+ CUDA GPU device
+ ``kDLCUDAHost``:
+ Pinned CUDA CPU memory by cudaMallocHost
+ ``kDLOpenCL``:
+ OpenCL device
+ ``kDLVulkan``:
+ Vulkan buffer
+ ``kDLMetal``:
+ Metal for Apple GPU
+ ``kDLVPI``:
+ Verilog simulator buffer
+ ``kDLROCM``:
+ ROCm GPU device
+ ``kDLROCMHost``:
+ Pinned ROCm CPU memory allocated by hipMallocHost
+ ``kDLExtDev``:
+ Reserved extension device type used to test new devices
+ ``kDLCUDAManaged``:
+ CUDA managed/unified memory allocated by cudaMallocManaged
+ ``kDLOneAPI``:
+ Unified shared memory allocated on a oneAPI non-partitioned device
+ ``kDLWebGPU``:
+ Device support for WebGPU standard
+ ``kDLHexagon``:
+ Qualcomm Hexagon DSP
+ ``kDLMAIA``:
+ Microsoft MAIA device
+ ``kDLTrn``:
+ AWS Trainium device
+ """
+ kDLCPU = c_dlpack.device_CPU
+ kDLCUDA = c_dlpack.device_CUDA
+ kDLCUDAHost = c_dlpack.device_CUDAHost
+ kDLCUDAManaged = c_dlpack.device_CUDAManaged
+ kDLROCM = c_dlpack.device_DLROCM
+ kDLROCMHost = c_dlpack.device_ROCMHost
+ kDLOpenCL = c_dlpack.device_OpenCL
+ kDLVulkan = c_dlpack.device_Vulkan
+ kDLMetal = c_dlpack.device_Metal
+ kDLVPI = c_dlpack.device_VPI
+ kDLOneAPI = c_dlpack.device_OneAPI
+ kDLWebGPU = c_dlpack.device_WebGPU
+ kDLHexagon = c_dlpack.device_Hexagon
+ kDLMAIA = c_dlpack.device_MAIA
+ kDLTrn = c_dlpack.device_Trn
+
+
+cdef class InternalUSMArrayError(Exception):
+ """
+ An InternalUSMArrayError exception is raised when internal
+ inconsistency has been detected in :class:`.usm_ndarray`.
+ """
+ pass
+
+
+cdef object _as_zero_dim_ndarray(object usm_ary):
+ "Convert size-1 array to NumPy 0d array"
+ mem_view = dpmem.as_usm_memory(usm_ary)
+ usm_ary.sycl_queue.wait()
+ host_buf = mem_view.copy_to_host()
+ view = host_buf.view(usm_ary.dtype)
+ view.shape = tuple()
+ return view
+
+
+cdef inline void _check_0d_scalar_conversion(object usm_ary) except *:
+ "Raise TypeError if array cannot be converted to a Python scalar"
+ if (usm_ary.ndim != 0):
+ raise TypeError(
+ "only 0-dimensional arrays can be converted to Python scalars"
+ )
+
+
+cdef int _copy_writable(int lhs_flags, int rhs_flags):
+ "Copy the WRITABLE flag to lhs_flags from rhs_flags"
+ return (lhs_flags & ~USM_ARRAY_WRITABLE) | (rhs_flags & USM_ARRAY_WRITABLE)
+
+
+cdef bint _is_host_cpu(object dl_device):
+ "Check if dl_device denotes (kDLCPU, 0)"
+ cdef object dl_type
+ cdef object dl_id
+ cdef Py_ssize_t n_elems = -1
+
+ try:
+ n_elems = len(dl_device)
+ except TypeError:
+ pass
+
+ if n_elems != 2:
+ return False
+
+ dl_type = dl_device[0]
+ dl_id = dl_device[1]
+ if isinstance(dl_type, str):
+ return (dl_type == "kDLCPU" and dl_id == 0)
+
+ return (dl_type == DLDeviceType.kDLCPU) and (dl_id == 0)
+
+
+cdef void _validate_and_use_stream(
+ object stream, c_dpctl.SyclQueue self_queue
+) except *:
+ if (stream is None or stream == self_queue):
+ pass
+ else:
+ if not isinstance(stream, dpctl.SyclQueue):
+ raise TypeError(
+ "stream argument type was expected to be dpctl.SyclQueue,"
+ f" got {type(stream)} instead"
+ )
+ ev = self_queue.submit_barrier()
+ stream.submit_barrier(dependent_events=[ev])
+
+cdef class usm_ndarray:
+ """ usm_ndarray(shape, dtype=None, strides=None, buffer="device", \
+ offset=0, order="C", buffer_ctor_kwargs=dict(), \
+ array_namespace=None)
+
+ An array object represents a multidimensional tensor of numeric
+ elements stored in a USM allocation on a SYCL device.
+
+ Args:
+ shape (int, tuple):
+ Shape of the array to be created.
+ dtype (str, dtype):
+ Array data type, i.e. the type of array elements.
+ If ``dtype`` has the value ``None``, it is determined by default
+ floating point type supported by target device.
+ The supported types are
+
+ ``bool``:
+ boolean type
+ ``int8``, ``int16``, ``int32``, ``int64``:
+ signed integer types
+ ``uint8``, ``uint16``, ``uint32``, ``uint64``:
+ unsigned integer types
+ ``float16``:
+ half-precision floating type,
+ supported if target device's property
+ ``has_aspect_fp16`` is ``True``
+ ``float32``, ``complex64``:
+ single-precision real and complex floating types
+ ``float64``, ``complex128``:
+ double-precision real and complex floating
+ types, supported if target device's property
+ ``has_aspect_fp64`` is ``True``.
+
+ Default: ``None``.
+ strides (tuple, optional):
+ Strides of the array to be created in elements.
+ If ``strides`` has the value ``None``, it is determined by the
+ ``shape`` of the array and the requested ``order``.
+ Default: ``None``.
+ buffer (str, object, optional):
+ A string corresponding to the type of USM allocation to make,
+ or a Python object representing a USM memory allocation, i.e.
+ :class:`dpctl.memory.MemoryUSMDevice`,
+ :class:`dpctl.memory.MemoryUSMShared`, or
+ :class:`dpctl.memory.MemoryUSMHost`. Recognized strings are
+ ``"device"``, ``"shared"``, or ``"host"``. Additional arguments to
+ the USM memory allocators can be passed in a dictionary specified
+ via ``buffer_ctor_kwrds`` keyword parameter.
+ Default: ``"device"``.
+ offset (int, optional):
+ Offset of the array element with all zero indexes relative to the
+ start of the provided `buffer` in elements. The argument is ignored
+ if the ``buffer`` value is a string and the memory is allocated by
+ the constructor. Default: ``0``.
+ order ({"C", "F"}, optional):
+ The memory layout of the array when constructing using a new
+ allocation. Value ``"C"`` corresponds to C-contiguous, or row-major
+ memory layout, while value ``"F"`` corresponds to F-contiguous, or
+ column-major layout. Default: ``"C"``.
+ buffer_ctor_kwargs (dict, optional):
+ Dictionary with keyword parameters to use when creating a new USM
+ memory allocation. See :class:`dpctl.memory.MemoryUSMShared` for
+ supported keyword arguments.
+ array_namespace (module, optional):
+ Array namespace module associated with this array.
+ Default: ``None``.
+
+ ``buffer`` can be ``"shared"``, ``"host"``, ``"device"`` to allocate
+ new device memory by calling respective constructor with
+ the specified ``buffer_ctor_kwrds``; ``buffer`` can be an
+ instance of :class:`dpctl.memory.MemoryUSMShared`,
+ :class:`dpctl.memory.MemoryUSMDevice`, or
+ :class:`dpctl.memory.MemoryUSMHost`; ``buffer`` can also be
+ another :class:`dpctl.tensor.usm_ndarray` instance, in which case its
+ underlying ``MemoryUSM*`` buffer is used.
+ """
+
    cdef void _reset(usm_ndarray self):
        """
        Initializes member fields to a consistent "empty" state.
        """
        self.base_ = None             # owner of the USM allocation
        self.array_namespace_ = None
        self.nd_ = -1                 # -1 marks an uninitialized array
        self.data_ = 0                # NULL data pointer
        self.shape_ = 0               # NULL shape C-array
        self.strides_ = 0             # NULL strides C-array
        self.flags_ = 0
+
    cdef void _cleanup(usm_ndarray self):
        """Frees heap-allocated shape/strides C-arrays and resets fields."""
        if (self.shape_):
            PyMem_Free(self.shape_)
        if (self.strides_):
            PyMem_Free(self.strides_)
        # reset so a second call (e.g. from __dealloc__) is a no-op
        self._reset()
+
+ def __cinit__(self, shape, dtype=None, strides=None, buffer="device",
+ Py_ssize_t offset=0, order="C",
+ buffer_ctor_kwargs=dict(),
+ array_namespace=None):
+ """
+ strides and offset must be given in units of array elements.
+ buffer can be strings ('device'|'shared'|'host' to allocate new memory)
+ or ``dpctl.memory.MemoryUSM*`` buffers, or ``usm_ndarray`` instances.
+ """
+ cdef int nd = 0
+ cdef int typenum = 0
+ cdef int itemsize = 0
+ cdef int err = 0
+ cdef int contig_flag = 0
+ cdef int writable_flag = USM_ARRAY_WRITABLE
+ cdef Py_ssize_t *shape_ptr = NULL
+ cdef Py_ssize_t ary_nelems = 0
+ cdef Py_ssize_t ary_nbytes = 0
+ cdef Py_ssize_t *strides_ptr = NULL
+ cdef Py_ssize_t _offset = offset
+ cdef Py_ssize_t ary_min_displacement = 0
+ cdef Py_ssize_t ary_max_displacement = 0
+ cdef bint is_fp64 = False
+ cdef bint is_fp16 = False
+
+ self._reset()
+ if not isinstance(shape, (list, tuple)):
+ if hasattr(shape, "tolist"):
+ fn = getattr(shape, "tolist")
+ if callable(fn):
+ shape = shape.tolist()
+ if not isinstance(shape, (list, tuple)):
+ try:
+ shape
+ shape = [shape, ]
+ except Exception as e:
+ raise TypeError(
+ "Argument shape must a non-negative integer, "
+ "or a list/tuple of such integers."
+ ) from e
+ nd = len(shape)
+ if dtype is None:
+ if isinstance(buffer, (dpmem._memory._Memory, usm_ndarray)):
+ q = buffer.sycl_queue
+ else:
+ q = buffer_ctor_kwargs.get("queue")
+ if q is not None:
+ dtype = default_device_fp_type(q)
+ else:
+ dev = _cached_default_device()
+ dtype = "f8" if dev.has_aspect_fp64 else "f4"
+ typenum = dtype_to_typenum(dtype)
+ if (typenum < 0):
+ if typenum == -2:
+ raise ValueError(
+ "Data type '" + str(dtype) +
+ "' can only have native byteorder."
+ )
+ elif typenum == -1:
+ raise ValueError(
+ "Data type '" + str(dtype) + "' is not understood."
+ )
+ raise TypeError(
+ f"Expected string or a dtype object, got {type(dtype)}"
+ )
+ itemsize = type_bytesize(typenum)
+ if (itemsize < 1):
+ raise TypeError(
+ "dtype=" + np.dtype(dtype).name + " is not supported."
+ )
+ # allocate host C-arrays for shape, strides
+ err = _from_input_shape_strides(
+ nd, shape, strides, itemsize, ord(order),
+ &shape_ptr, &strides_ptr, &ary_nelems,
+ &ary_min_displacement, &ary_max_displacement, &contig_flag
+ )
+ if (err):
+ self._cleanup()
+ if err == ERROR_MALLOC:
+ raise MemoryError("Memory allocation for shape/strides "
+ "array failed.")
+ elif err == ERROR_INCORRECT_ORDER:
+ raise ValueError(
+ "Unsupported order='{}' given. "
+ "Supported values are 'C' or 'F'.".format(order))
+ elif err == ERROR_UNEXPECTED_STRIDES:
+ raise ValueError(
+ "strides={} is not understood".format(strides))
+ else:
+ raise InternalUSMArrayError(
+ " .. while processing shape and strides.")
+ ary_nbytes = (ary_max_displacement -
+ ary_min_displacement + 1) * itemsize
+ if isinstance(buffer, dpmem._memory._Memory):
+ _buffer = buffer
+ elif isinstance(buffer, (str, bytes)):
+ if isinstance(buffer, bytes):
+ buffer = buffer.decode("UTF-8")
+ _offset = -ary_min_displacement
+ if (buffer == "shared"):
+ _buffer = dpmem.MemoryUSMShared(ary_nbytes,
+ **buffer_ctor_kwargs)
+ elif (buffer == "device"):
+ _buffer = dpmem.MemoryUSMDevice(ary_nbytes,
+ **buffer_ctor_kwargs)
+ elif (buffer == "host"):
+ _buffer = dpmem.MemoryUSMHost(ary_nbytes,
+ **buffer_ctor_kwargs)
+ else:
+ self._cleanup()
+ raise ValueError(
+ "buffer='{}' is not understood. "
+ "Recognized values are 'device', 'shared', 'host', "
+ "an instance of `MemoryUSM*` object, or a usm_ndarray"
+ "".format(buffer)
+ )
+ elif isinstance(buffer, usm_ndarray):
+ if not buffer.flags.writable:
+ writable_flag = 0
+ _buffer = buffer.usm_data
+ else:
+ self._cleanup()
+ raise ValueError("buffer='{}' was not understood.".format(buffer))
+ if (shape_to_elem_count(nd, shape_ptr) > 0 and
+ (_offset + ary_min_displacement < 0 or
+ (_offset + ary_max_displacement + 1) * itemsize > _buffer.nbytes)):
+ self._cleanup()
+ raise ValueError(("buffer='{}' can not accommodate "
+ "the requested array.").format(buffer))
+ is_fp64 = (typenum == UAR_DOUBLE or typenum == UAR_CDOUBLE)
+ is_fp16 = (typenum == UAR_HALF)
+ if (is_fp64 or is_fp16):
+ if (
+ (is_fp64 and not _buffer.sycl_device.has_aspect_fp64) or
+ (is_fp16 and not _buffer.sycl_device.has_aspect_fp16)
+ ):
+ raise ValueError(
+ f"Device {_buffer.sycl_device.name} does"
+ f" not support {dtype} natively."
+ )
+ self.base_ = _buffer
+ self.data_ = ( ( _buffer._pointer)) + itemsize * _offset
+ self.shape_ = shape_ptr
+ self.strides_ = strides_ptr
+ self.typenum_ = typenum
+ self.flags_ = (contig_flag | writable_flag)
+ self.nd_ = nd
+ self.array_namespace_ = array_namespace
+
    def __dealloc__(self):
        # free shape_/strides_ C-arrays and drop reference to base_
        self._cleanup()
+
+ @property
+ def _pointer(self):
+ """
+ Returns USM pointer to the start of array (element with zero
+ multi-index) encoded as integer.
+ """
+ return self.get_data()
+
+ cdef Py_ssize_t get_offset(self) except *:
+ cdef char *mem_ptr = NULL
+ cdef char *ary_ptr = self.get_data()
+ mem_ptr = ( self.base_._pointer)
+ byte_offset = ary_ptr - mem_ptr
+ item_size = self.get_itemsize()
+ if (byte_offset % item_size):
+ raise InternalUSMArrayError(
+ "byte_offset is not a multiple of item_size.")
+ return byte_offset // item_size
+
    @property
    def _element_offset(self):
        """Returns the offset of the zero-index element of the array, in
        elements, relative to the start of memory allocation"""
        return self.get_offset()
+
    @property
    def _byte_bounds(self):
        """Returns a 2-tuple with pointers to the end-points of the array

        :Example:

        .. code-block:: python

            from dpctl import tensor

            x = tensor.ones((3, 10, 7))
            y = tensor.flip(x[:, 1::2], axis=1)

            beg_p, end_p = y._byte_bounds
            # Bytes taken to store this array
            bytes_extent = end_p - beg_p

            # C-contiguous copy is more compact:
            # the strided view spans more bytes than its copy
            yc = tensor.copy(y, order="C")
            beg_pc, end_pc = yc._byte_bounds
            assert bytes_extent > end_pc - beg_pc
        """
        cdef Py_ssize_t min_disp = 0
        cdef Py_ssize_t max_disp = 0
        cdef Py_ssize_t step_ = 0
        cdef Py_ssize_t dim_ = 0
        cdef int it = 0
        cdef Py_ssize_t _itemsize = self.get_itemsize()

        # contiguous case: bounds are simply [start, start + size * itemsize)
        if (
            (self.flags_ & USM_ARRAY_C_CONTIGUOUS)
            or (self.flags_ & USM_ARRAY_F_CONTIGUOUS)
        ):
            return (
                self._pointer,
                self._pointer + shape_to_elem_count(
                    self.nd_, self.shape_
                ) * _itemsize
            )

        # strided case: accumulate positive strides into max displacement,
        # negative strides into min displacement
        for it in range(self.nd_):
            dim_ = self.shape[it]
            if dim_ > 0:
                step_ = self.strides[it]
                if step_ > 0:
                    max_disp += step_ * (dim_ - 1)
                else:
                    min_disp += step_ * (dim_ - 1)

        return (
            self._pointer + min_disp * _itemsize,
            self._pointer + (max_disp + 1) * _itemsize
        )
+
    cdef char* get_data(self):
        """Returns the USM pointer for this array."""
        return self.data_

    cdef int get_ndim(self):
        """
        Returns the number of indices needed to address
        an element of this array.
        """
        return self.nd_

    cdef Py_ssize_t* get_shape(self):
        """
        Returns pointer to shape C-array for this array.

        C-array has at least ``ndim`` non-negative elements,
        which determine the range of permissible indices
        addressing individual elements of this array.
        """
        return self.shape_

    cdef Py_ssize_t* get_strides(self):
        """
        Returns pointer to strides C-array for this array.

        The pointer can be NULL (contiguous array), or the
        array size is at least ``ndim`` elements
        """
        return self.strides_

    cdef int get_typenum(self):
        """Returns typenum corresponding to values of this array"""
        return self.typenum_

    cdef int get_itemsize(self):
        """
        Returns itemsize of this arrays in bytes
        """
        return type_bytesize(self.typenum_)

    cdef int get_flags(self):
        """Returns flags of this array"""
        return self.flags_

    cdef object get_base(self):
        """Returns the object owning the USM data addressed by this array"""
        return self.base_
+
+ cdef c_dpctl.SyclQueue get_sycl_queue(self):
+ cdef c_dpmem._Memory mem
+ if not isinstance(self.base_, dpctl.memory._Memory):
+ raise InternalUSMArrayError(
+ "This array has unexpected memory owner"
+ )
+ mem = self.base_
+ return mem.queue
+
    cdef c_dpctl.DPCTLSyclQueueRef get_queue_ref(self) except *:
        """
        Returns a copy of DPCTLSyclQueueRef associated with array.

        The caller owns the returned reference and is responsible
        for releasing it.
        """
        cdef c_dpctl.SyclQueue q = self.get_sycl_queue()
        cdef c_dpctl.DPCTLSyclQueueRef QRef = q.get_queue_ref()
        cdef c_dpctl.DPCTLSyclQueueRef QRefCopy = NULL
        if QRef is not NULL:
            QRefCopy = c_dpctl.DPCTLQueue_Copy(QRef)
            return QRefCopy
        else:
            raise InternalUSMArrayError(
                "Memory owner of this array is corrupted"
            )
+
+ @property
+ def __sycl_usm_array_interface__(self):
+ """
+ Gives ``__sycl_usm_array_interface__`` dictionary describing
+ the array.
+ """
+ cdef Py_ssize_t byte_offset = -1
+ cdef int item_size = -1
+ cdef Py_ssize_t elem_offset = -1
+ cdef char *mem_ptr = NULL
+ cdef char *ary_ptr = NULL
+ if (not isinstance(self.base_, dpmem._memory._Memory)):
+ raise InternalUSMArrayError(
+ "Invalid instance of usm_ndarray encountered. "
+ "Private field base_ has an unexpected type {}.".format(
+ type(self.base_)
+ )
+ )
+ ary_iface = self.base_.__sycl_usm_array_interface__
+ mem_ptr = ( ary_iface["data"][0])
+ ary_ptr = ( self.data_)
+ ro_flag = False if (self.flags_ & USM_ARRAY_WRITABLE) else True
+ ary_iface["data"] = ( mem_ptr, ro_flag)
+ ary_iface["shape"] = self.shape
+ if (self.strides_):
+ ary_iface["strides"] = _make_int_tuple(self.nd_, self.strides_)
+ else:
+ if (self.flags_ & USM_ARRAY_C_CONTIGUOUS):
+ ary_iface["strides"] = None
+ elif (self.flags_ & USM_ARRAY_F_CONTIGUOUS):
+ ary_iface["strides"] = _f_contig_strides(self.nd_, self.shape_)
+ else:
+ raise InternalUSMArrayError(
+ "USM Array is not contiguous and has empty strides"
+ )
+ ary_iface["typestr"] = _make_typestr(self.typenum_)
+ byte_offset = ary_ptr - mem_ptr
+ item_size = self.get_itemsize()
+ if (byte_offset % item_size):
+ raise InternalUSMArrayError(
+ "byte_offset is not a multiple of item_size.")
+ elem_offset = byte_offset // item_size
+ ary_iface["offset"] = elem_offset
+ # must wait for content of the memory to finalize
+ self.sycl_queue.wait()
+ return ary_iface
+
    @property
    def ndim(self):
        """
        Gives the number of indices needed to address elements of this array.
        """
        return self.nd_

    @property
    def usm_data(self):
        """
        Gives USM memory object underlying :class:`.usm_ndarray` instance.
        """
        return self.get_base()
+
    @property
    def shape(self):
        """
        Elements of the shape tuple give the lengths of the
        respective array dimensions.

        Setting shape is allowed only when reshaping to the requested
        dimensions can be returned as view, otherwise :exc:`AttributeError`
        is raised. Use :func:`dpctl.tensor.reshape` to reshape the array
        in all cases.

        :Example:

        .. code-block:: python

            from dpctl import tensor

            x = tensor.arange(899)
            x.shape = (29, 31)
        """
        if self.nd_ > 0:
            return _make_int_tuple(self.nd_, self.shape_)
        else:
            # 0-dimensional array
            return tuple()
+
+ @shape.setter
+ def shape(self, new_shape):
+ """
+ Modifies usm_ndarray instance in-place by changing its metadata
+ about the shape and the strides of the array, or raises
+ `AttributeError` exception if in-place change is not possible.
+
+ Args:
+ new_shape: (tuple, int)
+ New shape. Only non-negative values are supported.
+ The new shape may not lead to the change in the
+ number of elements in the array.
+
+ Whether the array can be reshape in-place depends on its
+ strides. Use :func:`dpctl.tensor.reshape` function which
+ always succeeds to reshape the array by performing a copy
+ if necessary.
+ """
+ cdef int new_nd = -1
+ cdef Py_ssize_t nelems = -1
+ cdef int err = 0
+ cdef Py_ssize_t min_disp = 0
+ cdef Py_ssize_t max_disp = 0
+ cdef int contig_flag = 0
+ cdef Py_ssize_t *shape_ptr = NULL
+ cdef Py_ssize_t *strides_ptr = NULL
+ cdef Py_ssize_t size = -1
+ import operator
+
+ from ._reshape import reshaped_strides
+
+ try:
+ new_nd = len(new_shape)
+ except TypeError:
+ new_nd = 1
+ new_shape = (new_shape,)
+ try:
+ new_shape = tuple(operator.index(dim) for dim in new_shape)
+ except TypeError:
+ raise TypeError(
+ "Target shape must be a finite iterable of integers"
+ )
+ size = shape_to_elem_count(self.nd_, self.shape_)
+ if not np.prod(new_shape) == size:
+ raise TypeError(
+ f"Can not reshape array of size {self.size} into {new_shape}"
+ )
+ if size > 0:
+ new_strides = reshaped_strides(
+ self.shape,
+ self.strides,
+ new_shape
+ )
+ else:
+ new_strides = (1,) * len(new_shape)
+ if new_strides is None:
+ raise AttributeError(
+ "Incompatible shape for in-place modification. "
+ "Use `reshape()` to make a copy with the desired shape."
+ )
+ err = _from_input_shape_strides(
+ new_nd, new_shape, new_strides,
+ self.get_itemsize(),
+ b"C",
+ &shape_ptr, &strides_ptr,
+ &nelems, &min_disp, &max_disp, &contig_flag
+ )
+ if (err == 0):
+ if (self.shape_):
+ PyMem_Free(self.shape_)
+ if (self.strides_):
+ PyMem_Free(self.strides_)
+ self.flags_ = (contig_flag | (self.flags_ & USM_ARRAY_WRITABLE))
+ self.nd_ = new_nd
+ self.shape_ = shape_ptr
+ self.strides_ = strides_ptr
+ else:
+ raise InternalUSMArrayError(
+ "Encountered in shape setter, error code {err}".format(err)
+ )
+
    @property
    def strides(self):
        """
        Returns memory displacement in array elements, upon unit
        change of respective index.

        For example, for strides ``(s1, s2, s3)`` and multi-index
        ``(i1, i2, i3)`` position of the respective element relative
        to zero multi-index element is ``s1*i1 + s2*i2 + s3*i3``.

        :Example:

        .. code-block:: python

            from dpctl import tensor

            x = tensor.zeros((20, 30))
            xv = x[10:, :15]

            multi_id = (3, 5)
            byte_displacement = xv[multi_id]._pointer - xv[0, 0]._pointer
            element_displacement = sum(
                i * s for i, s in zip(multi_id, xv.strides)
            )
            assert byte_displacement == element_displacement * xv.itemsize
        """
        if (self.strides_):
            return _make_int_tuple(self.nd_, self.strides_)
        else:
            # NULL strides: derive from the contiguity flags
            if (self.flags_ & USM_ARRAY_C_CONTIGUOUS):
                return _c_contig_strides(self.nd_, self.shape_)
            elif (self.flags_ & USM_ARRAY_F_CONTIGUOUS):
                return _f_contig_strides(self.nd_, self.shape_)
            else:
                raise ValueError("Inconsistent usm_ndarray data")
+
    @property
    def flags(self):
        """
        Returns :class:`dpctl.tensor._flags.Flags` object.
        """
        return _flags.Flags(self, self.flags_)

    cdef _set_writable_flag(self, int flag):
        # set or clear USM_ARRAY_WRITABLE while preserving other flag bits
        cdef int mask = (USM_ARRAY_WRITABLE if flag else 0)
        self.flags_ = _copy_writable(self.flags_, mask)
+
    @property
    def usm_type(self):
        """
        USM type of underlying memory. Possible values are:

        * ``"device"``
            USM-device allocation in device memory, only accessible
            to kernels executed on the device
        * ``"shared"``
            USM-shared allocation in device memory, accessible both
            from the device and from host
        * ``"host"``
            USM-host allocation in host memory, accessible both
            from the device and from host

        See: https://docs.oneapi.com/versions/latest/dpcpp/iface/usm.html
        """
        return self.base_.get_usm_type()
+
    @property
    def itemsize(self):
        """
        Size of array element in bytes.
        """
        return self.get_itemsize()

    @property
    def nbytes(self):
        """
        Total bytes consumed by the elements of the array.
        """
        return (
            shape_to_elem_count(self.nd_, self.shape_) *
            self.get_itemsize())

    @property
    def size(self):
        """
        Number of elements in the array.
        """
        return shape_to_elem_count(self.nd_, self.shape_)

    @property
    def dtype(self):
        """
        Returns NumPy's dtype corresponding to the type of the array elements.
        """
        return np.dtype(_make_typestr(self.typenum_))

    @property
    def sycl_queue(self):
        """
        Returns :class:`dpctl.SyclQueue` object associated with USM data.
        """
        return self.get_sycl_queue()

    @property
    def sycl_device(self):
        """
        Returns :class:`dpctl.SyclDevice` object on which USM data
        was allocated.
        """
        q = self.sycl_queue
        return q.sycl_device
+
    @property
    def device(self):
        """
        Returns :class:`dpctl.tensor.Device` object representing
        residence of the array data.

        The ``Device`` object represents Array API notion of the
        device, and contains :class:`dpctl.SyclQueue` associated
        with this array. Hence, ``.device`` property provides
        information distinct from ``.sycl_device`` property.

        :Example:

        .. code-block:: python

            >>> from dpctl import tensor
            >>> x = tensor.ones(10)
            >>> x.device
            Device(level_zero:gpu:0)
        """
        return Device.create_device(self.sycl_queue)

    @property
    def sycl_context(self):
        """
        Returns :class:`dpctl.SyclContext` object to which USM data is bound.
        """
        q = self.sycl_queue
        return q.sycl_context
+
    @property
    def T(self):
        """Returns transposed array for 2D array, raises ``ValueError``
        otherwise.
        """
        if self.nd_ == 2:
            return _transpose(self)
        else:
            raise ValueError(
                "array.T requires array to have 2 dimensions. "
                "Use array.mT to transpose stacks of matrices and "
                "dpctl.tensor.permute_dims() to permute dimensions."
            )

    @property
    def mT(self):
        """ Returns array (a view) where the last two dimensions are
        transposed.

        Raises ``ValueError`` for arrays with fewer than 2 dimensions.
        """
        if self.nd_ < 2:
            raise ValueError(
                "array.mT requires array to have at least 2 dimensions."
            )
        return _m_transpose(self)
+
    @property
    def real(self):
        """
        Returns view into real component for arrays with
        complex data-types and returns itself for all other
        data-types.

        :Example:

        .. code-block:: python

            from dpctl import tensor

            # Create complex array from
            # arrays of real and imaginary parts

            re = tensor.linspace(-1, 1, num=100, dtype="f4")
            im = tensor.full_like(re, fill_value=tensor.pi)

            z = tensor.empty_like(re, dtype="c8")
            z.real[:] = re
            z.imag[:] = im
        """
        # explicitly check for UAR_HALF, which is greater than UAR_CFLOAT
        if (self.typenum_ < UAR_CFLOAT or self.typenum_ == UAR_HALF):
            # elements are real
            return self
        if (self.typenum_ < UAR_TYPE_SENTINEL):
            return _real_view(self)
        # NOTE(review): falls through to implicit None for typenum_ at or
        # beyond UAR_TYPE_SENTINEL — confirm this is unreachable
+
    @property
    def imag(self):
        """ Returns view into imaginary component for arrays with
        complex data-types and returns new zero array for all other
        data-types.

        :Example:

        .. code-block:: python

            from dpctl import tensor

            # Reset imaginary part of complex array

            z = tensor.ones(100, dtype="c8")
            z.imag[:] = tensor.pi/2
        """
        # explicitly check for UAR_HALF, which is greater than UAR_CFLOAT
        if (self.typenum_ < UAR_CFLOAT or self.typenum_ == UAR_HALF):
            # elements are real
            return _zero_like(self)
        if (self.typenum_ < UAR_TYPE_SENTINEL):
            return _imag_view(self)
        # NOTE(review): falls through to implicit None for typenum_ at or
        # beyond UAR_TYPE_SENTINEL — confirm this is unreachable
+
+ def __getitem__(self, ind):
+ cdef tuple _meta = _basic_slice_meta(
+ ind, (self).shape, ( self).strides,
+ self.get_offset())
+ cdef usm_ndarray res
+ cdef int i = 0
+ cdef bint matching = 1
+
+ if len(_meta) < 5:
+ raise RuntimeError
+
+ res = usm_ndarray.__new__(
+ usm_ndarray,
+ _meta[0],
+ dtype=_make_typestr(self.typenum_),
+ strides=_meta[1],
+ buffer=self.base_,
+ offset=_meta[2]
+ )
+ res.array_namespace_ = self.array_namespace_
+
+ adv_ind = _meta[3]
+ adv_ind_start_p = _meta[4]
+
+ if adv_ind_start_p < 0:
+ res.flags_ = _copy_writable(res.flags_, self.flags_)
+ return res
+
+ from ._copy_utils import _extract_impl, _nonzero_impl, _take_multi_index
+
+ # if len(adv_ind == 1), the (only) element is always an array
+ if len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool:
+ key_ = adv_ind[0]
+ adv_ind_end_p = key_.ndim + adv_ind_start_p
+ if adv_ind_end_p > res.ndim:
+ raise IndexError("too many indices for the array")
+ key_shape = key_.shape
+ arr_shape = res.shape[adv_ind_start_p:adv_ind_end_p]
+ for i in range(key_.ndim):
+ if matching:
+ if not key_shape[i] == arr_shape[i] and key_shape[i] > 0:
+ matching = 0
+ if not matching:
+ raise IndexError(
+ "boolean index did not match indexed array in dimensions"
+ )
+ res = _extract_impl(res, key_, axis=adv_ind_start_p)
+ res.flags_ = _copy_writable(res.flags_, self.flags_)
+ return res
+
+ if any(
+ (
+ isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool
+ ) for ind in adv_ind
+ ):
+ adv_ind_int = list()
+ for ind in adv_ind:
+ if isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool:
+ adv_ind_int.extend(_nonzero_impl(ind))
+ else:
+ adv_ind_int.append(ind)
+ res = _take_multi_index(res, tuple(adv_ind_int), adv_ind_start_p)
+ res.flags_ = _copy_writable(res.flags_, self.flags_)
+ return res
+
+ res = _take_multi_index(res, adv_ind, adv_ind_start_p)
+ res.flags_ = _copy_writable(res.flags_, self.flags_)
+ return res
+
+ def to_device(self, target_device, /, *, stream=None):
+ """ to_device(target_device, /, *, stream=None)
+
+ Transfers this array to specified target device.
+
+ :Example:
+ .. code-block:: python
+
+ import dpctl
+ import dpctl.tensor as dpt
+
+ x = dpt.full(10**6, 2, dtype="int64")
+ q_prof = dpctl.SyclQueue(
+ x.sycl_device, property="enable_profiling")
+ # return a view with profile-enabled queue
+ y = x.to_device(q_prof)
+ timer = dpctl.SyclTimer()
+ with timer(q_prof):
+ z = y * y
+ print(timer.dt)
+
+ Args:
+ target_device (object):
+ Array API concept of target device.
+ It can be a oneAPI filter selector string,
+ an instance of :class:`dpctl.SyclDevice` corresponding to a
+ non-partitioned SYCL device, an instance of
+ :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device`
+ object returned by :attr:`dpctl.tensor.usm_ndarray.device`.
+ stream (:class:`dpctl.SyclQueue`, optional):
+ Execution queue to synchronize with. If ``None``,
+ synchronization is not performed.
+
+ Returns:
+ usm_ndarray:
+ A view if data copy is not required, and a copy otherwise.
+ If copying is required, it is done by copying from the original
+ allocation device to the host, followed by copying from host
+ to the target device.
+ """
+ cdef c_dpctl.DPCTLSyclQueueRef QRef = NULL
+ cdef c_dpmem._Memory arr_buf
+ d = Device.create_device(target_device)
+
+ _validate_and_use_stream(stream, self.sycl_queue)
+
+ if (d.sycl_context == self.sycl_context):
+ arr_buf = self.usm_data
+ QRef = ( d.sycl_queue).get_queue_ref()
+ view_buffer = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+ arr_buf.get_data_ptr(),
+ arr_buf.nbytes,
+ QRef,
+ memory_owner=arr_buf
+ )
+ res = usm_ndarray(
+ self.shape,
+ self.dtype,
+ buffer=view_buffer,
+ strides=self.strides,
+ offset=self.get_offset()
+ )
+ res.flags_ = self.flags_
+ return res
+ else:
+ nbytes = self.usm_data.nbytes
+ copy_buffer = type(self.usm_data)(
+ nbytes, queue=d.sycl_queue
+ )
+ copy_buffer.copy_from_device(self.usm_data)
+ res = usm_ndarray(
+ self.shape,
+ self.dtype,
+ buffer=copy_buffer,
+ strides=self.strides,
+ offset=self.get_offset()
+ )
+ res.flags_ = self.flags_
+ return res
+
    def _set_namespace(self, mod):
        """ Sets array namespace to given module `mod`. """
        self.array_namespace_ = mod

    def __array_namespace__(self, api_version=None):
        """
        Returns array namespace, member functions of which
        implement data API.

        Args:
            api_version (str, optional)
                Request namespace compliant with given version of
                array API. If ``None``, namespace for the most
                recent supported version is returned.
                Default: ``None``.
        """
        if api_version is not None:
            from ._array_api import __array_api_version__
            if not isinstance(api_version, str):
                raise TypeError(f"Expected type str, got {type(api_version)}")
            if api_version != __array_api_version__:
                raise ValueError(f"Only {__array_api_version__} is supported")
        return (
            self.array_namespace_
            if self.array_namespace_ is not None
            # TODO: revert to `else dpctl.tensor`
            # when dpnp fully migrates dpctl/tensor
            else dpctl_ext.tensor
        )
+
    def __bool__(self):
        # only size-1 arrays can be converted to a Python bool
        if self.size == 1:
            _check_0d_scalar_conversion(self)
            view = _as_zero_dim_ndarray(self)
            return view.__bool__()

        if self.size == 0:
            raise ValueError(
                "The truth value of an empty array is ambiguous"
            )

        raise ValueError(
            "The truth value of an array with more than one element is "
            "ambiguous. Use dpctl.tensor.any() or dpctl.tensor.all()"
        )

    def __float__(self):
        # only size-1 arrays can be converted to a Python float
        if self.size == 1:
            _check_0d_scalar_conversion(self)
            view = _as_zero_dim_ndarray(self)
            return view.__float__()

        raise ValueError(
            "only size-1 arrays can be converted to Python scalars"
        )

    def __complex__(self):
        # only size-1 arrays can be converted to a Python complex
        if self.size == 1:
            _check_0d_scalar_conversion(self)
            view = _as_zero_dim_ndarray(self)
            return view.__complex__()

        raise ValueError(
            "only size-1 arrays can be converted to Python scalars"
        )

    def __int__(self):
        # only size-1 arrays can be converted to a Python int
        if self.size == 1:
            _check_0d_scalar_conversion(self)
            view = _as_zero_dim_ndarray(self)
            return view.__int__()

        raise ValueError(
            "only size-1 arrays can be converted to Python scalars"
        )

    def __index__(self):
        # integer-typed size-1 arrays may be used as indices
        if np.issubdtype(self.dtype, np.integer):
            return int(self)

        raise IndexError("only integer arrays are valid indices")
+
    def __abs__(self):
        # TODO: revert to `return dpctl.tensor...`
        # when dpnp fully migrates dpctl/tensor
        return dpctl_ext.tensor.abs(self)

    def __add__(self, other):
        """
        Implementation for operator.add
        """
        return dpctl_ext.tensor.add(self, other)

    def __and__(self, other):
        "Implementation for operator.and"
        return dpctl_ext.tensor.bitwise_and(self, other)
+
    def __dlpack__(
        self, *, stream=None, max_version=None, dl_device=None, copy=None
    ):
        """
        Produces DLPack capsule.

        Args:
            stream (:class:`dpctl.SyclQueue`, optional):
                Execution queue to synchronize with.
                If ``None``, synchronization is not performed.
                Default: ``None``.
            max_version (tuple[int, int], optional):
                The maximum DLPack version the consumer (caller of
                ``__dlpack__``) supports. As ``__dlpack__`` may not
                always return a DLPack capsule with version
                `max_version`, the consumer must verify the version
                even if this argument is passed.
                Default: ``None``.
            dl_device (tuple[enum.Enum, int], optional):
                The device the returned DLPack capsule will be
                placed on.
                The device must be a 2-tuple matching the format of
                ``__dlpack_device__`` method, an integer enumerator
                representing the device type followed by an integer
                representing the index of the device.
                Default: ``None``.
            copy (bool, optional):
                Boolean indicating whether or not to copy the input.

                * If ``copy`` is ``True``, the input will always be
                  copied.
                * If ``False``, a ``BufferError`` will be raised if a
                  copy is deemed necessary.
                * If ``None``, a copy will be made only if deemed
                  necessary, otherwise, the existing memory buffer will
                  be reused.

                Default: ``None``.

        Raises:
            MemoryError:
                when host memory can not be allocated.
            DLPackCreationError:
                when array is allocated on a partitioned
                SYCL device, or with a non-default context.
            BufferError:
                when a copy is deemed necessary but ``copy``
                is ``False`` or when the provided ``dl_device``
                cannot be handled.
        """
        if max_version is None:
            # legacy path for DLManagedTensor
            # copy kwarg ignored because copy flag can't be set
            _caps = c_dlpack.to_dlpack_capsule(self)
            _validate_and_use_stream(stream, self.sycl_queue)
            return _caps
        else:
            if not isinstance(max_version, tuple) or len(max_version) != 2:
                raise TypeError(
                    "`__dlpack__` expects `max_version` to be a "
                    "2-tuple of integers `(major, minor)`, instead "
                    f"got {max_version}"
                )
            dpctl_dlpack_version = get_build_dlpack_version()
            if max_version[0] >= dpctl_dlpack_version[0]:
                # DLManagedTensorVersioned path
                if dl_device is not None:
                    if not isinstance(dl_device, tuple) or len(dl_device) != 2:
                        raise TypeError(
                            "`__dlpack__` expects `dl_device` to be a 2-tuple "
                            "of `(device_type, device_id)`, instead "
                            f"got {dl_device}"
                        )
                    if dl_device != self.__dlpack_device__():
                        if copy is False:
                            raise BufferError(
                                "array cannot be placed on the requested "
                                "device without a copy"
                            )
                        if _is_host_cpu(dl_device):
                            # host-CPU target: export a NumPy-backed capsule
                            if stream is not None:
                                raise ValueError(
                                    "`stream` must be `None` when `dl_device` "
                                    "is of type `kDLCPU`"
                                )
                            from ._copy_utils import _copy_to_numpy
                            _arr = _copy_to_numpy(self)
                            _arr.flags["W"] = self.flags["W"]
                            return c_dlpack.numpy_to_dlpack_versioned_capsule(
                                _arr, True
                            )
                        else:
                            raise BufferError(
                                f"targeting `dl_device` {dl_device} with "
                                "`__dlpack__` is not yet implemented"
                            )
                if copy is None:
                    copy = False
                # TODO: strategy for handling stream on different device
                # from dl_device
                if copy:
                    # synchronize before copying, then export the copy
                    _validate_and_use_stream(stream, self.sycl_queue)
                    nbytes = self.usm_data.nbytes
                    copy_buffer = type(self.usm_data)(
                        nbytes, queue=self.sycl_queue
                    )
                    copy_buffer.copy_from_device(self.usm_data)
                    _copied_arr = usm_ndarray(
                        self.shape,
                        self.dtype,
                        buffer=copy_buffer,
                        strides=self.strides,
                        offset=self.get_offset()
                    )
                    _copied_arr.flags_ = self.flags_
                    _caps = c_dlpack.to_dlpack_versioned_capsule(
                        _copied_arr, copy
                    )
                else:
                    _caps = c_dlpack.to_dlpack_versioned_capsule(self, copy)
                _validate_and_use_stream(stream, self.sycl_queue)
                return _caps
            else:
                # legacy path for DLManagedTensor
                _caps = c_dlpack.to_dlpack_capsule(self)
                _validate_and_use_stream(stream, self.sycl_queue)
                return _caps
+
+ def __dlpack_device__(self):
+ """
+ Gives a tuple (``device_type``, ``device_id``) corresponding to
+ ``DLDevice`` entry in ``DLTensor`` in DLPack protocol.
+
+ The tuple describes the non-partitioned device where the array has been
+ allocated, or the non-partitioned parent device of the allocation
+ device.
+
+ See :class:`dpctl.tensor.DLDeviceType` for a list of devices supported
+ by the DLPack protocol.
+
+ Raises:
+ DLPackCreationError:
+ when the ``device_id`` could not be determined.
+ """
+ try:
+ dev_id = self.sycl_device.get_device_id()
+ except ValueError as e:
+ raise c_dlpack.DLPackCreationError(
+ "Could not determine id of the device where array was "
+ "allocated."
+ )
+ return (
+ DLDeviceType.kDLOneAPI,
+ dev_id,
+ )
+
+ def __eq__(self, other):
+ # TODO: revert to `return dpctl.tensor...`
+ # when dpnp fully migrates dpctl/tensor
+ return dpctl_ext.tensor.equal(self, other)
+
+ def __floordiv__(self, other):
+ return dpctl_ext.tensor.floor_divide(self, other)
+
+ def __ge__(self, other):
+ return dpctl_ext.tensor.greater_equal(self, other)
+
+ def __gt__(self, other):
+ return dpctl_ext.tensor.greater(self, other)
+
+ def __invert__(self):
+ return dpctl_ext.tensor.bitwise_invert(self)
+
+ def __le__(self, other):
+ return dpctl_ext.tensor.less_equal(self, other)
+
+ def __len__(self):
+ if (self.nd_):
+ return self.shape[0]
+ else:
+ raise TypeError("len() of unsized object")
+
+ def __lshift__(self, other):
+ return dpctl_ext.tensor.bitwise_left_shift(self, other)
+
+ def __lt__(self, other):
+ return dpctl_ext.tensor.less(self, other)
+
+ def __matmul__(self, other):
+ return dpctl_ext.tensor.matmul(self, other)
+
+ def __mod__(self, other):
+ return dpctl_ext.tensor.remainder(self, other)
+
+ def __mul__(self, other):
+ return dpctl_ext.tensor.multiply(self, other)
+
+ def __ne__(self, other):
+ return dpctl_ext.tensor.not_equal(self, other)
+
+ def __neg__(self):
+ return dpctl_ext.tensor.negative(self)
+
+ def __or__(self, other):
+ return dpctl_ext.tensor.bitwise_or(self, other)
+
+ def __pos__(self):
+ return dpctl_ext.tensor.positive(self)
+
+ def __pow__(self, other):
+ return dpctl_ext.tensor.pow(self, other)
+
+ def __rshift__(self, other):
+ return dpctl_ext.tensor.bitwise_right_shift(self, other)
+
+ def __setitem__(self, key, rhs):
+ cdef tuple _meta
+ cdef usm_ndarray Xv
+
+ if (self.flags_ & USM_ARRAY_WRITABLE) == 0:
+ raise ValueError("Can not modify read-only array.")
+
+ _meta = _basic_slice_meta(
+            key, (<object>self).shape, (<object> self).strides,
+ self.get_offset()
+ )
+
+ if len(_meta) < 5:
+ raise RuntimeError
+
+ Xv = usm_ndarray.__new__(
+ usm_ndarray,
+ _meta[0],
+ dtype=_make_typestr(self.typenum_),
+ strides=_meta[1],
+ buffer=self.base_,
+ offset=_meta[2],
+ )
+ # set namespace
+ Xv.array_namespace_ = self.array_namespace_
+
+ from ._copy_utils import (
+ _copy_from_numpy_into,
+ _copy_from_usm_ndarray_to_usm_ndarray,
+ _nonzero_impl,
+ _place_impl,
+ _put_multi_index,
+ )
+
+ adv_ind = _meta[3]
+ adv_ind_start_p = _meta[4]
+
+ if adv_ind_start_p < 0:
+ # basic slicing
+ if isinstance(rhs, usm_ndarray):
+ _copy_from_usm_ndarray_to_usm_ndarray(Xv, rhs)
+ else:
+ if hasattr(rhs, "__sycl_usm_array_interface__"):
+ from dpctl_ext.tensor import asarray
+ try:
+ rhs_ar = asarray(rhs)
+ _copy_from_usm_ndarray_to_usm_ndarray(Xv, rhs_ar)
+ except Exception:
+ raise ValueError(
+ f"Input of type {type(rhs)} could not be "
+ "converted to usm_ndarray"
+ )
+ else:
+ rhs_np = np.asarray(rhs)
+ if type_bytesize(rhs_np.dtype.num) < 0:
+ raise ValueError(
+ f"Input of type {type(rhs)} can not be "
+ "assigned to usm_ndarray because of "
+ f"unsupported data type '{rhs_np.dtype}'"
+ )
+ try:
+ _copy_from_numpy_into(Xv, rhs_np)
+ except Exception:
+ raise ValueError(
+ f"Input of type {type(rhs)} could not be "
+ "copied into dpctl.tensor.usm_ndarray"
+ )
+ return
+
+ if len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool:
+ _place_impl(Xv, adv_ind[0], rhs, axis=adv_ind_start_p)
+ return
+
+ if any(
+ (
+ isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool
+ ) for ind in adv_ind
+ ):
+ adv_ind_int = list()
+ for ind in adv_ind:
+ if isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool:
+ adv_ind_int.extend(_nonzero_impl(ind))
+ else:
+ adv_ind_int.append(ind)
+ _put_multi_index(Xv, tuple(adv_ind_int), adv_ind_start_p, rhs)
+ return
+
+ _put_multi_index(Xv, adv_ind, adv_ind_start_p, rhs)
+ return
+
+ def __sub__(self, other):
+ # TODO: revert to `return dpctl.tensor...`
+ # when dpnp fully migrates dpctl/tensor
+ return dpctl_ext.tensor.subtract(self, other)
+
+ def __truediv__(self, other):
+ return dpctl_ext.tensor.divide(self, other)
+
+ def __xor__(self, other):
+ return dpctl_ext.tensor.bitwise_xor(self, other)
+
+ def __radd__(self, other):
+ return dpctl_ext.tensor.add(other, self)
+
+ def __rand__(self, other):
+ return dpctl_ext.tensor.bitwise_and(other, self)
+
+ def __rfloordiv__(self, other):
+ return dpctl_ext.tensor.floor_divide(other, self)
+
+ def __rlshift__(self, other):
+ return dpctl_ext.tensor.bitwise_left_shift(other, self)
+
+ def __rmatmul__(self, other):
+ return dpctl_ext.tensor.matmul(other, self)
+
+ def __rmod__(self, other):
+ return dpctl_ext.tensor.remainder(other, self)
+
+ def __rmul__(self, other):
+ return dpctl_ext.tensor.multiply(other, self)
+
+ def __ror__(self, other):
+ return dpctl_ext.tensor.bitwise_or(other, self)
+
+ def __rpow__(self, other):
+ return dpctl_ext.tensor.pow(other, self)
+
+ def __rrshift__(self, other):
+ return dpctl_ext.tensor.bitwise_right_shift(other, self)
+
+ def __rsub__(self, other):
+ return dpctl_ext.tensor.subtract(other, self)
+
+ def __rtruediv__(self, other):
+ return dpctl_ext.tensor.divide(other, self)
+
+ def __rxor__(self, other):
+ return dpctl_ext.tensor.bitwise_xor(other, self)
+
+ def __iadd__(self, other):
+ return dpctl_ext.tensor.add._inplace_op(self, other)
+
+ def __iand__(self, other):
+ return dpctl_ext.tensor.bitwise_and._inplace_op(self, other)
+
+ def __ifloordiv__(self, other):
+ return dpctl_ext.tensor.floor_divide._inplace_op(self, other)
+
+ def __ilshift__(self, other):
+ return dpctl_ext.tensor.bitwise_left_shift._inplace_op(self, other)
+
+ def __imatmul__(self, other):
+ return dpctl_ext.tensor.matmul(self, other, out=self, dtype=self.dtype)
+
+ def __imod__(self, other):
+ return dpctl_ext.tensor.remainder._inplace_op(self, other)
+
+ def __imul__(self, other):
+ return dpctl_ext.tensor.multiply._inplace_op(self, other)
+
+ def __ior__(self, other):
+ return dpctl_ext.tensor.bitwise_or._inplace_op(self, other)
+
+ def __ipow__(self, other):
+ return dpctl_ext.tensor.pow._inplace_op(self, other)
+
+ def __irshift__(self, other):
+ return dpctl_ext.tensor.bitwise_right_shift._inplace_op(self, other)
+
+ def __isub__(self, other):
+ return dpctl_ext.tensor.subtract._inplace_op(self, other)
+
+ def __itruediv__(self, other):
+ return dpctl_ext.tensor.divide._inplace_op(self, other)
+
+ def __ixor__(self, other):
+ return dpctl_ext.tensor.bitwise_xor._inplace_op(self, other)
+
+ def __str__(self):
+ return usm_ndarray_str(self)
+
+ def __repr__(self):
+ return usm_ndarray_repr(self)
+
+ def __array__(self, dtype=None, /, *, copy=None):
+ """NumPy's array protocol method to disallow implicit conversion.
+
+ Without this definition, `numpy.asarray(usm_ar)` converts
+ usm_ndarray instance into NumPy array with data type `object`
+ and every element being 0d usm_ndarray.
+
+ https://github.com/IntelPython/dpctl/pull/1384#issuecomment-1707212972
+ """
+ raise TypeError(
+ "Implicit conversion to a NumPy array is not allowed. "
+ "Use `dpctl.tensor.asnumpy` to copy data from this "
+ "`dpctl.tensor.usm_ndarray` instance to NumPy array"
+ )
+
+
+cdef usm_ndarray _real_view(usm_ndarray ary):
+ """
+ View into real parts of a complex type array
+ """
+ cdef int r_typenum_ = -1
+ cdef usm_ndarray r = None
+ cdef Py_ssize_t offset_elems = 0
+
+ if (ary.typenum_ == UAR_CFLOAT):
+ r_typenum_ = UAR_FLOAT
+ elif (ary.typenum_ == UAR_CDOUBLE):
+ r_typenum_ = UAR_DOUBLE
+ else:
+ raise InternalUSMArrayError(
+ "_real_view call on array of non-complex type.")
+
+ offset_elems = ary.get_offset() * 2
+ r = usm_ndarray.__new__(
+ usm_ndarray,
+ _make_int_tuple(ary.nd_, ary.shape_) if ary.nd_ > 0 else tuple(),
+ dtype=_make_typestr(r_typenum_),
+ strides=tuple(2 * si for si in ary.strides),
+ buffer=ary.base_,
+ offset=offset_elems,
+ order=("C" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "F")
+ )
+ r.flags_ = _copy_writable(r.flags_, ary.flags_)
+ r.array_namespace_ = ary.array_namespace_
+ return r
+
+
+cdef usm_ndarray _imag_view(usm_ndarray ary):
+ """
+ View into imaginary parts of a complex type array
+ """
+ cdef int r_typenum_ = -1
+ cdef usm_ndarray r = None
+ cdef Py_ssize_t offset_elems = 0
+
+ if (ary.typenum_ == UAR_CFLOAT):
+ r_typenum_ = UAR_FLOAT
+ elif (ary.typenum_ == UAR_CDOUBLE):
+ r_typenum_ = UAR_DOUBLE
+ else:
+ raise InternalUSMArrayError(
+ "_imag_view call on array of non-complex type.")
+
+ # displace pointer to imaginary part
+ offset_elems = 2 * ary.get_offset() + 1
+ r = usm_ndarray.__new__(
+ usm_ndarray,
+ _make_int_tuple(ary.nd_, ary.shape_) if ary.nd_ > 0 else tuple(),
+ dtype=_make_typestr(r_typenum_),
+ strides=tuple(2 * si for si in ary.strides),
+ buffer=ary.base_,
+ offset=offset_elems,
+ order=("C" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "F")
+ )
+ r.flags_ = _copy_writable(r.flags_, ary.flags_)
+ r.array_namespace_ = ary.array_namespace_
+ return r
+
+
+cdef usm_ndarray _transpose(usm_ndarray ary):
+ """
+ Construct transposed array without copying the data
+ """
+ cdef usm_ndarray r = usm_ndarray.__new__(
+ usm_ndarray,
+ _make_reversed_int_tuple(ary.nd_, ary.shape_),
+ dtype=_make_typestr(ary.typenum_),
+ strides=(
+ _make_reversed_int_tuple(ary.nd_, ary.strides_)
+ if (ary.strides_) else None),
+ buffer=ary.base_,
+ order=("F" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "C"),
+ offset=ary.get_offset()
+ )
+ r.flags_ = _copy_writable(r.flags_, ary.flags_)
+ return r
+
+
+cdef usm_ndarray _m_transpose(usm_ndarray ary):
+ """
+ Construct matrix transposed array
+ """
+ cdef usm_ndarray r = usm_ndarray.__new__(
+ usm_ndarray,
+ _swap_last_two(_make_int_tuple(ary.nd_, ary.shape_)),
+ dtype=_make_typestr(ary.typenum_),
+ strides=_swap_last_two(ary.strides),
+ buffer=ary.base_,
+ order=("F" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "C"),
+ offset=ary.get_offset()
+ )
+ r.flags_ = _copy_writable(r.flags_, ary.flags_)
+ return r
+
+
+cdef usm_ndarray _zero_like(usm_ndarray ary):
+ """
+ Make C-contiguous array of zero elements with same shape,
+ type, device, and sycl_queue as ary.
+ """
+ cdef dt = _make_typestr(ary.typenum_)
+ cdef usm_ndarray r = usm_ndarray(
+ _make_int_tuple(ary.nd_, ary.shape_) if ary.nd_ > 0 else tuple(),
+ dtype=dt,
+ buffer=ary.base_.get_usm_type(),
+ buffer_ctor_kwargs={"queue": ary.get_sycl_queue()},
+ )
+ r.base_.memset()
+ return r
+
+
+cdef api char* UsmNDArray_GetData(usm_ndarray arr):
+ """Get allocation pointer of zero index element of array """
+ return arr.get_data()
+
+
+cdef api int UsmNDArray_GetNDim(usm_ndarray arr):
+ """Get array rank: length of its shape"""
+ return arr.get_ndim()
+
+
+cdef api Py_ssize_t* UsmNDArray_GetShape(usm_ndarray arr):
+ """Get host pointer to shape vector"""
+ return arr.get_shape()
+
+
+cdef api Py_ssize_t* UsmNDArray_GetStrides(usm_ndarray arr):
+ """Get host pointer to strides vector"""
+ return arr.get_strides()
+
+
+cdef api int UsmNDArray_GetTypenum(usm_ndarray arr):
+ """Get type number for data type of array elements"""
+ return arr.get_typenum()
+
+
+cdef api int UsmNDArray_GetElementSize(usm_ndarray arr):
+ """Get array element size in bytes"""
+ return arr.get_itemsize()
+
+
+cdef api int UsmNDArray_GetFlags(usm_ndarray arr):
+ """Get flags of array"""
+ return arr.get_flags()
+
+
+cdef api c_dpctl.DPCTLSyclQueueRef UsmNDArray_GetQueueRef(usm_ndarray arr):
+ """Get DPCTLSyclQueueRef for queue associated with the array"""
+ return arr.get_queue_ref()
+
+
+cdef api Py_ssize_t UsmNDArray_GetOffset(usm_ndarray arr):
+ """Get offset of zero-index array element from the beginning of the USM
+ allocation"""
+ return arr.get_offset()
+
+
+cdef api object UsmNDArray_GetUSMData(usm_ndarray arr):
+ """Get USM data object underlying the array"""
+ return arr.get_base()
+
+
+cdef api void UsmNDArray_SetWritableFlag(usm_ndarray arr, int flag):
+ """Set/unset USM_ARRAY_WRITABLE in the given array `arr`."""
+ arr._set_writable_flag(flag)
+
+
+cdef api object UsmNDArray_MakeSimpleFromMemory(
+ int nd, const Py_ssize_t *shape, int typenum,
+ c_dpmem._Memory mobj, Py_ssize_t offset, char order
+):
+ """Create contiguous usm_ndarray.
+
+ Args:
+ nd: number of dimensions (non-negative)
+ shape: array of nd non-negative array's sizes along each dimension
+ typenum: array elemental type number
+        mobj: _Memory object wrapping the USM allocation the array
+            is built upon (carries the associated SYCL queue)
+ offset: distance between element with zero multi-index and the
+ start of allocation
+ order: Memory layout of the array. Use 'C' for C-contiguous or
+ row-major layout; 'F' for F-contiguous or column-major layout
+ Returns:
+ Created usm_ndarray instance
+ """
+ cdef object shape_tuple = _make_int_tuple(nd, shape)
+ cdef usm_ndarray arr = usm_ndarray(
+ shape_tuple,
+ dtype=_make_typestr(typenum),
+ buffer=mobj,
+ offset=offset,
+ order=(order)
+ )
+ return arr
+
+
+cdef api object UsmNDArray_MakeSimpleFromPtr(
+ size_t nelems,
+ int typenum,
+ c_dpctl.DPCTLSyclUSMRef ptr,
+ c_dpctl.DPCTLSyclQueueRef QRef,
+ object owner
+):
+ """Create 1D contiguous usm_ndarray from pointer.
+
+ Args:
+ nelems: number of elements in array
+ typenum: array elemental type number
+ ptr: pointer to the start of allocation
+ QRef: DPCTLSyclQueueRef associated with the allocation
+ owner: Python object managing lifetime of USM allocation.
+ Value None implies transfer of USM allocation ownership
+ to the created array object.
+ Returns:
+ Created usm_ndarray instance
+ """
+ cdef int itemsize = type_bytesize(typenum)
+ if (itemsize < 1):
+ raise ValueError(
+ "dtype with typenum=" + str(typenum) + " is not supported."
+ )
+    cdef size_t nbytes = (<size_t> itemsize) * nelems
+ cdef c_dpmem._Memory mobj
+ mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+ ptr, nbytes, QRef, memory_owner=owner
+ )
+ cdef usm_ndarray arr = usm_ndarray(
+ (nelems,),
+ dtype=_make_typestr(typenum),
+ buffer=mobj
+ )
+ return arr
+
+cdef api object UsmNDArray_MakeFromPtr(
+ int nd,
+ const Py_ssize_t *shape,
+ int typenum,
+ const Py_ssize_t *strides,
+ c_dpctl.DPCTLSyclUSMRef ptr,
+ c_dpctl.DPCTLSyclQueueRef QRef,
+ Py_ssize_t offset,
+ object owner
+):
+ """
+ General usm_ndarray constructor from externally made USM-allocation.
+
+ Args:
+ nd: number of dimensions (non-negative)
+ shape: array of nd non-negative array's sizes along each dimension
+ typenum: array elemental type number
+ strides: array of nd strides along each dimension in elements
+ ptr: pointer to the start of allocation
+ QRef: DPCTLSyclQueueRef associated with the allocation
+ offset: distance between element with zero multi-index and the
+ start of allocation
+ owner: Python object managing lifetime of USM allocation.
+ Value None implies transfer of USM allocation ownership
+ to the created array object.
+ Returns:
+ Created usm_ndarray instance
+ """
+ cdef int itemsize = type_bytesize(typenum)
+ cdef size_t nelems = 1
+ cdef Py_ssize_t min_disp = 0
+ cdef Py_ssize_t max_disp = 0
+ cdef Py_ssize_t step_ = 0
+ cdef Py_ssize_t dim_ = 0
+ cdef it = 0
+ cdef c_dpmem._Memory mobj
+ cdef usm_ndarray arr
+ cdef object obj_shape
+ cdef object obj_strides
+
+ if (itemsize < 1):
+ raise ValueError(
+ "dtype with typenum=" + str(typenum) + " is not supported."
+ )
+ if (nd < 0):
+ raise ValueError("Dimensionality must be non-negative")
+ if (ptr is NULL or QRef is NULL):
+ raise ValueError(
+ "Non-null USM allocation pointer and QRef are expected"
+ )
+ if (nd == 0):
+ # case of 0d scalars
+ mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+ ptr, itemsize, QRef, memory_owner=owner
+ )
+ arr = usm_ndarray(
+ tuple(),
+ dtype=_make_typestr(typenum),
+ buffer=mobj
+ )
+ return arr
+ if (shape is NULL or strides is NULL):
+ raise ValueError("Both shape and stride vectors are required")
+ for it in range(nd):
+ dim_ = shape[it]
+ if dim_ < 0:
+ raise ValueError(
+ f"Dimension along axis {it} must be non-negative"
+ )
+ nelems *= dim_
+ if dim_ > 0:
+ step_ = strides[it]
+ if step_ > 0:
+ max_disp += step_ * (dim_ - 1)
+ else:
+ min_disp += step_ * (dim_ - 1)
+
+ obj_shape = _make_int_tuple(nd, shape)
+ obj_strides = _make_int_tuple(nd, strides)
+ if nelems == 0:
+ mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+ ptr, itemsize, QRef, memory_owner=owner
+ )
+ arr = usm_ndarray(
+ obj_shape,
+ dtype=_make_typestr(typenum),
+ strides=obj_strides,
+ buffer=mobj,
+ offset=0
+ )
+ return arr
+ if offset + min_disp < 0:
+ raise ValueError(
+ "Given shape, strides and offset reference out-of-bound memory"
+ )
+    nbytes = (<size_t> itemsize) * (offset + max_disp + 1)
+ mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref(
+ ptr, nbytes, QRef, memory_owner=owner
+ )
+ arr = usm_ndarray(
+ obj_shape,
+ dtype=_make_typestr(typenum),
+ strides=obj_strides,
+ buffer=mobj,
+ offset=offset
+ )
+ return arr
+
+
+def _is_object_with_buffer_protocol(o):
+ "Returns True if object supports Python buffer protocol"
+ return _is_buffer(o)
diff --git a/dpctl_ext/tensor/_utility_functions.py b/dpctl_ext/tensor/_utility_functions.py
index 821f0954017a..c892d777102d 100644
--- a/dpctl_ext/tensor/_utility_functions.py
+++ b/dpctl_ext/tensor/_utility_functions.py
@@ -29,12 +29,11 @@
import builtins
import operator
-import dpctl.tensor as dpt
import dpctl.utils as du
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
import dpctl_ext.tensor._tensor_reductions_impl as tri
@@ -60,7 +59,7 @@ def _boolean_reduction(x, axis, keepdims, func):
red_nd = nd
# case of a scalar
if red_nd == 0:
- return dpt_ext.astype(x, dpt.bool)
+ return dpt.astype(x, dpt.bool)
x_tmp = x
res_shape = ()
perm = list(range(nd))
@@ -72,9 +71,9 @@ def _boolean_reduction(x, axis, keepdims, func):
red_nd = len(axis)
# check for axis=()
if red_nd == 0:
- return dpt_ext.astype(x, dpt.bool)
+ return dpt.astype(x, dpt.bool)
perm = [i for i in range(nd) if i not in axis] + list(axis)
- x_tmp = dpt_ext.permute_dims(x, perm)
+ x_tmp = dpt.permute_dims(x, perm)
res_shape = x_tmp.shape[: nd - red_nd]
exec_q = x.sycl_queue
@@ -85,7 +84,7 @@ def _boolean_reduction(x, axis, keepdims, func):
# always allocate the temporary as
# int32 and usm-device to ensure that atomic updates
# are supported
- res_tmp = dpt_ext.empty(
+ res_tmp = dpt.empty(
res_shape,
dtype=dpt.int32,
usm_type="device",
@@ -101,7 +100,7 @@ def _boolean_reduction(x, axis, keepdims, func):
_manager.add_event_pair(hev0, ev0)
# copy to boolean result array
- res = dpt_ext.empty(
+ res = dpt.empty(
res_shape,
dtype=dpt.bool,
usm_type=res_usm_type,
@@ -115,7 +114,7 @@ def _boolean_reduction(x, axis, keepdims, func):
if keepdims:
res_shape = res_shape + (1,) * red_nd
inv_perm = sorted(range(nd), key=lambda d: perm[d])
- res = dpt_ext.permute_dims(dpt_ext.reshape(res, res_shape), inv_perm)
+ res = dpt.permute_dims(dpt.reshape(res, res_shape), inv_perm)
return res
@@ -292,7 +291,7 @@ def _concat_diff_input(arr, axis, prepend, append):
if isinstance(prepend, dpt.usm_ndarray):
a_prepend = prepend
else:
- a_prepend = dpt_ext.asarray(
+ a_prepend = dpt.asarray(
prepend,
dtype=prepend_dtype,
usm_type=coerced_usm_type,
@@ -301,7 +300,7 @@ def _concat_diff_input(arr, axis, prepend, append):
if isinstance(append, dpt.usm_ndarray):
a_append = append
else:
- a_append = dpt_ext.asarray(
+ a_append = dpt.asarray(
append,
dtype=append_dtype,
usm_type=coerced_usm_type,
@@ -309,11 +308,11 @@ def _concat_diff_input(arr, axis, prepend, append):
)
if not prepend_shape:
prepend_shape = arr_shape[:axis] + (1,) + arr_shape[axis + 1 :]
- a_prepend = dpt_ext.broadcast_to(a_prepend, prepend_shape)
+ a_prepend = dpt.broadcast_to(a_prepend, prepend_shape)
if not append_shape:
append_shape = arr_shape[:axis] + (1,) + arr_shape[axis + 1 :]
- a_append = dpt_ext.broadcast_to(a_append, append_shape)
- return dpt_ext.concat((a_prepend, arr, a_append), axis=axis)
+ a_append = dpt.broadcast_to(a_append, append_shape)
+ return dpt.concat((a_prepend, arr, a_append), axis=axis)
elif prepend is not None:
q1, x_usm_type = arr.sycl_queue, arr.usm_type
q2, prepend_usm_type = _get_queue_usm_type(prepend)
@@ -361,7 +360,7 @@ def _concat_diff_input(arr, axis, prepend, append):
if isinstance(prepend, dpt.usm_ndarray):
a_prepend = prepend
else:
- a_prepend = dpt_ext.asarray(
+ a_prepend = dpt.asarray(
prepend,
dtype=prepend_dtype,
usm_type=coerced_usm_type,
@@ -369,8 +368,8 @@ def _concat_diff_input(arr, axis, prepend, append):
)
if not prepend_shape:
prepend_shape = arr_shape[:axis] + (1,) + arr_shape[axis + 1 :]
- a_prepend = dpt_ext.broadcast_to(a_prepend, prepend_shape)
- return dpt_ext.concat((a_prepend, arr), axis=axis)
+ a_prepend = dpt.broadcast_to(a_prepend, prepend_shape)
+ return dpt.concat((a_prepend, arr), axis=axis)
elif append is not None:
q1, x_usm_type = arr.sycl_queue, arr.usm_type
q2, append_usm_type = _get_queue_usm_type(append)
@@ -416,7 +415,7 @@ def _concat_diff_input(arr, axis, prepend, append):
if isinstance(append, dpt.usm_ndarray):
a_append = append
else:
- a_append = dpt_ext.asarray(
+ a_append = dpt.asarray(
append,
dtype=append_dtype,
usm_type=coerced_usm_type,
@@ -424,8 +423,8 @@ def _concat_diff_input(arr, axis, prepend, append):
)
if not append_shape:
append_shape = arr_shape[:axis] + (1,) + arr_shape[axis + 1 :]
- a_append = dpt_ext.broadcast_to(a_append, append_shape)
- return dpt_ext.concat((arr, a_append), axis=axis)
+ a_append = dpt.broadcast_to(a_append, append_shape)
+ return dpt.concat((arr, a_append), axis=axis)
else:
arr1 = arr
return arr1
@@ -489,7 +488,7 @@ def diff(x, /, *, axis=-1, n=1, prepend=None, append=None):
slice(None) if i != axis else slice(None, -1) for i in range(x_nd)
)
- diff_op = dpt_ext.not_equal if x.dtype == dpt.bool else dpt_ext.subtract
+ diff_op = dpt.not_equal if x.dtype == dpt.bool else dpt.subtract
if n > 1:
arr_tmp0 = diff_op(arr[sl0], arr[sl1])
arr_tmp1 = diff_op(arr_tmp0[sl0], arr_tmp0[sl1])
diff --git a/dpctl_ext/tensor/include/dlpack/LICENSE.third-party b/dpctl_ext/tensor/include/dlpack/LICENSE.third-party
new file mode 100644
index 000000000000..20a9c8a7b4dc
--- /dev/null
+++ b/dpctl_ext/tensor/include/dlpack/LICENSE.third-party
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "{}"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright 2017 by Contributors
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/dpctl_ext/tensor/include/dlpack/README.md b/dpctl_ext/tensor/include/dlpack/README.md
new file mode 100644
index 000000000000..3a7bc6d422cd
--- /dev/null
+++ b/dpctl_ext/tensor/include/dlpack/README.md
@@ -0,0 +1,7 @@
+# DLPack header
+
+The header `dlpack.h` downloaded from `https://github.com/dmlc/dlpack.git` remote at tag v1.0rc commit [`62100c1`](https://github.com/dmlc/dlpack/commit/62100c123144ae7a80061f4220be2dbd3cbaefc7).
+
+The file can also be viewed using github web interface at https://github.com/dmlc/dlpack/blob/62100c123144ae7a80061f4220be2dbd3cbaefc7/include/dlpack/dlpack.h
+
+License file was retrieved from https://github.com/dmlc/dlpack/blob/main/LICENSE
diff --git a/dpctl_ext/tensor/include/dlpack/dlpack.h b/dpctl_ext/tensor/include/dlpack/dlpack.h
new file mode 100644
index 000000000000..cd71e799be3c
--- /dev/null
+++ b/dpctl_ext/tensor/include/dlpack/dlpack.h
@@ -0,0 +1,675 @@
+/*!
+ * Copyright (c) 2017 - by Contributors
+ * \file dlpack.h
+ * \brief The common header of DLPack.
+ */
+#ifndef DLPACK_DLPACK_H_
+#define DLPACK_DLPACK_H_
+
+/**
+ * \brief Compatibility with C++
+ */
+#ifdef __cplusplus
+#define DLPACK_EXTERN_C extern "C"
+#else
+#define DLPACK_EXTERN_C
+#endif
+
+/*! \brief The current major version of dlpack */
+#define DLPACK_MAJOR_VERSION 1
+
+/*! \brief The current minor version of dlpack */
+#define DLPACK_MINOR_VERSION 2
+
+/*! \brief DLPACK_DLL prefix for windows */
+#ifdef _WIN32
+#ifdef DLPACK_EXPORTS
+#define DLPACK_DLL __declspec(dllexport)
+#else
+#define DLPACK_DLL __declspec(dllimport)
+#endif
+#else
+#define DLPACK_DLL
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+ /*!
+ * \brief The DLPack version.
+ *
+ * A change in major version indicates that we have changed the
+ * data layout of the ABI - DLManagedTensorVersioned.
+ *
+ * A change in minor version indicates that we have added new
+ * code, such as a new device type, but the ABI is kept the same.
+ *
+ * If an obtained DLPack tensor has a major version that disagrees
+ * with the version number specified in this header file
+ * (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter
+ * (and it is safe to do so). It is not safe to access any other fields
+ * as the memory layout will have changed.
+ *
+ * In the case of a minor version mismatch, the tensor can be safely used as
+ * long as the consumer knows how to interpret all fields. Minor version
+ * updates indicate the addition of enumeration values.
+ */
+ typedef struct
+ {
+ /*! \brief DLPack major version. */
+ uint32_t major;
+ /*! \brief DLPack minor version. */
+ uint32_t minor;
+ } DLPackVersion;
+
+/*!
+ * \brief The device type in DLDevice.
+ */
+#ifdef __cplusplus
+ typedef enum : int32_t
+ {
+#else
+typedef enum
+{
+#endif
+ /*! \brief CPU device */
+ kDLCPU = 1,
+ /*! \brief CUDA GPU device */
+ kDLCUDA = 2,
+ /*!
+ * \brief Pinned CUDA CPU memory by cudaMallocHost
+ */
+ kDLCUDAHost = 3,
+ /*! \brief OpenCL devices. */
+ kDLOpenCL = 4,
+ /*! \brief Vulkan buffer for next generation graphics. */
+ kDLVulkan = 7,
+ /*! \brief Metal for Apple GPU. */
+ kDLMetal = 8,
+ /*! \brief Verilog simulator buffer */
+ kDLVPI = 9,
+ /*! \brief ROCm GPUs for AMD GPUs */
+ kDLROCM = 10,
+ /*!
+ * \brief Pinned ROCm CPU memory allocated by hipMallocHost
+ */
+ kDLROCMHost = 11,
+ /*!
+ * \brief Reserved extension device type,
+ * used for quickly test extension device
+ * The semantics can differ depending on the implementation.
+ */
+ kDLExtDev = 12,
+ /*!
+ * \brief CUDA managed/unified memory allocated by cudaMallocManaged
+ */
+ kDLCUDAManaged = 13,
+ /*!
+ * \brief Unified shared memory allocated on a oneAPI non-partititioned
+ * device. Call to oneAPI runtime is required to determine the device
+ * type, the USM allocation type and the sycl context it is bound to.
+ *
+ */
+ kDLOneAPI = 14,
+ /*! \brief GPU support for next generation WebGPU standard. */
+ kDLWebGPU = 15,
+ /*! \brief Qualcomm Hexagon DSP */
+ kDLHexagon = 16,
+ /*! \brief Microsoft MAIA devices */
+ kDLMAIA = 17,
+ /*! \brief AWS Trainium */
+ kDLTrn = 18,
+ } DLDeviceType;
+
+ /*!
+ * \brief A Device for Tensor and operator.
+ */
+ typedef struct
+ {
+ /*! \brief The device type used in the device. */
+ DLDeviceType device_type;
+ /*!
+ * \brief The device index.
+ * For vanilla CPU memory, pinned memory, or managed memory, this is set
+ * to 0.
+ */
+ int32_t device_id;
+ } DLDevice;
+
+ /*!
+ * \brief The type code options DLDataType.
+ */
+ typedef enum
+ {
+ /*! \brief signed integer */
+ kDLInt = 0U,
+ /*! \brief unsigned integer */
+ kDLUInt = 1U,
+ /*! \brief IEEE floating point */
+ kDLFloat = 2U,
+ /*!
+ * \brief Opaque handle type, reserved for testing purposes.
+ * Frameworks need to agree on the handle data type for the exchange to
+ * be well-defined.
+ */
+ kDLOpaqueHandle = 3U,
+ /*! \brief bfloat16 */
+ kDLBfloat = 4U,
+ /*!
+ * \brief complex number
+ * (C/C++/Python layout: compact struct per complex number)
+ */
+ kDLComplex = 5U,
+ /*! \brief boolean */
+ kDLBool = 6U,
+ /*! \brief FP8 data types */
+ kDLFloat8_e3m4 = 7U,
+ kDLFloat8_e4m3 = 8U,
+ kDLFloat8_e4m3b11fnuz = 9U,
+ kDLFloat8_e4m3fn = 10U,
+ kDLFloat8_e4m3fnuz = 11U,
+ kDLFloat8_e5m2 = 12U,
+ kDLFloat8_e5m2fnuz = 13U,
+ kDLFloat8_e8m0fnu = 14U,
+ /*! \brief FP6 data types
+ * Setting bits != 6 is currently unspecified, and the producer must
+ * ensure it is set while the consumer must stop importing if the value
+ * is unexpected.
+ */
+ kDLFloat6_e2m3fn = 15U,
+ kDLFloat6_e3m2fn = 16U,
+ /*! \brief FP4 data types
+ * Setting bits != 4 is currently unspecified, and the producer must
+ * ensure it is set while the consumer must stop importing if the value
+ * is unexpected.
+ */
+ kDLFloat4_e2m1fn = 17U,
+ } DLDataTypeCode;
+
+ /*!
+ * \brief The data type the tensor can hold. The data type is assumed to
+ * follow the native endian-ness. An explicit error message should be raised
+ * when attempting to export an array with non-native endianness
+ *
+ * Examples
+ * - float: type_code = 2, bits = 32, lanes = 1
+ * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
+ * - int8: type_code = 0, bits = 8, lanes = 1
+ * - std::complex<float>: type_code = 5, bits = 64, lanes = 1
+ * - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
+ * convention, the underlying storage size of bool is 8 bits)
+ * - float8_e4m3: type_code = 8, bits = 8, lanes = 1 (packed in memory)
+ * - float6_e3m2fn: type_code = 16, bits = 6, lanes = 1 (packed in memory)
+ * - float4_e2m1fn: type_code = 17, bits = 4, lanes = 1 (packed in memory)
+ *
+ * When a sub-byte type is packed, DLPack requires the data to be in little
+ * bit-endian, i.e., for a packed data set D ((D >> (i * bits)) && bit_mask)
+ * stores the i-th element.
+ */
+ typedef struct
+ {
+ /*!
+ * \brief Type code of base types.
+ * We keep it uint8_t instead of DLDataTypeCode for minimal memory
+ * footprint, but the value should be one of DLDataTypeCode enum values.
+ * */
+ uint8_t code;
+ /*!
+ * \brief Number of bits, common choices are 8, 16, 32.
+ */
+ uint8_t bits;
+ /*! \brief Number of lanes in the type, used for vector types. */
+ uint16_t lanes;
+ } DLDataType;
+
+ /*!
+ * \brief Plain C Tensor object, does not manage memory.
+ */
+ typedef struct
+ {
+ /*!
+ * \brief The data pointer points to the allocated data. This will be
+ * CUDA device pointer or cl_mem handle in OpenCL. It may be opaque on
+ * some device types. This pointer is always aligned to 256 bytes as in
+ * CUDA. The `byte_offset` field should be used to point to the
+ * beginning of the data.
+ *
+ * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch,
+ * TensorFlow, TVM, perhaps others) do not adhere to this 256 byte
+ * alignment requirement on CPU/CUDA/ROCm, and always use
+ * `byte_offset=0`. This must be fixed (after which this note will be
+ * updated); at the moment it is recommended to not rely on the data
+ * pointer being correctly aligned.
+ *
+ * For given DLTensor, the size of memory required to store the contents
+ * of data is calculated as follows:
+ *
+ * \code{.c}
+ * static inline size_t GetDataSize(const DLTensor* t) {
+ * size_t size = 1;
+ * for (tvm_index_t i = 0; i < t->ndim; ++i) {
+ * size *= t->shape[i];
+ * }
+ * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
+ * return size;
+ * }
+ * \endcode
+ *
+ * Note that if the tensor is of size zero, then the data pointer should
+ * be set to `NULL`.
+ */
+ void *data;
+ /*! \brief The device of the tensor */
+ DLDevice device;
+ /*! \brief Number of dimensions */
+ int32_t ndim;
+ /*! \brief The data type of the pointer*/
+ DLDataType dtype;
+ /*!
+ * \brief The shape of the tensor
+ *
+ * When ndim == 0, shape can be set to NULL.
+ */
+ int64_t *shape;
+ /*!
+ * \brief strides of the tensor (in number of elements, not bytes),
+ * can not be NULL if ndim != 0, must point to
+ * an array of ndim elements that specifies the strides,
+ * so consumer can always rely on strides[dim] being valid for 0 <= dim
+ * < ndim.
+ *
+ * When ndim == 0, strides can be set to NULL.
+ *
+ * \note Before DLPack v1.2, strides can be NULL to indicate contiguous
+ * data. This is not allowed in DLPack v1.2 and later. The rationale is
+ * to simplify the consumer handling.
+ */
+ int64_t *strides;
+ /*! \brief The offset in bytes to the beginning pointer to data */
+ uint64_t byte_offset;
+ } DLTensor;
+
+ /*!
+ * \brief C Tensor object, manage memory of DLTensor. This data structure is
+ * intended to facilitate the borrowing of DLTensor by another framework.
+ * It is not meant to transfer the tensor. When the borrowing framework
+ * doesn't need the tensor, it should call the deleter to notify the host
+ * that the resource is no longer needed.
+ *
+ * \note This data structure is used as Legacy DLManagedTensor
+ * in DLPack exchange and is deprecated after DLPack v0.8
+ * Use DLManagedTensorVersioned instead.
+ * This data structure may get renamed or deleted in future versions.
+ *
+ * \sa DLManagedTensorVersioned
+ */
+ typedef struct DLManagedTensor
+ {
+ /*! \brief DLTensor which is being memory managed */
+ DLTensor dl_tensor;
+ /*! \brief the context of the original host framework of DLManagedTensor
+ * in which DLManagedTensor is used in the framework. It can also be
+ * NULL.
+ */
+ void *manager_ctx;
+ /*!
+ * \brief Destructor - this should be called
+ * to destruct the manager_ctx which backs the DLManagedTensor. It can
+ * be NULL if there is no way for the caller to provide a reasonable
+ * destructor. The destructor deletes the argument self as well.
+ */
+ void (*deleter)(struct DLManagedTensor *self);
+ } DLManagedTensor;
+
+// bit masks used in the DLManagedTensorVersioned
+
+/*! \brief bit mask to indicate that the tensor is read only. */
+#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL)
+
+/*!
+ * \brief bit mask to indicate that the tensor is a copy made by the producer.
+ *
+ * If set, the tensor is considered solely owned throughout its lifetime by the
+ * consumer, until the producer-provided deleter is invoked.
+ */
+#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL)
+
+/*!
+ * \brief bit mask to indicate that whether a sub-byte type is packed or padded.
+ *
+ * The default for sub-byte types (ex: fp4/fp6) is assumed packed. This flag can
+ * be set by the producer to signal that a tensor of sub-byte type is padded.
+ */
+#define DLPACK_FLAG_BITMASK_IS_SUBBYTE_TYPE_PADDED (1UL << 2UL)
+
+ /*!
+ * \brief A versioned and managed C Tensor object, manage memory of
+ * DLTensor.
+ *
+ * This data structure is intended to facilitate the borrowing of DLTensor
+ * by another framework. It is not meant to transfer the tensor. When the
+ * borrowing framework doesn't need the tensor, it should call the deleter
+ * to notify the host that the resource is no longer needed.
+ *
+ * \note This is the current standard DLPack exchange data structure.
+ */
+ typedef struct DLManagedTensorVersioned
+ {
+ /*!
+ * \brief The API and ABI version of the current managed Tensor
+ */
+ DLPackVersion version;
+ /*!
+ * \brief the context of the original host framework.
+ *
+ * Stores DLManagedTensorVersioned is used in the
+ * framework. It can also be NULL.
+ */
+ void *manager_ctx;
+ /*!
+ * \brief Destructor.
+ *
+ * This should be called to destruct manager_ctx which holds the
+ * DLManagedTensorVersioned. It can be NULL if there is no way for the
+ * caller to provide a reasonable destructor. The destructor deletes the
+ * argument self as well.
+ */
+ void (*deleter)(struct DLManagedTensorVersioned *self);
+ /*!
+ * \brief Additional bitmask flags information about the tensor.
+ *
+ * By default the flags should be set to 0.
+ *
+ * \note Future ABI changes should keep everything until this field
+ * stable, to ensure that deleter can be correctly called.
+ *
+ * \sa DLPACK_FLAG_BITMASK_READ_ONLY
+ * \sa DLPACK_FLAG_BITMASK_IS_COPIED
+ */
+ uint64_t flags;
+ /*! \brief DLTensor which is being memory managed */
+ DLTensor dl_tensor;
+ } DLManagedTensorVersioned;
+
+ //----------------------------------------------------------------------
+ // DLPack `__c_dlpack_exchange_api__` fast exchange protocol definitions
+ //----------------------------------------------------------------------
+ /*!
+ * \brief Request a producer library to create a new tensor.
+ *
+ * Create a new `DLManagedTensorVersioned` within the context of the
+ * producer library. The allocation is defined via the prototype DLTensor.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param prototype The prototype DLTensor. Only the dtype, ndim, shape,
+ * and device fields are used.
+ * \param out The output DLManagedTensorVersioned.
+ * \param error_ctx Context for `SetError`.
+ * \param SetError The function to set the error.
+ * \return The owning DLManagedTensorVersioned* or NULL on failure.
+ * SetError is called exactly when NULL is returned (the implementer
+ * must ensure this).
+ * \note - As a C function, must not throw C++ exceptions.
+ * - Error propagation via SetError to avoid any direct need
+ * of Python API. Due to this `SetError` may have to ensure the GIL
+ * is held since it will presumably set a Python error.
+ *
+ * \sa DLPackExchangeAPI
+ */
+ typedef int (*DLPackManagedTensorAllocator)( //
+ DLTensor *prototype,
+ DLManagedTensorVersioned **out,
+ void *error_ctx, //
+ void (*SetError)(void *error_ctx,
+ const char *kind,
+ const char *message) //
+ );
+
+ /*!
+ * \brief Exports a PyObject* Tensor/NDArray to a DLManagedTensorVersioned.
+ *
+ * This function does not perform any stream synchronization. The consumer
+ * should query DLPackCurrentWorkStream to get the current work stream and
+ * launch kernels on it.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param py_object The Python object to convert. Must have the same type
+ * as the one the `DLPackExchangeAPI` was discovered from.
+ * \return The owning DLManagedTensorVersioned* or NULL on failure with a
+ * Python exception set. If the data cannot be described using
+ * DLPack this should be a BufferError if possible. \note - As a C function,
+ * must not throw C++ exceptions.
+ *
+ * \sa DLPackExchangeAPI, DLPackCurrentWorkStream
+ */
+ typedef int (*DLPackManagedTensorFromPyObjectNoSync)( //
+ void *py_object, //
+ DLManagedTensorVersioned **out //
+ );
+
+ /*!
+ * \brief Exports a PyObject* Tensor/NDArray to a provided DLTensor.
+ *
+ * This function provides a faster interface for temporary, non-owning,
+ * exchange. The producer (implementer) still owns the memory of data,
+ * strides, shape. The liveness of the DLTensor and the data it views is
+ * only guaranteed until control is returned.
+ *
+ * This function currently assumes that the producer (implementer) can fill
+ * in the DLTensor shape and strides without the need for temporary
+ * allocations.
+ *
+ * This function does not perform any stream synchronization. The consumer
+ * should query DLPackCurrentWorkStream to get the current work stream and
+ * launch kernels on it.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param py_object The Python object to convert. Must have the same type
+ * as the one the `DLPackExchangeAPI` was discovered from.
+ * \param out The output DLTensor, whose space is pre-allocated on stack.
+ * \return 0 on success, -1 on failure with a Python exception set.
+ * \note - As a C function, must not throw C++ exceptions.
+ *
+ * \sa DLPackExchangeAPI, DLPackCurrentWorkStream
+ */
+ typedef int (*DLPackDLTensorFromPyObjectNoSync)( //
+ void *py_object, //
+ DLTensor *out //
+ );
+
+ /*!
+ * \brief Obtain the current work stream of a device.
+ *
+ * Obtain the current work stream of a device from the producer framework.
+ * For example, it should map to torch.cuda.current_stream in PyTorch.
+ *
+ * When device_type is kDLCPU, the consumer does not have to query the stream
+ * and the producer can simply return NULL when queried.
+ * The consumer does not have to do anything on stream sync or setting.
+ * So CPU only framework can just provide a dummy implementation that
+ * always set out_current_stream[0] to NULL.
+ *
+ * \param device_type The device type.
+ * \param device_id The device id.
+ * \param out_current_stream The output current work stream.
+ *
+ * \return 0 on success, -1 on failure with a Python exception set.
+ * \note - As a C function, must not throw C++ exceptions.
+ *
+ * \sa DLPackExchangeAPI
+ */
+ typedef int (*DLPackCurrentWorkStream)( //
+ DLDeviceType device_type, //
+ int32_t device_id, //
+ void **out_current_stream //
+ );
+
+ /*!
+ * \brief Imports a DLManagedTensorVersioned to a PyObject* Tensor/NDArray.
+ *
+ * Convert an owning DLManagedTensorVersioned* to the Python tensor of the
+ * producer (implementer) library with the correct type.
+ *
+ * This function does not perform any stream synchronization.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param tensor The DLManagedTensorVersioned to convert; its ownership is stolen.
+ * \param out_py_object The output Python object.
+ * \return 0 on success, -1 on failure with a Python exception set.
+ *
+ * \sa DLPackExchangeAPI
+ */
+ typedef int (*DLPackManagedTensorToPyObjectNoSync)( //
+ DLManagedTensorVersioned *tensor, //
+ void **out_py_object //
+ );
+
+ /*!
+ * \brief DLPackExchangeAPI stable header.
+ * \sa DLPackExchangeAPI
+ */
+ typedef struct DLPackExchangeAPIHeader
+ {
+ /*!
+ * \brief The provided DLPack version the consumer must check major
+ * version compatibility before using this struct.
+ */
+ DLPackVersion version;
+ /*!
+ * \brief Optional pointer to an older DLPackExchangeAPI in the chain.
+ *
+ * It must be NULL if the framework does not support older versions.
+ * If the current major version is larger than the one supported by the
+ * consumer, the consumer may walk this to find an earlier supported
+ * version.
+ *
+ * \sa DLPackExchangeAPI
+ */
+ struct DLPackExchangeAPIHeader *prev_api;
+ } DLPackExchangeAPIHeader;
+
+ /*!
+ * \brief Framework-specific function pointers table for DLPack exchange.
+ *
+ * Additionally to `__dlpack__()` we define a C function table sharable by
+ * Python implementations via `__c_dlpack_exchange_api__`.
+ * This attribute must be set on the type as a Python integer compatible
+ * with `PyLong_FromVoidPtr`/`PyLong_AsVoidPtr`.
+ *
+ * A consumer library may use a pattern such as:
+ *
+ * \code
+ *
+ * PyObject *api_obj = type(tensor_obj).__c_dlpack_exchange_api__; // as C-code
+ * MyDLPackExchangeAPI *api = PyLong_AsVoidPtr(api_obj);
+ * if (api == NULL && PyErr_Occurred()) { goto handle_error; }
+ *
+ * \endcode
+ *
+ * Note that this must be defined on the type. The consumer should look up
+ * the attribute on the type and may cache the result for each unique type.
+ *
+ * The precise API table is given by:
+ * \code
+ * struct MyDLPackExchangeAPI : public DLPackExchangeAPI {
+ * MyDLPackExchangeAPI() {
+ * header.version.major = DLPACK_MAJOR_VERSION;
+ * header.version.minor = DLPACK_MINOR_VERSION;
+ * header.prev_version_api = nullptr;
+ *
+ * managed_tensor_allocator = MyDLPackManagedTensorAllocator;
+ * managed_tensor_from_py_object_no_sync =
+ * MyDLPackManagedTensorFromPyObjectNoSync;
+ * managed_tensor_to_py_object_no_sync =
+ * MyDLPackManagedTensorToPyObjectNoSync; dltensor_from_py_object_no_sync =
+ * MyDLPackDLTensorFromPyObjectNoSync; current_work_stream =
+ * MyDLPackCurrentWorkStream;
+ * }
+ *
+ * static const DLPackExchangeAPI* Global() {
+ * static MyDLPackExchangeAPI inst;
+ * return &inst;
+ * }
+ * };
+ * \endcode
+ *
+ * Guidelines for leveraging DLPackExchangeAPI:
+ *
+ * There are generally two kinds of consumer needs for DLPack exchange:
+ * - N0: library support, where consumer.kernel(x, y, z) would like to run a
+ * kernel with the data from x, y, z. The consumer is also expected to run
+ * the kernel with the same stream context as the producer. For example,
+ * when x, y, z is torch.Tensor, consumer should query
+ * exchange_api->current_work_stream to get the current stream and launch
+ * the kernel with the same stream. This setup is necessary for no
+ * synchronization in kernel launch and maximum compatibility with CUDA
+ * graph capture in the producer. This is the desirable behavior for library
+ * extension support for frameworks like PyTorch.
+ * - N1: data ingestion and retention
+ *
+ * Note that obj.__dlpack__() API should provide useful ways for N1.
+ * The primary focus of the current DLPackExchangeAPI is to enable faster
+ * exchange N0 with the support of the function pointer current_work_stream.
+ *
+ * Array/Tensor libraries should statically create and initialize this
+ * structure then return a pointer to DLPackExchangeAPI as an int value in
+ * Tensor/Array. The DLPackExchangeAPI* must stay alive throughout the
+ * lifetime of the process.
+ *
+ * One simple way to do so is to create a static instance of
+ * DLPackExchangeAPI within the framework and return a pointer to it. The
+ * following code shows an example to do so in C++. It should also be
+ * reasonably easy to do so in other languages.
+ */
+ typedef struct DLPackExchangeAPI
+ {
+ /*!
+ * \brief The header that remains stable across versions.
+ */
+ DLPackExchangeAPIHeader header;
+ /*!
+ * \brief Producer function pointer for DLPackManagedTensorAllocator
+ * This function must not be NULL.
+ * \sa DLPackManagedTensorAllocator
+ */
+ DLPackManagedTensorAllocator managed_tensor_allocator;
+ /*!
+ * \brief Producer function pointer for DLPackManagedTensorFromPyObject
+ * This function must be not NULL.
+ * \sa DLPackManagedTensorFromPyObject
+ */
+ DLPackManagedTensorFromPyObjectNoSync
+ managed_tensor_from_py_object_no_sync;
+ /*!
+ * \brief Producer function pointer for DLPackManagedTensorToPyObject
+ * This function must be not NULL.
+ * \sa DLPackManagedTensorToPyObject
+ */
+ DLPackManagedTensorToPyObjectNoSync managed_tensor_to_py_object_no_sync;
+ /*!
+ * \brief Producer function pointer for DLPackDLTensorFromPyObject
+ * This function can be NULL when the producer does not support
+ * this function. \sa DLPackDLTensorFromPyObjectNoSync
+ */
+ DLPackDLTensorFromPyObjectNoSync dltensor_from_py_object_no_sync;
+ /*!
+ * \brief Producer function pointer for DLPackCurrentWorkStream
+ * This function must be not NULL.
+ * \sa DLPackCurrentWorkStream
+ */
+ DLPackCurrentWorkStream current_work_stream;
+ } DLPackExchangeAPI;
+
+#ifdef __cplusplus
+} // DLPACK_EXTERN_C
+#endif
+#endif // DLPACK_DLPACK_H_
diff --git a/dpnp/__init__.py b/dpnp/__init__.py
index 02420107972f..0d5c79b9a671 100644
--- a/dpnp/__init__.py
+++ b/dpnp/__init__.py
@@ -64,7 +64,7 @@
# Borrowed from DPCTL
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
- from dpctl.tensor import __array_api_version__, DLDeviceType
+ from dpctl_ext.tensor import __array_api_version__, DLDeviceType
from .dpnp_array import dpnp_array as ndarray
from .dpnp_array_api_info import __array_namespace_info__
diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt
index 69a99b996d97..2dce27001bbd 100644
--- a/dpnp/backend/extensions/blas/CMakeLists.txt
+++ b/dpnp/backend/extensions/blas/CMakeLists.txt
@@ -39,6 +39,8 @@ set(_module_src
pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI)
+
if(_dpnp_sycl_targets)
# make fat binary
target_compile_options(
diff --git a/dpnp/backend/extensions/fft/CMakeLists.txt b/dpnp/backend/extensions/fft/CMakeLists.txt
index 8a96d8cbd25a..bfebe1ed4226 100644
--- a/dpnp/backend/extensions/fft/CMakeLists.txt
+++ b/dpnp/backend/extensions/fft/CMakeLists.txt
@@ -33,6 +33,8 @@ set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/fft_py.cpp)
pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI)
+
if(_dpnp_sycl_targets)
# make fat binary
target_compile_options(
diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt
index 373c6152f662..7729e2807a4d 100644
--- a/dpnp/backend/extensions/indexing/CMakeLists.txt
+++ b/dpnp/backend/extensions/indexing/CMakeLists.txt
@@ -36,6 +36,8 @@ set(_module_src
pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI)
+
if(_dpnp_sycl_targets)
# make fat binary
target_compile_options(
diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt
index 2bac0932a673..a3ee4bae8ee5 100644
--- a/dpnp/backend/extensions/lapack/CMakeLists.txt
+++ b/dpnp/backend/extensions/lapack/CMakeLists.txt
@@ -55,6 +55,7 @@ set(_module_src
pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI)
if(_dpnp_sycl_targets)
# make fat binary
diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt
index 60d26295acf8..88b3f185e6f6 100644
--- a/dpnp/backend/extensions/statistics/CMakeLists.txt
+++ b/dpnp/backend/extensions/statistics/CMakeLists.txt
@@ -41,6 +41,8 @@ set(_module_src
pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI)
+
if(_dpnp_sycl_targets)
# make fat binary
target_compile_options(
diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt
index 45d2706fb48d..d954316dcb2a 100644
--- a/dpnp/backend/extensions/ufunc/CMakeLists.txt
+++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt
@@ -67,6 +67,8 @@ set(_module_src
pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI)
+
if(WIN32)
if(${CMAKE_VERSION} VERSION_LESS "3.27")
# this is a work-around for target_link_options inserting option after -link option, cause
diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt
index 32f7d4281c2f..0d69c4e79c03 100644
--- a/dpnp/backend/extensions/vm/CMakeLists.txt
+++ b/dpnp/backend/extensions/vm/CMakeLists.txt
@@ -90,6 +90,8 @@ set(python_module_name _vm_impl)
pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI)
+
if(WIN32)
if(${CMAKE_VERSION} VERSION_LESS "3.27")
# this is a work-around for target_link_options inserting option after -link option, cause
diff --git a/dpnp/backend/extensions/window/CMakeLists.txt b/dpnp/backend/extensions/window/CMakeLists.txt
index 5b7921ad324c..c8cbd7c03bbc 100644
--- a/dpnp/backend/extensions/window/CMakeLists.txt
+++ b/dpnp/backend/extensions/window/CMakeLists.txt
@@ -36,6 +36,8 @@ set(_module_src
pybind11_add_module(${python_module_name} MODULE ${_module_src})
add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src})
+target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI)
+
if(_dpnp_sycl_targets)
# make fat binary
target_compile_options(
diff --git a/dpnp/backend/include/dpnp4pybind11.hpp b/dpnp/backend/include/dpnp4pybind11.hpp
index cd287989bef2..af2f5f866eba 100644
--- a/dpnp/backend/include/dpnp4pybind11.hpp
+++ b/dpnp/backend/include/dpnp4pybind11.hpp
@@ -28,7 +28,66 @@
#pragma once
-#include "dpctl_capi.h"
+// TODO: Enable dpctl_capi.h once dpctl.tensor is removed.
+// Also call `import_dpctl_ext__tensor___usmarray();` right after
+// `import_dpctl()` (line 334) to initialize the dpctl_ext tensor C-API.
+//
+// Now we include dpctl C-API headers explicitly in order to
+// integrate dpctl_ext tensor C-API.
+
+// #include "dpctl_capi.h"
+
+// clang-format off
+// Ordering of includes is important here. dpctl_sycl_types and
+// dpctl_sycl_extension_interface define types used by dpctl's Python
+// C-API headers.
+#include "syclinterface/dpctl_sycl_types.h"
+#include "syclinterface/dpctl_sycl_extension_interface.h"
+#ifdef __cplusplus
+#define CYTHON_EXTERN_C extern "C"
+#else
+#define CYTHON_EXTERN_C
+#endif
+#include "dpctl/_sycl_device.h"
+#include "dpctl/_sycl_device_api.h"
+#include "dpctl/_sycl_context.h"
+#include "dpctl/_sycl_context_api.h"
+#include "dpctl/_sycl_event.h"
+#include "dpctl/_sycl_event_api.h"
+#include "dpctl/_sycl_queue.h"
+#include "dpctl/_sycl_queue_api.h"
+#include "dpctl/memory/_memory.h"
+#include "dpctl/memory/_memory_api.h"
+#include "dpctl/program/_program.h"
+#include "dpctl/program/_program_api.h"
+
+// clang-format on
+
+// TODO: Keep these includes once `dpctl.tensor` is removed from dpctl,
+// but replace the hardcoded relative path with a proper include path
+#include
+#include
+
+/*
+ * Function to import dpctl and make C-API functions available.
+ * C functions can use dpctl's C-API functions without linking to
+ * shared objects defining this symbols, if they call `import_dpctl()`
+ * prior to using those symbols.
+ *
+ * It is declared inline to allow multiple definitions in
+ * different translation units
+ */
+static inline void import_dpctl(void)
+{
+ import_dpctl___sycl_device();
+ import_dpctl___sycl_context();
+ import_dpctl___sycl_event();
+ import_dpctl___sycl_queue();
+ import_dpctl__memory___memory();
+ import_dpctl_ext__tensor___usmarray();
+ import_dpctl__program___program();
+ return;
+}
#include
#include <cstddef> // for std::size_t for C++ linkage
@@ -410,8 +469,10 @@ class dpctl_capi
default_usm_memory_ = std::shared_ptr(
new py::object{py_default_usm_memory}, Deleter{});
+ // TODO: revert to `py::module_::import("dpctl.tensor._usmarray");`
+ // when dpnp fully migrates dpctl/tensor
py::module_ mod_usmarray =
- py::module_::import("dpctl.tensor._usmarray");
+ py::module_::import("dpctl_ext.tensor._usmarray");
auto tensor_kl = mod_usmarray.attr("usm_ndarray");
const py::object &py_default_usm_ndarray =
diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py
index 4e2ee8531a18..fb277dd4d310 100644
--- a/dpnp/dpnp_algo/dpnp_arraycreation.py
+++ b/dpnp/dpnp_algo/dpnp_arraycreation.py
@@ -29,13 +29,12 @@
import math
import operator
-import dpctl.tensor as dpt
import dpctl.utils as dpu
import numpy
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpnp
from dpnp.dpnp_array import dpnp_array
from dpnp.dpnp_utils import get_usm_allocations, map_dtype_to_device
@@ -53,7 +52,7 @@ def _as_usm_ndarray(a, usm_type, sycl_queue):
if isinstance(a, dpnp_array):
a = a.get_array()
- return dpt_ext.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue)
+ return dpt.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue)
def _check_has_zero_val(a):
@@ -196,7 +195,7 @@ def dpnp_linspace(
if dpnp.isscalar(start) and dpnp.isscalar(stop):
# Call linspace() function for scalars.
- usm_res = dpt_ext.linspace(
+ usm_res = dpt.linspace(
start,
stop,
num,
@@ -213,19 +212,19 @@ def dpnp_linspace(
else:
step = dpnp.nan
else:
- usm_start = dpt_ext.asarray(
+ usm_start = dpt.asarray(
start,
dtype=dt,
usm_type=_usm_type,
sycl_queue=sycl_queue_normalized,
)
- usm_stop = dpt_ext.asarray(
+ usm_stop = dpt.asarray(
stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized
)
delta = usm_stop - usm_start
- usm_res = dpt_ext.arange(
+ usm_res = dpt.arange(
0,
stop=num,
step=1,
@@ -233,9 +232,7 @@ def dpnp_linspace(
usm_type=_usm_type,
sycl_queue=sycl_queue_normalized,
)
- usm_res = dpt_ext.reshape(
- usm_res, (-1,) + (1,) * delta.ndim, copy=False
- )
+ usm_res = dpt.reshape(usm_res, (-1,) + (1,) * delta.ndim, copy=False)
if step_num > 0:
step = delta / step_num
@@ -243,7 +240,7 @@ def dpnp_linspace(
# Needed a special handling for denormal numbers (when step == 0),
# see numpy#5437 for more details.
# Note, dpt.where() is used to avoid a synchronization branch.
- usm_res = dpt_ext.where(
+ usm_res = dpt.where(
step == 0, (usm_res / step_num) * delta, usm_res * step
)
else:
@@ -256,17 +253,17 @@ def dpnp_linspace(
usm_res[-1, ...] = usm_stop
if axis != 0:
- usm_res = dpt_ext.moveaxis(usm_res, 0, axis)
+ usm_res = dpt.moveaxis(usm_res, 0, axis)
if dpnp.issubdtype(dtype, dpnp.integer):
dpt.floor(usm_res, out=usm_res)
- res = dpt_ext.astype(usm_res, dtype, copy=False)
+ res = dpt.astype(usm_res, dtype, copy=False)
res = dpnp_array._create_from_usm_ndarray(res)
if retstep is True:
if dpnp.isscalar(step):
- step = dpt_ext.asarray(
+ step = dpt.asarray(
step, usm_type=res.usm_type, sycl_queue=res.sycl_queue
)
return res, dpnp_array._create_from_usm_ndarray(step)
diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py
index d7eeccf78489..271013b58090 100644
--- a/dpnp/dpnp_algo/dpnp_elementwise_common.py
+++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py
@@ -29,28 +29,27 @@
import warnings
from functools import wraps
-import dpctl.tensor as dpt
-import dpctl.tensor._type_utils as dtu
import dpctl.utils as dpu
import numpy
-from dpctl.tensor._elementwise_common import (
- BinaryElementwiseFunc,
- UnaryElementwiseFunc,
-)
-from dpctl.tensor._scalar_utils import (
- _get_dtype,
- _get_shape,
- _validate_dtype,
-)
# pylint: disable=no-name-in-module
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._copy_utils as dtc
import dpctl_ext.tensor._tensor_impl as dti
+import dpctl_ext.tensor._type_utils as dtu
import dpnp
import dpnp.backend.extensions.vm._vm_impl as vmi
+from dpctl_ext.tensor._elementwise_common import (
+ BinaryElementwiseFunc,
+ UnaryElementwiseFunc,
+)
+from dpctl_ext.tensor._scalar_utils import (
+ _get_dtype,
+ _get_shape,
+ _validate_dtype,
+)
from dpnp.dpnp_array import dpnp_array
from dpnp.dpnp_utils import get_usm_allocations
from dpnp.dpnp_utils.dpnp_utils_common import (
@@ -213,7 +212,7 @@ def __call__(
x_usm = dpnp.get_usm_ndarray(x)
if dtype is not None:
- x_usm = dpt_ext.astype(x_usm, dtype, copy=False)
+ x_usm = dpt.astype(x_usm, dtype, copy=False)
out = self._unpack_out_kw(out)
out_usm = None if out is None else dpnp.get_usm_ndarray(out)
@@ -467,7 +466,7 @@ def __call__(
)
# Allocate a temporary buffer with the required dtype
- out[i] = dpt_ext.empty_like(res, dtype=res_dt)
+ out[i] = dpt.empty_like(res, dtype=res_dt)
elif (
buf_dt is None
and dti._array_overlap(x, res)
@@ -476,7 +475,7 @@ def __call__(
# Allocate a temporary buffer to avoid memory overlapping.
# Note if `buf_dt` is not None, a temporary copy of `x` will be
# created, so the array overlap check isn't needed.
- out[i] = dpt_ext.empty_like(res)
+ out[i] = dpt.empty_like(res)
_manager = dpu.SequentialOrderManager[exec_q]
dep_evs = _manager.submitted_events
@@ -486,7 +485,7 @@ def __call__(
if order == "K":
buf = dtc._empty_like_orderK(x, buf_dt)
else:
- buf = dpt_ext.empty_like(x, dtype=buf_dt, order=order)
+ buf = dpt.empty_like(x, dtype=buf_dt, order=order)
ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray(
src=x, dst=buf, sycl_queue=exec_q, depends=dep_evs
@@ -503,7 +502,7 @@ def __call__(
if order == "K":
out[i] = dtc._empty_like_orderK(x, res_dt)
else:
- out[i] = dpt_ext.empty_like(x, dtype=res_dt, order=order)
+ out[i] = dpt.empty_like(x, dtype=res_dt, order=order)
# Call the unary function with input and output arrays
ht_unary_ev, unary_ev = self.get_implementation_function()(
@@ -713,24 +712,24 @@ def __call__(
if dtype is not None:
if dpnp.isscalar(x1):
- x1_usm = dpt_ext.asarray(
+ x1_usm = dpt.asarray(
x1,
dtype=dtype,
sycl_queue=x2.sycl_queue,
usm_type=x2.usm_type,
)
- x2_usm = dpt_ext.astype(x2_usm, dtype, copy=False)
+ x2_usm = dpt.astype(x2_usm, dtype, copy=False)
elif dpnp.isscalar(x2):
- x1_usm = dpt_ext.astype(x1_usm, dtype, copy=False)
- x2_usm = dpt_ext.asarray(
+ x1_usm = dpt.astype(x1_usm, dtype, copy=False)
+ x2_usm = dpt.asarray(
x2,
dtype=dtype,
sycl_queue=x1.sycl_queue,
usm_type=x1.usm_type,
)
else:
- x1_usm = dpt_ext.astype(x1_usm, dtype, copy=False)
- x2_usm = dpt_ext.astype(x2_usm, dtype, copy=False)
+ x1_usm = dpt.astype(x1_usm, dtype, copy=False)
+ x2_usm = dpt.astype(x2_usm, dtype, copy=False)
res_usm = super().__call__(x1_usm, x2_usm, out=out_usm, order=order)
@@ -1078,7 +1077,7 @@ def __call__(
)
# Allocate a temporary buffer with the required dtype
- out[i] = dpt_ext.empty_like(res, dtype=res_dt)
+ out[i] = dpt.empty_like(res, dtype=res_dt)
else:
# If `dt` is not None, a temporary copy of `x` will be created,
# so the array overlap check isn't needed.
@@ -1094,7 +1093,7 @@ def __call__(
for x in x_to_check
):
# allocate a temporary buffer to avoid memory overlapping
- out[i] = dpt_ext.empty_like(res)
+ out[i] = dpt.empty_like(res)
x1 = dpnp.as_usm_ndarray(x1, dtype=x1_dt, sycl_queue=exec_q)
x2 = dpnp.as_usm_ndarray(x2, dtype=x2_dt, sycl_queue=exec_q)
@@ -1127,7 +1126,7 @@ def __call__(
if order == "K":
buf = dtc._empty_like_orderK(x, buf_dt)
else:
- buf = dpt_ext.empty_like(x, dtype=buf_dt, order=order)
+ buf = dpt.empty_like(x, dtype=buf_dt, order=order)
ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray(
src=x, dst=buf, sycl_queue=exec_q, depends=dep_evs
@@ -1146,7 +1145,7 @@ def __call__(
x1, x2, res_dt, res_shape, res_usm_type, exec_q
)
else:
- out[i] = dpt_ext.empty(
+ out[i] = dpt.empty(
res_shape,
dtype=res_dt,
order=order,
@@ -1156,9 +1155,9 @@ def __call__(
# Broadcast shapes of input arrays
if x1.shape != res_shape:
- x1 = dpt_ext.broadcast_to(x1, res_shape)
+ x1 = dpt.broadcast_to(x1, res_shape)
if x2.shape != res_shape:
- x2 = dpt_ext.broadcast_to(x2, res_shape)
+ x2 = dpt.broadcast_to(x2, res_shape)
# Call the binary function with input and output arrays
ht_binary_ev, binary_ev = self.get_implementation_function()(
@@ -1326,7 +1325,7 @@ def __call__(self, x, /, decimals=0, out=None, *, dtype=None):
res_usm = dpt.divide(x_usm, 10**decimals, out=out_usm)
if dtype is not None:
- res_usm = dpt_ext.astype(res_usm, dtype, copy=False)
+ res_usm = dpt.astype(res_usm, dtype, copy=False)
if out is not None and isinstance(out, dpnp_array):
return out
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 6418302d6e7b..cbb5835bbfc4 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -37,11 +37,9 @@
import warnings
-import dpctl.tensor as dpt
-
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._type_utils as dtu
import dpnp
from dpctl_ext.tensor._numpy_helper import AxisError
@@ -777,7 +775,7 @@ def asnumpy(self):
"""
- return dpt_ext.asnumpy(self._array_obj)
+ return dpt.asnumpy(self._array_obj)
def astype(
self,
@@ -2283,7 +2281,7 @@ def transpose(self, *axes):
# self.transpose(None).shape == self.shape[::-1]
axes = tuple((ndim - x - 1) for x in range(ndim))
- usm_res = dpt_ext.permute_dims(self._array_obj, axes)
+ usm_res = dpt.permute_dims(self._array_obj, axes)
return dpnp_array._create_from_usm_ndarray(usm_res)
def var(
diff --git a/dpnp/dpnp_array_api_info.py b/dpnp/dpnp_array_api_info.py
index 6a3939d046b0..f792600cbb66 100644
--- a/dpnp/dpnp_array_api_info.py
+++ b/dpnp/dpnp_array_api_info.py
@@ -36,7 +36,9 @@
"""
-import dpctl.tensor as dpt
+# TODO: revert to `import dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+import dpctl_ext.tensor as dpt
def __array_namespace_info__():
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 9fca083a6413..13b957ffff8f 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -45,17 +45,16 @@
import os
import dpctl
-import dpctl.tensor as dpt
import dpctl.utils as dpu
import numpy
-from dpctl.tensor._device import normalize_queue_device
# pylint: disable=no-name-in-module
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
import dpnp
+from dpctl_ext.tensor._device import normalize_queue_device
from .dpnp_array import dpnp_array
from .dpnp_utils import (
@@ -137,7 +136,7 @@ def asnumpy(a, order="C"):
return a.asnumpy()
if isinstance(a, dpt.usm_ndarray):
- return dpt_ext.asnumpy(a)
+ return dpt.asnumpy(a)
return numpy.asarray(a, order=order)
@@ -191,7 +190,7 @@ def as_usm_ndarray(a, dtype=None, device=None, usm_type=None, sycl_queue=None):
if is_supported_array_type(a):
return get_usm_ndarray(a)
- return dpt_ext.asarray(
+ return dpt.asarray(
a, dtype=dtype, device=device, usm_type=usm_type, sycl_queue=sycl_queue
)
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index d09cc17bde79..2800df0b2ac8 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -43,12 +43,11 @@
import operator
-import dpctl.tensor as dpt
import numpy
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpnp
from dpnp import dpnp_container
@@ -937,7 +936,7 @@ def astype(x, dtype, /, *, order="K", casting="unsafe", copy=True, device=None):
order = "K"
usm_x = dpnp.get_usm_ndarray(x)
- usm_res = dpt_ext.astype(
+ usm_res = dpt.astype(
usm_x, dtype, order=order, casting=casting, copy=copy, device=device
)
@@ -3119,7 +3118,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"):
s0 = (1,) * ndim
output = [
- dpt_ext.reshape(dpnp.get_usm_ndarray(x), s0[:i] + (-1,) + s0[i + 1 :])
+ dpt.reshape(dpnp.get_usm_ndarray(x), s0[:i] + (-1,) + s0[i + 1 :])
for i, x in enumerate(xi)
]
@@ -3127,14 +3126,14 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"):
_, _ = get_usm_allocations(output)
if indexing == "xy" and ndim > 1:
- output[0] = dpt_ext.reshape(output[0], (1, -1) + s0[2:])
- output[1] = dpt_ext.reshape(output[1], (-1, 1) + s0[2:])
+ output[0] = dpt.reshape(output[0], (1, -1) + s0[2:])
+ output[1] = dpt.reshape(output[1], (-1, 1) + s0[2:])
if not sparse:
- output = dpt_ext.broadcast_arrays(*output)
+ output = dpt.broadcast_arrays(*output)
if copy:
- output = [dpt_ext.copy(x) for x in output]
+ output = [dpt.copy(x) for x in output]
return [dpnp_array._create_from_usm_ndarray(x) for x in output]
@@ -3696,7 +3695,7 @@ def tri(
if usm_type is None:
usm_type = "device"
- m = dpt_ext.ones(
+ m = dpt.ones(
(N, M),
dtype=_dtype,
device=device,
@@ -3912,7 +3911,7 @@ def vander(
if dpnp.is_supported_array_type(x):
x = dpnp.get_usm_ndarray(x)
- usm_x = dpt_ext.asarray(
+ usm_x = dpt.asarray(
x, device=device, usm_type=usm_type, sycl_queue=sycl_queue
)
@@ -3934,8 +3933,8 @@ def vander(
tmp = m[:, ::-1] if not increasing else m
dpnp.power(
- dpt_ext.reshape(usm_x, (-1, 1)),
- dpt_ext.arange(
+ dpt.reshape(usm_x, (-1, 1)),
+ dpt.arange(
N, dtype=_dtype, usm_type=x_usm_type, sycl_queue=x_sycl_queue
),
out=tmp,
diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py
index a52196e9e4db..4b8fb7bb6a38 100644
--- a/dpnp/dpnp_iface_indexing.py
+++ b/dpnp/dpnp_iface_indexing.py
@@ -44,14 +44,13 @@
import operator
from collections.abc import Iterable
-import dpctl.tensor as dpt
import dpctl.utils as dpu
import numpy
# pylint: disable=no-name-in-module
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_impl as ti
import dpnp
@@ -141,9 +140,9 @@ def _choose_run(inds, chcs, q, usm_type, out=None, mode=0):
ti._array_overlap(out, chc) for chc in chcs
):
# Allocate a temporary buffer to avoid memory overlapping.
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
else:
- out = dpt_ext.empty(
+ out = dpt.empty(
inds.shape, dtype=chcs[0].dtype, usm_type=usm_type, sycl_queue=q
)
@@ -242,7 +241,7 @@ def choose(a, choices, out=None, mode="wrap"):
# NumPy will cast up to int64 in general but
# int32 is more than safe for bool
if ind_dt == dpnp.bool:
- inds = dpt_ext.astype(inds, dpt.int32)
+ inds = dpt.astype(inds, dpt.int32)
else:
raise TypeError("input index array must be of integer data type")
@@ -250,17 +249,17 @@ def choose(a, choices, out=None, mode="wrap"):
res_usm_type, exec_q = get_usm_allocations(choices + [inds])
# apply type promotion to input choices
- res_dt = dpt_ext.result_type(*choices)
+ res_dt = dpt.result_type(*choices)
if len(choices) > 1:
choices = tuple(
map(
lambda chc: (
- chc if chc.dtype == res_dt else dpt_ext.astype(chc, res_dt)
+ chc if chc.dtype == res_dt else dpt.astype(chc, res_dt)
),
choices,
)
)
- arrs_broadcast = dpt_ext.broadcast_arrays(inds, *choices)
+ arrs_broadcast = dpt.broadcast_arrays(inds, *choices)
inds = arrs_broadcast[0]
choices = tuple(arrs_broadcast[1:])
@@ -301,11 +300,9 @@ def _take_index(x, inds, axis, q, usm_type, out=None, mode=0):
if ti._array_overlap(x, out):
# Allocate a temporary buffer to avoid memory overlapping.
- out = dpt_ext.empty_like(out)
+ out = dpt.empty_like(out)
else:
- out = dpt_ext.empty(
- res_sh, dtype=x.dtype, usm_type=usm_type, sycl_queue=q
- )
+ out = dpt.empty(res_sh, dtype=x.dtype, usm_type=usm_type, sycl_queue=q)
_manager = dpu.SequentialOrderManager[q]
dep_evs = _manager.submitted_events
@@ -816,16 +813,16 @@ def extract(condition, a):
)
if usm_cond.size != usm_a.size:
- usm_a = dpt_ext.reshape(usm_a, -1)
- usm_cond = dpt_ext.reshape(usm_cond, -1)
+ usm_a = dpt.reshape(usm_a, -1)
+ usm_cond = dpt.reshape(usm_cond, -1)
- usm_res = dpt_ext.take(usm_a, dpt_ext.nonzero(usm_cond)[0])
+ usm_res = dpt.take(usm_a, dpt.nonzero(usm_cond)[0])
else:
if usm_cond.shape != usm_a.shape:
- usm_a = dpt_ext.reshape(usm_a, -1)
- usm_cond = dpt_ext.reshape(usm_cond, -1)
+ usm_a = dpt.reshape(usm_a, -1)
+ usm_cond = dpt.reshape(usm_cond, -1)
- usm_res = dpt_ext.extract(usm_cond, usm_a)
+ usm_res = dpt.extract(usm_cond, usm_a)
return dpnp_array._create_from_usm_ndarray(usm_res)
@@ -960,18 +957,18 @@ def fill_diagonal(a, val, wrap=False):
# a.flat[:end:step] = val
# but need to consider use case when `a` is usm_ndarray also
a_sh = a.shape
- tmp_a = dpt_ext.reshape(usm_a, -1)
+ tmp_a = dpt.reshape(usm_a, -1)
if dpnp.isscalar(usm_val):
tmp_a[:end:step] = usm_val
else:
- usm_val = dpt_ext.reshape(usm_val, -1)
+ usm_val = dpt.reshape(usm_val, -1)
# Setitem can work only if index size equal val size.
# Using loop for general case without dependencies of val size.
for i in range(0, usm_val.size):
tmp_a[step * i : end : step * (i + 1)] = usm_val[i]
- tmp_a = dpt_ext.reshape(tmp_a, a_sh)
+ tmp_a = dpt.reshape(tmp_a, a_sh)
usm_a[:] = tmp_a
@@ -1548,7 +1545,7 @@ def nonzero(a):
usm_a = dpnp.get_usm_ndarray(a)
return tuple(
- dpnp_array._create_from_usm_ndarray(y) for y in dpt_ext.nonzero(usm_a)
+ dpnp_array._create_from_usm_ndarray(y) for y in dpt.nonzero(usm_a)
)
@@ -1612,16 +1609,14 @@ def place(a, mask, vals):
if usm_vals.ndim != 1:
# dpt.place supports only 1-D array of values
- usm_vals = dpt_ext.reshape(usm_vals, -1)
+ usm_vals = dpt.reshape(usm_vals, -1)
if usm_vals.dtype != usm_a.dtype:
# dpt.place casts values to a.dtype with "unsafe" rule,
# while numpy.place does that with "safe" casting rule
- usm_vals = dpt_ext.astype(
- usm_vals, usm_a.dtype, casting="safe", copy=False
- )
+ usm_vals = dpt.astype(usm_vals, usm_a.dtype, casting="safe", copy=False)
- dpt_ext.place(usm_a, usm_mask, usm_vals)
+ dpt.place(usm_a, usm_mask, usm_vals)
def put(a, ind, v, /, *, axis=None, mode="wrap"):
@@ -1711,19 +1706,19 @@ def put(a, ind, v, /, *, axis=None, mode="wrap"):
if usm_ind.ndim != 1:
# dpt.put supports only 1-D array of indices
- usm_ind = dpt_ext.reshape(usm_ind, -1, copy=False)
+ usm_ind = dpt.reshape(usm_ind, -1, copy=False)
if not dpnp.issubdtype(usm_ind.dtype, dpnp.integer):
# dpt.put supports only integer dtype for array of indices
- usm_ind = dpt_ext.astype(usm_ind, dpnp.intp, casting="safe")
+ usm_ind = dpt.astype(usm_ind, dpnp.intp, casting="safe")
in_usm_a = usm_a
if axis is None and usm_a.ndim > 1:
- usm_a = dpt_ext.reshape(usm_a, -1)
+ usm_a = dpt.reshape(usm_a, -1)
- dpt_ext.put(usm_a, usm_ind, usm_v, axis=axis, mode=mode)
+ dpt.put(usm_a, usm_ind, usm_v, axis=axis, mode=mode)
if in_usm_a._pointer != usm_a._pointer: # pylint: disable=protected-access
- in_usm_a[:] = dpt_ext.reshape(usm_a, in_usm_a.shape, copy=False)
+ in_usm_a[:] = dpt.reshape(usm_a, in_usm_a.shape, copy=False)
def put_along_axis(a, ind, values, axis, mode="wrap"):
@@ -1805,11 +1800,11 @@ def put_along_axis(a, ind, values, axis, mode="wrap"):
if dpnp.is_supported_array_type(values):
usm_vals = dpnp.get_usm_ndarray(values)
else:
- usm_vals = dpt_ext.asarray(
+ usm_vals = dpt.asarray(
values, usm_type=a.usm_type, sycl_queue=a.sycl_queue
)
- dpt_ext.put_along_axis(usm_a, usm_ind, usm_vals, axis=axis, mode=mode)
+ dpt.put_along_axis(usm_a, usm_ind, usm_vals, axis=axis, mode=mode)
def putmask(x1, mask, values):
@@ -2153,7 +2148,7 @@ def take(a, indices, /, *, axis=None, out=None, mode="wrap"):
usm_a = dpnp.get_usm_ndarray(a)
if not dpnp.is_supported_array_type(indices):
- usm_ind = dpt_ext.asarray(
+ usm_ind = dpt.asarray(
indices, usm_type=a.usm_type, sycl_queue=a.sycl_queue
)
else:
@@ -2165,7 +2160,7 @@ def take(a, indices, /, *, axis=None, out=None, mode="wrap"):
if axis is None:
if a_ndim > 1:
# flatten input array
- usm_a = dpt_ext.reshape(usm_a, -1)
+ usm_a = dpt.reshape(usm_a, -1)
axis = 0
elif a_ndim == 0:
axis = normalize_axis_index(operator.index(axis), 1)
@@ -2174,7 +2169,7 @@ def take(a, indices, /, *, axis=None, out=None, mode="wrap"):
if not dpnp.issubdtype(usm_ind.dtype, dpnp.integer):
# dpt.take supports only integer dtype for array of indices
- usm_ind = dpt_ext.astype(usm_ind, dpnp.intp, copy=False, casting="safe")
+ usm_ind = dpt.astype(usm_ind, dpnp.intp, copy=False, casting="safe")
usm_res = _take_index(
usm_a, usm_ind, axis, exec_q, res_usm_type, out=out, mode=mode
@@ -2297,7 +2292,7 @@ def take_along_axis(a, indices, axis=-1, mode="wrap"):
usm_a = dpnp.get_usm_ndarray(a)
usm_ind = dpnp.get_usm_ndarray(indices)
- usm_res = dpt_ext.take_along_axis(usm_a, usm_ind, axis=axis, mode=mode)
+ usm_res = dpt.take_along_axis(usm_a, usm_ind, axis=axis, mode=mode)
return dpnp_array._create_from_usm_ndarray(usm_res)
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 2ff08cc6ec8b..0fc2c3f80fde 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -45,12 +45,11 @@
from typing import NamedTuple
import dpctl
-import dpctl.tensor as dpt
import numpy
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpnp
from dpctl_ext.tensor._numpy_helper import (
AxisError,
@@ -375,27 +374,25 @@ def _get_first_nan_index(usm_a):
):
if dpnp.issubdtype(usm_a.dtype, dpnp.complexfloating):
# for complex all NaNs are considered equivalent
- true_val = dpt_ext.asarray(
+ true_val = dpt.asarray(
True, sycl_queue=usm_a.sycl_queue, usm_type=usm_a.usm_type
)
- return dpt_ext.searchsorted(
- dpt.isnan(usm_a), true_val, side="left"
- )
- return dpt_ext.searchsorted(usm_a, usm_a[-1], side="left")
+ return dpt.searchsorted(dpt.isnan(usm_a), true_val, side="left")
+ return dpt.searchsorted(usm_a, usm_a[-1], side="left")
return None
usm_ar = dpnp.get_usm_ndarray(ar)
num_of_flags = (return_index, return_inverse, return_counts).count(True)
if num_of_flags == 0:
- usm_res = dpt_ext.unique_values(usm_ar)
+ usm_res = dpt.unique_values(usm_ar)
usm_res = (usm_res,) # cast to a tuple to align with other cases
elif num_of_flags == 1 and return_inverse:
- usm_res = dpt_ext.unique_inverse(usm_ar)
+ usm_res = dpt.unique_inverse(usm_ar)
elif num_of_flags == 1 and return_counts:
- usm_res = dpt_ext.unique_counts(usm_ar)
+ usm_res = dpt.unique_counts(usm_ar)
else:
- usm_res = dpt_ext.unique_all(usm_ar)
+ usm_res = dpt.unique_all(usm_ar)
first_nan = None
if equal_nan:
@@ -417,10 +414,10 @@ def _get_first_nan_index(usm_a):
if first_nan is not None:
# all NaNs are collapsed, so need to replace the indices with
# the index of the first NaN value in result array of unique values
- dpt_ext.place(
+ dpt.place(
usm_res.inverse_indices,
usm_res.inverse_indices > first_nan,
- dpt_ext.reshape(first_nan, 1),
+ dpt.reshape(first_nan, 1),
)
result += (usm_res.inverse_indices,)
@@ -428,9 +425,7 @@ def _get_first_nan_index(usm_a):
if first_nan is not None:
# all NaNs are collapsed, so need to put a count of all NaNs
# at the last index
- dpt_ext.sum(
- usm_res.counts[first_nan:], out=usm_res.counts[first_nan]
- )
+ dpt.sum(usm_res.counts[first_nan:], out=usm_res.counts[first_nan])
result += (usm_res.counts[: first_nan + 1],)
else:
result += (usm_res.counts,)
@@ -1097,9 +1092,7 @@ def broadcast_arrays(*args, subok=False):
if len(args) == 0:
return []
- usm_arrays = dpt_ext.broadcast_arrays(
- *[dpnp.get_usm_ndarray(a) for a in args]
- )
+ usm_arrays = dpt.broadcast_arrays(*[dpnp.get_usm_ndarray(a) for a in args])
return [dpnp_array._create_from_usm_ndarray(a) for a in usm_arrays]
@@ -1184,7 +1177,7 @@ def broadcast_to(array, /, shape, subok=False):
raise NotImplementedError(f"subok={subok} is currently not supported")
usm_array = dpnp.get_usm_ndarray(array)
- new_array = dpt_ext.broadcast_to(usm_array, shape)
+ new_array = dpt.broadcast_to(usm_array, shape)
return dpnp_array._create_from_usm_ndarray(new_array)
@@ -1276,7 +1269,7 @@ def can_cast(from_, to, casting="safe"):
if dpnp.is_supported_array_type(from_)
else dpnp.dtype(from_)
)
- return dpt_ext.can_cast(dtype_from, to, casting=casting)
+ return dpt.can_cast(dtype_from, to, casting=casting)
def column_stack(tup):
@@ -1422,7 +1415,7 @@ def concatenate(
)
usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays]
- usm_res = dpt_ext.concat(usm_arrays, axis=axis)
+ usm_res = dpt.concat(usm_arrays, axis=axis)
res = dpnp_array._create_from_usm_ndarray(usm_res)
if dtype is not None:
@@ -1527,7 +1520,7 @@ def copyto(dst, src, casting="same_kind", where=True):
f"but got {where.dtype}"
)
- dst_usm, src_usm, mask_usm = dpt_ext.broadcast_arrays(
+ dst_usm, src_usm, mask_usm = dpt.broadcast_arrays(
dpnp.get_usm_ndarray(dst),
dpnp.get_usm_ndarray(src),
dpnp.get_usm_ndarray(where),
@@ -1855,7 +1848,7 @@ def expand_dims(a, axis):
"""
usm_a = dpnp.get_usm_ndarray(a)
- usm_res = dpt_ext.expand_dims(usm_a, axis=axis)
+ usm_res = dpt.expand_dims(usm_a, axis=axis)
return dpnp_array._create_from_usm_ndarray(usm_res)
@@ -1926,7 +1919,7 @@ def flip(m, axis=None):
"""
m_usm = dpnp.get_usm_ndarray(m)
- return dpnp_array._create_from_usm_ndarray(dpt_ext.flip(m_usm, axis=axis))
+ return dpnp_array._create_from_usm_ndarray(dpt.flip(m_usm, axis=axis))
def fliplr(m):
@@ -2370,7 +2363,7 @@ def matrix_transpose(x, /):
f"but it is {usm_x.ndim}"
)
- usm_res = dpt_ext.matrix_transpose(usm_x)
+ usm_res = dpt.matrix_transpose(usm_x)
return dpnp_array._create_from_usm_ndarray(usm_res)
@@ -2414,7 +2407,7 @@ def moveaxis(a, source, destination):
usm_array = dpnp.get_usm_ndarray(a)
return dpnp_array._create_from_usm_ndarray(
- dpt_ext.moveaxis(usm_array, source, destination)
+ dpt.moveaxis(usm_array, source, destination)
)
@@ -2843,7 +2836,7 @@ def repeat(a, repeats, axis=None):
a = dpnp.ravel(a)
usm_arr = dpnp.get_usm_ndarray(a)
- usm_res = dpt_ext.repeat(usm_arr, repeats, axis=axis)
+ usm_res = dpt.repeat(usm_arr, repeats, axis=axis)
return dpnp_array._create_from_usm_ndarray(usm_res)
@@ -3066,7 +3059,7 @@ def reshape(a, /, shape, order="C", *, copy=None):
)
usm_a = dpnp.get_usm_ndarray(a)
- usm_res = dpt_ext.reshape(usm_a, shape=shape, order=order, copy=copy)
+ usm_res = dpt.reshape(usm_a, shape=shape, order=order, copy=copy)
return dpnp_array._create_from_usm_ndarray(usm_res)
@@ -3201,7 +3194,7 @@ def result_type(*arrays_and_dtypes):
)
for X in arrays_and_dtypes
]
- return dpt_ext.result_type(*usm_arrays_and_dtypes)
+ return dpt.result_type(*usm_arrays_and_dtypes)
def roll(x, shift, axis=None):
@@ -3268,9 +3261,9 @@ def roll(x, shift, axis=None):
shift = dpnp.asnumpy(shift)
if axis is None:
- return roll(dpt_ext.reshape(usm_x, -1), shift, 0).reshape(x.shape)
+ return roll(dpt.reshape(usm_x, -1), shift, 0).reshape(x.shape)
- usm_res = dpt_ext.roll(usm_x, shift=shift, axis=axis)
+ usm_res = dpt.roll(usm_x, shift=shift, axis=axis)
return dpnp_array._create_from_usm_ndarray(usm_res)
@@ -3669,7 +3662,7 @@ def squeeze(a, /, axis=None):
"""
usm_a = dpnp.get_usm_ndarray(a)
- usm_res = dpt_ext.squeeze(usm_a, axis=axis)
+ usm_res = dpt.squeeze(usm_a, axis=axis)
return dpnp_array._create_from_usm_ndarray(usm_res)
@@ -3757,7 +3750,7 @@ def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"):
)
usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays]
- usm_res = dpt_ext.stack(usm_arrays, axis=axis)
+ usm_res = dpt.stack(usm_arrays, axis=axis)
res = dpnp_array._create_from_usm_ndarray(usm_res)
if dtype is not None:
@@ -3818,7 +3811,7 @@ def swapaxes(a, axis1, axis2):
"""
usm_a = dpnp.get_usm_ndarray(a)
- usm_res = dpt_ext.swapaxes(usm_a, axis1=axis1, axis2=axis2)
+ usm_res = dpt.swapaxes(usm_a, axis1=axis1, axis2=axis2)
return dpnp_array._create_from_usm_ndarray(usm_res)
@@ -3898,7 +3891,7 @@ def tile(A, reps):
"""
usm_a = dpnp.get_usm_ndarray(A)
- usm_res = dpt_ext.tile(usm_a, reps)
+ usm_res = dpt.tile(usm_a, reps)
return dpnp_array._create_from_usm_ndarray(usm_res)
@@ -4528,7 +4521,7 @@ def unstack(x, /, *, axis=0):
if usm_x.ndim == 0:
raise ValueError("Input array must be at least 1-d.")
- res = dpt_ext.unstack(usm_x, axis=axis)
+ res = dpt.unstack(usm_x, axis=axis)
return tuple(dpnp_array._create_from_usm_ndarray(a) for a in res)
diff --git a/dpnp/dpnp_iface_types.py b/dpnp/dpnp_iface_types.py
index f133333d6b83..7d2d60089d98 100644
--- a/dpnp/dpnp_iface_types.py
+++ b/dpnp/dpnp_iface_types.py
@@ -37,12 +37,11 @@
import functools
import dpctl
-import dpctl.tensor as dpt
import numpy
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpnp
from .dpnp_array import dpnp_array
@@ -214,7 +213,7 @@ def finfo(dtype):
"""
if isinstance(dtype, dpnp_array):
dtype = dtype.dtype
- return dpt_ext.finfo(dtype)
+ return dpt.finfo(dtype)
# pylint: disable=redefined-outer-name
@@ -247,7 +246,7 @@ def iinfo(dtype):
if isinstance(dtype, dpnp_array):
dtype = dtype.dtype
- return dpt_ext.iinfo(dtype)
+ return dpt.iinfo(dtype)
def isdtype(dtype, kind):
@@ -301,7 +300,7 @@ def isdtype(dtype, kind):
elif isinstance(kind, tuple):
kind = tuple(dpt.dtype(k) if isinstance(k, type) else k for k in kind)
- return dpt_ext.isdtype(dtype, kind)
+ return dpt.isdtype(dtype, kind)
def issubdtype(arg1, arg2):
diff --git a/dpnp/dpnp_utils/dpnp_utils_statistics.py b/dpnp/dpnp_utils/dpnp_utils_statistics.py
index ec67b619a13f..cd9932cb7153 100644
--- a/dpnp/dpnp_utils/dpnp_utils_statistics.py
+++ b/dpnp/dpnp_utils/dpnp_utils_statistics.py
@@ -29,13 +29,12 @@
import warnings
import dpctl
-import dpctl.tensor as dpt
from dpctl.utils import ExecutionPlacementError
-import dpnp
-
# TODO: revert to `from dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
+import dpctl_ext.tensor as dpt
+import dpnp
from dpctl_ext.tensor._numpy_helper import normalize_axis_tuple
from dpnp.dpnp_array import dpnp_array
diff --git a/dpnp/exceptions/__init__.py b/dpnp/exceptions/__init__.py
index 26d78a853f41..7abcdbf0553f 100644
--- a/dpnp/exceptions/__init__.py
+++ b/dpnp/exceptions/__init__.py
@@ -32,10 +32,13 @@
SyclQueueCreationError,
)
from dpctl.memory import USMAllocationError
-from dpctl.tensor._dlpack import DLPackCreationError
from dpctl.utils import ExecutionPlacementError
from numpy.exceptions import AxisError
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+from dpctl_ext.tensor._dlpack import DLPackCreationError
+
__all__ = [
"AxisError",
"DLPackCreationError",
diff --git a/dpnp/memory/_memory.py b/dpnp/memory/_memory.py
index f978c5e50db2..3e95baacd424 100644
--- a/dpnp/memory/_memory.py
+++ b/dpnp/memory/_memory.py
@@ -26,11 +26,14 @@
# THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************
-import dpctl.tensor as dpt
from dpctl.memory import MemoryUSMDevice as DPCTLMemoryUSMDevice
from dpctl.memory import MemoryUSMHost as DPCTLMemoryUSMHost
from dpctl.memory import MemoryUSMShared as DPCTLMemoryUSMShared
+# TODO: revert to `from dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+import dpctl_ext.tensor as dpt
+
def _add_ptr_property(cls):
_storage_attr = "_ptr"
diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py
index c03787790280..155f4cdb06fb 100644
--- a/dpnp/tests/test_mathematical.py
+++ b/dpnp/tests/test_mathematical.py
@@ -1,5 +1,4 @@
import dpctl
-import dpctl.tensor as dpt
import numpy
import pytest
from dpctl.utils import ExecutionPlacementError
@@ -13,7 +12,7 @@
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpnp
# TODO: revert to `from dpctl.tensor...`
@@ -672,15 +671,15 @@ def test_to_begin_to_end(self, to_begin, to_end):
"to_begin, to_end",
[
(-20, 20),
- (dpt_ext.asarray([-20, -30]), dpt_ext.asarray([20, 15])),
- (dpt_ext.asarray([[-20, -30]]), dpt_ext.asarray([[20, 15]])),
+ (dpt.asarray([-20, -30]), dpt.asarray([20, 15])),
+ (dpt.asarray([[-20, -30]]), dpt.asarray([[20, 15]])),
([1, 2], [3, 4]),
((1, 2), (3, 4)),
],
)
def test_usm_ndarray(self, to_begin, to_end):
a = numpy.array([[1, 2, 0]])
- dpt_a = dpt_ext.asarray(a)
+ dpt_a = dpt.asarray(a)
if isinstance(to_begin, dpt.usm_ndarray):
np_to_begin = dpt.asnumpy(to_begin)
@@ -1581,7 +1580,7 @@ def test_out(self):
assert_allclose(result, expected)
# output is usm_ndarray
- dpt_out = dpt_ext.empty(expected.shape, dtype=expected.dtype)
+ dpt_out = dpt.empty(expected.shape, dtype=expected.dtype)
result = dpnp.prod(ia, axis=0, out=dpt_out)
assert dpt_out is result.get_array()
assert_allclose(result, expected)
@@ -2634,7 +2633,7 @@ def test_out_float16(self, func):
def test_out_usm_ndarray(self, func, dt):
a = generate_random_numpy_array(10, dt)
out = numpy.empty(a.shape, dtype=dt)
- ia, usm_out = dpnp.array(a), dpt_ext.asarray(out)
+ ia, usm_out = dpnp.array(a), dpt.asarray(out)
expected = getattr(numpy, func)(a, out=out)
result = getattr(dpnp, func)(ia, out=usm_out)
diff --git a/dpnp/tests/test_memory.py b/dpnp/tests/test_memory.py
index 94aeda33f505..dd87a993e1dc 100644
--- a/dpnp/tests/test_memory.py
+++ b/dpnp/tests/test_memory.py
@@ -1,10 +1,9 @@
-import dpctl.tensor as dpt
import numpy
import pytest
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpnp
import dpnp.memory as dpm
@@ -24,7 +23,7 @@ def test_wrong_input_type(self, x):
dpm.create_data(x)
def test_wrong_usm_data(self):
- a = dpt_ext.ones(10)
+ a = dpt.ones(10)
d = IntUsmData(a.shape, buffer=a)
with pytest.raises(TypeError):
diff --git a/dpnp/tests/test_ndarray.py b/dpnp/tests/test_ndarray.py
index a27f0fe6aa14..8944043d90a0 100644
--- a/dpnp/tests/test_ndarray.py
+++ b/dpnp/tests/test_ndarray.py
@@ -1,4 +1,3 @@
-import dpctl.tensor as dpt
import numpy
import pytest
from numpy.testing import (
@@ -11,7 +10,7 @@
# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
-import dpctl_ext.tensor as dpt_ext
+import dpctl_ext.tensor as dpt
import dpnp
from .helper import (
@@ -410,7 +409,7 @@ def test_error(self):
class TestUsmNdarrayProtocol:
def test_basic(self):
a = dpnp.arange(256, dtype=dpnp.int64)
- usm_a = dpt_ext.asarray(a)
+ usm_a = dpt.asarray(a)
assert a.sycl_queue == usm_a.sycl_queue
assert a.usm_type == usm_a.usm_type
diff --git a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py
index 41df0a82e0a0..e44f51f09b20 100644
--- a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py
+++ b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py
@@ -1,10 +1,12 @@
from __future__ import annotations
import dpctl
-import dpctl.tensor._dlpack as dlp
import numpy
import pytest
+# TODO: revert to `import dpctl.tensor...`
+# when dpnp fully migrates dpctl/tensor
+import dpctl_ext.tensor._dlpack as dlp
import dpnp as cupy
from dpnp.tests.third_party.cupy import testing