From cb03a497e2ada160fdf64e994b5b22a72bd8c4b6 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 15:16:34 -0800 Subject: [PATCH 01/24] Move data types to dpctl_ext.tensor --- dpctl_ext/tensor/__init__.py | 34 +++++++++++ dpctl_ext/tensor/_data_types.py | 104 ++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 dpctl_ext/tensor/_data_types.py diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index 7a6923169c1..d130b8231b3 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -53,6 +53,23 @@ zeros, zeros_like, ) +from ._data_types import ( + bool, + complex64, + complex128, + dtype, + float16, + float32, + float64, + int8, + int16, + int32, + int64, + uint8, + uint16, + uint32, + uint64, +) from ._elementwise_funcs import ( abs, acos, @@ -185,6 +202,23 @@ from ._utility_functions import all, any, diff __all__ = [ + # data types + "bool", + "dtype", + "int8", + "uint8", + "int16", + "uint16", + "int32", + "uint32", + "int64", + "uint64", + "float16", + "float32", + "float64", + "complex64", + "complex128", + # functions "abs", "acos", "acosh", diff --git a/dpctl_ext/tensor/_data_types.py b/dpctl_ext/tensor/_data_types.py new file mode 100644 index 00000000000..faf30ffdabd --- /dev/null +++ b/dpctl_ext/tensor/_data_types.py @@ -0,0 +1,104 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + +from numpy import bool_ as np_bool_ +from numpy import complexfloating as np_complexfloating +from numpy import dtype +from numpy import floating as np_floating +from numpy import integer as np_integer +from numpy import issubdtype as np_issubdtype + +from ._tensor_impl import ( + default_device_bool_type as ti_default_device_bool_type, +) +from ._tensor_impl import ( + default_device_complex_type as ti_default_device_complex_type, +) +from ._tensor_impl import default_device_fp_type as ti_default_device_fp_type +from ._tensor_impl import default_device_int_type as ti_default_device_int_type + +bool = dtype("bool") +int8 = dtype("int8") +int16 = dtype("int16") +int32 = dtype("int32") +int64 = dtype("int64") +uint8 = dtype("uint8") +uint16 = dtype("uint16") +uint32 = dtype("uint32") +uint64 = dtype("uint64") +float16 = dtype("float16") +float32 = dtype("float32") +float64 = dtype("float64") +complex64 = dtype("complex64") +complex128 = dtype("complex128") + + +def _get_dtype(inp_dt, sycl_obj, ref_type=None): + """ + Type inference utility to construct data type + object with defaults based on reference type. + + _get_dtype is used by dpctl.tensor.asarray + to infer data type of the output array from the + input sequence. 
+ """ + if inp_dt is None: + if ref_type in [None, float] or np_issubdtype(ref_type, np_floating): + fp_dt = ti_default_device_fp_type(sycl_obj) + return dtype(fp_dt) + if ref_type in [bool, np_bool_]: + bool_dt = ti_default_device_bool_type(sycl_obj) + return dtype(bool_dt) + if ref_type is int or np_issubdtype(ref_type, np_integer): + int_dt = ti_default_device_int_type(sycl_obj) + return dtype(int_dt) + if ref_type is complex or np_issubdtype(ref_type, np_complexfloating): + cfp_dt = ti_default_device_complex_type(sycl_obj) + return dtype(cfp_dt) + raise TypeError(f"Reference type {ref_type} not recognized.") + return dtype(inp_dt) + + +__all__ = [ + "dtype", + "_get_dtype", + "bool", + "int8", + "uint8", + "int16", + "uint16", + "int32", + "uint32", + "int64", + "uint64", + "float16", + "float32", + "float64", + "complex64", + "complex128", +] From 93510c058bce5068f692bd466e6c10fb186b48b0 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 15:21:09 -0800 Subject: [PATCH 02/24] Move class Device to dpctl_ext.tensor --- dpctl_ext/tensor/__init__.py | 2 + dpctl_ext/tensor/_device.py | 195 +++++++++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 dpctl_ext/tensor/_device.py diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index d130b8231b3..f80edfb4b56 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -70,6 +70,7 @@ uint32, uint64, ) +from ._device import Device from ._elementwise_funcs import ( abs, acos, @@ -202,6 +203,7 @@ from ._utility_functions import all, any, diff __all__ = [ + "Device", # data types "bool", "dtype", diff --git a/dpctl_ext/tensor/_device.py b/dpctl_ext/tensor/_device.py new file mode 100644 index 00000000000..8d763bc721e --- /dev/null +++ b/dpctl_ext/tensor/_device.py @@ -0,0 +1,195 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +import dpctl +from dpctl._sycl_device_factory import _cached_default_device +from dpctl._sycl_queue_manager import get_device_cached_queue + +__doc__ = "Implementation of array API mandated Device class" + + +class Device: + """ + An object representing Data-API concept of device. + + This is a wrapper around :class:`dpctl.SyclQueue` with custom + formatting. 
The class does not have public constructor, + but a class method :meth:`dpctl.tensor.Device.create_device` to construct + it from `device` keyword argument in Array-API functions. + + Instance can be queried for ``sycl_queue``, ``sycl_context``, + or ``sycl_device``. + """ + + __device_queue_map__ = {} + sycl_queue_ = None + + def __new__(cls, *args, **kwargs): + raise TypeError("No public constructor") + + @classmethod + def create_device(cls, device=None): + """Device.create_device(device=None) + + Creates instance of Device from argument. + + Args: + device: + Device specification, i.e. `None`, :class:`.Device`, + :class:`dpctl.SyclQueue`, or a :class:`dpctl.SyclDevice` + corresponding to a root SYCL device. + Raises: + ValueError: if an instance of :class:`dpctl.SycDevice` corresponding + to a sub-device was specified as the argument + SyclQueueCreationError: if :class:`dpctl.SyclQueue` could not be + created from the argument + """ + dev = device + obj = super().__new__(cls) + if isinstance(dev, Device): + obj.sycl_queue_ = dev.sycl_queue + elif isinstance(dev, dpctl.SyclQueue): + obj.sycl_queue_ = dev + elif isinstance(dev, dpctl.SyclDevice): + par = dev.parent_device + if par is None: + obj.sycl_queue_ = get_device_cached_queue(dev) + else: + raise ValueError( + f"Using non-root device {dev} to specify offloading " + "target is ambiguous. 
Please use dpctl.SyclQueue " + "targeting this device" + ) + else: + if dev is None: + _dev = _cached_default_device() + else: + _dev = dpctl.SyclDevice(dev) + obj.sycl_queue_ = get_device_cached_queue(_dev) + return obj + + @property + def sycl_queue(self): + """:class:`dpctl.SyclQueue` used to offload to this :class:`.Device`.""" + return self.sycl_queue_ + + @property + def sycl_context(self): + """:class:`dpctl.SyclContext` associated with this :class:`.Device`.""" + return self.sycl_queue_.sycl_context + + @property + def sycl_device(self): + """:class:`dpctl.SyclDevice` targeted by this :class:`.Device`.""" + return self.sycl_queue_.sycl_device + + def __repr__(self): + try: + sd = self.sycl_device + except AttributeError as exc: + raise ValueError( + f"Instance of {self.__class__} is not initialized" + ) from exc + try: + fs = sd.filter_string + return f"Device({fs})" + except TypeError: + # This is a sub-device + return repr(self.sycl_queue) + + def print_device_info(self): + """Outputs information about targeted SYCL device""" + self.sycl_device.print_device_info() + + def wait(self): + """Call ``wait`` method of the underlying ``sycl_queue``.""" + self.sycl_queue_.wait() + + def __eq__(self, other): + """Equality comparison based on underlying ``sycl_queue``.""" + if isinstance(other, Device): + return self.sycl_queue.__eq__(other.sycl_queue) + elif isinstance(other, dpctl.SyclQueue): + return self.sycl_queue.__eq__(other) + return False + + def __hash__(self): + """Compute object's hash value.""" + return self.sycl_queue.__hash__() + + +def normalize_queue_device(sycl_queue=None, device=None): + """normalize_queue_device(sycl_queue=None, device=None) + + Utility to process exclusive keyword arguments 'device' + and 'sycl_queue' in functions of `dpctl.tensor`. + + Args: + sycl_queue (:class:`dpctl.SyclQueue`, optional): + explicitly indicates where USM allocation is done + and the population code (if any) is executed. 
+ Value `None` is interpreted as get the SYCL queue + from `device` keyword, or use default queue. + Default: None + device (string, :class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue, + :class:`dpctl.tensor.Device`, optional): + array-API keyword indicating non-partitioned SYCL device + where array is allocated. + + Returns + :class:`dpctl.SyclQueue` object implied by either of provided + keywords. If both are None, `dpctl.SyclQueue()` is returned. + If both are specified and imply the same queue, `sycl_queue` + is returned. + + Raises: + TypeError: if argument is not of the expected type, or keywords + imply incompatible queues. + """ + q = sycl_queue + d = device + if q is None: + d = Device.create_device(d) + return d.sycl_queue + if not isinstance(q, dpctl.SyclQueue): + raise TypeError(f"Expected dpctl.SyclQueue, got {type(q)}") + if d is None: + return q + d = Device.create_device(d) + qq = dpctl.utils.get_execution_queue( + ( + q, + d.sycl_queue, + ) + ) + if qq is None: + raise TypeError( + "sycl_queue and device keywords can not be both specified" + ) + return qq From 8e11b23552678573bcc8cd722c19af70694a901a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 15:28:58 -0800 Subject: [PATCH 03/24] Move constants to dpctl_ext.tensor --- dpctl_ext/tensor/__init__.py | 7 +++++++ dpctl_ext/tensor/_constants.py | 36 ++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 dpctl_ext/tensor/_constants.py diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index f80edfb4b56..d783372f5fe 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -29,6 +29,7 @@ from ._accumulation import cumulative_logsumexp, cumulative_prod, cumulative_sum from ._clip import clip +from ._constants import e, inf, nan, newaxis, pi from ._copy_utils import ( asnumpy, astype, @@ -220,6 +221,12 @@ "float64", "complex64", "complex128", + # constants + "e", + "inf", + "nan", + "newaxis", + 
"pi", # functions "abs", "acos", diff --git a/dpctl_ext/tensor/_constants.py b/dpctl_ext/tensor/_constants.py new file mode 100644 index 00000000000..4c134bd9d37 --- /dev/null +++ b/dpctl_ext/tensor/_constants.py @@ -0,0 +1,36 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + +import numpy as np + +newaxis = None + +pi = np.pi +e = np.e +nan = np.nan +inf = np.inf From 54fe33173131d865fbfc3ea4bf1a90ee91705654 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 15:36:47 -0800 Subject: [PATCH 04/24] Move array API utilities --- dpctl_ext/tensor/__init__.py | 3 + dpctl_ext/tensor/_array_api.py | 256 +++++++++++++++++++++++++++++++++ 2 files changed, 259 insertions(+) create mode 100644 dpctl_ext/tensor/_array_api.py diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index d783372f5fe..5cc266d0748 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -28,6 +28,7 @@ from ._accumulation import cumulative_logsumexp, cumulative_prod, cumulative_sum +from ._array_api import __array_api_version__, __array_namespace_info__ from ._clip import clip from ._constants import e, inf, nan, newaxis, pi from ._copy_utils import ( @@ -380,4 +381,6 @@ "where", "zeros", "zeros_like", + "__array_api_version__", + "__array_namespace_info__", ] diff --git a/dpctl_ext/tensor/_array_api.py b/dpctl_ext/tensor/_array_api.py new file mode 100644 index 00000000000..09f71bc1bdd --- /dev/null +++ b/dpctl_ext/tensor/_array_api.py @@ -0,0 +1,256 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import dpctl + +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt + +from ._tensor_impl import ( + default_device_complex_type, + default_device_fp_type, + default_device_index_type, + default_device_int_type, +) + + +def _isdtype_impl(dtype, kind): + if isinstance(kind, str): + if kind == "bool": + return dtype.kind == "b" + elif kind == "signed integer": + return dtype.kind == "i" + elif kind == "unsigned integer": + return dtype.kind == "u" + elif kind == "integral": + return dtype.kind in "iu" + elif kind == "real floating": + return dtype.kind == "f" + elif kind == "complex floating": + return dtype.kind == "c" + elif kind == "numeric": + return dtype.kind in "iufc" + else: + raise ValueError(f"Unrecognized data type kind: {kind}") + + elif isinstance(kind, tuple): + return any(_isdtype_impl(dtype, k) for k in kind) + else: + raise 
TypeError(f"Unsupported type for dtype kind: {type(kind)}") + + +def _get_device_impl(d): + if d is None: + return dpctl.select_default_device() + elif isinstance(d, dpctl.SyclDevice): + return d + elif isinstance(d, (dpt.Device, dpctl.SyclQueue)): + return d.sycl_device + else: + try: + return dpctl.SyclDevice(d) + except TypeError: + raise TypeError(f"Unsupported type for device argument: {type(d)}") + + +__array_api_version__ = "2024.12" + + +class Info: + """namespace returned by ``__array_namespace_info__()``""" + + def __init__(self): + self._capabilities = { + "boolean indexing": True, + "data-dependent shapes": True, + "max dimensions": None, + } + self._all_dtypes = { + "bool": dpt.bool, + "float32": dpt.float32, + "float64": dpt.float64, + "complex64": dpt.complex64, + "complex128": dpt.complex128, + "int8": dpt.int8, + "int16": dpt.int16, + "int32": dpt.int32, + "int64": dpt.int64, + "uint8": dpt.uint8, + "uint16": dpt.uint16, + "uint32": dpt.uint32, + "uint64": dpt.uint64, + } + + def capabilities(self): + """ + capabilities() + + Returns a dictionary of ``dpctl``'s capabilities. + + The dictionary contains the following keys: + ``"boolean indexing"``: + boolean indicating ``dpctl``'s support of boolean indexing. + Value: ``True`` + ``"data-dependent shapes"``: + boolean indicating ``dpctl``'s support of data-dependent shapes. + Value: ``True`` + ``max dimensions``: + integer indication the maximum array dimension supported by ``dpctl``. + Value: ``None`` + + Returns: + dict: + dictionary of ``dpctl``'s capabilities + """ + return self._capabilities.copy() + + def default_device(self): + """ + default_device() + + Returns the default SYCL device. + """ + return dpctl.select_default_device() + + def default_dtypes(self, *, device=None): + """ + default_dtypes(*, device=None) + + Returns a dictionary of default data types for ``device``. 
+ + Args: + device (Optional[:class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue`, :class:`dpctl.tensor.Device`, str]): + array API concept of device used in getting default data types. + ``device`` can be ``None`` (in which case the default device + is used), an instance of :class:`dpctl.SyclDevice`, an instance + of :class:`dpctl.SyclQueue`, a :class:`dpctl.tensor.Device` + object returned by :attr:`dpctl.tensor.usm_ndarray.device`, or + a filter selector string. + Default: ``None``. + + Returns: + dict: + a dictionary of default data types for ``device``: + + - ``"real floating"``: dtype + - ``"complex floating"``: dtype + - ``"integral"``: dtype + - ``"indexing"``: dtype + """ + device = _get_device_impl(device) + return { + "real floating": dpt.dtype(default_device_fp_type(device)), + "complex floating": dpt.dtype(default_device_complex_type(device)), + "integral": dpt.dtype(default_device_int_type(device)), + "indexing": dpt.dtype(default_device_index_type(device)), + } + + def dtypes(self, *, device=None, kind=None): + """ + dtypes(*, device=None, kind=None) + + Returns a dictionary of all Array API data types of a specified + ``kind`` supported by ``device``. + + This dictionary only includes data types supported by the + `Python Array API `_ + specification. + + Args: + device (Optional[:class:`dpctl.SyclDevice`, :class:`dpctl.SyclQueue`, :class:`dpctl.tensor.Device`, str]): + array API concept of device used in getting default data types. + ``device`` can be ``None`` (in which case the default device is + used), an instance of :class:`dpctl.SyclDevice`, an instance of + :class:`dpctl.SyclQueue`, a :class:`dpctl.tensor.Device` + object returned by :attr:`dpctl.tensor.usm_ndarray.device`, or + a filter selector string. + Default: ``None``. + + kind (Optional[str, Tuple[str, ...]]): + data type kind. 
+ + - if ``kind`` is ``None``, returns a dictionary of all data + types supported by `device` + - if ``kind`` is a string, returns a dictionary containing the + data types belonging to the data type kind specified. + + Supports: + + * ``"bool"`` + * ``"signed integer"`` + * ``"unsigned integer"`` + * ``"integral"`` + * ``"real floating"`` + * ``"complex floating"`` + * ``"numeric"`` + + - if ``kind`` is a tuple, the tuple represents a union of + ``kind`` strings, and returns a dictionary containing data + types corresponding to the-specified union. + + Default: ``None``. + + Returns: + dict: + a dictionary of the supported data types of the specified + ``kind`` + """ + device = _get_device_impl(device) + _fp64 = device.has_aspect_fp64 + if kind is None: + return { + key: val + for key, val in self._all_dtypes.items() + if _fp64 or (key != "float64" and key != "complex128") + } + else: + return { + key: val + for key, val in self._all_dtypes.items() + if (_fp64 or (key != "float64" and key != "complex128")) + and _isdtype_impl(val, kind) + } + + def devices(self): + """ + devices() + + Returns a list of supported devices. + """ + return dpctl.get_devices() + + +def __array_namespace_info__(): + """ + __array_namespace_info__() + + Returns a namespace with Array API namespace inspection utilities. 
+ + """ + return Info() From 60bba8f9f2b3bd49548a3b23fb31659f3e0af43c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 15:42:04 -0800 Subject: [PATCH 05/24] Move print functions to dpctl_ext.tensor --- dpctl_ext/tensor/__init__.py | 12 + dpctl_ext/tensor/_print.py | 503 +++++++++++++++++++++++++++++++++++ 2 files changed, 515 insertions(+) create mode 100644 dpctl_ext/tensor/_print.py diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index 5cc266d0748..2624e7dfea1 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -177,6 +177,13 @@ tile, unstack, ) +from ._print import ( + get_print_options, + print_options, + set_print_options, + usm_ndarray_repr, + usm_ndarray_str, +) from ._reduction import ( argmax, argmin, @@ -289,6 +296,7 @@ "from_numpy", "full", "full_like", + "get_print_options", "greater", "greater_equal", "hypot", @@ -332,6 +340,7 @@ "place", "positive", "pow", + "print_options", "prod", "proj", "put", @@ -347,6 +356,7 @@ "round", "rsqrt", "searchsorted", + "set_print_options", "sign", "signbit", "sin", @@ -376,6 +386,8 @@ "unique_inverse", "unique_values", "unstack", + "usm_ndarray_repr", + "usm_ndarray_str", "var", "vecdot", "where", diff --git a/dpctl_ext/tensor/_print.py b/dpctl_ext/tensor/_print.py new file mode 100644 index 00000000000..5385eadb253 --- /dev/null +++ b/dpctl_ext/tensor/_print.py @@ -0,0 +1,503 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import contextlib +import itertools +import operator + +import dpctl +import dpctl.utils +import numpy as np + +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt +import dpctl_ext.tensor._tensor_impl as ti + +__doc__ = "Print functions for :class:`dpctl.tensor.usm_ndarray`." 
+ +_print_options = { + "linewidth": 75, + "edgeitems": 3, + "threshold": 1000, + "precision": 8, + "floatmode": "maxprec", + "suppress": False, + "nanstr": "nan", + "infstr": "inf", + "sign": "-", +} + + +def _move_to_next_line(string, s, line_width, prefix): + """Move string to next line if it doesn't fit in the current line.""" + bottom_len = len(s) - (s.rfind("\n") + 1) + next_line = bottom_len + len(string) + 1 > line_width + string = ",\n" + " " * len(prefix) + string if next_line else ", " + string + + return string + + +def _options_dict( + linewidth=None, + edgeitems=None, + threshold=None, + precision=None, + floatmode=None, + suppress=None, + nanstr=None, + infstr=None, + sign=None, + numpy=False, +): + if numpy: + numpy_options = np.get_printoptions() + options = {k: numpy_options[k] for k in _print_options.keys()} + else: + options = _print_options.copy() + + if suppress: + options["suppress"] = True + + local = dict(locals().items()) + for int_arg in ["linewidth", "precision", "threshold", "edgeitems"]: + val = local[int_arg] + if val is not None: + options[int_arg] = operator.index(val) + + for str_arg in ["nanstr", "infstr"]: + val = local[str_arg] + if val is not None: + if not isinstance(val, str): + raise TypeError( + "`{}` ".format(str_arg) + "must be of `string` type." 
+ ) + options[str_arg] = val + + signs = ["-", "+", " "] + if sign is not None: + if sign not in signs: + raise ValueError( + "`sign` must be one of" + + ", ".join("`{}`".format(s) for s in signs) + ) + options["sign"] = sign + + floatmodes = ["fixed", "unique", "maxprec", "maxprec_equal"] + if floatmode is not None: + if floatmode not in floatmodes: + raise ValueError( + "`floatmode` must be one of" + + ", ".join("`{}`".format(m) for m in floatmodes) + ) + options["floatmode"] = floatmode + + return options + + +def set_print_options( + linewidth=None, + edgeitems=None, + threshold=None, + precision=None, + floatmode=None, + suppress=None, + nanstr=None, + infstr=None, + sign=None, + numpy=False, +): + """ + set_print_options(linewidth=None, edgeitems=None, threshold=None, + precision=None, floatmode=None, suppress=None, + nanstr=None, infstr=None, sign=None, numpy=False) + + Set options for printing :class:`dpctl.tensor.usm_ndarray` class. + + Args: + linewidth (int, optional): + Number of characters printed per line. + Raises `TypeError` if linewidth is not an integer. + Default: `75`. + edgeitems (int, optional): + Number of elements at the beginning and end + when the printed array is abbreviated. + Raises `TypeError` if edgeitems is not an integer. + Default: `3`. + threshold (int, optional): + Number of elements that triggers array abbreviation. + Raises `TypeError` if threshold is not an integer. + Default: `1000`. + precision (int or None, optional): + Number of digits printed for floating point numbers. + Raises `TypeError` if precision is not an integer. + Default: `8`. + floatmode (str, optional): + Controls how floating point numbers are interpreted. + `"fixed:`: + Always prints exactly `precision` digits. + `"unique"`: + Ignores precision, prints the number of + digits necessary to uniquely specify each number. + `"maxprec"`: + Prints `precision` digits or fewer, + if fewer will uniquely represent a number. 
+ `"maxprec_equal"`: + Prints an equal number of digits + for each number. This number is `precision` digits + or fewer, if fewer will uniquely represent each number. + Raises `ValueError` if floatmode is not one of + `fixed`, `unique`, `maxprec`, or `maxprec_equal`. + Default: "maxprec_equal" + suppress (bool, optional): + If `True,` numbers equal to zero in the current precision + will print as zero. + Default: `False`. + nanstr (str, optional): + String used to represent nan. + Raises `TypeError` if nanstr is not a string. + Default: `"nan"`. + infstr (str, optional): + String used to represent infinity. + Raises `TypeError` if infstr is not a string. + Default: `"inf"`. + sign (str, optional): + Controls the sign of floating point numbers. + `"-"`: + Omit the sign of positive numbers. + `"+"`: + Always print the sign of positive numbers. + `" "`: + Always print a whitespace in place of the + sign of positive numbers. + Raises `ValueError` if sign is not one of + `"-"`, `"+"`, or `" "`. + Default: `"-"`. + numpy (bool, optional): If `True,` then before other specified print + options are set, a dictionary of Numpy's print options + will be used to initialize dpctl's print options. + Default: "False" + """ + options = _options_dict( + linewidth=linewidth, + edgeitems=edgeitems, + threshold=threshold, + precision=precision, + floatmode=floatmode, + suppress=suppress, + nanstr=nanstr, + infstr=infstr, + sign=sign, + numpy=numpy, + ) + _print_options.update(options) + + +def get_print_options(): + """get_print_options() + + Returns a copy of current options for printing + :class:`dpctl.tensor.usm_ndarray` class. + + Returns: + dict: dictionary with array + printing option settings. 
+ + Options: + - "linewidth" : int, default 75 + - "edgeitems" : int, default 3 + - "threshold" : int, default 1000 + - "precision" : int, default 8 + - "floatmode" : str, default "maxprec_equal" + - "suppress" : bool, default False + - "nanstr" : str, default "nan" + - "infstr" : str, default "inf" + - "sign" : str, default "-" + """ + return _print_options.copy() + + +@contextlib.contextmanager +def print_options(*args, **kwargs): + """ + Context manager for print options. + + Set print options for the scope of a `with` block. + `as` yields dictionary of print options. + """ + options = dpt.get_print_options() + try: + dpt.set_print_options(*args, **kwargs) + yield dpt.get_print_options() + finally: + dpt.set_print_options(**options) + + +def _nd_corners(arr_in, edge_items): + _shape = arr_in.shape + max_shape = 2 * edge_items + 1 + if max(_shape) <= max_shape: + return dpt.asnumpy(arr_in) + res_shape = tuple( + max_shape if _shape[i] > max_shape else _shape[i] + for i in range(arr_in.ndim) + ) + + exec_q = arr_in.sycl_queue + arr_out = dpt.empty( + res_shape, + dtype=arr_in.dtype, + usm_type=arr_in.usm_type, + sycl_queue=exec_q, + ) + + blocks = [] + for i in range(len(_shape)): + if _shape[i] > max_shape: + blocks.append( + ( + np.s_[:edge_items], + np.s_[-edge_items:], + ) + ) + else: + blocks.append((np.s_[:],)) + + _manager = dpctl.utils.SequentialOrderManager[exec_q] + dep_evs = _manager.submitted_events + hev_list = [] + for slc in itertools.product(*blocks): + hev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=arr_in[slc], + dst=arr_out[slc], + sycl_queue=exec_q, + depends=dep_evs, + ) + hev_list.append(hev) + + dpctl.SyclEvent.wait_for(hev_list) + return dpt.asnumpy(arr_out) + + +def usm_ndarray_str( + x, + line_width=None, + edge_items=None, + threshold=None, + precision=None, + floatmode=None, + suppress=None, + sign=None, + numpy=False, + separator=" ", + prefix="", + suffix="", +): + """ + usm_ndarray_str(x, line_width=None, edgeitems=None, 
threshold=None, + precision=None, floatmode=None, suppress=None, + sign=None, numpy=False, separator=" ", prefix="", + suffix="") + + Returns a string representing the elements of a + :class:`dpctl.tensor.usm_ndarray`. + + Args: + x (usm_ndarray): + Input array. + line_width (int, optional): + Number of characters printed per line. + Raises `TypeError` if line_width is not an integer. + Default: `75`. + edgeitems (int, optional): + Number of elements at the beginning and end + when the printed array is abbreviated. + Raises `TypeError` if edgeitems is not an integer. + Default: `3`. + threshold (int, optional): + Number of elements that triggers array abbreviation. + Raises `TypeError` if threshold is not an integer. + Default: `1000`. + precision (int or None, optional): + Number of digits printed for floating point numbers. + Raises `TypeError` if precision is not an integer. + Default: `8`. + floatmode (str, optional): + Controls how floating point numbers are interpreted. + `"fixed:`: + Always prints exactly `precision` digits. + `"unique"`: + Ignores precision, prints the number of + digits necessary to uniquely specify each number. + `"maxprec"`: + Prints `precision` digits or fewer, + if fewer will uniquely represent a number. + `"maxprec_equal"`: + Prints an equal number of digits for each number. + This number is `precision` digits or fewer, + if fewer will uniquely represent each number. + Raises `ValueError` if floatmode is not one of + `fixed`, `unique`, `maxprec`, or `maxprec_equal`. + Default: "maxprec_equal" + suppress (bool, optional): + If `True,` numbers equal to zero in the current precision + will print as zero. + Default: `False`. + sign (str, optional): + Controls the sign of floating point numbers. + `"-"`: + Omit the sign of positive numbers. + `"+"`: + Always print the sign of positive numbers. + `" "`: + Always print a whitespace in place of the + sign of positive numbers. 
+ Raises `ValueError` if sign is not one of + `"-"`, `"+"`, or `" "`. + Default: `"-"`. + numpy (bool, optional): + If `True,` then before other specified print + options are set, a dictionary of Numpy's print options + will be used to initialize dpctl's print options. + Default: "False" + separator (str, optional): + String inserted between elements of the array string. + Default: " " + prefix (str, optional): + String used to determine spacing to the left of the array string. + Default: "" + suffix (str, optional): + String that determines length of the last line of the array string. + Default: "" + + Returns: + str: string representation of input array. + """ + if not isinstance(x, dpt.usm_ndarray): + raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}") + + options = get_print_options() + options.update( + _options_dict( + linewidth=line_width, + edgeitems=edge_items, + threshold=threshold, + precision=precision, + floatmode=floatmode, + suppress=suppress, + sign=sign, + numpy=numpy, + ) + ) + + threshold = options["threshold"] + edge_items = options["edgeitems"] + + if x.size > threshold: + data = _nd_corners(x, edge_items) + options["threshold"] = 0 + else: + data = dpt.asnumpy(x) + with np.printoptions(**options): + s = np.array2string( + data, separator=separator, prefix=prefix, suffix=suffix + ) + return s + + +def usm_ndarray_repr( + x, line_width=None, precision=None, suppress=None, prefix="usm_ndarray" +): + """ + usm_ndarray_repr(x, line_width=None, precision=None, + suppress=None, prefix="") + + Returns a formatted string representing the elements + of a :class:`dpctl.tensor.usm_ndarray` and its data type, + if not a default type. + + Args: + x (usm_ndarray): Input array. + line_width (int, optional): Number of characters printed per line. + Raises `TypeError` if line_width is not an integer. + Default: `75`. + precision (int or None, optional): Number of digits printed for + floating point numbers. 
+ Raises `TypeError` if precision is not an integer. + Default: `8`. + suppress (bool, optional): If `True,` numbers equal to zero + in the current precision will print as zero. + Default: `False`. + prefix (str, optional): String inserted at the start of the array + string. + Default: "" + + Returns: + str: formatted string representing the input array + """ + if not isinstance(x, dpt.usm_ndarray): + raise TypeError(f"Expected dpctl.tensor.usm_ndarray, got {type(x)}") + + if line_width is None: + line_width = _print_options["linewidth"] + + show_dtype = x.dtype not in [ + dpt.bool, + dpt.int64, + dpt.float64, + dpt.complex128, + ] + + prefix = prefix + "(" + suffix = ")" + + s = usm_ndarray_str( + x, + line_width=line_width, + precision=precision, + suppress=suppress, + separator=", ", + prefix=prefix, + suffix=suffix, + ) + + if show_dtype or x.size == 0: + dtype_str = f"dtype={x.dtype.name}" + dtype_str = _move_to_next_line(dtype_str, s, line_width, prefix) + else: + dtype_str = "" + + options = get_print_options() + threshold = options["threshold"] + if (x.size == 0 and x.shape != (0,)) or x.size > threshold: + shape_str = f"shape={x.shape}" + shape_str = _move_to_next_line(shape_str, s, line_width, prefix) + else: + shape_str = "" + + return prefix + s + shape_str + dtype_str + suffix From b4fa02346eed8fb8b4d61721911aa5c59c1a956a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 15:52:38 -0800 Subject: [PATCH 06/24] Move include/dlpack to dpctl_ext.tensor --- .../tensor/include/dlpack/LICENSE.third-party | 201 ++++++ dpctl_ext/tensor/include/dlpack/README.md | 7 + dpctl_ext/tensor/include/dlpack/dlpack.h | 675 ++++++++++++++++++ 3 files changed, 883 insertions(+) create mode 100644 dpctl_ext/tensor/include/dlpack/LICENSE.third-party create mode 100644 dpctl_ext/tensor/include/dlpack/README.md create mode 100644 dpctl_ext/tensor/include/dlpack/dlpack.h diff --git a/dpctl_ext/tensor/include/dlpack/LICENSE.third-party 
b/dpctl_ext/tensor/include/dlpack/LICENSE.third-party new file mode 100644 index 00000000000..20a9c8a7b4d --- /dev/null +++ b/dpctl_ext/tensor/include/dlpack/LICENSE.third-party @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2017 by Contributors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/dpctl_ext/tensor/include/dlpack/README.md b/dpctl_ext/tensor/include/dlpack/README.md new file mode 100644 index 00000000000..3a7bc6d422c --- /dev/null +++ b/dpctl_ext/tensor/include/dlpack/README.md @@ -0,0 +1,7 @@ +# DLPack header + +The header `dlpack.h` downloaded from `https://github.com/dmlc/dlpack.git` remote at tag v1.0rc commit [`62100c1`](https://github.com/dmlc/dlpack/commit/62100c123144ae7a80061f4220be2dbd3cbaefc7). 
+ +The file can also be viewed using github web interface at https://github.com/dmlc/dlpack/blob/62100c123144ae7a80061f4220be2dbd3cbaefc7/include/dlpack/dlpack.h + +License file was retrieved from https://github.com/dmlc/dlpack/blob/main/LICENSE diff --git a/dpctl_ext/tensor/include/dlpack/dlpack.h b/dpctl_ext/tensor/include/dlpack/dlpack.h new file mode 100644 index 00000000000..cd71e799be3 --- /dev/null +++ b/dpctl_ext/tensor/include/dlpack/dlpack.h @@ -0,0 +1,675 @@ +/*! + * Copyright (c) 2017 - by Contributors + * \file dlpack.h + * \brief The common header of DLPack. + */ +#ifndef DLPACK_DLPACK_H_ +#define DLPACK_DLPACK_H_ + +/** + * \brief Compatibility with C++ + */ +#ifdef __cplusplus +#define DLPACK_EXTERN_C extern "C" +#else +#define DLPACK_EXTERN_C +#endif + +/*! \brief The current major version of dlpack */ +#define DLPACK_MAJOR_VERSION 1 + +/*! \brief The current minor version of dlpack */ +#define DLPACK_MINOR_VERSION 2 + +/*! \brief DLPACK_DLL prefix for windows */ +#ifdef _WIN32 +#ifdef DLPACK_EXPORTS +#define DLPACK_DLL __declspec(dllexport) +#else +#define DLPACK_DLL __declspec(dllimport) +#endif +#else +#define DLPACK_DLL +#endif + +#include <stdint.h> +#include <stddef.h> + +#ifdef __cplusplus +extern "C" +{ +#endif + + /*! + * \brief The DLPack version. + * + * A change in major version indicates that we have changed the + * data layout of the ABI - DLManagedTensorVersioned. + * + * A change in minor version indicates that we have added new + * code, such as a new device type, but the ABI is kept the same. + * + * If an obtained DLPack tensor has a major version that disagrees + * with the version number specified in this header file + * (i.e. major != DLPACK_MAJOR_VERSION), the consumer must call the deleter + * (and it is safe to do so). It is not safe to access any other fields + * as the memory layout will have changed. + * + * In the case of a minor version mismatch, the tensor can be safely used as + * long as the consumer knows how to interpret all fields. 
Minor version + * updates indicate the addition of enumeration values. + */ + typedef struct + { + /*! \brief DLPack major version. */ + uint32_t major; + /*! \brief DLPack minor version. */ + uint32_t minor; + } DLPackVersion; + +/*! + * \brief The device type in DLDevice. + */ +#ifdef __cplusplus + typedef enum : int32_t + { +#else +typedef enum +{ +#endif + /*! \brief CPU device */ + kDLCPU = 1, + /*! \brief CUDA GPU device */ + kDLCUDA = 2, + /*! + * \brief Pinned CUDA CPU memory by cudaMallocHost + */ + kDLCUDAHost = 3, + /*! \brief OpenCL devices. */ + kDLOpenCL = 4, + /*! \brief Vulkan buffer for next generation graphics. */ + kDLVulkan = 7, + /*! \brief Metal for Apple GPU. */ + kDLMetal = 8, + /*! \brief Verilog simulator buffer */ + kDLVPI = 9, + /*! \brief ROCm GPUs for AMD GPUs */ + kDLROCM = 10, + /*! + * \brief Pinned ROCm CPU memory allocated by hipMallocHost + */ + kDLROCMHost = 11, + /*! + * \brief Reserved extension device type, + * used for quickly test extension device + * The semantics can differ depending on the implementation. + */ + kDLExtDev = 12, + /*! + * \brief CUDA managed/unified memory allocated by cudaMallocManaged + */ + kDLCUDAManaged = 13, + /*! + * \brief Unified shared memory allocated on a oneAPI non-partititioned + * device. Call to oneAPI runtime is required to determine the device + * type, the USM allocation type and the sycl context it is bound to. + * + */ + kDLOneAPI = 14, + /*! \brief GPU support for next generation WebGPU standard. */ + kDLWebGPU = 15, + /*! \brief Qualcomm Hexagon DSP */ + kDLHexagon = 16, + /*! \brief Microsoft MAIA devices */ + kDLMAIA = 17, + /*! \brief AWS Trainium */ + kDLTrn = 18, + } DLDeviceType; + + /*! + * \brief A Device for Tensor and operator. + */ + typedef struct + { + /*! \brief The device type used in the device. */ + DLDeviceType device_type; + /*! + * \brief The device index. + * For vanilla CPU memory, pinned memory, or managed memory, this is set + * to 0. 
+ */ + int32_t device_id; + } DLDevice; + + /*! + * \brief The type code options DLDataType. + */ + typedef enum + { + /*! \brief signed integer */ + kDLInt = 0U, + /*! \brief unsigned integer */ + kDLUInt = 1U, + /*! \brief IEEE floating point */ + kDLFloat = 2U, + /*! + * \brief Opaque handle type, reserved for testing purposes. + * Frameworks need to agree on the handle data type for the exchange to + * be well-defined. + */ + kDLOpaqueHandle = 3U, + /*! \brief bfloat16 */ + kDLBfloat = 4U, + /*! + * \brief complex number + * (C/C++/Python layout: compact struct per complex number) + */ + kDLComplex = 5U, + /*! \brief boolean */ + kDLBool = 6U, + /*! \brief FP8 data types */ + kDLFloat8_e3m4 = 7U, + kDLFloat8_e4m3 = 8U, + kDLFloat8_e4m3b11fnuz = 9U, + kDLFloat8_e4m3fn = 10U, + kDLFloat8_e4m3fnuz = 11U, + kDLFloat8_e5m2 = 12U, + kDLFloat8_e5m2fnuz = 13U, + kDLFloat8_e8m0fnu = 14U, + /*! \brief FP6 data types + * Setting bits != 6 is currently unspecified, and the producer must + * ensure it is set while the consumer must stop importing if the value + * is unexpected. + */ + kDLFloat6_e2m3fn = 15U, + kDLFloat6_e3m2fn = 16U, + /*! \brief FP4 data types + * Setting bits != 4 is currently unspecified, and the producer must + * ensure it is set while the consumer must stop importing if the value + * is unexpected. + */ + kDLFloat4_e2m1fn = 17U, + } DLDataTypeCode; + + /*! + * \brief The data type the tensor can hold. The data type is assumed to + * follow the native endian-ness. 
An explicit error message should be raised + * when attempting to export an array with non-native endianness + * + * Examples + * - float: type_code = 2, bits = 32, lanes = 1 + * - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4 + * - int8: type_code = 0, bits = 8, lanes = 1 + * - std::complex<float>: type_code = 5, bits = 64, lanes = 1 + * - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library + * convention, the underlying storage size of bool is 8 bits) + * - float8_e4m3: type_code = 8, bits = 8, lanes = 1 (packed in memory) + * - float6_e3m2fn: type_code = 16, bits = 6, lanes = 1 (packed in memory) + * - float4_e2m1fn: type_code = 17, bits = 4, lanes = 1 (packed in memory) + * + * When a sub-byte type is packed, DLPack requires the data to be in little + * bit-endian, i.e., for a packed data set D ((D >> (i * bits)) && bit_mask) + * stores the i-th element. + */ + typedef struct + { + /*! + * \brief Type code of base types. + * We keep it uint8_t instead of DLDataTypeCode for minimal memory + * footprint, but the value should be one of DLDataTypeCode enum values. + * */ + uint8_t code; + /*! + * \brief Number of bits, common choices are 8, 16, 32. + */ + uint8_t bits; + /*! \brief Number of lanes in the type, used for vector types. */ + uint16_t lanes; + } DLDataType; + + /*! + * \brief Plain C Tensor object, does not manage memory. + */ + typedef struct + { + /*! + * \brief The data pointer points to the allocated data. This will be + * CUDA device pointer or cl_mem handle in OpenCL. It may be opaque on + * some device types. This pointer is always aligned to 256 bytes as in + * CUDA. The `byte_offset` field should be used to point to the + * beginning of the data. + * + * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, + * TensorFlow, TVM, perhaps others) do not adhere to this 256 byte + * alignment requirement on CPU/CUDA/ROCm, and always use + * `byte_offset=0`. 
This must be fixed (after which this note will be + * updated); at the moment it is recommended to not rely on the data + * pointer being correctly aligned. + * + * For given DLTensor, the size of memory required to store the contents + * of data is calculated as follows: + * + * \code{.c} + * static inline size_t GetDataSize(const DLTensor* t) { + * size_t size = 1; + * for (tvm_index_t i = 0; i < t->ndim; ++i) { + * size *= t->shape[i]; + * } + * size *= (t->dtype.bits * t->dtype.lanes + 7) / 8; + * return size; + * } + * \endcode + * + * Note that if the tensor is of size zero, then the data pointer should + * be set to `NULL`. + */ + void *data; + /*! \brief The device of the tensor */ + DLDevice device; + /*! \brief Number of dimensions */ + int32_t ndim; + /*! \brief The data type of the pointer*/ + DLDataType dtype; + /*! + * \brief The shape of the tensor + * + * When ndim == 0, shape can be set to NULL. + */ + int64_t *shape; + /*! + * \brief strides of the tensor (in number of elements, not bytes), + * can not be NULL if ndim != 0, must points to + * an array of ndim elements that specifies the strides, + * so consumer can always rely on strides[dim] being valid for 0 <= dim + * < ndim. + * + * When ndim == 0, strides can be set to NULL. + * + * \note Before DLPack v1.2, strides can be NULL to indicate contiguous + * data. This is not allowed in DLPack v1.2 and later. The rationale is + * to simplify the consumer handling. + */ + int64_t *strides; + /*! \brief The offset in bytes to the beginning pointer to data */ + uint64_t byte_offset; + } DLTensor; + + /*! + * \brief C Tensor object, manage memory of DLTensor. This data structure is + * intended to facilitate the borrowing of DLTensor by another framework. + * It is not meant to transfer the tensor. When the borrowing framework + * doesn't need the tensor, it should call the deleter to notify the host + * that the resource is no longer needed. 
+ * + * \note This data structure is used as Legacy DLManagedTensor + * in DLPack exchange and is deprecated after DLPack v0.8 + * Use DLManagedTensorVersioned instead. + * This data structure may get renamed or deleted in future versions. + * + * \sa DLManagedTensorVersioned + */ + typedef struct DLManagedTensor + { + /*! \brief DLTensor which is being memory managed */ + DLTensor dl_tensor; + /*! \brief the context of the original host framework of DLManagedTensor + * in which DLManagedTensor is used in the framework. It can also be + * NULL. + */ + void *manager_ctx; + /*! + * \brief Destructor - this should be called + * to destruct the manager_ctx which backs the DLManagedTensor. It can + * be NULL if there is no way for the caller to provide a reasonable + * destructor. The destructor deletes the argument self as well. + */ + void (*deleter)(struct DLManagedTensor *self); + } DLManagedTensor; + +// bit masks used in the DLManagedTensorVersioned + +/*! \brief bit mask to indicate that the tensor is read only. */ +#define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL) + +/*! + * \brief bit mask to indicate that the tensor is a copy made by the producer. + * + * If set, the tensor is considered solely owned throughout its lifetime by the + * consumer, until the producer-provided deleter is invoked. + */ +#define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL) + +/*! + * \brief bit mask to indicate that whether a sub-byte type is packed or padded. + * + * The default for sub-byte types (ex: fp4/fp6) is assumed packed. This flag can + * be set by the producer to signal that a tensor of sub-byte type is padded. + */ +#define DLPACK_FLAG_BITMASK_IS_SUBBYTE_TYPE_PADDED (1UL << 2UL) + + /*! + * \brief A versioned and managed C Tensor object, manage memory of + * DLTensor. + * + * This data structure is intended to facilitate the borrowing of DLTensor + * by another framework. It is not meant to transfer the tensor. 
When the + * borrowing framework doesn't need the tensor, it should call the deleter + * to notify the host that the resource is no longer needed. + * + * \note This is the current standard DLPack exchange data structure. + */ + typedef struct DLManagedTensorVersioned + { + /*! + * \brief The API and ABI version of the current managed Tensor + */ + DLPackVersion version; + /*! + * \brief the context of the original host framework. + * + * Stores DLManagedTensorVersioned is used in the + * framework. It can also be NULL. + */ + void *manager_ctx; + /*! + * \brief Destructor. + * + * This should be called to destruct manager_ctx which holds the + * DLManagedTensorVersioned. It can be NULL if there is no way for the + * caller to provide a reasonable destructor. The destructor deletes the + * argument self as well. + */ + void (*deleter)(struct DLManagedTensorVersioned *self); + /*! + * \brief Additional bitmask flags information about the tensor. + * + * By default the flags should be set to 0. + * + * \note Future ABI changes should keep everything until this field + * stable, to ensure that deleter can be correctly called. + * + * \sa DLPACK_FLAG_BITMASK_READ_ONLY + * \sa DLPACK_FLAG_BITMASK_IS_COPIED + */ + uint64_t flags; + /*! \brief DLTensor which is being memory managed */ + DLTensor dl_tensor; + } DLManagedTensorVersioned; + + //---------------------------------------------------------------------- + // DLPack `__c_dlpack_exchange_api__` fast exchange protocol definitions + //---------------------------------------------------------------------- + /*! + * \brief Request a producer library to create a new tensor. + * + * Create a new `DLManagedTensorVersioned` within the context of the + * producer library. The allocation is defined via the prototype DLTensor. + * + * This function is exposed by the framework through the DLPackExchangeAPI. + * + * \param prototype The prototype DLTensor. Only the dtype, ndim, shape, + * and device fields are used. 
+ * \param out The output DLManagedTensorVersioned.
+ * \param error_ctx Context for `SetError`.
+ * \param SetError The function to set the error.
+ * \return The owning DLManagedTensorVersioned* or NULL on failure.
+ * SetError is called exactly when NULL is returned (the implementer
+ * must ensure this).
+ * \note - As a C function, must not throw C++ exceptions.
+ * - Error propagation via SetError to avoid any direct need
+ * of Python API. Due to this `SetError` may have to ensure the GIL
+ * is held since it will presumably set a Python error.
+ *
+ * \sa DLPackExchangeAPI
+ */
+ typedef int (*DLPackManagedTensorAllocator)( //
+ DLTensor *prototype,
+ DLManagedTensorVersioned **out,
+ void *error_ctx, //
+ void (*SetError)(void *error_ctx,
+ const char *kind,
+ const char *message) //
+ );
+
+ /*!
+ * \brief Exports a PyObject* Tensor/NDArray to a DLManagedTensorVersioned.
+ *
+ * This function does not perform any stream synchronization. The consumer
+ * should query DLPackCurrentWorkStream to get the current work stream and
+ * launch kernels on it.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param py_object The Python object to convert. Must have the same type
+ * as the one the `DLPackExchangeAPI` was discovered from.
+ * \return The owning DLManagedTensorVersioned* or NULL on failure with a
+ * Python exception set. If the data cannot be described using
+ * DLPack this should be a BufferError if possible. \note - As a C function,
+ * must not throw C++ exceptions.
+ *
+ * \sa DLPackExchangeAPI, DLPackCurrentWorkStream
+ */
+ typedef int (*DLPackManagedTensorFromPyObjectNoSync)( //
+ void *py_object, //
+ DLManagedTensorVersioned **out //
+ );
+
+ /*!
+ * \brief Exports a PyObject* Tensor/NDArray to a provided DLTensor.
+ *
+ * This function provides a faster interface for temporary, non-owning,
+ * exchange. The producer (implementer) still owns the memory of data,
+ * strides, shape. 
The liveness of the DLTensor and the data it views is
+ * only guaranteed until control is returned.
+ *
+ * This function currently assumes that the producer (implementer) can fill
+ * in the DLTensor shape and strides without the need for temporary
+ * allocations.
+ *
+ * This function does not perform any stream synchronization. The consumer
+ * should query DLPackCurrentWorkStream to get the current work stream and
+ * launch kernels on it.
+ *
+ * This function is exposed by the framework through the DLPackExchangeAPI.
+ *
+ * \param py_object The Python object to convert. Must have the same type
+ * as the one the `DLPackExchangeAPI` was discovered from.
+ * \param out The output DLTensor, whose space is pre-allocated on stack.
+ * \return 0 on success, -1 on failure with a Python exception set.
+ * \note - As a C function, must not throw C++ exceptions.
+ *
+ * \sa DLPackExchangeAPI, DLPackCurrentWorkStream
+ */
+ typedef int (*DLPackDLTensorFromPyObjectNoSync)( //
+ void *py_object, //
+ DLTensor *out //
+ );
+
+ /*!
+ * \brief Obtain the current work stream of a device.
+ *
+ * Obtain the current work stream of a device from the producer framework.
+ * For example, it should map to torch.cuda.current_stream in PyTorch.
+ *
+ * When device_type is kDLCPU, the consumer does not have to query the stream
+ * and the producer can simply return NULL when queried.
+ * The consumer does not have to do anything on stream sync or setting.
+ * So a CPU-only framework can just provide a dummy implementation that
+ * always sets out_current_stream[0] to NULL.
+ *
+ * \param device_type The device type.
+ * \param device_id The device id.
+ * \param out_current_stream The output current work stream.
+ *
+ * \return 0 on success, -1 on failure with a Python exception set.
+ * \note - As a C function, must not throw C++ exceptions. 
+ * + * \sa DLPackExchangeAPI + */ + typedef int (*DLPackCurrentWorkStream)( // + DLDeviceType device_type, // + int32_t device_id, // + void **out_current_stream // + ); + + /*! + * \brief Imports a DLManagedTensorVersioned to a PyObject* Tensor/NDArray. + * + * Convert an owning DLManagedTensorVersioned* to the Python tensor of the + * producer (implementer) library with the correct type. + * + * This function does not perform any stream synchronization. + * + * This function is exposed by the framework through the DLPackExchangeAPI. + * + * \param tensor The DLManagedTensorVersioned to convert the ownership of + * the tensor is stolen. \param out_py_object The output Python object. + * \return 0 on success, -1 on failure with a Python exception set. + * + * \sa DLPackExchangeAPI + */ + typedef int (*DLPackManagedTensorToPyObjectNoSync)( // + DLManagedTensorVersioned *tensor, // + void **out_py_object // + ); + + /*! + * \brief DLPackExchangeAPI stable header. + * \sa DLPackExchangeAPI + */ + typedef struct DLPackExchangeAPIHeader + { + /*! + * \brief The provided DLPack version the consumer must check major + * version compatibility before using this struct. + */ + DLPackVersion version; + /*! + * \brief Optional pointer to an older DLPackExchangeAPI in the chain. + * + * It must be NULL if the framework does not support older versions. + * If the current major version is larger than the one supported by the + * consumer, the consumer may walk this to find an earlier supported + * version. + * + * \sa DLPackExchangeAPI + */ + struct DLPackExchangeAPIHeader *prev_api; + } DLPackExchangeAPIHeader; + + /*! + * \brief Framework-specific function pointers table for DLPack exchange. + * + * Additionally to `__dlpack__()` we define a C function table sharable by + * Python implementations via `__c_dlpack_exchange_api__`. + * This attribute must be set on the type as a Python integer compatible + * with `PyLong_FromVoidPtr`/`PyLong_AsVoidPtr`. 
+ *
+ * A consumer library may use a pattern such as:
+ *
+ * \code
+ *
+ * PyObject *api_obj = type(tensor_obj).__c_dlpack_exchange_api__;
+ * MyDLPackExchangeAPI *api = PyLong_AsVoidPtr(api_obj);
+ * if (api == NULL && PyErr_Occurred()) { goto handle_error; }
+ *
+ * \endcode
+ *
+ * Note that this must be defined on the type. The consumer should look up
+ * the attribute on the type and may cache the result for each unique type.
+ *
+ * The precise API table is given by:
+ * \code
+ * struct MyDLPackExchangeAPI : public DLPackExchangeAPI {
+ * MyDLPackExchangeAPI() {
+ * header.version.major = DLPACK_MAJOR_VERSION;
+ * header.version.minor = DLPACK_MINOR_VERSION;
+ * header.prev_api = nullptr;
+ *
+ * managed_tensor_allocator = MyDLPackManagedTensorAllocator;
+ * managed_tensor_from_py_object_no_sync =
+ * MyDLPackManagedTensorFromPyObjectNoSync;
+ * managed_tensor_to_py_object_no_sync =
+ *     MyDLPackManagedTensorToPyObjectNoSync;
+ * dltensor_from_py_object_no_sync = MyDLPackDLTensorFromPyObjectNoSync;
+ * current_work_stream = MyDLPackCurrentWorkStream;
+ * }
+ *
+ * static const DLPackExchangeAPI* Global() {
+ * static MyDLPackExchangeAPI inst;
+ * return &inst;
+ * }
+ * };
+ * \endcode
+ *
+ * Guidelines for leveraging DLPackExchangeAPI:
+ *
+ * There are generally two kinds of consumer needs for DLPack exchange:
+ * - N0: library support, where consumer.kernel(x, y, z) would like to run a
+ * kernel with the data from x, y, z. The consumer is also expected to run
+ * the kernel with the same stream context as the producer. For example,
+ * when x, y, z is torch.Tensor, consumer should query
+ * exchange_api->current_work_stream to get the current stream and launch
+ * the kernel with the same stream. This setup is necessary for no
+ * synchronization in kernel launch and maximum compatibility with CUDA
+ * graph capture in the producer. This is the desirable behavior for library
+ * extension support for frameworks like PyTorch. 
+ * - N1: data ingestion and retention
+ *
+ * Note that obj.__dlpack__() API should provide useful ways for N1.
+ * The primary focus of the current DLPackExchangeAPI is to enable faster
+ * exchange N0 with the support of the function pointer current_work_stream.
+ *
+ * Array/Tensor libraries should statically create and initialize this
+ * structure then return a pointer to DLPackExchangeAPI as an int value in
+ * Tensor/Array. The DLPackExchangeAPI* must stay alive throughout the
+ * lifetime of the process.
+ *
+ * One simple way to do so is to create a static instance of
+ * DLPackExchangeAPI within the framework and return a pointer to it. The
+ * following code shows an example to do so in C++. It should also be
+ * reasonably easy to do so in other languages.
+ */
+ typedef struct DLPackExchangeAPI
+ {
+ /*!
+ * \brief The header that remains stable across versions.
+ */
+ DLPackExchangeAPIHeader header;
+ /*!
+ * \brief Producer function pointer for DLPackManagedTensorAllocator
+ * This function must not be NULL.
+ * \sa DLPackManagedTensorAllocator
+ */
+ DLPackManagedTensorAllocator managed_tensor_allocator;
+ /*!
+ * \brief Producer function pointer for DLPackManagedTensorFromPyObject
+ * This function must not be NULL.
+ * \sa DLPackManagedTensorFromPyObject
+ */
+ DLPackManagedTensorFromPyObjectNoSync
+ managed_tensor_from_py_object_no_sync;
+ /*!
+ * \brief Producer function pointer for DLPackManagedTensorToPyObject
+ * This function must not be NULL.
+ * \sa DLPackManagedTensorToPyObject
+ */
+ DLPackManagedTensorToPyObjectNoSync managed_tensor_to_py_object_no_sync;
+ /*!
+ * \brief Producer function pointer for DLPackDLTensorFromPyObject
+ * This function can be NULL when the producer does not support
+ * this function. \sa DLPackDLTensorFromPyObjectNoSync
+ */
+ DLPackDLTensorFromPyObjectNoSync dltensor_from_py_object_no_sync;
+ /*!
+ * \brief Producer function pointer for DLPackCurrentWorkStream
+ * This function must not be NULL. 
+ * \sa DLPackCurrentWorkStream + */ + DLPackCurrentWorkStream current_work_stream; + } DLPackExchangeAPI; + +#ifdef __cplusplus +} // DLPACK_EXTERN_C +#endif +#endif // DLPACK_DLPACK_H_ From fb8b77edca301396a2a2a0e76a7c5347bc2f1bd5 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 15:57:53 -0800 Subject: [PATCH 07/24] Move _dlpack.pyx/pxd to dpctl_ext.tensor --- dpctl_ext/tensor/_dlpack.pxd | 73 ++ dpctl_ext/tensor/_dlpack.pyx | 1243 ++++++++++++++++++++++++++++++++++ 2 files changed, 1316 insertions(+) create mode 100644 dpctl_ext/tensor/_dlpack.pxd create mode 100644 dpctl_ext/tensor/_dlpack.pyx diff --git a/dpctl_ext/tensor/_dlpack.pxd b/dpctl_ext/tensor/_dlpack.pxd new file mode 100644 index 00000000000..75378bfa7a9 --- /dev/null +++ b/dpctl_ext/tensor/_dlpack.pxd @@ -0,0 +1,73 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+# distutils: language = c++
+# cython: language_level=3
+# cython: linetrace=True
+
+cdef extern from "numpy/npy_no_deprecated_api.h":
+    pass
+from dpctl._sycl_device cimport SyclDevice
+from numpy cimport ndarray
+
+from ._usmarray cimport usm_ndarray
+
+
+cdef extern from "dlpack/dlpack.h" nogil:
+    int device_CPU "kDLCPU"
+    int device_CUDA "kDLCUDA"
+    int device_CUDAHost "kDLCUDAHost"
+    int device_CUDAManaged "kDLCUDAManaged"
+    int device_DLROCM "kDLROCM"
+    int device_ROCMHost "kDLROCMHost"
+    int device_OpenCL "kDLOpenCL"
+    int device_Vulkan "kDLVulkan"
+    int device_Metal "kDLMetal"
+    int device_VPI "kDLVPI"
+    int device_OneAPI "kDLOneAPI"
+    int device_WebGPU "kDLWebGPU"
+    int device_Hexagon "kDLHexagon"
+    int device_MAIA "kDLMAIA"
+    int device_Trn "kDLTrn"
+
+cpdef object to_dlpack_capsule(usm_ndarray array) except +
+cpdef object to_dlpack_versioned_capsule(
+    usm_ndarray array, bint copied
+) except +
+cpdef object numpy_to_dlpack_versioned_capsule(
+    ndarray array, bint copied
+) except +
+cpdef object from_dlpack_capsule(object dltensor) except +
+
+cdef class DLPackCreationError(Exception):
+    """
+    A DLPackCreationError exception is raised when constructing
+    DLPack capsule from `usm_ndarray` based on a USM allocation
+    on a partitioned SYCL device. 
+ """ + pass diff --git a/dpctl_ext/tensor/_dlpack.pyx b/dpctl_ext/tensor/_dlpack.pyx new file mode 100644 index 00000000000..62d71037b4c --- /dev/null +++ b/dpctl_ext/tensor/_dlpack.pyx @@ -0,0 +1,1243 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + +# distutils: language = c++ +# cython: language_level=3 +# cython: linetrace=True + +cdef extern from "numpy/npy_no_deprecated_api.h": + pass + +cimport cpython +cimport dpctl as c_dpctl +cimport dpctl.memory as c_dpmem +from dpctl._backend cimport ( + DPCTLDevice_Delete, + DPCTLDevice_GetParentDevice, + DPCTLSyclDeviceRef, + DPCTLSyclUSMRef, +) +from dpctl._sycl_queue_manager cimport get_device_cached_queue +from libc cimport stdlib +from libc.stdint cimport int64_t, uint8_t, uint16_t, uint32_t, uint64_t +from numpy cimport ndarray + +from ._usmarray cimport ( + USM_ARRAY_C_CONTIGUOUS, + USM_ARRAY_F_CONTIGUOUS, + USM_ARRAY_WRITABLE, + usm_ndarray, +) + +import ctypes + +import dpctl +import dpctl.memory as dpmem +import numpy as np + +from ._device import Device + + +cdef extern from "dlpack/dlpack.h" nogil: + cdef int DLPACK_MAJOR_VERSION + + cdef int DLPACK_MINOR_VERSION + + cdef int DLPACK_FLAG_BITMASK_READ_ONLY + + cdef int DLPACK_FLAG_BITMASK_IS_COPIED + + ctypedef struct DLPackVersion: + uint32_t major + uint32_t minor + + cdef enum DLDeviceType: + kDLCPU + kDLCUDA + kDLCUDAHost + kDLCUDAManaged + kDLROCM + kDLROCMHost + kDLOpenCL + kDLVulkan + kDLMetal + kDLVPI + kDLOneAPI + kDLWebGPU + kDLHexagon + kDLMAIA + kDLTrn + + ctypedef struct DLDevice: + DLDeviceType device_type + int device_id + + cdef enum DLDataTypeCode: + kDLInt + kDLUInt + kDLFloat + kDLBfloat + kDLComplex + kDLBool + kDLFloat8_e3m4 + kDLFloat8_e4m3 + kDLFloat8_e4m3b11fnuz + kDLFloat8_e4m3fn + kDLFloat8_e4m3fnuz + kDLFloat8_e5m2 + kDLFloat8_e5m2fnuz + kDLFloat8_e8m0fnu + kDLFloat6_e2m3fn + kDLFloat6_e3m2fn + kDLFloat4_e2m1fn + + ctypedef struct DLDataType: + uint8_t code + uint8_t bits + uint16_t lanes + + ctypedef struct DLTensor: + void *data + DLDevice device + int ndim + DLDataType dtype + int64_t *shape + int64_t *strides + uint64_t byte_offset + + ctypedef struct DLManagedTensor: + DLTensor dl_tensor + 
void *manager_ctx + void (*deleter)(DLManagedTensor *) # noqa: E211 + + ctypedef struct DLManagedTensorVersioned: + DLPackVersion version + void *manager_ctx + void (*deleter)(DLManagedTensorVersioned *) # noqa: E211 + uint64_t flags + DLTensor dl_tensor + + +def get_build_dlpack_version(): + """ + Returns a tuple of integers representing the `major` and `minor` + version of DLPack :module:`dpctl.tensor` was built with. + This tuple can be passed as the `max_version` argument to + `__dlpack__` to guarantee module:`dpctl.tensor` can properly + consume capsule. + + Returns: + Tuple[int, int] + A tuple of integers representing the `major` and `minor` + version of DLPack used to build :module:`dpctl.tensor`. + """ + return (DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION) + + +cdef void _pycapsule_deleter(object dlt_capsule) noexcept: + cdef DLManagedTensor *dlm_tensor = NULL + if cpython.PyCapsule_IsValid(dlt_capsule, "dltensor"): + dlm_tensor = cpython.PyCapsule_GetPointer( + dlt_capsule, "dltensor") + dlm_tensor.deleter(dlm_tensor) + + +cdef void _managed_tensor_deleter( + DLManagedTensor *dlm_tensor +) noexcept with gil: + if dlm_tensor is not NULL: + # we only delete shape, because we make single allocation to + # accommodate both shape and strides if strides are needed + stdlib.free(dlm_tensor.dl_tensor.shape) + cpython.Py_DECREF(dlm_tensor.manager_ctx) + dlm_tensor.manager_ctx = NULL + stdlib.free(dlm_tensor) + + +cdef void _pycapsule_versioned_deleter(object dlt_capsule) noexcept: + cdef DLManagedTensorVersioned *dlmv_tensor = NULL + if cpython.PyCapsule_IsValid(dlt_capsule, "dltensor_versioned"): + dlmv_tensor = cpython.PyCapsule_GetPointer( + dlt_capsule, "dltensor_versioned") + dlmv_tensor.deleter(dlmv_tensor) + + +cdef void _managed_tensor_versioned_deleter( + DLManagedTensorVersioned *dlmv_tensor +) noexcept with gil: + if dlmv_tensor is not NULL: + # we only delete shape, because we make single allocation to + # accommodate both shape and strides if strides 
are needed + stdlib.free(dlmv_tensor.dl_tensor.shape) + cpython.Py_DECREF(dlmv_tensor.manager_ctx) + dlmv_tensor.manager_ctx = NULL + stdlib.free(dlmv_tensor) + + +cdef object _get_default_context(c_dpctl.SyclDevice dev): + try: + default_context = dev.sycl_platform.default_context + except RuntimeError: + # RT does not support default_context + default_context = None + + return default_context + +cdef int get_array_dlpack_device_id( + usm_ndarray usm_ary +) except -1: + """Finds ordinal number of the parent of device where array + was allocated. + """ + cdef c_dpctl.SyclQueue ary_sycl_queue + cdef c_dpctl.SyclDevice ary_sycl_device + cdef DPCTLSyclDeviceRef pDRef = NULL + cdef int device_id = -1 + + ary_sycl_queue = usm_ary.get_sycl_queue() + ary_sycl_device = ary_sycl_queue.get_sycl_device() + + default_context = _get_default_context(ary_sycl_device) + if default_context is None: + # check that ary_sycl_device is a non-partitioned device + pDRef = DPCTLDevice_GetParentDevice(ary_sycl_device.get_device_ref()) + if pDRef is not NULL: + DPCTLDevice_Delete(pDRef) + raise DLPackCreationError( + "to_dlpack_capsule: DLPack can only export arrays allocated " + "on non-partitioned SYCL devices on platforms where " + "default_context oneAPI extension is not supported." + ) + else: + if not usm_ary.sycl_context == default_context: + raise DLPackCreationError( + "to_dlpack_capsule: DLPack can only export arrays based on USM " + "allocations bound to a default platform SYCL context" + ) + device_id = ary_sycl_device.get_device_id() + + if device_id < 0: + raise DLPackCreationError( + "get_array_dlpack_device_id: failed to determine device_id" + ) + + return device_id + + +cpdef to_dlpack_capsule(usm_ndarray usm_ary): + """ + to_dlpack_capsule(usm_ary) + + Constructs named Python capsule object referencing + instance of ``DLManagedTensor`` from + :class:`dpctl.tensor.usm_ndarray` instance. 
+ + Args: + usm_ary: An instance of :class:`dpctl.tensor.usm_ndarray` + Returns: + A new capsule with name ``"dltensor"`` that contains + a pointer to ``DLManagedTensor`` struct. + Raises: + DLPackCreationError: when array can be represented as + DLPack tensor. This may happen when array was allocated + on a partitioned sycl device, or its USM allocation is + not bound to the platform default SYCL context. + MemoryError: when host allocation to needed for ``DLManagedTensor`` + did not succeed. + ValueError: when array elements data type could not be represented + in ``DLManagedTensor``. + """ + cdef DLManagedTensor *dlm_tensor = NULL + cdef DLTensor *dl_tensor = NULL + cdef int nd = usm_ary.get_ndim() + cdef char *data_ptr = usm_ary.get_data() + cdef Py_ssize_t *shape_ptr = NULL + cdef Py_ssize_t *strides_ptr = NULL + cdef int64_t *shape_strides_ptr = NULL + cdef int i = 0 + cdef int device_id = -1 + cdef int flags = 0 + cdef Py_ssize_t element_offset = 0 + cdef Py_ssize_t byte_offset = 0 + cdef Py_ssize_t si = 1 + + ary_base = usm_ary.get_base() + + device_id = get_array_dlpack_device_id(usm_ary) + + dlm_tensor = stdlib.malloc( + sizeof(DLManagedTensor)) + if dlm_tensor is NULL: + raise MemoryError( + "to_dlpack_capsule: Could not allocate memory for DLManagedTensor" + ) + if nd > 0: + shape_strides_ptr = stdlib.malloc((sizeof(int64_t) * 2) * nd) + if shape_strides_ptr is NULL: + stdlib.free(dlm_tensor) + raise MemoryError( + "to_dlpack_capsule: Could not allocate memory for shape/strides" + ) + shape_ptr = usm_ary.get_shape() + for i in range(nd): + shape_strides_ptr[i] = shape_ptr[i] + strides_ptr = usm_ary.get_strides() + flags = usm_ary.flags_ + if strides_ptr: + for i in range(nd): + shape_strides_ptr[nd + i] = strides_ptr[i] + else: + if flags & USM_ARRAY_C_CONTIGUOUS: + si = 1 + for i in range(nd - 1, -1, -1): + shape_strides_ptr[nd + i] = si + si = si * shape_ptr[i] + elif flags & USM_ARRAY_F_CONTIGUOUS: + si = 1 + for i in range(0, nd): + 
shape_strides_ptr[nd + i] = si + si = si * shape_ptr[i] + else: + stdlib.free(shape_strides_ptr) + stdlib.free(dlm_tensor) + raise BufferError( + "to_dlpack_capsule: Invalid array encountered " + "when building strides" + ) + + strides_ptr = &shape_strides_ptr[nd] + + ary_dt = usm_ary.dtype + ary_dtk = ary_dt.kind + element_offset = usm_ary.get_offset() + byte_offset = element_offset * (ary_dt.itemsize) + + dl_tensor = &dlm_tensor.dl_tensor + dl_tensor.data = (data_ptr - byte_offset) + dl_tensor.ndim = nd + dl_tensor.byte_offset = byte_offset + dl_tensor.shape = &shape_strides_ptr[0] if nd > 0 else NULL + dl_tensor.strides = &shape_strides_ptr[nd] if nd > 0 else NULL + dl_tensor.device.device_type = kDLOneAPI + dl_tensor.device.device_id = device_id + dl_tensor.dtype.lanes = 1 + dl_tensor.dtype.bits = (ary_dt.itemsize * 8) + if (ary_dtk == "b"): + dl_tensor.dtype.code = kDLBool + elif (ary_dtk == "u"): + dl_tensor.dtype.code = kDLUInt + elif (ary_dtk == "i"): + dl_tensor.dtype.code = kDLInt + elif (ary_dtk == "f"): + dl_tensor.dtype.code = kDLFloat + elif (ary_dtk == "c"): + dl_tensor.dtype.code = kDLComplex + else: + stdlib.free(shape_strides_ptr) + stdlib.free(dlm_tensor) + raise ValueError("Unrecognized array data type") + + dlm_tensor.manager_ctx = ary_base + cpython.Py_INCREF(ary_base) + dlm_tensor.deleter = _managed_tensor_deleter + + return cpython.PyCapsule_New(dlm_tensor, "dltensor", _pycapsule_deleter) + + +cpdef to_dlpack_versioned_capsule(usm_ndarray usm_ary, bint copied): + """ + to_dlpack_versioned_capsule(usm_ary, copied) + + Constructs named Python capsule object referencing + instance of ``DLManagedTensorVersioned`` from + :class:`dpctl.tensor.usm_ndarray` instance. + + Args: + usm_ary: An instance of :class:`dpctl.tensor.usm_ndarray` + copied: A bint representing whether the data was previously + copied in order to set the flags with the is-copied + bitmask. 
+ Returns: + A new capsule with name ``"dltensor_versioned"`` that + contains a pointer to ``DLManagedTensorVersioned`` struct. + Raises: + DLPackCreationError: when array can be represented as + DLPack tensor. This may happen when array was allocated + on a partitioned sycl device, or its USM allocation is + not bound to the platform default SYCL context. + MemoryError: when host allocation to needed for + ``DLManagedTensorVersioned`` did not succeed. + ValueError: when array elements data type could not be represented + in ``DLManagedTensorVersioned``. + """ + cdef DLManagedTensorVersioned *dlmv_tensor = NULL + cdef DLTensor *dl_tensor = NULL + cdef uint32_t dlmv_flags = 0 + cdef int nd = usm_ary.get_ndim() + cdef char *data_ptr = usm_ary.get_data() + cdef Py_ssize_t *shape_ptr = NULL + cdef Py_ssize_t *strides_ptr = NULL + cdef int64_t *shape_strides_ptr = NULL + cdef int i = 0 + cdef int device_id = -1 + cdef int flags = 0 + cdef Py_ssize_t element_offset = 0 + cdef Py_ssize_t byte_offset = 0 + cdef Py_ssize_t si = 1 + + ary_base = usm_ary.get_base() + + # Find ordinal number of the parent device + device_id = get_array_dlpack_device_id(usm_ary) + + dlmv_tensor = stdlib.malloc( + sizeof(DLManagedTensorVersioned)) + if dlmv_tensor is NULL: + raise MemoryError( + "to_dlpack_versioned_capsule: Could not allocate memory " + "for DLManagedTensorVersioned" + ) + if nd > 0: + shape_strides_ptr = stdlib.malloc((sizeof(int64_t) * 2) * nd) + if shape_strides_ptr is NULL: + stdlib.free(dlmv_tensor) + raise MemoryError( + "to_dlpack_versioned_capsule: Could not allocate memory " + "for shape/strides" + ) + # this can be a separate function for handling shapes and strides + shape_ptr = usm_ary.get_shape() + for i in range(nd): + shape_strides_ptr[i] = shape_ptr[i] + strides_ptr = usm_ary.get_strides() + flags = usm_ary.flags_ + if strides_ptr: + for i in range(nd): + shape_strides_ptr[nd + i] = strides_ptr[i] + else: + if flags & USM_ARRAY_C_CONTIGUOUS: + si = 1 + for i in 
range(nd - 1, -1, -1): + shape_strides_ptr[nd + i] = si + si = si * shape_ptr[i] + elif flags & USM_ARRAY_F_CONTIGUOUS: + si = 1 + for i in range(0, nd): + shape_strides_ptr[nd + i] = si + si = si * shape_ptr[i] + else: + stdlib.free(shape_strides_ptr) + stdlib.free(dlmv_tensor) + raise BufferError( + "to_dlpack_versioned_capsule: Invalid array encountered " + "when building strides" + ) + + strides_ptr = &shape_strides_ptr[nd] + + # this can all be a function for building the dl_tensor + # object (separate from dlm/dlmv) + ary_dt = usm_ary.dtype + ary_dtk = ary_dt.kind + element_offset = usm_ary.get_offset() + byte_offset = element_offset * (ary_dt.itemsize) + + dl_tensor = &dlmv_tensor.dl_tensor + dl_tensor.data = (data_ptr - byte_offset) + dl_tensor.ndim = nd + dl_tensor.byte_offset = byte_offset + dl_tensor.shape = &shape_strides_ptr[0] if nd > 0 else NULL + dl_tensor.strides = &shape_strides_ptr[nd] if nd > 0 else NULL + dl_tensor.device.device_type = kDLOneAPI + dl_tensor.device.device_id = device_id + dl_tensor.dtype.lanes = 1 + dl_tensor.dtype.bits = (ary_dt.itemsize * 8) + if (ary_dtk == "b"): + dl_tensor.dtype.code = kDLBool + elif (ary_dtk == "u"): + dl_tensor.dtype.code = kDLUInt + elif (ary_dtk == "i"): + dl_tensor.dtype.code = kDLInt + elif (ary_dtk == "f"): + dl_tensor.dtype.code = kDLFloat + elif (ary_dtk == "c"): + dl_tensor.dtype.code = kDLComplex + else: + stdlib.free(shape_strides_ptr) + stdlib.free(dlmv_tensor) + raise ValueError("Unrecognized array data type") + + # set flags down here + if copied: + dlmv_flags |= DLPACK_FLAG_BITMASK_IS_COPIED + if not (flags & USM_ARRAY_WRITABLE): + dlmv_flags |= DLPACK_FLAG_BITMASK_READ_ONLY + dlmv_tensor.flags = dlmv_flags + + dlmv_tensor.version.major = DLPACK_MAJOR_VERSION + dlmv_tensor.version.minor = DLPACK_MINOR_VERSION + + dlmv_tensor.manager_ctx = ary_base + cpython.Py_INCREF(ary_base) + dlmv_tensor.deleter = _managed_tensor_versioned_deleter + + return cpython.PyCapsule_New( + dlmv_tensor, 
"dltensor_versioned", _pycapsule_versioned_deleter + ) + + +cpdef numpy_to_dlpack_versioned_capsule(ndarray npy_ary, bint copied): + """ + to_dlpack_versioned_capsule(npy_ary, copied) + + Constructs named Python capsule object referencing + instance of ``DLManagedTensorVersioned`` from + :class:`numpy.ndarray` instance. + + Args: + npy_ary: An instance of :class:`numpy.ndarray` + copied: A bint representing whether the data was previously + copied in order to set the flags with the is-copied + bitmask. + Returns: + A new capsule with name ``"dltensor_versioned"`` that + contains a pointer to ``DLManagedTensorVersioned`` struct. + Raises: + DLPackCreationError: when array can be represented as + DLPack tensor. + MemoryError: when host allocation to needed for + ``DLManagedTensorVersioned`` did not succeed. + ValueError: when array elements data type could not be represented + in ``DLManagedTensorVersioned``. + """ + cdef DLManagedTensorVersioned *dlmv_tensor = NULL + cdef DLTensor *dl_tensor = NULL + cdef uint32_t dlmv_flags = 0 + cdef int nd = npy_ary.ndim + cdef int64_t *shape_strides_ptr = NULL + cdef int i = 0 + cdef Py_ssize_t byte_offset = 0 + cdef int itemsize = npy_ary.itemsize + + dlmv_tensor = stdlib.malloc( + sizeof(DLManagedTensorVersioned)) + if dlmv_tensor is NULL: + raise MemoryError( + "numpy_to_dlpack_versioned_capsule: Could not allocate memory " + "for DLManagedTensorVersioned" + ) + + shape = npy_ary.ctypes.shape_as(ctypes.c_int64) + strides = npy_ary.ctypes.strides_as(ctypes.c_int64) + if nd > 0: + if npy_ary.size != 1: + for i in range(nd): + if shape[i] != 1 and strides[i] % itemsize != 0: + stdlib.free(dlmv_tensor) + raise BufferError( + "numpy_to_dlpack_versioned_capsule: DLPack cannot " + "encode an array if strides are not a multiple of " + "itemsize" + ) + shape_strides_ptr = stdlib.malloc((sizeof(int64_t) * 2) * nd) + if shape_strides_ptr is NULL: + stdlib.free(dlmv_tensor) + raise MemoryError( + "numpy_to_dlpack_versioned_capsule: Could 
not allocate memory " + "for shape/strides" + ) + for i in range(nd): + shape_strides_ptr[i] = shape[i] + shape_strides_ptr[nd + i] = strides[i] // itemsize + + writable_flag = npy_ary.flags["W"] + + ary_dt = npy_ary.dtype + ary_dtk = ary_dt.kind + + dl_tensor = &dlmv_tensor.dl_tensor + dl_tensor.data = npy_ary.data + dl_tensor.ndim = nd + dl_tensor.byte_offset = byte_offset + dl_tensor.shape = &shape_strides_ptr[0] if nd > 0 else NULL + dl_tensor.strides = &shape_strides_ptr[nd] if nd > 0 else NULL + dl_tensor.device.device_type = kDLCPU + dl_tensor.device.device_id = 0 + dl_tensor.dtype.lanes = 1 + dl_tensor.dtype.bits = (ary_dt.itemsize * 8) + if (ary_dtk == "b"): + dl_tensor.dtype.code = kDLBool + elif (ary_dtk == "u"): + dl_tensor.dtype.code = kDLUInt + elif (ary_dtk == "i"): + dl_tensor.dtype.code = kDLInt + elif (ary_dtk == "f" and ary_dt.itemsize <= 8): + dl_tensor.dtype.code = kDLFloat + elif (ary_dtk == "c" and ary_dt.itemsize <= 16): + dl_tensor.dtype.code = kDLComplex + else: + stdlib.free(shape_strides_ptr) + stdlib.free(dlmv_tensor) + raise ValueError("Unrecognized array data type") + + # set flags down here + if copied: + dlmv_flags |= DLPACK_FLAG_BITMASK_IS_COPIED + if not writable_flag: + dlmv_flags |= DLPACK_FLAG_BITMASK_READ_ONLY + dlmv_tensor.flags = dlmv_flags + + dlmv_tensor.version.major = DLPACK_MAJOR_VERSION + dlmv_tensor.version.minor = DLPACK_MINOR_VERSION + + dlmv_tensor.manager_ctx = npy_ary + cpython.Py_INCREF(npy_ary) + dlmv_tensor.deleter = _managed_tensor_versioned_deleter + + return cpython.PyCapsule_New( + dlmv_tensor, "dltensor_versioned", _pycapsule_versioned_deleter + ) + + +cdef class _DLManagedTensorOwner: + """ + Helper class managing the lifetime of the DLManagedTensor struct + transferred from a 'dlpack' capsule. 
+ """ + cdef DLManagedTensor * dlm_tensor + + def __cinit__(self): + self.dlm_tensor = NULL + + def __dealloc__(self): + if self.dlm_tensor: + self.dlm_tensor.deleter(self.dlm_tensor) + self.dlm_tensor = NULL + + @staticmethod + cdef _DLManagedTensorOwner _create(DLManagedTensor *dlm_tensor_src): + cdef _DLManagedTensorOwner res + res = _DLManagedTensorOwner.__new__(_DLManagedTensorOwner) + res.dlm_tensor = dlm_tensor_src + return res + + +cdef class _DLManagedTensorVersionedOwner: + """ + Helper class managing the lifetime of the DLManagedTensorVersioned + struct transferred from a 'dlpack_versioned' capsule. + """ + cdef DLManagedTensorVersioned * dlmv_tensor + + def __cinit__(self): + self.dlmv_tensor = NULL + + def __dealloc__(self): + if self.dlmv_tensor: + self.dlmv_tensor.deleter(self.dlmv_tensor) + self.dlmv_tensor = NULL + + @staticmethod + cdef _DLManagedTensorVersionedOwner _create( + DLManagedTensorVersioned *dlmv_tensor_src + ): + cdef _DLManagedTensorVersionedOwner res + res = _DLManagedTensorVersionedOwner.__new__( + _DLManagedTensorVersionedOwner + ) + res.dlmv_tensor = dlmv_tensor_src + return res + + +cdef dict _numpy_array_interface_from_dl_tensor(DLTensor *dlt, bint ro_flag): + """Constructs a NumPy `__array_interface__` dictionary from a DLTensor.""" + cdef int itemsize = 0 + + if dlt.dtype.lanes != 1: + raise BufferError( + "Can not import DLPack tensor with lanes != 1" + ) + itemsize = dlt.dtype.bits // 8 + shape = list() + if (dlt.strides is NULL): + strides = None + for dim in range(dlt.ndim): + shape.append(dlt.shape[dim]) + else: + strides = list() + for dim in range(dlt.ndim): + shape.append(dlt.shape[dim]) + # convert to byte-strides + strides.append(dlt.strides[dim] * itemsize) + strides = tuple(strides) + shape = tuple(shape) + if (dlt.dtype.code == kDLUInt): + ary_dt = "u" + str(itemsize) + elif (dlt.dtype.code == kDLInt): + ary_dt = "i" + str(itemsize) + elif (dlt.dtype.code == kDLFloat): + ary_dt = "f" + str(itemsize) + elif 
(dlt.dtype.code == kDLComplex): + ary_dt = "c" + str(itemsize) + elif (dlt.dtype.code == kDLBool): + ary_dt = "b" + str(itemsize) + else: + raise BufferError( + "Can not import DLPack tensor with type code {}.".format( + dlt.dtype.code + ) + ) + typestr = "|" + ary_dt + return dict( + version=3, + shape=shape, + strides=strides, + data=( dlt.data, True if ro_flag else False), + offset=dlt.byte_offset, + typestr=typestr, + ) + + +class _numpy_array_interface_wrapper: + """ + Class that wraps a Python capsule and dictionary for consumption by NumPy. + + Implementation taken from + https://github.com/dmlc/dlpack/blob/main/apps/numpy_dlpack/dlpack/to_numpy.py + + Args: + array_interface: + A dictionary describing the underlying memory. Formatted + to match `numpy.ndarray.__array_interface__`. + + pycapsule: + A Python capsule wrapping the dlpack tensor that will be + converted to numpy. + """ + + def __init__(self, array_interface, memory_owner) -> None: + self.__array_interface__ = array_interface + self._memory_owner = memory_owner + + +cdef bint _is_kdlcpu_device(DLDevice *dev): + "Check if DLTensor.DLDevice denotes (kDLCPU, 0)" + return (dev[0].device_type == kDLCPU) and (dev[0].device_id == 0) + + +cpdef object from_dlpack_capsule(object py_caps): + """ + from_dlpack_capsule(py_caps) + + Reconstructs instance of :class:`dpctl.tensor.usm_ndarray` from + named Python capsule object referencing instance of ``DLManagedTensor`` + without copy. The instance forms a view in the memory of the tensor. + + Args: + caps: + Python capsule with name ``"dltensor"`` expected to reference + an instance of ``DLManagedTensor`` struct. + Returns: + Instance of :class:`dpctl.tensor.usm_ndarray` with a view into + memory of the tensor. Capsule is renamed to ``"used_dltensor"`` + upon success. + Raises: + TypeError: + if argument is not a ``"dltensor"`` capsule. 
+ ValueError: + if argument is ``"used_dltensor"`` capsule + BufferError: + if the USM pointer is not bound to the reconstructed + sycl context, or the DLPack's device_type is not supported + by :mod:`dpctl`. + """ + cdef DLManagedTensorVersioned *dlmv_tensor = NULL + cdef DLManagedTensor *dlm_tensor = NULL + cdef DLTensor *dl_tensor = NULL + cdef int versioned = 0 + cdef int readonly = 0 + cdef bytes usm_type + cdef size_t sz = 1 + cdef size_t alloc_sz = 1 + cdef int i + cdef int device_id = -1 + cdef int element_bytesize = 0 + cdef Py_ssize_t offset_min = 0 + cdef Py_ssize_t offset_max = 0 + cdef char *mem_ptr = NULL + cdef Py_ssize_t mem_ptr_delta = 0 + cdef Py_ssize_t element_offset = 0 + cdef int64_t stride_i = -1 + cdef int64_t shape_i = -1 + + if cpython.PyCapsule_IsValid(py_caps, "dltensor"): + dlm_tensor = cpython.PyCapsule_GetPointer( + py_caps, "dltensor") + dl_tensor = &dlm_tensor.dl_tensor + elif cpython.PyCapsule_IsValid(py_caps, "dltensor_versioned"): + dlmv_tensor = cpython.PyCapsule_GetPointer( + py_caps, "dltensor_versioned") + if dlmv_tensor.version.major > DLPACK_MAJOR_VERSION: + raise BufferError( + "Can not import DLPack tensor with major version " + f"greater than {DLPACK_MAJOR_VERSION}" + ) + versioned = 1 + readonly = (dlmv_tensor.flags & DLPACK_FLAG_BITMASK_READ_ONLY) != 0 + dl_tensor = &dlmv_tensor.dl_tensor + elif ( + cpython.PyCapsule_IsValid(py_caps, "used_dltensor") + or cpython.PyCapsule_IsValid(py_caps, "used_dltensor_versioned") + ): + raise ValueError( + "A DLPack tensor object can not be consumed multiple times" + ) + else: + raise TypeError( + "`from_dlpack_capsule` expects a Python 'dltensor' capsule" + ) + + # Verify that we can work with this device + if dl_tensor.device.device_type == kDLOneAPI: + device_id = dl_tensor.device.device_id + root_device = dpctl.SyclDevice(str(device_id)) + try: + default_context = root_device.sycl_platform.default_context + except RuntimeError: + default_context = 
get_device_cached_queue(root_device).sycl_context + if dl_tensor.data is NULL: + usm_type = b"device" + q = get_device_cached_queue((default_context, root_device,)) + else: + usm_type = c_dpmem._Memory.get_pointer_type( + dl_tensor.data, + default_context) + if usm_type == b"unknown": + raise BufferError( + "Data pointer in DLPack is not bound to default sycl " + f"context of device '{device_id}', translated to " + f"{root_device.filter_string}" + ) + alloc_device = c_dpmem._Memory.get_pointer_device( + dl_tensor.data, + default_context + ) + q = get_device_cached_queue((default_context, alloc_device,)) + if dl_tensor.dtype.bits % 8: + raise BufferError( + "Can not import DLPack tensor whose element's " + "bitsize is not a multiple of 8" + ) + if dl_tensor.dtype.lanes != 1: + raise BufferError( + "Can not import DLPack tensor with lanes != 1" + ) + if dl_tensor.ndim > 0: + offset_min = 0 + offset_max = 0 + for i in range(dl_tensor.ndim): + stride_i = dl_tensor.strides[i] + shape_i = dl_tensor.shape[i] + if shape_i > 1: + shape_i -= 1 + if stride_i > 0: + offset_max = offset_max + stride_i * shape_i + else: + offset_min = offset_min + stride_i * shape_i + sz = offset_max - offset_min + 1 + if sz == 0: + sz = 1 + + element_bytesize = (dl_tensor.dtype.bits // 8) + sz = sz * element_bytesize + element_offset = dl_tensor.byte_offset // element_bytesize + + # transfer ownership + if not versioned: + dlm_holder = _DLManagedTensorOwner._create(dlm_tensor) + cpython.PyCapsule_SetName(py_caps, "used_dltensor") + else: + dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor) + cpython.PyCapsule_SetName(py_caps, "used_dltensor_versioned") + + if dl_tensor.data is NULL: + usm_mem = dpmem.MemoryUSMDevice(sz, q) + else: + mem_ptr_delta = dl_tensor.byte_offset - ( + element_offset * element_bytesize + ) + mem_ptr = dl_tensor.data + alloc_sz = dl_tensor.byte_offset + ( + (offset_max + 1) * element_bytesize) + tmp = c_dpmem._Memory.create_from_usm_pointer_size_qref( + 
mem_ptr, + max(alloc_sz, element_bytesize), + (q).get_queue_ref(), + memory_owner=dlmv_holder if versioned else dlm_holder + ) + if mem_ptr_delta == 0: + usm_mem = tmp + else: + alloc_sz = dl_tensor.byte_offset + ( + (offset_max * element_bytesize + mem_ptr_delta)) + usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref( + ( + mem_ptr + (element_bytesize - mem_ptr_delta) + ), + max(alloc_sz, element_bytesize), + (q).get_queue_ref(), + memory_owner=tmp + ) + + py_shape = list() + if (dl_tensor.shape is not NULL): + for i in range(dl_tensor.ndim): + py_shape.append(dl_tensor.shape[i]) + if (dl_tensor.strides is not NULL): + py_strides = list() + for i in range(dl_tensor.ndim): + py_strides.append(dl_tensor.strides[i]) + else: + py_strides = None + if (dl_tensor.dtype.code == kDLUInt): + ary_dt = np.dtype("u" + str(element_bytesize)) + elif (dl_tensor.dtype.code == kDLInt): + ary_dt = np.dtype("i" + str(element_bytesize)) + elif (dl_tensor.dtype.code == kDLFloat): + ary_dt = np.dtype("f" + str(element_bytesize)) + elif (dl_tensor.dtype.code == kDLComplex): + ary_dt = np.dtype("c" + str(element_bytesize)) + elif (dl_tensor.dtype.code == kDLBool): + ary_dt = np.dtype("?") + else: + raise BufferError( + "Can not import DLPack tensor with type code {}.".format( + dl_tensor.dtype.code + ) + ) + res_ary = usm_ndarray( + py_shape, + dtype=ary_dt, + buffer=usm_mem, + strides=py_strides, + offset=element_offset + ) + if readonly: + res_ary.flags_ = (res_ary.flags_ & ~USM_ARRAY_WRITABLE) + return res_ary + elif _is_kdlcpu_device(&dl_tensor.device): + ary_iface = _numpy_array_interface_from_dl_tensor(dl_tensor, readonly) + if not versioned: + dlm_holder = _DLManagedTensorOwner._create(dlm_tensor) + cpython.PyCapsule_SetName(py_caps, "used_dltensor") + return np.ctypeslib.as_array( + _numpy_array_interface_wrapper(ary_iface, dlm_holder) + ) + else: + dlmv_holder = _DLManagedTensorVersionedOwner._create(dlmv_tensor) + cpython.PyCapsule_SetName(py_caps, 
"used_dltensor_versioned") + return np.ctypeslib.as_array( + _numpy_array_interface_wrapper(ary_iface, dlmv_holder) + ) + else: + raise BufferError( + "The DLPack tensor resides on unsupported device." + ) + +cdef usm_ndarray _to_usm_ary_from_host_blob(object host_blob, dev : Device): + q = dev.sycl_queue + np_ary = np.asarray(host_blob) + dt = np_ary.dtype + if dt.char in "dD" and q.sycl_device.has_aspect_fp64 is False: + Xusm_dtype = ( + "float32" if dt.char == "d" else "complex64" + ) + else: + Xusm_dtype = dt + usm_mem = dpmem.MemoryUSMDevice(np_ary.nbytes, queue=q) + usm_ary = usm_ndarray(np_ary.shape, dtype=Xusm_dtype, buffer=usm_mem) + usm_mem.copy_from_host(np.reshape(np_ary.view(dtype="u1"), -1)) + return usm_ary + + +# only cdef to make it private +cdef object _create_device(object device, object dl_device): + if isinstance(device, Device): + return device + elif isinstance(device, dpctl.SyclDevice): + return Device.create_device(device) + else: + root_device = dpctl.SyclDevice(str(dl_device[1])) + return Device.create_device(root_device) + + +def from_dlpack(x, /, *, device=None, copy=None): + """from_dlpack(x, /, *, device=None, copy=None) + + Constructs :class:`dpctl.tensor.usm_ndarray` or :class:`numpy.ndarray` + instance from a Python object ``x`` that implements ``__dlpack__`` protocol. + + Args: + x (object): + A Python object representing an array that supports + ``__dlpack__`` protocol. + device ( + Optional[str, :class:`dpctl.SyclDevice`, + :class:`dpctl.SyclQueue`, + :class:`dpctl.tensor.Device`, + tuple([:class:`enum.IntEnum`, int])]) + ): + Device where the output array is to be placed. ``device`` keyword + values can be: + + * ``None`` + The data remains on the same device. + * oneAPI filter selector string + SYCL device selected by :ref:`filter selector string + `. + * :class:`dpctl.SyclDevice` + explicit SYCL device that must correspond to + a non-partitioned SYCL device. 
+ * :class:`dpctl.SyclQueue` + implies SYCL device targeted by the SYCL queue. + * :class:`dpctl.tensor.Device` + implies SYCL device `device.sycl_queue`. The `Device` object + is obtained via :attr:`dpctl.tensor.usm_ndarray.device`. + * ``(device_type, device_id)`` + 2-tuple matching the format of the output of the + ``__dlpack_device__`` method: an integer enumerator representing + the device type followed by an integer representing the index of + the device. The only supported :class:`dpctl.tensor.DLDeviceType` + device types are ``"kDLCPU"`` and ``"kDLOneAPI"``. + + Default: ``None``. + + copy (bool, optional) + Boolean indicating whether or not to copy the input. + + * If ``copy`` is ``True``, the input will always be + copied. + * If ``False``, a ``BufferError`` will be raised if a + copy is deemed necessary. + * If ``None``, a copy will be made only if deemed + necessary, otherwise, the existing memory buffer will + be reused. + + Default: ``None``. + + Returns: + Alternative[usm_ndarray, numpy.ndarray]: + An array containing the data in ``x``. When ``copy`` is + ``None`` or ``False``, this may be a view into the original + memory. + + The type of the returned object + depends on where the data backing up input object ``x`` resides. + If it resides in a USM allocation on a SYCL device, the + type :class:`dpctl.tensor.usm_ndarray` is returned, otherwise if it + resides on ``"kDLCPU"`` device the type is :class:`numpy.ndarray`, + and otherwise an exception is raised. + + .. note:: + + If the return type is :class:`dpctl.tensor.usm_ndarray`, the + associated SYCL queue is derived from the ``device`` keyword. + When ``device`` keyword value has type :class:`dpctl.SyclQueue`, + the explicit queue instance is used, when ``device`` keyword + value has type :class:`dpctl.tensor.Device`, the + ``device.sycl_queue`` is used. In all other cases, the cached + SYCL queue corresponding to the implied SYCL device is used. 
+ + Raises: + TypeError: + if ``x`` does not implement ``__dlpack__`` method + ValueError: + if data of the input object resides on an unsupported device + + See https://dmlc.github.io/dlpack/latest/ for more details. + + :Example: + + .. code-block:: python + + import dpctl + import dpctl.tensor as dpt + + class Container: + "Helper class implementing `__dlpack__` protocol" + def __init__(self, array): + self._array = array + + def __dlpack__(self, stream=None): + return self._array.__dlpack__(stream=stream) + + def __dlpack_device__(self): + return self._array.__dlpack_device__() + + C = Container(dpt.linspace(0, 100, num=20, dtype="int16")) + # create usm_ndarray view + X = dpt.from_dlpack(C) + # migrate content of the container to device of type kDLCPU + Y = dpt.from_dlpack(C, device=(dpt.DLDeviceType.kDLCPU, 0)) + + """ + dlpack_attr = getattr(x, "__dlpack__", None) + dlpack_dev_attr = getattr(x, "__dlpack_device__", None) + if not callable(dlpack_attr) or not callable(dlpack_dev_attr): + raise TypeError( + f"The argument of type {type(x)} does not implement " + "`__dlpack__` and `__dlpack_device__` methods." + ) + # device is converted to a dlpack_device if necessary + dl_device = None + if device: + if isinstance(device, tuple): + dl_device = device + if len(dl_device) != 2: + raise ValueError( + "Argument `device` specified as a tuple must have length 2" + ) + else: + if not isinstance(device, dpctl.SyclDevice): + device = Device.create_device(device) + d = device.sycl_device + else: + d = device + dl_device = (device_OneAPI, d.get_device_id()) + if dl_device is not None: + if (dl_device[0] not in [device_OneAPI, device_CPU]): + raise ValueError( + f"Argument `device`={device} is not supported." 
+ ) + got_type_error = False + got_buffer_error = False + got_other_error = False + saved_exception = None + # First DLPack version supporting dl_device, and copy + requested_ver = (1, 0) + cpu_dev = (device_CPU, 0) + try: + # setting max_version to minimal version that supports + # dl_device/copy keywords + dlpack_capsule = dlpack_attr( + max_version=requested_ver, + dl_device=dl_device, + copy=copy + ) + except TypeError: + # exporter does not support max_version keyword + got_type_error = True + except (BufferError, NotImplementedError, ValueError) as e: + # Either dl_device, or copy cannot be satisfied + got_buffer_error = True + saved_exception = e + except Exception as e: + got_other_error = True + saved_exception = e + else: + # execution did not raise exceptions + return from_dlpack_capsule(dlpack_capsule) + finally: + if got_type_error: + # max_version/dl_device, copy keywords are not supported + # by __dlpack__ + x_dldev = dlpack_dev_attr() + if (dl_device is None) or (dl_device == x_dldev): + dlpack_capsule = dlpack_attr() + return from_dlpack_capsule(dlpack_capsule) + # must copy via host + if copy is False: + raise BufferError( + "Importing data via DLPack requires copying, but " + "copy=False was provided" + ) + # when max_version/dl_device/copy are not supported + # we can only support importing to OneAPI devices + # from host, or from another oneAPI device + is_supported_x_dldev = ( + x_dldev == cpu_dev or + (x_dldev[0] == device_OneAPI) + ) + is_supported_dl_device = ( + dl_device == cpu_dev or + dl_device[0] == device_OneAPI + ) + if is_supported_x_dldev and is_supported_dl_device: + dlpack_capsule = dlpack_attr() + blob = from_dlpack_capsule(dlpack_capsule) + else: + raise BufferError( + f"Can not import to requested device {dl_device}" + ) + dev = _create_device(device, dl_device) + if x_dldev == cpu_dev and dl_device == cpu_dev: + # both source and destination are CPU + return blob + elif x_dldev == cpu_dev: + # source is CPU, destination is 
oneAPI + return _to_usm_ary_from_host_blob(blob, dev) + elif dl_device == cpu_dev: + # source is oneAPI, destination is CPU + cpu_caps = blob.__dlpack__( + max_version=get_build_dlpack_version(), + dl_device=cpu_dev + ) + return from_dlpack_capsule(cpu_caps) + else: + import dpctl.tensor as dpt + return dpt.asarray(blob, device=dev) + elif got_buffer_error: + # we are here, because dlpack_attr could not deal with requested + # dl_device, or copying was required + if copy is False: + raise BufferError( + "Importing data via DLPack requires copying, but " + "copy=False was provided" + ) + if dl_device is None: + raise saved_exception + # must copy via host + if dl_device[0] != device_OneAPI: + raise BufferError( + f"Can not import to requested device {dl_device}" + ) + x_dldev = dlpack_dev_attr() + if x_dldev == cpu_dev: + dlpack_capsule = dlpack_attr() + host_blob = from_dlpack_capsule(dlpack_capsule) + else: + dlpack_capsule = dlpack_attr( + max_version=requested_ver, + dl_device=cpu_dev, + copy=copy + ) + host_blob = from_dlpack_capsule(dlpack_capsule) + dev = _create_device(device, dl_device) + return _to_usm_ary_from_host_blob(host_blob, dev) + elif got_other_error: + raise saved_exception From 5c9e183a1774d0c28bf101f341955c3ca7b0c6a5 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 15:59:22 -0800 Subject: [PATCH 08/24] Move _flags.pyx to dpctl_ext.tensor --- dpctl_ext/tensor/_flags.pyx | 175 ++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 dpctl_ext/tensor/_flags.pyx diff --git a/dpctl_ext/tensor/_flags.pyx b/dpctl_ext/tensor/_flags.pyx new file mode 100644 index 00000000000..322d52bd56c --- /dev/null +++ b/dpctl_ext/tensor/_flags.pyx @@ -0,0 +1,175 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + +# distutils: language = c++ +# cython: language_level=3 +# cython: linetrace=True + +from libcpp cimport bool as cpp_bool + +from ._usmarray cimport ( + USM_ARRAY_C_CONTIGUOUS, + USM_ARRAY_F_CONTIGUOUS, + USM_ARRAY_WRITABLE, + usm_ndarray, +) + + +cdef cpp_bool _check_bit(int flag, int mask): + return (flag & mask) == mask + + +cdef class Flags: + """ + Helper class to query the flags of a :class:`dpctl.tensor.usm_ndarray` + instance, which describe how the instance interfaces with its underlying + memory. + """ + cdef int flags_ + cdef usm_ndarray arr_ + + def __cinit__(self, usm_ndarray arr, int flags): + self.arr_ = arr + self.flags_ = flags + + @property + def flags(self): + """ + Integer representation of the memory layout flags of + :class:`dpctl.tensor.usm_ndarray` instance. + """ + return self.flags_ + + @property + def c_contiguous(self): + """ + True if the memory layout of the + :class:`dpctl.tensor.usm_ndarray` instance is C-contiguous. + """ + return _check_bit(self.flags_, USM_ARRAY_C_CONTIGUOUS) + + @property + def f_contiguous(self): + """ + True if the memory layout of the + :class:`dpctl.tensor.usm_ndarray` instance is F-contiguous. + """ + return _check_bit(self.flags_, USM_ARRAY_F_CONTIGUOUS) + + @property + def writable(self): + """ + True if :class:`dpctl.tensor.usm_ndarray` instance is writable. + """ + return _check_bit(self.flags_, USM_ARRAY_WRITABLE) + + @writable.setter + def writable(self, new_val): + if not isinstance(new_val, bool): + raise TypeError("Expecting a boolean value") + self.arr_._set_writable_flag(new_val) + + @property + def fc(self): + """ + True if the memory layout of the :class:`dpctl.tensor.usm_ndarray` + instance is C-contiguous and F-contiguous. 
+ """ + return ( + _check_bit(self.flags_, USM_ARRAY_C_CONTIGUOUS) + and _check_bit(self.flags_, USM_ARRAY_F_CONTIGUOUS) + ) + + @property + def forc(self): + """ + True if the memory layout of the :class:`dpctl.tensor.usm_ndarray` + instance is C-contiguous or F-contiguous. + """ + return ( + _check_bit(self.flags_, USM_ARRAY_C_CONTIGUOUS) + or _check_bit(self.flags_, USM_ARRAY_F_CONTIGUOUS) + ) + + @property + def fnc(self): + """ + True if the memory layout of the :class:`dpctl.tensor.usm_ndarray` + instance is F-contiguous and not C-contiguous. + """ + return ( + _check_bit(self.flags_, USM_ARRAY_F_CONTIGUOUS) + and not _check_bit(self.flags_, USM_ARRAY_C_CONTIGUOUS) + ) + + @property + def contiguous(self): + """ + True if the memory layout of the :class:`dpctl.tensor.usm_ndarray` + instance is C-contiguous and F-contiguous. + Equivalent to `forc.` + """ + return self.forc + + def __getitem__(self, name): + if name in ["C_CONTIGUOUS", "C"]: + return self.c_contiguous + elif name in ["F_CONTIGUOUS", "F"]: + return self.f_contiguous + elif name in ["WRITABLE", "W"]: + return self.writable + elif name == "FC": + return self.fc + elif name == "FNC": + return self.fnc + elif name in ["FORC", "CONTIGUOUS"]: + return self.forc + + def __setitem__(self, name, val): + if name in ["WRITABLE", "W"]: + self.writable = val + else: + raise ValueError( + "Only writable ('W' or 'WRITABLE') flag can be set" + ) + + def __repr__(self): + out = [] + for name in "C_CONTIGUOUS", "F_CONTIGUOUS", "WRITABLE": + out.append(" {} : {}".format(name, self[name])) + return "\n".join(out) + + def __eq__(self, other): + cdef Flags other_ + if isinstance(other, self.__class__): + other_ = other + return self.flags_ == other_.flags_ + elif isinstance(other, int): + return self.flags_ == other + else: + return False From 8f44c372193bff701cd0eedef65fed3b7e66668b Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 16:01:13 -0800 Subject: [PATCH 09/24] Move cython helper 
files --- dpctl_ext/tensor/_slicing.pxi | 383 +++++++++++++++++++++++++++++ dpctl_ext/tensor/_stride_utils.pxi | 314 +++++++++++++++++++++++ dpctl_ext/tensor/_types.pxi | 169 +++++++++++++ 3 files changed, 866 insertions(+) create mode 100644 dpctl_ext/tensor/_slicing.pxi create mode 100644 dpctl_ext/tensor/_stride_utils.pxi create mode 100644 dpctl_ext/tensor/_types.pxi diff --git a/dpctl_ext/tensor/_slicing.pxi b/dpctl_ext/tensor/_slicing.pxi new file mode 100644 index 00000000000..86db56013e2 --- /dev/null +++ b/dpctl_ext/tensor/_slicing.pxi @@ -0,0 +1,383 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import numbers +from operator import index +from cpython.buffer cimport PyObject_CheckBuffer +from numpy import ndarray + + +cdef bint _is_buffer(object o): + return PyObject_CheckBuffer(o) + + +cdef Py_ssize_t _slice_len( + Py_ssize_t sl_start, + Py_ssize_t sl_stop, + Py_ssize_t sl_step +): + """ + Compute len(range(sl_start, sl_stop, sl_step)) + """ + if sl_start == sl_stop: + return 0 + if sl_step > 0: + if sl_start > sl_stop: + return 0 + # 1 + argmax k such htat sl_start + sl_step*k < sl_stop + return 1 + ((sl_stop - sl_start - 1) // sl_step) + else: + if sl_start < sl_stop: + return 0 + return 1 + ((sl_stop - sl_start + 1) // sl_step) + + +cdef bint _is_integral(object x) except *: + """Gives True if x is an integral slice spec""" + if isinstance(x, (ndarray, usm_ndarray)): + if x.ndim > 0: + return False + if x.dtype.kind not in "ui": + return False + return True + if isinstance(x, bool): + return False + if isinstance(x, int): + return True + if _is_buffer(x): + mbuf = memoryview(x) + if mbuf.ndim == 0: + f = mbuf.format + return f in "bBhHiIlLqQ" + else: + return False + if callable(getattr(x, "__index__", None)): + try: + index(x) + except (TypeError, ValueError): + return False + return True + return False + + +cdef bint _is_boolean(object x) except *: + """Gives True if x is an integral slice spec""" + if isinstance(x, (ndarray, usm_ndarray)): + 
def _basic_slice_meta(ind, shape: tuple, strides: tuple, offset: int):
    """
    Given basic slicing index `ind` and array layout information produce
    a 5-tuple (resulting_shape, resulting_strides, resulting_offset,
    advanced_ind, resulting_advanced_ind_pos)
    used to construct a view into underlying array over which advanced
    indexing, if any, is to be performed.

    `advanced_ind` is a tuple of the array (or in-streak integer) indices
    found in `ind`; `resulting_advanced_ind_pos` is the position in the
    resulting shape where their broadcast result is to be placed, or -1
    when there are none.

    Raises IndexError for invalid index `ind`.
    """
    # Sentinels returned when `ind` contains no advanced (array) indices.
    _no_advanced_ind = tuple()
    _no_advanced_pos = -1
    if ind is Ellipsis:
        # `...` alone: the view is the whole array, unchanged.
        return (shape, strides, offset, _no_advanced_ind, _no_advanced_pos)
    elif ind is None:
        # `newaxis`: prepend a unit dimension with zero stride.
        return (
            (1,) + shape,
            (0,) + strides,
            offset,
            _no_advanced_ind,
            _no_advanced_pos,
        )
    elif isinstance(ind, slice):
        sl_start, sl_stop, sl_step = ind.indices(shape[0])
        sh0 = _slice_len(sl_start, sl_stop, sl_step)
        str0 = sl_step * strides[0]
        # Keep original strides when the slice is step-1 or the result
        # is empty; otherwise scale the leading stride by the step.
        new_strides = (
            strides if (sl_step == 1 or sh0 == 0) else (str0,) + strides[1:]
        )
        new_shape = (sh0, ) + shape[1:]
        is_empty = any(sh_i == 0 for sh_i in new_shape)
        # Empty views keep the original offset (no element is addressed).
        new_offset = offset if is_empty else offset + sl_start * strides[0]
        return (
            new_shape,
            new_strides,
            new_offset,
            _no_advanced_ind,
            _no_advanced_pos,
        )
    elif _is_boolean(ind):
        # A scalar boolean index behaves like newaxis with extent
        # 1 (True) or 0 (False).
        if ind:
            return (
                (1,) + shape,
                (0,) + strides,
                offset,
                _no_advanced_ind,
                _no_advanced_pos,
            )
        else:
            return (
                (0,) + shape,
                (0,) + strides,
                offset,
                _no_advanced_ind,
                _no_advanced_pos,
            )
    elif _is_integral(ind):
        # Single integer index: drops the leading axis.
        ind = index(ind)
        new_shape = shape[1:]
        new_strides = strides[1:]
        is_empty = any(sh_i == 0 for sh_i in new_shape)
        if 0 <= ind < shape[0]:
            new_offset = offset if is_empty else offset + ind * strides[0]
            return (
                new_shape,
                new_strides,
                new_offset,
                _no_advanced_ind,
                _no_advanced_pos,
            )
        elif -shape[0] <= ind < 0:
            # Negative index counts from the end of the leading axis.
            new_offset = (
                offset if is_empty else offset + (shape[0] + ind) * strides[0]
            )
            return (
                new_shape,
                new_strides,
                new_offset,
                _no_advanced_ind,
                _no_advanced_pos,
            )
        else:
            raise IndexError(
                "Index {0} is out of range for axes 0 with "
                "size {1}".format(ind, shape[0]))
    elif isinstance(ind, (ndarray, usm_ndarray)):
        # A lone array index: whole array is the view; advanced
        # indexing result goes at position 0.
        return (shape, strides, offset, (ind,), 0)
    elif isinstance(ind, tuple):
        # First pass over the tuple: count how many axes are consumed,
        # how many newaxis/ellipsis entries occur, and validate that
        # array indices form a single uninterrupted streak.
        axes_referenced = 0
        ellipses_count = 0
        newaxis_count = 0
        explicit_index = 0
        seen_arrays_yet = False
        array_streak_started = False
        array_streak_interrupted = False
        for i in ind:
            if i is None:
                newaxis_count += 1
                if array_streak_started:
                    array_streak_interrupted = True
            elif i is Ellipsis:
                ellipses_count += 1
                if array_streak_started:
                    array_streak_interrupted = True
            elif isinstance(i, slice):
                axes_referenced += 1
                if array_streak_started:
                    array_streak_interrupted = True
            elif _is_boolean(i):
                newaxis_count += 1
                if array_streak_started:
                    array_streak_interrupted = True
            elif _is_integral(i):
                axes_referenced += 1
                # NOTE(review): `array_streak_started` is never reset in
                # this counting loop, so this condition can never hold;
                # it looks inverted relative to how `explicit_index` is
                # consumed below. `new_shape_len` (its only consumer) is
                # not part of the returned tuple, so behavior of the
                # returned metadata is unaffected — confirm intent.
                if not array_streak_started and array_streak_interrupted:
                    explicit_index += 1
            elif isinstance(i, (ndarray, usm_ndarray)):
                if not seen_arrays_yet:
                    seen_arrays_yet = True
                    array_streak_started = True
                    array_streak_interrupted = False
                if array_streak_interrupted:
                    raise IndexError(
                        "Advanced indexing array specs may not be "
                        "separated by basic slicing specs."
                    )
                dt_k = i.dtype.kind
                if dt_k == "b" and i.ndim > 0:
                    # boolean mask consumes as many axes as its rank
                    axes_referenced += i.ndim
                elif dt_k in "ui" and i.ndim > 0:
                    axes_referenced += 1
                else:
                    raise IndexError(
                        "arrays used as indices must be of integer "
                        "(or boolean) type"
                    )
            else:
                raise IndexError(
                    "Only integers, slices (`:`), ellipsis (`...`), "
                    "dpctl.tensor.newaxis (`None`) and integer and "
                    "boolean arrays are valid indices."
                )
        if ellipses_count > 1:
            raise IndexError(
                "an index can only have a single ellipsis ('...')")
        if axes_referenced > len(shape):
            raise IndexError(
                "too many indices for an array, array is "
                "{0}-dimensional, but {1} were indexed".format(
                    len(shape), axes_referenced))
        if ellipses_count:
            # Ellipsis expands to the number of unreferenced axes.
            ellipses_count = len(shape) - axes_referenced
        # NOTE(review): new_shape_len is maintained below but is not part
        # of the returned tuple.
        new_shape_len = (newaxis_count + ellipses_count
                         + axes_referenced - explicit_index)
        # Second pass: build the resulting shape/strides/offset, walking
        # the original axes with cursor `k`.
        new_shape = list()
        new_strides = list()
        new_advanced_ind = list()
        k = 0
        new_advanced_start_pos = -1
        advanced_start_pos_set = False
        new_offset = offset
        is_empty = False
        array_streak = False
        for i in range(len(ind)):
            ind_i = ind[i]
            if (ind_i is Ellipsis):
                # copy through the axes the ellipsis stands for
                k_new = k + ellipses_count
                new_shape.extend(shape[k:k_new])
                new_strides.extend(strides[k:k_new])
                if any(dim == 0 for dim in shape[k:k_new]):
                    is_empty = True
                    new_offset = offset
                k = k_new
                if array_streak:
                    array_streak = False
            elif ind_i is None:
                # newaxis: unit extent, zero stride
                new_shape.append(1)
                new_strides.append(0)
                if array_streak:
                    array_streak = False
            elif isinstance(ind_i, slice):
                k_new = k + 1
                sl_start, sl_stop, sl_step = ind_i.indices(shape[k])
                sh_i = _slice_len(sl_start, sl_stop, sl_step)
                str_i = (1 if sh_i == 0 else sl_step) * strides[k]
                new_shape.append(sh_i)
                new_strides.append(str_i)
                if sh_i > 0 and not is_empty:
                    new_offset = new_offset + sl_start * strides[k]
                if sh_i == 0:
                    is_empty = True
                    new_offset = offset
                k = k_new
                if array_streak:
                    array_streak = False
            elif _is_boolean(ind_i):
                new_shape.append(1 if ind_i else 0)
                new_strides.append(0)
                if array_streak:
                    array_streak = False
            elif _is_integral(ind_i):
                if array_streak:
                    # integer inside an array streak joins the advanced
                    # indices instead of dropping the axis
                    if not isinstance(ind_i, (ndarray, usm_ndarray)):
                        ind_i = index(ind_i)
                        # integer will be converted to an array,
                        # still raise if OOB
                        if not (
                            0 <= ind_i < shape[k] or -shape[k] <= ind_i < 0
                        ):
                            raise IndexError(
                                "Index {0} is out of range for axes "
                                "{1} with size {2}".format(ind_i, k, shape[k])
                            )
                    new_advanced_ind.append(ind_i)
                    k_new = k + 1
                    new_shape.extend(shape[k:k_new])
                    new_strides.extend(strides[k:k_new])
                    k = k_new
                else:
                    # stand-alone integer: drop the axis, advance offset
                    ind_i = index(ind_i)
                    if 0 <= ind_i < shape[k]:
                        k_new = k + 1
                        if not is_empty:
                            new_offset = new_offset + ind_i * strides[k]
                        k = k_new
                    elif -shape[k] <= ind_i < 0:
                        k_new = k + 1
                        if not is_empty:
                            new_offset = (
                                new_offset + (shape[k] + ind_i) * strides[k]
                            )
                        k = k_new
                    else:
                        raise IndexError(
                            "Index {0} is out of range for axes "
                            "{1} with size {2}".format(ind_i, k, shape[k])
                        )
            elif isinstance(ind_i, (ndarray, usm_ndarray)):
                if not array_streak:
                    array_streak = True
                if not advanced_start_pos_set:
                    # record where the advanced-index result is placed
                    new_advanced_start_pos = len(new_shape)
                    advanced_start_pos_set = True
                new_advanced_ind.append(ind_i)
                dt_k = ind_i.dtype.kind
                if dt_k == "b":
                    # boolean mask spans ndim axes
                    k_new = k + ind_i.ndim
                else:
                    k_new = k + 1
                new_shape.extend(shape[k:k_new])
                new_strides.extend(strides[k:k_new])
                k = k_new
        # copy through any trailing, unreferenced axes
        new_shape.extend(shape[k:])
        new_strides.extend(strides[k:])
        new_shape_len += len(shape) - k
        return (
            tuple(new_shape),
            tuple(new_strides),
            new_offset,
            tuple(new_advanced_ind),
            new_advanced_start_pos
        )
    else:
        raise IndexError(
            "Only integers, slices (`:`), ellipsis (`...`), "
            "dpctl.tensor.newaxis (`None`) and integer and "
            "boolean arrays are valid indices."
        )
# *****************************************************************************

# distutils: language = c++
# cython: language_level=3

# FIX: PyMem_Free is called throughout this file but was never cimported;
# bring it in alongside PyMem_Malloc.
from cpython.mem cimport PyMem_Free, PyMem_Malloc
from cpython.ref cimport Py_INCREF
from cpython.tuple cimport PyTuple_New, PyTuple_SetItem


# Error codes returned by _from_input_shape_strides
cdef int ERROR_MALLOC = 1
cdef int ERROR_INTERNAL = -1
cdef int ERROR_INCORRECT_ORDER = 2
cdef int ERROR_UNEXPECTED_STRIDES = 3

# Contiguity / writability bit-flags (mirrored in _usmarray.pxd)
cdef int USM_ARRAY_C_CONTIGUOUS = 1
cdef int USM_ARRAY_F_CONTIGUOUS = 2
cdef int USM_ARRAY_WRITABLE = 4


cdef Py_ssize_t shape_to_elem_count(int nd, Py_ssize_t *shape_arr):
    """
    Computes number of elements in an array as the product of its extents.
    """
    cdef Py_ssize_t count = 1
    for i in range(nd):
        count *= shape_arr[i]
    return count


cdef int _from_input_shape_strides(
    int nd, object shape, object strides, int itemsize, char order,
    Py_ssize_t **shape_ptr, Py_ssize_t **strides_ptr,
    Py_ssize_t *nelems, Py_ssize_t *min_disp, Py_ssize_t *max_disp,
    int *contig
):
    """
    Arguments: nd, shape, strides, itemsize, order
    Modifies:
        shape_ptr   - pointer to C array for shape values
        strides_ptr - pointer to C array for strides values
        nelems      - number of elements in array
        min_disp = min( dot(strides, index), index for shape)
        max_disp = max( dot(strides, index), index for shape)
        contig      - enumeration for array contiguity
    Returns: 0 on success, error code otherwise.
    On success pointers point to allocated arrays,
    otherwise they are set to NULL.
    """
    cdef int i
    cdef int j
    cdef bint all_incr = 1
    cdef bint all_decr = 1
    cdef bint strides_inspected = 0
    cdef Py_ssize_t elem_count = 1
    cdef Py_ssize_t min_shift = 0
    cdef Py_ssize_t max_shift = 0
    cdef Py_ssize_t str_i
    cdef Py_ssize_t* shape_arr
    cdef Py_ssize_t* strides_arr

    if (int(order) not in [ord("C"), ord("F"), ord("c"), ord("f")]):
        return ERROR_INCORRECT_ORDER

    # 0-d array: no shape/strides arrays are allocated
    if (nd == 0):
        contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
        nelems[0] = 1
        min_disp[0] = 0
        max_disp[0] = 0
        # FIX: restore the <Py_ssize_t *> casts stripped from the source
        # (NULL-pointer assignments need the cast in Cython)
        shape_ptr[0] = <Py_ssize_t *>(0)
        strides_ptr[0] = <Py_ssize_t *>(0)
        return 0

    # FIX: restore <Py_ssize_t *> cast on PyMem_Malloc result
    shape_arr = <Py_ssize_t *> PyMem_Malloc(nd * sizeof(Py_ssize_t))
    if (not shape_arr):
        return ERROR_MALLOC
    shape_ptr[0] = shape_arr
    for i in range(0, nd):
        shape_arr[i] = shape[i]
        elem_count *= shape_arr[i]
    if elem_count == 0:
        # empty array: treated as both C- and F-contiguous
        contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
        # NOTE(review): nelems is set to 1 (not 0) for empty arrays —
        # presumably deliberate; confirm against callers.
        nelems[0] = 1
        min_disp[0] = 0
        max_disp[0] = 0
        if strides is None:
            strides_ptr[0] = <Py_ssize_t *>(0)
        else:
            strides_arr = <Py_ssize_t *> PyMem_Malloc(nd * sizeof(Py_ssize_t))
            if (not strides_arr):
                PyMem_Free(shape_ptr[0])
                shape_ptr[0] = <Py_ssize_t *>(0)
                return ERROR_MALLOC
            strides_ptr[0] = strides_arr
            for i in range(0, nd):
                strides_arr[i] = strides[i]
        return 0
    nelems[0] = elem_count
    if (strides is None):
        # no need to allocate and populate strides
        if order == ord("C") or order == ord("c"):
            contig[0] = USM_ARRAY_C_CONTIGUOUS
        else:
            contig[0] = USM_ARRAY_F_CONTIGUOUS
        if nd == 1:
            contig[0] = USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS
        else:
            # if at most one dimension has extent > 1 the layout is
            # both C- and F-contiguous
            j = 0
            for i in range(nd):
                if shape_arr[i] > 1:
                    j = j + 1
            if j < 2:
                contig[0] = USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS
        min_disp[0] = 0
        max_disp[0] = (elem_count - 1)
        strides_ptr[0] = <Py_ssize_t *>(0)
        return 0
    elif ((isinstance(strides, (list, tuple)) or hasattr(strides, "tolist"))
          and len(strides) == nd):
        strides_arr = <Py_ssize_t *> PyMem_Malloc(nd * sizeof(Py_ssize_t))
        if (not strides_arr):
            PyMem_Free(shape_ptr[0])
            shape_ptr[0] = <Py_ssize_t *>(0)
            return ERROR_MALLOC
        strides_ptr[0] = strides_arr
        # accumulate extreme displacements reachable from the origin
        for i in range(0, nd):
            str_i = strides[i]
            strides_arr[i] = str_i
            if str_i > 0:
                max_shift += str_i * (shape_arr[i] - 1)
            else:
                min_shift += str_i * (shape_arr[i] - 1)
        min_disp[0] = min_shift
        max_disp[0] = max_shift
        # dense packing test: the span equals elem_count - 1 exactly
        # when every element is addressed once with unit gaps
        if max_shift == min_shift + (elem_count - 1):
            if elem_count == 1:
                contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
                return 0
            if nd == 1:
                if strides_arr[0] == 1:
                    contig[0] = USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS
                else:
                    contig[0] = 0
                return 0
            # compare strides of consecutive non-unit dimensions:
            # monotone increasing => F-contiguous,
            # monotone decreasing => C-contiguous
            i = 0
            while i < nd:
                if shape_arr[i] == 1:
                    i = i + 1
                    continue
                j = i + 1
                while (j < nd and shape_arr[j] == 1):
                    j = j + 1
                if j < nd:
                    strides_inspected = 1
                    if all_incr:
                        all_incr = (
                            (strides_arr[i] > 0) and
                            (strides_arr[j] > 0) and
                            (strides_arr[i] <= strides_arr[j])
                        )
                    if all_decr:
                        all_decr = (
                            (strides_arr[i] > 0) and
                            (strides_arr[j] > 0) and
                            (strides_arr[i] >= strides_arr[j])
                        )
                    i = j
                else:
                    if not strides_inspected:
                        # all dimensions have size 1 except
                        # dimension 'i'. Array is both C and F
                        # contiguous
                        strides_inspected = 1
                        all_incr = (strides_arr[i] == 1)
                        all_decr = all_incr
                    break
            # should only set contig flags on actually obtained
            # values, rather than default values
            all_incr = all_incr and strides_inspected
            all_decr = all_decr and strides_inspected
            if all_incr and all_decr:
                contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
            elif all_incr:
                contig[0] = USM_ARRAY_F_CONTIGUOUS
            elif all_decr:
                contig[0] = USM_ARRAY_C_CONTIGUOUS
            else:
                contig[0] = 0
            return 0
        else:
            contig[0] = 0  # non-contiguous
            return 0
    else:
        PyMem_Free(shape_ptr[0])
        shape_ptr[0] = <Py_ssize_t *>(0)
        return ERROR_UNEXPECTED_STRIDES
    # return ERROR_INTERNAL


cdef object _make_int_tuple(int nd, const Py_ssize_t *ary):
    """
    Makes Python tuple from C array; returns None for a NULL array.
    """
    cdef tuple res
    cdef object tmp
    if (ary):
        res = PyTuple_New(nd)
        for i in range(nd):
            tmp = ary[i]
            Py_INCREF(tmp)  # SetItem steals the reference
            PyTuple_SetItem(res, i, tmp)
        return res
    else:
        return None


cdef object _make_reversed_int_tuple(int nd, const Py_ssize_t *ary):
    """
    Makes Python reversed tuple from C array; returns None for NULL.
    """
    cdef tuple res
    cdef object tmp
    cdef int i
    cdef int nd_1
    if (ary):
        res = PyTuple_New(nd)
        nd_1 = nd - 1
        for i in range(nd):
            tmp = ary[i]
            Py_INCREF(tmp)  # SetItem steals the reference
            PyTuple_SetItem(res, nd_1 - i, tmp)
        return res
    else:
        return None


cdef object _c_contig_strides(int nd, Py_ssize_t *shape):
    """
    Makes Python tuple for strides of C-contiguous array
    (row-major: last stride is 1, filled right to left).
    """
    cdef tuple cc_strides = PyTuple_New(nd)
    cdef object si = 1
    cdef int i
    cdef int nd_1 = nd - 1
    for i in range(0, nd):
        Py_INCREF(si)  # SetItem steals the reference
        PyTuple_SetItem(cc_strides, nd_1 - i, si)
        si = si * shape[nd_1 - i]
    return cc_strides


cdef object _f_contig_strides(int nd, Py_ssize_t *shape):
    """
    Makes Python tuple for strides of F-contiguous array
    (column-major: first stride is 1, filled left to right).
    """
    cdef tuple fc_strides = PyTuple_New(nd)
    cdef object si = 1
    for i in range(0, nd):
        Py_INCREF(si)  # SetItem steals the reference
        PyTuple_SetItem(fc_strides, i, si)
        si = si * shape[i]
    return fc_strides


cdef object _swap_last_two(tuple t):
    """
    Swap last two elements of a tuple; tuples shorter than 2 are
    returned unchanged.
    """
    cdef int nd = len(t)
    cdef tuple res
    cdef int i
    cdef object tmp
    if (nd < 2):
        return t
    res = PyTuple_New(nd)
    # copy all elements except the last two
    for i in range(0, nd-2):
        tmp = t[i]
        Py_INCREF(tmp)  # SetItem steals the reference
        PyTuple_SetItem(res, i, tmp)
    # swap the last two elements
    tmp = t[nd-1]
    Py_INCREF(tmp)  # SetItem steals
    PyTuple_SetItem(res, nd - 2, tmp)
    tmp = t[nd-2]
    Py_INCREF(tmp)  # SetItem steals
    PyTuple_SetItem(res, nd - 1, tmp)
    return res
# these typenum values are aligned to values in NumPy
cdef:
    int UAR_BOOL = 0  # pragma: no cover
    int UAR_BYTE = 1  # pragma: no cover
    int UAR_UBYTE = 2  # pragma: no cover
    int UAR_SHORT = 3  # pragma: no cover
    int UAR_USHORT = 4  # pragma: no cover
    int UAR_INT = 5  # pragma: no cover
    int UAR_UINT = 6  # pragma: no cover
    int UAR_LONG = 7  # pragma: no cover
    int UAR_ULONG = 8  # pragma: no cover
    int UAR_LONGLONG = 9  # pragma: no cover
    int UAR_ULONGLONG = 10  # pragma: no cover
    int UAR_FLOAT = 11  # pragma: no cover
    int UAR_DOUBLE = 12  # pragma: no cover
    int UAR_CFLOAT = 14  # pragma: no cover
    int UAR_CDOUBLE = 15  # pragma: no cover
    int UAR_TYPE_SENTINEL = 17  # pragma: no cover
    int UAR_HALF = 23  # pragma: no cover

cdef int type_bytesize(int typenum):
    """
    Return the element size in bytes for a NumPy-aligned typenum,
    or -1 for unsupported typenums.

    NPY_BOOL=0        : 1
    NPY_BYTE=1        : 1
    NPY_UBYTE=2       : 1
    NPY_SHORT=3       : 2
    NPY_USHORT=4      : 2
    NPY_INT=5         : sizeof(int)
    NPY_UINT=6        : sizeof(unsigned int)
    NPY_LONG=7        : sizeof(long)
    NPY_ULONG=8       : sizeof(unsigned long)
    NPY_LONGLONG=9    : 8
    NPY_ULONGLONG=10  : 8
    NPY_FLOAT=11      : 4
    NPY_DOUBLE=12     : 8
    NPY_LONGDOUBLE=13 : N/A
    NPY_CFLOAT=14     : 8
    NPY_CDOUBLE=15    : 16
    NPY_CLONGDOUBLE=16: N/A
    NPY_HALF=23       : 2
    """
    # table indexed by typenum; -1 marks long double variants (unsupported)
    cdef int *type_to_bytesize = [
        1,
        sizeof(char),
        sizeof(unsigned char),
        sizeof(short),
        sizeof(unsigned short),
        sizeof(int),
        sizeof(unsigned int),
        sizeof(long),
        sizeof(unsigned long),
        sizeof(long long),
        sizeof(unsigned long long),
        sizeof(float),
        sizeof(double), -1,
        sizeof(float complex),
        sizeof(double complex), -1]

    if typenum < 0:  # pragma: no cover
        return -1
    if typenum > 16:
        # half-precision float is the only supported typenum above 16
        if typenum == 23:
            return 2
        return -1

    return type_to_bytesize[typenum]


cdef str _make_typestr(int typenum):
    """
    Make typestring (e.g. "|i4") from type number; returns "" for
    unsupported typenums.
    """
    # kind characters per typenum; "" marks long double variants
    cdef type_to_str = ["|b", "|i", "|u", "|i", "|u",
                        "|i", "|u", "|i", "|u", "|i", "|u",
                        "|f", "|f", "", "|c", "|c", ""]

    if (typenum < 0):  # pragma: no cover
        return ""
    if (typenum > 16):
        if (typenum == 23):
            return "|f2"
        return ""  # pragma: no cover

    return type_to_str[typenum] + str(type_bytesize(typenum))


cdef int typenum_from_format(str s):
    """
    Internal utility to convert string describing type format
    into a NumPy typenum.

    Format is [<|=>][biufc]#
    Shortcuts for formats are i, u, d, D

    Returns -1 for an invalid format, -2 for a big-endian format.
    Relies on `np` (numpy) being imported by the including module.
    """
    if not s:
        return -1
    try:
        dt = np.dtype(s)
    except Exception:
        return -1
    if (dt.byteorder == ">"):
        # big-endian layouts are not supported
        return -2
    return dt.num


cdef int descr_to_typenum(object dtype):
    """
    Returns typenum for argument dtype that has attribute descr,
    assumed numpy.dtype; returns -1 (token for ValueError) for
    structured/named descriptors.
    """
    obj = getattr(dtype, "descr")
    if (not isinstance(obj, list) or len(obj) != 1):
        return -1  # token for ValueError
    obj = obj[0]
    # reject named fields (non-empty obj[0]) and malformed entries
    if (
        not isinstance(obj, tuple) or len(obj) != 2 or obj[0]
    ):  # pragma: no cover
        return -1
    obj = obj[1]
    if not isinstance(obj, str):  # pragma: no cover
        return -1
    return typenum_from_format(obj)


cdef int dtype_to_typenum(dtype):
    """
    Convert a dtype-like object (str, bytes, numpy.dtype, or anything
    numpy.dtype() accepts) into a NumPy typenum.

    Returns a negative token on failure: -1 -> ValueError,
    -2 -> big-endian (unsupported), -3 -> TypeError.
    """
    if isinstance(dtype, str):
        return typenum_from_format(dtype)
    elif isinstance(dtype, bytes):
        return typenum_from_format(dtype.decode("UTF-8"))
    elif hasattr(dtype, "descr"):
        return descr_to_typenum(dtype)
    else:
        try:
            dt = np.dtype(dtype)
        except TypeError:
            return -3
        except Exception:  # pragma: no cover
            return -1
        if hasattr(dt, "descr"):
            return descr_to_typenum(dt)
        else:  # pragma: no cover
            return -3  # token for TypeError
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + +from dpctl._sycl_device import SyclDevice + +from ._usmarray import DLDeviceType + + +def dldevice_to_sycl_device(dl_dev: tuple): + if isinstance(dl_dev, tuple): + if len(dl_dev) != 2: + raise ValueError("dldevice tuple must have length 2") + else: + raise TypeError( + f"dl_dev is expected to be a 2-tuple, got " f"{type(dl_dev)}" + ) + if dl_dev[0] != DLDeviceType.kDLOneAPI: + raise ValueError("dldevice type must be kDLOneAPI") + return SyclDevice(str(dl_dev[1])) + + +def sycl_device_to_dldevice(dev: SyclDevice): + if not isinstance(dev, SyclDevice): + raise TypeError( + "dev is expected to be a SyclDevice, got " f"{type(dev)}" + ) + return (DLDeviceType.kDLOneAPI, dev.get_device_id()) From 422e87e299cc18bfbed5b5d5bb811c7b8534f37c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 16:12:49 -0800 Subject: [PATCH 11/24] Move usm_ndarray to dpctl_ext.tensor --- dpctl_ext/tensor/__init__.pxd | 36 + dpctl_ext/tensor/__init__.py | 3 + dpctl_ext/tensor/_usmarray.pxd | 88 ++ dpctl_ext/tensor/_usmarray.pyx | 1975 ++++++++++++++++++++++++++++++++ 4 files changed, 2102 insertions(+) create mode 100644 dpctl_ext/tensor/__init__.pxd create mode 100644 dpctl_ext/tensor/_usmarray.pxd create mode 100644 dpctl_ext/tensor/_usmarray.pyx diff --git a/dpctl_ext/tensor/__init__.pxd b/dpctl_ext/tensor/__init__.pxd new file mode 100644 index 00000000000..a4bcecfec1d --- /dev/null +++ b/dpctl_ext/tensor/__init__.pxd @@ -0,0 +1,36 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +""" This file declares the extension types and functions for the Cython API + implemented in _usmarray.pyx file. 
+""" + +# distutils: language = c++ +# cython: language_level=3 + +from ._usmarray cimport * diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index 6da5b8557bc..076f7eae970 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -213,10 +213,13 @@ from ._statistical_functions import mean, std, var from ._testing import allclose from ._type_utils import can_cast, finfo, iinfo, isdtype, result_type +from ._usmarray import DLDeviceType, usm_ndarray from ._utility_functions import all, any, diff __all__ = [ "Device", + "DLDeviceType", + "usm_ndarray", # data types "bool", "dtype", diff --git a/dpctl_ext/tensor/_usmarray.pxd b/dpctl_ext/tensor/_usmarray.pxd new file mode 100644 index 00000000000..ccb8f4c796b --- /dev/null +++ b/dpctl_ext/tensor/_usmarray.pxd @@ -0,0 +1,88 @@ +# ***************************************************************************** +# Copyright (c) 2026, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# - Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
# distutils: language = c++
# cython: language_level=3

cimport dpctl


# Bit-flags stored in usm_ndarray.flags_
# (values are defined in _stride_utils.pxi)
cdef public api int USM_ARRAY_C_CONTIGUOUS
cdef public api int USM_ARRAY_F_CONTIGUOUS
cdef public api int USM_ARRAY_WRITABLE

# Array element typenums, aligned with NumPy typenum values
# (values are defined in _types.pxi)
cdef public api int UAR_BOOL
cdef public api int UAR_BYTE
cdef public api int UAR_UBYTE
cdef public api int UAR_SHORT
cdef public api int UAR_USHORT
cdef public api int UAR_INT
cdef public api int UAR_UINT
cdef public api int UAR_LONG
cdef public api int UAR_ULONG
cdef public api int UAR_LONGLONG
cdef public api int UAR_ULONGLONG
cdef public api int UAR_FLOAT
cdef public api int UAR_DOUBLE
cdef public api int UAR_CFLOAT
cdef public api int UAR_CDOUBLE
cdef public api int UAR_TYPE_SENTINEL
cdef public api int UAR_HALF


cdef api class usm_ndarray [object PyUSMArrayObject, type PyUSMArrayType]:
    # data fields
    cdef char* data_               # pointer to array data
    cdef int nd_                   # number of dimensions
    cdef Py_ssize_t *shape_        # per-dimension extents (nd_ entries)
    cdef Py_ssize_t *strides_      # per-dimension strides (nd_ entries)
    cdef int typenum_              # element type, one of UAR_* typenums
    cdef int flags_                # USM_ARRAY_* bit-flags
    cdef object base_              # object owning the USM allocation
    cdef object array_namespace_   # array-API namespace module, if set
    # make usm_ndarray weak-referenceable
    cdef object __weakref__

    cdef void _reset(usm_ndarray self)
    cdef void _cleanup(usm_ndarray self)
    cdef Py_ssize_t get_offset(usm_ndarray self) except *

    # accessors for the C-level fields above
    cdef char* get_data(self)
    cdef int get_ndim(self)
    cdef Py_ssize_t * get_shape(self)
    cdef Py_ssize_t * get_strides(self)
    cdef int get_typenum(self)
    cdef int get_itemsize(self)
    cdef int get_flags(self)
    cdef object get_base(self)
    cdef dpctl.DPCTLSyclQueueRef get_queue_ref(self) except *
    cdef dpctl.SyclQueue get_sycl_queue(self)

    cdef _set_writable_flag(self, int)

    cdef __cythonbufferdefaults__ = {"mode": "strided"}
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +# distutils: language = c++ +# cython: language_level=3 +# cython: linetrace=True + +import dpctl +import dpctl.memory as dpmem +import numpy as np + +from dpctl._backend cimport DPCTLSyclUSMRef +from dpctl._sycl_device_factory cimport _cached_default_device + +from ._data_types import bool as dpt_bool +from ._device import Device +from ._print import usm_ndarray_repr, usm_ndarray_str + +cimport dpctl as c_dpctl +cimport dpctl.memory as c_dpmem +from cpython.mem cimport PyMem_Free +from cpython.tuple cimport PyTuple_New, PyTuple_SetItem + +cimport ._dlpack as c_dlpack + +from enum import IntEnum + +import ._flags as _flags +from ._dlpack import get_build_dlpack_version +from ._tensor_impl import default_device_fp_type + +include "_stride_utils.pxi" +include "_types.pxi" +include "_slicing.pxi" + + +class DLDeviceType(IntEnum): + """ + An :class:`enum.IntEnum` for the types of DLDevices supported by the DLPack + protocol. 
+ + ``kDLCPU``: + CPU (host) device + ``kDLCUDA``: + CUDA GPU device + ``kDLCUDAHost``: + Pinned CUDA CPU memory by cudaMallocHost + ``kDLOpenCL``: + OpenCL device + ``kDLVulkan``: + Vulkan buffer + ``kDLMetal``: + Metal for Apple GPU + ``kDLVPI``: + Verilog simulator buffer + ``kDLROCM``: + ROCm GPU device + ``kDLROCMHost``: + Pinned ROCm CPU memory allocated by hipMallocHost + ``kDLExtDev``: + Reserved extension device type used to test new devices + ``kDLCUDAManaged``: + CUDA managed/unified memory allocated by cudaMallocManaged + ``kDLOneAPI``: + Unified shared memory allocated on a oneAPI non-partitioned device + ``kDLWebGPU``: + Device support for WebGPU standard + ``kDLHexagon``: + Qualcomm Hexagon DSP + ``kDLMAIA``: + Microsoft MAIA device + ``kDLTrn``: + AWS Trainium device + """ + kDLCPU = c_dlpack.device_CPU + kDLCUDA = c_dlpack.device_CUDA + kDLCUDAHost = c_dlpack.device_CUDAHost + kDLCUDAManaged = c_dlpack.device_CUDAManaged + kDLROCM = c_dlpack.device_DLROCM + kDLROCMHost = c_dlpack.device_ROCMHost + kDLOpenCL = c_dlpack.device_OpenCL + kDLVulkan = c_dlpack.device_Vulkan + kDLMetal = c_dlpack.device_Metal + kDLVPI = c_dlpack.device_VPI + kDLOneAPI = c_dlpack.device_OneAPI + kDLWebGPU = c_dlpack.device_WebGPU + kDLHexagon = c_dlpack.device_Hexagon + kDLMAIA = c_dlpack.device_MAIA + kDLTrn = c_dlpack.device_Trn + + +cdef class InternalUSMArrayError(Exception): + """ + An InternalUSMArrayError exception is raised when internal + inconsistency has been detected in :class:`.usm_ndarray`. 
+ """ + pass + + +cdef object _as_zero_dim_ndarray(object usm_ary): + "Convert size-1 array to NumPy 0d array" + mem_view = dpmem.as_usm_memory(usm_ary) + usm_ary.sycl_queue.wait() + host_buf = mem_view.copy_to_host() + view = host_buf.view(usm_ary.dtype) + view.shape = tuple() + return view + + +cdef inline void _check_0d_scalar_conversion(object usm_ary) except *: + "Raise TypeError if array cannot be converted to a Python scalar" + if (usm_ary.ndim != 0): + raise TypeError( + "only 0-dimensional arrays can be converted to Python scalars" + ) + + +cdef int _copy_writable(int lhs_flags, int rhs_flags): + "Copy the WRITABLE flag to lhs_flags from rhs_flags" + return (lhs_flags & ~USM_ARRAY_WRITABLE) | (rhs_flags & USM_ARRAY_WRITABLE) + + +cdef bint _is_host_cpu(object dl_device): + "Check if dl_device denotes (kDLCPU, 0)" + cdef object dl_type + cdef object dl_id + cdef Py_ssize_t n_elems = -1 + + try: + n_elems = len(dl_device) + except TypeError: + pass + + if n_elems != 2: + return False + + dl_type = dl_device[0] + dl_id = dl_device[1] + if isinstance(dl_type, str): + return (dl_type == "kDLCPU" and dl_id == 0) + + return (dl_type == DLDeviceType.kDLCPU) and (dl_id == 0) + + +cdef void _validate_and_use_stream( + object stream, c_dpctl.SyclQueue self_queue +) except *: + if (stream is None or stream == self_queue): + pass + else: + if not isinstance(stream, dpctl.SyclQueue): + raise TypeError( + "stream argument type was expected to be dpctl.SyclQueue," + f" got {type(stream)} instead" + ) + ev = self_queue.submit_barrier() + stream.submit_barrier(dependent_events=[ev]) + +cdef class usm_ndarray: + """ usm_ndarray(shape, dtype=None, strides=None, buffer="device", \ + offset=0, order="C", buffer_ctor_kwargs=dict(), \ + array_namespace=None) + + An array object represents a multidimensional tensor of numeric + elements stored in a USM allocation on a SYCL device. + + Arg: + shape (int, tuple): + Shape of the array to be created. 
+ dtype (str, dtype): + Array data type, i.e. the type of array elements. + If ``dtype`` has the value ``None``, it is determined by default + floating point type supported by target device. + The supported types are + + ``bool``: + boolean type + ``int8``, ``int16``, ``int32``, ``int64``: + signed integer types + ``uint8``, ``uint16``, ``uint32``, ``uint64``: + unsigned integer types + ``float16``: + half-precision floating type, + supported if target device's property + ``has_aspect_fp16`` is ``True`` + ``float32``, ``complex64``: + single-precision real and complex floating types + ``float64``, ``complex128``: + double-precision real and complex floating + types, supported if target device's property + ``has_aspect_fp64`` is ``True``. + + Default: ``None``. + strides (tuple, optional): + Strides of the array to be created in elements. + If ``strides`` has the value ``None``, it is determined by the + ``shape`` of the array and the requested ``order``. + Default: ``None``. + buffer (str, object, optional): + A string corresponding to the type of USM allocation to make, + or a Python object representing a USM memory allocation, i.e. + :class:`dpctl.memory.MemoryUSMDevice`, + :class:`dpctl.memory.MemoryUSMShared`, or + :class:`dpctl.memory.MemoryUSMHost`. Recognized strings are + ``"device"``, ``"shared"``, or ``"host"``. Additional arguments to + the USM memory allocators can be passed in a dictionary specified + via ``buffer_ctor_kwrds`` keyword parameter. + Default: ``"device"``. + offset (int, optional): + Offset of the array element with all zero indexes relative to the + start of the provided `buffer` in elements. The argument is ignored + if the ``buffer`` value is a string and the memory is allocated by + the constructor. Default: ``0``. + order ({"C", "F"}, optional): + The memory layout of the array when constructing using a new + allocation. 
Value ``"C"`` corresponds to C-contiguous, or row-major + memory layout, while value ``"F"`` corresponds to F-contiguous, or + column-major layout. Default: ``"C"``. + buffer_ctor_kwargs (dict, optional): + Dictionary with keyword parameters to use when creating a new USM + memory allocation. See :class:`dpctl.memory.MemoryUSMShared` for + supported keyword arguments. + array_namespace (module, optional): + Array namespace module associated with this array. + Default: ``None``. + + ``buffer`` can be ``"shared"``, ``"host"``, ``"device"`` to allocate + new device memory by calling respective constructor with + the specified ``buffer_ctor_kwrds``; ``buffer`` can be an + instance of :class:`dpctl.memory.MemoryUSMShared`, + :class:`dpctl.memory.MemoryUSMDevice`, or + :class:`dpctl.memory.MemoryUSMHost`; ``buffer`` can also be + another :class:`dpctl.tensor.usm_ndarray` instance, in which case its + underlying ``MemoryUSM*`` buffer is used. + """ + + cdef void _reset(usm_ndarray self): + """ + Initializes member fields + """ + self.base_ = None + self.array_namespace_ = None + self.nd_ = -1 + self.data_ = 0 + self.shape_ = 0 + self.strides_ = 0 + self.flags_ = 0 + + cdef void _cleanup(usm_ndarray self): + if (self.shape_): + PyMem_Free(self.shape_) + if (self.strides_): + PyMem_Free(self.strides_) + self._reset() + + def __cinit__(self, shape, dtype=None, strides=None, buffer="device", + Py_ssize_t offset=0, order="C", + buffer_ctor_kwargs=dict(), + array_namespace=None): + """ + strides and offset must be given in units of array elements. + buffer can be strings ('device'|'shared'|'host' to allocate new memory) + or ``dpctl.memory.MemoryUSM*`` buffers, or ``usm_ndarray`` instances. 
+ """ + cdef int nd = 0 + cdef int typenum = 0 + cdef int itemsize = 0 + cdef int err = 0 + cdef int contig_flag = 0 + cdef int writable_flag = USM_ARRAY_WRITABLE + cdef Py_ssize_t *shape_ptr = NULL + cdef Py_ssize_t ary_nelems = 0 + cdef Py_ssize_t ary_nbytes = 0 + cdef Py_ssize_t *strides_ptr = NULL + cdef Py_ssize_t _offset = offset + cdef Py_ssize_t ary_min_displacement = 0 + cdef Py_ssize_t ary_max_displacement = 0 + cdef bint is_fp64 = False + cdef bint is_fp16 = False + + self._reset() + if not isinstance(shape, (list, tuple)): + if hasattr(shape, "tolist"): + fn = getattr(shape, "tolist") + if callable(fn): + shape = shape.tolist() + if not isinstance(shape, (list, tuple)): + try: + shape + shape = [shape, ] + except Exception as e: + raise TypeError( + "Argument shape must a non-negative integer, " + "or a list/tuple of such integers." + ) from e + nd = len(shape) + if dtype is None: + if isinstance(buffer, (dpmem._memory._Memory, usm_ndarray)): + q = buffer.sycl_queue + else: + q = buffer_ctor_kwargs.get("queue") + if q is not None: + dtype = default_device_fp_type(q) + else: + dev = _cached_default_device() + dtype = "f8" if dev.has_aspect_fp64 else "f4" + typenum = dtype_to_typenum(dtype) + if (typenum < 0): + if typenum == -2: + raise ValueError( + "Data type '" + str(dtype) + + "' can only have native byteorder." + ) + elif typenum == -1: + raise ValueError( + "Data type '" + str(dtype) + "' is not understood." + ) + raise TypeError( + f"Expected string or a dtype object, got {type(dtype)}" + ) + itemsize = type_bytesize(typenum) + if (itemsize < 1): + raise TypeError( + "dtype=" + np.dtype(dtype).name + " is not supported." 
+ ) + # allocate host C-arrays for shape, strides + err = _from_input_shape_strides( + nd, shape, strides, itemsize, ord(order), + &shape_ptr, &strides_ptr, &ary_nelems, + &ary_min_displacement, &ary_max_displacement, &contig_flag + ) + if (err): + self._cleanup() + if err == ERROR_MALLOC: + raise MemoryError("Memory allocation for shape/strides " + "array failed.") + elif err == ERROR_INCORRECT_ORDER: + raise ValueError( + "Unsupported order='{}' given. " + "Supported values are 'C' or 'F'.".format(order)) + elif err == ERROR_UNEXPECTED_STRIDES: + raise ValueError( + "strides={} is not understood".format(strides)) + else: + raise InternalUSMArrayError( + " .. while processing shape and strides.") + ary_nbytes = (ary_max_displacement - + ary_min_displacement + 1) * itemsize + if isinstance(buffer, dpmem._memory._Memory): + _buffer = buffer + elif isinstance(buffer, (str, bytes)): + if isinstance(buffer, bytes): + buffer = buffer.decode("UTF-8") + _offset = -ary_min_displacement + if (buffer == "shared"): + _buffer = dpmem.MemoryUSMShared(ary_nbytes, + **buffer_ctor_kwargs) + elif (buffer == "device"): + _buffer = dpmem.MemoryUSMDevice(ary_nbytes, + **buffer_ctor_kwargs) + elif (buffer == "host"): + _buffer = dpmem.MemoryUSMHost(ary_nbytes, + **buffer_ctor_kwargs) + else: + self._cleanup() + raise ValueError( + "buffer='{}' is not understood. 
" + "Recognized values are 'device', 'shared', 'host', " + "an instance of `MemoryUSM*` object, or a usm_ndarray" + "".format(buffer) + ) + elif isinstance(buffer, usm_ndarray): + if not buffer.flags.writable: + writable_flag = 0 + _buffer = buffer.usm_data + else: + self._cleanup() + raise ValueError("buffer='{}' was not understood.".format(buffer)) + if (shape_to_elem_count(nd, shape_ptr) > 0 and + (_offset + ary_min_displacement < 0 or + (_offset + ary_max_displacement + 1) * itemsize > _buffer.nbytes)): + self._cleanup() + raise ValueError(("buffer='{}' can not accommodate " + "the requested array.").format(buffer)) + is_fp64 = (typenum == UAR_DOUBLE or typenum == UAR_CDOUBLE) + is_fp16 = (typenum == UAR_HALF) + if (is_fp64 or is_fp16): + if ( + (is_fp64 and not _buffer.sycl_device.has_aspect_fp64) or + (is_fp16 and not _buffer.sycl_device.has_aspect_fp16) + ): + raise ValueError( + f"Device {_buffer.sycl_device.name} does" + f" not support {dtype} natively." + ) + self.base_ = _buffer + self.data_ = ( ( _buffer._pointer)) + itemsize * _offset + self.shape_ = shape_ptr + self.strides_ = strides_ptr + self.typenum_ = typenum + self.flags_ = (contig_flag | writable_flag) + self.nd_ = nd + self.array_namespace_ = array_namespace + + def __dealloc__(self): + self._cleanup() + + @property + def _pointer(self): + """ + Returns USM pointer to the start of array (element with zero + multi-index) encoded as integer. 
+ """ + return self.get_data() + + cdef Py_ssize_t get_offset(self) except *: + cdef char *mem_ptr = NULL + cdef char *ary_ptr = self.get_data() + mem_ptr = ( self.base_._pointer) + byte_offset = ary_ptr - mem_ptr + item_size = self.get_itemsize() + if (byte_offset % item_size): + raise InternalUSMArrayError( + "byte_offset is not a multiple of item_size.") + return byte_offset // item_size + + @property + def _element_offset(self): + """Returns the offset of the zero-index element of the array, in + elements, relative to the start of memory allocation""" + return self.get_offset() + + @property + def _byte_bounds(self): + """Returns a 2-tuple with pointers to the end-points of the array + + :Example: + + .. code-block:: python + + from dpctl import tensor + + x = tensor.ones((3, 10, 7)) + y = tensor.flip(x[:, 1::2], axis=1) + + beg_p, end_p = y._byte_bounds + # Bytes taken to store this array + bytes_extent = end_p - beg_p + + # C-contiguous copy is more compact + yc = tensor.copy(y, order="C") + beg_pc, end_pc = yc._byte_bounds + assert bytes_extent < end_pc - beg_pc + """ + cdef Py_ssize_t min_disp = 0 + cdef Py_ssize_t max_disp = 0 + cdef Py_ssize_t step_ = 0 + cdef Py_ssize_t dim_ = 0 + cdef int it = 0 + cdef Py_ssize_t _itemsize = self.get_itemsize() + + if ( + (self.flags_ & USM_ARRAY_C_CONTIGUOUS) + or (self.flags_ & USM_ARRAY_F_CONTIGUOUS) + ): + return ( + self._pointer, + self._pointer + shape_to_elem_count( + self.nd_, self.shape_ + ) * _itemsize + ) + + for it in range(self.nd_): + dim_ = self.shape[it] + if dim_ > 0: + step_ = self.strides[it] + if step_ > 0: + max_disp += step_ * (dim_ - 1) + else: + min_disp += step_ * (dim_ - 1) + + return ( + self._pointer + min_disp * _itemsize, + self._pointer + (max_disp + 1) * _itemsize + ) + + cdef char* get_data(self): + """Returns the USM pointer for this array.""" + return self.data_ + + cdef int get_ndim(self): + """ + Returns the number of indices needed to address + an element of this array. 
+ """ + return self.nd_ + + cdef Py_ssize_t* get_shape(self): + """ + Returns pointer to shape C-array for this array. + + C-array has at least ``ndim`` non-negative elements, + which determine the range of permissible indices + addressing individual elements of this array. + """ + return self.shape_ + + cdef Py_ssize_t* get_strides(self): + """ + Returns pointer to strides C-array for this array. + + The pointer can be NULL (contiguous array), or the + array size is at least ``ndim`` elements + """ + return self.strides_ + + cdef int get_typenum(self): + """Returns typenum corresponding to values of this array""" + return self.typenum_ + + cdef int get_itemsize(self): + """ + Returns itemsize of this arrays in bytes + """ + return type_bytesize(self.typenum_) + + cdef int get_flags(self): + """Returns flags of this array""" + return self.flags_ + + cdef object get_base(self): + """Returns the object owning the USM data addressed by this array""" + return self.base_ + + cdef c_dpctl.SyclQueue get_sycl_queue(self): + cdef c_dpmem._Memory mem + if not isinstance(self.base_, dpctl.memory._Memory): + raise InternalUSMArrayError( + "This array has unexpected memory owner" + ) + mem = self.base_ + return mem.queue + + cdef c_dpctl.DPCTLSyclQueueRef get_queue_ref(self) except *: + """ + Returns a copy of DPCTLSyclQueueRef associated with array + """ + cdef c_dpctl.SyclQueue q = self.get_sycl_queue() + cdef c_dpctl.DPCTLSyclQueueRef QRef = q.get_queue_ref() + cdef c_dpctl.DPCTLSyclQueueRef QRefCopy = NULL + if QRef is not NULL: + QRefCopy = c_dpctl.DPCTLQueue_Copy(QRef) + return QRefCopy + else: + raise InternalUSMArrayError( + "Memory owner of this array is corrupted" + ) + + @property + def __sycl_usm_array_interface__(self): + """ + Gives ``__sycl_usm_array_interface__`` dictionary describing + the array. 
+ """ + cdef Py_ssize_t byte_offset = -1 + cdef int item_size = -1 + cdef Py_ssize_t elem_offset = -1 + cdef char *mem_ptr = NULL + cdef char *ary_ptr = NULL + if (not isinstance(self.base_, dpmem._memory._Memory)): + raise InternalUSMArrayError( + "Invalid instance of usm_ndarray encountered. " + "Private field base_ has an unexpected type {}.".format( + type(self.base_) + ) + ) + ary_iface = self.base_.__sycl_usm_array_interface__ + mem_ptr = ( ary_iface["data"][0]) + ary_ptr = ( self.data_) + ro_flag = False if (self.flags_ & USM_ARRAY_WRITABLE) else True + ary_iface["data"] = ( mem_ptr, ro_flag) + ary_iface["shape"] = self.shape + if (self.strides_): + ary_iface["strides"] = _make_int_tuple(self.nd_, self.strides_) + else: + if (self.flags_ & USM_ARRAY_C_CONTIGUOUS): + ary_iface["strides"] = None + elif (self.flags_ & USM_ARRAY_F_CONTIGUOUS): + ary_iface["strides"] = _f_contig_strides(self.nd_, self.shape_) + else: + raise InternalUSMArrayError( + "USM Array is not contiguous and has empty strides" + ) + ary_iface["typestr"] = _make_typestr(self.typenum_) + byte_offset = ary_ptr - mem_ptr + item_size = self.get_itemsize() + if (byte_offset % item_size): + raise InternalUSMArrayError( + "byte_offset is not a multiple of item_size.") + elem_offset = byte_offset // item_size + ary_iface["offset"] = elem_offset + # must wait for content of the memory to finalize + self.sycl_queue.wait() + return ary_iface + + @property + def ndim(self): + """ + Gives the number of indices needed to address elements of this array. + """ + return self.nd_ + + @property + def usm_data(self): + """ + Gives USM memory object underlying :class:`.usm_ndarray` instance. + """ + return self.get_base() + + @property + def shape(self): + """ + Elements of the shape tuple give the lengths of the + respective array dimensions. + + Setting shape is allowed only when reshaping to the requested + dimensions can be returned as view, otherwise :exc:`AttributeError` + is raised. 
Use :func:`dpctl.tensor.reshape` to reshape the array + in all cases. + + :Example: + + .. code-block:: python + + from dpctl import tensor + + x = tensor.arange(899) + x.shape = (29, 31) + """ + if self.nd_ > 0: + return _make_int_tuple(self.nd_, self.shape_) + else: + return tuple() + + @shape.setter + def shape(self, new_shape): + """ + Modifies usm_ndarray instance in-place by changing its metadata + about the shape and the strides of the array, or raises + `AttributeError` exception if in-place change is not possible. + + Args: + new_shape: (tuple, int) + New shape. Only non-negative values are supported. + The new shape may not lead to the change in the + number of elements in the array. + + Whether the array can be reshape in-place depends on its + strides. Use :func:`dpctl.tensor.reshape` function which + always succeeds to reshape the array by performing a copy + if necessary. + """ + cdef int new_nd = -1 + cdef Py_ssize_t nelems = -1 + cdef int err = 0 + cdef Py_ssize_t min_disp = 0 + cdef Py_ssize_t max_disp = 0 + cdef int contig_flag = 0 + cdef Py_ssize_t *shape_ptr = NULL + cdef Py_ssize_t *strides_ptr = NULL + cdef Py_ssize_t size = -1 + import operator + + from ._reshape import reshaped_strides + + try: + new_nd = len(new_shape) + except TypeError: + new_nd = 1 + new_shape = (new_shape,) + try: + new_shape = tuple(operator.index(dim) for dim in new_shape) + except TypeError: + raise TypeError( + "Target shape must be a finite iterable of integers" + ) + size = shape_to_elem_count(self.nd_, self.shape_) + if not np.prod(new_shape) == size: + raise TypeError( + f"Can not reshape array of size {self.size} into {new_shape}" + ) + if size > 0: + new_strides = reshaped_strides( + self.shape, + self.strides, + new_shape + ) + else: + new_strides = (1,) * len(new_shape) + if new_strides is None: + raise AttributeError( + "Incompatible shape for in-place modification. " + "Use `reshape()` to make a copy with the desired shape." 
+ ) + err = _from_input_shape_strides( + new_nd, new_shape, new_strides, + self.get_itemsize(), + b"C", + &shape_ptr, &strides_ptr, + &nelems, &min_disp, &max_disp, &contig_flag + ) + if (err == 0): + if (self.shape_): + PyMem_Free(self.shape_) + if (self.strides_): + PyMem_Free(self.strides_) + self.flags_ = (contig_flag | (self.flags_ & USM_ARRAY_WRITABLE)) + self.nd_ = new_nd + self.shape_ = shape_ptr + self.strides_ = strides_ptr + else: + raise InternalUSMArrayError( + "Encountered in shape setter, error code {err}".format(err) + ) + + @property + def strides(self): + """ + Returns memory displacement in array elements, upon unit + change of respective index. + + For example, for strides ``(s1, s2, s3)`` and multi-index + ``(i1, i2, i3)`` position of the respective element relative + to zero multi-index element is ``s1*s1 + s2*i2 + s3*i3``. + + :Example: + + .. code-block:: python + + from dpctl import tensor + + x = tensor.zeros((20, 30)) + xv = x[10:, :15] + + multi_id = (3, 5) + byte_displacement = xv[multi_id]._pointer - xv[0, 0]._pointer + element_displacement = sum( + i * s for i, s in zip(multi_id, xv.strides) + ) + assert byte_displacement == element_displacement * xv.itemsize + """ + if (self.strides_): + return _make_int_tuple(self.nd_, self.strides_) + else: + if (self.flags_ & USM_ARRAY_C_CONTIGUOUS): + return _c_contig_strides(self.nd_, self.shape_) + elif (self.flags_ & USM_ARRAY_F_CONTIGUOUS): + return _f_contig_strides(self.nd_, self.shape_) + else: + raise ValueError("Inconsistent usm_ndarray data") + + @property + def flags(self): + """ + Returns :class:`dpctl.tensor._flags.Flags` object. + """ + return _flags.Flags(self, self.flags_) + + cdef _set_writable_flag(self, int flag): + cdef int mask = (USM_ARRAY_WRITABLE if flag else 0) + self.flags_ = _copy_writable(self.flags_, mask) + + @property + def usm_type(self): + """ + USM type of underlying memory. 
Possible values are: + + * ``"device"`` + USM-device allocation in device memory, only accessible + to kernels executed on the device + * ``"shared"`` + USM-shared allocation in device memory, accessible both + from the device and from host + * ``"host"`` + USM-host allocation in host memory, accessible both + from the device and from host + + See: https://docs.oneapi.com/versions/latest/dpcpp/iface/usm.html + """ + return self.base_.get_usm_type() + + @property + def itemsize(self): + """ + Size of array element in bytes. + """ + return self.get_itemsize() + + @property + def nbytes(self): + """ + Total bytes consumed by the elements of the array. + """ + return ( + shape_to_elem_count(self.nd_, self.shape_) * + self.get_itemsize()) + + @property + def size(self): + """ + Number of elements in the array. + """ + return shape_to_elem_count(self.nd_, self.shape_) + + @property + def dtype(self): + """ + Returns NumPy's dtype corresponding to the type of the array elements. + """ + return np.dtype(_make_typestr(self.typenum_)) + + @property + def sycl_queue(self): + """ + Returns :class:`dpctl.SyclQueue` object associated with USM data. + """ + return self.get_sycl_queue() + + @property + def sycl_device(self): + """ + Returns :class:`dpctl.SyclDevice` object on which USM data + was allocated. + """ + q = self.sycl_queue + return q.sycl_device + + @property + def device(self): + """ + Returns :class:`dpctl.tensor.Device` object representing + residence of the array data. + + The ``Device`` object represents Array API notion of the + device, and contains :class:`dpctl.SyclQueue` associated + with this array. Hence, ``.device`` property provides + information distinct from ``.sycl_device`` property. + + :Example: + + .. 
code-block:: python + + >>> from dpctl import tensor + >>> x = tensor.ones(10) + >>> x.device + Device(level_zero:gpu:0) + """ + return Device.create_device(self.sycl_queue) + + @property + def sycl_context(self): + """ + Returns :class:`dpctl.SyclContext` object to which USM data is bound. + """ + q = self.sycl_queue + return q.sycl_context + + @property + def T(self): + """Returns transposed array for 2D array, raises ``ValueError`` + otherwise. + """ + if self.nd_ == 2: + return _transpose(self) + else: + raise ValueError( + "array.T requires array to have 2 dimensions. " + "Use array.mT to transpose stacks of matrices and " + "dpctl.tensor.permute_dims() to permute dimensions." + ) + + @property + def mT(self): + """ Returns array (a view) where the last two dimensions are + transposed. + """ + if self.nd_ < 2: + raise ValueError( + "array.mT requires array to have at least 2 dimensions." + ) + return _m_transpose(self) + + @property + def real(self): + """ + Returns view into real component for arrays with + complex data-types and returns itself for all other + data-types. + + :Example: + + .. code-block:: python + + from dpctl import tensor + + # Create complex array from + # arrays of real and imaginary parts + + re = tensor.linspace(-1, 1, num=100, dtype="f4") + im = tensor.full_like(re, fill_value=tensor.pi) + + z = tensor.empty_like(re, dtype="c8") + z.real[:] = re + z.imag[:] = im + """ + # explicitly check for UAR_HALF, which is greater than UAR_CFLOAT + if (self.typenum_ < UAR_CFLOAT or self.typenum_ == UAR_HALF): + # elements are real + return self + if (self.typenum_ < UAR_TYPE_SENTINEL): + return _real_view(self) + + @property + def imag(self): + """ Returns view into imaginary component for arrays with + complex data-types and returns new zero array for all other + data-types. + + :Example: + + .. 
code-block:: python + + from dpctl import tensor + + # Reset imaginary part of complex array + + z = tensor.ones(100, dtype="c8") + z.imag[:] = dpt.pi/2 + """ + # explicitly check for UAR_HALF, which is greater than UAR_CFLOAT + if (self.typenum_ < UAR_CFLOAT or self.typenum_ == UAR_HALF): + # elements are real + return _zero_like(self) + if (self.typenum_ < UAR_TYPE_SENTINEL): + return _imag_view(self) + + def __getitem__(self, ind): + cdef tuple _meta = _basic_slice_meta( + ind, (self).shape, ( self).strides, + self.get_offset()) + cdef usm_ndarray res + cdef int i = 0 + cdef bint matching = 1 + + if len(_meta) < 5: + raise RuntimeError + + res = usm_ndarray.__new__( + usm_ndarray, + _meta[0], + dtype=_make_typestr(self.typenum_), + strides=_meta[1], + buffer=self.base_, + offset=_meta[2] + ) + res.array_namespace_ = self.array_namespace_ + + adv_ind = _meta[3] + adv_ind_start_p = _meta[4] + + if adv_ind_start_p < 0: + res.flags_ = _copy_writable(res.flags_, self.flags_) + return res + + from ._copy_utils import _extract_impl, _nonzero_impl, _take_multi_index + + # if len(adv_ind == 1), the (only) element is always an array + if len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool: + key_ = adv_ind[0] + adv_ind_end_p = key_.ndim + adv_ind_start_p + if adv_ind_end_p > res.ndim: + raise IndexError("too many indices for the array") + key_shape = key_.shape + arr_shape = res.shape[adv_ind_start_p:adv_ind_end_p] + for i in range(key_.ndim): + if matching: + if not key_shape[i] == arr_shape[i] and key_shape[i] > 0: + matching = 0 + if not matching: + raise IndexError( + "boolean index did not match indexed array in dimensions" + ) + res = _extract_impl(res, key_, axis=adv_ind_start_p) + res.flags_ = _copy_writable(res.flags_, self.flags_) + return res + + if any( + ( + isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool + ) for ind in adv_ind + ): + adv_ind_int = list() + for ind in adv_ind: + if isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool: + 
adv_ind_int.extend(_nonzero_impl(ind)) + else: + adv_ind_int.append(ind) + res = _take_multi_index(res, tuple(adv_ind_int), adv_ind_start_p) + res.flags_ = _copy_writable(res.flags_, self.flags_) + return res + + res = _take_multi_index(res, adv_ind, adv_ind_start_p) + res.flags_ = _copy_writable(res.flags_, self.flags_) + return res + + def to_device(self, target_device, /, *, stream=None): + """ to_device(target_device, /, *, stream=None) + + Transfers this array to specified target device. + + :Example: + .. code-block:: python + + import dpctl + import dpctl.tensor as dpt + + x = dpt.full(10**6, 2, dtype="int64") + q_prof = dpctl.SyclQueue( + x.sycl_device, property="enable_profiling") + # return a view with profile-enabled queue + y = x.to_device(q_prof) + timer = dpctl.SyclTimer() + with timer(q_prof): + z = y * y + print(timer.dt) + + Args: + target_device (object): + Array API concept of target device. + It can be a oneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a + non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a :class:`dpctl.tensor.Device` + object returned by :attr:`dpctl.tensor.usm_ndarray.device`. + stream (:class:`dpctl.SyclQueue`, optional): + Execution queue to synchronize with. If ``None``, + synchronization is not performed. + + Returns: + usm_ndarray: + A view if data copy is not required, and a copy otherwise. + If copying is required, it is done by copying from the original + allocation device to the host, followed by copying from host + to the target device. 
+ """ + cdef c_dpctl.DPCTLSyclQueueRef QRef = NULL + cdef c_dpmem._Memory arr_buf + d = Device.create_device(target_device) + + _validate_and_use_stream(stream, self.sycl_queue) + + if (d.sycl_context == self.sycl_context): + arr_buf = self.usm_data + QRef = ( d.sycl_queue).get_queue_ref() + view_buffer = c_dpmem._Memory.create_from_usm_pointer_size_qref( + arr_buf.get_data_ptr(), + arr_buf.nbytes, + QRef, + memory_owner=arr_buf + ) + res = usm_ndarray( + self.shape, + self.dtype, + buffer=view_buffer, + strides=self.strides, + offset=self.get_offset() + ) + res.flags_ = self.flags_ + return res + else: + nbytes = self.usm_data.nbytes + copy_buffer = type(self.usm_data)( + nbytes, queue=d.sycl_queue + ) + copy_buffer.copy_from_device(self.usm_data) + res = usm_ndarray( + self.shape, + self.dtype, + buffer=copy_buffer, + strides=self.strides, + offset=self.get_offset() + ) + res.flags_ = self.flags_ + return res + + def _set_namespace(self, mod): + """ Sets array namespace to given module `mod`. """ + self.array_namespace_ = mod + + def __array_namespace__(self, api_version=None): + """ + Returns array namespace, member functions of which + implement data API. + + Args: + api_version (str, optional) + Request namespace compliant with given version of + array API. If ``None``, namespace for the most + recent supported version is returned. + Default: ``None``. 
+ """ + if api_version is not None: + from ._array_api import __array_api_version__ + if not isinstance(api_version, str): + raise TypeError(f"Expected type str, got {type(api_version)}") + if api_version != __array_api_version__: + raise ValueError(f"Only {__array_api_version__} is supported") + return ( + self.array_namespace_ + if self.array_namespace_ is not None + else dpctl.tensor + ) + + def __bool__(self): + if self.size == 1: + _check_0d_scalar_conversion(self) + view = _as_zero_dim_ndarray(self) + return view.__bool__() + + if self.size == 0: + raise ValueError( + "The truth value of an empty array is ambiguous" + ) + + raise ValueError( + "The truth value of an array with more than one element is " + "ambiguous. Use dpctl.tensor.any() or dpctl.tensor.all()" + ) + + def __float__(self): + if self.size == 1: + _check_0d_scalar_conversion(self) + view = _as_zero_dim_ndarray(self) + return view.__float__() + + raise ValueError( + "only size-1 arrays can be converted to Python scalars" + ) + + def __complex__(self): + if self.size == 1: + _check_0d_scalar_conversion(self) + view = _as_zero_dim_ndarray(self) + return view.__complex__() + + raise ValueError( + "only size-1 arrays can be converted to Python scalars" + ) + + def __int__(self): + if self.size == 1: + _check_0d_scalar_conversion(self) + view = _as_zero_dim_ndarray(self) + return view.__int__() + + raise ValueError( + "only size-1 arrays can be converted to Python scalars" + ) + + def __index__(self): + if np.issubdtype(self.dtype, np.integer): + return int(self) + + raise IndexError("only integer arrays are valid indices") + + def __abs__(self): + return dpctl.tensor.abs(self) + + def __add__(self, other): + """ + Implementation for operator.add + """ + return dpctl.tensor.add(self, other) + + def __and__(self, other): + "Implementation for operator.and" + return dpctl.tensor.bitwise_and(self, other) + + def __dlpack__( + self, *, stream=None, max_version=None, dl_device=None, copy=None + ): + """ 
+ Produces DLPack capsule. + + Args: + stream (:class:`dpctl.SyclQueue`, optional): + Execution queue to synchronize with. + If ``None``, synchronization is not performed. + Default: ``None``. + max_version (tuple[int, int], optional): + The maximum DLPack version the consumer (caller of + ``__dlpack__``) supports. As ``__dlpack__`` may not + always return a DLPack capsule with version + `max_version`, the consumer must verify the version + even if this argument is passed. + Default: ``None``. + dl_device (tuple[enum.Enum, int], optional): + The device the returned DLPack capsule will be + placed on. + The device must be a 2-tuple matching the format of + ``__dlpack_device__`` method, an integer enumerator + representing the device type followed by an integer + representing the index of the device. + Default: ``None``. + copy (bool, optional): + Boolean indicating whether or not to copy the input. + + * If ``copy`` is ``True``, the input will always be + copied. + * If ``False``, a ``BufferError`` will be raised if a + copy is deemed necessary. + * If ``None``, a copy will be made only if deemed + necessary, otherwise, the existing memory buffer will + be reused. + + Default: ``None``. + + Raises: + MemoryError: + when host memory can not be allocated. + DLPackCreationError: + when array is allocated on a partitioned + SYCL device, or with a non-default context. + BufferError: + when a copy is deemed necessary but ``copy`` + is ``False`` or when the provided ``dl_device`` + cannot be handled. 
+ """ + if max_version is None: + # legacy path for DLManagedTensor + # copy kwarg ignored because copy flag can't be set + _caps = c_dlpack.to_dlpack_capsule(self) + _validate_and_use_stream(stream, self.sycl_queue) + return _caps + else: + if not isinstance(max_version, tuple) or len(max_version) != 2: + raise TypeError( + "`__dlpack__` expects `max_version` to be a " + "2-tuple of integers `(major, minor)`, instead " + f"got {max_version}" + ) + dpctl_dlpack_version = get_build_dlpack_version() + if max_version[0] >= dpctl_dlpack_version[0]: + # DLManagedTensorVersioned path + if dl_device is not None: + if not isinstance(dl_device, tuple) or len(dl_device) != 2: + raise TypeError( + "`__dlpack__` expects `dl_device` to be a 2-tuple " + "of `(device_type, device_id)`, instead " + f"got {dl_device}" + ) + if dl_device != self.__dlpack_device__(): + if copy is False: + raise BufferError( + "array cannot be placed on the requested " + "device without a copy" + ) + if _is_host_cpu(dl_device): + if stream is not None: + raise ValueError( + "`stream` must be `None` when `dl_device` " + "is of type `kDLCPU`" + ) + from ._copy_utils import _copy_to_numpy + _arr = _copy_to_numpy(self) + _arr.flags["W"] = self.flags["W"] + return c_dlpack.numpy_to_dlpack_versioned_capsule( + _arr, True + ) + else: + raise BufferError( + f"targeting `dl_device` {dl_device} with " + "`__dlpack__` is not yet implemented" + ) + if copy is None: + copy = False + # TODO: strategy for handling stream on different device + # from dl_device + if copy: + _validate_and_use_stream(stream, self.sycl_queue) + nbytes = self.usm_data.nbytes + copy_buffer = type(self.usm_data)( + nbytes, queue=self.sycl_queue + ) + copy_buffer.copy_from_device(self.usm_data) + _copied_arr = usm_ndarray( + self.shape, + self.dtype, + buffer=copy_buffer, + strides=self.strides, + offset=self.get_offset() + ) + _copied_arr.flags_ = self.flags_ + _caps = c_dlpack.to_dlpack_versioned_capsule( + _copied_arr, copy + ) + else: + 
_caps = c_dlpack.to_dlpack_versioned_capsule(self, copy) + _validate_and_use_stream(stream, self.sycl_queue) + return _caps + else: + # legacy path for DLManagedTensor + _caps = c_dlpack.to_dlpack_capsule(self) + _validate_and_use_stream(stream, self.sycl_queue) + return _caps + + def __dlpack_device__(self): + """ + Gives a tuple (``device_type``, ``device_id``) corresponding to + ``DLDevice`` entry in ``DLTensor`` in DLPack protocol. + + The tuple describes the non-partitioned device where the array has been + allocated, or the non-partitioned parent device of the allocation + device. + + See :class:`dpctl.tensor.DLDeviceType` for a list of devices supported + by the DLPack protocol. + + Raises: + DLPackCreationError: + when the ``device_id`` could not be determined. + """ + try: + dev_id = self.sycl_device.get_device_id() + except ValueError as e: + raise c_dlpack.DLPackCreationError( + "Could not determine id of the device where array was " + "allocated." + ) + return ( + DLDeviceType.kDLOneAPI, + dev_id, + ) + + def __eq__(self, other): + return dpctl.tensor.equal(self, other) + + def __floordiv__(self, other): + return dpctl.tensor.floor_divide(self, other) + + def __ge__(self, other): + return dpctl.tensor.greater_equal(self, other) + + def __gt__(self, other): + return dpctl.tensor.greater(self, other) + + def __invert__(self): + return dpctl.tensor.bitwise_invert(self) + + def __le__(self, other): + return dpctl.tensor.less_equal(self, other) + + def __len__(self): + if (self.nd_): + return self.shape[0] + else: + raise TypeError("len() of unsized object") + + def __lshift__(self, other): + return dpctl.tensor.bitwise_left_shift(self, other) + + def __lt__(self, other): + return dpctl.tensor.less(self, other) + + def __matmul__(self, other): + return dpctl.tensor.matmul(self, other) + + def __mod__(self, other): + return dpctl.tensor.remainder(self, other) + + def __mul__(self, other): + return dpctl.tensor.multiply(self, other) + + def __ne__(self, 
other): + return dpctl.tensor.not_equal(self, other) + + def __neg__(self): + return dpctl.tensor.negative(self) + + def __or__(self, other): + return dpctl.tensor.bitwise_or(self, other) + + def __pos__(self): + return dpctl.tensor.positive(self) + + def __pow__(self, other): + return dpctl.tensor.pow(self, other) + + def __rshift__(self, other): + return dpctl.tensor.bitwise_right_shift(self, other) + + def __setitem__(self, key, rhs): + cdef tuple _meta + cdef usm_ndarray Xv + + if (self.flags_ & USM_ARRAY_WRITABLE) == 0: + raise ValueError("Can not modify read-only array.") + + _meta = _basic_slice_meta( + key, (self).shape, ( self).strides, + self.get_offset() + ) + + if len(_meta) < 5: + raise RuntimeError + + Xv = usm_ndarray.__new__( + usm_ndarray, + _meta[0], + dtype=_make_typestr(self.typenum_), + strides=_meta[1], + buffer=self.base_, + offset=_meta[2], + ) + # set namespace + Xv.array_namespace_ = self.array_namespace_ + + from ._copy_utils import ( + _copy_from_numpy_into, + _copy_from_usm_ndarray_to_usm_ndarray, + _nonzero_impl, + _place_impl, + _put_multi_index, + ) + + adv_ind = _meta[3] + adv_ind_start_p = _meta[4] + + if adv_ind_start_p < 0: + # basic slicing + if isinstance(rhs, usm_ndarray): + _copy_from_usm_ndarray_to_usm_ndarray(Xv, rhs) + else: + if hasattr(rhs, "__sycl_usm_array_interface__"): + from dpctl.tensor import asarray + try: + rhs_ar = asarray(rhs) + _copy_from_usm_ndarray_to_usm_ndarray(Xv, rhs_ar) + except Exception: + raise ValueError( + f"Input of type {type(rhs)} could not be " + "converted to usm_ndarray" + ) + else: + rhs_np = np.asarray(rhs) + if type_bytesize(rhs_np.dtype.num) < 0: + raise ValueError( + f"Input of type {type(rhs)} can not be " + "assigned to usm_ndarray because of " + f"unsupported data type '{rhs_np.dtype}'" + ) + try: + _copy_from_numpy_into(Xv, rhs_np) + except Exception: + raise ValueError( + f"Input of type {type(rhs)} could not be " + "copied into dpctl.tensor.usm_ndarray" + ) + return + + if 
len(adv_ind) == 1 and adv_ind[0].dtype == dpt_bool: + _place_impl(Xv, adv_ind[0], rhs, axis=adv_ind_start_p) + return + + if any( + ( + isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool + ) for ind in adv_ind + ): + adv_ind_int = list() + for ind in adv_ind: + if isinstance(ind, usm_ndarray) and ind.dtype == dpt_bool: + adv_ind_int.extend(_nonzero_impl(ind)) + else: + adv_ind_int.append(ind) + _put_multi_index(Xv, tuple(adv_ind_int), adv_ind_start_p, rhs) + return + + _put_multi_index(Xv, adv_ind, adv_ind_start_p, rhs) + return + + def __sub__(self, other): + return dpctl.tensor.subtract(self, other) + + def __truediv__(self, other): + return dpctl.tensor.divide(self, other) + + def __xor__(self, other): + return dpctl.tensor.bitwise_xor(self, other) + + def __radd__(self, other): + return dpctl.tensor.add(other, self) + + def __rand__(self, other): + return dpctl.tensor.bitwise_and(other, self) + + def __rfloordiv__(self, other): + return dpctl.tensor.floor_divide(other, self) + + def __rlshift__(self, other): + return dpctl.tensor.bitwise_left_shift(other, self) + + def __rmatmul__(self, other): + return dpctl.tensor.matmul(other, self) + + def __rmod__(self, other): + return dpctl.tensor.remainder(other, self) + + def __rmul__(self, other): + return dpctl.tensor.multiply(other, self) + + def __ror__(self, other): + return dpctl.tensor.bitwise_or(other, self) + + def __rpow__(self, other): + return dpctl.tensor.pow(other, self) + + def __rrshift__(self, other): + return dpctl.tensor.bitwise_right_shift(other, self) + + def __rsub__(self, other): + return dpctl.tensor.subtract(other, self) + + def __rtruediv__(self, other): + return dpctl.tensor.divide(other, self) + + def __rxor__(self, other): + return dpctl.tensor.bitwise_xor(other, self) + + def __iadd__(self, other): + return dpctl.tensor.add._inplace_op(self, other) + + def __iand__(self, other): + return dpctl.tensor.bitwise_and._inplace_op(self, other) + + def __ifloordiv__(self, other): + return 
dpctl.tensor.floor_divide._inplace_op(self, other) + + def __ilshift__(self, other): + return dpctl.tensor.bitwise_left_shift._inplace_op(self, other) + + def __imatmul__(self, other): + return dpctl.tensor.matmul(self, other, out=self, dtype=self.dtype) + + def __imod__(self, other): + return dpctl.tensor.remainder._inplace_op(self, other) + + def __imul__(self, other): + return dpctl.tensor.multiply._inplace_op(self, other) + + def __ior__(self, other): + return dpctl.tensor.bitwise_or._inplace_op(self, other) + + def __ipow__(self, other): + return dpctl.tensor.pow._inplace_op(self, other) + + def __irshift__(self, other): + return dpctl.tensor.bitwise_right_shift._inplace_op(self, other) + + def __isub__(self, other): + return dpctl.tensor.subtract._inplace_op(self, other) + + def __itruediv__(self, other): + return dpctl.tensor.divide._inplace_op(self, other) + + def __ixor__(self, other): + return dpctl.tensor.bitwise_xor._inplace_op(self, other) + + def __str__(self): + return usm_ndarray_str(self) + + def __repr__(self): + return usm_ndarray_repr(self) + + def __array__(self, dtype=None, /, *, copy=None): + """NumPy's array protocol method to disallow implicit conversion. + + Without this definition, `numpy.asarray(usm_ar)` converts + usm_ndarray instance into NumPy array with data type `object` + and every element being 0d usm_ndarray. + + https://github.com/IntelPython/dpctl/pull/1384#issuecomment-1707212972 + """ + raise TypeError( + "Implicit conversion to a NumPy array is not allowed. 
" + "Use `dpctl.tensor.asnumpy` to copy data from this " + "`dpctl.tensor.usm_ndarray` instance to NumPy array" + ) + + +cdef usm_ndarray _real_view(usm_ndarray ary): + """ + View into real parts of a complex type array + """ + cdef int r_typenum_ = -1 + cdef usm_ndarray r = None + cdef Py_ssize_t offset_elems = 0 + + if (ary.typenum_ == UAR_CFLOAT): + r_typenum_ = UAR_FLOAT + elif (ary.typenum_ == UAR_CDOUBLE): + r_typenum_ = UAR_DOUBLE + else: + raise InternalUSMArrayError( + "_real_view call on array of non-complex type.") + + offset_elems = ary.get_offset() * 2 + r = usm_ndarray.__new__( + usm_ndarray, + _make_int_tuple(ary.nd_, ary.shape_) if ary.nd_ > 0 else tuple(), + dtype=_make_typestr(r_typenum_), + strides=tuple(2 * si for si in ary.strides), + buffer=ary.base_, + offset=offset_elems, + order=("C" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "F") + ) + r.flags_ = _copy_writable(r.flags_, ary.flags_) + r.array_namespace_ = ary.array_namespace_ + return r + + +cdef usm_ndarray _imag_view(usm_ndarray ary): + """ + View into imaginary parts of a complex type array + """ + cdef int r_typenum_ = -1 + cdef usm_ndarray r = None + cdef Py_ssize_t offset_elems = 0 + + if (ary.typenum_ == UAR_CFLOAT): + r_typenum_ = UAR_FLOAT + elif (ary.typenum_ == UAR_CDOUBLE): + r_typenum_ = UAR_DOUBLE + else: + raise InternalUSMArrayError( + "_imag_view call on array of non-complex type.") + + # displace pointer to imaginary part + offset_elems = 2 * ary.get_offset() + 1 + r = usm_ndarray.__new__( + usm_ndarray, + _make_int_tuple(ary.nd_, ary.shape_) if ary.nd_ > 0 else tuple(), + dtype=_make_typestr(r_typenum_), + strides=tuple(2 * si for si in ary.strides), + buffer=ary.base_, + offset=offset_elems, + order=("C" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "F") + ) + r.flags_ = _copy_writable(r.flags_, ary.flags_) + r.array_namespace_ = ary.array_namespace_ + return r + + +cdef usm_ndarray _transpose(usm_ndarray ary): + """ + Construct transposed array without copying the 
data + """ + cdef usm_ndarray r = usm_ndarray.__new__( + usm_ndarray, + _make_reversed_int_tuple(ary.nd_, ary.shape_), + dtype=_make_typestr(ary.typenum_), + strides=( + _make_reversed_int_tuple(ary.nd_, ary.strides_) + if (ary.strides_) else None), + buffer=ary.base_, + order=("F" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "C"), + offset=ary.get_offset() + ) + r.flags_ = _copy_writable(r.flags_, ary.flags_) + return r + + +cdef usm_ndarray _m_transpose(usm_ndarray ary): + """ + Construct matrix transposed array + """ + cdef usm_ndarray r = usm_ndarray.__new__( + usm_ndarray, + _swap_last_two(_make_int_tuple(ary.nd_, ary.shape_)), + dtype=_make_typestr(ary.typenum_), + strides=_swap_last_two(ary.strides), + buffer=ary.base_, + order=("F" if (ary.flags_ & USM_ARRAY_C_CONTIGUOUS) else "C"), + offset=ary.get_offset() + ) + r.flags_ = _copy_writable(r.flags_, ary.flags_) + return r + + +cdef usm_ndarray _zero_like(usm_ndarray ary): + """ + Make C-contiguous array of zero elements with same shape, + type, device, and sycl_queue as ary. 
+ """ + cdef dt = _make_typestr(ary.typenum_) + cdef usm_ndarray r = usm_ndarray( + _make_int_tuple(ary.nd_, ary.shape_) if ary.nd_ > 0 else tuple(), + dtype=dt, + buffer=ary.base_.get_usm_type(), + buffer_ctor_kwargs={"queue": ary.get_sycl_queue()}, + ) + r.base_.memset() + return r + + +cdef api char* UsmNDArray_GetData(usm_ndarray arr): + """Get allocation pointer of zero index element of array """ + return arr.get_data() + + +cdef api int UsmNDArray_GetNDim(usm_ndarray arr): + """Get array rank: length of its shape""" + return arr.get_ndim() + + +cdef api Py_ssize_t* UsmNDArray_GetShape(usm_ndarray arr): + """Get host pointer to shape vector""" + return arr.get_shape() + + +cdef api Py_ssize_t* UsmNDArray_GetStrides(usm_ndarray arr): + """Get host pointer to strides vector""" + return arr.get_strides() + + +cdef api int UsmNDArray_GetTypenum(usm_ndarray arr): + """Get type number for data type of array elements""" + return arr.get_typenum() + + +cdef api int UsmNDArray_GetElementSize(usm_ndarray arr): + """Get array element size in bytes""" + return arr.get_itemsize() + + +cdef api int UsmNDArray_GetFlags(usm_ndarray arr): + """Get flags of array""" + return arr.get_flags() + + +cdef api c_dpctl.DPCTLSyclQueueRef UsmNDArray_GetQueueRef(usm_ndarray arr): + """Get DPCTLSyclQueueRef for queue associated with the array""" + return arr.get_queue_ref() + + +cdef api Py_ssize_t UsmNDArray_GetOffset(usm_ndarray arr): + """Get offset of zero-index array element from the beginning of the USM + allocation""" + return arr.get_offset() + + +cdef api object UsmNDArray_GetUSMData(usm_ndarray arr): + """Get USM data object underlying the array""" + return arr.get_base() + + +cdef api void UsmNDArray_SetWritableFlag(usm_ndarray arr, int flag): + """Set/unset USM_ARRAY_WRITABLE in the given array `arr`.""" + arr._set_writable_flag(flag) + + +cdef api object UsmNDArray_MakeSimpleFromMemory( + int nd, const Py_ssize_t *shape, int typenum, + c_dpmem._Memory mobj, Py_ssize_t offset, 
char order +): + """Create contiguous usm_ndarray. + + Args: + nd: number of dimensions (non-negative) + shape: array of nd non-negative array's sizes along each dimension + typenum: array elemental type number + ptr: pointer to the start of allocation + QRef: DPCTLSyclQueueRef associated with the allocation + offset: distance between element with zero multi-index and the + start of allocation + order: Memory layout of the array. Use 'C' for C-contiguous or + row-major layout; 'F' for F-contiguous or column-major layout + Returns: + Created usm_ndarray instance + """ + cdef object shape_tuple = _make_int_tuple(nd, shape) + cdef usm_ndarray arr = usm_ndarray( + shape_tuple, + dtype=_make_typestr(typenum), + buffer=mobj, + offset=offset, + order=(order) + ) + return arr + + +cdef api object UsmNDArray_MakeSimpleFromPtr( + size_t nelems, + int typenum, + c_dpctl.DPCTLSyclUSMRef ptr, + c_dpctl.DPCTLSyclQueueRef QRef, + object owner +): + """Create 1D contiguous usm_ndarray from pointer. + + Args: + nelems: number of elements in array + typenum: array elemental type number + ptr: pointer to the start of allocation + QRef: DPCTLSyclQueueRef associated with the allocation + owner: Python object managing lifetime of USM allocation. + Value None implies transfer of USM allocation ownership + to the created array object. + Returns: + Created usm_ndarray instance + """ + cdef int itemsize = type_bytesize(typenum) + if (itemsize < 1): + raise ValueError( + "dtype with typenum=" + str(typenum) + " is not supported." 
+ ) + cdef size_t nbytes = ( itemsize) * nelems + cdef c_dpmem._Memory mobj + mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( + ptr, nbytes, QRef, memory_owner=owner + ) + cdef usm_ndarray arr = usm_ndarray( + (nelems,), + dtype=_make_typestr(typenum), + buffer=mobj + ) + return arr + +cdef api object UsmNDArray_MakeFromPtr( + int nd, + const Py_ssize_t *shape, + int typenum, + const Py_ssize_t *strides, + c_dpctl.DPCTLSyclUSMRef ptr, + c_dpctl.DPCTLSyclQueueRef QRef, + Py_ssize_t offset, + object owner +): + """ + General usm_ndarray constructor from externally made USM-allocation. + + Args: + nd: number of dimensions (non-negative) + shape: array of nd non-negative array's sizes along each dimension + typenum: array elemental type number + strides: array of nd strides along each dimension in elements + ptr: pointer to the start of allocation + QRef: DPCTLSyclQueueRef associated with the allocation + offset: distance between element with zero multi-index and the + start of allocation + owner: Python object managing lifetime of USM allocation. + Value None implies transfer of USM allocation ownership + to the created array object. + Returns: + Created usm_ndarray instance + """ + cdef int itemsize = type_bytesize(typenum) + cdef size_t nelems = 1 + cdef Py_ssize_t min_disp = 0 + cdef Py_ssize_t max_disp = 0 + cdef Py_ssize_t step_ = 0 + cdef Py_ssize_t dim_ = 0 + cdef it = 0 + cdef c_dpmem._Memory mobj + cdef usm_ndarray arr + cdef object obj_shape + cdef object obj_strides + + if (itemsize < 1): + raise ValueError( + "dtype with typenum=" + str(typenum) + " is not supported." 
+ ) + if (nd < 0): + raise ValueError("Dimensionality must be non-negative") + if (ptr is NULL or QRef is NULL): + raise ValueError( + "Non-null USM allocation pointer and QRef are expected" + ) + if (nd == 0): + # case of 0d scalars + mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( + ptr, itemsize, QRef, memory_owner=owner + ) + arr = usm_ndarray( + tuple(), + dtype=_make_typestr(typenum), + buffer=mobj + ) + return arr + if (shape is NULL or strides is NULL): + raise ValueError("Both shape and stride vectors are required") + for it in range(nd): + dim_ = shape[it] + if dim_ < 0: + raise ValueError( + f"Dimension along axis {it} must be non-negative" + ) + nelems *= dim_ + if dim_ > 0: + step_ = strides[it] + if step_ > 0: + max_disp += step_ * (dim_ - 1) + else: + min_disp += step_ * (dim_ - 1) + + obj_shape = _make_int_tuple(nd, shape) + obj_strides = _make_int_tuple(nd, strides) + if nelems == 0: + mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( + ptr, itemsize, QRef, memory_owner=owner + ) + arr = usm_ndarray( + obj_shape, + dtype=_make_typestr(typenum), + strides=obj_strides, + buffer=mobj, + offset=0 + ) + return arr + if offset + min_disp < 0: + raise ValueError( + "Given shape, strides and offset reference out-of-bound memory" + ) + nbytes = ( itemsize) * (offset + max_disp + 1) + mobj = c_dpmem._Memory.create_from_usm_pointer_size_qref( + ptr, nbytes, QRef, memory_owner=owner + ) + arr = usm_ndarray( + obj_shape, + dtype=_make_typestr(typenum), + strides=obj_strides, + buffer=mobj, + offset=offset + ) + return arr + + +def _is_object_with_buffer_protocol(o): + "Returns True if object supports Python buffer protocol" + return _is_buffer(o) From e1148088f9122fbe4c1bf3874deb4af5e423c722 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 16:50:55 -0800 Subject: [PATCH 12/24] Fix import _flags and _dlpack in _usmarray.pyx --- dpctl_ext/tensor/_usmarray.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) 
diff --git a/dpctl_ext/tensor/_usmarray.pyx b/dpctl_ext/tensor/_usmarray.pyx index af681732c99..958ae3f3703 100644 --- a/dpctl_ext/tensor/_usmarray.pyx +++ b/dpctl_ext/tensor/_usmarray.pyx @@ -46,11 +46,11 @@ cimport dpctl.memory as c_dpmem from cpython.mem cimport PyMem_Free from cpython.tuple cimport PyTuple_New, PyTuple_SetItem -cimport ._dlpack as c_dlpack +from . cimport _dlpack as c_dlpack from enum import IntEnum -import ._flags as _flags +from . import _flags from ._dlpack import get_build_dlpack_version from ._tensor_impl import default_device_fp_type From 39c05718183e7b1f97ecc7719b807a68d591e013 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 16:51:44 -0800 Subject: [PATCH 13/24] Update CMakes files to build usm_ndarray --- dpctl_ext/CMakeLists.txt | 83 ++++++++++++++++++++++++++++++++- dpctl_ext/tensor/CMakeLists.txt | 8 ++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/dpctl_ext/CMakeLists.txt b/dpctl_ext/CMakeLists.txt index e5869309142..a5524e8bb3d 100644 --- a/dpctl_ext/CMakeLists.txt +++ b/dpctl_ext/CMakeLists.txt @@ -112,8 +112,89 @@ else() endif() # at build time create include/ directory and copy header files over -# set(DPCTL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) +set(DPCTL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include) set(CMAKE_INSTALL_RPATH "$ORIGIN") +function(build_dpctl_ext _trgt _src _dest) + set(options SYCL) + cmake_parse_arguments(BUILD_DPCTL_EXT "${options}" "RELATIVE_PATH" "" ${ARGN}) + add_cython_target(${_trgt} ${_src} CXX OUTPUT_VAR _generated_src) + set(_cythonize_trgt "${_trgt}_cythonize_pyx") + python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src}) + if(BUILD_DPCTL_EXT_SYCL) + add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src}) + target_compile_options(${_trgt} PRIVATE -fno-sycl-id-queries-fit-in-int) + target_link_options(${_trgt} PRIVATE -fsycl-device-code-split=per_kernel) + if(DPCTL_OFFLOAD_COMPRESS) + target_link_options(${_trgt} PRIVATE 
--offload-compress) + endif() + if(_dpctl_sycl_targets) + # make fat binary + target_compile_options( + ${_trgt} + PRIVATE ${_dpctl_sycl_target_compile_options} + ) + target_link_options(${_trgt} PRIVATE ${_dpctl_sycl_target_link_options}) + endif() + endif() + target_link_libraries(${_trgt} PRIVATE Python::NumPy) + if(DPCTL_GENERATE_COVERAGE) + target_compile_definitions(${_trgt} PRIVATE CYTHON_TRACE=1 CYTHON_TRACE_NOGIL=1) + if(BUILD_DPCTL_EXT_SYCL) + target_compile_options(${_trgt} PRIVATE -fno-sycl-use-footer) + endif() + endif() + # Dpctl + target_include_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR}) + target_link_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR}/..) + target_link_libraries(${_trgt} PRIVATE DPCTLSyclInterface) + set(_linker_options "LINKER:${DPCTL_LDFLAGS}") + target_link_options(${_trgt} PRIVATE ${_linker_options}) + get_filename_component(_name_wle ${_generated_src} NAME_WLE) + get_filename_component(_generated_src_dir ${_generated_src} DIRECTORY) + set(_generated_public_h "${_generated_src_dir}/${_name_wle}.h") + set(_generated_api_h "${_generated_src_dir}/${_name_wle}_api.h") + + # TODO: create separate folder inside build folder that contains only + # headers related to this target and appropriate folder structure to + # eliminate shadow dependencies + get_filename_component(_generated_src_dir_dir ${_generated_src_dir} DIRECTORY) + # TODO: do not set directory if we did not generate header + target_include_directories(${_trgt} INTERFACE ${_generated_src_dir_dir}) + set(_rpath_value "$ORIGIN") + if(BUILD_DPCTL_EXT_RELATIVE_PATH) + set(_rpath_value "${_rpath_value}/${BUILD_DPCTL_EXT_RELATIVE_PATH}") + endif() + if(DPCTL_WITH_REDIST) + set(_rpath_value "${_rpath_value}:${_rpath_value}/../../..") + endif() + set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH ${_rpath_value}) + + install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest}) + install( + FILES ${_generated_api_h} + DESTINATION 
${CMAKE_INSTALL_PREFIX}/dpctl_ext/include/${_dest} + OPTIONAL + ) + install( + FILES ${_generated_public_h} + DESTINATION ${CMAKE_INSTALL_PREFIX}/dpctl_ext/include/${_dest} + OPTIONAL + ) + if(DPCTL_GENERATE_COVERAGE) + get_filename_component(_original_src_dir ${_src} DIRECTORY) + file(RELATIVE_PATH _rel_dir ${CMAKE_SOURCE_DIR} ${_original_src_dir}) + install(FILES ${_generated_src} DESTINATION ${CMAKE_INSTALL_PREFIX}/${_rel_dir}) + endif() + + # Create target with headers only, because python is managing all the + # library imports at runtime + set(_trgt_headers ${_trgt}_headers) + add_library(${_trgt_headers} INTERFACE) + add_dependencies(${_trgt_headers} ${_trgt}) + get_target_property(_trgt_headers_dir ${_trgt} INTERFACE_INCLUDE_DIRECTORIES) + target_include_directories(${_trgt_headers} INTERFACE ${_trgt_headers_dir}) +endfunction() + add_subdirectory(tensor) diff --git a/dpctl_ext/tensor/CMakeLists.txt b/dpctl_ext/tensor/CMakeLists.txt index 6f286a8d719..16d6cfa7e13 100644 --- a/dpctl_ext/tensor/CMakeLists.txt +++ b/dpctl_ext/tensor/CMakeLists.txt @@ -29,6 +29,14 @@ find_package(Python COMPONENTS Development.Module) +file(GLOB _cython_sources *.pyx) +foreach(_cy_file ${_cython_sources}) + get_filename_component(_trgt ${_cy_file} NAME_WLE) + build_dpctl_ext(${_trgt} ${_cy_file} "dpctl_ext/tensor" RELATIVE_PATH "..") + target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) + # target_link_libraries(DpctlCAPI INTERFACE ${_trgt}_headers) +endforeach() + if(WIN32) if(${CMAKE_VERSION} VERSION_LESS "3.27") # this is a work-around for target_link_options inserting option after -link option, cause From 3c428a62eaf344b5570883d1166395f600802275 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 17:15:47 -0800 Subject: [PATCH 14/24] Switch fully to dpctl_ext.tensor in dpctl_ext.tensor --- dpctl_ext/tensor/_accumulation.py | 25 ++-- dpctl_ext/tensor/_clip.py | 81 ++++++------ dpctl_ext/tensor/_copy_utils.py | 71 
+++++----- dpctl_ext/tensor/_ctors.py | 53 ++++---- dpctl_ext/tensor/_elementwise_common.py | 67 +++++----- dpctl_ext/tensor/_indexing_functions.py | 19 ++- dpctl_ext/tensor/_linear_algebra_functions.py | 2 +- dpctl_ext/tensor/_manipulation_functions.py | 61 +++++---- dpctl_ext/tensor/_reduction.py | 41 +++--- dpctl_ext/tensor/_reshape.py | 5 +- dpctl_ext/tensor/_scalar_utils.py | 9 +- dpctl_ext/tensor/_search_functions.py | 29 ++--- dpctl_ext/tensor/_searchsorted.py | 8 +- dpctl_ext/tensor/_set_functions.py | 121 +++++++++--------- dpctl_ext/tensor/_sorting.py | 47 ++++--- dpctl_ext/tensor/_statistical_functions.py | 39 +++--- dpctl_ext/tensor/_testing.py | 81 ++++++------ dpctl_ext/tensor/_type_utils.py | 13 +- dpctl_ext/tensor/_utility_functions.py | 39 +++--- 19 files changed, 387 insertions(+), 424 deletions(-) diff --git a/dpctl_ext/tensor/_accumulation.py b/dpctl_ext/tensor/_accumulation.py index 2dfe9656e19..8628628f3bf 100644 --- a/dpctl_ext/tensor/_accumulation.py +++ b/dpctl_ext/tensor/_accumulation.py @@ -27,12 +27,11 @@ # ***************************************************************************** import dpctl -import dpctl.tensor as dpt from dpctl.utils import ExecutionPlacementError, SequentialOrderManager # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_accumulation_impl as tai import dpctl_ext.tensor._tensor_impl as ti @@ -82,7 +81,7 @@ def _accumulate_common( perm = [i for i in range(nd) if i != axis] + [ axis, ] - arr = dpt_ext.permute_dims(x, perm) + arr = dpt.permute_dims(x, perm) q = x.sycl_queue inp_dt = x.dtype res_usm_type = x.usm_type @@ -130,16 +129,16 @@ def _accumulate_common( ) # permute out array dims if necessary if a1 != nd: - out = dpt_ext.permute_dims(out, perm) + out = dpt.permute_dims(out, perm) orig_out = out if ti._array_overlap(x, out) and implemented_types: - out = dpt_ext.empty_like(out) + 
out = dpt.empty_like(out) else: - out = dpt_ext.empty( + out = dpt.empty( res_sh, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q ) if a1 != nd: - out = dpt_ext.permute_dims(out, perm) + out = dpt.permute_dims(out, perm) _manager = SequentialOrderManager[q] depends = _manager.submitted_events @@ -166,7 +165,7 @@ def _accumulate_common( out = orig_out else: if _dtype_supported(res_dt, res_dt): - tmp = dpt_ext.empty( + tmp = dpt.empty( arr.shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q ) ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray( @@ -191,18 +190,18 @@ def _accumulate_common( _manager.add_event_pair(ht_e, acc_ev) else: buf_dt = _default_accumulation_type_fn(inp_dt, q) - tmp = dpt_ext.empty( + tmp = dpt.empty( arr.shape, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q ) ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray( src=arr, dst=tmp, sycl_queue=q, depends=depends ) _manager.add_event_pair(ht_e_cpy, cpy_e) - tmp_res = dpt_ext.empty( + tmp_res = dpt.empty( res_sh, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q ) if a1 != nd: - tmp_res = dpt_ext.permute_dims(tmp_res, perm) + tmp_res = dpt.permute_dims(tmp_res, perm) if not include_initial: ht_e, acc_ev = _accumulate_fn( src=tmp, @@ -225,10 +224,10 @@ def _accumulate_common( _manager.add_event_pair(ht_e_cpy2, cpy_e2) if appended_axis: - out = dpt_ext.squeeze(out) + out = dpt.squeeze(out) if a1 != nd: inv_perm = sorted(range(nd), key=lambda d: perm[d]) - out = dpt_ext.permute_dims(out, inv_perm) + out = dpt.permute_dims(out, inv_perm) return out diff --git a/dpctl_ext/tensor/_clip.py b/dpctl_ext/tensor/_clip.py index c21d601966b..8071f13bee1 100644 --- a/dpctl_ext/tensor/_clip.py +++ b/dpctl_ext/tensor/_clip.py @@ -27,12 +27,11 @@ # ***************************************************************************** import dpctl -import dpctl.tensor as dpt from dpctl.utils import ExecutionPlacementError, SequentialOrderManager # TODO: revert to `import dpctl.tensor...` # when dpnp fully 
migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_elementwise_impl as tei import dpctl_ext.tensor._tensor_impl as ti @@ -163,7 +162,7 @@ def _clip_none(x, val, out, order, _binary_fn): if ti._array_overlap(x, out): if not ti._same_logical_tensors(x, out): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if isinstance(val, dpt.usm_ndarray): if ( @@ -171,12 +170,12 @@ def _clip_none(x, val, out, order, _binary_fn): and not ti._same_logical_tensors(val, out) and val_dtype == res_dt ): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if isinstance(val, dpt.usm_ndarray): val_ary = val else: - val_ary = dpt_ext.asarray(val, dtype=val_dtype, sycl_queue=exec_q) + val_ary = dpt.asarray(val, dtype=val_dtype, sycl_queue=exec_q) if order == "A": order = ( @@ -197,7 +196,7 @@ def _clip_none(x, val, out, order, _binary_fn): x, val_ary, res_dt, res_shape, res_usm_type, exec_q ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -205,9 +204,9 @@ def _clip_none(x, val, out, order, _binary_fn): order=order, ) if x_shape != res_shape: - x = dpt_ext.broadcast_to(x, res_shape) + x = dpt.broadcast_to(x, res_shape) if val_ary.shape != res_shape: - val_ary = dpt_ext.broadcast_to(val_ary, res_shape) + val_ary = dpt.broadcast_to(val_ary, res_shape) _manager = SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events ht_binary_ev, binary_ev = _binary_fn( @@ -229,7 +228,7 @@ def _clip_none(x, val, out, order, _binary_fn): if order == "K": buf = _empty_like_orderK(val_ary, res_dt) else: - buf = dpt_ext.empty_like(val_ary, dtype=res_dt, order=order) + buf = dpt.empty_like(val_ary, dtype=res_dt, order=order) _manager = SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( @@ -242,7 +241,7 @@ def _clip_none(x, val, out, order, _binary_fn): x, buf, res_dt, res_shape, 
res_usm_type, exec_q ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -251,8 +250,8 @@ def _clip_none(x, val, out, order, _binary_fn): ) if x_shape != res_shape: - x = dpt_ext.broadcast_to(x, res_shape) - buf = dpt_ext.broadcast_to(buf, res_shape) + x = dpt.broadcast_to(x, res_shape) + buf = dpt.broadcast_to(buf, res_shape) ht_binary_ev, binary_ev = _binary_fn( src1=x, src2=buf, @@ -313,9 +312,9 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if order not in ["K", "C", "F", "A"]: order = "K" if x.dtype.kind in "iu": - if isinstance(min, int) and min <= dpt_ext.iinfo(x.dtype).min: + if isinstance(min, int) and min <= dpt.iinfo(x.dtype).min: min = None - if isinstance(max, int) and max >= dpt_ext.iinfo(x.dtype).max: + if isinstance(max, int) and max >= dpt.iinfo(x.dtype).max: max = None if min is None and max is None: exec_q = x.sycl_queue @@ -353,14 +352,14 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if ti._array_overlap(x, out): if not ti._same_logical_tensors(x, out): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) else: return out else: if order == "K": out = _empty_like_orderK(x, x.dtype) else: - out = dpt_ext.empty_like(x, order=order) + out = dpt.empty_like(x, order=order) _manager = SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -519,7 +518,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if ti._array_overlap(x, out): if not ti._same_logical_tensors(x, out): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if isinstance(min, dpt.usm_ndarray): if ( @@ -527,7 +526,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): and not ti._same_logical_tensors(min, out) and buf1_dt is None ): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if isinstance(max, dpt.usm_ndarray): if ( @@ -535,16 +534,16 @@ def clip(x, /, min=None, max=None, out=None, order="K"): and not ti._same_logical_tensors(max, out) and buf2_dt is 
None ): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if isinstance(min, dpt.usm_ndarray): a_min = min else: - a_min = dpt_ext.asarray(min, dtype=min_dtype, sycl_queue=exec_q) + a_min = dpt.asarray(min, dtype=min_dtype, sycl_queue=exec_q) if isinstance(max, dpt.usm_ndarray): a_max = max else: - a_max = dpt_ext.asarray(max, dtype=max_dtype, sycl_queue=exec_q) + a_max = dpt.asarray(max, dtype=max_dtype, sycl_queue=exec_q) if order == "A": order = ( @@ -572,7 +571,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): exec_q, ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -580,11 +579,11 @@ def clip(x, /, min=None, max=None, out=None, order="K"): order=order, ) if x_shape != res_shape: - x = dpt_ext.broadcast_to(x, res_shape) + x = dpt.broadcast_to(x, res_shape) if a_min.shape != res_shape: - a_min = dpt_ext.broadcast_to(a_min, res_shape) + a_min = dpt.broadcast_to(a_min, res_shape) if a_max.shape != res_shape: - a_max = dpt_ext.broadcast_to(a_max, res_shape) + a_max = dpt.broadcast_to(a_max, res_shape) _manager = SequentialOrderManager[exec_q] dep_ev = _manager.submitted_events ht_binary_ev, binary_ev = ti._clip( @@ -612,7 +611,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if order == "K": buf2 = _empty_like_orderK(a_max, buf2_dt) else: - buf2 = dpt_ext.empty_like(a_max, dtype=buf2_dt, order=order) + buf2 = dpt.empty_like(a_max, dtype=buf2_dt, order=order) _manager = SequentialOrderManager[exec_q] dep_ev = _manager.submitted_events ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( @@ -631,7 +630,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): exec_q, ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -639,10 +638,10 @@ def clip(x, /, min=None, max=None, out=None, order="K"): order=order, ) - x = dpt_ext.broadcast_to(x, res_shape) + x = dpt.broadcast_to(x, res_shape) if a_min.shape != res_shape: - a_min = 
dpt_ext.broadcast_to(a_min, res_shape) - buf2 = dpt_ext.broadcast_to(buf2, res_shape) + a_min = dpt.broadcast_to(a_min, res_shape) + buf2 = dpt.broadcast_to(buf2, res_shape) ht_binary_ev, binary_ev = ti._clip( src=x, min=a_min, @@ -668,7 +667,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if order == "K": buf1 = _empty_like_orderK(a_min, buf1_dt) else: - buf1 = dpt_ext.empty_like(a_min, dtype=buf1_dt, order=order) + buf1 = dpt.empty_like(a_min, dtype=buf1_dt, order=order) _manager = SequentialOrderManager[exec_q] dep_ev = _manager.submitted_events ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( @@ -687,7 +686,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): exec_q, ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -695,10 +694,10 @@ def clip(x, /, min=None, max=None, out=None, order="K"): order=order, ) - x = dpt_ext.broadcast_to(x, res_shape) - buf1 = dpt_ext.broadcast_to(buf1, res_shape) + x = dpt.broadcast_to(x, res_shape) + buf1 = dpt.broadcast_to(buf1, res_shape) if a_max.shape != res_shape: - a_max = dpt_ext.broadcast_to(a_max, res_shape) + a_max = dpt.broadcast_to(a_max, res_shape) ht_binary_ev, binary_ev = ti._clip( src=x, min=buf1, @@ -736,7 +735,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if order == "K": buf1 = _empty_like_orderK(a_min, buf1_dt) else: - buf1 = dpt_ext.empty_like(a_min, dtype=buf1_dt, order=order) + buf1 = dpt.empty_like(a_min, dtype=buf1_dt, order=order) _manager = SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -747,7 +746,7 @@ def clip(x, /, min=None, max=None, out=None, order="K"): if order == "K": buf2 = _empty_like_orderK(a_max, buf2_dt) else: - buf2 = dpt_ext.empty_like(a_max, dtype=buf2_dt, order=order) + buf2 = dpt.empty_like(a_max, dtype=buf2_dt, order=order) ht_copy2_ev, copy2_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=a_max, dst=buf2, sycl_queue=exec_q, depends=dep_evs ) @@ -758,7 +757,7 @@ 
def clip(x, /, min=None, max=None, out=None, order="K"): x, buf1, buf2, res_dt, res_shape, res_usm_type, exec_q ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -766,9 +765,9 @@ def clip(x, /, min=None, max=None, out=None, order="K"): order=order, ) - x = dpt_ext.broadcast_to(x, res_shape) - buf1 = dpt_ext.broadcast_to(buf1, res_shape) - buf2 = dpt_ext.broadcast_to(buf2, res_shape) + x = dpt.broadcast_to(x, res_shape) + buf1 = dpt.broadcast_to(buf1, res_shape) + buf2 = dpt.broadcast_to(buf2, res_shape) ht_, clip_ev = ti._clip( src=x, min=buf1, diff --git a/dpctl_ext/tensor/_copy_utils.py b/dpctl_ext/tensor/_copy_utils.py index 37879997b78..44fbfc404cf 100644 --- a/dpctl_ext/tensor/_copy_utils.py +++ b/dpctl_ext/tensor/_copy_utils.py @@ -32,17 +32,16 @@ import dpctl import dpctl.memory as dpm -import dpctl.tensor as dpt import dpctl.utils import numpy as np -from dpctl.tensor._data_types import _get_dtype -from dpctl.tensor._device import normalize_queue_device # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti +from ._data_types import _get_dtype +from ._device import normalize_queue_device from ._numpy_helper import normalize_axis_index from ._type_utils import _dtype_supported_by_device_impl @@ -91,7 +90,7 @@ def _copy_from_numpy(np_ary, usm_type="device", sycl_queue=None): ) else: Xusm_dtype = dt - Xusm = dpt_ext.empty( + Xusm = dpt.empty( Xnp.shape, dtype=Xusm_dtype, usm_type=usm_type, sycl_queue=sycl_queue ) _copy_from_numpy_into(Xusm, Xnp) @@ -159,7 +158,7 @@ def _extract_impl(ary, ary_mask, axis=0): elif isinstance(ary_mask, np.ndarray): dst_usm_type = ary.usm_type exec_q = ary.sycl_queue - ary_mask = dpt_ext.asarray( + ary_mask = dpt.asarray( ary_mask, usm_type=dst_usm_type, sycl_queue=exec_q ) else: @@ -176,7 +175,7 @@ def _extract_impl(ary, ary_mask, axis=0): ) 
mask_nelems = ary_mask.size cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64 - cumsum = dpt_ext.empty(mask_nelems, dtype=cumsum_dt, device=ary_mask.device) + cumsum = dpt.empty(mask_nelems, dtype=cumsum_dt, device=ary_mask.device) exec_q = cumsum.sycl_queue _manager = dpctl.utils.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -184,7 +183,7 @@ def _extract_impl(ary, ary_mask, axis=0): ary_mask, cumsum, sycl_queue=exec_q, depends=dep_evs ) dst_shape = ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :] - dst = dpt_ext.empty( + dst = dpt.empty( dst_shape, dtype=ary.dtype, usm_type=dst_usm_type, device=ary.device ) if dst.size == 0: @@ -247,7 +246,7 @@ def _nonzero_impl(ary): usm_type = ary.usm_type mask_nelems = ary.size cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64 - cumsum = dpt_ext.empty( + cumsum = dpt.empty( mask_nelems, dtype=cumsum_dt, sycl_queue=exec_q, order="C" ) _manager = dpctl.utils.SequentialOrderManager[exec_q] @@ -256,7 +255,7 @@ def _nonzero_impl(ary): ary, cumsum, sycl_queue=exec_q, depends=dep_evs ) indexes_dt = ti.default_device_index_type(exec_q.sycl_device) - indexes = dpt_ext.empty( + indexes = dpt.empty( (ary.ndim, mask_count), dtype=indexes_dt, usm_type=usm_type, @@ -284,14 +283,14 @@ def _prepare_indices_arrays(inds, q, usm_type): lambda ind: ( ind if isinstance(ind, dpt.usm_ndarray) - else dpt_ext.asarray(ind, usm_type=usm_type, sycl_queue=q) + else dpt.asarray(ind, usm_type=usm_type, sycl_queue=q) ), inds, ) ) # promote to a common integral type if possible - ind_dt = dpt_ext.result_type(*inds) + ind_dt = dpt.result_type(*inds) if ind_dt.kind not in "ui": raise ValueError( "cannot safely promote indices to an integer data type" @@ -299,14 +298,14 @@ def _prepare_indices_arrays(inds, q, usm_type): inds = tuple( map( lambda ind: ( - ind if ind.dtype == ind_dt else dpt_ext.astype(ind, ind_dt) + ind if ind.dtype == ind_dt else dpt.astype(ind, ind_dt) ), inds, ) ) # broadcast - 
inds = dpt_ext.broadcast_arrays(*inds) + inds = dpt.broadcast_arrays(*inds) return inds @@ -332,7 +331,7 @@ def _put_multi_index(ary, inds, p, vals, mode=0): if exec_q is not None: if not isinstance(vals, dpt.usm_ndarray): - vals = dpt_ext.asarray( + vals = dpt.asarray( vals, dtype=ary.dtype, usm_type=coerced_usm_type, @@ -367,8 +366,8 @@ def _put_multi_index(ary, inds, p, vals, mode=0): if vals.dtype == ary.dtype: rhs = vals else: - rhs = dpt_ext.astype(vals, ary.dtype) - rhs = dpt_ext.broadcast_to(rhs, expected_vals_shape) + rhs = dpt.astype(vals, ary.dtype) + rhs = dpt.broadcast_to(rhs, expected_vals_shape) _manager = dpctl.utils.SequentialOrderManager[exec_q] dep_ev = _manager.submitted_events hev, put_ev = ti._put( @@ -418,7 +417,7 @@ def _take_multi_index(ary, inds, p, mode=0): if 0 in ary_sh[p:p_end] and ind0.size != 0: raise IndexError("cannot take non-empty indices from an empty axis") res_shape = ary_sh[:p] + ind0.shape + ary_sh[p_end:] - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=ary.dtype, usm_type=res_usm_type, sycl_queue=exec_q ) _manager = dpctl.utils.SequentialOrderManager[exec_q] @@ -681,9 +680,7 @@ def _make_empty_like_orderK(x, dt, usm_type, dev): inv_perm = sorted(range(x.ndim), key=lambda i: perm[i]) sh = x.shape sh_sorted = tuple(sh[i] for i in perm) - R = dpt_ext.empty( - sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C" - ) + R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") if min(st) < 0: st_sorted = [st[i] for i in perm] sl = tuple( @@ -695,7 +692,7 @@ def _make_empty_like_orderK(x, dt, usm_type, dev): for i in range(x.ndim) ) R = R[sl] - return dpt_ext.permute_dims(R, inv_perm) + return dpt.permute_dims(R, inv_perm) def _empty_like_orderK(x, dt, usm_type=None, dev=None): @@ -714,11 +711,11 @@ def _empty_like_orderK(x, dt, usm_type=None, dev=None): dev = x.device fl = x.flags if fl["C"] or x.size <= 1: - return dpt_ext.empty_like( + return dpt.empty_like( x, dtype=dt, usm_type=usm_type, 
device=dev, order="C" ) elif fl["F"]: - return dpt_ext.empty_like( + return dpt.empty_like( x, dtype=dt, usm_type=usm_type, device=dev, order="F" ) return _make_empty_like_orderK(x, dt, usm_type, dev) @@ -736,11 +733,11 @@ def _from_numpy_empty_like_orderK(x, dt, usm_type, dev): raise TypeError(f"Expected numpy.ndarray, got {type(x)}") fl = x.flags if fl["C"] or x.size <= 1: - return dpt_ext.empty( + return dpt.empty( x.shape, dtype=dt, usm_type=usm_type, device=dev, order="C" ) elif fl["F"]: - return dpt_ext.empty( + return dpt.empty( x.shape, dtype=dt, usm_type=usm_type, device=dev, order="F" ) return _make_empty_like_orderK(x, dt, usm_type, dev) @@ -760,11 +757,11 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): fl1 = X1.flags fl2 = X2.flags if fl1["C"] or fl2["C"]: - return dpt_ext.empty( + return dpt.empty( res_shape, dtype=dt, usm_type=usm_type, device=dev, order="C" ) if fl1["F"] and fl2["F"]: - return dpt_ext.empty( + return dpt.empty( res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F" ) st1 = list(X1.strides) @@ -787,9 +784,7 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): st2_sorted = [st2[i] for i in perm] sh = res_shape sh_sorted = tuple(sh[i] for i in perm) - R = dpt_ext.empty( - sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C" - ) + R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") if max(min(st1_sorted), min(st2_sorted)) < 0: sl = tuple( ( @@ -800,7 +795,7 @@ def _empty_like_pair_orderK(X1, X2, dt, res_shape, usm_type, dev): for i in range(nd1) ) R = R[sl] - return dpt_ext.permute_dims(R, inv_perm) + return dpt.permute_dims(R, inv_perm) def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev): @@ -827,11 +822,11 @@ def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev): fl2 = X2.flags fl3 = X3.flags if fl1["C"] or fl2["C"] or fl3["C"]: - return dpt_ext.empty( + return dpt.empty( res_shape, dtype=dt, usm_type=usm_type, 
device=dev, order="C" ) if fl1["F"] and fl2["F"] and fl3["F"]: - return dpt_ext.empty( + return dpt.empty( res_shape, dtype=dt, usm_type=usm_type, device=dev, order="F" ) st1 = list(X1.strides) @@ -859,9 +854,7 @@ def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev): st3_sorted = [st3[i] for i in perm] sh = res_shape sh_sorted = tuple(sh[i] for i in perm) - R = dpt_ext.empty( - sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C" - ) + R = dpt.empty(sh_sorted, dtype=dt, usm_type=usm_type, device=dev, order="C") if max(min(st1_sorted), min(st2_sorted), min(st3_sorted)) < 0: sl = tuple( ( @@ -876,7 +869,7 @@ def _empty_like_triple_orderK(X1, X2, X3, dt, res_shape, usm_type, dev): for i in range(nd1) ) R = R[sl] - return dpt_ext.permute_dims(R, inv_perm) + return dpt.permute_dims(R, inv_perm) def copy(usm_ary, /, *, order="K"): @@ -1019,7 +1012,7 @@ def astype( else: target_dtype = _get_dtype(newdtype, usm_ary.sycl_queue) - if not dpt_ext.can_cast(ary_dtype, target_dtype, casting=casting): + if not dpt.can_cast(ary_dtype, target_dtype, casting=casting): raise TypeError( f"Can not cast from {ary_dtype} to {newdtype} " f"according to rule {casting}." 
diff --git a/dpctl_ext/tensor/_ctors.py b/dpctl_ext/tensor/_ctors.py index 21c3d007718..d249efa8a60 100644 --- a/dpctl_ext/tensor/_ctors.py +++ b/dpctl_ext/tensor/_ctors.py @@ -31,17 +31,16 @@ import dpctl import dpctl.memory as dpm -import dpctl.tensor as dpt import dpctl.utils import numpy as np -from dpctl.tensor._data_types import _get_dtype -from dpctl.tensor._device import normalize_queue_device -from dpctl.tensor._usmarray import _is_object_with_buffer_protocol # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti +from dpctl_ext.tensor._data_types import _get_dtype +from dpctl_ext.tensor._device import normalize_queue_device +from dpctl_ext.tensor._usmarray import _is_object_with_buffer_protocol from ._copy_utils import ( _empty_like_orderK, @@ -182,7 +181,7 @@ def _asarray_from_seq( if order in "KA": order = "C" if isinstance(exec_q, dpctl.SyclQueue): - res = dpt_ext.empty( + res = dpt.empty( seq_shape, dtype=dtype, usm_type=usm_type, @@ -193,7 +192,7 @@ def _asarray_from_seq( _device_copy_walker(seq_obj, res, _manager) return res else: - res = dpt_ext.empty( + res = dpt.empty( seq_shape, dtype=dtype, usm_type=usm_type, @@ -312,7 +311,7 @@ def _asarray_from_usm_ndarray( ) _manager.add_event_pair(hev, cpy_ev) else: - tmp = dpt_ext.asnumpy(usm_ndary) + tmp = dpt.asnumpy(usm_ndary) res[...] = tmp return res @@ -361,7 +360,7 @@ def _copy_through_host_walker(seq_o, usm_res): ) is None ): - usm_res[...] = dpt_ext.asnumpy(seq_o).copy() + usm_res[...] = dpt.asnumpy(seq_o).copy() return else: usm_res[...] = seq_o @@ -381,7 +380,7 @@ def _copy_through_host_walker(seq_o, usm_res): ) is None ): - usm_res[...] = dpt_ext.asnumpy(usm_ar).copy() + usm_res[...] = dpt.asnumpy(usm_ar).copy() else: usm_res[...] 
= usm_ar return @@ -1092,7 +1091,7 @@ def eye( n_cols = n_rows if n_cols is None else operator.index(n_cols) k = operator.index(k) if k >= n_cols or -k >= n_rows: - return dpt_ext.zeros( + return dpt.zeros( (n_rows, n_cols), dtype=dtype, order=order, @@ -1194,14 +1193,14 @@ def full( sycl_queue = normalize_queue_device( sycl_queue=sycl_queue, device=device ) - X = dpt_ext.asarray( + X = dpt.asarray( fill_value, dtype=dtype, order=order, usm_type=usm_type, sycl_queue=sycl_queue, ) - return dpt_ext.copy(dpt_ext.broadcast_to(X, shape), order=order) + return dpt.copy(dpt.broadcast_to(X, shape), order=order) else: _validate_fill_value(fill_value) @@ -1301,14 +1300,14 @@ def full_like( if order == "K": _ensure_native_dtype_device_support(dtype, sycl_queue.sycl_device) if isinstance(fill_value, (dpt.usm_ndarray, np.ndarray, tuple, list)): - X = dpt_ext.asarray( + X = dpt.asarray( fill_value, dtype=dtype, order=order, usm_type=usm_type, sycl_queue=sycl_queue, ) - X = dpt_ext.broadcast_to(X, sh) + X = dpt.broadcast_to(X, sh) res = _empty_like_orderK(x, dtype, usm_type, sycl_queue) _manager = dpctl.utils.SequentialOrderManager[sycl_queue] # order copy after tasks populating X @@ -1434,14 +1433,14 @@ def linspace( start = float(start) stop = float(stop) - res = dpt_ext.empty(num, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue) + res = dpt.empty(num, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue) _manager = dpctl.utils.SequentialOrderManager[sycl_queue] hev, la_ev = ti._linspace_affine( start, stop, dst=res, include_endpoint=endpoint, sycl_queue=sycl_queue ) _manager.add_event_pair(hev, la_ev) - return res if int_dt is None else dpt_ext.astype(res, int_dt) + return res if int_dt is None else dpt.astype(res, int_dt) def meshgrid(*arrays, indexing="xy"): @@ -1506,15 +1505,15 @@ def meshgrid(*arrays, indexing="xy"): res = [] if n > 1 and indexing == "xy": - res.append(dpt_ext.reshape(arrays[0], (1, -1) + sh[2:], copy=True)) - res.append(dpt_ext.reshape(arrays[1], sh, 
copy=True)) + res.append(dpt.reshape(arrays[0], (1, -1) + sh[2:], copy=True)) + res.append(dpt.reshape(arrays[1], sh, copy=True)) arrays, sh = arrays[2:], sh[-2:] + sh[:-2] for array in arrays: - res.append(dpt_ext.reshape(array, sh, copy=True)) + res.append(dpt.reshape(array, sh, copy=True)) sh = sh[-1:] + sh[:-1] - output = dpt_ext.broadcast_arrays(*res) + output = dpt.broadcast_arrays(*res) return output @@ -1707,7 +1706,7 @@ def tril(x, /, *, k=0): q = x.sycl_queue if k >= shape[nd - 1] - 1: - res = dpt_ext.empty( + res = dpt.empty( x.shape, dtype=x.dtype, order=order, @@ -1721,7 +1720,7 @@ def tril(x, /, *, k=0): ) _manager.add_event_pair(hev, cpy_ev) elif k < -shape[nd - 2]: - res = dpt_ext.zeros( + res = dpt.zeros( x.shape, dtype=x.dtype, order=order, @@ -1729,7 +1728,7 @@ def tril(x, /, *, k=0): sycl_queue=q, ) else: - res = dpt_ext.empty( + res = dpt.empty( x.shape, dtype=x.dtype, order=order, @@ -1785,7 +1784,7 @@ def triu(x, /, *, k=0): q = x.sycl_queue if k > shape[nd - 1]: - res = dpt_ext.zeros( + res = dpt.zeros( x.shape, dtype=x.dtype, order=order, @@ -1793,7 +1792,7 @@ def triu(x, /, *, k=0): sycl_queue=q, ) elif k <= -shape[nd - 2] + 1: - res = dpt_ext.empty( + res = dpt.empty( x.shape, dtype=x.dtype, order=order, @@ -1807,7 +1806,7 @@ def triu(x, /, *, k=0): ) _manager.add_event_pair(hev, cpy_ev) else: - res = dpt_ext.empty( + res = dpt.empty( x.shape, dtype=x.dtype, order=order, diff --git a/dpctl_ext/tensor/_elementwise_common.py b/dpctl_ext/tensor/_elementwise_common.py index 7fd9dabf961..ffe849db9ca 100644 --- a/dpctl_ext/tensor/_elementwise_common.py +++ b/dpctl_ext/tensor/_elementwise_common.py @@ -27,12 +27,11 @@ # ***************************************************************************** import dpctl -import dpctl.tensor as dpt from dpctl.utils import ExecutionPlacementError, SequentialOrderManager # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import 
dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti from ._copy_utils import _empty_like_orderK, _empty_like_pair_orderK @@ -233,7 +232,7 @@ def __call__(self, x, /, *, out=None, order="K"): # Allocate a temporary buffer to avoid memory overlapping. # Note if `buf_dt` is not None, a temporary copy of `x` will be # created, so the array overlap check isn't needed. - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if ( dpctl.utils.get_execution_queue((x.sycl_queue, out.sycl_queue)) @@ -252,7 +251,7 @@ def __call__(self, x, /, *, out=None, order="K"): else: if order == "A": order = "F" if x.flags.f_contiguous else "C" - out = dpt_ext.empty_like(x, dtype=res_dt, order=order) + out = dpt.empty_like(x, dtype=res_dt, order=order) dep_evs = _manager.submitted_events ht_unary_ev, unary_ev = self.unary_fn_( @@ -275,7 +274,7 @@ def __call__(self, x, /, *, out=None, order="K"): else: if order == "A": order = "F" if x.flags.f_contiguous else "C" - buf = dpt_ext.empty_like(x, dtype=buf_dt, order=order) + buf = dpt.empty_like(x, dtype=buf_dt, order=order) dep_evs = _manager.submitted_events ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( @@ -286,7 +285,7 @@ def __call__(self, x, /, *, out=None, order="K"): if order == "K": out = _empty_like_orderK(buf, res_dt) else: - out = dpt_ext.empty_like(buf, dtype=res_dt, order=order) + out = dpt.empty_like(buf, dtype=res_dt, order=order) ht, uf_ev = self.unary_fn_( buf, out, sycl_queue=exec_q, depends=[copy_ev] @@ -597,7 +596,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): if isinstance(o1, dpt.usm_ndarray): if ti._array_overlap(o1, out) and buf1_dt is None: if not ti._same_logical_tensors(o1, out): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) elif self.binary_inplace_fn_ is not None: # if there is a dedicated in-place kernel # it can be called here, otherwise continues @@ -610,12 +609,12 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): ): buf2_dt = o2_dtype else: - 
src2 = dpt_ext.asarray( + src2 = dpt.asarray( o2, dtype=o2_dtype, sycl_queue=exec_q ) if buf2_dt is None: if src2.shape != res_shape: - src2 = dpt_ext.broadcast_to(src2, res_shape) + src2 = dpt.broadcast_to(src2, res_shape) dep_evs = _manager.submitted_events ht_, comp_ev = self.binary_inplace_fn_( lhs=o1, @@ -625,7 +624,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): ) _manager.add_event_pair(ht_, comp_ev) else: - buf2 = dpt_ext.empty_like(src2, dtype=buf2_dt) + buf2 = dpt.empty_like(src2, dtype=buf2_dt) dep_evs = _manager.submitted_events ( ht_copy_ev, @@ -638,7 +637,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): ) _manager.add_event_pair(ht_copy_ev, copy_ev) - buf2 = dpt_ext.broadcast_to(buf2, res_shape) + buf2 = dpt.broadcast_to(buf2, res_shape) ht_, bf_ev = self.binary_inplace_fn_( lhs=o1, rhs=buf2, @@ -657,16 +656,16 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): ): # should not reach if out is reallocated # after being checked against o1 - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if isinstance(o1, dpt.usm_ndarray): src1 = o1 else: - src1 = dpt_ext.asarray(o1, dtype=o1_dtype, sycl_queue=exec_q) + src1 = dpt.asarray(o1, dtype=o1_dtype, sycl_queue=exec_q) if isinstance(o2, dpt.usm_ndarray): src2 = o2 else: - src2 = dpt_ext.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q) + src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q) if order == "A": order = ( @@ -688,7 +687,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): src1, src2, res_dt, res_shape, res_usm_type, exec_q ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -696,9 +695,9 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): order=order, ) if src1.shape != res_shape: - src1 = dpt_ext.broadcast_to(src1, res_shape) + src1 = dpt.broadcast_to(src1, res_shape) if src2.shape != res_shape: - src2 = dpt_ext.broadcast_to(src2, res_shape) + src2 = dpt.broadcast_to(src2, res_shape) deps_ev = 
_manager.submitted_events ht_binary_ev, binary_ev = self.binary_fn_( src1=src1, @@ -723,7 +722,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): if order == "K": buf2 = _empty_like_orderK(src2, buf2_dt) else: - buf2 = dpt_ext.empty_like(src2, dtype=buf2_dt, order=order) + buf2 = dpt.empty_like(src2, dtype=buf2_dt, order=order) dep_evs = _manager.submitted_events ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=src2, dst=buf2, sycl_queue=exec_q, depends=dep_evs @@ -735,7 +734,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): src1, buf2, res_dt, res_shape, res_usm_type, exec_q ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -744,8 +743,8 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): ) if src1.shape != res_shape: - src1 = dpt_ext.broadcast_to(src1, res_shape) - buf2 = dpt_ext.broadcast_to(buf2, res_shape) + src1 = dpt.broadcast_to(src1, res_shape) + buf2 = dpt.broadcast_to(buf2, res_shape) ht_binary_ev, binary_ev = self.binary_fn_( src1=src1, src2=buf2, @@ -769,7 +768,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): if order == "K": buf1 = _empty_like_orderK(src1, buf1_dt) else: - buf1 = dpt_ext.empty_like(src1, dtype=buf1_dt, order=order) + buf1 = dpt.empty_like(src1, dtype=buf1_dt, order=order) dep_evs = _manager.submitted_events ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=src1, dst=buf1, sycl_queue=exec_q, depends=dep_evs @@ -781,7 +780,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): buf1, src2, res_dt, res_shape, res_usm_type, exec_q ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -789,9 +788,9 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): order=order, ) - buf1 = dpt_ext.broadcast_to(buf1, res_shape) + buf1 = dpt.broadcast_to(buf1, res_shape) if src2.shape != res_shape: - src2 = dpt_ext.broadcast_to(src2, res_shape) + src2 = dpt.broadcast_to(src2, res_shape) 
ht_binary_ev, binary_ev = self.binary_fn_( src1=buf1, src2=src2, @@ -820,7 +819,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): if order == "K": buf1 = _empty_like_orderK(src1, buf1_dt) else: - buf1 = dpt_ext.empty_like(src1, dtype=buf1_dt, order=order) + buf1 = dpt.empty_like(src1, dtype=buf1_dt, order=order) dep_evs = _manager.submitted_events ht_copy1_ev, copy1_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=src1, dst=buf1, sycl_queue=exec_q, depends=dep_evs @@ -829,7 +828,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): if order == "K": buf2 = _empty_like_orderK(src2, buf2_dt) else: - buf2 = dpt_ext.empty_like(src2, dtype=buf2_dt, order=order) + buf2 = dpt.empty_like(src2, dtype=buf2_dt, order=order) ht_copy2_ev, copy2_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=src2, dst=buf2, sycl_queue=exec_q, depends=dep_evs ) @@ -840,7 +839,7 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): buf1, buf2, res_dt, res_shape, res_usm_type, exec_q ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -848,8 +847,8 @@ def __call__(self, o1, o2, /, *, out=None, order="K"): order=order, ) - buf1 = dpt_ext.broadcast_to(buf1, res_shape) - buf2 = dpt_ext.broadcast_to(buf2, res_shape) + buf1 = dpt.broadcast_to(buf1, res_shape) + buf2 = dpt.broadcast_to(buf2, res_shape) ht_, bf_ev = self.binary_fn_( src1=buf1, src2=buf2, @@ -960,10 +959,10 @@ def _inplace_op(self, o1, o2): ): buf_dt = o2_dtype else: - src2 = dpt_ext.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q) + src2 = dpt.asarray(o2, dtype=o2_dtype, sycl_queue=exec_q) if buf_dt is None: if src2.shape != res_shape: - src2 = dpt_ext.broadcast_to(src2, res_shape) + src2 = dpt.broadcast_to(src2, res_shape) dep_evs = _manager.submitted_events ht_, comp_ev = self.binary_inplace_fn_( lhs=o1, @@ -973,7 +972,7 @@ def _inplace_op(self, o1, o2): ) _manager.add_event_pair(ht_, comp_ev) else: - buf = dpt_ext.empty_like(src2, dtype=buf_dt) + buf = 
dpt.empty_like(src2, dtype=buf_dt) dep_evs = _manager.submitted_events ( ht_copy_ev, @@ -986,7 +985,7 @@ def _inplace_op(self, o1, o2): ) _manager.add_event_pair(ht_copy_ev, copy_ev) - buf = dpt_ext.broadcast_to(buf, res_shape) + buf = dpt.broadcast_to(buf, res_shape) ht_, bf_ev = self.binary_inplace_fn_( lhs=o1, rhs=buf, diff --git a/dpctl_ext/tensor/_indexing_functions.py b/dpctl_ext/tensor/_indexing_functions.py index 5b4eb1aaf7a..08db81c1b16 100644 --- a/dpctl_ext/tensor/_indexing_functions.py +++ b/dpctl_ext/tensor/_indexing_functions.py @@ -29,12 +29,11 @@ import operator import dpctl -import dpctl.tensor as dpt import dpctl.utils # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti from ._copy_utils import ( @@ -57,7 +56,7 @@ def _get_indexing_mode(name): def _range(sh_i, i, nd, q, usm_t, dt): - ind = dpt_ext.arange(sh_i, dtype=dt, usm_type=usm_t, sycl_queue=q) + ind = dpt.arange(sh_i, dtype=dt, usm_type=usm_t, sycl_queue=q) ind.shape = tuple(sh_i if i == j else 1 for j in range(nd)) return ind @@ -177,7 +176,7 @@ def place(arr, mask, vals): raise dpctl.utils.ExecutionPlacementError if arr.shape != mask.shape or vals.ndim != 1: raise ValueError("Array sizes are not as required") - cumsum = dpt_ext.empty(mask.size, dtype="i8", sycl_queue=exec_q) + cumsum = dpt.empty(mask.size, dtype="i8", sycl_queue=exec_q) _manager = dpctl.utils.SequentialOrderManager[exec_q] deps_ev = _manager.submitted_events nz_count = ti.mask_positions( @@ -190,7 +189,7 @@ def place(arr, mask, vals): if vals.dtype == arr.dtype: rhs = vals else: - rhs = dpt_ext.astype(vals, arr.dtype) + rhs = dpt.astype(vals, arr.dtype) hev, pl_ev = ti._place( dst=arr, cumsum=cumsum, @@ -329,7 +328,7 @@ def put_vec_duplicates(vec, ind, vals): val_shape = indices.shape if not isinstance(vals, dpt.usm_ndarray): - vals = dpt_ext.asarray( + vals = dpt.asarray( vals, 
dtype=x.dtype, usm_type=vals_usm_type, sycl_queue=exec_q ) # choose to throw here for consistency with `place` @@ -340,8 +339,8 @@ def put_vec_duplicates(vec, ind, vals): if vals.dtype == x.dtype: rhs = vals else: - rhs = dpt_ext.astype(vals, x.dtype) - rhs = dpt_ext.broadcast_to(rhs, val_shape) + rhs = dpt.astype(vals, x.dtype) + rhs = dpt.broadcast_to(rhs, val_shape) _manager = dpctl.utils.SequentialOrderManager[exec_q] deps_ev = _manager.submitted_events @@ -540,9 +539,9 @@ def take(x, indices, /, *, axis=None, out=None, mode="wrap"): "Input and output allocation queues are not compatible" ) if ti._array_overlap(x, out): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=dt, usm_type=res_usm_type, sycl_queue=exec_q ) diff --git a/dpctl_ext/tensor/_linear_algebra_functions.py b/dpctl_ext/tensor/_linear_algebra_functions.py index 973050f93ac..6dfb30e881b 100644 --- a/dpctl_ext/tensor/_linear_algebra_functions.py +++ b/dpctl_ext/tensor/_linear_algebra_functions.py @@ -29,11 +29,11 @@ import operator import dpctl -import dpctl.tensor as dpt from dpctl.utils import ExecutionPlacementError, SequentialOrderManager # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_elementwise_impl as tei import dpctl_ext.tensor._tensor_impl as ti import dpctl_ext.tensor._tensor_linalg_impl as tli diff --git a/dpctl_ext/tensor/_manipulation_functions.py b/dpctl_ext/tensor/_manipulation_functions.py index e2d55c533bc..33817dd0aa2 100644 --- a/dpctl_ext/tensor/_manipulation_functions.py +++ b/dpctl_ext/tensor/_manipulation_functions.py @@ -30,13 +30,12 @@ import operator import dpctl -import dpctl.tensor as dpt import dpctl.utils as dputils import numpy as np # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import 
dpctl_ext.tensor._tensor_impl as ti from ._numpy_helper import normalize_axis_index, normalize_axis_tuple @@ -174,7 +173,7 @@ def _concat_axis_None(arrays): res_shape = 0 for array in arrays: res_shape += array.size - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q ) @@ -185,7 +184,7 @@ def _concat_axis_None(arrays): fill_end = fill_start + array.size if array.flags.c_contiguous: hev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( - src=dpt_ext.reshape(array, -1), + src=dpt.reshape(array, -1), dst=res[fill_start:fill_end], sycl_queue=exec_q, depends=deps, @@ -196,7 +195,7 @@ def _concat_axis_None(arrays): # _copy_usm_ndarray_for_reshape requires src and dst to have # the same data type if not array.dtype == res_dtype: - src2_ = dpt_ext.empty_like(src_, dtype=res_dtype) + src2_ = dpt.empty_like(src_, dtype=res_dtype) ht_copy_ev, cpy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=src_, dst=src2_, sycl_queue=exec_q, depends=deps ) @@ -334,7 +333,7 @@ def concat(arrays, /, *, axis=0): X0_shape[i] if i != axis else res_shape_axis for i in range(X0.ndim) ) - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q ) @@ -402,7 +401,7 @@ def expand_dims(X, /, *, axis=0): shape_it = iter(X.shape) shape = tuple(1 if ax in axis else next(shape_it) for ax in range(out_ndim)) - return dpt_ext.reshape(X, shape) + return dpt.reshape(X, shape) def flip(X, /, *, axis=None): @@ -485,7 +484,7 @@ def moveaxis(X, source, destination, /): for src, dst in sorted(zip(destination, source)): ind.insert(src, dst) - return dpt_ext.permute_dims(X, tuple(ind)) + return dpt.permute_dims(X, tuple(ind)) def permute_dims(X, /, axes): @@ -602,7 +601,7 @@ def repeat(x, repeats, /, *, axis=None): ) ) dpctl.utils.validate_usm_type(usm_type, allow_none=False) - if not dpt_ext.can_cast(repeats.dtype, dpt.int64, casting="same_kind"): + if not dpt.can_cast(repeats.dtype, dpt.int64, 
casting="same_kind"): raise TypeError( f"'repeats' data type {repeats.dtype} cannot be cast to " "'int64' according to the casting rule ''safe.''" @@ -624,7 +623,7 @@ def repeat(x, repeats, /, *, axis=None): "'repeats' array must be broadcastable to the size of " "the repeated axis" ) - if not dpt_ext.all(repeats >= 0): + if not dpt.all(repeats >= 0): raise ValueError("'repeats' elements must be positive") elif isinstance(repeats, (tuple, list, range)): @@ -643,10 +642,10 @@ def repeat(x, repeats, /, *, axis=None): "`repeats` sequence must have the same length as the " "repeated axis" ) - repeats = dpt_ext.asarray( + repeats = dpt.asarray( repeats, dtype=dpt.int64, usm_type=usm_type, sycl_queue=exec_q ) - if not dpt_ext.all(repeats >= 0): + if not dpt.all(repeats >= 0): raise ValueError("`repeats` elements must be positive") else: raise TypeError( @@ -662,7 +661,7 @@ def repeat(x, repeats, /, *, axis=None): res_shape = x_shape[:axis] + (res_axis_size,) + x_shape[axis + 1 :] else: res_shape = (res_axis_size,) - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=x.dtype, usm_type=usm_type, sycl_queue=exec_q ) if res_axis_size > 0: @@ -677,7 +676,7 @@ def repeat(x, repeats, /, *, axis=None): _manager.add_event_pair(ht_rep_ev, rep_ev) else: if repeats.dtype != dpt.int64: - rep_buf = dpt_ext.empty( + rep_buf = dpt.empty( repeats.shape, dtype=dpt.int64, usm_type=usm_type, @@ -687,7 +686,7 @@ def repeat(x, repeats, /, *, axis=None): src=repeats, dst=rep_buf, sycl_queue=exec_q, depends=dep_evs ) _manager.add_event_pair(ht_copy_ev, copy_ev) - cumsum = dpt_ext.empty( + cumsum = dpt.empty( (axis_size,), dtype=dpt.int64, usm_type=usm_type, @@ -703,7 +702,7 @@ def repeat(x, repeats, /, *, axis=None): ) else: res_shape = (res_axis_size,) - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=x.dtype, usm_type=usm_type, @@ -720,7 +719,7 @@ def repeat(x, repeats, /, *, axis=None): ) _manager.add_event_pair(ht_rep_ev, rep_ev) else: - cumsum = dpt_ext.empty( + cumsum = 
dpt.empty( (axis_size,), dtype=dpt.int64, usm_type=usm_type, @@ -735,7 +734,7 @@ def repeat(x, repeats, /, *, axis=None): ) else: res_shape = (res_axis_size,) - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=x.dtype, usm_type=usm_type, @@ -792,7 +791,7 @@ def roll(x, /, shift, *, axis=None): _manager = dputils.SequentialOrderManager[exec_q] if axis is None: shift = operator.index(shift) - res = dpt_ext.empty( + res = dpt.empty( x.shape, dtype=x.dtype, usm_type=x.usm_type, sycl_queue=exec_q ) sz = operator.index(x.size) @@ -819,7 +818,7 @@ def roll(x, /, shift, *, axis=None): n_i = operator.index(shape[ax]) shifted = shifts[ax] + operator.index(sh) shifts[ax] = (shifted % n_i) if n_i > 0 else 0 - res = dpt_ext.empty( + res = dpt.empty( x.shape, dtype=x.dtype, usm_type=x.usm_type, sycl_queue=exec_q ) dep_evs = _manager.submitted_events @@ -872,7 +871,7 @@ def squeeze(X, /, axis=None): if new_shape == X.shape: return X else: - return dpt_ext.reshape(X, new_shape) + return dpt.reshape(X, new_shape) def stack(arrays, /, *, axis=0): @@ -917,7 +916,7 @@ def stack(arrays, /, *, axis=0): for i in range(res_ndim) ) - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=res_dtype, usm_type=res_usm_type, sycl_queue=exec_q ) @@ -971,7 +970,7 @@ def swapaxes(X, axis1, axis2): ind = list(range(0, X.ndim)) ind[axis1] = axis2 ind[axis2] = axis1 - return dpt_ext.permute_dims(X, tuple(ind)) + return dpt.permute_dims(X, tuple(ind)) def unstack(X, /, *, axis=0): @@ -998,7 +997,7 @@ def unstack(X, /, *, axis=0): raise TypeError(f"Expected usm_ndarray type, got {type(X)}.") axis = normalize_axis_index(axis, X.ndim) - Y = dpt_ext.moveaxis(X, axis, 0) + Y = dpt.moveaxis(X, axis, 0) return tuple(Y[i] for i in range(Y.shape[0])) @@ -1049,11 +1048,11 @@ def tile(x, repetitions, /): if rep_dims < x_dims: repetitions = (x_dims - rep_dims) * (1,) + repetitions elif x_dims < rep_dims: - x = dpt_ext.reshape(x, (rep_dims - x_dims) * (1,) + x.shape) + x = dpt.reshape(x, (rep_dims - 
x_dims) * (1,) + x.shape) res_shape = tuple(map(lambda sh, rep: sh * rep, x.shape, repetitions)) # case of empty input if x.size == 0: - return dpt_ext.empty( + return dpt.empty( res_shape, dtype=x.dtype, usm_type=x.usm_type, @@ -1061,7 +1060,7 @@ def tile(x, repetitions, /): ) in_sh = x.shape if res_shape == in_sh: - return dpt_ext.copy(x) + return dpt.copy(x) expanded_sh = [] broadcast_sh = [] out_sz = 1 @@ -1082,12 +1081,12 @@ def tile(x, repetitions, /): exec_q = x.sycl_queue xdt = x.dtype xut = x.usm_type - res = dpt_ext.empty((out_sz,), dtype=xdt, usm_type=xut, sycl_queue=exec_q) + res = dpt.empty((out_sz,), dtype=xdt, usm_type=xut, sycl_queue=exec_q) # no need to copy data for empty output if out_sz > 0: - x = dpt_ext.broadcast_to( + x = dpt.broadcast_to( # this reshape should never copy - dpt_ext.reshape(x, expanded_sh), + dpt.reshape(x, expanded_sh), broadcast_sh, ) # copy broadcast input into flat array @@ -1097,4 +1096,4 @@ def tile(x, repetitions, /): src=x, dst=res, sycl_queue=exec_q, depends=dep_evs ) _manager.add_event_pair(hev, cp_ev) - return dpt_ext.reshape(res, res_shape) + return dpt.reshape(res, res_shape) diff --git a/dpctl_ext/tensor/_reduction.py b/dpctl_ext/tensor/_reduction.py index 2daf07b81d8..79e620605f0 100644 --- a/dpctl_ext/tensor/_reduction.py +++ b/dpctl_ext/tensor/_reduction.py @@ -27,12 +27,11 @@ # ***************************************************************************** import dpctl -import dpctl.tensor as dpt from dpctl.utils import ExecutionPlacementError, SequentialOrderManager # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti import dpctl_ext.tensor._tensor_reductions_impl as tri @@ -58,7 +57,7 @@ def _comparison_over_axis(x, axis, keepdims, out, _reduction_fn): axis = (axis,) axis = normalize_axis_tuple(axis, nd, "axis") perm = [i for i in range(nd) if i not in axis] + 
list(axis) - x_tmp = dpt_ext.permute_dims(x, perm) + x_tmp = dpt.permute_dims(x, perm) red_nd = len(axis) if any([x_tmp.shape[i] == 0 for i in range(-red_nd, 0)]): raise ValueError("reduction cannot be performed over zero-size axes") @@ -96,12 +95,12 @@ def _comparison_over_axis(x, axis, keepdims, out, _reduction_fn): "Input and output allocation queues are not compatible" ) if keepdims: - out = dpt_ext.squeeze(out, axis=axis) + out = dpt.squeeze(out, axis=axis) orig_out = out if ti._array_overlap(x, out): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=exec_q ) @@ -138,7 +137,7 @@ def _comparison_over_axis(x, axis, keepdims, out, _reduction_fn): if keepdims: res_shape = res_shape + (1,) * red_nd inv_perm = sorted(range(nd), key=lambda d: perm[d]) - out = dpt_ext.permute_dims(dpt_ext.reshape(out, res_shape), inv_perm) + out = dpt.permute_dims(dpt.reshape(out, res_shape), inv_perm) return out @@ -164,7 +163,7 @@ def _reduction_over_axis( axis = (axis,) axis = normalize_axis_tuple(axis, nd, "axis") perm = [i for i in range(nd) if i not in axis] + list(axis) - arr = dpt_ext.permute_dims(x, perm) + arr = dpt.permute_dims(x, perm) red_nd = len(axis) res_shape = arr.shape[: nd - red_nd] q = x.sycl_queue @@ -212,12 +211,12 @@ def _reduction_over_axis( "Input and output allocation queues are not compatible" ) if keepdims: - out = dpt_ext.squeeze(out, axis=axis) + out = dpt.squeeze(out, axis=axis) orig_out = out if ti._array_overlap(x, out) and implemented_types: - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q ) @@ -253,7 +252,7 @@ def _reduction_over_axis( out = orig_out else: if _dtype_supported(res_dt, res_dt, res_usm_type, q): - tmp = dpt_ext.empty( + tmp = dpt.empty( arr.shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q ) 
ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray( @@ -270,14 +269,14 @@ def _reduction_over_axis( _manager.add_event_pair(ht_e_red, red_ev) else: buf_dt = _default_reduction_type_fn(inp_dt, q) - tmp = dpt_ext.empty( + tmp = dpt.empty( arr.shape, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q ) ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray( src=arr, dst=tmp, sycl_queue=q, depends=dep_evs ) _manager.add_event_pair(ht_e_cpy, cpy_e) - tmp_res = dpt_ext.empty( + tmp_res = dpt.empty( res_shape, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q ) ht_e_red, r_e = _reduction_fn( @@ -296,7 +295,7 @@ def _reduction_over_axis( if keepdims: res_shape = res_shape + (1,) * red_nd inv_perm = sorted(range(nd), key=lambda d: perm[d]) - out = dpt_ext.permute_dims(dpt_ext.reshape(out, res_shape), inv_perm) + out = dpt.permute_dims(dpt.reshape(out, res_shape), inv_perm) return out @@ -320,7 +319,7 @@ def _search_over_axis(x, axis, keepdims, out, _reduction_fn): ) axis = normalize_axis_tuple(axis, nd, "axis") perm = [i for i in range(nd) if i not in axis] + list(axis) - x_tmp = dpt_ext.permute_dims(x, perm) + x_tmp = dpt.permute_dims(x, perm) axis = normalize_axis_tuple(axis, nd, "axis") red_nd = len(axis) if any([x_tmp.shape[i] == 0 for i in range(-red_nd, 0)]): @@ -359,12 +358,12 @@ def _search_over_axis(x, axis, keepdims, out, _reduction_fn): "Input and output allocation queues are not compatible" ) if keepdims: - out = dpt_ext.squeeze(out, axis=axis) + out = dpt.squeeze(out, axis=axis) orig_out = out if ti._array_overlap(x, out) and red_nd > 0: - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=exec_q ) @@ -395,7 +394,7 @@ def _search_over_axis(x, axis, keepdims, out, _reduction_fn): if keepdims: res_shape = res_shape + (1,) * red_nd inv_perm = sorted(range(nd), key=lambda d: perm[d]) - out = dpt_ext.permute_dims(dpt_ext.reshape(out, res_shape), 
inv_perm) + out = dpt.permute_dims(dpt.reshape(out, res_shape), inv_perm) return out @@ -506,7 +505,7 @@ def count_nonzero(x, /, *, axis=None, keepdims=False, out=None): type. """ if x.dtype != dpt.bool: - x = dpt_ext.astype(x, dpt.bool, copy=False) + x = dpt.astype(x, dpt.bool, copy=False) return sum( x, axis=axis, diff --git a/dpctl_ext/tensor/_reshape.py b/dpctl_ext/tensor/_reshape.py index 23cf47a8356..7ecdace4fc4 100644 --- a/dpctl_ext/tensor/_reshape.py +++ b/dpctl_ext/tensor/_reshape.py @@ -28,13 +28,12 @@ import operator -import dpctl.tensor as dpt import dpctl.utils import numpy as np # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt from ._tensor_impl import ( _copy_usm_ndarray_for_reshape, @@ -189,7 +188,7 @@ def reshape(X, /, shape, *, order="C", copy=None): src=X, dst=flat_res, sycl_queue=copy_q, depends=dep_evs ) else: - X_t = dpt_ext.permute_dims(X, range(X.ndim - 1, -1, -1)) + X_t = dpt.permute_dims(X, range(X.ndim - 1, -1, -1)) hev, r_e = _copy_usm_ndarray_for_reshape( src=X_t, dst=flat_res, sycl_queue=copy_q, depends=dep_evs ) diff --git a/dpctl_ext/tensor/_scalar_utils.py b/dpctl_ext/tensor/_scalar_utils.py index 3ab92b42ad0..832121aea85 100644 --- a/dpctl_ext/tensor/_scalar_utils.py +++ b/dpctl_ext/tensor/_scalar_utils.py @@ -29,13 +29,14 @@ import numbers import dpctl.memory as dpm -import dpctl.tensor as dpt import numpy as np -from dpctl.tensor._usmarray import _is_object_with_buffer_protocol as _is_buffer # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt +from dpctl_ext.tensor._usmarray import ( + _is_object_with_buffer_protocol as _is_buffer, +) from ._type_utils import ( WeakBooleanType, @@ -63,7 +64,7 @@ def _get_dtype(o, dev): if isinstance(o, dpt.usm_ndarray): return o.dtype if hasattr(o, "__sycl_usm_array_interface__"): - return 
dpt_ext.asarray(o).dtype + return dpt.asarray(o).dtype if _is_buffer(o): host_dt = np.array(o).dtype dev_dt = _to_device_supported_dtype(host_dt, dev) diff --git a/dpctl_ext/tensor/_search_functions.py b/dpctl_ext/tensor/_search_functions.py index 285a02b42bb..aae185b64e2 100644 --- a/dpctl_ext/tensor/_search_functions.py +++ b/dpctl_ext/tensor/_search_functions.py @@ -27,12 +27,11 @@ # ***************************************************************************** import dpctl -import dpctl.tensor as dpt from dpctl.utils import ExecutionPlacementError, SequentialOrderManager # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti from ._copy_utils import _empty_like_orderK, _empty_like_triple_orderK @@ -111,7 +110,7 @@ def _resolve_two_weak_types(o1_dtype, o2_dtype, dev): def _where_result_type(dt1, dt2, dev): - res_dtype = dpt_ext.result_type(dt1, dt2) + res_dtype = dpt.result_type(dt1, dt2) fp16 = dev.has_aspect_fp16 fp64 = dev.has_aspect_fp64 @@ -291,7 +290,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): if ti._array_overlap(condition, out) and not ti._same_logical_tensors( condition, out ): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if isinstance(x1, dpt.usm_ndarray): if ( @@ -299,7 +298,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): and not ti._same_logical_tensors(x1, out) and x1_dtype == out_dtype ): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if isinstance(x2, dpt.usm_ndarray): if ( @@ -307,7 +306,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): and not ti._same_logical_tensors(x2, out) and x2_dtype == out_dtype ): - out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) if order == "A": order = ( @@ -323,9 +322,9 @@ def where(condition, x1, x2, /, *, order="K", out=None): else "C" ) if not isinstance(x1, dpt.usm_ndarray): - x1 = 
dpt_ext.asarray(x1, dtype=x1_dtype, sycl_queue=exec_q) + x1 = dpt.asarray(x1, dtype=x1_dtype, sycl_queue=exec_q) if not isinstance(x2, dpt.usm_ndarray): - x2 = dpt_ext.asarray(x2, dtype=x2_dtype, sycl_queue=exec_q) + x2 = dpt.asarray(x2, dtype=x2_dtype, sycl_queue=exec_q) if condition.size == 0: if out is not None: @@ -342,7 +341,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): exec_q, ) else: - return dpt_ext.empty( + return dpt.empty( res_shape, dtype=out_dtype, order=order, @@ -356,7 +355,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): if order == "K": _x1 = _empty_like_orderK(x1, out_dtype) else: - _x1 = dpt_ext.empty_like(x1, dtype=out_dtype, order=order) + _x1 = dpt.empty_like(x1, dtype=out_dtype, order=order) ht_copy1_ev, copy1_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=x1, dst=_x1, sycl_queue=exec_q, depends=dep_evs ) @@ -367,7 +366,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): if order == "K": _x2 = _empty_like_orderK(x2, out_dtype) else: - _x2 = dpt_ext.empty_like(x2, dtype=out_dtype, order=order) + _x2 = dpt.empty_like(x2, dtype=out_dtype, order=order) ht_copy2_ev, copy2_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=x2, dst=_x2, sycl_queue=exec_q, depends=dep_evs ) @@ -380,7 +379,7 @@ def where(condition, x1, x2, /, *, order="K", out=None): condition, x1, x2, out_dtype, res_shape, out_usm_type, exec_q ) else: - out = dpt_ext.empty( + out = dpt.empty( res_shape, dtype=out_dtype, order=order, @@ -389,11 +388,11 @@ def where(condition, x1, x2, /, *, order="K", out=None): ) if condition_shape != res_shape: - condition = dpt_ext.broadcast_to(condition, res_shape) + condition = dpt.broadcast_to(condition, res_shape) if x1_shape != res_shape: - x1 = dpt_ext.broadcast_to(x1, res_shape) + x1 = dpt.broadcast_to(x1, res_shape) if x2_shape != res_shape: - x2 = dpt_ext.broadcast_to(x2, res_shape) + x2 = dpt.broadcast_to(x2, res_shape) dep_evs = _manager.submitted_events hev, where_ev = ti._where( diff --git 
a/dpctl_ext/tensor/_searchsorted.py b/dpctl_ext/tensor/_searchsorted.py index 2d4807fb0d0..4c680a49b07 100644 --- a/dpctl_ext/tensor/_searchsorted.py +++ b/dpctl_ext/tensor/_searchsorted.py @@ -32,10 +32,6 @@ import dpctl import dpctl.utils as du -# TODO: revert to `from ._usmarray import...` -# when dpnp fully migrates dpctl/tensor -from dpctl.tensor._usmarray import usm_ndarray - from ._copy_utils import _empty_like_orderK from ._ctors import empty from ._tensor_impl import _copy_usm_ndarray_into_usm_ndarray as ti_copy @@ -46,6 +42,10 @@ from ._tensor_sorting_impl import _searchsorted_left, _searchsorted_right from ._type_utils import isdtype, result_type +# TODO: revert to `from ._usmarray import...` +# when dpnp fully migrates dpctl/tensor +from ._usmarray import usm_ndarray + def searchsorted( x1: usm_ndarray, diff --git a/dpctl_ext/tensor/_set_functions.py b/dpctl_ext/tensor/_set_functions.py index 2672e082d18..29e4914ad63 100644 --- a/dpctl_ext/tensor/_set_functions.py +++ b/dpctl_ext/tensor/_set_functions.py @@ -28,12 +28,11 @@ from typing import NamedTuple, Optional, Union -import dpctl.tensor as dpt import dpctl.utils as du # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt from dpctl_ext.tensor._tensor_elementwise_impl import _not_equal, _subtract from ._copy_utils import _empty_like_orderK @@ -112,10 +111,10 @@ def unique_values(x: dpt.usm_ndarray) -> dpt.usm_ndarray: if x.ndim == 1: fx = x else: - fx = dpt_ext.reshape(x, (x.size,), order="C") + fx = dpt.reshape(x, (x.size,), order="C") if fx.size == 0: return fx - s = dpt_ext.empty_like(fx, order="C") + s = dpt.empty_like(fx, order="C") _manager = du.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events if fx.flags.c_contiguous: @@ -128,7 +127,7 @@ def unique_values(x: dpt.usm_ndarray) -> dpt.usm_ndarray: ) _manager.add_event_pair(ht_ev, sort_ev) else: - tmp = dpt_ext.empty_like(fx, 
order="C") + tmp = dpt.empty_like(fx, order="C") ht_ev, copy_ev = _copy_usm_ndarray_into_usm_ndarray( src=fx, dst=tmp, sycl_queue=exec_q, depends=dep_evs ) @@ -141,7 +140,7 @@ def unique_values(x: dpt.usm_ndarray) -> dpt.usm_ndarray: depends=[copy_ev], ) _manager.add_event_pair(ht_ev, sort_ev) - unique_mask = dpt_ext.empty(fx.shape, dtype="?", sycl_queue=exec_q) + unique_mask = dpt.empty(fx.shape, dtype="?", sycl_queue=exec_q) ht_ev, uneq_ev = _not_equal( src1=s[:-1], src2=s[1:], @@ -155,14 +154,14 @@ def unique_values(x: dpt.usm_ndarray) -> dpt.usm_ndarray: fill_value=True, dst=unique_mask[0], sycl_queue=exec_q ) _manager.add_event_pair(ht_ev, one_ev) - cumsum = dpt_ext.empty(s.shape, dtype=dpt.int64, sycl_queue=exec_q) + cumsum = dpt.empty(s.shape, dtype=dpt.int64, sycl_queue=exec_q) # synchronizing call n_uniques = mask_positions( unique_mask, cumsum, sycl_queue=exec_q, depends=[one_ev, uneq_ev] ) if n_uniques == fx.size: return s - unique_vals = dpt_ext.empty( + unique_vals = dpt.empty( n_uniques, dtype=x.dtype, usm_type=x.usm_type, sycl_queue=exec_q ) ht_ev, ex_e = _extract( @@ -206,11 +205,11 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult: if x.ndim == 1: fx = x else: - fx = dpt_ext.reshape(x, (x.size,), order="C") + fx = dpt.reshape(x, (x.size,), order="C") ind_dt = default_device_index_type(exec_q) if fx.size == 0: - return UniqueCountsResult(fx, dpt_ext.empty_like(fx, dtype=ind_dt)) - s = dpt_ext.empty_like(fx, order="C") + return UniqueCountsResult(fx, dpt.empty_like(fx, dtype=ind_dt)) + s = dpt.empty_like(fx, order="C") _manager = du.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -224,7 +223,7 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult: ) _manager.add_event_pair(ht_ev, sort_ev) else: - tmp = dpt_ext.empty_like(fx, order="C") + tmp = dpt.empty_like(fx, order="C") ht_ev, copy_ev = _copy_usm_ndarray_into_usm_ndarray( src=fx, dst=tmp, sycl_queue=exec_q, depends=dep_evs ) @@ -237,7 +236,7 @@ def 
unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult: depends=[copy_ev], ) _manager.add_event_pair(ht_ev, sort_ev) - unique_mask = dpt_ext.empty(s.shape, dtype="?", sycl_queue=exec_q) + unique_mask = dpt.empty(s.shape, dtype="?", sycl_queue=exec_q) ht_ev, uneq_ev = _not_equal( src1=s[:-1], src2=s[1:], @@ -251,9 +250,7 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult: fill_value=True, dst=unique_mask[0], sycl_queue=exec_q ) _manager.add_event_pair(ht_ev, one_ev) - cumsum = dpt_ext.empty( - unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q - ) + cumsum = dpt.empty(unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q) # synchronizing call n_uniques = mask_positions( unique_mask, cumsum, sycl_queue=exec_q, depends=[one_ev, uneq_ev] @@ -261,11 +258,11 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult: if n_uniques == fx.size: return UniqueCountsResult( s, - dpt_ext.ones( + dpt.ones( n_uniques, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q ), ) - unique_vals = dpt_ext.empty( + unique_vals = dpt.empty( n_uniques, dtype=x.dtype, usm_type=x_usm_type, sycl_queue=exec_q ) # populate unique values @@ -278,10 +275,10 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult: sycl_queue=exec_q, ) _manager.add_event_pair(ht_ev, ex_e) - unique_counts = dpt_ext.empty( + unique_counts = dpt.empty( n_uniques + 1, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q ) - idx = dpt_ext.empty(x.size, dtype=ind_dt, sycl_queue=exec_q) + idx = dpt.empty(x.size, dtype=ind_dt, sycl_queue=exec_q) # writing into new allocation, no dependency ht_ev, id_ev = _linspace_step(start=0, dt=1, dst=idx, sycl_queue=exec_q) _manager.add_event_pair(ht_ev, id_ev) @@ -300,7 +297,7 @@ def unique_counts(x: dpt.usm_ndarray) -> UniqueCountsResult: x.size, dst=unique_counts[-1], sycl_queue=exec_q ) _manager.add_event_pair(ht_ev, set_ev) - _counts = dpt_ext.empty_like(unique_counts[1:]) + _counts = dpt.empty_like(unique_counts[1:]) ht_ev, sub_ev = _subtract( 
src1=unique_counts[1:], src2=unique_counts[:-1], @@ -342,11 +339,11 @@ def unique_inverse(x): if x.ndim == 1: fx = x else: - fx = dpt_ext.reshape(x, (x.size,), order="C") - sorting_ids = dpt_ext.empty_like(fx, dtype=ind_dt, order="C") - unsorting_ids = dpt_ext.empty_like(sorting_ids, dtype=ind_dt, order="C") + fx = dpt.reshape(x, (x.size,), order="C") + sorting_ids = dpt.empty_like(fx, dtype=ind_dt, order="C") + unsorting_ids = dpt.empty_like(sorting_ids, dtype=ind_dt, order="C") if fx.size == 0: - return UniqueInverseResult(fx, dpt_ext.reshape(unsorting_ids, x.shape)) + return UniqueInverseResult(fx, dpt.reshape(unsorting_ids, x.shape)) _manager = du.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -360,7 +357,7 @@ def unique_inverse(x): ) _manager.add_event_pair(ht_ev, sort_ev) else: - tmp = dpt_ext.empty_like(fx, order="C") + tmp = dpt.empty_like(fx, order="C") ht_ev, copy_ev = _copy_usm_ndarray_into_usm_ndarray( src=fx, dst=tmp, sycl_queue=exec_q, depends=dep_evs ) @@ -381,7 +378,7 @@ def unique_inverse(x): depends=[sort_ev], ) _manager.add_event_pair(ht_ev, argsort_ev) - s = dpt_ext.empty_like(fx) + s = dpt.empty_like(fx) # s = fx[sorting_ids] ht_ev, take_ev = _take( src=fx, @@ -393,7 +390,7 @@ def unique_inverse(x): depends=[sort_ev], ) _manager.add_event_pair(ht_ev, take_ev) - unique_mask = dpt_ext.empty(fx.shape, dtype="?", sycl_queue=exec_q) + unique_mask = dpt.empty(fx.shape, dtype="?", sycl_queue=exec_q) ht_ev, uneq_ev = _not_equal( src1=s[:-1], src2=s[1:], @@ -407,16 +404,14 @@ def unique_inverse(x): fill_value=True, dst=unique_mask[0], sycl_queue=exec_q ) _manager.add_event_pair(ht_ev, one_ev) - cumsum = dpt_ext.empty( - unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q - ) + cumsum = dpt.empty(unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q) # synchronizing call n_uniques = mask_positions( unique_mask, cumsum, sycl_queue=exec_q, depends=[uneq_ev, one_ev] ) if n_uniques == fx.size: - return UniqueInverseResult(s, 
dpt_ext.reshape(unsorting_ids, x.shape)) - unique_vals = dpt_ext.empty( + return UniqueInverseResult(s, dpt.reshape(unsorting_ids, x.shape)) + unique_vals = dpt.empty( n_uniques, dtype=x.dtype, usm_type=x_usm_type, sycl_queue=exec_q ) ht_ev, uv_ev = _extract( @@ -428,10 +423,10 @@ def unique_inverse(x): sycl_queue=exec_q, ) _manager.add_event_pair(ht_ev, uv_ev) - cum_unique_counts = dpt_ext.empty( + cum_unique_counts = dpt.empty( n_uniques + 1, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q ) - idx = dpt_ext.empty(x.size, dtype=ind_dt, sycl_queue=exec_q) + idx = dpt.empty(x.size, dtype=ind_dt, sycl_queue=exec_q) ht_ev, id_ev = _linspace_step(start=0, dt=1, dst=idx, sycl_queue=exec_q) _manager.add_event_pair(ht_ev, id_ev) ht_ev, extr_ev = _extract( @@ -448,7 +443,7 @@ def unique_inverse(x): x.size, dst=cum_unique_counts[-1], sycl_queue=exec_q ) _manager.add_event_pair(ht_ev, set_ev) - _counts = dpt_ext.empty_like(cum_unique_counts[1:]) + _counts = dpt.empty_like(cum_unique_counts[1:]) ht_ev, sub_ev = _subtract( src1=cum_unique_counts[1:], src2=cum_unique_counts[:-1], @@ -458,7 +453,7 @@ def unique_inverse(x): ) _manager.add_event_pair(ht_ev, sub_ev) - inv = dpt_ext.empty_like(x, dtype=ind_dt, order="C") + inv = dpt.empty_like(x, dtype=ind_dt, order="C") ht_ev, ssl_ev = _searchsorted_left( hay=unique_vals, needles=x, @@ -513,17 +508,17 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult: if x.ndim == 1: fx = x else: - fx = dpt_ext.reshape(x, (x.size,), order="C") - sorting_ids = dpt_ext.empty_like(fx, dtype=ind_dt, order="C") - unsorting_ids = dpt_ext.empty_like(sorting_ids, dtype=ind_dt, order="C") + fx = dpt.reshape(x, (x.size,), order="C") + sorting_ids = dpt.empty_like(fx, dtype=ind_dt, order="C") + unsorting_ids = dpt.empty_like(sorting_ids, dtype=ind_dt, order="C") if fx.size == 0: # original array contains no data # so it can be safely returned as values return UniqueAllResult( fx, sorting_ids, - dpt_ext.reshape(unsorting_ids, x.shape), - 
dpt_ext.empty_like(fx, dtype=ind_dt), + dpt.reshape(unsorting_ids, x.shape), + dpt.empty_like(fx, dtype=ind_dt), ) _manager = du.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -537,7 +532,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult: ) _manager.add_event_pair(ht_ev, sort_ev) else: - tmp = dpt_ext.empty_like(fx, order="C") + tmp = dpt.empty_like(fx, order="C") ht_ev, copy_ev = _copy_usm_ndarray_into_usm_ndarray( src=fx, dst=tmp, sycl_queue=exec_q, depends=dep_evs ) @@ -558,7 +553,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult: depends=[sort_ev], ) _manager.add_event_pair(ht_ev, args_ev) - s = dpt_ext.empty_like(fx) + s = dpt.empty_like(fx) # s = fx[sorting_ids] ht_ev, take_ev = _take( src=fx, @@ -570,7 +565,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult: depends=[sort_ev], ) _manager.add_event_pair(ht_ev, take_ev) - unique_mask = dpt_ext.empty(fx.shape, dtype="?", sycl_queue=exec_q) + unique_mask = dpt.empty(fx.shape, dtype="?", sycl_queue=exec_q) ht_ev, uneq_ev = _not_equal( src1=s[:-1], src2=s[1:], @@ -583,24 +578,22 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult: fill_value=True, dst=unique_mask[0], sycl_queue=exec_q ) _manager.add_event_pair(ht_ev, one_ev) - cumsum = dpt_ext.empty( - unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q - ) + cumsum = dpt.empty(unique_mask.shape, dtype=dpt.int64, sycl_queue=exec_q) # synchronizing call n_uniques = mask_positions( unique_mask, cumsum, sycl_queue=exec_q, depends=[uneq_ev, one_ev] ) if n_uniques == fx.size: - _counts = dpt_ext.ones( + _counts = dpt.ones( n_uniques, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q ) return UniqueAllResult( s, sorting_ids, - dpt_ext.reshape(unsorting_ids, x.shape), + dpt.reshape(unsorting_ids, x.shape), _counts, ) - unique_vals = dpt_ext.empty( + unique_vals = dpt.empty( n_uniques, dtype=x.dtype, usm_type=x_usm_type, sycl_queue=exec_q ) ht_ev, uv_ev = _extract( @@ -612,10 +605,10 @@ def unique_all(x: 
dpt.usm_ndarray) -> UniqueAllResult: sycl_queue=exec_q, ) _manager.add_event_pair(ht_ev, uv_ev) - cum_unique_counts = dpt_ext.empty( + cum_unique_counts = dpt.empty( n_uniques + 1, dtype=ind_dt, usm_type=x_usm_type, sycl_queue=exec_q ) - idx = dpt_ext.empty(x.size, dtype=ind_dt, sycl_queue=exec_q) + idx = dpt.empty(x.size, dtype=ind_dt, sycl_queue=exec_q) ht_ev, id_ev = _linspace_step(start=0, dt=1, dst=idx, sycl_queue=exec_q) _manager.add_event_pair(ht_ev, id_ev) ht_ev, extr_ev = _extract( @@ -632,7 +625,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult: x.size, dst=cum_unique_counts[-1], sycl_queue=exec_q ) _manager.add_event_pair(ht_ev, set_ev) - _counts = dpt_ext.empty_like(cum_unique_counts[1:]) + _counts = dpt.empty_like(cum_unique_counts[1:]) ht_ev, sub_ev = _subtract( src1=cum_unique_counts[1:], src2=cum_unique_counts[:-1], @@ -642,7 +635,7 @@ def unique_all(x: dpt.usm_ndarray) -> UniqueAllResult: ) _manager.add_event_pair(ht_ev, sub_ev) - inv = dpt_ext.empty_like(x, dtype=ind_dt, order="C") + inv = dpt.empty_like(x, dtype=ind_dt, order="C") ht_ev, ssl_ev = _searchsorted_left( hay=unique_vals, needles=x, @@ -734,26 +727,26 @@ def isin( x_sh = _get_shape(x) if isinstance(test_elements, dpt.usm_ndarray) and test_elements.size == 0: if invert: - return dpt_ext.ones( + return dpt.ones( x_sh, dtype=dpt.bool, usm_type=res_usm_type, sycl_queue=exec_q ) else: - return dpt_ext.zeros( + return dpt.zeros( x_sh, dtype=dpt.bool, usm_type=res_usm_type, sycl_queue=exec_q ) dt1, dt2 = _resolve_weak_types_all_py_ints(x_dt, test_dt, sycl_dev) - dt = _to_device_supported_dtype(dpt_ext.result_type(dt1, dt2), sycl_dev) + dt = _to_device_supported_dtype(dpt.result_type(dt1, dt2), sycl_dev) if not isinstance(x, dpt.usm_ndarray): - x_arr = dpt_ext.asarray( + x_arr = dpt.asarray( x, dtype=dt1, usm_type=res_usm_type, sycl_queue=exec_q ) else: x_arr = x if not isinstance(test_elements, dpt.usm_ndarray): - test_arr = dpt_ext.asarray( + test_arr = dpt.asarray( test_elements, 
dtype=dt2, usm_type=res_usm_type, sycl_queue=exec_q ) else: @@ -773,7 +766,7 @@ def isin( if test_dt != dt: # copy into C-contiguous memory, because the array will be flattened - test_buf = dpt_ext.empty_like( + test_buf = dpt.empty_like( test_arr, dtype=dt, order="C", usm_type=res_usm_type ) ht_ev, ev = _copy_usm_ndarray_into_usm_ndarray( @@ -783,10 +776,10 @@ def isin( else: test_buf = test_arr - test_buf = dpt_ext.reshape(test_buf, -1) - test_buf = dpt_ext.sort(test_buf) + test_buf = dpt.reshape(test_buf, -1) + test_buf = dpt.sort(test_buf) - dst = dpt_ext.empty_like( + dst = dpt.empty_like( x_buf, dtype=dpt.bool, usm_type=res_usm_type, order="C" ) diff --git a/dpctl_ext/tensor/_sorting.py b/dpctl_ext/tensor/_sorting.py index 24693a40888..42cd9e1b44b 100644 --- a/dpctl_ext/tensor/_sorting.py +++ b/dpctl_ext/tensor/_sorting.py @@ -29,12 +29,11 @@ import operator from typing import NamedTuple -import dpctl.tensor as dpt import dpctl.utils as du # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti from ._numpy_helper import normalize_axis_index @@ -98,7 +97,7 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None): nd = x.ndim if nd == 0: axis = normalize_axis_index(axis, ndim=1, msg_prefix="axis") - return dpt_ext.copy(x, order="C") + return dpt.copy(x, order="C") else: axis = normalize_axis_index(axis, ndim=nd, msg_prefix="axis") a1 = axis + 1 @@ -109,7 +108,7 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None): perm = [i for i in range(nd) if i != axis] + [ axis, ] - arr = dpt_ext.permute_dims(x, perm) + arr = dpt.permute_dims(x, perm) if kind is None: kind = "stable" if not isinstance(kind, str) or kind not in [ @@ -138,7 +137,7 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None): _manager = du.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events if 
arr.flags.c_contiguous: - res = dpt_ext.empty_like(arr, order="C") + res = dpt.empty_like(arr, order="C") ht_ev, impl_ev = impl_fn( src=arr, trailing_dims_to_sort=1, @@ -148,12 +147,12 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None): ) _manager.add_event_pair(ht_ev, impl_ev) else: - tmp = dpt_ext.empty_like(arr, order="C") + tmp = dpt.empty_like(arr, order="C") ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=arr, dst=tmp, sycl_queue=exec_q, depends=dep_evs ) _manager.add_event_pair(ht_ev, copy_ev) - res = dpt_ext.empty_like(arr, order="C") + res = dpt.empty_like(arr, order="C") ht_ev, impl_ev = impl_fn( src=tmp, trailing_dims_to_sort=1, @@ -164,7 +163,7 @@ def sort(x, /, *, axis=-1, descending=False, stable=True, kind=None): _manager.add_event_pair(ht_ev, impl_ev) if a1 != nd: inv_perm = sorted(range(nd), key=lambda d: perm[d]) - res = dpt_ext.permute_dims(res, inv_perm) + res = dpt.permute_dims(res, inv_perm) return res @@ -214,7 +213,7 @@ def argsort(x, axis=-1, descending=False, stable=True, kind=None): nd = x.ndim if nd == 0: axis = normalize_axis_index(axis, ndim=1, msg_prefix="axis") - return dpt_ext.zeros_like( + return dpt.zeros_like( x, dtype=ti.default_device_index_type(x.sycl_queue), order="C" ) else: @@ -227,7 +226,7 @@ def argsort(x, axis=-1, descending=False, stable=True, kind=None): perm = [i for i in range(nd) if i != axis] + [ axis, ] - arr = dpt_ext.permute_dims(x, perm) + arr = dpt.permute_dims(x, perm) if kind is None: kind = "stable" if not isinstance(kind, str) or kind not in [ @@ -257,7 +256,7 @@ def argsort(x, axis=-1, descending=False, stable=True, kind=None): dep_evs = _manager.submitted_events index_dt = ti.default_device_index_type(exec_q) if arr.flags.c_contiguous: - res = dpt_ext.empty_like(arr, dtype=index_dt, order="C") + res = dpt.empty_like(arr, dtype=index_dt, order="C") ht_ev, impl_ev = impl_fn( src=arr, trailing_dims_to_sort=1, @@ -267,12 +266,12 @@ def argsort(x, axis=-1, descending=False, 
stable=True, kind=None): ) _manager.add_event_pair(ht_ev, impl_ev) else: - tmp = dpt_ext.empty_like(arr, order="C") + tmp = dpt.empty_like(arr, order="C") ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=arr, dst=tmp, sycl_queue=exec_q, depends=dep_evs ) _manager.add_event_pair(ht_ev, copy_ev) - res = dpt_ext.empty_like(arr, dtype=index_dt, order="C") + res = dpt.empty_like(arr, dtype=index_dt, order="C") ht_ev, impl_ev = impl_fn( src=tmp, trailing_dims_to_sort=1, @@ -283,7 +282,7 @@ def argsort(x, axis=-1, descending=False, stable=True, kind=None): _manager.add_event_pair(ht_ev, impl_ev) if a1 != nd: inv_perm = sorted(range(nd), key=lambda d: perm[d]) - res = dpt_ext.permute_dims(res, inv_perm) + res = dpt.permute_dims(res, inv_perm) return res @@ -354,8 +353,8 @@ def top_k(x, k, /, *, axis=None, mode="largest"): if k > 1: raise ValueError(f"`k`={k} is out of bounds 1") return TopKResult( - dpt_ext.copy(x, order="C"), - dpt_ext.zeros_like( + dpt.copy(x, order="C"), + dpt.zeros_like( x, dtype=ti.default_device_index_type(x.sycl_queue) ), ) @@ -373,7 +372,7 @@ def top_k(x, k, /, *, axis=None, mode="largest"): perm = [i for i in range(nd) if i != axis] + [ axis, ] - arr = dpt_ext.permute_dims(x, perm) + arr = dpt.permute_dims(x, perm) n_search_dims = 1 res_sh = arr.shape[: nd - 1] + (k,) @@ -386,14 +385,14 @@ def top_k(x, k, /, *, axis=None, mode="largest"): res_usm_type = arr.usm_type if arr.flags.c_contiguous: - vals = dpt_ext.empty( + vals = dpt.empty( res_sh, dtype=arr.dtype, usm_type=res_usm_type, order="C", sycl_queue=exec_q, ) - inds = dpt_ext.empty( + inds = dpt.empty( res_sh, dtype=ti.default_device_index_type(exec_q), usm_type=res_usm_type, @@ -412,19 +411,19 @@ def top_k(x, k, /, *, axis=None, mode="largest"): ) _manager.add_event_pair(ht_ev, impl_ev) else: - tmp = dpt_ext.empty_like(arr, order="C") + tmp = dpt.empty_like(arr, order="C") ht_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( src=arr, dst=tmp, sycl_queue=exec_q, depends=dep_evs 
) _manager.add_event_pair(ht_ev, copy_ev) - vals = dpt_ext.empty( + vals = dpt.empty( res_sh, dtype=arr.dtype, usm_type=res_usm_type, order="C", sycl_queue=exec_q, ) - inds = dpt_ext.empty( + inds = dpt.empty( res_sh, dtype=ti.default_device_index_type(exec_q), usm_type=res_usm_type, @@ -444,7 +443,7 @@ def top_k(x, k, /, *, axis=None, mode="largest"): _manager.add_event_pair(ht_ev, impl_ev) if axis is not None and a1 != nd: inv_perm = sorted(range(nd), key=lambda d: perm[d]) - vals = dpt_ext.permute_dims(vals, inv_perm) - inds = dpt_ext.permute_dims(inds, inv_perm) + vals = dpt.permute_dims(vals, inv_perm) + inds = dpt.permute_dims(inds, inv_perm) return TopKResult(vals, inds) diff --git a/dpctl_ext/tensor/_statistical_functions.py b/dpctl_ext/tensor/_statistical_functions.py index 5513dfa7a65..c1544b84c6a 100644 --- a/dpctl_ext/tensor/_statistical_functions.py +++ b/dpctl_ext/tensor/_statistical_functions.py @@ -25,12 +25,11 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF # THE POSSIBILITY OF SUCH DAMAGE. 
-import dpctl.tensor as dpt import dpctl.utils as du # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_elementwise_impl as tei import dpctl_ext.tensor._tensor_impl as ti import dpctl_ext.tensor._tensor_reductions_impl as tri @@ -66,7 +65,7 @@ def _var_impl(x, axis, correction, keepdims): _manager = du.SequentialOrderManager[q] dep_evs = _manager.submitted_events if inp_dt != res_dt: - buf = dpt_ext.empty_like(x, dtype=res_dt) + buf = dpt.empty_like(x, dtype=res_dt) ht_e_buf, c_e1 = ti._copy_usm_ndarray_into_usm_ndarray( src=x, dst=buf, sycl_queue=q, depends=dep_evs ) @@ -74,18 +73,18 @@ def _var_impl(x, axis, correction, keepdims): else: buf = x # calculate mean - buf2 = dpt_ext.permute_dims(buf, perm) + buf2 = dpt.permute_dims(buf, perm) res_shape = buf2.shape[: nd - red_nd] # use keepdims=True path for later broadcasting if red_nd == 0: - mean_ary = dpt_ext.empty_like(buf) + mean_ary = dpt.empty_like(buf) dep_evs = _manager.submitted_events ht_e1, c_e2 = ti._copy_usm_ndarray_into_usm_ndarray( src=buf, dst=mean_ary, sycl_queue=q, depends=dep_evs ) _manager.add_event_pair(ht_e1, c_e2) else: - mean_ary = dpt_ext.empty( + mean_ary = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -103,8 +102,8 @@ def _var_impl(x, axis, correction, keepdims): mean_ary_shape = res_shape + (1,) * red_nd inv_perm = sorted(range(nd), key=lambda d: perm[d]) - mean_ary = dpt_ext.permute_dims( - dpt_ext.reshape(mean_ary, mean_ary_shape), inv_perm + mean_ary = dpt.permute_dims( + dpt.reshape(mean_ary, mean_ary_shape), inv_perm ) # divide in-place to get mean mean_ary_shape = mean_ary.shape @@ -116,9 +115,9 @@ def _var_impl(x, axis, correction, keepdims): _manager.add_event_pair(ht_e2, d_e1) # subtract mean from original array to get deviations - dev_ary = dpt_ext.empty_like(buf) + dev_ary = dpt.empty_like(buf) if mean_ary_shape != buf.shape: - 
mean_ary = dpt_ext.broadcast_to(mean_ary, buf.shape) + mean_ary = dpt.broadcast_to(mean_ary, buf.shape) ht_e4, su_e = tei._subtract( src1=buf, src2=mean_ary, dst=dev_ary, sycl_queue=q, depends=[d_e1] ) @@ -130,11 +129,11 @@ def _var_impl(x, axis, correction, keepdims): _manager.add_event_pair(ht_e5, sq_e) # take sum of squared deviations - dev_ary2 = dpt_ext.permute_dims(dev_ary, perm) + dev_ary2 = dpt.permute_dims(dev_ary, perm) if red_nd == 0: res = dev_ary else: - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, @@ -152,9 +151,7 @@ def _var_impl(x, axis, correction, keepdims): if keepdims: res_shape = res_shape + (1,) * red_nd inv_perm = sorted(range(nd), key=lambda d: perm[d]) - res = dpt_ext.permute_dims( - dpt_ext.reshape(res, res_shape), inv_perm - ) + res = dpt.permute_dims(dpt.reshape(res, res_shape), inv_perm) res_shape = res.shape # when nelems - correction <= 0, yield nans div = max(nelems - correction, 0) @@ -215,7 +212,7 @@ def mean(x, axis=None, keepdims=False): nelems *= x.shape[i] sum_nd = len(axis) perm = perm + list(axis) - arr2 = dpt_ext.permute_dims(x, perm) + arr2 = dpt.permute_dims(x, perm) res_shape = arr2.shape[: nd - sum_nd] q = x.sycl_queue inp_dt = x.dtype @@ -226,12 +223,12 @@ def mean(x, axis=None, keepdims=False): ) res_usm_type = x.usm_type if sum_nd == 0: - return dpt_ext.astype(x, res_dt, copy=True) + return dpt.astype(x, res_dt, copy=True) _manager = du.SequentialOrderManager[q] dep_evs = _manager.submitted_events if tri._sum_over_axis_dtype_supported(inp_dt, res_dt, res_usm_type, q): - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q ) ht_e1, r_e = tri._sum_over_axis( @@ -243,14 +240,14 @@ def mean(x, axis=None, keepdims=False): ) _manager.add_event_pair(ht_e1, r_e) else: - tmp = dpt_ext.empty( + tmp = dpt.empty( arr2.shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q ) ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray( 
src=arr2, dst=tmp, sycl_queue=q, depends=dep_evs ) _manager.add_event_pair(ht_e_cpy, cpy_e) - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q ) ht_e_red, r_e = tri._sum_over_axis( @@ -265,7 +262,7 @@ def mean(x, axis=None, keepdims=False): if keepdims: res_shape = res_shape + (1,) * sum_nd inv_perm = sorted(range(nd), key=lambda d: perm[d]) - res = dpt_ext.permute_dims(dpt_ext.reshape(res, res_shape), inv_perm) + res = dpt.permute_dims(dpt.reshape(res, res_shape), inv_perm) dep_evs = _manager.submitted_events ht_e2, div_e = tei._divide_by_scalar( diff --git a/dpctl_ext/tensor/_testing.py b/dpctl_ext/tensor/_testing.py index 5c7e9be0e2e..4c9f5ebac9a 100644 --- a/dpctl_ext/tensor/_testing.py +++ b/dpctl_ext/tensor/_testing.py @@ -26,13 +26,12 @@ # THE POSSIBILITY OF SUCH DAMAGE. # ***************************************************************************** -import dpctl.tensor as dpt import dpctl.utils as du import numpy as np # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt from ._manipulation_functions import _broadcast_shape_impl from ._type_utils import _to_device_supported_dtype @@ -44,82 +43,74 @@ def _allclose_complex_fp(z1, z2, atol, rtol, equal_nan): z2r = dpt.real(z2) z2i = dpt.imag(z2) if equal_nan: - check1 = dpt_ext.all( - dpt_ext.isnan(z1r) == dpt_ext.isnan(z2r) - ) and dpt_ext.all(dpt_ext.isnan(z1i) == dpt_ext.isnan(z2i)) + check1 = dpt.all(dpt.isnan(z1r) == dpt.isnan(z2r)) and dpt.all( + dpt.isnan(z1i) == dpt.isnan(z2i) + ) else: check1 = ( - dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z1r))) - and dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z1i))) + dpt.logical_not(dpt.any(dpt.isnan(z1r))) + and dpt.logical_not(dpt.any(dpt.isnan(z1i))) ) and ( - dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z2r))) - and dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(z2i))) + 
dpt.logical_not(dpt.any(dpt.isnan(z2r))) + and dpt.logical_not(dpt.any(dpt.isnan(z2i))) ) if not check1: return check1 - mr = dpt_ext.isinf(z1r) - mi = dpt_ext.isinf(z1i) - check2 = dpt_ext.all(mr == dpt_ext.isinf(z2r)) and dpt_ext.all( - mi == dpt_ext.isinf(z2i) - ) + mr = dpt.isinf(z1r) + mi = dpt.isinf(z1i) + check2 = dpt.all(mr == dpt.isinf(z2r)) and dpt.all(mi == dpt.isinf(z2i)) if not check2: return check2 - check3 = dpt_ext.all(z1r[mr] == z2r[mr]) and dpt_ext.all(z1i[mi] == z2i[mi]) + check3 = dpt.all(z1r[mr] == z2r[mr]) and dpt.all(z1i[mi] == z2i[mi]) if not check3: return check3 - mr = dpt_ext.isfinite(z1r) - mi = dpt_ext.isfinite(z1i) + mr = dpt.isfinite(z1r) + mi = dpt.isfinite(z1i) mv1 = z1r[mr] mv2 = z2r[mr] - check4 = dpt_ext.all( - dpt_ext.abs(mv1 - mv2) - < dpt_ext.maximum( - atol, rtol * dpt_ext.maximum(dpt_ext.abs(mv1), dpt_ext.abs(mv2)) - ) + check4 = dpt.all( + dpt.abs(mv1 - mv2) + < dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2))) ) if not check4: return check4 mv1 = z1i[mi] mv2 = z2i[mi] - check5 = dpt_ext.all( - dpt_ext.abs(mv1 - mv2) - <= dpt_ext.maximum( - atol, rtol * dpt_ext.maximum(dpt_ext.abs(mv1), dpt_ext.abs(mv2)) - ) + check5 = dpt.all( + dpt.abs(mv1 - mv2) + <= dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2))) ) return check5 def _allclose_real_fp(r1, r2, atol, rtol, equal_nan): if equal_nan: - check1 = dpt_ext.all(dpt_ext.isnan(r1) == dpt_ext.isnan(r2)) + check1 = dpt.all(dpt.isnan(r1) == dpt.isnan(r2)) else: - check1 = dpt_ext.logical_not( - dpt_ext.any(dpt_ext.isnan(r1)) - ) and dpt_ext.logical_not(dpt_ext.any(dpt_ext.isnan(r2))) + check1 = dpt.logical_not(dpt.any(dpt.isnan(r1))) and dpt.logical_not( + dpt.any(dpt.isnan(r2)) + ) if not check1: return check1 - mr = dpt_ext.isinf(r1) - check2 = dpt_ext.all(mr == dpt_ext.isinf(r2)) + mr = dpt.isinf(r1) + check2 = dpt.all(mr == dpt.isinf(r2)) if not check2: return check2 - check3 = dpt_ext.all(r1[mr] == r2[mr]) + check3 = dpt.all(r1[mr] == r2[mr]) if 
not check3: return check3 - m = dpt_ext.isfinite(r1) + m = dpt.isfinite(r1) mv1 = r1[m] mv2 = r2[m] - check4 = dpt_ext.all( - dpt_ext.abs(mv1 - mv2) - <= dpt_ext.maximum( - atol, rtol * dpt_ext.maximum(dpt_ext.abs(mv1), dpt_ext.abs(mv2)) - ) + check4 = dpt.all( + dpt.abs(mv1 - mv2) + <= dpt.maximum(atol, rtol * dpt.maximum(dpt.abs(mv1), dpt.abs(mv2))) ) return check4 def _allclose_others(r1, r2): - return dpt_ext.all(r1 == r2) + return dpt.all(r1 == r2) def allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False): @@ -160,11 +151,11 @@ def allclose(a1, a2, atol=1e-8, rtol=1e-5, equal_nan=False): else: res_dt = np.promote_types(b1.dtype, b2.dtype) res_dt = _to_device_supported_dtype(res_dt, exec_q.sycl_device) - b1 = dpt_ext.astype(b1, res_dt) - b2 = dpt_ext.astype(b2, res_dt) + b1 = dpt.astype(b1, res_dt) + b2 = dpt.astype(b2, res_dt) - b1 = dpt_ext.broadcast_to(b1, res_sh) - b2 = dpt_ext.broadcast_to(b2, res_sh) + b1 = dpt.broadcast_to(b1, res_sh) + b2 = dpt.broadcast_to(b2, res_sh) k = b1.dtype.kind if k == "c": diff --git a/dpctl_ext/tensor/_type_utils.py b/dpctl_ext/tensor/_type_utils.py index 1e386e15dfa..8c15053cb4c 100644 --- a/dpctl_ext/tensor/_type_utils.py +++ b/dpctl_ext/tensor/_type_utils.py @@ -28,12 +28,11 @@ from __future__ import annotations -import dpctl.tensor as dpt import numpy as np # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti @@ -450,7 +449,7 @@ def _resolve_weak_types_all_py_ints(o1_dtype, o2_dtype, dev): o1_dtype, WeakIntegralType ): o1_val = o1_dtype.get() - o2_iinfo = dpt_ext.iinfo(o2_dtype) + o2_iinfo = dpt.iinfo(o2_dtype) if (o1_val < o2_iinfo.min) or (o1_val > o2_iinfo.max): return dpt.dtype(np.min_scalar_type(o1_val)), o2_dtype return o2_dtype, o2_dtype @@ -473,7 +472,7 @@ def _resolve_weak_types_all_py_ints(o1_dtype, o2_dtype, dev): o2_dtype, WeakIntegralType ): o2_val = o2_dtype.get() - 
o1_iinfo = dpt_ext.iinfo(o1_dtype) + o1_iinfo = dpt.iinfo(o1_dtype) if (o2_val < o1_iinfo.min) or (o2_val > o1_iinfo.max): return o1_dtype, dpt.dtype(np.min_scalar_type(o2_val)) return o1_dtype, o1_dtype @@ -936,8 +935,8 @@ def _default_accumulation_dtype(inp_dt, q): res_dt = inp_dt elif inp_kind in "u": res_dt = dpt.dtype(ti.default_device_uint_type(q)) - res_ii = dpt_ext.iinfo(res_dt) - inp_ii = dpt_ext.iinfo(inp_dt) + res_ii = dpt.iinfo(res_dt) + inp_ii = dpt.iinfo(inp_dt) if inp_ii.min >= res_ii.min and inp_ii.max <= res_ii.max: pass else: @@ -956,7 +955,7 @@ def _default_accumulation_dtype_fp_types(inp_dt, q): inp_kind = inp_dt.kind if inp_kind in "biu": res_dt = dpt.dtype(ti.default_device_fp_type(q)) - can_cast_v = dpt_ext.can_cast(inp_dt, res_dt) + can_cast_v = dpt.can_cast(inp_dt, res_dt) if not can_cast_v: _fp64 = q.sycl_device.has_aspect_fp64 res_dt = dpt.float64 if _fp64 else dpt.float32 diff --git a/dpctl_ext/tensor/_utility_functions.py b/dpctl_ext/tensor/_utility_functions.py index 821f0954017..c892d777102 100644 --- a/dpctl_ext/tensor/_utility_functions.py +++ b/dpctl_ext/tensor/_utility_functions.py @@ -29,12 +29,11 @@ import builtins import operator -import dpctl.tensor as dpt import dpctl.utils as du # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti import dpctl_ext.tensor._tensor_reductions_impl as tri @@ -60,7 +59,7 @@ def _boolean_reduction(x, axis, keepdims, func): red_nd = nd # case of a scalar if red_nd == 0: - return dpt_ext.astype(x, dpt.bool) + return dpt.astype(x, dpt.bool) x_tmp = x res_shape = () perm = list(range(nd)) @@ -72,9 +71,9 @@ def _boolean_reduction(x, axis, keepdims, func): red_nd = len(axis) # check for axis=() if red_nd == 0: - return dpt_ext.astype(x, dpt.bool) + return dpt.astype(x, dpt.bool) perm = [i for i in range(nd) if i not in axis] + list(axis) - x_tmp = 
dpt_ext.permute_dims(x, perm) + x_tmp = dpt.permute_dims(x, perm) res_shape = x_tmp.shape[: nd - red_nd] exec_q = x.sycl_queue @@ -85,7 +84,7 @@ def _boolean_reduction(x, axis, keepdims, func): # always allocate the temporary as # int32 and usm-device to ensure that atomic updates # are supported - res_tmp = dpt_ext.empty( + res_tmp = dpt.empty( res_shape, dtype=dpt.int32, usm_type="device", @@ -101,7 +100,7 @@ def _boolean_reduction(x, axis, keepdims, func): _manager.add_event_pair(hev0, ev0) # copy to boolean result array - res = dpt_ext.empty( + res = dpt.empty( res_shape, dtype=dpt.bool, usm_type=res_usm_type, @@ -115,7 +114,7 @@ def _boolean_reduction(x, axis, keepdims, func): if keepdims: res_shape = res_shape + (1,) * red_nd inv_perm = sorted(range(nd), key=lambda d: perm[d]) - res = dpt_ext.permute_dims(dpt_ext.reshape(res, res_shape), inv_perm) + res = dpt.permute_dims(dpt.reshape(res, res_shape), inv_perm) return res @@ -292,7 +291,7 @@ def _concat_diff_input(arr, axis, prepend, append): if isinstance(prepend, dpt.usm_ndarray): a_prepend = prepend else: - a_prepend = dpt_ext.asarray( + a_prepend = dpt.asarray( prepend, dtype=prepend_dtype, usm_type=coerced_usm_type, @@ -301,7 +300,7 @@ def _concat_diff_input(arr, axis, prepend, append): if isinstance(append, dpt.usm_ndarray): a_append = append else: - a_append = dpt_ext.asarray( + a_append = dpt.asarray( append, dtype=append_dtype, usm_type=coerced_usm_type, @@ -309,11 +308,11 @@ def _concat_diff_input(arr, axis, prepend, append): ) if not prepend_shape: prepend_shape = arr_shape[:axis] + (1,) + arr_shape[axis + 1 :] - a_prepend = dpt_ext.broadcast_to(a_prepend, prepend_shape) + a_prepend = dpt.broadcast_to(a_prepend, prepend_shape) if not append_shape: append_shape = arr_shape[:axis] + (1,) + arr_shape[axis + 1 :] - a_append = dpt_ext.broadcast_to(a_append, append_shape) - return dpt_ext.concat((a_prepend, arr, a_append), axis=axis) + a_append = dpt.broadcast_to(a_append, append_shape) + return 
dpt.concat((a_prepend, arr, a_append), axis=axis) elif prepend is not None: q1, x_usm_type = arr.sycl_queue, arr.usm_type q2, prepend_usm_type = _get_queue_usm_type(prepend) @@ -361,7 +360,7 @@ def _concat_diff_input(arr, axis, prepend, append): if isinstance(prepend, dpt.usm_ndarray): a_prepend = prepend else: - a_prepend = dpt_ext.asarray( + a_prepend = dpt.asarray( prepend, dtype=prepend_dtype, usm_type=coerced_usm_type, @@ -369,8 +368,8 @@ def _concat_diff_input(arr, axis, prepend, append): ) if not prepend_shape: prepend_shape = arr_shape[:axis] + (1,) + arr_shape[axis + 1 :] - a_prepend = dpt_ext.broadcast_to(a_prepend, prepend_shape) - return dpt_ext.concat((a_prepend, arr), axis=axis) + a_prepend = dpt.broadcast_to(a_prepend, prepend_shape) + return dpt.concat((a_prepend, arr), axis=axis) elif append is not None: q1, x_usm_type = arr.sycl_queue, arr.usm_type q2, append_usm_type = _get_queue_usm_type(append) @@ -416,7 +415,7 @@ def _concat_diff_input(arr, axis, prepend, append): if isinstance(append, dpt.usm_ndarray): a_append = append else: - a_append = dpt_ext.asarray( + a_append = dpt.asarray( append, dtype=append_dtype, usm_type=coerced_usm_type, @@ -424,8 +423,8 @@ def _concat_diff_input(arr, axis, prepend, append): ) if not append_shape: append_shape = arr_shape[:axis] + (1,) + arr_shape[axis + 1 :] - a_append = dpt_ext.broadcast_to(a_append, append_shape) - return dpt_ext.concat((arr, a_append), axis=axis) + a_append = dpt.broadcast_to(a_append, append_shape) + return dpt.concat((arr, a_append), axis=axis) else: arr1 = arr return arr1 @@ -489,7 +488,7 @@ def diff(x, /, *, axis=-1, n=1, prepend=None, append=None): slice(None) if i != axis else slice(None, -1) for i in range(x_nd) ) - diff_op = dpt_ext.not_equal if x.dtype == dpt.bool else dpt_ext.subtract + diff_op = dpt.not_equal if x.dtype == dpt.bool else dpt.subtract if n > 1: arr_tmp0 = diff_op(arr[sl0], arr[sl1]) arr_tmp1 = diff_op(arr_tmp0[sl0], arr_tmp0[sl1]) From 
3883a1cc4e247fe8a049654074e4efebef2bc93a Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Thu, 5 Mar 2026 17:17:45 -0800 Subject: [PATCH 15/24] Switch fully to dpctl_ext.tensor in dpnp --- dpnp/__init__.py | 2 +- dpnp/dpnp_algo/dpnp_arraycreation.py | 25 +++---- dpnp/dpnp_algo/dpnp_elementwise_common.py | 59 ++++++++------- dpnp/dpnp_array.py | 8 +-- dpnp/dpnp_array_api_info.py | 4 +- dpnp/dpnp_iface.py | 9 ++- dpnp/dpnp_iface_arraycreation.py | 23 +++--- dpnp/dpnp_iface_indexing.py | 71 +++++++++---------- dpnp/dpnp_iface_manipulation.py | 67 ++++++++--------- dpnp/dpnp_iface_types.py | 9 ++- dpnp/dpnp_utils/dpnp_utils_statistics.py | 5 +- dpnp/exceptions/__init__.py | 5 +- dpnp/memory/_memory.py | 5 +- dpnp/tests/test_mathematical.py | 13 ++-- dpnp/tests/test_memory.py | 5 +- dpnp/tests/test_ndarray.py | 5 +- .../cupy/core_tests/test_dlpack.py | 4 +- 17 files changed, 152 insertions(+), 167 deletions(-) diff --git a/dpnp/__init__.py b/dpnp/__init__.py index 02420107972..0d5c79b9a67 100644 --- a/dpnp/__init__.py +++ b/dpnp/__init__.py @@ -64,7 +64,7 @@ # Borrowed from DPCTL with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) - from dpctl.tensor import __array_api_version__, DLDeviceType + from dpctl_ext.tensor import __array_api_version__, DLDeviceType from .dpnp_array import dpnp_array as ndarray from .dpnp_array_api_info import __array_namespace_info__ diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py index 4e2ee8531a1..fb277dd4d31 100644 --- a/dpnp/dpnp_algo/dpnp_arraycreation.py +++ b/dpnp/dpnp_algo/dpnp_arraycreation.py @@ -29,13 +29,12 @@ import math import operator -import dpctl.tensor as dpt import dpctl.utils as dpu import numpy # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpnp from dpnp.dpnp_array import dpnp_array from dpnp.dpnp_utils import 
get_usm_allocations, map_dtype_to_device @@ -53,7 +52,7 @@ def _as_usm_ndarray(a, usm_type, sycl_queue): if isinstance(a, dpnp_array): a = a.get_array() - return dpt_ext.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue) + return dpt.asarray(a, usm_type=usm_type, sycl_queue=sycl_queue) def _check_has_zero_val(a): @@ -196,7 +195,7 @@ def dpnp_linspace( if dpnp.isscalar(start) and dpnp.isscalar(stop): # Call linspace() function for scalars. - usm_res = dpt_ext.linspace( + usm_res = dpt.linspace( start, stop, num, @@ -213,19 +212,19 @@ def dpnp_linspace( else: step = dpnp.nan else: - usm_start = dpt_ext.asarray( + usm_start = dpt.asarray( start, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized, ) - usm_stop = dpt_ext.asarray( + usm_stop = dpt.asarray( stop, dtype=dt, usm_type=_usm_type, sycl_queue=sycl_queue_normalized ) delta = usm_stop - usm_start - usm_res = dpt_ext.arange( + usm_res = dpt.arange( 0, stop=num, step=1, @@ -233,9 +232,7 @@ def dpnp_linspace( usm_type=_usm_type, sycl_queue=sycl_queue_normalized, ) - usm_res = dpt_ext.reshape( - usm_res, (-1,) + (1,) * delta.ndim, copy=False - ) + usm_res = dpt.reshape(usm_res, (-1,) + (1,) * delta.ndim, copy=False) if step_num > 0: step = delta / step_num @@ -243,7 +240,7 @@ def dpnp_linspace( # Needed a special handling for denormal numbers (when step == 0), # see numpy#5437 for more details. # Note, dpt.where() is used to avoid a synchronization branch. - usm_res = dpt_ext.where( + usm_res = dpt.where( step == 0, (usm_res / step_num) * delta, usm_res * step ) else: @@ -256,17 +253,17 @@ def dpnp_linspace( usm_res[-1, ...] 
= usm_stop if axis != 0: - usm_res = dpt_ext.moveaxis(usm_res, 0, axis) + usm_res = dpt.moveaxis(usm_res, 0, axis) if dpnp.issubdtype(dtype, dpnp.integer): dpt.floor(usm_res, out=usm_res) - res = dpt_ext.astype(usm_res, dtype, copy=False) + res = dpt.astype(usm_res, dtype, copy=False) res = dpnp_array._create_from_usm_ndarray(res) if retstep is True: if dpnp.isscalar(step): - step = dpt_ext.asarray( + step = dpt.asarray( step, usm_type=res.usm_type, sycl_queue=res.sycl_queue ) return res, dpnp_array._create_from_usm_ndarray(step) diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index d7eeccf7848..271013b5809 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -29,28 +29,27 @@ import warnings from functools import wraps -import dpctl.tensor as dpt -import dpctl.tensor._type_utils as dtu import dpctl.utils as dpu import numpy -from dpctl.tensor._elementwise_common import ( - BinaryElementwiseFunc, - UnaryElementwiseFunc, -) -from dpctl.tensor._scalar_utils import ( - _get_dtype, - _get_shape, - _validate_dtype, -) # pylint: disable=no-name-in-module # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._copy_utils as dtc import dpctl_ext.tensor._tensor_impl as dti +import dpctl_ext.tensor._type_utils as dtu import dpnp import dpnp.backend.extensions.vm._vm_impl as vmi +from dpctl_ext.tensor._elementwise_common import ( + BinaryElementwiseFunc, + UnaryElementwiseFunc, +) +from dpctl_ext.tensor._scalar_utils import ( + _get_dtype, + _get_shape, + _validate_dtype, +) from dpnp.dpnp_array import dpnp_array from dpnp.dpnp_utils import get_usm_allocations from dpnp.dpnp_utils.dpnp_utils_common import ( @@ -213,7 +212,7 @@ def __call__( x_usm = dpnp.get_usm_ndarray(x) if dtype is not None: - x_usm = dpt_ext.astype(x_usm, dtype, copy=False) + x_usm = 
dpt.astype(x_usm, dtype, copy=False) out = self._unpack_out_kw(out) out_usm = None if out is None else dpnp.get_usm_ndarray(out) @@ -467,7 +466,7 @@ def __call__( ) # Allocate a temporary buffer with the required dtype - out[i] = dpt_ext.empty_like(res, dtype=res_dt) + out[i] = dpt.empty_like(res, dtype=res_dt) elif ( buf_dt is None and dti._array_overlap(x, res) @@ -476,7 +475,7 @@ def __call__( # Allocate a temporary buffer to avoid memory overlapping. # Note if `buf_dt` is not None, a temporary copy of `x` will be # created, so the array overlap check isn't needed. - out[i] = dpt_ext.empty_like(res) + out[i] = dpt.empty_like(res) _manager = dpu.SequentialOrderManager[exec_q] dep_evs = _manager.submitted_events @@ -486,7 +485,7 @@ def __call__( if order == "K": buf = dtc._empty_like_orderK(x, buf_dt) else: - buf = dpt_ext.empty_like(x, dtype=buf_dt, order=order) + buf = dpt.empty_like(x, dtype=buf_dt, order=order) ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray( src=x, dst=buf, sycl_queue=exec_q, depends=dep_evs @@ -503,7 +502,7 @@ def __call__( if order == "K": out[i] = dtc._empty_like_orderK(x, res_dt) else: - out[i] = dpt_ext.empty_like(x, dtype=res_dt, order=order) + out[i] = dpt.empty_like(x, dtype=res_dt, order=order) # Call the unary function with input and output arrays ht_unary_ev, unary_ev = self.get_implementation_function()( @@ -713,24 +712,24 @@ def __call__( if dtype is not None: if dpnp.isscalar(x1): - x1_usm = dpt_ext.asarray( + x1_usm = dpt.asarray( x1, dtype=dtype, sycl_queue=x2.sycl_queue, usm_type=x2.usm_type, ) - x2_usm = dpt_ext.astype(x2_usm, dtype, copy=False) + x2_usm = dpt.astype(x2_usm, dtype, copy=False) elif dpnp.isscalar(x2): - x1_usm = dpt_ext.astype(x1_usm, dtype, copy=False) - x2_usm = dpt_ext.asarray( + x1_usm = dpt.astype(x1_usm, dtype, copy=False) + x2_usm = dpt.asarray( x2, dtype=dtype, sycl_queue=x1.sycl_queue, usm_type=x1.usm_type, ) else: - x1_usm = dpt_ext.astype(x1_usm, dtype, copy=False) - x2_usm = 
dpt_ext.astype(x2_usm, dtype, copy=False) + x1_usm = dpt.astype(x1_usm, dtype, copy=False) + x2_usm = dpt.astype(x2_usm, dtype, copy=False) res_usm = super().__call__(x1_usm, x2_usm, out=out_usm, order=order) @@ -1078,7 +1077,7 @@ def __call__( ) # Allocate a temporary buffer with the required dtype - out[i] = dpt_ext.empty_like(res, dtype=res_dt) + out[i] = dpt.empty_like(res, dtype=res_dt) else: # If `dt` is not None, a temporary copy of `x` will be created, # so the array overlap check isn't needed. @@ -1094,7 +1093,7 @@ def __call__( for x in x_to_check ): # allocate a temporary buffer to avoid memory overlapping - out[i] = dpt_ext.empty_like(res) + out[i] = dpt.empty_like(res) x1 = dpnp.as_usm_ndarray(x1, dtype=x1_dt, sycl_queue=exec_q) x2 = dpnp.as_usm_ndarray(x2, dtype=x2_dt, sycl_queue=exec_q) @@ -1127,7 +1126,7 @@ def __call__( if order == "K": buf = dtc._empty_like_orderK(x, buf_dt) else: - buf = dpt_ext.empty_like(x, dtype=buf_dt, order=order) + buf = dpt.empty_like(x, dtype=buf_dt, order=order) ht_copy_ev, copy_ev = dti._copy_usm_ndarray_into_usm_ndarray( src=x, dst=buf, sycl_queue=exec_q, depends=dep_evs @@ -1146,7 +1145,7 @@ def __call__( x1, x2, res_dt, res_shape, res_usm_type, exec_q ) else: - out[i] = dpt_ext.empty( + out[i] = dpt.empty( res_shape, dtype=res_dt, order=order, @@ -1156,9 +1155,9 @@ def __call__( # Broadcast shapes of input arrays if x1.shape != res_shape: - x1 = dpt_ext.broadcast_to(x1, res_shape) + x1 = dpt.broadcast_to(x1, res_shape) if x2.shape != res_shape: - x2 = dpt_ext.broadcast_to(x2, res_shape) + x2 = dpt.broadcast_to(x2, res_shape) # Call the binary function with input and output arrays ht_binary_ev, binary_ev = self.get_implementation_function()( @@ -1326,7 +1325,7 @@ def __call__(self, x, /, decimals=0, out=None, *, dtype=None): res_usm = dpt.divide(x_usm, 10**decimals, out=out_usm) if dtype is not None: - res_usm = dpt_ext.astype(res_usm, dtype, copy=False) + res_usm = dpt.astype(res_usm, dtype, copy=False) if out is not 
None and isinstance(out, dpnp_array): return out diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 6418302d6e7..cbb5835bbfc 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -37,11 +37,9 @@ import warnings -import dpctl.tensor as dpt - # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._type_utils as dtu import dpnp from dpctl_ext.tensor._numpy_helper import AxisError @@ -777,7 +775,7 @@ def asnumpy(self): """ - return dpt_ext.asnumpy(self._array_obj) + return dpt.asnumpy(self._array_obj) def astype( self, @@ -2283,7 +2281,7 @@ def transpose(self, *axes): # self.transpose(None).shape == self.shape[::-1] axes = tuple((ndim - x - 1) for x in range(ndim)) - usm_res = dpt_ext.permute_dims(self._array_obj, axes) + usm_res = dpt.permute_dims(self._array_obj, axes) return dpnp_array._create_from_usm_ndarray(usm_res) def var( diff --git a/dpnp/dpnp_array_api_info.py b/dpnp/dpnp_array_api_info.py index 6a3939d046b..f792600cbb6 100644 --- a/dpnp/dpnp_array_api_info.py +++ b/dpnp/dpnp_array_api_info.py @@ -36,7 +36,9 @@ """ -import dpctl.tensor as dpt +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt def __array_namespace_info__(): diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 9fca083a641..13b957ffff8 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -45,17 +45,16 @@ import os import dpctl -import dpctl.tensor as dpt import dpctl.utils as dpu import numpy -from dpctl.tensor._device import normalize_queue_device # pylint: disable=no-name-in-module # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti import dpnp +from dpctl_ext.tensor._device import normalize_queue_device from .dpnp_array import dpnp_array from 
.dpnp_utils import ( @@ -137,7 +136,7 @@ def asnumpy(a, order="C"): return a.asnumpy() if isinstance(a, dpt.usm_ndarray): - return dpt_ext.asnumpy(a) + return dpt.asnumpy(a) return numpy.asarray(a, order=order) @@ -191,7 +190,7 @@ def as_usm_ndarray(a, dtype=None, device=None, usm_type=None, sycl_queue=None): if is_supported_array_type(a): return get_usm_ndarray(a) - return dpt_ext.asarray( + return dpt.asarray( a, dtype=dtype, device=device, usm_type=usm_type, sycl_queue=sycl_queue ) diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index d09cc17bde7..2800df0b2ac 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -43,12 +43,11 @@ import operator -import dpctl.tensor as dpt import numpy # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpnp from dpnp import dpnp_container @@ -937,7 +936,7 @@ def astype(x, dtype, /, *, order="K", casting="unsafe", copy=True, device=None): order = "K" usm_x = dpnp.get_usm_ndarray(x) - usm_res = dpt_ext.astype( + usm_res = dpt.astype( usm_x, dtype, order=order, casting=casting, copy=copy, device=device ) @@ -3119,7 +3118,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): s0 = (1,) * ndim output = [ - dpt_ext.reshape(dpnp.get_usm_ndarray(x), s0[:i] + (-1,) + s0[i + 1 :]) + dpt.reshape(dpnp.get_usm_ndarray(x), s0[:i] + (-1,) + s0[i + 1 :]) for i, x in enumerate(xi) ] @@ -3127,14 +3126,14 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): _, _ = get_usm_allocations(output) if indexing == "xy" and ndim > 1: - output[0] = dpt_ext.reshape(output[0], (1, -1) + s0[2:]) - output[1] = dpt_ext.reshape(output[1], (-1, 1) + s0[2:]) + output[0] = dpt.reshape(output[0], (1, -1) + s0[2:]) + output[1] = dpt.reshape(output[1], (-1, 1) + s0[2:]) if not sparse: - output = dpt_ext.broadcast_arrays(*output) + output = dpt.broadcast_arrays(*output) if copy: - 
output = [dpt_ext.copy(x) for x in output] + output = [dpt.copy(x) for x in output] return [dpnp_array._create_from_usm_ndarray(x) for x in output] @@ -3696,7 +3695,7 @@ def tri( if usm_type is None: usm_type = "device" - m = dpt_ext.ones( + m = dpt.ones( (N, M), dtype=_dtype, device=device, @@ -3912,7 +3911,7 @@ def vander( if dpnp.is_supported_array_type(x): x = dpnp.get_usm_ndarray(x) - usm_x = dpt_ext.asarray( + usm_x = dpt.asarray( x, device=device, usm_type=usm_type, sycl_queue=sycl_queue ) @@ -3934,8 +3933,8 @@ def vander( tmp = m[:, ::-1] if not increasing else m dpnp.power( - dpt_ext.reshape(usm_x, (-1, 1)), - dpt_ext.arange( + dpt.reshape(usm_x, (-1, 1)), + dpt.arange( N, dtype=_dtype, usm_type=x_usm_type, sycl_queue=x_sycl_queue ), out=tmp, diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index a52196e9e4d..4b8fb7bb6a3 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -44,14 +44,13 @@ import operator from collections.abc import Iterable -import dpctl.tensor as dpt import dpctl.utils as dpu import numpy # pylint: disable=no-name-in-module # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpctl_ext.tensor._tensor_impl as ti import dpnp @@ -141,9 +140,9 @@ def _choose_run(inds, chcs, q, usm_type, out=None, mode=0): ti._array_overlap(out, chc) for chc in chcs ): # Allocate a temporary buffer to avoid memory overlapping. 
- out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) else: - out = dpt_ext.empty( + out = dpt.empty( inds.shape, dtype=chcs[0].dtype, usm_type=usm_type, sycl_queue=q ) @@ -242,7 +241,7 @@ def choose(a, choices, out=None, mode="wrap"): # NumPy will cast up to int64 in general but # int32 is more than safe for bool if ind_dt == dpnp.bool: - inds = dpt_ext.astype(inds, dpt.int32) + inds = dpt.astype(inds, dpt.int32) else: raise TypeError("input index array must be of integer data type") @@ -250,17 +249,17 @@ def choose(a, choices, out=None, mode="wrap"): res_usm_type, exec_q = get_usm_allocations(choices + [inds]) # apply type promotion to input choices - res_dt = dpt_ext.result_type(*choices) + res_dt = dpt.result_type(*choices) if len(choices) > 1: choices = tuple( map( lambda chc: ( - chc if chc.dtype == res_dt else dpt_ext.astype(chc, res_dt) + chc if chc.dtype == res_dt else dpt.astype(chc, res_dt) ), choices, ) ) - arrs_broadcast = dpt_ext.broadcast_arrays(inds, *choices) + arrs_broadcast = dpt.broadcast_arrays(inds, *choices) inds = arrs_broadcast[0] choices = tuple(arrs_broadcast[1:]) @@ -301,11 +300,9 @@ def _take_index(x, inds, axis, q, usm_type, out=None, mode=0): if ti._array_overlap(x, out): # Allocate a temporary buffer to avoid memory overlapping. 
- out = dpt_ext.empty_like(out) + out = dpt.empty_like(out) else: - out = dpt_ext.empty( - res_sh, dtype=x.dtype, usm_type=usm_type, sycl_queue=q - ) + out = dpt.empty(res_sh, dtype=x.dtype, usm_type=usm_type, sycl_queue=q) _manager = dpu.SequentialOrderManager[q] dep_evs = _manager.submitted_events @@ -816,16 +813,16 @@ def extract(condition, a): ) if usm_cond.size != usm_a.size: - usm_a = dpt_ext.reshape(usm_a, -1) - usm_cond = dpt_ext.reshape(usm_cond, -1) + usm_a = dpt.reshape(usm_a, -1) + usm_cond = dpt.reshape(usm_cond, -1) - usm_res = dpt_ext.take(usm_a, dpt_ext.nonzero(usm_cond)[0]) + usm_res = dpt.take(usm_a, dpt.nonzero(usm_cond)[0]) else: if usm_cond.shape != usm_a.shape: - usm_a = dpt_ext.reshape(usm_a, -1) - usm_cond = dpt_ext.reshape(usm_cond, -1) + usm_a = dpt.reshape(usm_a, -1) + usm_cond = dpt.reshape(usm_cond, -1) - usm_res = dpt_ext.extract(usm_cond, usm_a) + usm_res = dpt.extract(usm_cond, usm_a) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -960,18 +957,18 @@ def fill_diagonal(a, val, wrap=False): # a.flat[:end:step] = val # but need to consider use case when `a` is usm_ndarray also a_sh = a.shape - tmp_a = dpt_ext.reshape(usm_a, -1) + tmp_a = dpt.reshape(usm_a, -1) if dpnp.isscalar(usm_val): tmp_a[:end:step] = usm_val else: - usm_val = dpt_ext.reshape(usm_val, -1) + usm_val = dpt.reshape(usm_val, -1) # Setitem can work only if index size equal val size. # Using loop for general case without dependencies of val size. 
for i in range(0, usm_val.size): tmp_a[step * i : end : step * (i + 1)] = usm_val[i] - tmp_a = dpt_ext.reshape(tmp_a, a_sh) + tmp_a = dpt.reshape(tmp_a, a_sh) usm_a[:] = tmp_a @@ -1548,7 +1545,7 @@ def nonzero(a): usm_a = dpnp.get_usm_ndarray(a) return tuple( - dpnp_array._create_from_usm_ndarray(y) for y in dpt_ext.nonzero(usm_a) + dpnp_array._create_from_usm_ndarray(y) for y in dpt.nonzero(usm_a) ) @@ -1612,16 +1609,14 @@ def place(a, mask, vals): if usm_vals.ndim != 1: # dpt.place supports only 1-D array of values - usm_vals = dpt_ext.reshape(usm_vals, -1) + usm_vals = dpt.reshape(usm_vals, -1) if usm_vals.dtype != usm_a.dtype: # dpt.place casts values to a.dtype with "unsafe" rule, # while numpy.place does that with "safe" casting rule - usm_vals = dpt_ext.astype( - usm_vals, usm_a.dtype, casting="safe", copy=False - ) + usm_vals = dpt.astype(usm_vals, usm_a.dtype, casting="safe", copy=False) - dpt_ext.place(usm_a, usm_mask, usm_vals) + dpt.place(usm_a, usm_mask, usm_vals) def put(a, ind, v, /, *, axis=None, mode="wrap"): @@ -1711,19 +1706,19 @@ def put(a, ind, v, /, *, axis=None, mode="wrap"): if usm_ind.ndim != 1: # dpt.put supports only 1-D array of indices - usm_ind = dpt_ext.reshape(usm_ind, -1, copy=False) + usm_ind = dpt.reshape(usm_ind, -1, copy=False) if not dpnp.issubdtype(usm_ind.dtype, dpnp.integer): # dpt.put supports only integer dtype for array of indices - usm_ind = dpt_ext.astype(usm_ind, dpnp.intp, casting="safe") + usm_ind = dpt.astype(usm_ind, dpnp.intp, casting="safe") in_usm_a = usm_a if axis is None and usm_a.ndim > 1: - usm_a = dpt_ext.reshape(usm_a, -1) + usm_a = dpt.reshape(usm_a, -1) - dpt_ext.put(usm_a, usm_ind, usm_v, axis=axis, mode=mode) + dpt.put(usm_a, usm_ind, usm_v, axis=axis, mode=mode) if in_usm_a._pointer != usm_a._pointer: # pylint: disable=protected-access - in_usm_a[:] = dpt_ext.reshape(usm_a, in_usm_a.shape, copy=False) + in_usm_a[:] = dpt.reshape(usm_a, in_usm_a.shape, copy=False) def put_along_axis(a, ind, values, 
axis, mode="wrap"): @@ -1805,11 +1800,11 @@ def put_along_axis(a, ind, values, axis, mode="wrap"): if dpnp.is_supported_array_type(values): usm_vals = dpnp.get_usm_ndarray(values) else: - usm_vals = dpt_ext.asarray( + usm_vals = dpt.asarray( values, usm_type=a.usm_type, sycl_queue=a.sycl_queue ) - dpt_ext.put_along_axis(usm_a, usm_ind, usm_vals, axis=axis, mode=mode) + dpt.put_along_axis(usm_a, usm_ind, usm_vals, axis=axis, mode=mode) def putmask(x1, mask, values): @@ -2153,7 +2148,7 @@ def take(a, indices, /, *, axis=None, out=None, mode="wrap"): usm_a = dpnp.get_usm_ndarray(a) if not dpnp.is_supported_array_type(indices): - usm_ind = dpt_ext.asarray( + usm_ind = dpt.asarray( indices, usm_type=a.usm_type, sycl_queue=a.sycl_queue ) else: @@ -2165,7 +2160,7 @@ def take(a, indices, /, *, axis=None, out=None, mode="wrap"): if axis is None: if a_ndim > 1: # flatten input array - usm_a = dpt_ext.reshape(usm_a, -1) + usm_a = dpt.reshape(usm_a, -1) axis = 0 elif a_ndim == 0: axis = normalize_axis_index(operator.index(axis), 1) @@ -2174,7 +2169,7 @@ def take(a, indices, /, *, axis=None, out=None, mode="wrap"): if not dpnp.issubdtype(usm_ind.dtype, dpnp.integer): # dpt.take supports only integer dtype for array of indices - usm_ind = dpt_ext.astype(usm_ind, dpnp.intp, copy=False, casting="safe") + usm_ind = dpt.astype(usm_ind, dpnp.intp, copy=False, casting="safe") usm_res = _take_index( usm_a, usm_ind, axis, exec_q, res_usm_type, out=out, mode=mode @@ -2297,7 +2292,7 @@ def take_along_axis(a, indices, axis=-1, mode="wrap"): usm_a = dpnp.get_usm_ndarray(a) usm_ind = dpnp.get_usm_ndarray(indices) - usm_res = dpt_ext.take_along_axis(usm_a, usm_ind, axis=axis, mode=mode) + usm_res = dpt.take_along_axis(usm_a, usm_ind, axis=axis, mode=mode) return dpnp_array._create_from_usm_ndarray(usm_res) diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 2ff08cc6ec8..0fc2c3f80fd 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ 
b/dpnp/dpnp_iface_manipulation.py @@ -45,12 +45,11 @@ from typing import NamedTuple import dpctl -import dpctl.tensor as dpt import numpy # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpnp from dpctl_ext.tensor._numpy_helper import ( AxisError, @@ -375,27 +374,25 @@ def _get_first_nan_index(usm_a): ): if dpnp.issubdtype(usm_a.dtype, dpnp.complexfloating): # for complex all NaNs are considered equivalent - true_val = dpt_ext.asarray( + true_val = dpt.asarray( True, sycl_queue=usm_a.sycl_queue, usm_type=usm_a.usm_type ) - return dpt_ext.searchsorted( - dpt.isnan(usm_a), true_val, side="left" - ) - return dpt_ext.searchsorted(usm_a, usm_a[-1], side="left") + return dpt.searchsorted(dpt.isnan(usm_a), true_val, side="left") + return dpt.searchsorted(usm_a, usm_a[-1], side="left") return None usm_ar = dpnp.get_usm_ndarray(ar) num_of_flags = (return_index, return_inverse, return_counts).count(True) if num_of_flags == 0: - usm_res = dpt_ext.unique_values(usm_ar) + usm_res = dpt.unique_values(usm_ar) usm_res = (usm_res,) # cast to a tuple to align with other cases elif num_of_flags == 1 and return_inverse: - usm_res = dpt_ext.unique_inverse(usm_ar) + usm_res = dpt.unique_inverse(usm_ar) elif num_of_flags == 1 and return_counts: - usm_res = dpt_ext.unique_counts(usm_ar) + usm_res = dpt.unique_counts(usm_ar) else: - usm_res = dpt_ext.unique_all(usm_ar) + usm_res = dpt.unique_all(usm_ar) first_nan = None if equal_nan: @@ -417,10 +414,10 @@ def _get_first_nan_index(usm_a): if first_nan is not None: # all NaNs are collapsed, so need to replace the indices with # the index of the first NaN value in result array of unique values - dpt_ext.place( + dpt.place( usm_res.inverse_indices, usm_res.inverse_indices > first_nan, - dpt_ext.reshape(first_nan, 1), + dpt.reshape(first_nan, 1), ) result += (usm_res.inverse_indices,) @@ -428,9 +425,7 @@ def _get_first_nan_index(usm_a): 
if first_nan is not None: # all NaNs are collapsed, so need to put a count of all NaNs # at the last index - dpt_ext.sum( - usm_res.counts[first_nan:], out=usm_res.counts[first_nan] - ) + dpt.sum(usm_res.counts[first_nan:], out=usm_res.counts[first_nan]) result += (usm_res.counts[: first_nan + 1],) else: result += (usm_res.counts,) @@ -1097,9 +1092,7 @@ def broadcast_arrays(*args, subok=False): if len(args) == 0: return [] - usm_arrays = dpt_ext.broadcast_arrays( - *[dpnp.get_usm_ndarray(a) for a in args] - ) + usm_arrays = dpt.broadcast_arrays(*[dpnp.get_usm_ndarray(a) for a in args]) return [dpnp_array._create_from_usm_ndarray(a) for a in usm_arrays] @@ -1184,7 +1177,7 @@ def broadcast_to(array, /, shape, subok=False): raise NotImplementedError(f"subok={subok} is currently not supported") usm_array = dpnp.get_usm_ndarray(array) - new_array = dpt_ext.broadcast_to(usm_array, shape) + new_array = dpt.broadcast_to(usm_array, shape) return dpnp_array._create_from_usm_ndarray(new_array) @@ -1276,7 +1269,7 @@ def can_cast(from_, to, casting="safe"): if dpnp.is_supported_array_type(from_) else dpnp.dtype(from_) ) - return dpt_ext.can_cast(dtype_from, to, casting=casting) + return dpt.can_cast(dtype_from, to, casting=casting) def column_stack(tup): @@ -1422,7 +1415,7 @@ def concatenate( ) usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] - usm_res = dpt_ext.concat(usm_arrays, axis=axis) + usm_res = dpt.concat(usm_arrays, axis=axis) res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: @@ -1527,7 +1520,7 @@ def copyto(dst, src, casting="same_kind", where=True): f"but got {where.dtype}" ) - dst_usm, src_usm, mask_usm = dpt_ext.broadcast_arrays( + dst_usm, src_usm, mask_usm = dpt.broadcast_arrays( dpnp.get_usm_ndarray(dst), dpnp.get_usm_ndarray(src), dpnp.get_usm_ndarray(where), @@ -1855,7 +1848,7 @@ def expand_dims(a, axis): """ usm_a = dpnp.get_usm_ndarray(a) - usm_res = dpt_ext.expand_dims(usm_a, axis=axis) + usm_res = dpt.expand_dims(usm_a, 
axis=axis) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -1926,7 +1919,7 @@ def flip(m, axis=None): """ m_usm = dpnp.get_usm_ndarray(m) - return dpnp_array._create_from_usm_ndarray(dpt_ext.flip(m_usm, axis=axis)) + return dpnp_array._create_from_usm_ndarray(dpt.flip(m_usm, axis=axis)) def fliplr(m): @@ -2370,7 +2363,7 @@ def matrix_transpose(x, /): f"but it is {usm_x.ndim}" ) - usm_res = dpt_ext.matrix_transpose(usm_x) + usm_res = dpt.matrix_transpose(usm_x) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -2414,7 +2407,7 @@ def moveaxis(a, source, destination): usm_array = dpnp.get_usm_ndarray(a) return dpnp_array._create_from_usm_ndarray( - dpt_ext.moveaxis(usm_array, source, destination) + dpt.moveaxis(usm_array, source, destination) ) @@ -2843,7 +2836,7 @@ def repeat(a, repeats, axis=None): a = dpnp.ravel(a) usm_arr = dpnp.get_usm_ndarray(a) - usm_res = dpt_ext.repeat(usm_arr, repeats, axis=axis) + usm_res = dpt.repeat(usm_arr, repeats, axis=axis) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -3066,7 +3059,7 @@ def reshape(a, /, shape, order="C", *, copy=None): ) usm_a = dpnp.get_usm_ndarray(a) - usm_res = dpt_ext.reshape(usm_a, shape=shape, order=order, copy=copy) + usm_res = dpt.reshape(usm_a, shape=shape, order=order, copy=copy) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -3201,7 +3194,7 @@ def result_type(*arrays_and_dtypes): ) for X in arrays_and_dtypes ] - return dpt_ext.result_type(*usm_arrays_and_dtypes) + return dpt.result_type(*usm_arrays_and_dtypes) def roll(x, shift, axis=None): @@ -3268,9 +3261,9 @@ def roll(x, shift, axis=None): shift = dpnp.asnumpy(shift) if axis is None: - return roll(dpt_ext.reshape(usm_x, -1), shift, 0).reshape(x.shape) + return roll(dpt.reshape(usm_x, -1), shift, 0).reshape(x.shape) - usm_res = dpt_ext.roll(usm_x, shift=shift, axis=axis) + usm_res = dpt.roll(usm_x, shift=shift, axis=axis) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -3669,7 +3662,7 @@ def squeeze(a, /, axis=None): 
""" usm_a = dpnp.get_usm_ndarray(a) - usm_res = dpt_ext.squeeze(usm_a, axis=axis) + usm_res = dpt.squeeze(usm_a, axis=axis) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -3757,7 +3750,7 @@ def stack(arrays, /, *, axis=0, out=None, dtype=None, casting="same_kind"): ) usm_arrays = [dpnp.get_usm_ndarray(x) for x in arrays] - usm_res = dpt_ext.stack(usm_arrays, axis=axis) + usm_res = dpt.stack(usm_arrays, axis=axis) res = dpnp_array._create_from_usm_ndarray(usm_res) if dtype is not None: @@ -3818,7 +3811,7 @@ def swapaxes(a, axis1, axis2): """ usm_a = dpnp.get_usm_ndarray(a) - usm_res = dpt_ext.swapaxes(usm_a, axis1=axis1, axis2=axis2) + usm_res = dpt.swapaxes(usm_a, axis1=axis1, axis2=axis2) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -3898,7 +3891,7 @@ def tile(A, reps): """ usm_a = dpnp.get_usm_ndarray(A) - usm_res = dpt_ext.tile(usm_a, reps) + usm_res = dpt.tile(usm_a, reps) return dpnp_array._create_from_usm_ndarray(usm_res) @@ -4528,7 +4521,7 @@ def unstack(x, /, *, axis=0): if usm_x.ndim == 0: raise ValueError("Input array must be at least 1-d.") - res = dpt_ext.unstack(usm_x, axis=axis) + res = dpt.unstack(usm_x, axis=axis) return tuple(dpnp_array._create_from_usm_ndarray(a) for a in res) diff --git a/dpnp/dpnp_iface_types.py b/dpnp/dpnp_iface_types.py index f133333d6b8..7d2d60089d9 100644 --- a/dpnp/dpnp_iface_types.py +++ b/dpnp/dpnp_iface_types.py @@ -37,12 +37,11 @@ import functools import dpctl -import dpctl.tensor as dpt import numpy # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpnp from .dpnp_array import dpnp_array @@ -214,7 +213,7 @@ def finfo(dtype): """ if isinstance(dtype, dpnp_array): dtype = dtype.dtype - return dpt_ext.finfo(dtype) + return dpt.finfo(dtype) # pylint: disable=redefined-outer-name @@ -247,7 +246,7 @@ def iinfo(dtype): if isinstance(dtype, dpnp_array): dtype = dtype.dtype - return dpt_ext.iinfo(dtype) 
+ return dpt.iinfo(dtype) def isdtype(dtype, kind): @@ -301,7 +300,7 @@ def isdtype(dtype, kind): elif isinstance(kind, tuple): kind = tuple(dpt.dtype(k) if isinstance(k, type) else k for k in kind) - return dpt_ext.isdtype(dtype, kind) + return dpt.isdtype(dtype, kind) def issubdtype(arg1, arg2): diff --git a/dpnp/dpnp_utils/dpnp_utils_statistics.py b/dpnp/dpnp_utils/dpnp_utils_statistics.py index ec67b619a13..cd9932cb715 100644 --- a/dpnp/dpnp_utils/dpnp_utils_statistics.py +++ b/dpnp/dpnp_utils/dpnp_utils_statistics.py @@ -29,13 +29,12 @@ import warnings import dpctl -import dpctl.tensor as dpt from dpctl.utils import ExecutionPlacementError -import dpnp - # TODO: revert to `from dpctl.tensor...` # when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt +import dpnp from dpctl_ext.tensor._numpy_helper import normalize_axis_tuple from dpnp.dpnp_array import dpnp_array diff --git a/dpnp/exceptions/__init__.py b/dpnp/exceptions/__init__.py index 26d78a853f4..7abcdbf0553 100644 --- a/dpnp/exceptions/__init__.py +++ b/dpnp/exceptions/__init__.py @@ -32,10 +32,13 @@ SyclQueueCreationError, ) from dpctl.memory import USMAllocationError -from dpctl.tensor._dlpack import DLPackCreationError from dpctl.utils import ExecutionPlacementError from numpy.exceptions import AxisError +# TODO: revert to `from dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +from dpctl_ext.tensor._dlpack import DLPackCreationError + __all__ = [ "AxisError", "DLPackCreationError", diff --git a/dpnp/memory/_memory.py b/dpnp/memory/_memory.py index f978c5e50db..3e95baacd42 100644 --- a/dpnp/memory/_memory.py +++ b/dpnp/memory/_memory.py @@ -26,11 +26,14 @@ # THE POSSIBILITY OF SUCH DAMAGE. 
# ***************************************************************************** -import dpctl.tensor as dpt from dpctl.memory import MemoryUSMDevice as DPCTLMemoryUSMDevice from dpctl.memory import MemoryUSMHost as DPCTLMemoryUSMHost from dpctl.memory import MemoryUSMShared as DPCTLMemoryUSMShared +# TODO: revert to `from dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor as dpt + def _add_ptr_property(cls): _storage_attr = "_ptr" diff --git a/dpnp/tests/test_mathematical.py b/dpnp/tests/test_mathematical.py index c0378779028..155f4cdb06f 100644 --- a/dpnp/tests/test_mathematical.py +++ b/dpnp/tests/test_mathematical.py @@ -1,5 +1,4 @@ import dpctl -import dpctl.tensor as dpt import numpy import pytest from dpctl.utils import ExecutionPlacementError @@ -13,7 +12,7 @@ # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpnp # TODO: revert to `from dpctl.tensor...` @@ -672,15 +671,15 @@ def test_to_begin_to_end(self, to_begin, to_end): "to_begin, to_end", [ (-20, 20), - (dpt_ext.asarray([-20, -30]), dpt_ext.asarray([20, 15])), - (dpt_ext.asarray([[-20, -30]]), dpt_ext.asarray([[20, 15]])), + (dpt.asarray([-20, -30]), dpt.asarray([20, 15])), + (dpt.asarray([[-20, -30]]), dpt.asarray([[20, 15]])), ([1, 2], [3, 4]), ((1, 2), (3, 4)), ], ) def test_usm_ndarray(self, to_begin, to_end): a = numpy.array([[1, 2, 0]]) - dpt_a = dpt_ext.asarray(a) + dpt_a = dpt.asarray(a) if isinstance(to_begin, dpt.usm_ndarray): np_to_begin = dpt.asnumpy(to_begin) @@ -1581,7 +1580,7 @@ def test_out(self): assert_allclose(result, expected) # output is usm_ndarray - dpt_out = dpt_ext.empty(expected.shape, dtype=expected.dtype) + dpt_out = dpt.empty(expected.shape, dtype=expected.dtype) result = dpnp.prod(ia, axis=0, out=dpt_out) assert dpt_out is result.get_array() assert_allclose(result, expected) @@ -2634,7 +2633,7 @@ def test_out_float16(self, func): def 
test_out_usm_ndarray(self, func, dt): a = generate_random_numpy_array(10, dt) out = numpy.empty(a.shape, dtype=dt) - ia, usm_out = dpnp.array(a), dpt_ext.asarray(out) + ia, usm_out = dpnp.array(a), dpt.asarray(out) expected = getattr(numpy, func)(a, out=out) result = getattr(dpnp, func)(ia, out=usm_out) diff --git a/dpnp/tests/test_memory.py b/dpnp/tests/test_memory.py index 94aeda33f50..dd87a993e1d 100644 --- a/dpnp/tests/test_memory.py +++ b/dpnp/tests/test_memory.py @@ -1,10 +1,9 @@ -import dpctl.tensor as dpt import numpy import pytest # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpnp import dpnp.memory as dpm @@ -24,7 +23,7 @@ def test_wrong_input_type(self, x): dpm.create_data(x) def test_wrong_usm_data(self): - a = dpt_ext.ones(10) + a = dpt.ones(10) d = IntUsmData(a.shape, buffer=a) with pytest.raises(TypeError): diff --git a/dpnp/tests/test_ndarray.py b/dpnp/tests/test_ndarray.py index a27f0fe6aa1..8944043d90a 100644 --- a/dpnp/tests/test_ndarray.py +++ b/dpnp/tests/test_ndarray.py @@ -1,4 +1,3 @@ -import dpctl.tensor as dpt import numpy import pytest from numpy.testing import ( @@ -11,7 +10,7 @@ # TODO: revert to `import dpctl.tensor...` # when dpnp fully migrates dpctl/tensor -import dpctl_ext.tensor as dpt_ext +import dpctl_ext.tensor as dpt import dpnp from .helper import ( @@ -410,7 +409,7 @@ def test_error(self): class TestUsmNdarrayProtocol: def test_basic(self): a = dpnp.arange(256, dtype=dpnp.int64) - usm_a = dpt_ext.asarray(a) + usm_a = dpt.asarray(a) assert a.sycl_queue == usm_a.sycl_queue assert a.usm_type == usm_a.usm_type diff --git a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py index 41df0a82e0a..e44f51f09b2 100644 --- a/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py +++ b/dpnp/tests/third_party/cupy/core_tests/test_dlpack.py @@ -1,10 +1,12 @@ from __future__ 
import annotations import dpctl -import dpctl.tensor._dlpack as dlp import numpy import pytest +# TODO: revert to `import dpctl.tensor...` +# when dpnp fully migrates dpctl/tensor +import dpctl_ext.tensor._dlpack as dlp import dpnp as cupy from dpnp.tests.third_party.cupy import testing From 23164aca4566d013b4bf401444ea1f8b5ec49fad Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 6 Mar 2026 04:39:06 -0800 Subject: [PATCH 16/24] Reorder _usmarray import in __init__.py --- dpctl_ext/tensor/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index 076f7eae970..8b9bcfa2b2a 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -199,6 +199,12 @@ reduce_hypot, sum, ) + +# isort: off +# placed here to avoid circular import +from ._usmarray import DLDeviceType, usm_ndarray + +# isort: on from ._reshape import reshape from ._search_functions import where from ._searchsorted import searchsorted @@ -213,7 +219,6 @@ from ._statistical_functions import mean, std, var from ._testing import allclose from ._type_utils import can_cast, finfo, iinfo, isdtype, result_type -from ._usmarray import DLDeviceType, usm_ndarray from ._utility_functions import all, any, diff __all__ = [ From 18c3d61b6651ec35095c856c421438c3c88eca87 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 6 Mar 2026 04:41:29 -0800 Subject: [PATCH 17/24] Add missing _place_impl() to _copy_utils.py --- dpctl_ext/tensor/_copy_utils.py | 104 ++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/dpctl_ext/tensor/_copy_utils.py b/dpctl_ext/tensor/_copy_utils.py index 44fbfc404cf..b056511ac33 100644 --- a/dpctl_ext/tensor/_copy_utils.py +++ b/dpctl_ext/tensor/_copy_utils.py @@ -310,6 +310,110 @@ def _prepare_indices_arrays(inds, q, usm_type): return inds +def _place_impl(ary, ary_mask, vals, axis=0): + """ + Put vals into ary by applying mask 
starting from slot + dimension axis. + """ + if not isinstance(ary, dpt.usm_ndarray): + raise TypeError( + f"Expecting type dpctl.tensor.usm_ndarray, got {type(ary)}" + ) + if isinstance(ary_mask, dpt.usm_ndarray): + exec_q = dpctl.utils.get_execution_queue( + ( + ary.sycl_queue, + ary_mask.sycl_queue, + ) + ) + coerced_usm_type = dpctl.utils.get_coerced_usm_type( + ( + ary.usm_type, + ary_mask.usm_type, + ) + ) + if exec_q is None: + raise dpctl.utils.ExecutionPlacementError( + "arrays have different associated queues. " + "Use `y.to_device(x.device)` to migrate." + ) + elif isinstance(ary_mask, np.ndarray): + exec_q = ary.sycl_queue + coerced_usm_type = ary.usm_type + ary_mask = dpt.asarray( + ary_mask, usm_type=coerced_usm_type, sycl_queue=exec_q + ) + else: + raise TypeError( + "Expecting type dpctl.tensor.usm_ndarray or numpy.ndarray, got " + f"{type(ary_mask)}" + ) + if exec_q is not None: + if not isinstance(vals, dpt.usm_ndarray): + vals = dpt.asarray( + vals, + dtype=ary.dtype, + usm_type=coerced_usm_type, + sycl_queue=exec_q, + ) + else: + exec_q = dpctl.utils.get_execution_queue((exec_q, vals.sycl_queue)) + coerced_usm_type = dpctl.utils.get_coerced_usm_type( + ( + coerced_usm_type, + vals.usm_type, + ) + ) + if exec_q is None: + raise dpctl.utils.ExecutionPlacementError( + "arrays have different associated queues. " + "Use `Y.to_device(X.device)` to migrate." 
+ ) + ary_nd = ary.ndim + pp = normalize_axis_index(operator.index(axis), ary_nd) + mask_nd = ary_mask.ndim + if pp < 0 or pp + mask_nd > ary_nd: + raise ValueError( + "Parameter p is inconsistent with input array dimensions" + ) + mask_nelems = ary_mask.size + cumsum_dt = dpt.int32 if mask_nelems < int32_t_max else dpt.int64 + cumsum = dpt.empty( + mask_nelems, + dtype=cumsum_dt, + usm_type=coerced_usm_type, + device=ary_mask.device, + ) + exec_q = cumsum.sycl_queue + _manager = dpctl.utils.SequentialOrderManager[exec_q] + dep_ev = _manager.submitted_events + mask_count = ti.mask_positions( + ary_mask, cumsum, sycl_queue=exec_q, depends=dep_ev + ) + expected_vals_shape = ( + ary.shape[:pp] + (mask_count,) + ary.shape[pp + mask_nd :] + ) + if vals.dtype == ary.dtype: + rhs = vals + else: + rhs = dpt.astype(vals, ary.dtype) + rhs = dpt.broadcast_to(rhs, expected_vals_shape) + if mask_nelems == 0: + return + dep_ev = _manager.submitted_events + hev, pl_ev = ti._place( + dst=ary, + cumsum=cumsum, + axis_start=pp, + axis_end=pp + mask_nd, + rhs=rhs, + sycl_queue=exec_q, + depends=dep_ev, + ) + _manager.add_event_pair(hev, pl_ev) + return + + def _put_multi_index(ary, inds, p, vals, mode=0): if not isinstance(ary, dpt.usm_ndarray): raise TypeError( From 7f14dfc9ffde2c2427dbb464f64ab1f03ca2d29f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 6 Mar 2026 05:06:11 -0800 Subject: [PATCH 18/24] Update _dlpack.pyx to use dpctl_ext.tensor --- dpctl_ext/tensor/_dlpack.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dpctl_ext/tensor/_dlpack.pyx b/dpctl_ext/tensor/_dlpack.pyx index 62d71037b4c..fde4415b742 100644 --- a/dpctl_ext/tensor/_dlpack.pyx +++ b/dpctl_ext/tensor/_dlpack.pyx @@ -1209,7 +1209,9 @@ def from_dlpack(x, /, *, device=None, copy=None): ) return from_dlpack_capsule(cpu_caps) else: - import dpctl.tensor as dpt + # TODO: revert to `import dpctl.tensor` + # when dpnp fully migrates dpctl/tensor + import dpctl_ext.tensor as 
dpt return dpt.asarray(blob, device=dev) elif got_buffer_error: # we are here, because dlpack_attr could not deal with requested From 5e7123d736d975e16fde4a3d4691a1fe1df6391c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 6 Mar 2026 05:06:38 -0800 Subject: [PATCH 19/24] Update _usmarray.pyx to use dpctl_ext.tensor --- dpctl_ext/tensor/_usmarray.pyx | 113 ++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 51 deletions(-) diff --git a/dpctl_ext/tensor/_usmarray.pyx b/dpctl_ext/tensor/_usmarray.pyx index 958ae3f3703..f5bca9b1635 100644 --- a/dpctl_ext/tensor/_usmarray.pyx +++ b/dpctl_ext/tensor/_usmarray.pyx @@ -37,6 +37,9 @@ import numpy as np from dpctl._backend cimport DPCTLSyclUSMRef from dpctl._sycl_device_factory cimport _cached_default_device +# TODO: remove it when dpnp fully migrates dpctl/tensor +import dpctl_ext + from ._data_types import bool as dpt_bool from ._device import Device from ._print import usm_ndarray_repr, usm_ndarray_str @@ -1143,7 +1146,9 @@ cdef class usm_ndarray: return ( self.array_namespace_ if self.array_namespace_ is not None - else dpctl.tensor + # TODO: revert to `else dpctl.tensor` + # when dpnp fully migrates dpctl/tensor + else dpctl_ext.tensor ) def __bool__(self): @@ -1199,17 +1204,19 @@ raise IndexError("only integer arrays are valid indices") def __abs__(self): - return dpctl.tensor.abs(self) + # TODO: revert to `return dpctl.tensor...` + # when dpnp fully migrates dpctl/tensor + return dpctl_ext.tensor.abs(self) def __add__(self, other): """ Implementation for operator.add """ - return dpctl.tensor.add(self, other) + return dpctl_ext.tensor.add(self, other) def __and__(self, other): "Implementation for operator.and" - return dpctl.tensor.bitwise_and(self, other) + return dpctl_ext.tensor.bitwise_and(self, other) def __dlpack__( self, *, stream=None, max_version=None, dl_device=None, copy=None @@ -1368,22 +1375,24 @@ ) def __eq__(self, other): 
- return dpctl.tensor.equal(self, other) + # TODO: revert to `return dpctl.tensor...` + # when dpnp fully migrates dpctl/tensor + return dpctl_ext.tensor.equal(self, other) def __floordiv__(self, other): - return dpctl.tensor.floor_divide(self, other) + return dpctl_ext.tensor.floor_divide(self, other) def __ge__(self, other): - return dpctl.tensor.greater_equal(self, other) + return dpctl_ext.tensor.greater_equal(self, other) def __gt__(self, other): - return dpctl.tensor.greater(self, other) + return dpctl_ext.tensor.greater(self, other) def __invert__(self): - return dpctl.tensor.bitwise_invert(self) + return dpctl_ext.tensor.bitwise_invert(self) def __le__(self, other): - return dpctl.tensor.less_equal(self, other) + return dpctl_ext.tensor.less_equal(self, other) def __len__(self): if (self.nd_): @@ -1392,37 +1401,37 @@ cdef class usm_ndarray: raise TypeError("len() of unsized object") def __lshift__(self, other): - return dpctl.tensor.bitwise_left_shift(self, other) + return dpctl_ext.tensor.bitwise_left_shift(self, other) def __lt__(self, other): - return dpctl.tensor.less(self, other) + return dpctl_ext.tensor.less(self, other) def __matmul__(self, other): - return dpctl.tensor.matmul(self, other) + return dpctl_ext.tensor.matmul(self, other) def __mod__(self, other): - return dpctl.tensor.remainder(self, other) + return dpctl_ext.tensor.remainder(self, other) def __mul__(self, other): - return dpctl.tensor.multiply(self, other) + return dpctl_ext.tensor.multiply(self, other) def __ne__(self, other): - return dpctl.tensor.not_equal(self, other) + return dpctl_ext.tensor.not_equal(self, other) def __neg__(self): - return dpctl.tensor.negative(self) + return dpctl_ext.tensor.negative(self) def __or__(self, other): - return dpctl.tensor.bitwise_or(self, other) + return dpctl_ext.tensor.bitwise_or(self, other) def __pos__(self): - return dpctl.tensor.positive(self) + return dpctl_ext.tensor.positive(self) def __pow__(self, other): - return 
dpctl.tensor.pow(self, other) + return dpctl_ext.tensor.pow(self, other) def __rshift__(self, other): - return dpctl.tensor.bitwise_right_shift(self, other) + return dpctl_ext.tensor.bitwise_right_shift(self, other) def __setitem__(self, key, rhs): cdef tuple _meta @@ -1467,7 +1476,7 @@ cdef class usm_ndarray: _copy_from_usm_ndarray_to_usm_ndarray(Xv, rhs) else: if hasattr(rhs, "__sycl_usm_array_interface__"): - from dpctl.tensor import asarray + from dpctl_ext.tensor import asarray try: rhs_ar = asarray(rhs) _copy_from_usm_ndarray_to_usm_ndarray(Xv, rhs_ar) @@ -1515,91 +1524,93 @@ cdef class usm_ndarray: return def __sub__(self, other): - return dpctl.tensor.subtract(self, other) + # TODO: revert to `return dpctl.tensor...` + # when dpnp fully migrates dpctl/tensor + return dpctl_ext.tensor.subtract(self, other) def __truediv__(self, other): - return dpctl.tensor.divide(self, other) + return dpctl_ext.tensor.divide(self, other) def __xor__(self, other): - return dpctl.tensor.bitwise_xor(self, other) + return dpctl_ext.tensor.bitwise_xor(self, other) def __radd__(self, other): - return dpctl.tensor.add(other, self) + return dpctl_ext.tensor.add(other, self) def __rand__(self, other): - return dpctl.tensor.bitwise_and(other, self) + return dpctl_ext.tensor.bitwise_and(other, self) def __rfloordiv__(self, other): - return dpctl.tensor.floor_divide(other, self) + return dpctl_ext.tensor.floor_divide(other, self) def __rlshift__(self, other): - return dpctl.tensor.bitwise_left_shift(other, self) + return dpctl_ext.tensor.bitwise_left_shift(other, self) def __rmatmul__(self, other): - return dpctl.tensor.matmul(other, self) + return dpctl_ext.tensor.matmul(other, self) def __rmod__(self, other): - return dpctl.tensor.remainder(other, self) + return dpctl_ext.tensor.remainder(other, self) def __rmul__(self, other): - return dpctl.tensor.multiply(other, self) + return dpctl_ext.tensor.multiply(other, self) def __ror__(self, other): - return dpctl.tensor.bitwise_or(other, 
self) + return dpctl_ext.tensor.bitwise_or(other, self) def __rpow__(self, other): - return dpctl.tensor.pow(other, self) + return dpctl_ext.tensor.pow(other, self) def __rrshift__(self, other): - return dpctl.tensor.bitwise_right_shift(other, self) + return dpctl_ext.tensor.bitwise_right_shift(other, self) def __rsub__(self, other): - return dpctl.tensor.subtract(other, self) + return dpctl_ext.tensor.subtract(other, self) def __rtruediv__(self, other): - return dpctl.tensor.divide(other, self) + return dpctl_ext.tensor.divide(other, self) def __rxor__(self, other): - return dpctl.tensor.bitwise_xor(other, self) + return dpctl_ext.tensor.bitwise_xor(other, self) def __iadd__(self, other): - return dpctl.tensor.add._inplace_op(self, other) + return dpctl_ext.tensor.add._inplace_op(self, other) def __iand__(self, other): - return dpctl.tensor.bitwise_and._inplace_op(self, other) + return dpctl_ext.tensor.bitwise_and._inplace_op(self, other) def __ifloordiv__(self, other): - return dpctl.tensor.floor_divide._inplace_op(self, other) + return dpctl_ext.tensor.floor_divide._inplace_op(self, other) def __ilshift__(self, other): - return dpctl.tensor.bitwise_left_shift._inplace_op(self, other) + return dpctl_ext.tensor.bitwise_left_shift._inplace_op(self, other) def __imatmul__(self, other): - return dpctl.tensor.matmul(self, other, out=self, dtype=self.dtype) + return dpctl_ext.tensor.matmul(self, other, out=self, dtype=self.dtype) def __imod__(self, other): - return dpctl.tensor.remainder._inplace_op(self, other) + return dpctl_ext.tensor.remainder._inplace_op(self, other) def __imul__(self, other): - return dpctl.tensor.multiply._inplace_op(self, other) + return dpctl_ext.tensor.multiply._inplace_op(self, other) def __ior__(self, other): - return dpctl.tensor.bitwise_or._inplace_op(self, other) + return dpctl_ext.tensor.bitwise_or._inplace_op(self, other) def __ipow__(self, other): - return dpctl.tensor.pow._inplace_op(self, other) + return 
dpctl_ext.tensor.pow._inplace_op(self, other) def __irshift__(self, other): - return dpctl.tensor.bitwise_right_shift._inplace_op(self, other) + return dpctl_ext.tensor.bitwise_right_shift._inplace_op(self, other) def __isub__(self, other): - return dpctl.tensor.subtract._inplace_op(self, other) + return dpctl_ext.tensor.subtract._inplace_op(self, other) def __itruediv__(self, other): - return dpctl.tensor.divide._inplace_op(self, other) + return dpctl_ext.tensor.divide._inplace_op(self, other) def __ixor__(self, other): - return dpctl.tensor.bitwise_xor._inplace_op(self, other) + return dpctl_ext.tensor.bitwise_xor._inplace_op(self, other) def __str__(self): return usm_ndarray_str(self) From f4da0de1ee73013edd7725e20f70191c14ba5d5f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Fri, 6 Mar 2026 05:27:55 -0800 Subject: [PATCH 20/24] Integrate dpctl_ext.tensor C-API to dpnp4pybind11.hpp --- dpnp/backend/include/dpnp4pybind11.hpp | 65 +++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 2 deletions(-) diff --git a/dpnp/backend/include/dpnp4pybind11.hpp b/dpnp/backend/include/dpnp4pybind11.hpp index cd287989bef..f2db8de18f7 100644 --- a/dpnp/backend/include/dpnp4pybind11.hpp +++ b/dpnp/backend/include/dpnp4pybind11.hpp @@ -28,7 +28,66 @@ #pragma once -#include "dpctl_capi.h" +// TODO: Enable dpctl_capi.h once dpctl.tensor is removed. +// Also call `import_dpctl_ext__tensor___usmarray();` right after +// `import_dpctl()` (line 334) to initialize the dpctl_ext tensor C-API. +// +// Now we include dpctl C-API headers explicitly in order to +// integrate dpctl_ext tensor C-API. + +// #include "dpctl_capi.h" + +// clang-format off +// Ordering of includes is important here. dpctl_sycl_types and +// dpctl_sycl_extension_interface define types used by dpctl's Python +// C-API headers. 
+#include "syclinterface/dpctl_sycl_types.h" +#include "syclinterface/dpctl_sycl_extension_interface.h" +#ifdef __cplusplus +#define CYTHON_EXTERN_C extern "C" +#else +#define CYTHON_EXTERN_C +#endif +#include "dpctl/_sycl_device.h" +#include "dpctl/_sycl_device_api.h" +#include "dpctl/_sycl_context.h" +#include "dpctl/_sycl_context_api.h" +#include "dpctl/_sycl_event.h" +#include "dpctl/_sycl_event_api.h" +#include "dpctl/_sycl_queue.h" +#include "dpctl/_sycl_queue_api.h" +#include "dpctl/memory/_memory.h" +#include "dpctl/memory/_memory_api.h" +#include "dpctl/program/_program.h" +#include "dpctl/program/_program_api.h" + +// clang-format on + +// TODO: Keep these includes once `dpctl.tensor` is removed from dpctl, +// but replace the hardcoded relative path with a proper include pathы +#include "../../../dpctl_ext/include/dpctl_ext/tensor/_usmarray.h" +#include "../../../dpctl_ext/include/dpctl_ext/tensor/_usmarray_api.h" + +/* + * Function to import dpctl and make C-API functions available. + * C functions can use dpctl's C-API functions without linking to + * shared objects defining this symbols, if they call `import_dpctl()` + * prior to using those symbols. 
+ * + * It is declared inline to allow multiple definitions in + * different translation units + */ +static inline void import_dpctl(void) +{ + import_dpctl___sycl_device(); + import_dpctl___sycl_context(); + import_dpctl___sycl_event(); + import_dpctl___sycl_queue(); + import_dpctl__memory___memory(); + import_dpctl_ext__tensor___usmarray(); + import_dpctl__program___program(); + return; +} #include #include // for std::size_t for C++ linkage @@ -410,8 +469,10 @@ class dpctl_capi default_usm_memory_ = std::shared_ptr( new py::object{py_default_usm_memory}, Deleter{}); + // TODO: revert to `py::module_::import("dpctl.tensor._usmarray");` + // when dpnp fully migrates dpctl/tensor py::module_ mod_usmarray = - py::module_::import("dpctl.tensor._usmarray"); + py::module_::import("dpctl_ext.tensor._usmarray"); auto tensor_kl = mod_usmarray.attr("usm_ndarray"); const py::object &py_default_usm_ndarray = From 1e4902d00b11a0f9c1bc6db04a1409180628713c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 9 Mar 2026 06:34:42 -0700 Subject: [PATCH 21/24] Add from_dlpack to API dpctl_ext.tensor --- dpctl_ext/tensor/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpctl_ext/tensor/__init__.py b/dpctl_ext/tensor/__init__.py index 8b9bcfa2b2a..03980e194fd 100644 --- a/dpctl_ext/tensor/__init__.py +++ b/dpctl_ext/tensor/__init__.py @@ -77,6 +77,7 @@ dldevice_to_sycl_device, sycl_device_to_dldevice, ) +from ._dlpack import from_dlpack from ._elementwise_funcs import ( abs, acos, @@ -306,6 +307,7 @@ "flip", "floor", "floor_divide", + "from_dlpack", "from_numpy", "full", "full_like", From 9a50f9fec579687b35ce02eedb0bf2d822a1abf4 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 9 Mar 2026 07:56:47 -0700 Subject: [PATCH 22/24] Extend .gitignore for dpctl_ext/include --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 0cfebe53f62..f8ed987fa0d 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ 
core # TODO: revert to `dpctl/` # when dpnp fully migrates dpctl/tensor dpctl_ext/**/*.cpython*.so +dpctl_ext/include/ From 153a91bb0a5615a3f236f6ab8f23287157f1cb88 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 9 Mar 2026 07:58:39 -0700 Subject: [PATCH 23/24] Add DpctlExtCAPI interface target --- CMakeLists.txt | 11 ++++++++++- dpctl_ext/tensor/CMakeLists.txt | 2 ++ dpnp/backend/extensions/blas/CMakeLists.txt | 2 ++ dpnp/backend/extensions/fft/CMakeLists.txt | 2 ++ dpnp/backend/extensions/indexing/CMakeLists.txt | 2 ++ dpnp/backend/extensions/lapack/CMakeLists.txt | 1 + dpnp/backend/extensions/statistics/CMakeLists.txt | 2 ++ dpnp/backend/extensions/ufunc/CMakeLists.txt | 2 ++ dpnp/backend/extensions/vm/CMakeLists.txt | 2 ++ dpnp/backend/extensions/window/CMakeLists.txt | 2 ++ dpnp/backend/include/dpnp4pybind11.hpp | 4 ++-- 11 files changed, 29 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c7bb7f650da..489283f45a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -344,5 +344,14 @@ if(DEFINED SKBUILD) set(_ignore_me ${SKBUILD}) endif() -add_subdirectory(dpnp) +# TODO: Replace `${CMAKE_BINARY_DIR}` with a dedicated public include root +# for dpctl_ext C-API headers +# Unlike dpctl which exposes C-API from `dpctl/apis/include`, +# dpctl_ext currently relies on generated headers in the build tree. +# `${CMAKE_BINARY_DIR}` is a temporary workaround. 
+ +add_library(DpctlExtCAPI INTERFACE) +target_include_directories(DpctlExtCAPI INTERFACE ${CMAKE_BINARY_DIR}) + add_subdirectory(dpctl_ext) +add_subdirectory(dpnp) diff --git a/dpctl_ext/tensor/CMakeLists.txt b/dpctl_ext/tensor/CMakeLists.txt index 16d6cfa7e13..8df593b0838 100644 --- a/dpctl_ext/tensor/CMakeLists.txt +++ b/dpctl_ext/tensor/CMakeLists.txt @@ -35,6 +35,7 @@ foreach(_cy_file ${_cython_sources}) build_dpctl_ext(${_trgt} ${_cy_file} "dpctl_ext/tensor" RELATIVE_PATH "..") target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) # target_link_libraries(DpctlCAPI INTERFACE ${_trgt}_headers) + target_link_libraries(DpctlExtCAPI INTERFACE ${_trgt}_headers) endforeach() if(WIN32) @@ -346,6 +347,7 @@ foreach(python_module_name ${_py_trgts}) # dpctl4pybind11.hpp. It will allow to simplify dependency tree # NOTE: dpctl C-API is resolved at runtime via Python # target_link_libraries(${python_module_name} PRIVATE DpctlCAPI) + target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI) if(DPNP_WITH_REDIST) set_target_properties( ${python_module_name} diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt index 69a99b996d9..2dce27001bb 100644 --- a/dpnp/backend/extensions/blas/CMakeLists.txt +++ b/dpnp/backend/extensions/blas/CMakeLists.txt @@ -39,6 +39,8 @@ set(_module_src pybind11_add_module(${python_module_name} MODULE ${_module_src}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI) + if(_dpnp_sycl_targets) # make fat binary target_compile_options( diff --git a/dpnp/backend/extensions/fft/CMakeLists.txt b/dpnp/backend/extensions/fft/CMakeLists.txt index 8a96d8cbd25..bfebe1ed422 100644 --- a/dpnp/backend/extensions/fft/CMakeLists.txt +++ b/dpnp/backend/extensions/fft/CMakeLists.txt @@ -33,6 +33,8 @@ set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/fft_py.cpp) pybind11_add_module(${python_module_name} 
MODULE ${_module_src}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI) + if(_dpnp_sycl_targets) # make fat binary target_compile_options( diff --git a/dpnp/backend/extensions/indexing/CMakeLists.txt b/dpnp/backend/extensions/indexing/CMakeLists.txt index 373c6152f66..7729e2807a4 100644 --- a/dpnp/backend/extensions/indexing/CMakeLists.txt +++ b/dpnp/backend/extensions/indexing/CMakeLists.txt @@ -36,6 +36,8 @@ set(_module_src pybind11_add_module(${python_module_name} MODULE ${_module_src}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI) + if(_dpnp_sycl_targets) # make fat binary target_compile_options( diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index 2bac0932a67..a3ee4bae8ee 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -55,6 +55,7 @@ set(_module_src pybind11_add_module(${python_module_name} MODULE ${_module_src}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI) if(_dpnp_sycl_targets) # make fat binary diff --git a/dpnp/backend/extensions/statistics/CMakeLists.txt b/dpnp/backend/extensions/statistics/CMakeLists.txt index 60d26295acf..88b3f185e6f 100644 --- a/dpnp/backend/extensions/statistics/CMakeLists.txt +++ b/dpnp/backend/extensions/statistics/CMakeLists.txt @@ -41,6 +41,8 @@ set(_module_src pybind11_add_module(${python_module_name} MODULE ${_module_src}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI) + if(_dpnp_sycl_targets) # make fat binary target_compile_options( diff --git a/dpnp/backend/extensions/ufunc/CMakeLists.txt b/dpnp/backend/extensions/ufunc/CMakeLists.txt index 
45d2706fb48..d954316dcb2 100644 --- a/dpnp/backend/extensions/ufunc/CMakeLists.txt +++ b/dpnp/backend/extensions/ufunc/CMakeLists.txt @@ -67,6 +67,8 @@ set(_module_src pybind11_add_module(${python_module_name} MODULE ${_module_src}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI) + if(WIN32) if(${CMAKE_VERSION} VERSION_LESS "3.27") # this is a work-around for target_link_options inserting option after -link option, cause diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 32f7d4281c2..0d69c4e79c0 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -90,6 +90,8 @@ set(python_module_name _vm_impl) pybind11_add_module(${python_module_name} MODULE ${_module_src}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI) + if(WIN32) if(${CMAKE_VERSION} VERSION_LESS "3.27") # this is a work-around for target_link_options inserting option after -link option, cause diff --git a/dpnp/backend/extensions/window/CMakeLists.txt b/dpnp/backend/extensions/window/CMakeLists.txt index 5b7921ad324..c8cbd7c03bb 100644 --- a/dpnp/backend/extensions/window/CMakeLists.txt +++ b/dpnp/backend/extensions/window/CMakeLists.txt @@ -36,6 +36,8 @@ set(_module_src pybind11_add_module(${python_module_name} MODULE ${_module_src}) add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) +target_link_libraries(${python_module_name} PRIVATE DpctlExtCAPI) + if(_dpnp_sycl_targets) # make fat binary target_compile_options( diff --git a/dpnp/backend/include/dpnp4pybind11.hpp b/dpnp/backend/include/dpnp4pybind11.hpp index f2db8de18f7..af2f5f866eb 100644 --- a/dpnp/backend/include/dpnp4pybind11.hpp +++ b/dpnp/backend/include/dpnp4pybind11.hpp @@ -65,8 +65,8 @@ // TODO: Keep these includes once `dpctl.tensor` is 
removed from dpctl, // but replace the hardcoded relative path with a proper include path -#include "../../../dpctl_ext/include/dpctl_ext/tensor/_usmarray.h" -#include "../../../dpctl_ext/include/dpctl_ext/tensor/_usmarray_api.h" +#include <dpctl_ext/tensor/_usmarray.h> +#include <dpctl_ext/tensor/_usmarray_api.h> /* * Function to import dpctl and make C-API functions available. From 95acc3d518a3772c6a5cb58075c145654a28ca7c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Mon, 9 Mar 2026 09:32:38 -0700 Subject: [PATCH 24/24] Increase build time for public CI --- .github/workflows/conda-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index d2ac90621aa..eb66c91dc8c 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -37,7 +37,7 @@ jobs: actions: write runs-on: ${{ matrix.os }} - timeout-minutes: 60 + timeout-minutes: 80 defaults: run: