Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
cb03a49
Move data types to dpctl_ext.tensor
vlad-perevezentsev Mar 5, 2026
93510c0
Move class Device to dpctl_ext.tensor
vlad-perevezentsev Mar 5, 2026
8e11b23
Move constants to dpctl_ext.tensor
vlad-perevezentsev Mar 5, 2026
54fe331
Move array API utilities
vlad-perevezentsev Mar 5, 2026
60bba8f
Move print functions to dpctl_ext.tensor
vlad-perevezentsev Mar 5, 2026
b4fa023
Move include/dlpack to dpctl_ext.tensor
vlad-perevezentsev Mar 5, 2026
fb8b77e
Move _dlpack.pyx/pxd to dpctl_ext.tensor
vlad-perevezentsev Mar 5, 2026
5c9e183
Move _flags.pyx to dpctl_ext.tensor
vlad-perevezentsev Mar 5, 2026
8f44c37
Move cython helper files
vlad-perevezentsev Mar 6, 2026
e2441eb
Move dldevice_conversions functions
vlad-perevezentsev Mar 6, 2026
422e87e
Move usm_ndarray to dpctl_ext.tensor
vlad-perevezentsev Mar 6, 2026
e114808
Fix import _flags and _dlpack in _usmarray.pyx
vlad-perevezentsev Mar 6, 2026
39c0571
Update CMakes files to build usm_ndarray
vlad-perevezentsev Mar 6, 2026
3c428a6
Switch fully to dpctl_ext.tensor in dpctl_ext.tensor
vlad-perevezentsev Mar 6, 2026
3883a1c
Switch fully to dpctl_ext.tensor in dpnp
vlad-perevezentsev Mar 6, 2026
23164ac
Reorder _usmarray import in __init__.py
vlad-perevezentsev Mar 6, 2026
18c3d61
Add missing _place_impl() to _copy_utils.py
vlad-perevezentsev Mar 6, 2026
7f14dfc
Update _dlpack.pyx to use dpctl_ext.tensor
vlad-perevezentsev Mar 6, 2026
5e7123d
Update _usmarray.pyx to use dpctl_ext.tensor
vlad-perevezentsev Mar 6, 2026
ade264e
Integrate dpctl_ext.tensor C-API to dpnp4pybind11.hpp
vlad-perevezentsev Mar 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 82 additions & 1 deletion dpctl_ext/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,89 @@ else()
endif()

# at build time create include/ directory and copy header files over
# set(DPCTL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)
set(DPCTL_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/include)

set(CMAKE_INSTALL_RPATH "$ORIGIN")

# build_dpctl_ext(<target> <cython_source> <install_dest>
#                 [SYCL] [RELATIVE_PATH <path>])
#
# Translates <cython_source> to C++ with add_cython_target() and builds the
# generated file as the Python extension module <target>, which is installed
# into <install_dest>.
#
# Keyword arguments:
#   SYCL                  compile and link the generated source as SYCL code
#                         (add_sycl_to_target() plus the SYCL-specific flags
#                         set below).
#   RELATIVE_PATH <path>  appended to "$ORIGIN/" when composing the module's
#                         INSTALL_RPATH.
#
# Variables read from the calling scope:
#   DPCTL_OFFLOAD_COMPRESS, DPCTL_GENERATE_COVERAGE, DPCTL_WITH_REDIST,
#   DPCTL_LDFLAGS, Dpctl_INCLUDE_DIR, _dpctl_sycl_targets,
#   _dpctl_sycl_target_compile_options, _dpctl_sycl_target_link_options
#
# Besides <target> itself the function
#   - installs the "<name>.h" and "<name>_api.h" files located next to the
#     generated source (OPTIONAL, so a missing header is not an error);
#   - with DPCTL_GENERATE_COVERAGE, installs the generated C++ source into the
#     source directory's path relative to CMAKE_SOURCE_DIR;
#   - defines the INTERFACE library "<target>_headers", which depends on
#     <target> and re-exports its interface include directories.
function(build_dpctl_ext _trgt _src _dest)
    set(options SYCL)
    # PARSE_ARGV reads the arguments following the three named parameters
    # directly, so empty or ';'-containing values are not re-split.
    cmake_parse_arguments(
        PARSE_ARGV 3
        BUILD_DPCTL_EXT
        "${options}"
        "RELATIVE_PATH"
        ""
    )
    add_cython_target(${_trgt} ${_src} CXX OUTPUT_VAR _generated_src)
    python_add_library(${_trgt} MODULE WITH_SOABI ${_generated_src})
    if(BUILD_DPCTL_EXT_SYCL)
        add_sycl_to_target(TARGET ${_trgt} SOURCES ${_generated_src})
        target_compile_options(${_trgt} PRIVATE -fno-sycl-id-queries-fit-in-int)
        target_link_options(${_trgt} PRIVATE -fsycl-device-code-split=per_kernel)
        if(DPCTL_OFFLOAD_COMPRESS)
            target_link_options(${_trgt} PRIVATE --offload-compress)
        endif()
        if(_dpctl_sycl_targets)
            # make fat binary
            target_compile_options(
                ${_trgt}
                PRIVATE ${_dpctl_sycl_target_compile_options}
            )
            target_link_options(${_trgt} PRIVATE ${_dpctl_sycl_target_link_options})
        endif()
    endif()
    target_link_libraries(${_trgt} PRIVATE Python::NumPy)
    if(DPCTL_GENERATE_COVERAGE)
        target_compile_definitions(${_trgt} PRIVATE CYTHON_TRACE=1 CYTHON_TRACE_NOGIL=1)
        if(BUILD_DPCTL_EXT_SYCL)
            target_compile_options(${_trgt} PRIVATE -fno-sycl-use-footer)
        endif()
    endif()
    # Dpctl
    target_include_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR})
    # the library search path is the parent of the Dpctl include directory
    target_link_directories(${_trgt} PRIVATE ${Dpctl_INCLUDE_DIR}/..)
    target_link_libraries(${_trgt} PRIVATE DPCTLSyclInterface)
    set(_linker_options "LINKER:${DPCTL_LDFLAGS}")
    target_link_options(${_trgt} PRIVATE ${_linker_options})

    # Paths of the headers expected next to the generated C++ source.
    get_filename_component(_name_wle "${_generated_src}" NAME_WLE)
    get_filename_component(_generated_src_dir "${_generated_src}" DIRECTORY)
    set(_generated_public_h "${_generated_src_dir}/${_name_wle}.h")
    set(_generated_api_h "${_generated_src_dir}/${_name_wle}_api.h")

    # TODO: create separate folder inside build folder that contains only
    # headers related to this target and appropriate folder structure to
    # eliminate shadow dependencies
    get_filename_component(_generated_src_dir_dir "${_generated_src_dir}" DIRECTORY)
    # TODO: do not set directory if we did not generate header
    target_include_directories(${_trgt} INTERFACE ${_generated_src_dir_dir})

    # Compose the install-time RPATH of the module.
    set(_rpath_value "$ORIGIN")
    if(BUILD_DPCTL_EXT_RELATIVE_PATH)
        set(_rpath_value "${_rpath_value}/${BUILD_DPCTL_EXT_RELATIVE_PATH}")
    endif()
    if(DPCTL_WITH_REDIST)
        # add a second search entry three directory levels above the first
        set(_rpath_value "${_rpath_value}:${_rpath_value}/../../..")
    endif()
    set_target_properties(${_trgt} PROPERTIES INSTALL_RPATH "${_rpath_value}")

    install(TARGETS ${_trgt} LIBRARY DESTINATION ${_dest})
    # Both headers share one destination; OPTIONAL applies to each file.
    install(
        FILES
            "${_generated_api_h}"
            "${_generated_public_h}"
        DESTINATION ${CMAKE_INSTALL_PREFIX}/dpctl_ext/include/${_dest}
        OPTIONAL
    )
    if(DPCTL_GENERATE_COVERAGE)
        get_filename_component(_original_src_dir ${_src} DIRECTORY)
        file(RELATIVE_PATH _rel_dir ${CMAKE_SOURCE_DIR} ${_original_src_dir})
        install(FILES ${_generated_src} DESTINATION ${CMAKE_INSTALL_PREFIX}/${_rel_dir})
    endif()

    # Create target with headers only, because python is managing all the
    # library imports at runtime
    set(_trgt_headers ${_trgt}_headers)
    add_library(${_trgt_headers} INTERFACE)
    add_dependencies(${_trgt_headers} ${_trgt})
    get_target_property(_trgt_headers_dir ${_trgt} INTERFACE_INCLUDE_DIRECTORIES)
    target_include_directories(${_trgt_headers} INTERFACE ${_trgt_headers_dir})
endfunction()

add_subdirectory(tensor)
8 changes: 8 additions & 0 deletions dpctl_ext/tensor/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@

find_package(Python COMPONENTS Development.Module)

# Build one Python extension module per Cython source found in this directory.
# CONFIGURE_DEPENDS makes the build re-check the glob, so adding or removing a
# .pyx file triggers a re-configure instead of being silently missed.
file(GLOB _cython_sources CONFIGURE_DEPENDS *.pyx)
foreach(_cy_file IN LISTS _cython_sources)
    # the target is named after the source file without its last extension
    get_filename_component(_trgt "${_cy_file}" NAME_WLE)
    build_dpctl_ext(${_trgt} ${_cy_file} "dpctl_ext/tensor" RELATIVE_PATH "..")
    target_include_directories(${_trgt} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
    # NOTE(review): linking of the generated headers into DpctlCAPI is
    # disabled here; presumably that target is not defined in this build -
    # confirm before re-enabling or deleting the line below.
    # target_link_libraries(DpctlCAPI INTERFACE ${_trgt}_headers)
endforeach()

if(WIN32)
if(${CMAKE_VERSION} VERSION_LESS "3.27")
# this is a work-around for target_link_options inserting option after -link option, cause
Expand Down
36 changes: 36 additions & 0 deletions dpctl_ext/tensor/__init__.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# *****************************************************************************
# Copyright (c) 2026, Intel Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# - Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# - Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGE.
# *****************************************************************************

""" This file declares the extension types and functions for the Cython API
implemented in the _usmarray.pyx file.
"""

# distutils: language = c++
# cython: language_level=3

from ._usmarray cimport *
72 changes: 72 additions & 0 deletions dpctl_ext/tensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@


from ._accumulation import cumulative_logsumexp, cumulative_prod, cumulative_sum
from ._array_api import __array_api_version__, __array_namespace_info__
from ._clip import clip
from ._constants import e, inf, nan, newaxis, pi
from ._copy_utils import (
asnumpy,
astype,
Expand All @@ -53,6 +55,28 @@
zeros,
zeros_like,
)
from ._data_types import (
bool,
complex64,
complex128,
dtype,
float16,
float32,
float64,
int8,
int16,
int32,
int64,
uint8,
uint16,
uint32,
uint64,
)
from ._device import Device
from ._dldevice_conversions import (
dldevice_to_sycl_device,
sycl_device_to_dldevice,
)
from ._elementwise_funcs import (
abs,
acos,
Expand Down Expand Up @@ -157,6 +181,13 @@
tile,
unstack,
)
from ._print import (
get_print_options,
print_options,
set_print_options,
usm_ndarray_repr,
usm_ndarray_str,
)
from ._reduction import (
argmax,
argmin,
Expand All @@ -168,6 +199,12 @@
reduce_hypot,
sum,
)

# isort: off
# placed here to avoid circular import
from ._usmarray import DLDeviceType, usm_ndarray

# isort: on
from ._reshape import reshape
from ._search_functions import where
from ._searchsorted import searchsorted
Expand All @@ -185,6 +222,32 @@
from ._utility_functions import all, any, diff

__all__ = [
"Device",
"DLDeviceType",
"usm_ndarray",
# data types
"bool",
"dtype",
"int8",
"uint8",
"int16",
"uint16",
"int32",
"uint32",
"int64",
"uint64",
"float16",
"float32",
"float64",
"complex64",
"complex128",
# constants
"e",
"inf",
"nan",
"newaxis",
"pi",
# functions
"abs",
"acos",
"acosh",
Expand Down Expand Up @@ -229,6 +292,7 @@
"cumulative_sum",
"diff",
"divide",
"dldevice_to_sycl_device",
"empty",
"empty_like",
"equal",
Expand All @@ -245,6 +309,7 @@
"from_numpy",
"full",
"full_like",
"get_print_options",
"greater",
"greater_equal",
"hypot",
Expand Down Expand Up @@ -288,6 +353,7 @@
"place",
"positive",
"pow",
"print_options",
"prod",
"proj",
"put",
Expand All @@ -303,6 +369,7 @@
"round",
"rsqrt",
"searchsorted",
"set_print_options",
"sign",
"signbit",
"sin",
Expand All @@ -316,6 +383,7 @@
"subtract",
"sum",
"swapaxes",
"sycl_device_to_dldevice",
"take",
"take_along_axis",
"tan",
Expand All @@ -332,9 +400,13 @@
"unique_inverse",
"unique_values",
"unstack",
"usm_ndarray_repr",
"usm_ndarray_str",
"var",
"vecdot",
"where",
"zeros",
"zeros_like",
"__array_api_version__",
"__array_namespace_info__",
]
25 changes: 12 additions & 13 deletions dpctl_ext/tensor/_accumulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@
# *****************************************************************************

import dpctl
import dpctl.tensor as dpt
from dpctl.utils import ExecutionPlacementError, SequentialOrderManager

# TODO: revert to `import dpctl.tensor...`
# when dpnp fully migrates dpctl/tensor
import dpctl_ext.tensor as dpt_ext
import dpctl_ext.tensor as dpt
import dpctl_ext.tensor._tensor_accumulation_impl as tai
import dpctl_ext.tensor._tensor_impl as ti

Expand Down Expand Up @@ -82,7 +81,7 @@ def _accumulate_common(
perm = [i for i in range(nd) if i != axis] + [
axis,
]
arr = dpt_ext.permute_dims(x, perm)
arr = dpt.permute_dims(x, perm)
q = x.sycl_queue
inp_dt = x.dtype
res_usm_type = x.usm_type
Expand Down Expand Up @@ -130,16 +129,16 @@ def _accumulate_common(
)
# permute out array dims if necessary
if a1 != nd:
out = dpt_ext.permute_dims(out, perm)
out = dpt.permute_dims(out, perm)
orig_out = out
if ti._array_overlap(x, out) and implemented_types:
out = dpt_ext.empty_like(out)
out = dpt.empty_like(out)
else:
out = dpt_ext.empty(
out = dpt.empty(
res_sh, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
)
if a1 != nd:
out = dpt_ext.permute_dims(out, perm)
out = dpt.permute_dims(out, perm)

_manager = SequentialOrderManager[q]
depends = _manager.submitted_events
Expand All @@ -166,7 +165,7 @@ def _accumulate_common(
out = orig_out
else:
if _dtype_supported(res_dt, res_dt):
tmp = dpt_ext.empty(
tmp = dpt.empty(
arr.shape, dtype=res_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray(
Expand All @@ -191,18 +190,18 @@ def _accumulate_common(
_manager.add_event_pair(ht_e, acc_ev)
else:
buf_dt = _default_accumulation_type_fn(inp_dt, q)
tmp = dpt_ext.empty(
tmp = dpt.empty(
arr.shape, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q
)
ht_e_cpy, cpy_e = ti._copy_usm_ndarray_into_usm_ndarray(
src=arr, dst=tmp, sycl_queue=q, depends=depends
)
_manager.add_event_pair(ht_e_cpy, cpy_e)
tmp_res = dpt_ext.empty(
tmp_res = dpt.empty(
res_sh, dtype=buf_dt, usm_type=res_usm_type, sycl_queue=q
)
if a1 != nd:
tmp_res = dpt_ext.permute_dims(tmp_res, perm)
tmp_res = dpt.permute_dims(tmp_res, perm)
if not include_initial:
ht_e, acc_ev = _accumulate_fn(
src=tmp,
Expand All @@ -225,10 +224,10 @@ def _accumulate_common(
_manager.add_event_pair(ht_e_cpy2, cpy_e2)

if appended_axis:
out = dpt_ext.squeeze(out)
out = dpt.squeeze(out)
if a1 != nd:
inv_perm = sorted(range(nd), key=lambda d: perm[d])
out = dpt_ext.permute_dims(out, inv_perm)
out = dpt.permute_dims(out, inv_perm)

return out

Expand Down
Loading
Loading