diff --git a/src/array.jl b/src/array.jl index f219ad5f..3a8ffddb 100644 --- a/src/array.jl +++ b/src/array.jl @@ -85,7 +85,7 @@ mutable struct oneArray{T,N,B} <: AbstractGPUArray{T,N} data::DataRef{B} maxsize::Int # maximum data size; excluding any selector bytes - offset::Int # offset of the data in the buffer, in number of elements + offset::Int # offset of the data in the buffer, in bytes dims::Dims{N} function oneArray{T,N,B}(::UndefInitializer, dims::Dims{N}) where {T,N,B} @@ -337,11 +337,11 @@ function Base.unsafe_convert(::Type{Ptr{T}}, x::oneArray{T}) where {T} if is_device(x) throw(ArgumentError("cannot take the CPU address of a $(typeof(x))")) end - convert(Ptr{T}, x.data[]) + x.offset*Base.elsize(x) + convert(Ptr{T}, x.data[]) + x.offset end function Base.unsafe_convert(::Type{ZePtr{T}}, x::oneArray{T}) where {T} - convert(ZePtr{T}, x.data[]) + x.offset*Base.elsize(x) + convert(ZePtr{T}, x.data[]) + x.offset end @@ -363,15 +363,19 @@ end function Base.unsafe_convert(::Type{oneDeviceArray{T,N,AS.CrossWorkgroup}}, a::oneArray{T,N}) where {T,N} oneDeviceArray{T,N,AS.CrossWorkgroup}(size(a), reinterpret(LLVMPtr{T,AS.CrossWorkgroup}, pointer(a)), - a.maxsize - a.offset*Base.elsize(a)) + a.maxsize - a.offset) end ## memory copying typetagdata(a::Array, i=1) = ccall(:jl_array_typetagdata, Ptr{UInt8}, (Any,), a) + i - 1 -typetagdata(a::oneArray, i=1) = - convert(ZePtr{UInt8}, a.data[]) + a.maxsize + a.offset + i - 1 +function typetagdata(a::oneArray, i=1) + # for zero-size element types (e.g. singleton unions), the byte offset is + # always zero; use 0 directly rather than dividing by a zero element size + elem_offset = iszero(Base.elsize(a)) ? 
0 : a.offset ÷ Base.elsize(a) + return convert(ZePtr{UInt8}, a.data[]) + a.maxsize + elem_offset + i - 1 +end function Base.copyto!(dest::oneArray{T}, doffs::Integer, src::Array{T}, soffs::Integer, n::Integer) where T @@ -536,12 +540,10 @@ end ## derived arrays function GPUArrays.derive(::Type{T}, a::oneArray, dims::Dims{N}, offset::Int) where {T,N} - offset = if sizeof(T) == 0 + if sizeof(T) == 0 Base.elsize(a) == 0 || error("Cannot derive a singleton array from non-singleton inputs") - offset - else - (a.offset * Base.elsize(a)) ÷ sizeof(T) + offset end + offset = a.offset + offset * sizeof(T) oneArray{T,N}(a.data, dims; a.maxsize, offset) end diff --git a/test/array.jl b/test/array.jl index 73121f86..aadc8928 100644 --- a/test/array.jl +++ b/test/array.jl @@ -43,6 +43,15 @@ end @test Array(xs) == [0,1,0] end +@testset "reinterpret of view with non-aligned offset" begin + # reinterpreting a view to a larger element type where the byte offset + # is not a multiple of the new element size + a = oneArray(Int32[1,2,3,4,5,6,7,8,9]) + v = view(a, 2:7) # offset of 1 Int32 = 4 bytes + r = reinterpret(Int64, v) # Int64 = 8 bytes; 4 is not a multiple of 8 + @test Array(r) == reinterpret(Int64, @view Array(a)[2:7]) +end + @testset "shared buffers & unsafe_wrap" begin a = oneVector{Int,oneL0.SharedBuffer}(undef, 2)