resulted in the following error.
GPU compilation of MethodInstance for CUDA.partial_mapreduce_grid(::ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, ::typeof(Base.add_sum), ::Float64, ::CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, ::CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, ::Val{true}, ::CuDeviceMatrix{Float64, 1}, ::CuDeviceMatrix{Int32, 1}) failed
KernelError: passing non-bitstype argument
Argument 2 to your kernel function is of type ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, which is not a bitstype:
.inner is of type Type{Float32} which is not isbits.
Only bitstypes, which are "plain data" types that are immutable
and contain no references to other values, can be used in GPU kernels.
For more information, see the `Base.isbitstype` function.
Stacktrace:
[1] check_invocation(job::GPUCompiler.CompilerJob)
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/validation.jl:108
[2] macro expansion
@ /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:87 [inlined]
[3] macro expansion
@ /pscratch/sd/y/yuanru/.julia/packages/Tracy/slmNc/src/tracepoint.jl:163 [inlined]
[4] compile_unhooked(output::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:85
[5] compile_unhooked
@ /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:80 [inlined]
[6] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:67
[7] compile
@ /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:55 [inlined]
[8] #1182
@ /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/compilation.jl:250 [inlined]
[9] JuliaContext(f::CUDA.var"#1182#1185"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}}; kwargs::@Kwargs{})
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:34
[10] JuliaContext(f::Function)
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/driver.jl:25
[11] compile(job::GPUCompiler.CompilerJob)
@ CUDA /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/compilation.jl:249
[12] actual_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/execution.jl:245
[13] cached_compilation(cache::Dict{Any, CuFunction}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, compiler::Function, linker::Function)
@ GPUCompiler /pscratch/sd/y/yuanru/.julia/packages/GPUCompiler/Ecaql/src/execution.jl:159
[14] macro expansion
@ /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/execution.jl:373 [inlined]
[15] macro expansion
@ ./lock.jl:267 [inlined]
[16] cufunction(f::typeof(CUDA.partial_mapreduce_grid), tt::Type{Tuple{ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, typeof(Base.add_sum), Float64, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Val{true}, CuDeviceMatrix{Float64, 1}, CuDeviceMatrix{Int32, 1}}}; kwargs::@Kwargs{})
@ CUDA /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/execution.jl:368
[17] cufunction(f::typeof(CUDA.partial_mapreduce_grid), tt::Type{Tuple{ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, typeof(Base.add_sum), Float64, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Val{true}, CuDeviceMatrix{Float64, 1}, CuDeviceMatrix{Int32, 1}}})
@ CUDA /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/execution.jl:365
[18] macro expansion
@ /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/compiler/execution.jl:112 [inlined]
[19] mapreducedim!(f::ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, op::typeof(Base.add_sum), R::CuArray{Float64, 2, CUDA.DeviceMemory}, A::CuArray{Int32, 2, CUDA.DeviceMemory}; init::Float64)
@ CUDA /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/mapreduce.jl:229
[20] mapreducedim!
@ /pscratch/sd/y/yuanru/.julia/packages/CUDA/ja0IX/src/mapreduce.jl:169 [inlined]
[21] _mapreduce(f::ComposedFunction{Base.Fix1{typeof(*), Float64}, Type{Float32}}, op::typeof(Base.add_sum), As::CuArray{Int32, 2, CUDA.DeviceMemory}; dims::Int64, init::Nothing)
@ GPUArrays /pscratch/sd/y/yuanru/.julia/packages/GPUArrays/u6tui/src/host/mapreduce.jl:76
[22] mapreduce(::Function, ::Function, ::CuArray{Int32, 2, CUDA.DeviceMemory}; dims::Int64, init::Nothing)
@ GPUArrays /pscratch/sd/y/yuanru/.julia/packages/GPUArrays/u6tui/src/host/mapreduce.jl:28
[23] mapreduce
@ /pscratch/sd/y/yuanru/.julia/packages/GPUArrays/u6tui/src/host/mapreduce.jl:28 [inlined]
[24] _sum
@ ./reducedim.jl:1041 [inlined]
[25] sum
@ ./reducedim.jl:1013 [inlined]
[26] _mean
@ /pscratch/sd/y/yuanru/.julia/packages/GPUArrays/u6tui/src/host/statistics.jl:37 [inlined]
[27] #mean#1
@ /global/cfs/cdirs/m2676/users/yuanru/.juliaup/juliaup/julia-1.10.10+0.x64.linux.gnu/share/julia/stdlib/v1.10/Statistics/src/Statistics.jl:104 [inlined]
[28] top-level scope
@ REPL[5]:1
[29] top-level scope
@ none:1
The CUDA version is v5.8.2.
resulted in the following error.
Note that it works if I remove either `Float32` or `dims=1`.
The CUDA version is v5.8.2.
Version Info