diff --git a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts index 13512fbac35a..a20336b6a5ad 100644 --- a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts +++ b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts @@ -18,6 +18,11 @@ import { kProgrammableEncoderTypes, ProgrammableEncoderType, } from '../../../../util/command_buffer_maker.js'; +import { align } from '../../../../util/math.js'; + +const kRenderTargetFormat = 'rgba32uint' as const; +const kBytesPerPixel = 16; // rgba32uint = 4 x u32 = 16 bytes +const kMinBytesPerRow = 256; // WebGPU requires bytesPerRow to be a multiple of 256 class ImmediateDataOperationTest extends AllFeaturesMaxLimitsGPUTest { override async init() { @@ -28,93 +33,115 @@ class ImmediateDataOperationTest extends AllFeaturesMaxLimitsGPUTest { return; } } - - skipIfStorageBuffersInFragmentStageNotAvailable(encoderType: ProgrammableEncoderType) { - if (!this.isCompatibility) { - return; - } - const needsStorageBuffersInFragmentStage = - encoderType === 'render pass' || encoderType === 'render bundle'; - this.skipIf( - needsStorageBuffersInFragmentStage && - !(this.device.limits.maxStorageBuffersInFragmentStage! >= 1), - `maxStorageBuffersInFragmentStage(${this.device.limits.maxStorageBuffersInFragmentStage}) < 1` - ); - } } +/** + * Creates a pipeline for testing immediate data. + * + * For compute pipelines: uses a storage buffer to write results. + * For render pipelines: returns results via the fragment shader's rgba32uint color output, + * avoiding the need for storage buffers in the fragment stage. + * + * @param copyCode - Code that writes to `output[]` array (used by compute shader) + * @param fragmentReturnExpr - WGSL expression returning vec4u (used by fragment shader) + * @param renderTargetWidth - Width of the render target in pixels (for vertex positioning) + */ function createPipeline( t: AllFeaturesMaxLimitsGPUTest, encoderType: ProgrammableEncoderType, wgslDecl: string, copyCode: string, + fragmentReturnExpr: string, immediateSize: number, + renderTargetWidth: number = 4, pipelineLayout?: GPUPipelineLayout ) { - const layout = - pipelineLayout || - t.device.createPipelineLayout({ - bindGroupLayouts: [ - t.device.createBindGroupLayout({ - entries: [ - { - binding: 0, - visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, - buffer: { type: 'storage' }, - }, - { - binding: 1, - visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX, - buffer: { type: 'uniform', hasDynamicOffset: true }, - }, - ], - }), - ], - immediateSize, - }); + if (encoderType === 'compute pass') { + const layout = + pipelineLayout || + t.device.createPipelineLayout({ + bindGroupLayouts: [ + t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'uniform', hasDynamicOffset: true }, + }, + ], + }), + ], + immediateSize, + }); - const fullCode = ` - ${wgslDecl} - @group(0) @binding(0) var output: array; - @group(0) @binding(1) var outIndex: u32; + const computeCode = ` + ${wgslDecl} + @group(0) @binding(0) var output: array; + @group(0) @binding(1) var outIndex: u32; - @compute @workgroup_size(1) fn cs_main() { - ${copyCode} - } - @fragment fn fs_main() -> @location(0) vec4u { - ${copyCode} - return vec4u(0); - } - `; + @compute @workgroup_size(1) fn cs_main() { + ${copyCode} + } + `; - if (encoderType === 'compute pass') { return t.device.createComputePipeline({ layout, compute: { - module: t.device.createShaderModule({ code: fullCode }), + module: t.device.createShaderModule({ code: computeCode }), }, }); } else { + // Render pipeline: no storage buffer needed. + // The fragment shader returns results via the render target color output. + const layout = + pipelineLayout || + t.device.createPipelineLayout({ + bindGroupLayouts: [ + t.device.createBindGroupLayout({ + entries: [ + { + binding: 0, + visibility: GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX, + buffer: { type: 'uniform', hasDynamicOffset: true }, + }, + ], + }), + ], + immediateSize, + }); + + const vertexCode = ` + @group(0) @binding(0) var outIndex: u32; + + @vertex fn vs_main() -> @builtin(position) vec4f { + // Map outIndex to pixel centers in a ${renderTargetWidth}x1 render target. + let x = (f32(outIndex) + 0.5) / f32(${renderTargetWidth}) * 2.0 - 1.0; + return vec4f(x, 0.0, 0.0, 1.0); + } + `; + + const fragmentCode = ` + ${wgslDecl} + @group(0) @binding(0) var outIndex: u32; + + @fragment fn fs_main() -> @location(0) vec4u { + return ${fragmentReturnExpr}; + } + `; + return t.device.createRenderPipeline({ layout, vertex: { - module: t.device.createShaderModule({ - code: ` - // Re-declare outIndex in the vertex shader - @group(0) @binding(1) var outIndex: u32; - - @vertex fn vs_main() -> @builtin(position) vec4f { - // Map outIndex 0..3 to pixel centers in a 4x1 render target. - // x = (f32(outIndex) + 0.5) / 2.0 - 1.0 - let x = (f32(outIndex) + 0.5) / 2.0 - 1.0; - return vec4f(x, 0.0, 0.0, 1.0); - } - `, - }), + module: t.device.createShaderModule({ code: vertexCode }), }, fragment: { - module: t.device.createShaderModule({ code: fullCode }), - targets: [{ format: 'r32uint' }], + module: t.device.createShaderModule({ code: fragmentCode }), + targets: [{ format: kRenderTargetFormat }], }, primitive: { topology: 'point-list', @@ -155,20 +182,26 @@ function createOutputIndexBuffer(t: AllFeaturesMaxLimitsGPUTest, count: number): return buffer; } +/** + * Encode a pass for the given encoder type. + * For render paths, creates an rgba32uint render target and returns it so callers can read back. + */ function encodeForPassType( t: AllFeaturesMaxLimitsGPUTest, encoderType: ProgrammableEncoderType, commandEncoder: GPUCommandEncoder, - fn: (pass: GPURenderPassEncoder | GPUComputePassEncoder | GPURenderBundleEncoder) => void -) { + fn: (pass: GPURenderPassEncoder | GPUComputePassEncoder | GPURenderBundleEncoder) => void, + renderTargetWidth: number = 4 +): GPUTexture | undefined { if (encoderType === 'compute pass') { const pass = commandEncoder.beginComputePass(); fn(pass); pass.end(); + return undefined; } else { const renderTargetTexture = t.createTextureTracked({ - size: [4, 1, 1], - format: 'r32uint', + size: [renderTargetWidth, 1, 1], + format: kRenderTargetFormat, usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, }); @@ -188,7 +221,7 @@ function encodeForPassType( } else { // Render Bundle const bundleEncoder = t.device.createRenderBundleEncoder({ - colorFormats: ['r32uint'], + colorFormats: [kRenderTargetFormat], }); fn(bundleEncoder); const bundle = bundleEncoder.finish(); @@ -206,6 +239,8 @@ function encodeForPassType( pass.executeBundles([bundle]); pass.end(); } + + return renderTargetTexture; } } @@ -223,6 +258,19 @@ function setPipeline( } } +/** + * Run a pipeline and check the output values. + * + * For compute: writes to a storage buffer and checks it directly. + * For render: reads back the rgba32uint render target and checks pixel values. + * + * Simple mode (encodeFn omitted): a single draw/dispatch. The caller provides setImmediatesFn + * which is called after setPipeline + setBindGroup. expectedValues.length must be <= 4. + * + * Multi-draw mode (encodeFn provided): the caller drives all bind group / immediate / draw calls + * via encodeFn(enc, bindGroup, indexUniformBuffer). numDraws and outputU32sPerDraw control the + * output buffer size and render target width. + */ function runAndCheck( t: AllFeaturesMaxLimitsGPUTest, encoderType: ProgrammableEncoderType, @@ -230,41 +278,110 @@ function runAndCheck( setImmediatesFn: ( encoder: GPUComputePassEncoder | GPURenderPassEncoder | GPURenderBundleEncoder ) => void, - expectedValues: number[] + expectedValues: number[], + { + numDraws = 1, + outputU32sPerDraw, + encodeFn, + renderTargetWidth = 4, + }: { + numDraws?: number; + outputU32sPerDraw?: number; + encodeFn?: ( + enc: GPUComputePassEncoder | GPURenderPassEncoder | GPURenderBundleEncoder, + bindGroup: GPUBindGroup, + indexUniformBuffer: GPUBuffer + ) => void; + renderTargetWidth?: number; + } = {} ) { assert(expectedValues.length > 0, 'expectedValues must not be empty'); - const outputBuffer = t.createBufferTracked({ - size: expectedValues.length * 4, - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); - // A dynamic-offset uniform buffer supplies outIndex = 0 here. - // We use a uniform buffer (rather than e.g. firstVertex via @builtin(vertex_index)) because: - // - It works across all shader stages (compute, vertex, fragment). - // - firstVertex is emulated via root constants on D3D12, which is the same mechanism - // backing var, so using it could mask bugs in the path under test. - // The pipeline layout declares hasDynamicOffset, so we must always pass a dynamic offset - // array — even though this simple helper only ever uses offset [0]. - const indexUniformBuffer = t.makeBufferWithContents(new Uint32Array([0]), GPUBufferUsage.UNIFORM); - - const bindGroup = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: outputBuffer } }, - { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, - ], - }); - const commandEncoder = t.device.createCommandEncoder(); - encodeForPassType(t, encoderType, commandEncoder, encoder => { - setPipeline(encoderType, encoder, pipeline); - encoder.setBindGroup(0, bindGroup, [0]); - setImmediatesFn(encoder); - dispatchOrDraw(encoderType, encoder); - }); + // In multi-draw mode, outputU32sPerDraw must be specified. + // In simple mode, it defaults to expectedValues.length (single draw). + const u32sPerDraw = outputU32sPerDraw ?? expectedValues.length; + + const indexUniformBuffer = encodeFn + ? createOutputIndexBuffer(t, numDraws) + : t.makeBufferWithContents(new Uint32Array([0]), GPUBufferUsage.UNIFORM); + + if (encoderType === 'compute pass') { + const outputBuffer = t.createBufferTracked({ + size: u32sPerDraw * 4 * numDraws, + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); - t.device.queue.submit([commandEncoder.finish()]); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); + + const commandEncoder = t.device.createCommandEncoder(); + encodeForPassType(t, encoderType, commandEncoder, encoder => { + setPipeline(encoderType, encoder, pipeline); + if (encodeFn) { + encodeFn(encoder, bindGroup, indexUniformBuffer); + } else { + encoder.setBindGroup(0, bindGroup, [0]); + setImmediatesFn(encoder); + dispatchOrDraw(encoderType, encoder); + } + }); + + t.device.queue.submit([commandEncoder.finish()]); + t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedValues)); + } else { + assert(u32sPerDraw <= 4, 'runAndCheck supports at most 4 u32s per draw (one rgba32uint pixel)'); - t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedValues)); + const bindGroup = t.device.createBindGroup({ + layout: pipeline.getBindGroupLayout(0), + entries: [{ binding: 0, resource: { buffer: indexUniformBuffer, size: 4 } }], + }); + + const commandEncoder = t.device.createCommandEncoder(); + const pixelWidth = encodeFn ? renderTargetWidth : 1; + const renderTarget = encodeForPassType( + t, + encoderType, + commandEncoder, + encoder => { + setPipeline(encoderType, encoder, pipeline); + if (encodeFn) { + encodeFn(encoder, bindGroup, indexUniformBuffer); + } else { + encoder.setBindGroup(0, bindGroup, [0]); + setImmediatesFn(encoder); + dispatchOrDraw(encoderType, encoder); + } + }, + pixelWidth + )!; + + const bytesPerRow = align(pixelWidth * kBytesPerPixel, kMinBytesPerRow); + const readbackBuffer = t.createBufferTracked({ + size: bytesPerRow, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + commandEncoder.copyTextureToBuffer( + { texture: renderTarget }, + { buffer: readbackBuffer, bytesPerRow }, + [pixelWidth, 1] + ); + + t.device.queue.submit([commandEncoder.finish()]); + + // Each pixel is 4 u32s (rgba32uint). Pad each draw's output to 4. + const paddedExpected = new Uint32Array(pixelWidth * 4); + for (let d = 0; d < numDraws; d++) { + for (let i = 0; i < u32sPerDraw; i++) { + paddedExpected[d * 4 + i] = expectedValues[d * u32sPerDraw + i]; + } + } + t.expectGPUBufferValuesEqual(readbackBuffer, paddedExpected); + } } export const g = makeTestGroup(ImmediateDataOperationTest); @@ -291,10 +408,10 @@ g.test('basic_execution') .fn(t => { const { encoderType, dataType, scalarType, vectorSize } = t.params; t.skipIf(scalarType === 'f16', 'Immediate data blocks do not yet support f16 types'); - t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); let wgslDecl = ''; let copyCode = ''; + let fragmentReturnExpr = ''; let immediateSize = 0; let expected: number[] = []; let inputData: Uint32Array; @@ -306,6 +423,7 @@ g.test('basic_execution') var data: S; `; copyCode = 'output[0] = data.a; output[1] = data.b;'; + fragmentReturnExpr = 'vec4u(data.a, data.b, 0, 0)'; inputData = new Uint32Array([0xdeadbeef, 0xcafebabe]); expected = [0xdeadbeef, 0xcafebabe]; } else { @@ -322,6 +440,20 @@ g.test('basic_execution') copyCode += `output[${i}] = bitcast(${valExpr});\n`; } + // Build fragment return expression: pack values into vec4u, padding with 0. + if (vSize === 1) { + fragmentReturnExpr = 'vec4u(bitcast(data), 0, 0, 0)'; + } else if (vSize === 2) { + fragmentReturnExpr = 'vec4u(bitcast(data[0]), bitcast(data[1]), 0, 0)'; + } else if (vSize === 3) { + fragmentReturnExpr = + 'vec4u(bitcast(data[0]), bitcast(data[1]), bitcast(data[2]), 0)'; + } else { + // vSize === 4 + fragmentReturnExpr = + 'vec4u(bitcast(data[0]), bitcast(data[1]), bitcast(data[2]), bitcast(data[3]))'; + } + inputData = new Uint32Array(vSize); for (let i = 0; i < vSize; i++) { if (sType === 'u32') { @@ -342,7 +474,14 @@ g.test('basic_execution') } } - const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, immediateSize); + const pipeline = createPipeline( + t, + encoderType, + wgslDecl, + copyCode, + fragmentReturnExpr, + immediateSize + ); runAndCheck( t, @@ -360,7 +499,6 @@ g.test('update_data') .params(u => u.combine('encoderType', kProgrammableEncoderTypes)) .fn(t => { const { encoderType } = t.params; - t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); const immediateSize = 16; const wgslDecl = 'var data: vec4;'; const copyCode = ` @@ -370,57 +508,37 @@ g.test('update_data') output[base + 2] = data[2]; output[base + 3] = data[3]; `; + const fragmentReturnExpr = 'vec4u(data[0], data[1], data[2], data[3])'; - const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, immediateSize); - - const outputBuffer = t.createBufferTracked({ - size: 4 * 4 * 3, // 3 steps, 4 u32s each - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); - - // Uniform buffer with output indices [0, 1, 2] at 256-byte aligned offsets, - // used to direct each dispatch/draw step to a separate region of the output buffer. - const indexUniformBuffer = createOutputIndexBuffer(t, 3); - - const bindGroup = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: outputBuffer } }, - { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, - ], - }); - - const commandEncoder = t.device.createCommandEncoder(); + const pipeline = createPipeline( + t, + encoderType, + wgslDecl, + copyCode, + fragmentReturnExpr, + immediateSize + ); - /** Set bind group with dynamic offset for output index, set immediates, and dispatch/draw. */ - const runStep = ( - pass: GPURenderPassEncoder | GPUComputePassEncoder | GPURenderBundleEncoder, - stepIndex: number, - data: Uint32Array, - dstOffset: number = 0 - ) => { - pass.setBindGroup(0, bindGroup, [stepIndex * 256]); - pass.setImmediates!(dstOffset, data); - dispatchOrDraw(encoderType, pass); - }; - - encodeForPassType(t, encoderType, commandEncoder, enc => { - setPipeline(encoderType, enc, pipeline); - - // Step 1: Full set [1, 2, 3, 4] - runStep(enc, 0, new Uint32Array([1, 2, 3, 4])); - - // Step 2: Full update [5, 6, 7, 8] - runStep(enc, 1, new Uint32Array([5, 6, 7, 8])); - - // Step 3: Partial update offset 4 bytes (index 1) with [9, 10] -> [5, 9, 10, 8] - runStep(enc, 2, new Uint32Array([9, 10]), 4); + runAndCheck(t, encoderType, pipeline, () => {}, [1, 2, 3, 4, 5, 6, 7, 8, 5, 9, 10, 8], { + numDraws: 3, + outputU32sPerDraw: 4, + encodeFn: (enc, bindGroup) => { + // Step 1: Full set [1, 2, 3, 4] + enc.setBindGroup(0, bindGroup, [0]); + enc.setImmediates!(0, new Uint32Array([1, 2, 3, 4])); + dispatchOrDraw(encoderType, enc); + + // Step 2: Full update [5, 6, 7, 8] + enc.setBindGroup(0, bindGroup, [256]); + enc.setImmediates!(0, new Uint32Array([5, 6, 7, 8])); + dispatchOrDraw(encoderType, enc); + + // Step 3: Partial update offset 4 bytes (index 1) with [9, 10] -> [5, 9, 10, 8] + enc.setBindGroup(0, bindGroup, [512]); + enc.setImmediates!(4, new Uint32Array([9, 10])); + dispatchOrDraw(encoderType, enc); + }, }); - - t.device.queue.submit([commandEncoder.finish()]); - - const expected = new Uint32Array([1, 2, 3, 4, 5, 6, 7, 8, 5, 9, 10, 8]); - t.expectGPUBufferValuesEqual(outputBuffer, expected); }); g.test('pipeline_switch') @@ -437,22 +555,24 @@ g.test('pipeline_switch') ) .fn(t => { const { encoderType, sameImmediateSize } = t.params; - t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); // Pipeline A always uses vec4 (16 bytes). const wgslDeclA = 'var data: vec4;'; const copyCodeA = ` output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w; `; + const fragmentReturnExprA = 'vec4u(data.x, data.y, data.z, data.w)'; let wgslDeclB: string; let copyCodeB: string; + let fragmentReturnExprB: string; let immediateSizeB: number; if (sameImmediateSize) { // Pipeline B has the same immediate layout as A (vec4, 16 bytes). wgslDeclB = wgslDeclA; copyCodeB = copyCodeA; + fragmentReturnExprB = fragmentReturnExprA; immediateSizeB = 16; } else { // Pipeline B uses vec2 (8 bytes) — different/incompatible layout. @@ -460,6 +580,7 @@ g.test('pipeline_switch') copyCodeB = ` output[0] = data.x; output[1] = data.y; output[2] = 0u; output[3] = 0u; `; + fragmentReturnExprB = 'vec4u(data.x, data.y, 0, 0)'; immediateSizeB = 8; } @@ -468,67 +589,131 @@ g.test('pipeline_switch') const immDataSizeB = sameImmediateSize ? undefined : immediateSizeB / 4; const expectedB = sameImmediateSize ? [5, 6, 7, 8] : [5, 6, 0, 0]; + // Create a shared bind group layout for both pipelines so they are bind-group-compatible. + // Compute path needs storage + uniform; render path needs only uniform. const bindGroupLayout = t.device.createBindGroupLayout({ - entries: [ - { - binding: 0, - visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT, - buffer: { type: 'storage' }, - }, - { - binding: 1, - visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX, - buffer: { type: 'uniform', hasDynamicOffset: true }, - }, - ], + entries: + encoderType === 'compute pass' + ? [ + { + binding: 0, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'storage' }, + }, + { + binding: 1, + visibility: GPUShaderStage.COMPUTE, + buffer: { type: 'uniform', hasDynamicOffset: true }, + }, + ] + : [ + { + binding: 0, + visibility: GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX, + buffer: { type: 'uniform', hasDynamicOffset: true }, + }, + ], }); const layoutA = t.device.createPipelineLayout({ bindGroupLayouts: [bindGroupLayout], immediateSize: 16, }); - const pipelineA = createPipeline(t, encoderType, wgslDeclA, copyCodeA, 16, layoutA); + const pipelineA = createPipeline( + t, + encoderType, + wgslDeclA, + copyCodeA, + fragmentReturnExprA, + 16, + 4, + layoutA + ); const layoutB = t.device.createPipelineLayout({ bindGroupLayouts: [bindGroupLayout], immediateSize: immediateSizeB, }); - const pipelineB = createPipeline(t, encoderType, wgslDeclB, copyCodeB, immediateSizeB, layoutB); + const pipelineB = createPipeline( + t, + encoderType, + wgslDeclB, + copyCodeB, + fragmentReturnExprB, + immediateSizeB, + 4, + layoutB + ); - const outputBuffer = t.createBufferTracked({ - size: 16, // 4 u32s at outIndex 0 - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); const indexUniformBuffer = createOutputIndexBuffer(t, 1); - const bindGroup = t.device.createBindGroup({ - layout: bindGroupLayout, - entries: [ - { binding: 0, resource: { buffer: outputBuffer } }, - { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, - ], - }); + let bindGroup: GPUBindGroup; + if (encoderType === 'compute pass') { + const outputBuffer = t.createBufferTracked({ + size: 16, // 4 u32s at outIndex 0 + usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, + }); + bindGroup = t.device.createBindGroup({ + layout: bindGroupLayout, + entries: [ + { binding: 0, resource: { buffer: outputBuffer } }, + { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, + ], + }); - const commandEncoder = t.device.createCommandEncoder(); - encodeForPassType(t, encoderType, commandEncoder, enc => { - // Only set bind group once between bind group compatible pipelines. - setPipeline(encoderType, enc, pipelineA); - enc.setBindGroup(0, bindGroup, [0]); - enc.setImmediates!(0, new Uint32Array([1, 2, 3, 4])); - - // Switch to Pipeline B without re-setting the bind group. - // The bind group set under Pipeline A must remain valid. - setPipeline(encoderType, enc, pipelineB); - // Same source data; dataSize controls how many elements are written. - // Passing undefined for srcOffset/srcSize relies on WebIDL defaults (0 / array.length). - enc.setImmediates!(0, immDataB, undefined, immDataSizeB); - dispatchOrDraw(encoderType, enc); - }); + const commandEncoder = t.device.createCommandEncoder(); + encodeForPassType(t, encoderType, commandEncoder, enc => { + // Only set bind group once between bind group compatible pipelines. + setPipeline(encoderType, enc, pipelineA); + enc.setBindGroup(0, bindGroup, [0]); + enc.setImmediates!(0, new Uint32Array([1, 2, 3, 4])); + + // Switch to Pipeline B without re-setting the bind group. + setPipeline(encoderType, enc, pipelineB); + enc.setImmediates!(0, immDataB, undefined, immDataSizeB); + dispatchOrDraw(encoderType, enc); + }); - t.device.queue.submit([commandEncoder.finish()]); + t.device.queue.submit([commandEncoder.finish()]); + t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedB)); + } else { + bindGroup = t.device.createBindGroup({ + layout: bindGroupLayout, + entries: [{ binding: 0, resource: { buffer: indexUniformBuffer, size: 4 } }], + }); - // Pipeline B's draw used the bind group set under Pipeline A. - t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedB)); + const commandEncoder = t.device.createCommandEncoder(); + const renderTarget = encodeForPassType(t, encoderType, commandEncoder, enc => { + setPipeline(encoderType, enc, pipelineA); + enc.setBindGroup(0, bindGroup, [0]); + enc.setImmediates!(0, new Uint32Array([1, 2, 3, 4])); + + // Switch to Pipeline B without re-setting the bind group. + setPipeline(encoderType, enc, pipelineB); + enc.setImmediates!(0, immDataB, undefined, immDataSizeB); + dispatchOrDraw(encoderType, enc); + })!; + + const bytesPerRow = align(kBytesPerPixel, kMinBytesPerRow); + const readbackBuffer = t.createBufferTracked({ + size: bytesPerRow, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + commandEncoder.copyTextureToBuffer( + { texture: renderTarget }, + { buffer: readbackBuffer, bytesPerRow }, + [1, 1] + ); + + t.device.queue.submit([commandEncoder.finish()]); + + // Pad expected to 4 components. + const paddedExpected = new Uint32Array(4); + for (let i = 0; i < expectedB.length; i++) { + paddedExpected[i] = expectedB[i]; + } + t.expectGPUBufferValuesEqual(readbackBuffer, paddedExpected); + } }); g.test('use_max_immediate_size') @@ -536,7 +721,6 @@ g.test('use_max_immediate_size') .params(u => u.combine('encoderType', kProgrammableEncoderTypes)) .fn(t => { const { encoderType } = t.params; - t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); const maxImmediateSize = t.device.limits.maxImmediateSize; if (maxImmediateSize === undefined) { @@ -555,37 +739,30 @@ g.test('use_max_immediate_size') output[0] = data.m0; output[1] = data.m${count - 1}; `; + const fragmentReturnExpr = `vec4u(data.m0, data.m${count - 1}, 0, 0)`; - const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, maxImmediateSize); - - const outputBuffer = t.createBufferTracked({ - size: 8, - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); - const indexUniformBuffer = createOutputIndexBuffer(t, 1); - - const bindGroup = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: outputBuffer } }, - { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, - ], - }); + const pipeline = createPipeline( + t, + encoderType, + wgslDecl, + copyCode, + fragmentReturnExpr, + maxImmediateSize + ); - const commandEncoder = t.device.createCommandEncoder(); - encodeForPassType(t, encoderType, commandEncoder, enc => { - const data = new Uint32Array(count); - data[0] = 0xdeadbeef; - data[count - 1] = 0xcafebabe; - - setPipeline(encoderType, enc, pipeline); - enc.setBindGroup(0, bindGroup, [0]); - enc.setImmediates!(0, data); - dispatchOrDraw(encoderType, enc); - }); + const data = new Uint32Array(count); + data[0] = 0xdeadbeef; + data[count - 1] = 0xcafebabe; - t.device.queue.submit([commandEncoder.finish()]); - t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array([0xdeadbeef, 0xcafebabe])); + runAndCheck( + t, + encoderType, + pipeline, + encoder => { + encoder.setImmediates!(0, data); + }, + [0xdeadbeef, 0xcafebabe] + ); }); g.test('typed_array_arguments') @@ -610,7 +787,6 @@ g.test('typed_array_arguments') .fn(t => { const { typedArray, encoderType, dataOffset, dataSize } = t.params; t.skipIf(typedArray === 'Float16Array', 'TODO(#4297): Float16Array not yet supported'); - t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); const Ctor = kTypedArrayBufferViews[typedArray]; const elementSize = Ctor.BYTES_PER_ELEMENT; @@ -644,7 +820,25 @@ g.test('typed_array_arguments') output[14] = data.m3.z; output[15] = data.m3.w; `; - const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, kImmediateByteSize); + // For the render path, use outIndex to select which vec4 to return. + // We do 4 draws at outIndex 0..3, each returning a different vec4. + const fragmentReturnExpr = `select(select(select( + vec4u(data.m3), + vec4u(data.m2), + outIndex == 2u), + vec4u(data.m1), + outIndex == 1u), + vec4u(data.m0), + outIndex == 0u)`; + + const pipeline = createPipeline( + t, + encoderType, + wgslDecl, + copyCode, + fragmentReturnExpr, + kImmediateByteSize + ); const actualDataOffset = dataOffset ?? 0; const maxElements = kImmediateByteSize / elementSize; @@ -675,35 +869,6 @@ g.test('typed_array_arguments') const clearData = new Uint32Array(kImmediateU32Count); for (let i = 0; i < kImmediateU32Count; i++) clearData[i] = 0xaaaaaaaa + i * 0x11111111; - const outputBuffer = t.createBufferTracked({ - size: kImmediateByteSize, - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); - const indexUniformBuffer = createOutputIndexBuffer(t, 1); - const bindGroup = t.device.createBindGroup({ - layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: outputBuffer } }, - { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, - ], - }); - - const commandEncoder = t.device.createCommandEncoder(); - encodeForPassType(t, encoderType, commandEncoder, enc => { - setPipeline(encoderType, enc, pipeline); - enc.setBindGroup(0, bindGroup, [0]); - - // Initialize immediates to the baseline clear pattern. - enc.setImmediates!(0, clearData); - - // Overwrite with typed array data using the parametrized offset/size. - // Passing undefined for dataOffset/dataSize uses the WebIDL default (0 / array.length). - enc.setImmediates!(0, arr, dataOffset, dataSize); - - dispatchOrDraw(encoderType, enc); - }); - t.device.queue.submit([commandEncoder.finish()]); - // Build expected: baseline pattern with the written typed-array bytes overlaid at offset 0. const expected = new Uint32Array(clearData); memcpy( @@ -715,7 +880,33 @@ g.test('typed_array_arguments') { dst: expected.buffer, start: 0 } ); - t.expectGPUBufferValuesEqual(outputBuffer, expected); + // For render path, we do 4 draws (one per vec4 member), each outputting 4 u32s to a pixel. + // For compute path, a single dispatch writes all 16 u32s. + const numDraws = encoderType === 'compute pass' ? 1 : 4; + + runAndCheck(t, encoderType, pipeline, () => {}, Array.from(expected), { + numDraws, + outputU32sPerDraw: encoderType === 'compute pass' ? kImmediateU32Count : 4, + encodeFn: (enc, bindGroup) => { + enc.setBindGroup(0, bindGroup, [0]); + + // Initialize immediates to the baseline clear pattern. + enc.setImmediates!(0, clearData); + + // Overwrite with typed array data using the parametrized offset/size. + enc.setImmediates!(0, arr, dataOffset, dataSize); + + if (encoderType === 'compute pass') { + dispatchOrDraw(encoderType, enc); + } else { + // Draw 4 times, each at a different outIndex to read a different vec4. + for (let i = 0; i < 4; i++) { + enc.setBindGroup(0, bindGroup, [i * 256]); + dispatchOrDraw(encoderType, enc); + } + } + }, + }); }); g.test('multiple_updates_before_draw_or_dispatch') @@ -725,12 +916,12 @@ g.test('multiple_updates_before_draw_or_dispatch') .params(u => u.combine('encoderType', kProgrammableEncoderTypes)) .fn(t => { const { encoderType } = t.params; - t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType); // Use vec4 to allow partial updates. const wgslDecl = 'var data: vec4;'; const copyCode = 'output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w;'; - const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, 16); + const fragmentReturnExpr = 'vec4u(data.x, data.y, data.z, data.w)'; + const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, fragmentReturnExpr, 16); runAndCheck( t, @@ -751,33 +942,31 @@ g.test('multiple_updates_before_draw_or_dispatch') g.test('render_pass_and_bundle_mix') .desc('Verify interaction between executeBundles and direct render pass commands.') .fn(t => { - t.skipIfStorageBuffersInFragmentStageNotAvailable('render pass'); const wgslDecl = 'var data: vec2;'; - const copyCode = ` - let base = outIndex * 2; - output[base] = data.x; - output[base + 1] = data.y; - `; - // Use 'render pass' type to create the pipeline, but it works for bundle too. - // Immediate size: vec2 = 2 * 4 bytes = 8 bytes. - const pipeline = createPipeline(t, 'render pass', wgslDecl, copyCode, 8) as GPURenderPipeline; + const fragmentReturnExpr = 'vec4u(data.x, data.y, 0, 0)'; + const renderTargetWidth = 2; + + const pipeline = createPipeline( + t, + 'render pass', + wgslDecl, + '', // copyCode unused for render-only test + fragmentReturnExpr, + 8, + renderTargetWidth + ) as GPURenderPipeline; - const outputBuffer = t.createBufferTracked({ - size: 16, - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); const indexUniformBuffer = createOutputIndexBuffer(t, 2); const bindGroup = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: outputBuffer } }, - { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, - ], + entries: [{ binding: 0, resource: { buffer: indexUniformBuffer, size: 4 } }], }); // Bundle: Set [1, 10], Draw (Index 0) - const bundleEncoder = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] }); + const bundleEncoder = t.device.createRenderBundleEncoder({ + colorFormats: [kRenderTargetFormat], + }); bundleEncoder.setPipeline(pipeline); bundleEncoder.setBindGroup(0, bindGroup, [0]); bundleEncoder.setImmediates!(0, new Uint32Array([1, 10])); @@ -785,8 +974,8 @@ g.test('render_pass_and_bundle_mix') const bundle = bundleEncoder.finish(); const renderTargetTexture = t.createTextureTracked({ - size: [4, 1, 1], - format: 'r32uint', + size: [renderTargetWidth, 1, 1], + format: kRenderTargetFormat, usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, }); const commandEncoder = t.device.createCommandEncoder(); @@ -811,39 +1000,65 @@ g.test('render_pass_and_bundle_mix') pass.draw(1); pass.end(); + + // Read back 2 pixels. + const bytesPerRow = align(renderTargetWidth * kBytesPerPixel, kMinBytesPerRow); + const readbackBuffer = t.createBufferTracked({ + size: bytesPerRow, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + commandEncoder.copyTextureToBuffer( + { texture: renderTargetTexture }, + { buffer: readbackBuffer, bytesPerRow }, + [renderTargetWidth, 1] + ); + t.device.queue.submit([commandEncoder.finish()]); - t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array([1, 10, 2, 20])); + // Each pixel is vec4u; we only use the first 2 components. + t.expectGPUBufferValuesEqual( + readbackBuffer, + new Uint32Array([ + 1, + 10, + 0, + 0, // pixel 0 (bundle draw) + 2, + 20, + 0, + 0, // pixel 1 (pass draw) + ]) + ); }); g.test('render_bundle_isolation') .desc('Verify that immediate data state is isolated between bundles executed in the same pass.') .fn(t => { - t.skipIfStorageBuffersInFragmentStageNotAvailable('render bundle'); const wgslDecl = 'var data: vec2;'; - const copyCode = ` - let base = outIndex * 2; - output[base] = data.x; - output[base + 1] = data.y; - `; - const pipeline = createPipeline(t, 'render pass', wgslDecl, copyCode, 8) as GPURenderPipeline; + const fragmentReturnExpr = 'vec4u(data.x, data.y, 0, 0)'; + const renderTargetWidth = 2; + + const pipeline = createPipeline( + t, + 'render pass', + wgslDecl, + '', // copyCode unused for render-only test + fragmentReturnExpr, + 8, + renderTargetWidth + ) as GPURenderPipeline; - const outputBuffer = t.createBufferTracked({ - size: 16, // 2 draws * 2 u32s * 4 bytes - usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC, - }); const indexUniformBuffer = createOutputIndexBuffer(t, 2); const bindGroup = t.device.createBindGroup({ layout: pipeline.getBindGroupLayout(0), - entries: [ - { binding: 0, resource: { buffer: outputBuffer } }, - { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } }, - ], + entries: [{ binding: 0, resource: { buffer: indexUniformBuffer, size: 4 } }], }); // Bundle A: Set [1, 2], Draw (Index 0) - const bundleEncoderA = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] }); + const bundleEncoderA = t.device.createRenderBundleEncoder({ + colorFormats: [kRenderTargetFormat], + }); bundleEncoderA.setPipeline(pipeline); bundleEncoderA.setBindGroup(0, bindGroup, [0]); bundleEncoderA.setImmediates!(0, new Uint32Array([1, 2])); @@ -851,7 +1066,9 @@ g.test('render_bundle_isolation') const bundleA = bundleEncoderA.finish(); // Bundle B: Set [3, 4], Draw (Index 1) - const bundleEncoderB = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] }); + const bundleEncoderB = t.device.createRenderBundleEncoder({ + colorFormats: [kRenderTargetFormat], + }); bundleEncoderB.setPipeline(pipeline); bundleEncoderB.setBindGroup(0, bindGroup, [256]); bundleEncoderB.setImmediates!(0, new Uint32Array([3, 4])); @@ -859,8 +1076,8 @@ g.test('render_bundle_isolation') const bundleB = bundleEncoderB.finish(); const renderTargetTexture = t.createTextureTracked({ - size: [4, 1, 1], - format: 'r32uint', + size: [renderTargetWidth, 1, 1], + format: kRenderTargetFormat, usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC, }); const commandEncoder = t.device.createCommandEncoder(); @@ -879,7 +1096,33 @@ g.test('render_bundle_isolation') pass.executeBundles([bundleA, bundleB]); pass.end(); + + // Read back 2 pixels. + const bytesPerRow = align(renderTargetWidth * kBytesPerPixel, kMinBytesPerRow); + const readbackBuffer = t.createBufferTracked({ + size: bytesPerRow, + usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC, + }); + commandEncoder.copyTextureToBuffer( + { texture: renderTargetTexture }, + { buffer: readbackBuffer, bytesPerRow }, + [renderTargetWidth, 1] + ); + t.device.queue.submit([commandEncoder.finish()]); - t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array([1, 2, 3, 4])); + // Each pixel is vec4u; we only use the first 2 components. + t.expectGPUBufferValuesEqual( + readbackBuffer, + new Uint32Array([ + 1, + 2, + 0, + 0, // pixel 0 (bundle A) + 3, + 4, + 0, + 0, // pixel 1 (bundle B) + ]) + ); });