Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 200 additions & 0 deletions src/webgpu/shader/execution/shader_io/compute_builtins.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1168,3 +1168,203 @@ fn main(@builtin(local_invocation_id) local_id : vec3u,

t.expectOK(checkNumSubgroupsConsistency(countData, outputData, wgThreads, t.params.numWGs));
});

g.test('subgroup_size_attribute')
.desc(
'Tests that the value of the subgroup_size builtin must equal the value of the @subgroup_size attribute.'
)
.params(u =>
u.combine('numWorkGroups', [1, 2] as const).combine('numSubgroups', [1, 2, 4] as const)
)
.fn(async t => {
t.skipIfDeviceDoesNotHaveFeature('subgroup-size-control' as GPUFeatureName);

/**
* Returns a subgroup size value that is valid for use in the @subgroup_size
* attribute on the current adapter.
*
* On Intel gen-12lp, subgroupMinSize may be 8 in fragment stages, which is below the allowed range
* for `[WaveSize]` on D3D12 (can only be 16). subgroupMaxSize (16) is always within the explicit
* range, so it is returned for that architecture.
* On all other adapters, subgroupMinSize is returned as the conservative choice as on many D3D12
* drivers only `waveLaneCountMin` is reliable, while `waveLaneCountMax` is not.
*
* @param adapterInfo The GPUAdapterInfo of the current device's adapter.
* @returns A power-of-two subgroup size valid for @subgroup_size on this adapter.
*/
const getValidSubgroupSizeForSubgroupSizeAttribute = (adapterInfo: GPUAdapterInfo): number => {
interface SubgroupAdapterInfo extends GPUAdapterInfo {
subgroupMinSize: number;
subgroupMaxSize: number;
}
const { vendor, architecture, subgroupMinSize, subgroupMaxSize } =
adapterInfo as SubgroupAdapterInfo;
return vendor === 'intel' && architecture === 'gen-12lp' ? subgroupMaxSize : subgroupMinSize;
};

const subgroupSize = getValidSubgroupSizeForSubgroupSizeAttribute(t.device.adapterInfo);

const { numWorkGroups, numSubgroups } = t.params;
const wgx = subgroupSize * numSubgroups;

const wgsl = `
enable subgroups;
enable subgroup_size_control;

@group(0) @binding(0)
var<storage, read_write> output : array<u32>;

@compute @workgroup_size(${wgx}, 1, 1) @subgroup_size(${subgroupSize})
fn main(@builtin(subgroup_size) builtin_size : u32,
@builtin(local_invocation_index) lid : u32,
@builtin(workgroup_id) wgid : vec3u) {
let gid = lid + wgid.x * ${wgx}u;
// Store 1 if builtin subgroup_size matches the @subgroup_size attribute, 0 otherwise.
output[gid] = select(0u, 1u, builtin_size == ${subgroupSize}u);
}`;

const numInvocations = wgx * numWorkGroups;
const outputBuffer = t.makeBufferWithContents(
new Uint32Array([...iterRange(numInvocations, x => 0)]),
GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
);
t.trackForCleanup(outputBuffer);

const pipeline = t.device.createComputePipeline({
layout: 'auto',
compute: {
module: t.device.createShaderModule({
code: wgsl,
}),
entryPoint: 'main',
},
});
const bg = t.device.createBindGroup({
layout: pipeline.getBindGroupLayout(0),
entries: [
{
binding: 0,
resource: {
buffer: outputBuffer,
},
},
],
});

const encoder = t.device.createCommandEncoder();
const pass = encoder.beginComputePass();
pass.setPipeline(pipeline);
pass.setBindGroup(0, bg);
pass.dispatchWorkgroups(numWorkGroups, 1, 1);
pass.end();
t.queue.submit([encoder.finish()]);

const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, {
srcByteOffset: 0,
type: Uint32Array,
typedLength: numInvocations,
method: 'copy',
});
const outputData: Uint32Array = outputReadback.data;

for (let i = 0; i < numInvocations; i++) {
if (outputData[i] !== 1) {
t.fail(
`@subgroup_size(${subgroupSize}): invocation ${i} has builtin subgroup_size != ${subgroupSize}`
);
break;
}
}
});

g.test('subgroup_size_attribute_valid_size_exists')
.desc(
`Tests that at least one power-of-two value in [subgroupMinSize, subgroupMaxSize] can be
used as the @subgroup_size attribute in a simple compute pipeline.`
)
.fn(async t => {
t.skipIfDeviceDoesNotHaveFeature('subgroup-size-control' as GPUFeatureName);

interface SubgroupProperties extends GPUAdapterInfo {
subgroupMinSize: number;
subgroupMaxSize: number;
}
const { subgroupMinSize, subgroupMaxSize } = t.device.adapterInfo as SubgroupProperties;

let succeeded = false;

for (let subgroupSize = subgroupMinSize; subgroupSize <= subgroupMaxSize; subgroupSize *= 2) {
const wgsl = `
enable subgroups;
enable subgroup_size_control;

@group(0) @binding(0)
var<storage, read_write> output : array<u32>;

@compute @workgroup_size(${subgroupSize}, 1, 1) @subgroup_size(${subgroupSize})
fn main(@builtin(local_invocation_index) lid : u32) {
output[lid] = lid;
}`;

// Use an error scope to catch validation errors from pipeline creation
// without crashing the test.
t.device.pushErrorScope('validation');
const module = t.device.createShaderModule({ code: wgsl });
const pipeline = t.device.createComputePipeline({
layout: 'auto',
compute: { module, entryPoint: 'main' },
});
const error = await t.device.popErrorScope();
if (error) {
continue;
}

const outputBuffer = t.makeBufferWithContents(
new Uint32Array([...iterRange(subgroupSize, x => 0)]),
GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
);
t.trackForCleanup(outputBuffer);

const bg = t.device.createBindGroup({
layout: pipeline.getBindGroupLayout(0),
entries: [{ binding: 0, resource: { buffer: outputBuffer } }],
});

const encoder = t.device.createCommandEncoder();
const pass = encoder.beginComputePass();
pass.setPipeline(pipeline);
pass.setBindGroup(0, bg);
pass.dispatchWorkgroups(1, 1, 1);
pass.end();
t.queue.submit([encoder.finish()]);

const outputReadback = await t.readGPUBufferRangeTyped(outputBuffer, {
srcByteOffset: 0,
type: Uint32Array,
typedLength: subgroupSize,
method: 'copy',
});
const outputData: Uint32Array = outputReadback.data;

// Validate that each invocation wrote its local_invocation_index.
let valid = true;
for (let i = 0; i < subgroupSize; i++) {
if (outputData[i] !== i) {
t.fail(
`@subgroup_size(${subgroupSize}): output[${i}] expected ${i}, got ${outputData[i]}`
);
valid = false;
break;
}
}

if (valid) {
succeeded = true;
break;
}
}

if (!succeeded) {
t.fail(`No valid @subgroup_size value found in [${subgroupMinSize}, ${subgroupMaxSize}]`);
}
});
Loading