diff --git a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts
index 13512fbac35a..a20336b6a5ad 100644
--- a/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts
+++ b/src/webgpu/api/operation/command_buffer/programmable/immediate.spec.ts
@@ -18,6 +18,11 @@ import {
   kProgrammableEncoderTypes,
   ProgrammableEncoderType,
 } from '../../../../util/command_buffer_maker.js';
+import { align } from '../../../../util/math.js';
+
+const kRenderTargetFormat = 'rgba32uint' as const;
+const kBytesPerPixel = 16; // rgba32uint = 4 x u32 = 16 bytes
+const kMinBytesPerRow = 256; // WebGPU requires bytesPerRow to be a multiple of 256
 
 class ImmediateDataOperationTest extends AllFeaturesMaxLimitsGPUTest {
   override async init() {
@@ -28,93 +33,115 @@ class ImmediateDataOperationTest extends AllFeaturesMaxLimitsGPUTest {
       return;
     }
   }
-
-  skipIfStorageBuffersInFragmentStageNotAvailable(encoderType: ProgrammableEncoderType) {
-    if (!this.isCompatibility) {
-      return;
-    }
-    const needsStorageBuffersInFragmentStage =
-      encoderType === 'render pass' || encoderType === 'render bundle';
-    this.skipIf(
-      needsStorageBuffersInFragmentStage &&
-        !(this.device.limits.maxStorageBuffersInFragmentStage! >= 1),
-      `maxStorageBuffersInFragmentStage(${this.device.limits.maxStorageBuffersInFragmentStage}) < 1`
-    );
-  }
 }
 
+/**
+ * Creates a pipeline for testing immediate data.
+ *
+ * For compute pipelines: uses a storage buffer to write results.
+ * For render pipelines: returns results via the fragment shader's rgba32uint color output,
+ *   avoiding the need for storage buffers in the fragment stage.
+ *
+ * @param copyCode - Code that writes to `output[]` array (used by compute shader)
+ * @param fragmentReturnExpr - WGSL expression returning vec4u (used by fragment shader)
+ * @param renderTargetWidth - Width of the render target in pixels (for vertex positioning)
+ */
 function createPipeline(
   t: AllFeaturesMaxLimitsGPUTest,
   encoderType: ProgrammableEncoderType,
   wgslDecl: string,
   copyCode: string,
+  fragmentReturnExpr: string,
   immediateSize: number,
+  renderTargetWidth: number = 4,
   pipelineLayout?: GPUPipelineLayout
 ) {
-  const layout =
-    pipelineLayout ||
-    t.device.createPipelineLayout({
-      bindGroupLayouts: [
-        t.device.createBindGroupLayout({
-          entries: [
-            {
-              binding: 0,
-              visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT,
-              buffer: { type: 'storage' },
-            },
-            {
-              binding: 1,
-              visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX,
-              buffer: { type: 'uniform', hasDynamicOffset: true },
-            },
-          ],
-        }),
-      ],
-      immediateSize,
-    });
+  if (encoderType === 'compute pass') {
+    const layout =
+      pipelineLayout ||
+      t.device.createPipelineLayout({
+        bindGroupLayouts: [
+          t.device.createBindGroupLayout({
+            entries: [
+              {
+                binding: 0,
+                visibility: GPUShaderStage.COMPUTE,
+                buffer: { type: 'storage' },
+              },
+              {
+                binding: 1,
+                visibility: GPUShaderStage.COMPUTE,
+                buffer: { type: 'uniform', hasDynamicOffset: true },
+              },
+            ],
+          }),
+        ],
+        immediateSize,
+      });
 
-  const fullCode = `
-    ${wgslDecl}
-    @group(0) @binding(0) var<storage, read_write> output: array<u32>;
-    @group(0) @binding(1) var<uniform> outIndex: u32;
+    const computeCode = `
+      ${wgslDecl}
+      @group(0) @binding(0) var<storage, read_write> output: array<u32>;
+      @group(0) @binding(1) var<uniform> outIndex: u32;
 
-    @compute @workgroup_size(1) fn cs_main() {
-      ${copyCode}
-    }
-    @fragment fn fs_main() -> @location(0) vec4u {
-      ${copyCode}
-      return vec4u(0);
-    }
-  `;
+      @compute @workgroup_size(1) fn cs_main() {
+        ${copyCode}
+      }
+    `;
 
-  if (encoderType === 'compute pass') {
     return t.device.createComputePipeline({
       layout,
       compute: {
-        module: t.device.createShaderModule({ code: fullCode }),
+        module: t.device.createShaderModule({ code: computeCode }),
       },
     });
   } else {
+    // Render pipeline: no storage buffer needed.
+    // The fragment shader returns results via the render target color output.
+    const layout =
+      pipelineLayout ||
+      t.device.createPipelineLayout({
+        bindGroupLayouts: [
+          t.device.createBindGroupLayout({
+            entries: [
+              {
+                binding: 0,
+                visibility: GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX,
+                buffer: { type: 'uniform', hasDynamicOffset: true },
+              },
+            ],
+          }),
+        ],
+        immediateSize,
+      });
+
+    const vertexCode = `
+      @group(0) @binding(0) var<uniform> outIndex: u32;
+
+      @vertex fn vs_main() -> @builtin(position) vec4f {
+        // Map outIndex to pixel centers in a ${renderTargetWidth}x1 render target.
+        let x = (f32(outIndex) + 0.5) / f32(${renderTargetWidth}) * 2.0 - 1.0;
+        return vec4f(x, 0.0, 0.0, 1.0);
+      }
+    `;
+
+    const fragmentCode = `
+      ${wgslDecl}
+      @group(0) @binding(0) var<uniform> outIndex: u32;
+
+      @fragment fn fs_main() -> @location(0) vec4u {
+        return ${fragmentReturnExpr};
+      }
+    `;
+
     return t.device.createRenderPipeline({
       layout,
       vertex: {
-        module: t.device.createShaderModule({
-          code: `
-            // Re-declare outIndex in the vertex shader
-            @group(0) @binding(1) var<uniform> outIndex: u32;
-
-            @vertex fn vs_main() -> @builtin(position) vec4f {
-              // Map outIndex 0..3 to pixel centers in a 4x1 render target.
-              // x = (f32(outIndex) + 0.5) / 2.0 - 1.0
-              let x = (f32(outIndex) + 0.5) / 2.0 - 1.0;
-              return vec4f(x, 0.0, 0.0, 1.0);
-            }
-          `,
-        }),
+        module: t.device.createShaderModule({ code: vertexCode }),
       },
       fragment: {
-        module: t.device.createShaderModule({ code: fullCode }),
-        targets: [{ format: 'r32uint' }],
+        module: t.device.createShaderModule({ code: fragmentCode }),
+        targets: [{ format: kRenderTargetFormat }],
       },
       primitive: {
         topology: 'point-list',
@@ -155,20 +182,26 @@ function createOutputIndexBuffer(t: AllFeaturesMaxLimitsGPUTest, count: number):
   return buffer;
 }
 
+/**
+ * Encode a pass for the given encoder type.
+ * For render paths, creates an rgba32uint render target and returns it so callers can read back.
+ */
 function encodeForPassType(
   t: AllFeaturesMaxLimitsGPUTest,
   encoderType: ProgrammableEncoderType,
   commandEncoder: GPUCommandEncoder,
-  fn: (pass: GPURenderPassEncoder | GPUComputePassEncoder | GPURenderBundleEncoder) => void
-) {
+  fn: (pass: GPURenderPassEncoder | GPUComputePassEncoder | GPURenderBundleEncoder) => void,
+  renderTargetWidth: number = 4
+): GPUTexture | undefined {
   if (encoderType === 'compute pass') {
     const pass = commandEncoder.beginComputePass();
     fn(pass);
     pass.end();
+    return undefined;
   } else {
     const renderTargetTexture = t.createTextureTracked({
-      size: [4, 1, 1],
-      format: 'r32uint',
+      size: [renderTargetWidth, 1, 1],
+      format: kRenderTargetFormat,
       usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
     });
 
@@ -188,7 +221,7 @@ function encodeForPassType(
     } else {
       // Render Bundle
       const bundleEncoder = t.device.createRenderBundleEncoder({
-        colorFormats: ['r32uint'],
+        colorFormats: [kRenderTargetFormat],
       });
       fn(bundleEncoder);
       const bundle = bundleEncoder.finish();
@@ -206,6 +239,8 @@ function encodeForPassType(
       pass.executeBundles([bundle]);
       pass.end();
     }
+
+    return renderTargetTexture;
   }
 }
 
@@ -223,6 +258,19 @@ function setPipeline(
   }
 }
 
+/**
+ * Run a pipeline and check the output values.
+ *
+ * For compute: writes to a storage buffer and checks it directly.
+ * For render: reads back the rgba32uint render target and checks pixel values.
+ *
+ * Simple mode (encodeFn omitted): a single draw/dispatch. The caller provides setImmediatesFn
+ * which is called after setPipeline + setBindGroup. expectedValues.length must be <= 4.
+ *
+ * Multi-draw mode (encodeFn provided): the caller drives all bind group / immediate / draw calls
+ * via encodeFn(enc, bindGroup, indexUniformBuffer). numDraws and outputU32sPerDraw control the
+ * output buffer size and render target width.
+ */
 function runAndCheck(
   t: AllFeaturesMaxLimitsGPUTest,
   encoderType: ProgrammableEncoderType,
@@ -230,41 +278,110 @@ function runAndCheck(
   setImmediatesFn: (
     encoder: GPUComputePassEncoder | GPURenderPassEncoder | GPURenderBundleEncoder
   ) => void,
-  expectedValues: number[]
+  expectedValues: number[],
+  {
+    numDraws = 1,
+    outputU32sPerDraw,
+    encodeFn,
+    renderTargetWidth = 4,
+  }: {
+    numDraws?: number;
+    outputU32sPerDraw?: number;
+    encodeFn?: (
+      enc: GPUComputePassEncoder | GPURenderPassEncoder | GPURenderBundleEncoder,
+      bindGroup: GPUBindGroup,
+      indexUniformBuffer: GPUBuffer
+    ) => void;
+    renderTargetWidth?: number;
+  } = {}
 ) {
   assert(expectedValues.length > 0, 'expectedValues must not be empty');
-  const outputBuffer = t.createBufferTracked({
-    size: expectedValues.length * 4,
-    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
-  });
-  // A dynamic-offset uniform buffer supplies outIndex = 0 here.
-  // We use a uniform buffer (rather than e.g. firstVertex via @builtin(vertex_index)) because:
-  //  - It works across all shader stages (compute, vertex, fragment).
-  //  - firstVertex is emulated via root constants on D3D12, which is the same mechanism
-  //    backing var<immediate>, so using it could mask bugs in the path under test.
-  // The pipeline layout declares hasDynamicOffset, so we must always pass a dynamic offset
-  // array — even though this simple helper only ever uses offset [0].
-  const indexUniformBuffer = t.makeBufferWithContents(new Uint32Array([0]), GPUBufferUsage.UNIFORM);
-
-  const bindGroup = t.device.createBindGroup({
-    layout: pipeline.getBindGroupLayout(0),
-    entries: [
-      { binding: 0, resource: { buffer: outputBuffer } },
-      { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } },
-    ],
-  });
 
-  const commandEncoder = t.device.createCommandEncoder();
-  encodeForPassType(t, encoderType, commandEncoder, encoder => {
-    setPipeline(encoderType, encoder, pipeline);
-    encoder.setBindGroup(0, bindGroup, [0]);
-    setImmediatesFn(encoder);
-    dispatchOrDraw(encoderType, encoder);
-  });
+  // In multi-draw mode, outputU32sPerDraw must be specified.
+  // In simple mode, it defaults to expectedValues.length (single draw).
+  const u32sPerDraw = outputU32sPerDraw ?? expectedValues.length;
+
+  const indexUniformBuffer = encodeFn
+    ? createOutputIndexBuffer(t, numDraws)
+    : t.makeBufferWithContents(new Uint32Array([0]), GPUBufferUsage.UNIFORM);
+
+  if (encoderType === 'compute pass') {
+    const outputBuffer = t.createBufferTracked({
+      size: u32sPerDraw * 4 * numDraws,
+      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
+    });
 
-  t.device.queue.submit([commandEncoder.finish()]);
+    const bindGroup = t.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(0),
+      entries: [
+        { binding: 0, resource: { buffer: outputBuffer } },
+        { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } },
+      ],
+    });
+
+    const commandEncoder = t.device.createCommandEncoder();
+    encodeForPassType(t, encoderType, commandEncoder, encoder => {
+      setPipeline(encoderType, encoder, pipeline);
+      if (encodeFn) {
+        encodeFn(encoder, bindGroup, indexUniformBuffer);
+      } else {
+        encoder.setBindGroup(0, bindGroup, [0]);
+        setImmediatesFn(encoder);
+        dispatchOrDraw(encoderType, encoder);
+      }
+    });
+
+    t.device.queue.submit([commandEncoder.finish()]);
+    t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedValues));
+  } else {
+    assert(u32sPerDraw <= 4, 'runAndCheck supports at most 4 u32s per draw (one rgba32uint pixel)');
 
-  t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedValues));
+    const bindGroup = t.device.createBindGroup({
+      layout: pipeline.getBindGroupLayout(0),
+      entries: [{ binding: 0, resource: { buffer: indexUniformBuffer, size: 4 } }],
+    });
+
+    const commandEncoder = t.device.createCommandEncoder();
+    const pixelWidth = encodeFn ? renderTargetWidth : 1;
+    const renderTarget = encodeForPassType(
+      t,
+      encoderType,
+      commandEncoder,
+      encoder => {
+        setPipeline(encoderType, encoder, pipeline);
+        if (encodeFn) {
+          encodeFn(encoder, bindGroup, indexUniformBuffer);
+        } else {
+          encoder.setBindGroup(0, bindGroup, [0]);
+          setImmediatesFn(encoder);
+          dispatchOrDraw(encoderType, encoder);
+        }
+      },
+      pixelWidth
+    )!;
+
+    const bytesPerRow = align(pixelWidth * kBytesPerPixel, kMinBytesPerRow);
+    const readbackBuffer = t.createBufferTracked({
+      size: bytesPerRow,
+      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
+    });
+    commandEncoder.copyTextureToBuffer(
+      { texture: renderTarget },
+      { buffer: readbackBuffer, bytesPerRow },
+      [pixelWidth, 1]
+    );
+
+    t.device.queue.submit([commandEncoder.finish()]);
+
+    // Each pixel is 4 u32s (rgba32uint). Pad each draw's output to 4.
+    const paddedExpected = new Uint32Array(pixelWidth * 4);
+    for (let d = 0; d < numDraws; d++) {
+      for (let i = 0; i < u32sPerDraw; i++) {
+        paddedExpected[d * 4 + i] = expectedValues[d * u32sPerDraw + i];
+      }
+    }
+    t.expectGPUBufferValuesEqual(readbackBuffer, paddedExpected);
+  }
 }
 
 export const g = makeTestGroup(ImmediateDataOperationTest);
@@ -291,10 +408,10 @@ g.test('basic_execution')
   .fn(t => {
     const { encoderType, dataType, scalarType, vectorSize } = t.params;
     t.skipIf(scalarType === 'f16', 'Immediate data blocks do not yet support f16 types');
-    t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType);
 
     let wgslDecl = '';
     let copyCode = '';
+    let fragmentReturnExpr = '';
     let immediateSize = 0;
     let expected: number[] = [];
     let inputData: Uint32Array;
@@ -306,6 +423,7 @@ g.test('basic_execution')
         var<immediate> data: S;
       `;
       copyCode = 'output[0] = data.a; output[1] = data.b;';
+      fragmentReturnExpr = 'vec4u(data.a, data.b, 0, 0)';
       inputData = new Uint32Array([0xdeadbeef, 0xcafebabe]);
       expected = [0xdeadbeef, 0xcafebabe];
     } else {
@@ -322,6 +440,20 @@ g.test('basic_execution')
         copyCode += `output[${i}] = bitcast<u32>(${valExpr});\n`;
       }
 
+      // Build fragment return expression: pack values into vec4u, padding with 0.
+      if (vSize === 1) {
+        fragmentReturnExpr = 'vec4u(bitcast<u32>(data), 0, 0, 0)';
+      } else if (vSize === 2) {
+        fragmentReturnExpr = 'vec4u(bitcast<u32>(data[0]), bitcast<u32>(data[1]), 0, 0)';
+      } else if (vSize === 3) {
+        fragmentReturnExpr =
+          'vec4u(bitcast<u32>(data[0]), bitcast<u32>(data[1]), bitcast<u32>(data[2]), 0)';
+      } else {
+        // vSize === 4
+        fragmentReturnExpr =
+          'vec4u(bitcast<u32>(data[0]), bitcast<u32>(data[1]), bitcast<u32>(data[2]), bitcast<u32>(data[3]))';
+      }
+
       inputData = new Uint32Array(vSize);
       for (let i = 0; i < vSize; i++) {
         if (sType === 'u32') {
@@ -342,7 +474,14 @@ g.test('basic_execution')
       }
     }
 
-    const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, immediateSize);
+    const pipeline = createPipeline(
+      t,
+      encoderType,
+      wgslDecl,
+      copyCode,
+      fragmentReturnExpr,
+      immediateSize
+    );
 
     runAndCheck(
       t,
@@ -360,7 +499,6 @@ g.test('update_data')
   .params(u => u.combine('encoderType', kProgrammableEncoderTypes))
   .fn(t => {
     const { encoderType } = t.params;
-    t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType);
     const immediateSize = 16;
     const wgslDecl = 'var<immediate> data: vec4<u32>;';
     const copyCode = `
@@ -370,57 +508,37 @@ g.test('update_data')
       output[base + 2] = data[2];
       output[base + 3] = data[3];
     `;
+    const fragmentReturnExpr = 'vec4u(data[0], data[1], data[2], data[3])';
 
-    const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, immediateSize);
-
-    const outputBuffer = t.createBufferTracked({
-      size: 4 * 4 * 3, // 3 steps, 4 u32s each
-      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
-    });
-
-    // Uniform buffer with output indices [0, 1, 2] at 256-byte aligned offsets,
-    // used to direct each dispatch/draw step to a separate region of the output buffer.
-    const indexUniformBuffer = createOutputIndexBuffer(t, 3);
-
-    const bindGroup = t.device.createBindGroup({
-      layout: pipeline.getBindGroupLayout(0),
-      entries: [
-        { binding: 0, resource: { buffer: outputBuffer } },
-        { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } },
-      ],
-    });
-
-    const commandEncoder = t.device.createCommandEncoder();
+    const pipeline = createPipeline(
+      t,
+      encoderType,
+      wgslDecl,
+      copyCode,
+      fragmentReturnExpr,
+      immediateSize
+    );
 
-    /** Set bind group with dynamic offset for output index, set immediates, and dispatch/draw. */
-    const runStep = (
-      pass: GPURenderPassEncoder | GPUComputePassEncoder | GPURenderBundleEncoder,
-      stepIndex: number,
-      data: Uint32Array,
-      dstOffset: number = 0
-    ) => {
-      pass.setBindGroup(0, bindGroup, [stepIndex * 256]);
-      pass.setImmediates!(dstOffset, data);
-      dispatchOrDraw(encoderType, pass);
-    };
-
-    encodeForPassType(t, encoderType, commandEncoder, enc => {
-      setPipeline(encoderType, enc, pipeline);
-
-      // Step 1: Full set [1, 2, 3, 4]
-      runStep(enc, 0, new Uint32Array([1, 2, 3, 4]));
-
-      // Step 2: Full update [5, 6, 7, 8]
-      runStep(enc, 1, new Uint32Array([5, 6, 7, 8]));
-
-      // Step 3: Partial update offset 4 bytes (index 1) with [9, 10] -> [5, 9, 10, 8]
-      runStep(enc, 2, new Uint32Array([9, 10]), 4);
+    runAndCheck(t, encoderType, pipeline, () => {}, [1, 2, 3, 4, 5, 6, 7, 8, 5, 9, 10, 8], {
+      numDraws: 3,
+      outputU32sPerDraw: 4,
+      encodeFn: (enc, bindGroup) => {
+        // Step 1: Full set [1, 2, 3, 4]
+        enc.setBindGroup(0, bindGroup, [0]);
+        enc.setImmediates!(0, new Uint32Array([1, 2, 3, 4]));
+        dispatchOrDraw(encoderType, enc);
+
+        // Step 2: Full update [5, 6, 7, 8]
+        enc.setBindGroup(0, bindGroup, [256]);
+        enc.setImmediates!(0, new Uint32Array([5, 6, 7, 8]));
+        dispatchOrDraw(encoderType, enc);
+
+        // Step 3: Partial update offset 4 bytes (index 1) with [9, 10] -> [5, 9, 10, 8]
+        enc.setBindGroup(0, bindGroup, [512]);
+        enc.setImmediates!(4, new Uint32Array([9, 10]));
+        dispatchOrDraw(encoderType, enc);
+      },
     });
-
-    t.device.queue.submit([commandEncoder.finish()]);
-
-    const expected = new Uint32Array([1, 2, 3, 4, 5, 6, 7, 8, 5, 9, 10, 8]);
-    t.expectGPUBufferValuesEqual(outputBuffer, expected);
   });
 
 g.test('pipeline_switch')
@@ -437,22 +555,24 @@ g.test('pipeline_switch')
   )
   .fn(t => {
     const { encoderType, sameImmediateSize } = t.params;
-    t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType);
 
     // Pipeline A always uses vec4<u32> (16 bytes).
     const wgslDeclA = 'var<immediate> data: vec4<u32>;';
     const copyCodeA = `
       output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w;
     `;
+    const fragmentReturnExprA = 'vec4u(data.x, data.y, data.z, data.w)';
 
     let wgslDeclB: string;
     let copyCodeB: string;
+    let fragmentReturnExprB: string;
     let immediateSizeB: number;
 
     if (sameImmediateSize) {
       // Pipeline B has the same immediate layout as A (vec4<u32>, 16 bytes).
       wgslDeclB = wgslDeclA;
       copyCodeB = copyCodeA;
+      fragmentReturnExprB = fragmentReturnExprA;
       immediateSizeB = 16;
     } else {
       // Pipeline B uses vec2<u32> (8 bytes) — different/incompatible layout.
@@ -460,6 +580,7 @@ g.test('pipeline_switch')
       copyCodeB = `
         output[0] = data.x; output[1] = data.y; output[2] = 0u; output[3] = 0u;
       `;
+      fragmentReturnExprB = 'vec4u(data.x, data.y, 0, 0)';
       immediateSizeB = 8;
     }
 
@@ -468,67 +589,131 @@ g.test('pipeline_switch')
     const immDataSizeB = sameImmediateSize ? undefined : immediateSizeB / 4;
     const expectedB = sameImmediateSize ? [5, 6, 7, 8] : [5, 6, 0, 0];
 
+    // Create a shared bind group layout for both pipelines so they are bind-group-compatible.
+    // Compute path needs storage + uniform; render path needs only uniform.
     const bindGroupLayout = t.device.createBindGroupLayout({
-      entries: [
-        {
-          binding: 0,
-          visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT,
-          buffer: { type: 'storage' },
-        },
-        {
-          binding: 1,
-          visibility: GPUShaderStage.COMPUTE | GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX,
-          buffer: { type: 'uniform', hasDynamicOffset: true },
-        },
-      ],
+      entries:
+        encoderType === 'compute pass'
+          ? [
+              {
+                binding: 0,
+                visibility: GPUShaderStage.COMPUTE,
+                buffer: { type: 'storage' },
+              },
+              {
+                binding: 1,
+                visibility: GPUShaderStage.COMPUTE,
+                buffer: { type: 'uniform', hasDynamicOffset: true },
+              },
+            ]
+          : [
+              {
+                binding: 0,
+                visibility: GPUShaderStage.FRAGMENT | GPUShaderStage.VERTEX,
+                buffer: { type: 'uniform', hasDynamicOffset: true },
+              },
+            ],
     });
 
     const layoutA = t.device.createPipelineLayout({
       bindGroupLayouts: [bindGroupLayout],
       immediateSize: 16,
     });
-    const pipelineA = createPipeline(t, encoderType, wgslDeclA, copyCodeA, 16, layoutA);
+    const pipelineA = createPipeline(
+      t,
+      encoderType,
+      wgslDeclA,
+      copyCodeA,
+      fragmentReturnExprA,
+      16,
+      4,
+      layoutA
+    );
 
     const layoutB = t.device.createPipelineLayout({
       bindGroupLayouts: [bindGroupLayout],
       immediateSize: immediateSizeB,
     });
-    const pipelineB = createPipeline(t, encoderType, wgslDeclB, copyCodeB, immediateSizeB, layoutB);
+    const pipelineB = createPipeline(
+      t,
+      encoderType,
+      wgslDeclB,
+      copyCodeB,
+      fragmentReturnExprB,
+      immediateSizeB,
+      4,
+      layoutB
+    );
 
-    const outputBuffer = t.createBufferTracked({
-      size: 16, // 4 u32s at outIndex 0
-      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
-    });
     const indexUniformBuffer = createOutputIndexBuffer(t, 1);
 
-    const bindGroup = t.device.createBindGroup({
-      layout: bindGroupLayout,
-      entries: [
-        { binding: 0, resource: { buffer: outputBuffer } },
-        { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } },
-      ],
-    });
+    let bindGroup: GPUBindGroup;
+    if (encoderType === 'compute pass') {
+      const outputBuffer = t.createBufferTracked({
+        size: 16, // 4 u32s at outIndex 0
+        usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
+      });
+      bindGroup = t.device.createBindGroup({
+        layout: bindGroupLayout,
+        entries: [
+          { binding: 0, resource: { buffer: outputBuffer } },
+          { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } },
+        ],
+      });
 
-    const commandEncoder = t.device.createCommandEncoder();
-    encodeForPassType(t, encoderType, commandEncoder, enc => {
-      // Only set bind group once between bind group compatible pipelines.
-      setPipeline(encoderType, enc, pipelineA);
-      enc.setBindGroup(0, bindGroup, [0]);
-      enc.setImmediates!(0, new Uint32Array([1, 2, 3, 4]));
-
-      // Switch to Pipeline B without re-setting the bind group.
-      // The bind group set under Pipeline A must remain valid.
-      setPipeline(encoderType, enc, pipelineB);
-      // Same source data; dataSize controls how many elements are written.
-      // Passing undefined for srcOffset/srcSize relies on WebIDL defaults (0 / array.length).
-      enc.setImmediates!(0, immDataB, undefined, immDataSizeB);
-      dispatchOrDraw(encoderType, enc);
-    });
+      const commandEncoder = t.device.createCommandEncoder();
+      encodeForPassType(t, encoderType, commandEncoder, enc => {
+        // Only set bind group once between bind group compatible pipelines.
+        setPipeline(encoderType, enc, pipelineA);
+        enc.setBindGroup(0, bindGroup, [0]);
+        enc.setImmediates!(0, new Uint32Array([1, 2, 3, 4]));
+
+        // Switch to Pipeline B without re-setting the bind group.
+        setPipeline(encoderType, enc, pipelineB);
+        enc.setImmediates!(0, immDataB, undefined, immDataSizeB);
+        dispatchOrDraw(encoderType, enc);
+      });
 
-    t.device.queue.submit([commandEncoder.finish()]);
+      t.device.queue.submit([commandEncoder.finish()]);
+      t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedB));
+    } else {
+      bindGroup = t.device.createBindGroup({
+        layout: bindGroupLayout,
+        entries: [{ binding: 0, resource: { buffer: indexUniformBuffer, size: 4 } }],
+      });
 
-    // Pipeline B's draw used the bind group set under Pipeline A.
-    t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array(expectedB));
+      const commandEncoder = t.device.createCommandEncoder();
+      const renderTarget = encodeForPassType(t, encoderType, commandEncoder, enc => {
+        setPipeline(encoderType, enc, pipelineA);
+        enc.setBindGroup(0, bindGroup, [0]);
+        enc.setImmediates!(0, new Uint32Array([1, 2, 3, 4]));
+
+        // Switch to Pipeline B without re-setting the bind group.
+        setPipeline(encoderType, enc, pipelineB);
+        enc.setImmediates!(0, immDataB, undefined, immDataSizeB);
+        dispatchOrDraw(encoderType, enc);
+      })!;
+
+      const bytesPerRow = align(kBytesPerPixel, kMinBytesPerRow);
+      const readbackBuffer = t.createBufferTracked({
+        size: bytesPerRow,
+        usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
+      });
+      commandEncoder.copyTextureToBuffer(
+        { texture: renderTarget },
+        { buffer: readbackBuffer, bytesPerRow },
+        [1, 1]
+      );
+
+      t.device.queue.submit([commandEncoder.finish()]);
+
+      // Pad expected to 4 components.
+      const paddedExpected = new Uint32Array(4);
+      for (let i = 0; i < expectedB.length; i++) {
+        paddedExpected[i] = expectedB[i];
+      }
+      t.expectGPUBufferValuesEqual(readbackBuffer, paddedExpected);
+    }
   });
 
 g.test('use_max_immediate_size')
@@ -536,7 +721,6 @@ g.test('use_max_immediate_size')
   .params(u => u.combine('encoderType', kProgrammableEncoderTypes))
   .fn(t => {
     const { encoderType } = t.params;
-    t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType);
 
     const maxImmediateSize = t.device.limits.maxImmediateSize;
     if (maxImmediateSize === undefined) {
@@ -555,37 +739,30 @@ g.test('use_max_immediate_size')
       output[0] = data.m0;
       output[1] = data.m${count - 1};
     `;
+    const fragmentReturnExpr = `vec4u(data.m0, data.m${count - 1}, 0, 0)`;
 
-    const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, maxImmediateSize);
-
-    const outputBuffer = t.createBufferTracked({
-      size: 8,
-      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
-    });
-    const indexUniformBuffer = createOutputIndexBuffer(t, 1);
-
-    const bindGroup = t.device.createBindGroup({
-      layout: pipeline.getBindGroupLayout(0),
-      entries: [
-        { binding: 0, resource: { buffer: outputBuffer } },
-        { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } },
-      ],
-    });
+    const pipeline = createPipeline(
+      t,
+      encoderType,
+      wgslDecl,
+      copyCode,
+      fragmentReturnExpr,
+      maxImmediateSize
+    );
 
-    const commandEncoder = t.device.createCommandEncoder();
-    encodeForPassType(t, encoderType, commandEncoder, enc => {
-      const data = new Uint32Array(count);
-      data[0] = 0xdeadbeef;
-      data[count - 1] = 0xcafebabe;
-
-      setPipeline(encoderType, enc, pipeline);
-      enc.setBindGroup(0, bindGroup, [0]);
-      enc.setImmediates!(0, data);
-      dispatchOrDraw(encoderType, enc);
-    });
+    const data = new Uint32Array(count);
+    data[0] = 0xdeadbeef;
+    data[count - 1] = 0xcafebabe;
 
-    t.device.queue.submit([commandEncoder.finish()]);
-    t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array([0xdeadbeef, 0xcafebabe]));
+    runAndCheck(
+      t,
+      encoderType,
+      pipeline,
+      encoder => {
+        encoder.setImmediates!(0, data);
+      },
+      [0xdeadbeef, 0xcafebabe]
+    );
   });
 
 g.test('typed_array_arguments')
@@ -610,7 +787,6 @@ g.test('typed_array_arguments')
   .fn(t => {
     const { typedArray, encoderType, dataOffset, dataSize } = t.params;
     t.skipIf(typedArray === 'Float16Array', 'TODO(#4297): Float16Array not yet supported');
-    t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType);
     const Ctor = kTypedArrayBufferViews[typedArray];
     const elementSize = Ctor.BYTES_PER_ELEMENT;
 
@@ -644,7 +820,25 @@ g.test('typed_array_arguments')
       output[14] = data.m3.z;
       output[15] = data.m3.w;
     `;
-    const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, kImmediateByteSize);
+    // For the render path, use outIndex to select which vec4 to return.
+    // We do 4 draws at outIndex 0..3, each returning a different vec4.
+    const fragmentReturnExpr = `select(select(select(
+      vec4u(data.m3),
+      vec4u(data.m2),
+      outIndex == 2u),
+      vec4u(data.m1),
+      outIndex == 1u),
+      vec4u(data.m0),
+      outIndex == 0u)`;
+
+    const pipeline = createPipeline(
+      t,
+      encoderType,
+      wgslDecl,
+      copyCode,
+      fragmentReturnExpr,
+      kImmediateByteSize
+    );
 
     const actualDataOffset = dataOffset ?? 0;
     const maxElements = kImmediateByteSize / elementSize;
@@ -675,35 +869,6 @@ g.test('typed_array_arguments')
     const clearData = new Uint32Array(kImmediateU32Count);
     for (let i = 0; i < kImmediateU32Count; i++) clearData[i] = 0xaaaaaaaa + i * 0x11111111;
 
-    const outputBuffer = t.createBufferTracked({
-      size: kImmediateByteSize,
-      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
-    });
-    const indexUniformBuffer = createOutputIndexBuffer(t, 1);
-    const bindGroup = t.device.createBindGroup({
-      layout: pipeline.getBindGroupLayout(0),
-      entries: [
-        { binding: 0, resource: { buffer: outputBuffer } },
-        { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } },
-      ],
-    });
-
-    const commandEncoder = t.device.createCommandEncoder();
-    encodeForPassType(t, encoderType, commandEncoder, enc => {
-      setPipeline(encoderType, enc, pipeline);
-      enc.setBindGroup(0, bindGroup, [0]);
-
-      // Initialize immediates to the baseline clear pattern.
-      enc.setImmediates!(0, clearData);
-
-      // Overwrite with typed array data using the parametrized offset/size.
-      // Passing undefined for dataOffset/dataSize uses the WebIDL default (0 / array.length).
-      enc.setImmediates!(0, arr, dataOffset, dataSize);
-
-      dispatchOrDraw(encoderType, enc);
-    });
-    t.device.queue.submit([commandEncoder.finish()]);
-
     // Build expected: baseline pattern with the written typed-array bytes overlaid at offset 0.
     const expected = new Uint32Array(clearData);
     memcpy(
@@ -715,7 +880,33 @@ g.test('typed_array_arguments')
       { dst: expected.buffer, start: 0 }
     );
 
-    t.expectGPUBufferValuesEqual(outputBuffer, expected);
+    // For render path, we do 4 draws (one per vec4 member), each outputting 4 u32s to a pixel.
+    // For compute path, a single dispatch writes all 16 u32s.
+    const numDraws = encoderType === 'compute pass' ? 1 : 4;
+
+    runAndCheck(t, encoderType, pipeline, () => {}, Array.from(expected), {
+      numDraws,
+      outputU32sPerDraw: encoderType === 'compute pass' ? kImmediateU32Count : 4,
+      encodeFn: (enc, bindGroup) => {
+        enc.setBindGroup(0, bindGroup, [0]);
+
+        // Initialize immediates to the baseline clear pattern.
+        enc.setImmediates!(0, clearData);
+
+        // Overwrite with typed array data using the parametrized offset/size.
+        enc.setImmediates!(0, arr, dataOffset, dataSize);
+
+        if (encoderType === 'compute pass') {
+          dispatchOrDraw(encoderType, enc);
+        } else {
+          // Draw 4 times, each at a different outIndex to read a different vec4.
+          for (let i = 0; i < 4; i++) {
+            enc.setBindGroup(0, bindGroup, [i * 256]);
+            dispatchOrDraw(encoderType, enc);
+          }
+        }
+      },
+    });
   });
 
 g.test('multiple_updates_before_draw_or_dispatch')
@@ -725,12 +916,12 @@ g.test('multiple_updates_before_draw_or_dispatch')
   .params(u => u.combine('encoderType', kProgrammableEncoderTypes))
   .fn(t => {
     const { encoderType } = t.params;
-    t.skipIfStorageBuffersInFragmentStageNotAvailable(encoderType);
     // Use vec4<u32> to allow partial updates.
     const wgslDecl = 'var<immediate> data: vec4<u32>;';
     const copyCode =
       'output[0] = data.x; output[1] = data.y; output[2] = data.z; output[3] = data.w;';
-    const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, 16);
+    const fragmentReturnExpr = 'vec4u(data.x, data.y, data.z, data.w)';
+    const pipeline = createPipeline(t, encoderType, wgslDecl, copyCode, fragmentReturnExpr, 16);
 
     runAndCheck(
       t,
@@ -751,33 +942,31 @@ g.test('multiple_updates_before_draw_or_dispatch')
 g.test('render_pass_and_bundle_mix')
   .desc('Verify interaction between executeBundles and direct render pass commands.')
   .fn(t => {
-    t.skipIfStorageBuffersInFragmentStageNotAvailable('render pass');
     const wgslDecl = 'var<immediate> data: vec2<u32>;';
-    const copyCode = `
-      let base = outIndex * 2;
-      output[base] = data.x;
-      output[base + 1] = data.y;
-    `;
-    // Use 'render pass' type to create the pipeline, but it works for bundle too.
-    // Immediate size: vec2<u32> = 2 * 4 bytes = 8 bytes.
-    const pipeline = createPipeline(t, 'render pass', wgslDecl, copyCode, 8) as GPURenderPipeline;
+    const fragmentReturnExpr = 'vec4u(data.x, data.y, 0, 0)';
+    const renderTargetWidth = 2;
+
+    const pipeline = createPipeline(
+      t,
+      'render pass',
+      wgslDecl,
+      '', // copyCode unused for render-only test
+      fragmentReturnExpr,
+      8,
+      renderTargetWidth
+    ) as GPURenderPipeline;
 
-    const outputBuffer = t.createBufferTracked({
-      size: 16,
-      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
-    });
     const indexUniformBuffer = createOutputIndexBuffer(t, 2);
 
     const bindGroup = t.device.createBindGroup({
       layout: pipeline.getBindGroupLayout(0),
-      entries: [
-        { binding: 0, resource: { buffer: outputBuffer } },
-        { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } },
-      ],
+      entries: [{ binding: 0, resource: { buffer: indexUniformBuffer, size: 4 } }],
     });
 
     // Bundle: Set [1, 10], Draw (Index 0)
-    const bundleEncoder = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] });
+    const bundleEncoder = t.device.createRenderBundleEncoder({
+      colorFormats: [kRenderTargetFormat],
+    });
     bundleEncoder.setPipeline(pipeline);
     bundleEncoder.setBindGroup(0, bindGroup, [0]);
     bundleEncoder.setImmediates!(0, new Uint32Array([1, 10]));
@@ -785,8 +974,8 @@ g.test('render_pass_and_bundle_mix')
     const bundle = bundleEncoder.finish();
 
     const renderTargetTexture = t.createTextureTracked({
-      size: [4, 1, 1],
-      format: 'r32uint',
+      size: [renderTargetWidth, 1, 1],
+      format: kRenderTargetFormat,
       usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
     });
     const commandEncoder = t.device.createCommandEncoder();
@@ -811,39 +1000,65 @@ g.test('render_pass_and_bundle_mix')
     pass.draw(1);
 
     pass.end();
+
+    // Read back 2 pixels.
+    const bytesPerRow = align(renderTargetWidth * kBytesPerPixel, kMinBytesPerRow);
+    const readbackBuffer = t.createBufferTracked({
+      size: bytesPerRow,
+      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
+    });
+    commandEncoder.copyTextureToBuffer(
+      { texture: renderTargetTexture },
+      { buffer: readbackBuffer, bytesPerRow },
+      [renderTargetWidth, 1]
+    );
+
     t.device.queue.submit([commandEncoder.finish()]);
 
-    t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array([1, 10, 2, 20]));
+    // Each pixel is vec4u; we only use the first 2 components.
+    t.expectGPUBufferValuesEqual(
+      readbackBuffer,
+      new Uint32Array([
+        1,
+        10,
+        0,
+        0, // pixel 0 (bundle draw)
+        2,
+        20,
+        0,
+        0, // pixel 1 (pass draw)
+      ])
+    );
   });
 
 g.test('render_bundle_isolation')
   .desc('Verify that immediate data state is isolated between bundles executed in the same pass.')
   .fn(t => {
-    t.skipIfStorageBuffersInFragmentStageNotAvailable('render bundle');
     const wgslDecl = 'var<immediate> data: vec2<u32>;';
-    const copyCode = `
-      let base = outIndex * 2;
-      output[base] = data.x;
-      output[base + 1] = data.y;
-    `;
-    const pipeline = createPipeline(t, 'render pass', wgslDecl, copyCode, 8) as GPURenderPipeline;
+    const fragmentReturnExpr = 'vec4u(data.x, data.y, 0, 0)';
+    const renderTargetWidth = 2;
+
+    const pipeline = createPipeline(
+      t,
+      'render pass',
+      wgslDecl,
+      '', // copyCode unused for render-only test
+      fragmentReturnExpr,
+      8,
+      renderTargetWidth
+    ) as GPURenderPipeline;
 
-    const outputBuffer = t.createBufferTracked({
-      size: 16, // 2 draws * 2 u32s * 4 bytes
-      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
-    });
     const indexUniformBuffer = createOutputIndexBuffer(t, 2);
 
     const bindGroup = t.device.createBindGroup({
       layout: pipeline.getBindGroupLayout(0),
-      entries: [
-        { binding: 0, resource: { buffer: outputBuffer } },
-        { binding: 1, resource: { buffer: indexUniformBuffer, size: 4 } },
-      ],
+      entries: [{ binding: 0, resource: { buffer: indexUniformBuffer, size: 4 } }],
     });
 
     // Bundle A: Set [1, 2], Draw (Index 0)
-    const bundleEncoderA = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] });
+    const bundleEncoderA = t.device.createRenderBundleEncoder({
+      colorFormats: [kRenderTargetFormat],
+    });
     bundleEncoderA.setPipeline(pipeline);
     bundleEncoderA.setBindGroup(0, bindGroup, [0]);
     bundleEncoderA.setImmediates!(0, new Uint32Array([1, 2]));
@@ -851,7 +1066,9 @@ g.test('render_bundle_isolation')
     const bundleA = bundleEncoderA.finish();
 
     // Bundle B: Set [3, 4], Draw (Index 1)
-    const bundleEncoderB = t.device.createRenderBundleEncoder({ colorFormats: ['r32uint'] });
+    const bundleEncoderB = t.device.createRenderBundleEncoder({
+      colorFormats: [kRenderTargetFormat],
+    });
     bundleEncoderB.setPipeline(pipeline);
     bundleEncoderB.setBindGroup(0, bindGroup, [256]);
     bundleEncoderB.setImmediates!(0, new Uint32Array([3, 4]));
@@ -859,8 +1076,8 @@ g.test('render_bundle_isolation')
     const bundleB = bundleEncoderB.finish();
 
     const renderTargetTexture = t.createTextureTracked({
-      size: [4, 1, 1],
-      format: 'r32uint',
+      size: [renderTargetWidth, 1, 1],
+      format: kRenderTargetFormat,
       usage: GPUTextureUsage.RENDER_ATTACHMENT | GPUTextureUsage.COPY_SRC,
     });
     const commandEncoder = t.device.createCommandEncoder();
@@ -879,7 +1096,33 @@ g.test('render_bundle_isolation')
     pass.executeBundles([bundleA, bundleB]);
 
     pass.end();
+
+    // Read back 2 pixels.
+    const bytesPerRow = align(renderTargetWidth * kBytesPerPixel, kMinBytesPerRow);
+    const readbackBuffer = t.createBufferTracked({
+      size: bytesPerRow,
+      usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
+    });
+    commandEncoder.copyTextureToBuffer(
+      { texture: renderTargetTexture },
+      { buffer: readbackBuffer, bytesPerRow },
+      [renderTargetWidth, 1]
+    );
+
     t.device.queue.submit([commandEncoder.finish()]);
 
-    t.expectGPUBufferValuesEqual(outputBuffer, new Uint32Array([1, 2, 3, 4]));
+    // Each pixel is vec4u; we only use the first 2 components.
+    t.expectGPUBufferValuesEqual(
+      readbackBuffer,
+      new Uint32Array([
+        1,
+        2,
+        0,
+        0, // pixel 0 (bundle A)
+        3,
+        4,
+        0,
+        0, // pixel 1 (bundle B)
+      ])
+    );
   });