diff --git a/.github/docker-compose.yml b/.github/docker-compose.yml index 400c044c4ed..76e3eb251d2 100644 --- a/.github/docker-compose.yml +++ b/.github/docker-compose.yml @@ -6,9 +6,9 @@ services: context: ../ dockerfile: .github/Dockerfile ports: - - "3000:3000" + - "${STDB_PORT:-3000}:3000" # Postgres - - "5432:5432" + - "${STDB_PG_PORT:-5432}:5432" entrypoint: spacetime start --pg-port 5432 privileged: true environment: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f01eefbe04f..6e9ad0f0fce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -19,9 +19,9 @@ concurrency: jobs: docker_smoketests: - needs: [lints, llm_ci_check] name: Smoketests strategy: + fail-fast: false matrix: runner: [spacetimedb-new-runner, windows-latest] include: @@ -38,6 +38,8 @@ jobs: timeout-minutes: 120 env: CARGO_TARGET_DIR: ${{ github.workspace }}/target + # Note: clear_database and replication only work in private + SMOKETEST_ARGS: ${{ matrix.smoketest_args }} -x clear_database replication teams steps: - name: Find Git ref env: @@ -84,800 +86,50 @@ jobs: if: runner.os == 'Windows' run: choco install psql -y --no-progress shell: powershell + - name: Build crates run: cargo build -p spacetimedb-cli -p spacetimedb-standalone -p spacetimedb-update - - name: Start Docker daemon + # TODO: This should be helpful on any platform.. but on Windows, `cargo ci` still ends up doing + # a bunch of rebuilding when it runs `cargo build` internally. if: runner.os == 'Linux' - run: /usr/local/bin/start-docker.sh + - name: Find spacetime binary + shell: bash + run: | + set -x + which spacetime || true + find target/debug/ -maxdepth 1 -type f || true - - name: Build and start database (Linux) + - name: Expand Docker network pool if: runner.os == 'Linux' run: | - # Our .dockerignore omits `target`, which our CI Dockerfile needs. 
- rm .dockerignore - docker compose -f .github/docker-compose.yml up -d - - name: Build and start database (Windows) - if: runner.os == 'Windows' - run: | - # Fail properly if any individual command fails - $ErrorActionPreference = 'Stop' - $PSNativeCommandUseErrorActionPreference = $true - - Start-Process target/debug/spacetimedb-cli.exe -ArgumentList 'start --pg-port 5432' - cd modules - # the sdk-manifests on windows-latest are messed up, so we need to update them - dotnet workload config --update-mode manifests - dotnet workload update + echo 'current contents:' + cat /etc/docker/daemon.json + echo 'New contents': + echo '{ + "storage-driver": "vfs", + "default-address-pools": [ + { "base": "10.10.0.0/16", "size": 24 }, + { "base": "10.20.0.0/16", "size": 24 } + ] + }' | sudo tee /etc/docker/daemon.json + sudo service docker restart + - name: Start Docker daemon + if: runner.os == 'Linux' + run: /usr/local/bin/start-docker.sh - uses: actions/setup-python@v5 with: { python-version: "3.12" } if: runner.os == 'Windows' - name: Install python deps run: python -m pip install -r smoketests/requirements.txt - - name: Run smoketests - # Note: clear_database and replication only work in private - run: cargo ci smoketests -- ${{ matrix.smoketest_args }} -x clear_database replication teams - - name: Stop containers (Linux) - if: always() && runner.os == 'Linux' - run: docker compose -f .github/docker-compose.yml down - - test: - needs: [lints, llm_ci_check] - name: Test Suite - runs-on: spacetimedb-new-runner - container: - image: localhost:5000/spacetimedb-ci:latest - options: >- - --privileged - env: - CARGO_TARGET_DIR: ${{ github.workspace }}/target - steps: - - name: Find Git ref - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - PR_NUMBER="${{ github.event.inputs.pr_number || null }}" - if test -n "${PR_NUMBER}"; then - GIT_REF="$( gh pr view --repo clockworklabs/SpacetimeDB $PR_NUMBER --json headRefName --jq .headRefName )" - else - GIT_REF="${{ github.ref }}" - fi - echo "GIT_REF=${GIT_REF}" >>"$GITHUB_ENV" - - - name: Checkout sources - uses: actions/checkout@v4 - with: - ref: ${{ env.GIT_REF }} - - - uses: dsherret/rust-toolchain-file@v1 - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - with: - workspaces: ${{ github.workspace }} - shared-key: spacetimedb - # Let the smoketests job save the cache since it builds the most things - save-if: false - prefix-key: v1 - - - uses: actions/setup-dotnet@v3 - with: - global-json-file: global.json - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: 18 - - - uses: pnpm/action-setup@v4 - with: - run_install: true - - - name: Build typescript module sdk - working-directory: crates/bindings-typescript - run: pnpm build - - - name: Run tests - run: cargo ci test - - lints: - name: Lints - runs-on: spacetimedb-new-runner - container: - image: localhost:5000/spacetimedb-ci:latest - options: >- - --privileged - env: - CARGO_TARGET_DIR: ${{ github.workspace }}/target - steps: - - name: Checkout sources - uses: actions/checkout@v3 - - - uses: dsherret/rust-toolchain-file@v1 - - run: echo ::add-matcher::.github/workflows/rust_matcher.json - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - with: - workspaces: ${{ github.workspace }} - shared-key: spacetimedb - # Let the smoketests job save the cache since it builds the most things - save-if: false - prefix-key: v1 - - - uses: actions/setup-dotnet@v3 - with: - global-json-file: global.json - - - name: Run ci lint - run: cargo ci lint - - 
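For context on the `default-address-pools` expansion above: every docker compose project gets its own bridge network, and the parallel smoketest batches introduced later in this diff create one compose project per batch, so Docker's stock pool can run dry. A rough capacity check (my arithmetic, not part of the patch):

```rust
// Sketch: how many /24 networks the two pools in the new daemon.json can hand out.
fn networks_per_pool(base_prefix: u32, subnet_prefix: u32) -> u32 {
    // A /16 base carved into /24 subnets yields 2^(24-16) = 256 networks.
    1 << (subnet_prefix - base_prefix)
}

fn main() {
    let per_pool = networks_per_pool(16, 24); // 10.10.0.0/16 or 10.20.0.0/16, size 24
    assert_eq!(per_pool, 256);
    println!("{} compose networks available", 2 * per_pool); // 512 across both pools
}
```

That is comfortably more than the number of compose projects the parallel runner spawns at once.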
wasm_bindings: - name: Build and test wasm bindings - runs-on: spacetimedb-new-runner - container: - image: localhost:5000/spacetimedb-ci:latest - options: >- - --privileged - env: - CARGO_TARGET_DIR: ${{ github.workspace }}/target - steps: - - uses: actions/checkout@v3 - - - uses: dsherret/rust-toolchain-file@v1 - - run: echo ::add-matcher::.github/workflows/rust_matcher.json - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - with: - workspaces: ${{ github.workspace }} - shared-key: spacetimedb - # Let the smoketests job save the cache since it builds the most things - save-if: false - prefix-key: v1 - - - name: Run bindgen tests - run: cargo ci wasm-bindings - - publish_checks: - name: Check that packages are publishable - runs-on: spacetimedb-new-runner - container: - image: localhost:5000/spacetimedb-ci:latest - options: >- - --privileged - permissions: read-all - steps: - - uses: actions/checkout@v3 - - name: Set up Python env - run: | - test -d venv || python3 -m venv venv - venv/bin/pip3 install argparse toml - - name: Run checks - run: | - set -ueo pipefail - FAILED=0 - ROOTS=(spacetimedb spacetimedb-sdk) - CRATES=$(venv/bin/python3 tools/find-publish-list.py --recursive --directories --quiet "${ROOTS[@]}") - for crate_dir in $CRATES; do - if ! venv/bin/python3 tools/crate-publish-checks.py "${crate_dir}"; then - FAILED=$(( $FAILED + 1 )) - fi - done - if [ $FAILED -gt 0 ]; then - exit 1 - fi - - update: - name: Test spacetimedb-update flow (${{ matrix.target }}) - permissions: read-all - strategy: - matrix: - include: - - runner: spacetimedb-new-runner - target: x86_64-unknown-linux-gnu - container: - image: localhost:5000/spacetimedb-ci:latest - options: --privileged - - { target: aarch64-unknown-linux-gnu, runner: arm-runner } - - { target: aarch64-apple-darwin, runner: macos-latest } - - { target: x86_64-pc-windows-msvc, runner: windows-latest } - runs-on: ${{ matrix.runner }} - container: ${{ matrix.container }} - steps: - - name: Checkout - uses: actions/checkout@v3 - - - name: Install Rust - uses: dsherret/rust-toolchain-file@v1 - - - name: Install rust target - run: rustup target add ${{ matrix.target }} - - - name: Install packages - if: ${{ matrix.runner == 'arm-runner' }} - shell: bash - run: sudo apt install -y libssl-dev - - - name: Build spacetimedb-update - run: cargo build --features github-token-auth --target ${{ matrix.target }} -p spacetimedb-update - if: runner.os == 'Windows' - - - name: Run self-install - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - shell: bash + - name: Run smoketests (Linux) + if: runner.os == 'Linux' run: | - ROOT_DIR="$(mktemp -d)" - # NOTE(bfops): We need the `github-token-auth` feature because we otherwise tend to get ratelimited when we try to fetch `/releases/latest`. - # My best guess is that, on the GitHub runners, the "anonymous" ratelimit is shared by *all* users of that runner (I think this because it - # happens very frequently on the `macos-runner`, but we haven't seen it on any others). - cargo run --features github-token-auth --target ${{ matrix.target }} -p spacetimedb-update -- self-install --root-dir="${ROOT_DIR}" --yes - "${ROOT_DIR}"/spacetime --root-dir="${ROOT_DIR}" help + export RUST_LOG=debug + # Pre-build the crates because this docker compose file uses whatever's pre-built + cargo build -p spacetimedb-cli -p spacetimedb-standalone -p spacetimedb-update + # Our .dockerignore omits `target`, which our CI Dockerfile needs. 
+ rm .dockerignore + cargo ci smoketests --docker .github/docker-compose.yml --parallel -- ${SMOKETEST_ARGS} + - name: Run smoketests (Windows) if: runner.os == 'Windows' - - - name: Test spacetimedb-update - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: cargo ci update-flow --target=${{ matrix.target }} --github-token-auth - if: runner.os != 'Windows' - - unreal_engine_tests: - name: Unreal Engine Tests - # This can't go on e.g. ubuntu-latest because that runner runs out of disk space. ChatGPT suggested that the general solution tends to be to use - # a custom runner. - runs-on: spacetimedb-new-runner - # Disable the tests because they are very flaky at the moment. - # TODO: Remove this line and re-enable the `if` line just below here. - if: false - # Skip if this is an external contribution. GitHub secrets will be empty, so the step would fail anyway. - # if: ${{ github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork }} - container: - image: ghcr.io/epicgames/unreal-engine:dev-5.6 - credentials: - # Note(bfops): I don't think that `github.actor` needs to match the user that the token is for, because I'm using a token for my account and - # it seems to be totally happy. - # However, the token needs to be for a user that has access to the EpicGames org (see - # https://dev.epicgames.com/documentation/en-us/unreal-engine/downloading-source-code-in-unreal-engine?application_version=5.6) - username: ${{ github.actor }} - password: ${{ secrets.GHCR_TOKEN }} - # Run as root because otherwise we get permission denied for various directories inside the container. I tried doing dances to allow it to run - # without this (reassigning env vars and stuff), but was unable to get it to work and it felt like an uphill battle. - options: --user 0:0 - steps: - # Uncomment this before merging so that it will run properly if run manually through the GH actions flow. It was playing weird with rolled back - # commits though. 
- # - name: Find Git ref - # env: - # GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # shell: bash - # run: | - # PR_NUMBER="${{ github.event.inputs.pr_number || null }}" - # if test -n "${PR_NUMBER}"; then - # GIT_REF="$( gh pr view --repo clockworklabs/SpacetimeDB $PR_NUMBER --json headRefName --jq .headRefName )" - # else - # GIT_REF="${{ github.ref }}" - # fi - # echo "GIT_REF=${GIT_REF}" >>"$GITHUB_ENV" - - name: Checkout sources - uses: actions/checkout@v4 - with: - ref: ${{ env.GIT_REF }} - - uses: dsherret/rust-toolchain-file@v1 - - name: Run Unreal Engine tests - working-directory: sdks/unreal - env: - UE_ROOT_PATH: /home/ue4/UnrealEngine - run: | - - apt-get update - apt-get install -y acl curl ca-certificates - - REPO="$GITHUB_WORKSPACE" - # Let ue4 read/write the workspace & tool caches without changing ownership - for p in "$REPO" "${RUNNER_TEMP:-/__t}" "${RUNNER_TOOL_CACHE:-/__t}"; do - [ -d "$p" ] && setfacl -R -m u:ue4:rwX -m d:u:ue4:rwX "$p" || true - done - - # Rust tool caches live under the runner tool cache so they persist - export CARGO_HOME="${RUNNER_TOOL_CACHE:-/__t}/cargo" - export RUSTUP_HOME="${RUNNER_TOOL_CACHE:-/__t}/rustup" - mkdir -p "$CARGO_HOME" "$RUSTUP_HOME" - chown -R ue4:ue4 "$CARGO_HOME" "$RUSTUP_HOME" - - # Make sure the UE build script is executable (and parents traversable) - UE_DIR="${UE_ROOT_PATH:-/home/ue4/UnrealEngine}" - chmod a+rx "$UE_DIR" "$UE_DIR/Engine" "$UE_DIR/Engine/Build" "$UE_DIR/Engine/Build/BatchFiles/Linux" || true - chmod a+rx "$UE_DIR/Engine/Build/BatchFiles/Linux/Build.sh" || true - - # Run the build & tests as ue4 (who owns the UE tree) - sudo -E -H -u ue4 env \ - HOME=/home/ue4 \ - XDG_CONFIG_HOME=/home/ue4/.config \ - CARGO_HOME="$CARGO_HOME" \ - RUSTUP_HOME="$RUSTUP_HOME" \ - PATH="$CARGO_HOME/bin:$PATH" \ - bash -lc ' - set -euxo pipefail - # Install rustup for ue4 if needed (uses the shared caches) - if ! 
command -v cargo >/dev/null 2>&1; then - curl -sSf https://sh.rustup.rs | sh -s -- -y - fi - rustup show >/dev/null - git config --global --add safe.directory "$GITHUB_WORKSPACE" || true - - cd "$GITHUB_WORKSPACE/sdks/unreal" - cargo --version - cargo test -- --test-threads=1 - ' - - ci_command_docs: - name: Check CI command docs - runs-on: ubuntu-latest - steps: - - name: Find Git ref - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - shell: bash - run: | - PR_NUMBER="${{ github.event.inputs.pr_number || null }}" - if test -n "${PR_NUMBER}"; then - GIT_REF="$( gh pr view --repo clockworklabs/SpacetimeDB $PR_NUMBER --json headRefName --jq .headRefName )" - else - GIT_REF="${{ github.ref }}" - fi - echo "GIT_REF=${GIT_REF}" >>"$GITHUB_ENV" - - - name: Checkout sources - uses: actions/checkout@v4 - with: - ref: ${{ env.GIT_REF }} - - - uses: dsherret/rust-toolchain-file@v1 - - - name: Check for docs change - run: cargo ci self-docs --check - - cli_docs: - name: Check CLI docs - permissions: read-all - runs-on: spacetimedb-new-runner - container: - image: localhost:5000/spacetimedb-ci:latest - options: >- - --privileged - env: - CARGO_TARGET_DIR: ${{ github.workspace }}/target - steps: - - name: Find Git ref - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - shell: bash - run: | - PR_NUMBER="${{ github.event.inputs.pr_number || null }}" - if test -n "${PR_NUMBER}"; then - GIT_REF="$( gh pr view --repo clockworklabs/SpacetimeDB $PR_NUMBER --json headRefName --jq .headRefName )" - else - GIT_REF="${{ github.ref }}" - fi - echo "GIT_REF=${GIT_REF}" >>"$GITHUB_ENV" - - - name: Checkout sources - uses: actions/checkout@v4 - with: - ref: ${{ env.GIT_REF }} - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: 18 - - - uses: pnpm/action-setup@v4 - with: - run_install: true - - - name: Get pnpm store directory - shell: bash - run: | - echo "STORE_PATH=$(pnpm store path --silent)" >> $GITHUB_ENV - - - uses: actions/cache@v4 - name: Setup pnpm cache - with: - path: ${{ env.STORE_PATH }} - key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }} - restore-keys: | - ${{ runner.os }}-pnpm-store- - - - uses: dsherret/rust-toolchain-file@v1 - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - with: - workspaces: ${{ github.workspace }} - shared-key: spacetimedb - # Let the smoketests job save the cache since it builds the most things - save-if: false - prefix-key: v1 - - - name: Check for docs change - run: | - cargo ci cli-docs - - llm_ci_check: - name: Verify LLM benchmark is up to date - permissions: - contents: read - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: dtolnay/rust-toolchain@stable - - uses: Swatinem/rust-cache@v2 - - - name: Run hash check (both langs) - run: cargo llm ci-check - - unity-testsuite: - needs: [lints, llm_ci_check] - # Skip if this is an external contribution. - # The license secrets will be empty, so the step would fail anyway. - if: ${{ github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork }} - permissions: - contents: read - checks: write - runs-on: spacetimedb-new-runner - container: - image: localhost:5000/spacetimedb-ci:latest - options: >- - --privileged - --cgroupns=host - timeout-minutes: 30 - env: - CARGO_TARGET_DIR: ${{ github.workspace }}/target - steps: - - name: Checkout repository - id: checkout-stdb - uses: actions/checkout@v4 - - # Run cheap .NET tests first. If those fail, no need to run expensive Unity tests. 
- - - name: Setup dotnet - uses: actions/setup-dotnet@v3 - with: - global-json-file: global.json - - - name: Override NuGet packages - run: | - dotnet pack crates/bindings-csharp/BSATN.Runtime - dotnet pack crates/bindings-csharp/Runtime - - # Write out the nuget config file to `nuget.config`. This causes the spacetimedb-csharp-sdk repository - # to be aware of the local versions of the `bindings-csharp` packages in SpacetimeDB, and use them if - # available. Otherwise, `spacetimedb-csharp-sdk` will use the NuGet versions of the packages. - # This means that (if version numbers match) we will test the local versions of the C# packages, even - # if they're not pushed to NuGet. - # See https://learn.microsoft.com/en-us/nuget/reference/nuget-config-file for more info on the config file. - cd sdks/csharp - ./tools~/write-nuget-config.sh ../.. - - # Now, setup the Unity tests. - - name: Patch spacetimedb dependency in Cargo.toml - working-directory: demo/Blackholio/server-rust - run: | - sed -i "s|spacetimedb *=.*|spacetimedb = \{ path = \"../../../crates/bindings\" \}|" Cargo.toml - cat Cargo.toml - - - name: Install Rust toolchain - uses: dsherret/rust-toolchain-file@v1 - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - with: - workspaces: ${{ github.workspace }} - shared-key: spacetimedb - # Let the main CI job save the cache since it builds the most things - save-if: false - prefix-key: v1 - - - name: Install SpacetimeDB CLI from the local checkout - run: | - cargo install --force --path crates/cli --locked --message-format=short - cargo install --force --path crates/standalone --locked --message-format=short - # Add a handy alias using the old binary name, so that we don't have to rewrite all scripts (incl. in submodules). - ln -sf $CARGO_HOME/bin/spacetimedb-cli $CARGO_HOME/bin/spacetime - - - name: Generate client bindings - working-directory: demo/Blackholio/server-rust - run: bash ./generate.sh -y - - - name: Check for changes - run: | - tools/check-diff.sh demo/Blackholio/client-unity/Assets/Scripts/autogen || { - echo 'Error: Bindings are dirty. Please run `demo/Blackholio/server-rust/generate.sh`.' - exit 1 - } - - - name: Check Unity meta files - uses: DeNA/unity-meta-check@v3 - with: - enable_pr_comment: ${{ github.event_name == 'pull_request' }} - target_path: sdks/csharp - env: - GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" - - - name: Start SpacetimeDB - run: | - spacetime start & - disown - - - name: Publish unity-tests module to SpacetimeDB - working-directory: demo/Blackholio/server-rust - run: | - spacetime logout && spacetime login --server-issued-login local - bash ./publish.sh - - - name: Patch com.clockworklabs.spacetimedbsdk dependency in manifest.json - working-directory: demo/Blackholio/client-unity/Packages - run: | - # Replace the com.clockworklabs.spacetimedbsdk dependency with the current branch. - # Note: Pointing to a local directory does not work, because our earlier steps nuke our meta files, which then causes Unity to not properly respect the DLLs (e.g. - # codegen does not work properly). 
- yq e -i '.dependencies["com.clockworklabs.spacetimedbsdk"] = "https://github.com/clockworklabs/SpacetimeDB.git?path=sdks/csharp#${{ github.head_ref }}"' manifest.json - cat manifest.json - - - uses: actions/cache@v3 - with: - path: demo/Blackholio/client-unity/Library - key: Unity-${{ github.head_ref }} - restore-keys: Unity- - - # We need this to support "Docker in Docker" - - name: Start Docker daemon - run: /usr/local/bin/start-docker.sh - - name: Run Unity tests - uses: game-ci/unity-test-runner@v4 - with: - unityVersion: 2022.3.32f1 # Adjust Unity version to a valid tag - projectPath: demo/Blackholio/client-unity # Path to the Unity project subdirectory - githubToken: ${{ secrets.GITHUB_TOKEN }} - testMode: playmode - useHostNetwork: true - artifactsPath: "" - env: - UNITY_EMAIL: ${{ secrets.UNITY_EMAIL }} - UNITY_PASSWORD: ${{ secrets.UNITY_PASSWORD }} - UNITY_SERIAL: ${{ secrets.UNITY_SERIAL }} - - csharp-testsuite: - needs: [lints, llm_ci_check] - runs-on: spacetimedb-new-runner - container: - image: localhost:5000/spacetimedb-ci:latest - options: >- - --privileged - --cgroupns=host - timeout-minutes: 30 - env: - CARGO_TARGET_DIR: ${{ github.workspace }}/target - steps: - - name: Checkout repository - id: checkout-stdb - uses: actions/checkout@v4 - - # Run cheap .NET tests first. If those fail, no need to run expensive Unity tests. - - - name: Setup dotnet - uses: actions/setup-dotnet@v3 - with: - global-json-file: global.json - - - name: Override NuGet packages - run: | - dotnet pack crates/bindings-csharp/BSATN.Runtime - dotnet pack crates/bindings-csharp/Runtime - - # Write out the nuget config file to `nuget.config`. This causes the spacetimedb-csharp-sdk repository - # to be aware of the local versions of the `bindings-csharp` packages in SpacetimeDB, and use them if - # available. Otherwise, `spacetimedb-csharp-sdk` will use the NuGet versions of the packages. - # This means that (if version numbers match) we will test the local versions of the C# packages, even - # if they're not pushed to NuGet. - # See https://learn.microsoft.com/en-us/nuget/reference/nuget-config-file for more info on the config file. - cd sdks/csharp - ./tools~/write-nuget-config.sh ../.. - - - name: Run .NET tests - working-directory: sdks/csharp - run: dotnet test -warnaserror - - - name: Verify C# formatting - working-directory: sdks/csharp - run: dotnet format --no-restore --verify-no-changes SpacetimeDB.ClientSDK.sln - - - name: Install Rust toolchain - uses: dsherret/rust-toolchain-file@v1 - - - name: Cache Rust dependencies - uses: Swatinem/rust-cache@v2 - with: - workspaces: ${{ github.workspace }} - shared-key: spacetimedb - # Let the main CI job save the cache since it builds the most things - save-if: false - prefix-key: v1 - - - name: Install SpacetimeDB CLI from the local checkout - run: | - cargo install --force --path crates/cli --locked --message-format=short - cargo install --force --path crates/standalone --locked --message-format=short - # Add a handy alias using the old binary name, so that we don't have to rewrite all scripts (incl. in submodules). - ln -sf $CARGO_HOME/bin/spacetimedb-cli $CARGO_HOME/bin/spacetime - - # This step shouldn't be needed, but somehow we end up with caches that are missing librusty_v8.a. 
- # ChatGPT suspects that this could be due to different build invocations using the same target dir, - # and this makes sense to me because we only see it in this job where we mix `cargo build -p` with - # `cargo build --manifest-path` (which apparently build different dependency trees). - # However, we've been unable to fix it so... /shrug - - name: Check v8 outputs - run: | - find "${CARGO_TARGET_DIR}"/ -type f | grep '[/_]v8' || true - if ! [ -f "${CARGO_TARGET_DIR}"/debug/gn_out/obj/librusty_v8.a ]; then - echo "Could not find v8 output file librusty_v8.a; rebuilding manually." - cargo clean -p v8 || true - cargo build -p v8 - fi - - - name: Check quickstart-chat bindings are up to date - working-directory: sdks/csharp - run: | - bash tools~/gen-quickstart.sh - "${GITHUB_WORKSPACE}"/tools/check-diff.sh examples~/quickstart-chat || { - echo 'Error: quickstart-chat bindings have changed. Please run `sdks/csharp/tools~/gen-quickstart.sh`.' - exit 1 - } - - - name: Check client-api bindings are up to date - working-directory: sdks/csharp - run: | - bash tools~/gen-client-api.sh - "${GITHUB_WORKSPACE}"/tools/check-diff.sh src/SpacetimeDB/ClientApi || { - echo 'Error: Client API bindings are dirty. Please run `sdks/csharp/tools~/gen-client-api.sh`.' - exit 1 - } - - - name: Start SpacetimeDB - run: | - spacetime start & - disown - - - name: Run regression tests - run: | - bash sdks/csharp/tools~/run-regression-tests.sh - tools/check-diff.sh sdks/csharp/examples~/regression-tests || { - echo 'Error: Bindings are dirty. Please run `sdks/csharp/tools~/gen-regression-tests.sh`.' - exit 1 - } - - internal-tests: - name: Internal Tests - needs: [lints, llm_ci_check] - # Skip if not a PR or a push to master - # Skip if this is an external contribution. GitHub secrets will be empty, so the step would fail anyway. - if: ${{ (github.event_name == 'pull_request' || (github.event_name == 'push' && github.ref == 'refs/heads/master')) - && (github.event_name != 'pull_request' || !github.event.pull_request.head.repo.fork) }} - permissions: - contents: read - runs-on: ubuntu-latest - env: - TARGET_OWNER: clockworklabs - TARGET_REPO: SpacetimeDBPrivate - steps: - - id: dispatch - name: Trigger tests - uses: actions/github-script@v7 - with: - github-token: ${{ secrets.SPACETIMEDB_PRIVATE_TOKEN }} - script: | - const workflowId = 'ci.yml'; - const targetRef = 'master'; - const targetOwner = process.env.TARGET_OWNER; - const targetRepo = process.env.TARGET_REPO; - // Use the ref for pull requests because the head sha is brittle (github does some extra dance where it merges in master). - const publicRef = (context.eventName === 'pull_request') ? 
context.payload.pull_request.head.ref : context.sha; - const preDispatch = new Date().toISOString(); - - // Dispatch the workflow in the target repository - await github.rest.actions.createWorkflowDispatch({ - owner: targetOwner, - repo: targetRepo, - workflow_id: workflowId, - ref: targetRef, - inputs: { public_ref: publicRef } - }); - - const sleep = (ms) => new Promise(r => setTimeout(r, ms)); - - // Find the dispatched run by name - let runId = null; - for (let attempt = 0; attempt < 20 && !runId; attempt++) { // up to ~10 minutes to locate the run - await sleep(5000); - const runsResp = await github.rest.actions.listWorkflowRuns({ - owner: targetOwner, - repo: targetRepo, - workflow_id: workflowId, - event: 'workflow_dispatch', - branch: targetRef, - per_page: 50, - }); - - const expectedName = `CI [public_ref=${publicRef}]`; - const candidates = runsResp.data.workflow_runs - .filter(r => r.name === expectedName && new Date(r.created_at) >= new Date(preDispatch)) - .sort((a, b) => new Date(b.created_at) - new Date(a.created_at)); - - if (candidates.length > 0) { - runId = candidates[0].id; - break; - } - } - - if (!runId) { - core.setFailed('Failed to locate dispatched run in the private repository.'); - return; - } - - const runUrl = `https://github.com/${targetOwner}/${targetRepo}/actions/runs/${runId}`; - core.info(`View run: ${runUrl}`); - core.setOutput('run_id', String(runId)); - core.setOutput('run_url', runUrl); - - - name: Wait for Internal Tests to complete - uses: actions/github-script@v7 - with: - github-token: ${{ secrets.SPACETIMEDB_PRIVATE_TOKEN }} - script: | - const targetOwner = process.env.TARGET_OWNER; - const targetRepo = process.env.TARGET_REPO; - const runId = Number(`${{ steps.dispatch.outputs.run_id }}`); - const runUrl = `${{ steps.dispatch.outputs.run_url }}`; - const sleep = (ms) => new Promise(r => setTimeout(r, ms)); - - core.info(`Waiting for workflow result... 
${runUrl}`); - - let conclusion = null; - for (let attempt = 0; attempt < 240; attempt++) { // up to ~2 hours - const runResp = await github.rest.actions.getWorkflowRun({ - owner: targetOwner, - repo: targetRepo, - run_id: runId - }); - const { status, conclusion: c } = runResp.data; - if (status === 'completed') { - conclusion = c || 'success'; - break; - } - await sleep(30000); - } - - if (!conclusion) { - core.setFailed('Timed out waiting for private workflow to complete.'); - return; - } - - if (conclusion !== 'success') { - core.setFailed(`Private workflow failed with conclusion: ${conclusion}`); - } - - - name: Cancel invoked run if workflow cancelled - if: ${{ cancelled() }} - uses: actions/github-script@v7 - with: - github-token: ${{ secrets.SPACETIMEDB_PRIVATE_TOKEN }} - script: | - const targetOwner = process.env.TARGET_OWNER; - const targetRepo = process.env.TARGET_REPO; - const runId = Number(`${{ steps.dispatch.outputs.run_id }}`); - if (!runId) return; - await github.rest.actions.cancelWorkflowRun({ - owner: targetOwner, - repo: targetRepo, - run_id: runId, - }); + run: cargo ci smoketests --parallel -- ${SMOKETEST_ARGS} diff --git a/Cargo.lock b/Cargo.lock index 168fffc3df7..b06c54062ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -882,6 +882,7 @@ dependencies = [ "env_logger 0.10.2", "log", "regex", + "serde_json", "tempfile", ] diff --git a/docker-compose.yml b/docker-compose.yml index 718281892dd..0b00256a808 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,11 +24,11 @@ services: - key_files:/etc/spacetimedb - /stdb ports: - - "3000:3000" + - "${STDB_PORT:-3000}:3000" # Postgres - - "5432:5432" + - "${STDB_PG_PORT:-5432}:5432" # Tracy - - "8086:8086" + - "${STDB_TRACY_PORT:-8086}:8086" entrypoint: cargo watch -i flamegraphs -i log.conf --why -C crates/standalone -x 'run start --data-dir=/stdb/data --jwt-pub-key-path=/etc/spacetimedb/id_ecdsa.pub --jwt-priv-key-path=/etc/spacetimedb/id_ecdsa --pg-port 5432' privileged: true environment: diff --git a/smoketests/__init__.py b/smoketests/__init__.py index 657b7740361..cd9da84680c 100644 --- a/smoketests/__init__.py +++ b/smoketests/__init__.py @@ -80,6 +80,7 @@ def requires_anonymous_login(item): return item def requires_local_server(item): + setattr(item, "_requires_local_server", True) if REMOTE_SERVER: return unittest.skip("running against a remote server")(item) return item @@ -211,6 +212,8 @@ def cargo_manifest(cls, manifest_text): @classmethod def spacetime(cls, *args, **kwargs): + with open(cls.config_path, "r") as f: + print('Config file contents in %s:\n%s' % (cls.config_path, f.read())) return spacetime("--config-path", str(cls.config_path), *args, **kwargs) def _check_published(self): @@ -236,6 +239,9 @@ def log_records(self, n): return list(map(json.loads, logs.splitlines())) def publish_module(self, domain=None, *, clear=True, capture_stderr=True, num_replicas=None, break_clients=False): + print ('Publish module') + with open(self.project_path / "src/lib.rs", "r") as f: + print('Module code in %s:\n%s' % (self.project_path / "src/lib.rs", f.read())) publish_output = self.spacetime( "publish", *[domain] if domain is not None else [], @@ -249,13 +255,17 @@ def publish_module(self, domain=None, *, clear=True, capture_stderr=True, num_re *["--break-clients"] if break_clients else [], capture_stderr=capture_stderr, ) + with open(self.project_path / "src/lib.rs", "r") as f: + print('Published module code in %s:\n%s' % (self.project_path / "src/lib.rs", f.read())) self.resolved_identity = 
re.search(r"identity: ([0-9a-fA-F]+)", publish_output)[1] self.database_identity = self.resolved_identity + print('Published module identity: %s' % self.database_identity) @classmethod def reset_config(cls): if not STDB_CONFIG: raise Exception("config toml has not been initialized yet") + print('Writing config to %s:\n%s' % (cls.config_path, STDB_CONFIG)) cls.config_path.write_text(STDB_CONFIG) def fingerprint(self): @@ -379,7 +389,8 @@ def setUpClass(cls): if cls.AUTOPUBLISH: logging.info(f"Compiling module for {cls.__qualname__}...") - cls.publish_module(cls, capture_stderr=True) # capture stderr because otherwise it clutters the top-level test logs for some reason. + #cls.publish_module(cls, capture_stderr=True) # capture stderr because otherwise it clutters the top-level test logs for some reason. + cls.publish_module(cls) def tearDown(self): # if this single test method published a database, clean it up now diff --git a/smoketests/__main__.py b/smoketests/__main__.py index cc3b0d004b6..350854700a2 100644 --- a/smoketests/__main__.py +++ b/smoketests/__main__.py @@ -77,16 +77,15 @@ def main(): parser.add_argument("--no-docker-logs", action="store_true") parser.add_argument("--skip-dotnet", action="store_true", help="ignore tests which require dotnet") parser.add_argument("--show-all-output", action="store_true", help="show all stdout/stderr from the tests as they're running") - parser.add_argument("--parallel", action="store_true", help="run test classes in parallel") - parser.add_argument("-j", dest='jobs', help="Set number of jobs for parallel test runs. Default is `nproc`", type=int, default=0) parser.add_argument('-k', dest='testNamePatterns', action='append', type=_convert_select_pattern, help='Only run tests which match the given substring') parser.add_argument("-x", dest="exclude", nargs="*", default=[]) parser.add_argument("--no-build-cli", action="store_true", help="don't cargo build the cli") - parser.add_argument("--list", action="store_true", help="list the tests that would be run, but don't run them") + parser.add_argument("--list", nargs="?", const="text", choices=("text", "json"), default=None, help="list the tests that would be run (optionally as 'text' or 'json'), but don't run them") parser.add_argument("--remote-server", action="store", help="Run against a remote server") parser.add_argument("--spacetime-login", action="store_true", help="Use `spacetime login` for these tests (and disable tests that don't work with that)") + parser.add_argument("--local-only", action="store_true", help="Only run tests that require a local server") args = parser.parse_args() if args.docker: @@ -116,22 +115,59 @@ def main(): loader.testNamePatterns = args.testNamePatterns tests = loader.loadTestsFromNames(testlist) - if args.list: + + if args.local_only: + def _is_local_only(test_case): + method_name = getattr(test_case, "_testMethodName", None) + if method_name is not None and hasattr(test_case, method_name): + method = getattr(test_case, method_name) + if getattr(method, "_requires_local_server", False): + return True + # Also allow class-level decoration + if getattr(test_case.__class__, "_requires_local_server", False): + return True + return False + + filtered = unittest.TestSuite() + for t in _iter_all_tests(tests): + if _is_local_only(t): + filtered.addTest(t) + tests = filtered + + if args.list is not None: failed_cls = getattr(unittest.loader, "_FailedTest", None) any_failed = False + test_names = [] + failed_tests = [] for test in _iter_all_tests(tests): name = test.id() if 
isinstance(test, failed_cls): any_failed = True - print('') - print("Failed to construct %s:" % test.id()) exc = getattr(test, "_exception", None) - if exc is not None: - tb = ''.join(traceback.format_exception(exc)) - print(tb.rstrip()) - print('') + tb = ''.join(traceback.format_exception(exc)) if exc is not None else None + failed_tests.append({ + "test_id": name, + "error": tb.rstrip() if tb is not None else None, + }) + if args.list == "text": + print('') + print("Failed to construct %s:" % name) + if tb is not None: + print(tb.rstrip()) + print('') else: - print(f"{name}") + test_names.append(name) + if args.list == "text": + print(f"{name}") + + if args.list == "json": + output = { + "tests": test_names, + "errors": failed_tests, + } + print(json.dumps(output)) + exit(0) + exit(1 if any_failed else 0) if not args.no_build_cli: @@ -176,14 +212,9 @@ def main(): buffer = not args.show_all_output verbosity = 2 - if args.parallel: - print("parallel test running is under construction, this will probably not work correctly") - from . import unittest_parallel - unittest_parallel.main(buffer=buffer, verbose=verbosity, level="class", discovered_tests=tests, jobs=args.jobs) - else: - result = unittest.TextTestRunner(buffer=buffer, verbosity=verbosity).run(tests) - if not result.wasSuccessful(): - parser.exit(status=1) + result = unittest.TextTestRunner(buffer=buffer, verbosity=verbosity).run(tests) + if not result.wasSuccessful(): + parser.exit(status=1) if __name__ == '__main__': diff --git a/tools/ci/Cargo.toml b/tools/ci/Cargo.toml index a79a4917ca7..47eb5f5836c 100644 --- a/tools/ci/Cargo.toml +++ b/tools/ci/Cargo.toml @@ -10,5 +10,6 @@ chrono = { workspace = true, features=["clock"] } clap.workspace = true regex.workspace = true duct.workspace = true +serde_json.workspace = true tempfile.workspace = true env_logger.workspace = true diff --git a/tools/ci/src/main.rs b/tools/ci/src/main.rs index 036e8993033..532f9d4744e 100644 --- a/tools/ci/src/main.rs +++ b/tools/ci/src/main.rs @@ -1,8 +1,23 @@ -use anyhow::{bail, Result}; +use anyhow::{bail, Context, Result}; use clap::{CommandFactory, Parser, Subcommand}; use duct::cmd; -use std::path::Path; +use log::{debug, warn}; +use serde_json; +use std::collections::HashSet; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; +use std::sync::Mutex; +use std::thread; +use std::time::{Duration, Instant}; use std::{env, fs}; +use tempfile::TempDir; + +static PRINT_LOCK: Mutex<()> = Mutex::new(()); + +fn with_print_lock<F: FnOnce() -> R, R>(f: F) -> R { + let _guard = PRINT_LOCK.lock().expect("print lock poisoned"); + f() +} const README_PATH: &str = "tools/ci/README.md"; @@ -51,10 +66,74 @@ enum CiCmd { /// Executes the smoketests suite with some default exclusions. Smoketests { #[arg( - trailing_var_arg = true, - long_help = "Additional arguments to pass to the smoketests runner. These are usually set by the CI environment, such as `-- --docker`" + long = "python", + value_name = "PYTHON_PATH", + long_help = "Python interpreter to use for smoketests" + )] + python: Option<String>, + + /// List the tests that would be run, but don't run them + #[arg( + long = "list", + num_args(0..=1), + default_missing_value = "text", + value_parser = ["text", "json"] + )] + list: Option<String>, + + // Args that influence test selection + #[arg( + long = "docker", + value_name = "COMPOSE_FILE", + num_args(0..=1), + default_missing_value = "docker-compose.yml", + long_help = "Use docker for smoketests, specifying a docker compose file. 
If no value is provided, docker-compose.yml is used by default. This cannot be combined with --start-server." )] - args: Vec<String>, + docker: Option<String>, + /// Ignore tests which require dotnet + #[arg(long = "skip-dotnet", default_value_t = false)] + skip_dotnet: bool, + /// Only run tests which match the given substring (can be specified multiple times) + #[arg(short = 'k', action = clap::ArgAction::Append)] + test_name_patterns: Vec<String>, + /// Exclude tests matching these names/patterns + #[arg(short = 'x', num_args(0..))] + exclude: Vec<String>, + /// Run against a remote server + #[arg(long = "remote-server")] + remote_server: Option<String>, + /// Only run tests that require a local server + #[arg(long = "local-only", default_value_t = false)] + local_only: bool, + /// Use `spacetime login` for these tests (and disable tests that don't work with that) + #[arg(long = "spacetime-login", default_value_t = false)] + spacetime_login: bool, + /// Tests to run (positional); if omitted, run all + #[arg(value_name = "TEST")] + test: Vec<String>, + + // Args that only influence test running + /// Show all stdout/stderr from the tests as they're running + #[arg(long = "show-all-output", default_value_t = false)] + show_all_output: bool, + /// Don't cargo build the CLI in the Python runner + #[arg(long = "no-build-cli", default_value_t = false)] + no_build_cli: bool, + /// Do not stream docker logs alongside test output + #[arg(long = "no-docker-logs", default_value_t = false)] + no_docker_logs: bool, + #[arg( + long = "start-server", + default_value_t = true, + long_help = "Whether to start a local SpacetimeDB server before running smoketests" + )] + start_server: bool, + #[arg( + long = "parallel", + default_value_t = false, + long_help = "Run smoketests in parallel batches grouped by test suite" + )] + parallel: bool, }, /// Tests the update flow /// @@ -108,6 +187,319 @@ fn run_all_clap_subcommands(skips: &[String]) -> Result<()> { } Ok(()) } +#[derive(Debug, Clone)] +pub enum StartServer { + No, + Yes, + Docker { compose_file: PathBuf }, +} + +fn find_free_port() -> Result<u16> { + let listener = TcpListener::bind("127.0.0.1:0").context("failed to bind to an ephemeral port")?; + let port = listener + .local_addr() + .context("failed to read local address for ephemeral port")? + .port(); + drop(listener); + Ok(port) +} + +fn wait_until_http_ready(timeout: Duration, server_url: &str) -> Result<()> { + println!("Waiting for server to start: {server_url}.."); + let deadline = Instant::now() + timeout; + + while Instant::now() < deadline { + // Use duct::cmd directly so we can suppress output from the ping command. + let status = cmd( + "cargo", + &["run", "-p", "spacetimedb-cli", "--", "server", "ping", server_url], + ) + .stdout_null() + .stderr_null() + .unchecked() + .run(); + + if let Ok(status) = status { + if status.status.success() { + debug!("Server started: {server_url}"); + return Ok(()); + } + } + thread::sleep(Duration::from_millis(500)); + } + anyhow::bail!("Timed out waiting for {server_url}"); +} + +pub enum ServerState { + None, + Yes { + handle: thread::JoinHandle<()>, + data_dir: TempDir, + }, + Docker { + handle: thread::JoinHandle<()>, + compose_file: PathBuf, + project: String, + }, +} + +impl ServerState { + fn start(start_mode: StartServer, args: &mut Vec<String>) -> Result<Self> { + Self::start_with_output(start_mode, args, None) + } + + fn start_with_output(start_mode: StartServer, args: &mut Vec<String>, output: Option<&mut String>) -> Result<Self> { + // TODO: Currently the server output leaks. 
We should be capturing it and only printing if the test fails. + + match start_mode { + StartServer::No => Ok(Self::None), + StartServer::Docker { compose_file } => { + if let Some(buf) = output { + buf.push_str("Starting server..\n"); + } else { + println!("Starting server.."); + } + let server_port = find_free_port()?; + let pg_port = find_free_port()?; + let tracy_port = find_free_port()?; + let project = format!("spacetimedb-smoketests-{server_port}"); + args.push("--remote-server".into()); + let server_url = format!("http://localhost:{server_port}"); + args.push(server_url.clone()); + let compose_str = compose_file.to_string_lossy().to_string(); + + // TODO: We don't capture the output from this, which pollutes the logs. + let handle = thread::spawn({ + let project = project.clone(); + move || { + let _ = cmd!( + "docker", + "compose", + "-f", + &compose_str, + "--project-name", + &project, + "up", + "--abort-on-container-exit", + ) + .env("STDB_PORT", server_port.to_string()) + .env("STDB_PG_PORT", pg_port.to_string()) + .env("STDB_TRACY_PORT", tracy_port.to_string()) + .run(); + } + }); + wait_until_http_ready(Duration::from_secs(900), &server_url)?; + Ok(ServerState::Docker { + handle, + compose_file, + project, + }) + } + StartServer::Yes => { + // TODO: Make sure that this isn't brittle / multiple parallel batches don't grab the same port + + // Create a temporary data directory for this server instance. + let data_dir = TempDir::new()?; + + let server_port = find_free_port()?; + let pg_port = find_free_port()?; + args.push("--remote-server".into()); + let server_url = format!("http://localhost:{server_port}"); + args.push(server_url.clone()); + if let Some(buf) = output { + buf.push_str("Starting server..\n"); + } else { + println!("Starting server.."); + } + let data_dir_str = data_dir.path().to_string_lossy().to_string(); + let handle = thread::spawn(move || { + let _ = cmd!( + "cargo", + "run", + "-p", + "spacetimedb-cli", + "--", + "start", + "--listen-addr", + &format!("0.0.0.0:{server_port}"), + "--pg-port", + pg_port.to_string(), + "--data-dir", + data_dir_str, + ) + .read(); + }); + wait_until_http_ready(Duration::from_secs(1200), &server_url)?; + Ok(ServerState::Yes { handle, data_dir }) + } + } + } +} + +impl Drop for ServerState { + fn drop(&mut self) { + // TODO: Consider doing a dance to have the server thread die, instead of just dying with this process. + match self { + ServerState::None => {} + ServerState::Docker { + handle: _, + compose_file, + project, + } => { + with_print_lock(|| { + println!("Shutting down server.."); + }); + let compose_str = compose_file.to_string_lossy().to_string(); + let _ = cmd!( + "docker", + "compose", + "-f", + &compose_str, + "--project-name", + &project, + "down", + ) + .run(); + } + ServerState::Yes { handle: _, data_dir } => { + with_print_lock(|| { + println!("Shutting down server (temp data-dir will be dropped).."); + }); + let _ = data_dir; + } + } + } +} + +fn run_smoketests_batch(server_mode: StartServer, args: &[String], python: &str) -> Result<()> { + let mut args: Vec<_> = args.iter().cloned().collect(); + + let _server = ServerState::start(server_mode, &mut args)?; + + println!("Running smoketests: {}", args.join(" ")); + cmd( + python, + ["-m", "smoketests"].into_iter().map(|s| s.to_string()).chain(args), + ) + .run()?; + Ok(()) +} + +// TODO: Fold this into `run_smoketests_batch`. 
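The port-grabbing TODO above is a real race: `find_free_port` drops its listener before the server binds, so two parallel batches can briefly land on the same port. A hedged sketch of one mitigation, retrying with a fresh port on bind failure; `spawn_server_on` is a hypothetical stand-in for the `cargo run -p spacetimedb-cli -- start` spawn, not part of the patch:

```rust
use std::net::TcpListener;

/// Hypothetical stand-in for launching the server on `port`; in the real
/// tool this would be the `cargo run -p spacetimedb-cli -- start` spawn,
/// surfacing an error if the port was taken in the meantime.
fn spawn_server_on(port: u16) -> std::io::Result<()> {
    TcpListener::bind(("127.0.0.1", port)).map(|_| ())
}

/// Pick an ephemeral port and start the server, retrying the (small)
/// window in which another batch can grab the same port.
fn start_with_retry(max_attempts: usize) -> std::io::Result<u16> {
    let mut last_err = std::io::Error::new(std::io::ErrorKind::Other, "no attempts made");
    for _ in 0..max_attempts {
        let port = TcpListener::bind("127.0.0.1:0")?.local_addr()?.port();
        match spawn_server_on(port) {
            Ok(()) => return Ok(port),
            Err(e) => last_err = e, // lost the race; try a new port
        }
    }
    Err(last_err)
}
```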
+fn run_smoketests_batch_captured(server_mode: StartServer, args: &[String], python: &str) -> (String, Result<()>) { + let mut args: Vec<_> = args.iter().cloned().collect(); + let mut output = String::new(); + + let server = ServerState::start_with_output(server_mode, &mut args, Some(&mut output)); + let _server = match server { + Ok(server) => server, + Err(e) => return (output, Err(e)), + }; + + output.push_str(&format!("Running smoketests: {}\n", args.join(" "))); + + let res = cmd( + python, + ["-m", "smoketests"].into_iter().map(|s| s.to_string()).chain(args), + ) + .stdout_capture() + .stderr_capture() + .unchecked() + .run(); + + let res = match res { + Ok(res) => res, + Err(e) => return (output, Err(e.into())), + }; + + let stdout = String::from_utf8_lossy(&res.stdout).to_string(); + let stderr = String::from_utf8_lossy(&res.stderr).to_string(); + if !stdout.is_empty() { + output.push_str(&stdout); + if !stdout.ends_with('\n') { + output.push('\n'); + } + } + if !stderr.is_empty() { + output.push_str(&stderr); + if !stderr.ends_with('\n') { + output.push('\n'); + } + } + + if !res.status.success() { + return ( + output, + Err(anyhow::anyhow!("smoketests exited with status: {}", res.status)), + ); + } + + (output, Ok(())) +} + +fn server_start_config(start_server: bool, docker: Option<String>) -> StartServer { + match (start_server, docker.as_ref()) { + (start_server, Some(compose_file)) => { + if !start_server { + warn!("--docker implies --start-server=true"); + } + StartServer::Docker { + compose_file: compose_file.into(), + } + } + (true, None) => StartServer::Yes, + (false, None) => StartServer::No, + } +} + +fn common_args( + docker: Option<String>, + skip_dotnet: bool, + test_name_patterns: Vec<String>, + exclude: Vec<String>, + local_only: bool, + spacetime_login: bool, + show_all_output: bool, + no_build_cli: bool, + no_docker_logs: bool, +) -> Vec<String> { + let mut args: Vec<String> = Vec::new(); + + if no_docker_logs { + args.push("--no-docker-logs".to_string()); + } + if skip_dotnet { + args.push("--skip-dotnet".to_string()); + } + if show_all_output { + args.push("--show-all-output".to_string()); + } + for pat in test_name_patterns { + args.push("-k".to_string()); + args.push(pat); + } + if !exclude.is_empty() { + args.push("-x".to_string()); + args.push(exclude.join(" ")); + } + if no_build_cli { + args.push("--no-build-cli".to_string()); + } + if spacetime_login { + args.push("--spacetime-login".to_string()); + } + if local_only { + args.push("--local-only".to_string()); + } + + if let Some(compose_file) = docker.as_ref() { + args.push("--docker".to_string()); + args.push("--compose-file".to_string()); + args.push(compose_file.to_string()); + } + + args +} fn infer_python() -> String { let py3_available = cmd!("python3", "--version").run().is_ok(); @@ -118,11 +510,200 @@ } } +fn run_smoketests_serial( + python: String, + list: Option<String>, + docker: Option<String>, + skip_dotnet: bool, + test_name_patterns: Vec<String>, + exclude: Vec<String>, + remote_server: Option<String>, + local_only: bool, + spacetime_login: bool, + test: Vec<String>, + show_all_output: bool, + no_build_cli: bool, + no_docker_logs: bool, + start_server: StartServer, +) -> Result<()> { + let mut args = Vec::new(); + if let Some(list_mode) = list { + args.push(format!("--list={list_mode}").to_string()); + } + if let Some(remote) = remote_server { + args.push("--remote-server".to_string()); + args.push(remote); + } + for test in test { + args.push(test.clone()); + } + // The python smoketests take -x X Y Z, which can be ambiguous with passing test names as 
args to run. + // So, we make sure the anonymous test name arg has been added _before_ the exclude args which are a part of common_args. + args.extend(common_args( + docker, + skip_dotnet, + test_name_patterns, + exclude, + local_only, + spacetime_login, + show_all_output, + no_build_cli, + no_docker_logs, + )); + run_smoketests_batch(start_server, &args, &python)?; + Ok(()) +} + +fn run_smoketests_parallel( + python: String, + list: Option<String>, + docker: Option<String>, + skip_dotnet: bool, + test_name_patterns: Vec<String>, + exclude: Vec<String>, + remote_server: Option<String>, + local_only: bool, + spacetime_login: bool, + test: Vec<String>, + show_all_output: bool, + no_build_cli: bool, + no_docker_logs: bool, + start_server: StartServer, +) -> Result<()> { + let args = common_args( + docker, + skip_dotnet, + test_name_patterns, + exclude, + local_only, + spacetime_login, + show_all_output, + no_build_cli, + no_docker_logs, + ); + + if list.is_some() { + anyhow::bail!("--list does not make sense with --parallel"); + } + if remote_server.is_some() { + // This is just because we manually provide --remote-server later, so it requires some refactoring. + anyhow::bail!("--remote-server is not supported in parallel mode"); + } + + // TODO: Handle --local-only tests separately, since we are passing --remote-server in all of our batches. + + println!("Listing smoketests for parallel execution.."); + + let tests = { + let mut list_args: Vec<String> = args.clone(); + list_args.push("--list=json".to_string()); + // TODO: Are users able to list specific tests here, or just top-level test filenames? + // If they can list individual tests, then this won't work as expected (because we should pass those restrictions later + // when we run each batch as well). + for test in test { + list_args.push(test.clone()); + } + + let output = cmd( + python.clone(), + ["-m", "smoketests"].into_iter().map(|s| s.to_string()).chain(list_args), + ) + .stderr_to_stdout() + .read() + .expect("Failed to list smoketests"); + + let parsed: serde_json::Value = serde_json::from_str(&output)?; + let tests = parsed.get("tests").and_then(|v| v.as_array()).cloned().unwrap(); + let errors = parsed + .get("errors") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + + if !errors.is_empty() { + println!("Errors while constructing smoketests:"); + for err in &errors { + let test_id = err.get("test_id").and_then(|v| v.as_str()).unwrap(); + let msg = err.get("error").and_then(|v| v.as_str()).unwrap(); + println!("{test_id}"); + println!("{msg}"); + } + // If there were errors constructing tests, treat this as a failure + // and do not run any batches. + anyhow::bail!("Errors encountered while constructing smoketests; aborting parallel run"); + } + + tests + }; + + let batches: HashSet<String> = tests + .into_iter() + .map(|t| { + let name = t.as_str().unwrap(); + let parts = name.split('.').collect::<Vec<_>>(); + parts[2].to_string() + }) + .collect(); + + // Run each batch in parallel threads. 
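One caveat on the grouping just defined: indexing `parts[2]` assumes every id from `--list=json` has the dotted shape `smoketests.tests.<module>.<Class>.<method>` (my inference from how unittest loaders name tests, not something the patch states) and will panic on anything shorter. A defensive variant could fall back to the whole id:

```rust
/// Extract the batch key (the test module) from a unittest-style id,
/// falling back to the full id when it lacks the expected shape.
/// The example id below is illustrative, not taken from the test suite.
fn batch_key(test_id: &str) -> String {
    test_id.split('.').nth(2).unwrap_or(test_id).to_string()
}

fn main() {
    assert_eq!(batch_key("smoketests.tests.zz_docker.DockerTest.test_restart"), "zz_docker");
    assert_eq!(batch_key("weird_id"), "weird_id"); // no panic on short ids
}
```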
+ let mut handles = Vec::new(); + for batch in batches { + let start_server_clone = start_server.clone(); + let python = python.clone(); + let mut batch_args: Vec = Vec::new(); + batch_args.push(batch.clone()); + batch_args.extend(args.iter().cloned()); + + handles.push(( + batch.clone(), + std::thread::spawn(move || { + let (captured, result) = run_smoketests_batch_captured(start_server_clone, &batch_args, &python); + + with_print_lock(|| { + println!("===== smoketests batch: {batch} ====="); + print!("{captured}"); + if let Err(e) = &result { + println!("(batch failed) {e:?}"); + } + println!("===== end smoketests batch: {batch} ====="); + }); + + result + }), + )); + } + + let mut failed_batches = vec![]; + for (batch, handle) in handles { + // If the thread panicked or the batch failed, treat it as a failure. + let result = handle + .join() + .unwrap_or_else(|_| Err(anyhow::anyhow!("smoketest batch thread panicked",))); + if let Err(e) = result { + println!("Smoketest batch {batch} failed: {e:?}"); + failed_batches.push(batch); + } + } + + if !failed_batches.is_empty() { + anyhow::bail!("Smoketest batch(es) failed: {}", failed_batches.join(", ")); + } + + Ok(()) +} + fn main() -> Result<()> { env_logger::init(); let cli = Cli::parse(); + // Remove all Cargo-provided env vars from the subcommand + for (key, _) in std::env::vars() { + if key.starts_with("CARGO_") && key != "CARGO_TARGET_DIR" { + std::env::remove_var(key); + } + } + match cli.cmd { Some(CiCmd::Test) => { // TODO: This doesn't work on at least user Linux machines, because something here apparently uses `sudo`? @@ -211,16 +792,89 @@ fn main() -> Result<()> { .run()?; } - Some(CiCmd::Smoketests { args: smoketest_args }) => { - let python = infer_python(); - cmd( - python, - ["-m", "smoketests"] - .into_iter() - .map(|s| s.to_string()) - .chain(smoketest_args), - ) - .run()?; + Some(CiCmd::Smoketests { + start_server, + docker, + test, + no_docker_logs, + skip_dotnet, + show_all_output, + test_name_patterns, + exclude, + mut no_build_cli, + list, + remote_server, + spacetime_login, + local_only, + parallel, + python, + }) => { + let start_server = server_start_config(start_server, docker.clone()); + // Do initial server build + match start_server.clone() { + StartServer::No => {} + StartServer::Yes { .. } => { + println!("Building SpacetimeDB.."); + + // Pre-build so that `cargo run -p spacetimedb-cli` will immediately start. Otherwise we risk timing out waiting for the server to come up. + cmd!( + "cargo", + "build", + "-p", + "spacetimedb-cli", + "-p", + "spacetimedb-standalone", + "-p", + "spacetimedb-update", + ) + .run()?; + no_build_cli = true; + } + StartServer::Docker { compose_file } => { + println!("Building docker container.."); + let compose_str = compose_file.to_string_lossy().to_string(); + let _ = cmd!("docker", "compose", "-f", &compose_str, "build",).run()?; + } + } + + let python = python.unwrap_or(infer_python()); + + // These are split into two separate functions, so that we can ensure all the args are considered in both cases. 
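A note on the `CARGO_*` scrubbing near the top of `main` above: it mutates the whole parent environment, even though the comment frames it as removing the vars "from the subcommand". A sketch of an alternative that scopes the removal to each spawned child, assuming duct exposes `Expression::env_remove` (an assumption about the duct API on my part, though the crate is already a dependency here):

```rust
use duct::cmd;

/// Sketch: strip inherited Cargo vars from one child process instead of
/// the whole `cargo ci` process; keeps CARGO_TARGET_DIR, like the patch.
fn cargo_free(program: &str, args: &[&str]) -> duct::Expression {
    let mut expr = cmd(program, args);
    for (key, _) in std::env::vars() {
        if key.starts_with("CARGO_") && key != "CARGO_TARGET_DIR" {
            expr = expr.env_remove(key); // assumed duct API
        }
    }
    expr
}

fn main() -> std::io::Result<()> {
    // e.g. invoke the Python runner without Cargo's environment
    cargo_free("python3", &["-m", "smoketests", "--list"]).run()?;
    Ok(())
}
```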
+ if parallel { + run_smoketests_parallel( + python, + list, + docker, + skip_dotnet, + test_name_patterns, + exclude, + remote_server, + local_only, + spacetime_login, + test, + show_all_output, + no_build_cli, + no_docker_logs, + start_server, + )?; + } else { + run_smoketests_serial( + python, + list, + docker, + skip_dotnet, + test_name_patterns, + exclude, + remote_server, + local_only, + spacetime_login, + test, + show_all_output, + no_build_cli, + no_docker_logs, + start_server, + )?; + } } Some(CiCmd::UpdateFlow { @@ -277,12 +931,12 @@ fn main() -> Result<()> { } cmd!("pnpm", "install", "--recursive").run()?; - cmd!("pnpm", "generate-cli-docs").dir("docs").run()?; - let out = cmd!("git", "status", "--porcelain", "--", "docs").read()?; - if out.is_empty() { + cmd!("pnpm", "generate-cli-docs").run()?; + let out = cmd!("git", "status", "--porcelain").read()?; + if out == "" { log::info!("No docs changes detected"); } else { - anyhow::bail!("CLI docs are out of date:\n{out}"); + anyhow::bail!("CLI docs are out of date"); } } diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_003_struct_in_table/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_003_struct_in_table/answers/rust.rs index dd4679ea1bb..b0ba11abf29 100644 --- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_003_struct_in_table/answers/rust.rs +++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_003_struct_in_table/answers/rust.rs @@ -12,4 +12,3 @@ pub struct Entity { pub id: i32, pub pos: Position, } - diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_004_insert/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_004_insert/spec.rs index d918b7df607..0c14f418339 100644 --- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_004_insert/spec.rs +++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_004_insert/spec.rs @@ -8,24 +8,22 @@ pub fn spec() -> BenchmarkSpec { let mut v = default_schema_parity_scorers(host_url, file!(), route_tag); let casing = casing_for_lang(lang); let sb = SqlBuilder::new(casing); - let select = sb.select_by_id("users", &["id","name","age","active"], "id", 1); + let select = sb.select_by_id("users", &["id", "name", "age", "active"], "id", 1); let reducer_name = ident("InsertUser", casing); - v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig { - src_file: file!(), - route_tag, - reducer: reducer_name.into(), - args: vec![ - Value::from(1), - Value::from("Alice"), - Value::from(30), - Value::from(true), - ], - select_query: select.clone(), - id_str: "data_parity_insert_user", - collapse_ws: true, - timeout: time::Duration::from_secs(10), - })); + v.push(make_reducer_data_parity_scorer( + host_url, + ReducerDataParityConfig { + src_file: file!(), + route_tag, + reducer: reducer_name.into(), + args: vec![Value::from(1), Value::from("Alice"), Value::from(30), Value::from(true)], + select_query: select.clone(), + id_str: "data_parity_insert_user", + collapse_ws: true, + timeout: time::Duration::from_secs(10), + }, + )); v }) } diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/answers/rust.rs index 98602d0fb74..130f1c60ffa 100644 --- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/answers/rust.rs +++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/answers/rust.rs @@ -12,4 +12,4 @@ pub struct User { #[reducer] pub fn update_user(ctx: &ReducerContext, id: i32, name: String, age: i32, active: bool) { 
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/answers/rust.rs
index 98602d0fb74..130f1c60ffa 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/answers/rust.rs
@@ -12,4 +12,4 @@ pub struct User {
 #[reducer]
 pub fn update_user(ctx: &ReducerContext, id: i32, name: String, age: i32, active: bool) {
     ctx.db.users().id().update(User { id, name, age, active });
-}
\ No newline at end of file
+}
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/spec.rs
index 5b61b5d333f..2505dc17a7d 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_005_update/spec.rs
@@ -1,7 +1,5 @@
 use crate::eval::defaults::{
-    default_schema_parity_scorers,
-    make_reducer_data_parity_scorer,
-    make_sql_exec_both_scorer,
+    default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_exec_both_scorer,
 };
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder};
 use serde_json::Value;
@@ -13,8 +11,12 @@ pub fn spec() -> BenchmarkSpec {
         let casing = casing_for_lang(lang);
         let sb = SqlBuilder::new(casing);
 
-        let seed = sb.insert_values("users", &["id","name","age","active"], &["1","'Alice'","30","true"]);
-        let select = sb.select_by_id("users", &["id","name","age","active"], "id", 1);
+        let seed = sb.insert_values(
+            "users",
+            &["id", "name", "age", "active"],
+            &["1", "'Alice'", "30", "true"],
+        );
+        let select = sb.select_by_id("users", &["id", "name", "age", "active"], "id", 1);
         let reducer_name = ident("UpdateUser", casing);
 
         v.push(make_sql_exec_both_scorer(
@@ -26,21 +28,24 @@ pub fn spec() -> BenchmarkSpec {
             time::Duration::from_secs(10),
         ));
 
-        v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig {
-            src_file: file!(),
-            route_tag,
-            reducer: reducer_name.into(),
-            args: vec![
-                Value::from(1),
-                Value::from("Alice2"),
-                Value::from(31),
-                Value::from(false),
-            ],
-            select_query: select.clone(),
-            id_str: "data_parity_update_user",
-            collapse_ws: true,
-            timeout: time::Duration::from_secs(10),
-        }));
+        v.push(make_reducer_data_parity_scorer(
+            host_url,
+            ReducerDataParityConfig {
+                src_file: file!(),
+                route_tag,
+                reducer: reducer_name.into(),
+                args: vec![
+                    Value::from(1),
+                    Value::from("Alice2"),
+                    Value::from(31),
+                    Value::from(false),
+                ],
+                select_query: select.clone(),
+                id_str: "data_parity_update_user",
+                collapse_ws: true,
+                timeout: time::Duration::from_secs(10),
+            },
+        ));
 
         v
     })
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_006_delete/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_006_delete/spec.rs
index 0d21ddd73fc..7a8531a7f99 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_006_delete/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_006_delete/spec.rs
@@ -1,8 +1,4 @@
-use crate::eval::defaults::{
-    default_schema_parity_scorers,
-    make_reducer_sql_count_scorer,
-    make_sql_exec_both_scorer,
-};
+use crate::eval::defaults::{default_schema_parity_scorers, make_reducer_sql_count_scorer, make_sql_exec_both_scorer};
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerSqlCountConfig, SqlBuilder};
 use serde_json::Value;
 use std::time;
@@ -13,7 +9,11 @@ pub fn spec() -> BenchmarkSpec {
         let casing = casing_for_lang(lang);
         let sb = SqlBuilder::new(casing);
 
-        let seed = sb.insert_values("users", &["id","name","age","active"], &["1","'Alice'","30","true"]);
+        let seed = sb.insert_values(
+            "users",
+            &["id", "name", "age", "active"],
+            &["1", "'Alice'", "30", "true"],
+        );
         let count = sb.count_by_id("users", "id", 1);
         let reducer_name = ident("DeleteUser", casing);
 
@@ -26,16 +26,19 @@ pub fn spec() -> BenchmarkSpec {
             time::Duration::from_secs(10),
         ));
 
-        v.push(make_reducer_sql_count_scorer(host_url, ReducerSqlCountConfig {
-            src_file: file!(),
-            route_tag,
-            reducer: reducer_name.into(),
-            args: vec![Value::from(1)],
-            sql_count_query: count.clone(),
-            expected_count: 0,
-            id_str: "delete_user_count_zero",
-            timeout: time::Duration::from_secs(10),
-        }));
+        v.push(make_reducer_sql_count_scorer(
+            host_url,
+            ReducerSqlCountConfig {
+                src_file: file!(),
+                route_tag,
+                reducer: reducer_name.into(),
+                args: vec![Value::from(1)],
+                sql_count_query: count.clone(),
+                expected_count: 0,
+                id_str: "delete_user_count_zero",
+                timeout: time::Duration::from_secs(10),
+            },
+        ));
 
         v
     })
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_007_crud/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_007_crud/answers/rust.rs
index 42258ca90e1..ae15d0c4413 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_007_crud/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_007_crud/answers/rust.rs
@@ -11,8 +11,23 @@ pub struct User {
 
 #[reducer]
 pub fn crud(ctx: &ReducerContext) {
-    ctx.db.users().insert(User { id: 1, name: "Alice".into(), age: 30, active: true });
-    ctx.db.users().insert(User { id: 2, name: "Bob".into(), age: 22, active: false });
-    ctx.db.users().id().update(User { id: 1, name: "Alice2".into(), age: 31, active: false });
+    ctx.db.users().insert(User {
+        id: 1,
+        name: "Alice".into(),
+        age: 30,
+        active: true,
+    });
+    ctx.db.users().insert(User {
+        id: 2,
+        name: "Bob".into(),
+        age: 22,
+        active: false,
+    });
+    ctx.db.users().id().update(User {
+        id: 1,
+        name: "Alice2".into(),
+        age: 31,
+        active: false,
+    });
     ctx.db.users().id().delete(2);
 }
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_007_crud/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_007_crud/spec.rs
index 54b09dd240f..dc1cb669cfc 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_007_crud/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_007_crud/spec.rs
@@ -1,4 +1,6 @@
-use crate::eval::defaults::{default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer};
+use crate::eval::defaults::{
+    default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer,
+};
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder};
 use std::time::Duration;
 
@@ -10,27 +12,42 @@ pub fn spec() -> BenchmarkSpec {
         let sb = SqlBuilder::new(casing);
         let reducer = ident("Crud", casing);
 
-        let select_id1 = sb.select_by_id("users", &["id","name","age","active"], "id", 1);
-        let count_id2  = sb.count_by_id("users", "id", 2);
-        let count_all  = "SELECT COUNT(*) AS n FROM users";
+        let select_id1 = sb.select_by_id("users", &["id", "name", "age", "active"], "id", 1);
+        let count_id2 = sb.count_by_id("users", "id", 2);
+        let count_all = "SELECT COUNT(*) AS n FROM users";
 
-        v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig {
-            src_file: file!(),
-            route_tag,
-            reducer: reducer.into(),
-            args: vec![],
-            select_query: select_id1.clone(),
-            id_str: "crud_row_id1_parity",
-            collapse_ws: true,
-            timeout: Duration::from_secs(10),
-        }));
+        v.push(make_reducer_data_parity_scorer(
+            host_url,
+            ReducerDataParityConfig {
+                src_file: file!(),
+                route_tag,
+                reducer: reducer.into(),
+                args: vec![],
+                select_query: select_id1.clone(),
+                id_str: "crud_row_id1_parity",
+                collapse_ws: true,
+                timeout: Duration::from_secs(10),
+            },
+        ));
 
         v.push(make_sql_count_only_scorer(
-            host_url, file!(), route_tag, &count_id2, 0, "crud_row_id2_deleted", Duration::from_secs(10),
+            host_url,
+            file!(),
+            route_tag,
+            &count_id2,
+            0,
+            "crud_row_id2_deleted",
+            Duration::from_secs(10),
        ));
 
         v.push(make_sql_count_only_scorer(
-            host_url, file!(), route_tag, count_all, 1, "crud_total_count_one", Duration::from_secs(10),
+            host_url,
+            file!(),
+            route_tag,
+            count_all,
+            1,
+            "crud_total_count_one",
+            Duration::from_secs(10),
        ));
 
         v
     })
-}
\ No newline at end of file
+}
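Every spec above funnels its SQL through SqlBuilder so that the same logical query can be rendered with per-language identifier casing (casing_for_lang, ident). The real builder lives in crate::eval; the following self-contained sketch is an illustrative guess at the behavior select_by_id needs to have, not its actual implementation:

#[derive(Clone, Copy)]
enum Casing {
    Snake,
    Camel,
}

// Hypothetical stand-in for crate::eval::ident: "user_id" -> "userId" under Camel.
fn ident(name: &str, casing: Casing) -> String {
    match casing {
        Casing::Snake => name.to_string(),
        Casing::Camel => {
            let mut out = String::new();
            let mut upper = false;
            for c in name.chars() {
                if c == '_' {
                    upper = true;
                } else if upper {
                    out.extend(c.to_uppercase());
                    upper = false;
                } else {
                    out.push(c);
                }
            }
            out
        }
    }
}

fn select_by_id(table: &str, cols: &[&str], key: &str, id: i64, casing: Casing) -> String {
    // Render each column name in the target casing, then join.
    let cols = cols.iter().map(|&c| ident(c, casing)).collect::<Vec<_>>().join(", ");
    format!("SELECT {cols} FROM {table} WHERE {}={id}", ident(key, casing))
}

fn main() {
    // Mirrors the t_004/t_007 scorer query under both casings.
    println!("{}", select_by_id("users", &["id", "name", "age", "active"], "id", 1, Casing::Snake));
    println!("{}", select_by_id("users", &["id", "name", "age", "active"], "id", 1, Casing::Camel));
}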
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_008_index_lookup/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_008_index_lookup/spec.rs
index 3507304d5f3..d21d865252e 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_008_index_lookup/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_008_index_lookup/spec.rs
@@ -1,7 +1,5 @@
 use crate::eval::defaults::{
-    default_schema_parity_scorers,
-    make_reducer_data_parity_scorer,
-    make_sql_exec_both_scorer,
+    default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_exec_both_scorer,
 };
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder};
 use serde_json::Value;
@@ -18,8 +16,8 @@ pub fn spec() -> BenchmarkSpec {
         // Seed a user row in both DBs so the lookup has something to find
         let seed_users = sb.insert_values(
             "users",
-            &["id","name","age","active"],
-            &["1","'Alice'","30","true"],
+            &["id", "name", "age", "active"],
+            &["1", "'Alice'", "30", "true"],
         );
 
         v.push(make_sql_exec_both_scorer(
@@ -32,23 +30,21 @@ pub fn spec() -> BenchmarkSpec {
         ));
 
         // After calling the reducer, the projection should be present in results
-        let select_result = sb.select_by_id(
-            "results",
-            &["id","name"],
-            "id",
-            1,
-        );
+        let select_result = sb.select_by_id("results", &["id", "name"], "id", 1);
 
-        v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig {
-            src_file: file!(),
-            route_tag,
-            reducer: reducer_name.into(),
-            args: vec![Value::from(1)],
-            select_query: select_result.clone(),
-            id_str: "index_lookup_projection_parity",
-            collapse_ws: true,
-            timeout: Duration::from_secs(10),
-        }));
+        v.push(make_reducer_data_parity_scorer(
+            host_url,
+            ReducerDataParityConfig {
+                src_file: file!(),
+                route_tag,
+                reducer: reducer_name.into(),
+                args: vec![Value::from(1)],
+                select_query: select_result.clone(),
+                id_str: "index_lookup_projection_parity",
+                collapse_ws: true,
+                timeout: Duration::from_secs(10),
+            },
+        ));
 
         v
     })
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_009_init/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_009_init/answers/rust.rs
index ebec263828d..e1be046a5ca 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_009_init/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_009_init/answers/rust.rs
@@ -11,6 +11,16 @@ pub struct User {
 
 #[reducer(init)]
 pub fn init(ctx: &ReducerContext) {
-    ctx.db.users().insert(User { id: 1, name: "Alice".into(), age: 30, active: true });
-    ctx.db.users().insert(User { id: 2, name: "Bob".into(), age: 22, active: false });
+    ctx.db.users().insert(User {
+        id: 1,
+        name: "Alice".into(),
+        age: 30,
+        active: true,
+    });
+    ctx.db.users().insert(User {
+        id: 2,
+        name: "Bob".into(),
+        age: 22,
+        active: false,
+    });
 }
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_009_init/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_009_init/spec.rs
index 618237674a1..5774944feb2 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_009_init/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_009_init/spec.rs
@@ -7,19 +7,45 @@ pub fn spec() -> BenchmarkSpec {
         let mut v = default_schema_parity_scorers(host_url, file!(), route_tag);
         let sb = SqlBuilder::new(casing_for_lang(lang));
 
-        let id   = sb.cols(&["id"])[0].clone();
+        let id = sb.cols(&["id"])[0].clone();
         let name = sb.cols(&["name"])[0].clone();
-        let age  = sb.cols(&["age"])[0].clone();
-        let act  = sb.cols(&["active"])[0].clone();
+        let age = sb.cols(&["age"])[0].clone();
+        let act = sb.cols(&["active"])[0].clone();
 
-        let q_alice = format!("SELECT COUNT(*) AS n FROM users WHERE {id}=1 AND {name}='Alice' AND {age}=30 AND {act}=true");
-        let q_bob   = format!("SELECT COUNT(*) AS n FROM users WHERE {id}=2 AND {name}='Bob' AND {age}=22 AND {act}=false");
+        let q_alice =
+            format!("SELECT COUNT(*) AS n FROM users WHERE {id}=1 AND {name}='Alice' AND {age}=30 AND {act}=true");
+        let q_bob =
+            format!("SELECT COUNT(*) AS n FROM users WHERE {id}=2 AND {name}='Bob' AND {age}=22 AND {act}=false");
         let q_total = "SELECT COUNT(*) AS n FROM users";
 
-        v.push(make_sql_count_only_scorer(host_url, file!(), route_tag, q_alice, 1, "init_seed_alice", Duration::from_secs(10)));
-        v.push(make_sql_count_only_scorer(host_url, file!(), route_tag, q_bob, 1, "init_seed_bob", Duration::from_secs(10)));
-        v.push(make_sql_count_only_scorer(host_url, file!(), route_tag, q_total, 2, "init_total_two", Duration::from_secs(10)));
+        v.push(make_sql_count_only_scorer(
+            host_url,
+            file!(),
+            route_tag,
+            q_alice,
+            1,
+            "init_seed_alice",
+            Duration::from_secs(10),
+        ));
+        v.push(make_sql_count_only_scorer(
+            host_url,
+            file!(),
+            route_tag,
+            q_bob,
+            1,
+            "init_seed_bob",
+            Duration::from_secs(10),
+        ));
+        v.push(make_sql_count_only_scorer(
+            host_url,
+            file!(),
+            route_tag,
+            q_total,
+            2,
+            "init_total_two",
+            Duration::from_secs(10),
+        ));
 
         v
     })
-}
\ No newline at end of file
+}
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_010_connect/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_010_connect/answers/rust.rs
index cd143d09fd2..f21d04e2036 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_010_connect/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_010_connect/answers/rust.rs
@@ -10,10 +10,16 @@ pub struct Event {
 
 #[reducer(client_connected)]
 pub fn client_connected(ctx: &ReducerContext) {
-    ctx.db.events().insert(Event { id: 0, kind: "connected".into() });
+    ctx.db.events().insert(Event {
+        id: 0,
+        kind: "connected".into(),
+    });
 }
 
 #[reducer(client_disconnected)]
 pub fn client_disconnected(ctx: &ReducerContext) {
-    ctx.db.events().insert(Event { id: 0, kind: "disconnected".into() });
+    ctx.db.events().insert(Event {
+        id: 0,
+        kind: "disconnected".into(),
+    });
 }
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_011_helper_function/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_011_helper_function/answers/rust.rs
index 95934e2b51b..642c0a289d8 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_011_helper_function/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_011_helper_function/answers/rust.rs
@@ -7,7 +7,9 @@ pub struct ResultRow {
     pub sum: i32,
 }
 
-fn add(a: i32, b: i32) -> i32 { a + b }
+fn add(a: i32, b: i32) -> i32 {
+    a + b
+}
 
 #[reducer]
 pub fn compute_sum(ctx: &ReducerContext, id: i32, a: i32, b: i32) {
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_011_helper_function/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_011_helper_function/spec.rs
index be2e9b19332..7e260aa117e 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/basics/t_011_helper_function/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/basics/t_011_helper_function/spec.rs
@@ -1,9 +1,10 @@
-use crate::eval::defaults::{default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer};
+use crate::eval::defaults::{
+    default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer,
+};
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder};
 use serde_json::Value;
 use std::time::Duration;
 
-
 pub fn spec() -> BenchmarkSpec {
     BenchmarkSpec::from_tasks_auto(file!(), |lang, route_tag, host_url| {
         let mut v = default_schema_parity_scorers(host_url, file!(), route_tag);
@@ -11,23 +12,21 @@ pub fn spec() -> BenchmarkSpec {
         let casing = casing_for_lang(lang);
         let sb = SqlBuilder::new(casing);
         let reducer = ident("ComputeSum", casing);
-        let select = sb.select_by_id("results", &["id","sum"], "id", 1);
-
-        v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig {
-            src_file: file!(),
-            route_tag,
-            reducer: reducer.into(),
-            args: vec![
-                Value::from(1),
-                Value::from(2),
-                Value::from(3),
-            ],
-            select_query: select.clone(),
-            id_str: "helper_func_sum_parity",
-            collapse_ws: true,
-            timeout: Duration::from_secs(10),
-        }));
+        let select = sb.select_by_id("results", &["id", "sum"], "id", 1);
+        v.push(make_reducer_data_parity_scorer(
+            host_url,
+            ReducerDataParityConfig {
+                src_file: file!(),
+                route_tag,
+                reducer: reducer.into(),
+                args: vec![Value::from(1), Value::from(2), Value::from(3)],
+                select_query: select.clone(),
+                id_str: "helper_func_sum_parity",
+                collapse_ws: true,
+                timeout: Duration::from_secs(10),
+            },
+        ));
 
         let id = sb.cols(&["id"])[0].clone();
         let sum = sb.cols(&["sum"])[0].clone();
@@ -45,4 +44,4 @@ pub fn spec() -> BenchmarkSpec {
 
         v
     })
-}
\ No newline at end of file
+}
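t_011 is the one benchmark whose answer deliberately routes logic through a free function (add) rather than writing it inline in the reducer. The payoff is that the pure helper can be exercised without a SpacetimeDB host. A minimal sketch — only add comes from the answer file above; the test module is illustrative:

fn add(a: i32, b: i32) -> i32 {
    a + b
}

fn main() {
    println!("{}", add(2, 3));
}

#[cfg(test)]
mod tests {
    use super::add;

    #[test]
    fn add_matches_compute_sum_args() {
        // Same operands the spec passes to the ComputeSum reducer.
        assert_eq!(add(2, 3), 5);
    }
}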
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_012_spacetime_product_type/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_012_spacetime_product_type/answers/rust.rs
index fd58b0a55d8..a320d327fd5 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_012_spacetime_product_type/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_012_spacetime_product_type/answers/rust.rs
@@ -15,5 +15,8 @@ pub struct ResultRow {
 
 #[reducer]
 pub fn set_score(ctx: &ReducerContext, id: i32, left: i32, right: i32) {
-    ctx.db.results().insert(ResultRow { id, value: Score { left, right } });
+    ctx.db.results().insert(ResultRow {
+        id,
+        value: Score { left, right },
+    });
 }
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_012_spacetime_product_type/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_012_spacetime_product_type/spec.rs
index 7d54e077507..6943792fc8e 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_012_spacetime_product_type/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_012_spacetime_product_type/spec.rs
@@ -1,4 +1,6 @@
-use crate::eval::defaults::{default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer};
+use crate::eval::defaults::{
+    default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer,
+};
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder};
 use serde_json::Value;
 use std::time::Duration;
@@ -12,22 +14,21 @@ pub fn spec() -> BenchmarkSpec {
         let reducer = ident("SetScore", casing);
 
         // Compare the full row (including the product-typed column) across golden/llm
-        let select = sb.select_by_id("results", &["id","value"], "id", 1);
+        let select = sb.select_by_id("results", &["id", "value"], "id", 1);
 
-        v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig {
-            src_file: file!(),
-            route_tag,
-            reducer: reducer.into(),
-            args: vec![
-                Value::from(1),
-                Value::from(2),
-                Value::from(3),
-            ],
-            select_query: select.clone(),
-            id_str: "product_type_row_parity",
-            collapse_ws: true,
-            timeout: Duration::from_secs(10),
-        }));
+        v.push(make_reducer_data_parity_scorer(
+            host_url,
+            ReducerDataParityConfig {
+                src_file: file!(),
+                route_tag,
+                reducer: reducer.into(),
+                args: vec![Value::from(1), Value::from(2), Value::from(3)],
+                select_query: select.clone(),
+                id_str: "product_type_row_parity",
+                collapse_ws: true,
+                timeout: Duration::from_secs(10),
+            },
+        ));
 
         // Absolute sanity: exactly one row with id=1 exists
         let count = sb.count_by_id("results", "id", 1);
@@ -43,4 +44,4 @@ pub fn spec() -> BenchmarkSpec {
 
         v
     })
-}
\ No newline at end of file
+}
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_013_spacetime_sum_type/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_013_spacetime_sum_type/answers/rust.rs
index e0ea1f92b02..a4d25c37039 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_013_spacetime_sum_type/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_013_spacetime_sum_type/answers/rust.rs
@@ -21,5 +21,8 @@ pub struct ResultRow {
 
 #[reducer]
 pub fn set_circle(ctx: &ReducerContext, id: i32, radius: i32) {
-    ctx.db.results().insert(ResultRow { id, value: Shape::Circle(radius) });
+    ctx.db.results().insert(ResultRow {
+        id,
+        value: Shape::Circle(radius),
+    });
 }
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_013_spacetime_sum_type/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_013_spacetime_sum_type/spec.rs
index a2004097ae8..ad889594128 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_013_spacetime_sum_type/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_013_spacetime_sum_type/spec.rs
@@ -1,4 +1,6 @@
-use crate::eval::defaults::{default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer};
+use crate::eval::defaults::{
+    default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer,
+};
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder};
 use serde_json::Value;
 use std::time::Duration;
@@ -10,21 +12,20 @@ pub fn spec() -> BenchmarkSpec {
         let sb = SqlBuilder::new(casing_for_lang(lang));
         let reducer = ident("SetCircle", casing);
 
-        let select = sb.select_by_id("results", &["id","value"], "id", 1);
-        v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig {
-            src_file: file!(),
-            route_tag,
-            reducer: reducer.into(),
-            args: vec![
-                Value::from(1),
-                Value::from(10),
-            ],
-            select_query: select.clone(),
-            id_str: "sum_type_row_parity",
-            collapse_ws: true,
-            timeout: Duration::from_secs(10),
-        }));
-
+        let select = sb.select_by_id("results", &["id", "value"], "id", 1);
+        v.push(make_reducer_data_parity_scorer(
+            host_url,
+            ReducerDataParityConfig {
+                src_file: file!(),
+                route_tag,
+                reducer: reducer.into(),
+                args: vec![Value::from(1), Value::from(10)],
+                select_query: select.clone(),
+                id_str: "sum_type_row_parity",
+                collapse_ws: true,
+                timeout: Duration::from_secs(10),
+            },
+        ));
 
         let count = sb.count_by_id("results", "id", 1);
         v.push(make_sql_count_only_scorer(
@@ -39,4 +40,4 @@ pub fn spec() -> BenchmarkSpec {
 
         v
     })
-}
\ No newline at end of file
+}
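t_012 and t_013 are mirror images: one stores a product type (a struct such as Score) in a column, the other a sum type (an enum such as Shape). Stripped of the spacetimedb attributes, the distinction in plain Rust looks like the sketch below; the Rectangle variant is invented here purely to give match a second arm:

struct Score {
    left: i32,
    right: i32,
}

enum Shape {
    Circle(i32),
    Rectangle { width: i32, height: i32 },
}

fn describe(shape: &Shape) -> String {
    // A sum type forces every reader of the column to handle every variant.
    match shape {
        Shape::Circle(radius) => format!("circle r={radius}"),
        Shape::Rectangle { width, height } => format!("rect {width}x{height}"),
    }
}

fn main() {
    // A product type always carries every field at once.
    let score = Score { left: 2, right: 3 };
    println!("{} vs {}", score.left, score.right);
    // A sum type carries exactly one variant, as in set_circle above.
    println!("{}", describe(&Shape::Circle(10)));
}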
"sum_type_row_parity", + collapse_ws: true, + timeout: Duration::from_secs(10), + }, + )); let count = sb.count_by_id("results", "id", 1); v.push(make_sql_count_only_scorer( @@ -39,4 +40,4 @@ pub fn spec() -> BenchmarkSpec { v }) -} \ No newline at end of file +} diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_014_elementary_columns/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_014_elementary_columns/spec.rs index 33d2e5d97c5..64892f89d67 100644 --- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_014_elementary_columns/spec.rs +++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_014_elementary_columns/spec.rs @@ -1,8 +1,9 @@ -use crate::eval::defaults::{default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer}; +use crate::eval::defaults::{ + default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer, +}; use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder}; use std::time::Duration; - pub fn spec() -> BenchmarkSpec { BenchmarkSpec::from_tasks_auto(file!(), |lang, route_tag, host_url| { let mut v = default_schema_parity_scorers(host_url, file!(), route_tag); @@ -12,21 +13,24 @@ pub fn spec() -> BenchmarkSpec { let select = sb.select_by_id( "primitives", - &["id","count","total","price","ratio","active","name"], + &["id", "count", "total", "price", "ratio", "active", "name"], "id", - 1 + 1, ); - v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig { - src_file: file!(), - route_tag, - reducer: reducer.into(), - args: vec![], // no args - select_query: select.clone(), - id_str: "elementary_columns_row_parity", - collapse_ws: true, - timeout: Duration::from_secs(10), - })); + v.push(make_reducer_data_parity_scorer( + host_url, + ReducerDataParityConfig { + src_file: file!(), + route_tag, + reducer: reducer.into(), + args: vec![], // no args + select_query: select.clone(), + id_str: "elementary_columns_row_parity", + collapse_ws: true, + timeout: Duration::from_secs(10), + }, + )); let count = sb.count_by_id("primitives", "id", 1); v.push(make_sql_count_only_scorer( @@ -41,4 +45,4 @@ pub fn spec() -> BenchmarkSpec { v }) -} \ No newline at end of file +} diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_015_product_type_columns/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_015_product_type_columns/answers/rust.rs index ac59eba4069..b62327f6ccc 100644 --- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_015_product_type_columns/answers/rust.rs +++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_015_product_type_columns/answers/rust.rs @@ -25,8 +25,14 @@ pub struct Profile { pub fn seed(ctx: &ReducerContext) { ctx.db.profiles().insert(Profile { id: 1, - home: Address { street: "1 Main".into(), zip: 11111 }, - work: Address { street: "2 Broad".into(), zip: 22222 }, - pos: Position { x: 7, y: 9 }, + home: Address { + street: "1 Main".into(), + zip: 11111, + }, + work: Address { + street: "2 Broad".into(), + zip: 22222, + }, + pos: Position { x: 7, y: 9 }, }); } diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_015_product_type_columns/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_015_product_type_columns/spec.rs index 60f07ea1fad..605da1872f1 100644 --- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_015_product_type_columns/spec.rs +++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_015_product_type_columns/spec.rs @@ -1,7 +1,5 @@ use crate::eval::defaults::{ - 
default_schema_parity_scorers, - make_reducer_data_parity_scorer, - make_sql_count_only_scorer, + default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer, }; use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder}; use std::time::Duration; @@ -14,23 +12,21 @@ pub fn spec() -> BenchmarkSpec { let sb = SqlBuilder::new(casing); let reducer = ident("Seed", casing); - let select = sb.select_by_id( - "profiles", - &["id","home","work","pos"], - "id", - 1 - ); + let select = sb.select_by_id("profiles", &["id", "home", "work", "pos"], "id", 1); - v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig { - src_file: file!(), - route_tag, - reducer: reducer.into(), - args: vec![], - select_query: select.clone(), - id_str: "product_type_columns_row_parity", - collapse_ws: true, - timeout: Duration::from_secs(10), - })); + v.push(make_reducer_data_parity_scorer( + host_url, + ReducerDataParityConfig { + src_file: file!(), + route_tag, + reducer: reducer.into(), + args: vec![], + select_query: select.clone(), + id_str: "product_type_columns_row_parity", + collapse_ws: true, + timeout: Duration::from_secs(10), + }, + )); let count = sb.count_by_id("profiles", "id", 1); v.push(make_sql_count_only_scorer( diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_016_sum_type_columns/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_016_sum_type_columns/spec.rs index f67e5c70e61..b75aa9883e5 100644 --- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_016_sum_type_columns/spec.rs +++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_016_sum_type_columns/spec.rs @@ -1,7 +1,5 @@ use crate::eval::defaults::{ - default_schema_parity_scorers, - make_reducer_data_parity_scorer, - make_sql_count_only_scorer, + default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer, }; use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder}; use std::time::Duration; @@ -13,18 +11,21 @@ pub fn spec() -> BenchmarkSpec { let sb = SqlBuilder::new(casing); let reducer = ident("Seed", casing); - let select = sb.select_by_id("drawings", &["id","a","b"], "id", 1); + let select = sb.select_by_id("drawings", &["id", "a", "b"], "id", 1); - v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig { - src_file: file!(), - route_tag, - reducer: reducer.into(), - args: vec![], - select_query: select.clone(), - id_str: "sum_type_columns_row_parity", - collapse_ws: true, - timeout: Duration::from_secs(10), - })); + v.push(make_reducer_data_parity_scorer( + host_url, + ReducerDataParityConfig { + src_file: file!(), + route_tag, + reducer: reducer.into(), + args: vec![], + select_query: select.clone(), + id_str: "sum_type_columns_row_parity", + collapse_ws: true, + timeout: Duration::from_secs(10), + }, + )); let count = sb.count_by_id("drawings", "id", 1); v.push(make_sql_count_only_scorer( diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_017_scheduled_columns/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_017_scheduled_columns/answers/rust.rs index c716e1f88cc..9d81c603f88 100644 --- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_017_scheduled_columns/answers/rust.rs +++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_017_scheduled_columns/answers/rust.rs @@ -10,8 +10,7 @@ pub struct TickTimer { } #[reducer] -pub fn tick(_ctx: &ReducerContext, _schedule: TickTimer) { -} +pub fn tick(_ctx: &ReducerContext, 
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_017_scheduled_columns/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_017_scheduled_columns/spec.rs
index 5d022a3ad1f..f336b0219d5 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_017_scheduled_columns/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_017_scheduled_columns/spec.rs
@@ -1,7 +1,4 @@
-use crate::eval::defaults::{
-    default_schema_parity_scorers,
-    make_sql_count_only_scorer,
-};
+use crate::eval::defaults::{default_schema_parity_scorers, make_sql_count_only_scorer};
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, SqlBuilder};
 use std::time::Duration;
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_018_constraints/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_018_constraints/answers/rust.rs
index 9eb96d0d206..879fe511441 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_018_constraints/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_018_constraints/answers/rust.rs
@@ -14,6 +14,14 @@ pub struct Account {
 
 #[reducer]
 pub fn seed(ctx: &ReducerContext) {
-    ctx.db.accounts().insert(Account { id: 1, email: "a@example.com".into(), name: "Alice".into() });
-    ctx.db.accounts().insert(Account { id: 2, email: "b@example.com".into(), name: "Bob".into() });
+    ctx.db.accounts().insert(Account {
+        id: 1,
+        email: "a@example.com".into(),
+        name: "Alice".into(),
+    });
+    ctx.db.accounts().insert(Account {
+        id: 2,
+        email: "b@example.com".into(),
+        name: "Bob".into(),
+    });
 }
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_018_constraints/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_018_constraints/spec.rs
index 3b9ad64342e..b1e680e31eb 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_018_constraints/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_018_constraints/spec.rs
@@ -1,7 +1,5 @@
 use crate::eval::defaults::{
-    default_schema_parity_scorers,
-    make_reducer_data_parity_scorer,
-    make_sql_count_only_scorer,
+    default_schema_parity_scorers, make_reducer_data_parity_scorer, make_sql_count_only_scorer,
 };
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerDataParityConfig, SqlBuilder};
 use std::time::Duration;
@@ -14,17 +12,20 @@ pub fn spec() -> BenchmarkSpec {
         let sb = SqlBuilder::new(casing);
         let reducer = ident("Seed", casing);
 
-        let select = sb.select_by_id("accounts", &["id","email","name"], "id", 1);
-        v.push(make_reducer_data_parity_scorer(host_url, ReducerDataParityConfig {
-            src_file: file!(),
-            route_tag,
-            reducer: reducer.into(),
-            args: vec![],
-            select_query: select.clone(),
-            id_str: "constraints_row_parity_after_seed",
-            collapse_ws: true,
-            timeout: Duration::from_secs(10),
-        }));
+        let select = sb.select_by_id("accounts", &["id", "email", "name"], "id", 1);
+        v.push(make_reducer_data_parity_scorer(
+            host_url,
+            ReducerDataParityConfig {
+                src_file: file!(),
+                route_tag,
+                reducer: reducer.into(),
+                args: vec![],
+                select_query: select.clone(),
+                id_str: "constraints_row_parity_after_seed",
+                collapse_ws: true,
+                timeout: Duration::from_secs(10),
+            },
+        ));
 
         let count = sb.count_by_id("accounts", "id", 2);
         v.push(make_sql_count_only_scorer(
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_019_many_to_many/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_019_many_to_many/answers/rust.rs
index 02c565db081..d9fbfc8a436 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_019_many_to_many/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_019_many_to_many/answers/rust.rs
@@ -28,13 +28,37 @@ pub struct Membership {
 
 #[reducer]
 pub fn seed(ctx: &ReducerContext) {
-    ctx.db.users().insert(User { user_id: 1, name: "Alice".into() });
-    ctx.db.users().insert(User { user_id: 2, name: "Bob".into() });
+    ctx.db.users().insert(User {
+        user_id: 1,
+        name: "Alice".into(),
+    });
+    ctx.db.users().insert(User {
+        user_id: 2,
+        name: "Bob".into(),
+    });
 
-    ctx.db.groups().insert(Group { group_id: 10, title: "Admin".into() });
-    ctx.db.groups().insert(Group { group_id: 20, title: "Dev".into() });
+    ctx.db.groups().insert(Group {
+        group_id: 10,
+        title: "Admin".into(),
+    });
+    ctx.db.groups().insert(Group {
+        group_id: 20,
+        title: "Dev".into(),
+    });
 
-    ctx.db.memberships().insert(Membership { id: 1, user_id: 1, group_id: 10 });
-    ctx.db.memberships().insert(Membership { id: 2, user_id: 1, group_id: 20 });
-    ctx.db.memberships().insert(Membership { id: 3, user_id: 2, group_id: 20 });
+    ctx.db.memberships().insert(Membership {
+        id: 1,
+        user_id: 1,
+        group_id: 10,
+    });
+    ctx.db.memberships().insert(Membership {
+        id: 2,
+        user_id: 1,
+        group_id: 20,
+    });
+    ctx.db.memberships().insert(Membership {
+        id: 3,
+        user_id: 2,
+        group_id: 20,
+    });
 }
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_019_many_to_many/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_019_many_to_many/spec.rs
index 791f98dace4..8016bb0828f 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_019_many_to_many/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_019_many_to_many/spec.rs
@@ -1,7 +1,4 @@
-use crate::eval::defaults::{
-    default_schema_parity_scorers,
-    make_reducer_sql_count_scorer, make_sql_count_only_scorer,
-};
+use crate::eval::defaults::{default_schema_parity_scorers, make_reducer_sql_count_scorer, make_sql_count_only_scorer};
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerSqlCountConfig, SqlBuilder};
 use std::time::Duration;
 
@@ -16,18 +13,19 @@ pub fn spec() -> BenchmarkSpec {
         let user_id = ident("user_id", sb.case);
         let group_id = ident("group_id", sb.case);
 
-        v.push(make_reducer_sql_count_scorer(host_url, ReducerSqlCountConfig {
-            src_file: file!(),
-            route_tag,
-            reducer: reducer_name.into(),
-            args: vec![],
-            sql_count_query: format!(
-                "SELECT COUNT(*) AS n FROM memberships WHERE {user_id}=1 AND {group_id}=10"
-            ),
-            expected_count: 1,
-            id_str: "m2m_has_1_10",
-            timeout: Duration::from_secs(10),
-        }));
+        v.push(make_reducer_sql_count_scorer(
+            host_url,
+            ReducerSqlCountConfig {
+                src_file: file!(),
+                route_tag,
+                reducer: reducer_name.into(),
+                args: vec![],
+                sql_count_query: format!("SELECT COUNT(*) AS n FROM memberships WHERE {user_id}=1 AND {group_id}=10"),
+                expected_count: 1,
+                id_str: "m2m_has_1_10",
+                timeout: Duration::from_secs(10),
+            },
+        ));
 
         v.push(make_sql_count_only_scorer(
             host_url,
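The t_019 scorers only count memberships rows, but the schema they validate is the classic junction-table encoding of many-to-many. A plain-Rust sketch of the join those counts imply, seeded with the same rows as the answer's seed reducer (Vecs stand in for the SpacetimeDB tables):

struct User {
    user_id: i32,
    name: &'static str,
}

struct Group {
    group_id: i32,
    title: &'static str,
}

struct Membership {
    id: i32,
    user_id: i32,
    group_id: i32,
}

fn main() {
    let users = vec![
        User { user_id: 1, name: "Alice" },
        User { user_id: 2, name: "Bob" },
    ];
    let groups = vec![
        Group { group_id: 10, title: "Admin" },
        Group { group_id: 20, title: "Dev" },
    ];
    let memberships = vec![
        Membership { id: 1, user_id: 1, group_id: 10 },
        Membership { id: 2, user_id: 1, group_id: 20 },
        Membership { id: 3, user_id: 2, group_id: 20 },
    ];

    // The m2m_has_1_10 scorer's query over the Vec:
    // SELECT COUNT(*) AS n FROM memberships WHERE user_id=1 AND group_id=10
    let n = memberships
        .iter()
        .filter(|m| m.user_id == 1 && m.group_id == 10)
        .count();
    assert_eq!(n, 1);

    // The join the junction table exists for: group titles per user.
    for u in &users {
        let titles: Vec<&str> = memberships
            .iter()
            .filter(|m| m.user_id == u.user_id)
            .filter_map(|m| groups.iter().find(|g| g.group_id == m.group_id))
            .map(|g| g.title)
            .collect();
        println!("{}: {:?}", u.name, titles);
    }
}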
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_020_ecs/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_020_ecs/spec.rs
index 1aeeb70f065..d472dec3859 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_020_ecs/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_020_ecs/spec.rs
@@ -1,7 +1,4 @@
-use crate::eval::defaults::{
-    default_schema_parity_scorers,
-    make_reducer_sql_count_scorer,
-};
+use crate::eval::defaults::{default_schema_parity_scorers, make_reducer_sql_count_scorer};
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerSqlCountConfig, SqlBuilder};
 use std::time::Duration;
 
 pub fn spec() -> BenchmarkSpec {
@@ -9,7 +6,7 @@ pub fn spec() -> BenchmarkSpec {
         let mut v = default_schema_parity_scorers(host_url, file!(), route_tag);
 
         let case = casing_for_lang(lang);
-        let sb   = SqlBuilder::new(case);
+        let sb = SqlBuilder::new(case);
 
         let seed = ident("Seed", case);
         let step = ident("Step", case);
@@ -29,39 +26,55 @@ pub fn spec() -> BenchmarkSpec {
             timeout: Duration::from_secs(10),
         };
 
-        v.push(make_reducer_sql_count_scorer(host_url, ReducerSqlCountConfig {
-            sql_count_query: "SELECT COUNT(*) AS n FROM positions".into(),
-            expected_count: 2,
-            id_str: "ecs_seed_positions_count",
-            ..base(&seed) // or base("seed") if it's a &str
-        }));
+        v.push(make_reducer_sql_count_scorer(
+            host_url,
+            ReducerSqlCountConfig {
+                sql_count_query: "SELECT COUNT(*) AS n FROM positions".into(),
+                expected_count: 2,
+                id_str: "ecs_seed_positions_count",
+                ..base(&seed) // or base("seed") if it's a &str
+            },
+        ));
 
-        v.push(make_reducer_sql_count_scorer(host_url, ReducerSqlCountConfig {
-            sql_count_query: "SELECT COUNT(*) AS n FROM next_positions".into(),
-            expected_count: 2,
-            id_str: "ecs_step_next_positions_count",
-            ..base(&step) // or base("step")
-        }));
+        v.push(make_reducer_sql_count_scorer(
+            host_url,
+            ReducerSqlCountConfig {
+                sql_count_query: "SELECT COUNT(*) AS n FROM next_positions".into(),
+                expected_count: 2,
+                id_str: "ecs_step_next_positions_count",
+                ..base(&step) // or base("step")
+            },
+        ));
 
-        v.push(make_reducer_sql_count_scorer(host_url, ReducerSqlCountConfig {
-            sql_count_query: format!(
-                "SELECT COUNT(*) AS n FROM next_positions WHERE {eid}=1 AND {x}=1 AND {y}=0",
-                eid = entity_id, x = x, y = y
-            ),
-            expected_count: 1,
-            id_str: "ecs_next_pos_entity1",
-            ..base(&step)
-        }));
+        v.push(make_reducer_sql_count_scorer(
+            host_url,
+            ReducerSqlCountConfig {
+                sql_count_query: format!(
+                    "SELECT COUNT(*) AS n FROM next_positions WHERE {eid}=1 AND {x}=1 AND {y}=0",
+                    eid = entity_id,
+                    x = x,
+                    y = y
+                ),
+                expected_count: 1,
+                id_str: "ecs_next_pos_entity1",
+                ..base(&step)
+            },
+        ));
 
-        v.push(make_reducer_sql_count_scorer(host_url, ReducerSqlCountConfig {
-            sql_count_query: format!(
-                "SELECT COUNT(*) AS n FROM next_positions WHERE {eid}=2 AND {x}=8 AND {y}=3",
-                eid = entity_id, x = x, y = y
-            ),
-            expected_count: 1,
-            id_str: "ecs_next_pos_entity2",
-            ..base(&step)
-        }));
+        v.push(make_reducer_sql_count_scorer(
+            host_url,
+            ReducerSqlCountConfig {
+                sql_count_query: format!(
+                    "SELECT COUNT(*) AS n FROM next_positions WHERE {eid}=2 AND {x}=8 AND {y}=3",
+                    eid = entity_id,
+                    x = x,
+                    y = y
+                ),
+                expected_count: 1,
+                id_str: "ecs_next_pos_entity2",
+                ..base(&step)
+            },
+        ));
 
         v
     })
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_021_multi_column_index/answers/rust.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_021_multi_column_index/answers/rust.rs
index 0a8df3ded1e..4b5bb34d7a5 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_021_multi_column_index/answers/rust.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_021_multi_column_index/answers/rust.rs
@@ -14,7 +14,22 @@ pub struct Log {
 
 #[reducer]
 pub fn seed(ctx: &ReducerContext) {
-    ctx.db.logs().insert(Log { id: 1, user_id: 7, day: 1, message: "a".into() });
-    ctx.db.logs().insert(Log { id: 2, user_id: 7, day: 2, message: "b".into() });
-    ctx.db.logs().insert(Log { id: 3, user_id: 9, day: 1, message: "c".into() });
+    ctx.db.logs().insert(Log {
+        id: 1,
+        user_id: 7,
+        day: 1,
+        message: "a".into(),
+    });
+    ctx.db.logs().insert(Log {
+        id: 2,
+        user_id: 7,
+        day: 2,
+        message: "b".into(),
+    });
+    ctx.db.logs().insert(Log {
+        id: 3,
+        user_id: 9,
+        day: 1,
+        message: "c".into(),
+    });
 }
diff --git a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_021_multi_column_index/spec.rs b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_021_multi_column_index/spec.rs
index 06e7c2df56c..75f7c8faeaf 100644
--- a/tools/xtask-llm-benchmark/src/benchmarks/schema/t_021_multi_column_index/spec.rs
+++ b/tools/xtask-llm-benchmark/src/benchmarks/schema/t_021_multi_column_index/spec.rs
@@ -1,7 +1,4 @@
-use crate::eval::defaults::{
-    default_schema_parity_scorers,
-    make_reducer_sql_count_scorer,
-};
+use crate::eval::defaults::{default_schema_parity_scorers, make_reducer_sql_count_scorer};
 use crate::eval::{casing_for_lang, ident, BenchmarkSpec, ReducerSqlCountConfig, SqlBuilder};
 use std::time::Duration;
 
@@ -10,12 +7,12 @@ pub fn spec() -> BenchmarkSpec {
         let mut v = default_schema_parity_scorers(host_url, file!(), route_tag);
 
         let case = casing_for_lang(lang);
-        let sb   = SqlBuilder::new(case);
+        let sb = SqlBuilder::new(case);
 
         let seed = ident("Seed", case);
 
         let user_id = ident("user_id", sb.case);
-        let day     = ident("day", sb.case);
+        let day = ident("day", sb.case);
 
         let base = |reducer: &str| ReducerSqlCountConfig {
             src_file: file!(),
@@ -28,32 +25,43 @@ pub fn spec() -> BenchmarkSpec {
             timeout: Duration::from_secs(10),
         };
 
-        v.push(make_reducer_sql_count_scorer(host_url, ReducerSqlCountConfig {
-            sql_count_query: "SELECT COUNT(*) AS n FROM logs".into(),
-            expected_count: 3,
-            id_str: "mcindex_seed_count",
-            ..base(&seed)
-        }));
-
-        v.push(make_reducer_sql_count_scorer(host_url, ReducerSqlCountConfig {
-            sql_count_query: format!(
-                "SELECT COUNT(*) AS n FROM logs WHERE {u}=7 AND {d}=1",
-                u = user_id, d = day
-            ),
-            expected_count: 1,
-            id_str: "mcindex_lookup_u7_d1",
-            ..base(&seed)
-        }));
-
-        v.push(make_reducer_sql_count_scorer(host_url, ReducerSqlCountConfig {
-            sql_count_query: format!(
-                "SELECT COUNT(*) AS n FROM logs WHERE {u}=7 AND {d}=2",
-                u = user_id, d = day
-            ),
-            expected_count: 1,
-            id_str: "mcindex_lookup_u7_d2",
-            ..base(&seed)
-        }));
+        v.push(make_reducer_sql_count_scorer(
+            host_url,
+            ReducerSqlCountConfig {
+                sql_count_query: "SELECT COUNT(*) AS n FROM logs".into(),
+                expected_count: 3,
+                id_str: "mcindex_seed_count",
+                ..base(&seed)
+            },
+        ));
+
+        v.push(make_reducer_sql_count_scorer(
+            host_url,
+            ReducerSqlCountConfig {
+                sql_count_query: format!(
+                    "SELECT COUNT(*) AS n FROM logs WHERE {u}=7 AND {d}=1",
+                    u = user_id,
+                    d = day
+                ),
+                expected_count: 1,
+                id_str: "mcindex_lookup_u7_d1",
+                ..base(&seed)
+            },
+        ));
+
+        v.push(make_reducer_sql_count_scorer(
+            host_url,
+            ReducerSqlCountConfig {
+                sql_count_query: format!(
+                    "SELECT COUNT(*) AS n FROM logs WHERE {u}=7 AND {d}=2",
+                    u = user_id,
+                    d = day
+                ),
+                expected_count: 1,
+                id_str: "mcindex_lookup_u7_d2",
+                ..base(&seed)
+            },
+        ));
 
         v
     })
diff --git a/tools/xtask-llm-benchmark/src/templates/rust/server/src/lib.rs b/tools/xtask-llm-benchmark/src/templates/rust/server/src/lib.rs
index e69de29bb2d..8b137891791 100644
--- a/tools/xtask-llm-benchmark/src/templates/rust/server/src/lib.rs
+++ b/tools/xtask-llm-benchmark/src/templates/rust/server/src/lib.rs
@@ -0,0 +1 @@
+
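Both t_020 and t_021 above build each ReducerSqlCountConfig from a shared base(&reducer) template plus struct update syntax (..base(&seed)), which is what keeps the reformatted v.push blocks so uniform. A self-contained sketch of that pattern with a stand-in config type rather than the crate's real one:

use std::time::Duration;

struct CountConfig {
    reducer: String,
    sql_count_query: String,
    expected_count: usize,
    id_str: &'static str,
    timeout: Duration,
}

fn main() {
    // Shared defaults parameterized by reducer name, like `base` in the specs.
    let base = |reducer: &str| CountConfig {
        reducer: reducer.into(),
        sql_count_query: String::new(),
        expected_count: 0,
        id_str: "",
        timeout: Duration::from_secs(10),
    };

    // Each case overrides only what differs; the rest comes from `base`.
    let seed_count = CountConfig {
        sql_count_query: "SELECT COUNT(*) AS n FROM logs".into(),
        expected_count: 3,
        id_str: "mcindex_seed_count",
        ..base("Seed")
    };

    assert_eq!(seed_count.reducer, "Seed");
    assert_eq!(seed_count.expected_count, 3);
    assert_eq!(seed_count.timeout, Duration::from_secs(10));
    println!("{}: {}", seed_count.id_str, seed_count.sql_count_query);
}

The rustfmt layout in the diff falls out naturally here: because the struct literal is its own argument, every override gets its own line, and the ..base(..) tail reads as "everything else is the template."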