diff --git a/.server-changes/realtime-runs-subscription-scalability.md b/.server-changes/realtime-runs-subscription-scalability.md
new file mode 100644
index 00000000000..5de00aae675
--- /dev/null
+++ b/.server-changes/realtime-runs-subscription-scalability.md
@@ -0,0 +1,6 @@
+---
+area: webapp
+type: feature
+---
+
+Add a new backend for the realtime runs feed (single runs, tags, and batches) that scales under high concurrency, available behind a feature flag.
diff --git a/apps/supervisor/package.json b/apps/supervisor/package.json
index 7a3537dbc04..2725fe2b729 100644
--- a/apps/supervisor/package.json
+++ b/apps/supervisor/package.json
@@ -18,7 +18,7 @@
     "@kubernetes/client-node": "^1.0.0",
     "@trigger.dev/core": "workspace:*",
     "dockerode": "^4.0.6",
-    "ioredis": "^5.3.2",
+    "ioredis": "~5.6.0",
     "p-limit": "^6.2.0",
     "prom-client": "^15.1.0",
     "socket.io": "4.7.4",
diff --git a/apps/webapp/app/entry.server.tsx b/apps/webapp/app/entry.server.tsx
index 9996eb7b30a..8cc23bff089 100644
--- a/apps/webapp/app/entry.server.tsx
+++ b/apps/webapp/app/entry.server.tsx
@@ -27,6 +27,7 @@ import {
   registerRunEngineEventBusHandlers,
   setupBatchQueueCallbacks,
 } from "./v3/runEngineHandlers.server";
+import { registerRunChangeNotifierHandlers } from "./services/realtime/runChangeNotifierHandlers.server";
 // Touch the sessions replication singleton at entry so it boots deterministically
 // on webapp startup. The singleton's initializer wires start (gated on
 // `clickhouseFactory.isReady()`) and SIGTERM/SIGINT shutdown — mirrors
@@ -269,6 +270,9 @@ process.on("uncaughtException", (error, origin) => {
 
 singleton("RunEngineEventBusHandlers", registerRunEngineEventBusHandlers);
 singleton("SetupBatchQueueCallbacks", setupBatchQueueCallbacks);
+// Attach the run-changed notifier delegations to the engine event bus.
+// No-ops (registers nothing) unless REALTIME_NOTIFIER_ENABLED=1.
+singleton("RunChangeNotifierHandlers", registerRunChangeNotifierHandlers);
 
 // Wrapped in singleton() so Remix's dev-mode CJS reloads don't append
 // duplicate copies of the processor — Sentry's processor list lives in
diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts
index c55bb424001..f01e8285916 100644
--- a/apps/webapp/app/env.server.ts
+++ b/apps/webapp/app/env.server.ts
@@ -300,6 +300,47 @@ const EnvironmentSchema = z
       .int()
       .default(24 * 60 * 60 * 1000), // 1 day in milliseconds
 
+    // Master switch for the notifier-backed realtime feed.
+    // "0" (default) = the existing realtime path serves everything, publishes are
+    // no-ops, and no notifier Redis connections are opened (zero-overhead off).
+    // "1" = run-changed signals are published and the per-org `realtimeBackend`
+    // feature flag selects the backend per request.
+    REALTIME_NOTIFIER_ENABLED: z.string().default("0"),
+    // Backstop wait before a live notifier request refetches the run (ms). Matches
+    // Electric's ~20s live long-poll hold so the client polling cadence is unchanged
+    // across backends (a ±15% jitter is applied per request to avoid refetch herds).
+    REALTIME_NOTIFIER_LIVE_POLL_TIMEOUT_MS: z.coerce.number().int().default(20_000),
+    // Hard cap on the tag-list snapshot size served by the notifier feed.
+    REALTIME_NOTIFIER_MAX_LIST_RESULTS: z.coerce.number().int().default(1_000),
+    // Short-TTL coalescing cache for the multi-run (tag-list/batch) resolve+hydrate.
+    // Concurrent same-filter feeds share one ClickHouse resolve + Postgres hydrate
+    // within this window, so an env-wide wake doesn't fan out into per-feed queries.
+    // Staleness budget: a newly-matching run is visible within ~ttl + poll interval.
+    REALTIME_NOTIFIER_RUNSET_CACHE_TTL_MS: z.coerce.number().int().default(1_000),
+    REALTIME_NOTIFIER_RUNSET_CACHE_MAX_ENTRIES: z.coerce.number().int().default(5_000),
+    // Cap on the per-handle working-set cache (runId -> updatedAt) the notifier keeps
+    // for diffing multi-run live polls.
+    REALTIME_NOTIFIER_WORKING_SET_MAX_ENTRIES: z.coerce.number().int().default(10_000),
+    // Quantize the tag-list createdAt lower bound to this epoch-aligned bucket (ms) so
+    // same-tag feeds that pin their window within the same bucket share one resolve+
+    // hydrate cache entry. Floored, so the window only ever widens by < bucket. 0
+    // disables bucketing (each feed keeps its exact lower bound).
+    REALTIME_NOTIFIER_RUNSET_CREATED_AT_BUCKET_MS: z.coerce.number().int().default(60_000),
+    // Leading-edge throttle (ms) on the per-env wake channel: a busy env's run-change
+    // firehose is collapsed to at most one feed-wake per window, decoupling wake load
+    // from run throughput. Lossless because consumers refetch current state on a wake.
+    // 0 disables coalescing (every change wakes immediately).
+    REALTIME_NOTIFIER_ENV_WAKE_COALESCE_WINDOW_MS: z.coerce.number().int().default(100),
+    // When "1", a multi-run live poll woken by a change irrelevant to its filter keeps
+    // holding the long-poll (re-resolving cheaply) instead of returning an empty up-to-date
+    // response that the client would immediately re-request. "0" reverts to per-wake replies.
+    REALTIME_NOTIFIER_HOLD_ON_EMPTY: z.string().default("1"),
+    // Max concurrent fresh ClickHouse resolves (cache misses) per instance. Caps the
+    // distinct-filter reconnect stampede: a mass reconnect of N feeds on N different filters
+    // queues to this many concurrent CH queries instead of firing all N at once. Same-filter
+    // bursts collapse via the single-flight cache before taking a permit. 0 disables the gate.
+    REALTIME_NOTIFIER_RESOLVE_ADMISSION_LIMIT: z.coerce.number().int().default(16),
+
     PUBSUB_REDIS_HOST: z
       .string()
       .optional()
@@ -332,6 +373,41 @@ const EnvironmentSchema = z
     PUBSUB_REDIS_TLS_DISABLED: z.string().default(process.env.REDIS_TLS_DISABLED ?? "false"),
     PUBSUB_REDIS_CLUSTER_MODE_ENABLED: z.string().default("0"),
 
+    // Dedicated pub/sub Redis for the realtime runs feed's run-changed notifier, so
+    // its publish/subscribe traffic can run on its own instance. Each value falls
+    // back to the shared PUBSUB_REDIS_* (then REDIS_*) when unset, so the default is
+    // unchanged until explicitly pointed at a dedicated instance.
+    REALTIME_RUNS_PUBSUB_REDIS_HOST: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.PUBSUB_REDIS_HOST ?? process.env.REDIS_HOST),
+    REALTIME_RUNS_PUBSUB_REDIS_PORT: z.coerce
+      .number()
+      .optional()
+      .transform((v) => {
+        if (v !== undefined) return v;
+        const raw = process.env.PUBSUB_REDIS_PORT ?? process.env.REDIS_PORT;
+        return raw ? parseInt(raw) : undefined;
+      }),
+    REALTIME_RUNS_PUBSUB_REDIS_USERNAME: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.PUBSUB_REDIS_USERNAME ?? process.env.REDIS_USERNAME),
+    REALTIME_RUNS_PUBSUB_REDIS_PASSWORD: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.PUBSUB_REDIS_PASSWORD ?? process.env.REDIS_PASSWORD),
+    REALTIME_RUNS_PUBSUB_REDIS_TLS_DISABLED: z
+      .string()
+      .default(process.env.PUBSUB_REDIS_TLS_DISABLED ?? process.env.REDIS_TLS_DISABLED ?? "false"),
+    REALTIME_RUNS_PUBSUB_REDIS_CLUSTER_MODE_ENABLED: z
+      .string()
+      .default(process.env.PUBSUB_REDIS_CLUSTER_MODE_ENABLED ?? "0"),
+    // Use sharded pub/sub (SSUBSCRIBE/SPUBLISH) when in cluster mode, so a busy env's
+    // traffic stays on one shard instead of broadcasting to every node. Only takes
+    // effect alongside CLUSTER_MODE_ENABLED. "0" forces classic pub/sub on the cluster.
+    REALTIME_RUNS_PUBSUB_REDIS_SHARDED_ENABLED: z.string().default("1"),
+
     DEFAULT_ENV_EXECUTION_CONCURRENCY_LIMIT: z.coerce.number().int().default(100),
     DEFAULT_ENV_EXECUTION_CONCURRENCY_BURST_FACTOR: z.coerce.number().default(1.0),
     DEFAULT_ORG_EXECUTION_CONCURRENCY_LIMIT: z.coerce.number().int().default(300),
@@ -1608,6 +1684,20 @@ const EnvironmentSchema = z
       .enum(["log", "error", "warn", "info", "debug"])
       .default("info"),
     RUN_ENGINE_CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"),
+    // ClickHouse client used by the realtime runs feed for tag/batch id resolution.
+    // Kept on its own URL + pool so the feed's reads can't contend with the main
+    // analytics client (CLICKHOUSE_URL). Falls back to the main URL when unset.
+    REALTIME_RUNS_CLICKHOUSE_URL: z
+      .string()
+      .optional()
+      .transform((v) => v ?? process.env.CLICKHOUSE_URL),
+    REALTIME_RUNS_CLICKHOUSE_KEEP_ALIVE_ENABLED: z.string().default("1"),
+    REALTIME_RUNS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS: z.coerce.number().int().optional(),
+    REALTIME_RUNS_CLICKHOUSE_MAX_OPEN_CONNECTIONS: z.coerce.number().int().default(10),
+    REALTIME_RUNS_CLICKHOUSE_LOG_LEVEL: z
+      .enum(["log", "error", "warn", "info", "debug"])
+      .default("info"),
+    REALTIME_RUNS_CLICKHOUSE_COMPRESSION_REQUEST: z.string().default("1"),
     EVENTS_CLICKHOUSE_BATCH_SIZE: z.coerce.number().int().default(1000),
     EVENTS_CLICKHOUSE_FLUSH_INTERVAL_MS: z.coerce.number().int().default(1000),
     METRICS_CLICKHOUSE_BATCH_SIZE: z.coerce.number().int().default(10000),
diff --git a/apps/webapp/app/models/runtimeEnvironment.server.ts b/apps/webapp/app/models/runtimeEnvironment.server.ts
index 64b1da3be49..be05adaa8a7 100644
--- a/apps/webapp/app/models/runtimeEnvironment.server.ts
+++ b/apps/webapp/app/models/runtimeEnvironment.server.ts
@@ -237,10 +237,20 @@ export async function findEnvironmentBySlug(
   return environment ? toAuthenticated(environment) : null;
 }
 
+// The authenticated environment plus the run scalars the realtime publish needs.
+// Both come from one taskRun read — see findEnvironmentFromRun.
+export type EnvironmentFromRun = {
+  environment: AuthenticatedEnvironment;
+  runTags: string[];
+  batchId: string | null;
+};
+
 export async function findEnvironmentFromRun(
   runId: string,
   tx?: PrismaClientOrTransaction
-): Promise<AuthenticatedEnvironment | null> {
+): Promise<EnvironmentFromRun | null> {
+  // The include (no select) already pulls every taskRun scalar, so runTags/batchId
+  // ride along for free — no extra query for the realtime publish to send a full record.
   const taskRun = await (tx ?? $replica).taskRun.findFirst({
     where: {
       id: runId,
@@ -249,7 +259,14 @@ export async function findEnvironmentFromRun(
       runtimeEnvironment: { include: authIncludeBase },
     },
   });
-  return taskRun?.runtimeEnvironment ? toAuthenticated(taskRun.runtimeEnvironment) : null;
+  if (!taskRun?.runtimeEnvironment) {
+    return null;
+  }
+  return {
+    environment: toAuthenticated(taskRun.runtimeEnvironment),
+    runTags: taskRun.runTags,
+    batchId: taskRun.batchId,
+  };
 }
 
 export async function createNewSession(
diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.metadata.ts b/apps/webapp/app/routes/api.v1.runs.$runId.metadata.ts
index ceae1efb4b4..c88009a84a4 100644
--- a/apps/webapp/app/routes/api.v1.runs.$runId.metadata.ts
+++ b/apps/webapp/app/routes/api.v1.runs.$runId.metadata.ts
@@ -12,6 +12,7 @@ import type { AuthenticatedEnvironment } from "~/services/apiAuth.server";
 import { authenticateApiRequest } from "~/services/apiAuth.server";
 import { logger } from "~/services/logger.server";
 import { updateMetadataService } from "~/services/metadata/updateMetadataInstance.server";
+import { publishChangeRecord } from "~/services/realtime/runChangeNotifierInstance.server";
 import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server";
 import { ServiceValidationError } from "~/v3/services/common.server";
 import { applyMetadataMutationToBufferedRun } from "~/v3/mollifier/applyMetadataMutation.server";
@@ -184,7 +185,10 @@ const { action } = createActionApiRoute(
       return json({ error: "Internal Server Error" }, { status: 500 });
     }
     if (pgResult) {
-      return json(pgResult, { status: 200 });
+      // Reflect metadata.set() on a live feed before the next lifecycle event. Publish the
+      // internal id (the router keys single-run feeds by it, not the friendly id from the URL).
+      publishChangeRecord({ runId: pgResult.runId, envId: env.id, batchId: pgResult.batchId });
+      return json({ metadata: pgResult.metadata }, { status: 200 });
     }
 
     // PG miss. Target run is either buffered or genuinely absent.
diff --git a/apps/webapp/app/routes/api.v1.runs.$runId.tags.ts b/apps/webapp/app/routes/api.v1.runs.$runId.tags.ts
index ef7f3180bf3..c8fa5ea37d2 100644
--- a/apps/webapp/app/routes/api.v1.runs.$runId.tags.ts
+++ b/apps/webapp/app/routes/api.v1.runs.$runId.tags.ts
@@ -7,6 +7,7 @@ import { MAX_TAGS_PER_RUN } from "~/models/taskRunTag.server";
 import { authenticateApiRequest } from "~/services/apiAuth.server";
 import { getRequestAbortSignal } from "~/services/httpAsyncStorage.server";
 import { logger } from "~/services/logger.server";
+import { publishChangeRecord } from "~/services/realtime/runChangeNotifierInstance.server";
 import { mutateWithFallback } from "~/v3/mollifier/mutateWithFallback.server";
 
 // Pull the existing tags out of a buffer entry's serialised payload so
@@ -90,6 +91,13 @@ export async function action({ request, params }: ActionFunctionArgs) {
           },
           data: { runTags: { push: newTags } },
         });
+        // Publish a run-changed record with the NEW tag set so tag feeds reindex
+        // (no-op unless enabled).
+        publishChangeRecord({
+          runId: taskRun.id,
+          envId: env.id,
+          tags: existing.concat(newTags),
+        });
         return json({ message: `Successfully set ${newTags.length} new tags.` }, { status: 200 });
       },
       // Buffer-applied patch path. The mutateSnapshot Lua deduplicates
diff --git a/apps/webapp/app/routes/realtime.v1.batches.$batchId.ts b/apps/webapp/app/routes/realtime.v1.batches.$batchId.ts
index 2b8fb106681..973cd5f96cd 100644
--- a/apps/webapp/app/routes/realtime.v1.batches.$batchId.ts
+++ b/apps/webapp/app/routes/realtime.v1.batches.$batchId.ts
@@ -1,7 +1,7 @@
 import { z } from "zod";
 import { $replica } from "~/db.server";
 import { getRequestAbortSignal } from "~/services/httpAsyncStorage.server";
-import { realtimeClient } from "~/services/realtimeClientGlobal.server";
+import { resolveRealtimeStreamClient } from "~/services/realtime/resolveRealtimeStreamClient.server";
 import { anyResource, createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server";
 
 const ParamsSchema = z.object({
@@ -33,7 +33,11 @@ export const loader = createLoaderApiRoute(
     },
   },
   async ({ authentication, request, resource: batchRun, apiVersion }) => {
-    return realtimeClient.streamBatch(
+    // Pick the Electric proxy or the notifier-backed batch feed
+    // per org (defaults to Electric). Both implement streamBatch.
+    const client = await resolveRealtimeStreamClient(authentication.environment);
+
+    return client.streamBatch(
       request.url,
       authentication.environment,
       batchRun.id,
diff --git a/apps/webapp/app/routes/realtime.v1.runs.$runId.ts b/apps/webapp/app/routes/realtime.v1.runs.$runId.ts
index e03787c6200..3e224ddedf2 100644
--- a/apps/webapp/app/routes/realtime.v1.runs.$runId.ts
+++ b/apps/webapp/app/routes/realtime.v1.runs.$runId.ts
@@ -2,7 +2,7 @@ import { json } from "@remix-run/server-runtime";
 import { z } from "zod";
 import { $replica } from "~/db.server";
 import { getRequestAbortSignal } from "~/services/httpAsyncStorage.server";
-import { realtimeClient } from "~/services/realtimeClientGlobal.server";
+import { resolveRealtimeStreamClient } from "~/services/realtime/resolveRealtimeStreamClient.server";
 import {
   anyResource,
   createLoaderApiRoute,
@@ -48,7 +48,12 @@ export const loader = createLoaderApiRoute(
     },
   },
   async ({ authentication, request, resource: run, apiVersion }) => {
-    return realtimeClient.streamRun(
+    // Pick the Electric proxy or the notifier-backed shim per org (defaults to
+    // Electric; controlled by REALTIME_NOTIFIER_ENABLED + the realtimeBackend
+    // feature flag). Both implement the same streamRun contract.
+    const client = await resolveRealtimeStreamClient(authentication.environment);
+
+    return client.streamRun(
       request.url,
       authentication.environment,
       run.id,
diff --git a/apps/webapp/app/routes/realtime.v1.runs.ts b/apps/webapp/app/routes/realtime.v1.runs.ts
index b04c2d55bbc..436f4ef48d8 100644
--- a/apps/webapp/app/routes/realtime.v1.runs.ts
+++ b/apps/webapp/app/routes/realtime.v1.runs.ts
@@ -1,6 +1,6 @@
 import { z } from "zod";
 import { getRequestAbortSignal } from "~/services/httpAsyncStorage.server";
-import { realtimeClient } from "~/services/realtimeClientGlobal.server";
+import { resolveRealtimeStreamClient } from "~/services/realtime/resolveRealtimeStreamClient.server";
 import {
   anyResource,
   createLoaderApiRoute,
@@ -39,7 +39,11 @@ export const loader = createLoaderApiRoute(
     },
   },
   async ({ searchParams, authentication, request, apiVersion }) => {
-    return realtimeClient.streamRuns(
+    // Pick the Electric proxy or the notifier-backed tag-list feed per org
+    // (defaults to Electric). Both implement streamRuns.
+    const client = await resolveRealtimeStreamClient(authentication.environment);
+
+    return client.streamRuns(
       request.url,
       authentication.environment,
       searchParams,
diff --git a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts
index fb7f384fd27..c563621408c 100644
--- a/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts
+++ b/apps/webapp/app/services/clickhouse/clickhouseFactory.server.ts
@@ -211,6 +211,36 @@ function initializeRunEngineClickhouseClient(): ClickHouse {
   });
 }
 
+/** Realtime runs feed tag/batch id resolution. Built from `REALTIME_RUNS_CLICKHOUSE_URL` (which
+ *  the env schema defaults to `CLICKHOUSE_URL`); returns the default client if neither is set. */
+const defaultRealtimeClickhouseClient = singleton(
+  "realtimeClickhouseClient",
+  initializeRealtimeClickhouseClient
+);
+
+function initializeRealtimeClickhouseClient(): ClickHouse {
+  if (!env.REALTIME_RUNS_CLICKHOUSE_URL) {
+    return defaultClickhouseClient;
+  }
+
+  const url = new URL(env.REALTIME_RUNS_CLICKHOUSE_URL);
+  url.searchParams.delete("secure");
+
+  return new ClickHouse({
+    url: url.toString(),
+    name: "realtime-runs-clickhouse",
+    keepAlive: {
+      enabled: env.REALTIME_RUNS_CLICKHOUSE_KEEP_ALIVE_ENABLED === "1",
+      idleSocketTtl: env.REALTIME_RUNS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS,
+    },
+    logLevel: env.REALTIME_RUNS_CLICKHOUSE_LOG_LEVEL,
+    compression: {
+      request: env.REALTIME_RUNS_CLICKHOUSE_COMPRESSION_REQUEST === "1",
+    },
+    maxOpenConnections: env.REALTIME_RUNS_CLICKHOUSE_MAX_OPEN_CONNECTIONS,
+  });
+}
+
 /** Task events (`EVENTS_CLICKHOUSE_URL`); not exported — accessed via factory. */
 const defaultEventsClickhouseClient = singleton(
   "eventsClickhouseClient",
@@ -257,7 +287,8 @@ export type ClientType =
   | "logs"
   | "query"
   | "admin"
-  | "engine";
+  | "engine"
+  | "realtime";
 
 function buildOrgClickhouseClient(url: string, clientType: ClientType): ClickHouse {
   const parsed = new URL(url);
@@ -330,6 +361,20 @@ function buildOrgClickhouseClient(url: string, clientType: ClientType): ClickHou
         },
         maxOpenConnections: env.RUN_ENGINE_CLICKHOUSE_MAX_OPEN_CONNECTIONS,
       });
+    case "realtime":
+      return new ClickHouse({
+        url: parsed.toString(),
+        name,
+        keepAlive: {
+          enabled: env.REALTIME_RUNS_CLICKHOUSE_KEEP_ALIVE_ENABLED === "1",
+          idleSocketTtl: env.REALTIME_RUNS_CLICKHOUSE_KEEP_ALIVE_IDLE_SOCKET_TTL_MS,
+        },
+        logLevel: env.REALTIME_RUNS_CLICKHOUSE_LOG_LEVEL,
+        compression: {
+          request: env.REALTIME_RUNS_CLICKHOUSE_COMPRESSION_REQUEST === "1",
+        },
+        maxOpenConnections: env.REALTIME_RUNS_CLICKHOUSE_MAX_OPEN_CONNECTIONS,
+      });
     case "standard":
     case "query":
     case "admin":
@@ -398,6 +443,8 @@ export class ClickhouseFactory {
           return defaultAdminClickhouseClient;
         case "engine":
           return defaultRunEngineClickhouseClient;
+        case "realtime":
+          return defaultRealtimeClickhouseClient;
       }
     }
 
diff --git a/apps/webapp/app/services/metadata/updateMetadata.server.ts b/apps/webapp/app/services/metadata/updateMetadata.server.ts
index cfb946a1024..6422e3c5666 100644
--- a/apps/webapp/app/services/metadata/updateMetadata.server.ts
+++ b/apps/webapp/app/services/metadata/updateMetadata.server.ts
@@ -308,6 +308,7 @@ export class UpdateMetadataService {
           },
       select: {
         id: true,
+        batchId: true,
         completedAt: true,
         status: true,
         metadata: true,
@@ -355,6 +356,9 @@ export class UpdateMetadataService {
 
     return {
       metadata: newMetadata,
+      // Internal id + batchId, so callers can publish realtime records keyed how the router indexes feeds.
+      runId: taskRun.id,
+      batchId: taskRun.batchId,
     };
   }
 
diff --git a/apps/webapp/app/services/realtime/boundedTtlCache.ts b/apps/webapp/app/services/realtime/boundedTtlCache.ts
new file mode 100644
index 00000000000..8efcde55609
--- /dev/null
+++ b/apps/webapp/app/services/realtime/boundedTtlCache.ts
@@ -0,0 +1,59 @@
+/**
+ * Tiny in-process bounded TTL cache shared by the realtime feeds. Used where a miss
+ * is cheap and correctness-safe (read-through hydration, per-handle working sets,
+ * per-org flag resolution). Node is single-threaded so no locking is needed.
+ *
+ * Entries expire `ttlMs` after their last write and are evicted lazily: on `get`,
+ * or when a NEW key arrives at `maxEntries` — expired entries are swept and, if the
+ * cache is still full (pathologically all live), the oldest write is dropped.
+ *
+ * A stored value of `undefined` cannot be distinguished from a miss; callers that
+ * need to cache "absence" should store an explicit sentinel (e.g. `null`).
+ */
+export class BoundedTtlCache<V> {
+  readonly #entries = new Map<string, { value: V; expiresAt: number }>();
+
+  constructor(
+    private readonly ttlMs: number,
+    private readonly maxEntries: number
+  ) {}
+
+  get(key: string): V | undefined {
+    const entry = this.#entries.get(key);
+    if (!entry) {
+      return undefined;
+    }
+    if (entry.expiresAt > Date.now()) {
+      return entry.value;
+    }
+    // Evict on read so expired entries don't linger until the next at-capacity
+    // sweep — important for read-heavy / low-churn caches (per-handle working sets).
+    this.#entries.delete(key);
+    return undefined;
+  }
+
+  set(key: string, value: V): void {
+    const now = Date.now();
+    // Re-insert an existing key so Map order tracks write recency; otherwise a
+    // just-refreshed entry keeps its old slot and is the first one capacity-evicted.
+    if (!this.#entries.delete(key) && this.#entries.size >= this.maxEntries) {
+      for (const [candidate, entry] of this.#entries) {
+        if (entry.expiresAt <= now) {
+          this.#entries.delete(candidate);
+        }
+      }
+      if (this.#entries.size >= this.maxEntries) {
+        const oldest = this.#entries.keys().next().value;
+        if (oldest !== undefined) {
+          this.#entries.delete(oldest);
+        }
+      }
+    }
+    this.#entries.set(key, { value, expiresAt: now + this.ttlMs });
+  }
+
+  /** Number of stored entries, including expired ones that have not been evicted yet. */
+  get size(): number {
+    return this.#entries.size;
+  }
+}
diff --git a/apps/webapp/app/services/realtime/clickHouseRunListResolver.server.ts b/apps/webapp/app/services/realtime/clickHouseRunListResolver.server.ts
new file mode 100644
index 00000000000..003646bb74a
--- /dev/null
+++ b/apps/webapp/app/services/realtime/clickHouseRunListResolver.server.ts
@@ -0,0 +1,43 @@
+import { type ClickHouse } from "@internal/clickhouse";
+import { type PrismaClientOrTransaction } from "~/db.server";
+import { RunsRepository } from "~/services/runsRepository/runsRepository.server";
+import { type RunListFilter, type RunListResolver } from "./runReader.server";
+
+export type ClickHouseRunListResolverOptions = {
+  /** Resolves the per-organization ClickHouse client (multi-tenant routing). */
+  getClickhouse: (organizationId: string) => Promise<ClickHouse>;
+  prisma: PrismaClientOrTransaction;
+};
+
+/**
+ * Resolves the realtime tag/list filter into matching run ids via ClickHouse
+ * `listRunIds`. Tag matching is contains-ANY (OR), the same
+ * semantics the dashboard runs list uses. Filter-only: ids only, hydrated from
+ * Postgres by id afterward. This keeps the realtime tag feed off the Postgres
+ * `runTags` GIN index entirely.
+ *
+ * (Multi-tag subscribeToRunsWithTag is therefore OR, not the AND that Electric's
+ * `runTags @> ARRAY[...]` shape used. Restoring AND is a follow-up: add a
+ * `hasAll` mode to the ClickHouse runs filter and use it here.)
+ */
+export class ClickHouseRunListResolver implements RunListResolver {
+  constructor(private readonly options: ClickHouseRunListResolverOptions) {}
+
+  /** Looks up the ids of the runs matching `filter` via the org's ClickHouse client. */
+  async resolveMatchingRunIds(filter: RunListFilter): Promise<string[]> {
+    const repository = new RunsRepository({
+      clickhouse: await this.options.getClickhouse(filter.organizationId),
+      prisma: this.options.prisma,
+    });
+    const { organizationId, projectId, environmentId, batchId, limit } = filter;
+    // An empty tag array is forwarded as "no tag filter", same as an absent one.
+    const tags = filter.tags?.length ? filter.tags : undefined;
+    const from = filter.createdAtAfter?.getTime();
+    const page = { size: limit };
+
+    // listRunIds is keyset-paginated, so the ids are already capped to page.size.
+    const query = { organizationId, projectId, environmentId, tags, batchId, from, page };
+    const listed = await repository.listRunIds(query);
+    return listed.runIds;
+  }
+}
diff --git a/apps/webapp/app/services/realtime/electricStreamProtocol.server.ts b/apps/webapp/app/services/realtime/electricStreamProtocol.server.ts
new file mode 100644
index 00000000000..6a276bcb03d
--- /dev/null
+++ b/apps/webapp/app/services/realtime/electricStreamProtocol.server.ts
@@ -0,0 +1,321 @@
+/**
+ * Electric HTTP shape-stream wire protocol serializer for the single-run feed.
+ *
+ * This re-emits the exact wire shape that the deployed `@electric-sql/client`
+ * (1.0.14 modern + 0.4.0 legacy) and the SDK's `SubscribeRunRawShape` parse, so
+ * the notifier-backed realtime feed stays byte-faithful for clients that are
+ * already in the field.
+ *
+ * The module is intentionally pure: no DB, Redis, or env access, so the wire
+ * contract can be unit-tested by round-tripping through the real client parser
+ * + the SDK schema. Header rewrites, tokens, and transport live in the client.
+ *
+ * Wire facts this encodes (verified against @electric-sql/client@1.0.14):
+ *  - Response body is a JSON array of messages; an empty body is treated as `[]`.
+ *  - Each column value is wire-encoded as a STRING (or null); the client decodes
+ *    it back using the per-column `electric-schema` header. Columns absent from
+ *    the schema are passed through unparsed (so text/timestamp stay strings).
+ *  - `up-to-date` is the only control message that makes the client emit rows.
+ *  - Re-sending the full row each cycle is idempotent: the client merges by `key`.
+ */
+
+/**
+ * Postgres type names a feed column can carry. The name is written into the
+ * `electric-schema` header and selects the value encoding in `serializeValue`.
+ */
+export type ElectricColumnType =
+  | "text"
+  | "timestamp"
+  | "int4"
+  | "int8"
+  | "float8"
+  | "bool"
+  | "jsonb";
+
+/** Wire-level description of one column of the realtime run feed. */
+type ElectricColumn = {
+  /** Column name; also the key used in the wire `value` object and in the schema header. */
+  name: string;
+  /** Scalar type of the column (the element type when `dims` is set). */
+  type: ElectricColumnType;
+  /** Array dimensionality. 1 => `type[]` (Postgres `{a,b}` literal). */
+  dims?: number;
+  /**
+   * Array columns only. True when the Postgres column has NO default, so an
+   * empty/absent value is stored as SQL NULL (Electric emits `null`) rather than
+   * an empty-array literal `{}`. Prisma erases this distinction — it coerces both
+   * NULL and `{}` to `[]` on read — so we re-derive the wire form from the column's
+   * known schema. `runTags` has no default; `realtimeStreams` has `@default([])`.
+   */
+  emptyArrayAsNull?: boolean;
+};
+
+/**
+ * The columns the realtime run feed exposes, mirroring `DEFAULT_ELECTRIC_COLUMNS`
+ * in `realtimeClient.server.ts` and their Postgres types from the `TaskRun`
+ * Prisma model. The `type`/`dims` drive both the `electric-schema` header and
+ * the value encoding. Keep in sync with `DEFAULT_ELECTRIC_COLUMNS`.
+ *
+ * The array order is also the key order of every serialized row and of the schema
+ * header, because both are built by iterating this list.
+ */
+export const RUN_ELECTRIC_COLUMNS: ReadonlyArray<ElectricColumn> = [
+  { name: "id", type: "text" },
+  { name: "taskIdentifier", type: "text" },
+  { name: "createdAt", type: "timestamp" },
+  { name: "updatedAt", type: "timestamp" },
+  { name: "startedAt", type: "timestamp" },
+  { name: "delayUntil", type: "timestamp" },
+  { name: "queuedAt", type: "timestamp" },
+  { name: "expiredAt", type: "timestamp" },
+  { name: "completedAt", type: "timestamp" },
+  { name: "friendlyId", type: "text" },
+  { name: "number", type: "int4" },
+  { name: "isTest", type: "bool" },
+  { name: "status", type: "text" },
+  { name: "usageDurationMs", type: "int4" },
+  { name: "costInCents", type: "float8" },
+  { name: "baseCostInCents", type: "float8" },
+  { name: "ttl", type: "text" },
+  { name: "payload", type: "text" },
+  { name: "payloadType", type: "text" },
+  { name: "metadata", type: "text" },
+  { name: "metadataType", type: "text" },
+  { name: "output", type: "text" },
+  { name: "outputType", type: "text" },
+  // Array column without a default: an empty array goes out as `null`, not `{}`.
+  { name: "runTags", type: "text", dims: 1, emptyArrayAsNull: true },
+  // jsonb: the value is sent as its JSON.stringify text.
+  { name: "error", type: "jsonb" },
+  // Array column with a default: an empty array goes out as `{}`.
+  { name: "realtimeStreams", type: "text", dims: 1 },
+];
+
+/**
+ * Columns that can never be skipped via `skipColumns` (mirrors realtimeClient).
+ * `effectiveSkipColumns` removes these names from any requested skip list, so they
+ * are always present in the serialized row and in the schema header.
+ */
+export const RESERVED_COLUMNS = ["id", "taskIdentifier", "friendlyId", "status", "createdAt"];
+
+/**
+ * Shape of a single run hydrated for the realtime feed. Structurally compatible
+ * with the Prisma `TaskRun` projection produced by `RunHydrator`.
+ *
+ * Field names match the `name` entries of `RUN_ELECTRIC_COLUMNS`; `serializeRunRow`
+ * looks each column up on the row by that name.
+ */
+export type RealtimeRunRow = {
+  id: string;
+  taskIdentifier: string;
+  createdAt: Date;
+  updatedAt: Date;
+  startedAt: Date | null;
+  delayUntil: Date | null;
+  queuedAt: Date | null;
+  expiredAt: Date | null;
+  completedAt: Date | null;
+  friendlyId: string;
+  number: number;
+  isTest: boolean;
+  status: string;
+  usageDurationMs: number;
+  costInCents: number;
+  baseCostInCents: number;
+  ttl: string | null;
+  payload: string;
+  payloadType: string;
+  metadata: string | null;
+  metadataType: string;
+  output: string | null;
+  outputType: string;
+  // Prisma yields [] for both SQL NULL and `{}`; the wire form is re-derived on serialize.
+  runTags: string[];
+  // jsonb column; serialized with JSON.stringify, so any JSON-compatible value is accepted.
+  error: unknown;
+  realtimeStreams: string[];
+};
+
+/** Row operation carried in a change message's headers. */
+type Operation = "insert" | "update" | "delete";
+
+/** A row change: the merge key, the wire-encoded column values, and the operation. */
+type ChangeMessage = {
+  key: string;
+  value: Record<string, string | null>;
+  headers: { operation: Operation };
+};
+
+/** A control message. Only `up-to-date` is emitted by the builders in this module. */
+type ControlMessage = {
+  headers: { control: "up-to-date" | "must-refetch" };
+};
+
+/** Any element of a response body's JSON array. */
+type ShapeMessage = ChangeMessage | ControlMessage;
+
+/** Shared `up-to-date` control message appended as the last element of every body. */
+const UP_TO_DATE: ControlMessage = { headers: { control: "up-to-date" } };
+
+/**
+ * Normalizes a requested skip list into a set: empty names are dropped and reserved
+ * columns are never skippable.
+ */
+function effectiveSkipColumns(skipColumns: string[]): Set<string> {
+  const skippable = new Set<string>();
+  for (const name of skipColumns) {
+    if (name === "" || RESERVED_COLUMNS.includes(name)) {
+      continue;
+    }
+    skippable.add(name);
+  }
+  return skippable;
+}
+
+/**
+ * Wraps one element in double quotes for a Postgres array literal, putting a
+ * backslash in front of every backslash and every double quote inside it.
+ */
+function quoteArrayElement(value: string): string {
+  const escaped = value.replace(/[\\"]/g, (ch) => `\\${ch}`);
+  return '"' + escaped + '"';
+}
+
+/**
+ * Renders a JS array as a one-dimensional Postgres array literal, e.g. `{"a","b"}`.
+ *
+ * Every non-null element is stringified and double-quoted via `quoteArrayElement`,
+ * so commas, braces and whitespace inside a value need no further escaping. A
+ * `null`/`undefined` element is written as the unquoted keyword `NULL`, which is how
+ * Postgres spells a SQL NULL element; stringifying and quoting it would instead
+ * produce the text value "null" (or "undefined").
+ *
+ * @param values Elements of the array column.
+ * @returns The array literal; `{}` for an empty array.
+ */
+function pgArrayLiteral(values: unknown[]): string {
+  if (values.length === 0) {
+    return "{}";
+  }
+  const elements = values.map((v) =>
+    v === null || v === undefined ? "NULL" : quoteArrayElement(String(v))
+  );
+  return `{${elements.join(",")}}`;
+}
+
+/**
+ * Wire-encodes one column value as a string (or `null`), choosing the encoding from
+ * the column's `dims` and `type`.
+ *
+ * `null`/`undefined` always encode as `null`. Array columns become Postgres array
+ * literals; a non-array value in an array column encodes as `null`.
+ */
+function serializeValue(value: unknown, column: ElectricColumn): string | null {
+  if (value === undefined || value === null) {
+    return null;
+  }
+
+  const isArrayColumn = Boolean(column.dims && column.dims > 0);
+  if (isArrayColumn) {
+    if (!Array.isArray(value)) {
+      return null;
+    }
+    // A no-default array column stores NULL when empty, so Electric emits `null`
+    // (not `{}`); Prisma handed us `[]` for that NULL, so map it back here.
+    const storedAsNull = value.length === 0 && column.emptyArrayAsNull;
+    return storedAsNull ? null : pgArrayLiteral(value);
+  }
+
+  if (column.type === "bool") {
+    // Postgres text representation; the client's parseBool accepts "t"/"f".
+    return value ? "t" : "f";
+  }
+
+  if (column.type === "timestamp") {
+    if (!(value instanceof Date)) {
+      return String(value);
+    }
+    // The SDK's RawShapeDate appends "Z" before parsing, so the trailing "Z" of the
+    // ISO string is cut off.
+    const iso = value.toISOString();
+    return iso.slice(0, -1);
+  }
+
+  if (column.type === "jsonb") {
+    return JSON.stringify(value);
+  }
+
+  // int4 / int8 / float8 / text, and anything unrecognized: plain string form.
+  return String(value);
+}
+
+/**
+ * Builds the key under which the client merges a run's row across insert/update
+ * cycles: the quoted `public.TaskRun` relation followed by the quoted run id.
+ */
+export function runShapeKey(runId: string): string {
+  const relation = '"public"."TaskRun"';
+  return relation + '/"' + runId + '"';
+}
+
+/**
+ * Encodes one run row as the wire `value` object (column name -> string | null).
+ * Columns are emitted in `RUN_ELECTRIC_COLUMNS` order; any column named in
+ * `skipColumns` is left out, except reserved columns, which are always kept.
+ */
+export function serializeRunRow(
+  row: RealtimeRunRow,
+  skipColumns: string[] = []
+): Record<string, string | null> {
+  const skipped = effectiveSkipColumns(skipColumns);
+  const fields = row as Record<string, unknown>;
+
+  const entries = RUN_ELECTRIC_COLUMNS.filter((column) => !skipped.has(column.name)).map(
+    (column): [string, string | null] => [column.name, serializeValue(fields[column.name], column)]
+  );
+
+  return Object.fromEntries(entries);
+}
+
+/**
+ * Produces the `electric-schema` response header value: a JSON object mapping each
+ * emitted column to `{ type }`, plus `dims` for array columns. Columns dropped by
+ * `skipColumns` are omitted, matching what `serializeRunRow` emits.
+ */
+export function buildElectricSchemaHeader(skipColumns: string[] = []): string {
+  const skipped = effectiveSkipColumns(skipColumns);
+
+  const entries = RUN_ELECTRIC_COLUMNS.filter(({ name }) => !skipped.has(name)).map(
+    ({ name, type, dims }): [string, { type: string; dims?: number }] => {
+      // `type` stays the first key so the serialized header text is unchanged.
+      const descriptor: { type: string; dims?: number } = { type };
+      if (dims) {
+        descriptor.dims = dims;
+      }
+      return [name, descriptor];
+    }
+  );
+
+  return JSON.stringify(Object.fromEntries(entries));
+}
+
+/**
+ * Body of the initial snapshot response. When the row exists it is sent as a single
+ * `insert` followed by `up-to-date`; when it does not, the body is a bare
+ * `up-to-date` (an empty shape), which is how Electric represents "no rows match".
+ */
+export function buildSnapshotBody(row: RealtimeRunRow | null, skipColumns: string[] = []): string {
+  if (!row) {
+    const empty: ShapeMessage[] = [UP_TO_DATE];
+    return JSON.stringify(empty);
+  }
+
+  const insert: ShapeMessage = {
+    key: runShapeKey(row.id),
+    value: serializeRunRow(row, skipColumns),
+    headers: { operation: "insert" },
+  };
+  const snapshot: ShapeMessage[] = [insert, UP_TO_DATE];
+  return JSON.stringify(snapshot);
+}
+
+/**
+ * Body of a live response for a row that advanced: the full row as an `update`
+ * message, then `up-to-date`.
+ */
+export function buildUpdateBody(row: RealtimeRunRow, skipColumns: string[] = []): string {
+  const update: ShapeMessage = {
+    key: runShapeKey(row.id),
+    value: serializeRunRow(row, skipColumns),
+    headers: { operation: "update" },
+  };
+  return JSON.stringify([update, UP_TO_DATE]);
+}
+
+/**
+ * Body of a live response when nothing advanced: only the `up-to-date` control
+ * message, with no row emitted.
+ */
+export function buildUpToDateBody(): string {
+  const messages: ShapeMessage[] = [UP_TO_DATE];
+  return JSON.stringify(messages);
+}
+
+/** One row of a multi-row body together with the operation it is labelled with. */
+export type RowChange = { row: RealtimeRunRow; operation: "insert" | "update" };
+
+/**
+ * Body for the tag-list feed. Each entry of `changes` becomes one change message
+ * (`insert` for rows new to the shape, `update` for rows that advanced), and the
+ * body always ends with `up-to-date`, so an empty `changes` array yields a bare
+ * `up-to-date`. Re-emitting a full row is idempotent because the client merges
+ * rows by key.
+ */
+export function buildRowsBody(changes: RowChange[], skipColumns: string[] = []): string {
+  const messages: ShapeMessage[] = [];
+  for (const { row, operation } of changes) {
+    messages.push({
+      key: runShapeKey(row.id),
+      value: serializeRunRow(row, skipColumns),
+      headers: { operation },
+    });
+  }
+  messages.push(UP_TO_DATE);
+  return JSON.stringify(messages);
+}
+
+/**
+ * A row change whose wire `value` was already serialized (once, shared across feeds
+ * by the EnvChangeRouter). Only the per-feed `operation` is applied when the body is
+ * built.
+ */
+export type SerializedRowChange = {
+  runId: string;
+  value: Record<string, string | null>;
+  operation: "insert" | "update";
+};
+
+/**
+ * Variant of `buildRowsBody` that takes values serialized once per
+ * (runId, columnSet) upstream, so a run matching many feeds is serialized a single
+ * time and the same `value` object is reused in each feed's body.
+ */
+export function buildRowsBodyFromSerialized(changes: SerializedRowChange[]): string {
+  const messages: ShapeMessage[] = [];
+  for (const { runId, value, operation } of changes) {
+    messages.push({ key: runShapeKey(runId), value, headers: { operation } });
+  }
+  messages.push(UP_TO_DATE);
+  return JSON.stringify(messages);
+}
+
+/**
+ * Offset sentinel. `parseOffsetUpdatedAtMs` maps it to 0 because it is not positive.
+ * NOTE(review): presumably the offset a client sends on its first request, before it
+ * has any data — confirm against the callers.
+ */
+export const INITIAL_OFFSET = "-1";
+
+/**
+ * Builds the opaque offset token in the `${number}_${number}` form the client's
+ * type requires. The first part is the row's `updatedAt` in epoch milliseconds,
+ * which lets a live request tell whether the replica row has moved past what the
+ * client already holds; the second part is a per-connection sequence counter. Both
+ * parts are truncated toward zero.
+ */
+export function encodeOffset(updatedAtMs: number, seq: number): string {
+  const wholeUpdatedAt = Math.trunc(updatedAtMs);
+  const wholeSeq = Math.trunc(seq);
+  return [wholeUpdatedAt, wholeSeq].join("_");
+}
+
+/**
+ * Reads back the `updatedAt` epoch-ms from an offset the client echoed. Returns 0
+ * for a missing or empty offset and whenever the part before the first `_` is not
+ * a finite, positive number (which covers the initial offset `-1`).
+ */
+export function parseOffsetUpdatedAtMs(offset: string | null | undefined): number {
+  if (!offset) {
+    return 0;
+  }
+  const separatorAt = offset.indexOf("_");
+  const head = separatorAt === -1 ? offset : offset.slice(0, separatorAt);
+  const parsed = Number(head);
+  if (!Number.isFinite(parsed) || parsed <= 0) {
+    return 0;
+  }
+  return parsed;
+}
+
+/**
+ * Mirrors realtimeClient's DEQUEUED -> EXECUTING rewrite for non-current API
+ * versions: every `"status":"DEQUEUED"` pair in the serialized body is replaced
+ * with `"status":"EXECUTING"`.
+ */
+export function rewriteBodyForLegacyApiVersion(body: string): string {
+  const dequeued = '"status":"DEQUEUED"';
+  const executing = '"status":"EXECUTING"';
+  return body.split(dequeued).join(executing);
+}
diff --git a/apps/webapp/app/services/realtime/envChangeRouter.server.ts b/apps/webapp/app/services/realtime/envChangeRouter.server.ts
new file mode 100644
index 00000000000..0c68140e58b
--- /dev/null
+++ b/apps/webapp/app/services/realtime/envChangeRouter.server.ts
@@ -0,0 +1,347 @@
+import { type ChangeRecord } from "./runChangeNotifier.server";
+import { type RealtimeRunRow, serializeRunRow } from "./electricStreamProtocol.server";
+
+/**
+ * EnvChangeRouter — the per-instance routing layer that turns "feeds as predicates over
+ * one env stream" into cheap fan-out.
+ *
+ * It owns ONE subscription per environment (over the RunChangeNotifier) and an inverted
+ * index of the feeds currently held by THIS instance: `runId -> feeds`, `tag -> feeds`,
+ * `batchId -> feeds`. On a coalesced batch of ChangeRecords it:
+ *   1. routes each record to only the matching held feeds via the index (O(record-tags),
+ *      not O(feeds)) — a record that matches nothing costs nothing;
+ *   2. batch-hydrates the matched runs from Postgres ONCE per column set (collapsing the
+ *      hot-shared-tag fan-out: one run matching N feeds = one `hydrateByIds`, not N);
+ *   3. serializes each row's wire value ONCE per column set, reused across all matching
+ *      feeds;
+ *   4. resolves each matching feed's pending wait with its hydrated+serialized rows.
+ *
+ * It is stateless across reconnects: the index is rebuilt from whatever feeds this
+ * instance happens to hold, so no shape affinity or cross-poll memory is required. The
+ * per-handle working-set diff (insert vs update) stays in the consumer; the router only
+ * decides membership, hydrates, and serializes.
+ */
+
+/**
+ * Why a feed's wait ended: a matching change arrived (`notify`), the wait's timeout
+ * elapsed (`timeout`), or the wait was aborted or its feed was closed (`abort`).
+ */
+export type WakeReason = "notify" | "timeout" | "abort";
+
+/**
+ * A feed's membership predicate over the env stream.
+ *  - `run`: records for exactly that run id.
+ *  - `tag`: runs carrying ANY of `tags`; when `createdAtFloorMs` is set, runs created
+ *    before it are excluded.
+ *  - `batch`: records carrying exactly that batch id.
+ */
+export type FeedFilter =
+  | { kind: "run"; runId: string }
+  | { kind: "tag"; tags: string[]; createdAtFloorMs?: number }
+  | { kind: "batch"; batchId: string };
+
+/** A matched run handed to a feed: the hydrated row (for the feed's working-set diff) and
+ * its wire `value` serialized once for this feed's column set (shared across feeds). */
+export type MatchedRow = { row: RealtimeRunRow; value: Record<string, string | null> };
+
+/** Outcome of one wait. `rows` is non-empty only when `reason` is `notify`. */
+export type WaitResult = { reason: WakeReason; rows: MatchedRow[] };
+
+/** Minimal deps so the router is unit-testable without Redis/Postgres. */
+export interface EnvChangeSource {
+  /**
+   * Subscribes to one environment's change stream. `onBatch` receives each batch of
+   * records; the returned function ends the subscription.
+   */
+  subscribeToEnv(environmentId: string, onBatch: (records: ChangeRecord[]) => void): () => void;
+}
+/** Loads full run rows by id for one environment. */
+export interface RowHydrator {
+  /**
+   * Resolves the rows for `ids`. The router treats an id missing from the result as
+   * "run not found" and skips it.
+   */
+  hydrateByIds(
+    environmentId: string,
+    ids: string[],
+    skipColumns: string[]
+  ): Promise<RealtimeRunRow[]>;
+}
+
+/** Construction options for `EnvChangeRouter`. */
+export type EnvChangeRouterOptions = {
+  /** Where per-environment change batches come from. */
+  source: EnvChangeSource;
+  /** How matched run ids are turned into rows. */
+  hydrator: RowHydrator;
+  /** Observability: a hydrate-by-id batch ran (count = runs hydrated this tick). */
+  onHydrate?: (runCount: number) => void;
+};
+
+/** Handle a feed holds for the duration of one long-poll. */
+export type FeedRegistration = {
+  /** Wait for the next batch matching this feed (or timeout/abort), with the matched runs
+   * hydrated + serialized for this feed's columns. One wait active at a time. */
+  waitForMatch(signal: AbortSignal | undefined, timeoutMs: number): Promise<WaitResult>;
+  /** Deregister from the index; unsubscribes the env when the last feed leaves. */
+  close(): void;
+};
+
+/** Router-internal record of one registered feed. */
+type Feed = {
+  /** The membership predicate the feed registered with. */
+  filter: FeedFilter;
+  /** Columns the feed asked to leave out; passed to hydration and serialization. */
+  skipColumns: string[];
+  /** Sorted, comma-joined `skipColumns`; feeds with equal signatures share one hydrate. */
+  columnSig: string;
+  /** The currently-waiting poll's resolver (null between polls). */
+  resolve: ((result: WaitResult) => void) | null;
+};
+
+/** Router-internal state for one environment: its subscription and feed indexes. */
+type EnvState = {
+  /** Ends the environment's subscription; called when its last feed closes. */
+  unsubscribe: () => void;
+  /** Every feed currently registered for the environment. */
+  feeds: Set<Feed>;
+  /** Run feeds keyed by the run id they watch. */
+  byRunId: Map<string, Set<Feed>>;
+  /** Tag feeds keyed by each tag in their filter. */
+  byTag: Map<string, Set<Feed>>;
+  /** Batch feeds keyed by the batch id they watch. */
+  byBatchId: Map<string, Set<Feed>>;
+  /** All tag feeds, for routing partial records (no tags) as hydrate-to-classify candidates. */
+  tagFeeds: Set<Feed>;
+};
+
+/** Adds `feed` to the set stored under `key`, creating the set on first use. */
+function addToIndex(index: Map<string, Set<Feed>>, key: string, feed: Feed) {
+  const bucket = index.get(key);
+  if (bucket) {
+    bucket.add(feed);
+    return;
+  }
+  index.set(key, new Set([feed]));
+}
+
+/**
+ * Removes `feed` from the set stored under `key` and drops the key once its set is
+ * empty. Does nothing when the key is absent.
+ */
+function removeFromIndex(index: Map<string, Set<Feed>>, key: string, feed: Feed) {
+  const bucket = index.get(key);
+  if (!bucket) {
+    return;
+  }
+  bucket.delete(feed);
+  if (bucket.size === 0) {
+    index.delete(key);
+  }
+}
+
+/**
+ * Routes batches of change records from one subscription per environment to the
+ * feeds registered on this instance, hydrating and serializing each matched run
+ * once per column set.
+ *
+ * Failure handling: batches are processed fire-and-forget, so every failure is
+ * caught inside the router instead of surfacing as an unhandled promise rejection.
+ * A failed hydrate only affects the feeds of its own column set; those feeds keep
+ * waiting and eventually time out into their backstop.
+ */
+export class EnvChangeRouter {
+  readonly #envs = new Map<string, EnvState>();
+
+  /**
+   * @param options Source, hydrator and hooks. `onError` is an optional extra hook
+   *   that receives any error raised while processing a batch (typically a hydrate
+   *   failure); without it such errors are dropped after being contained.
+   */
+  constructor(
+    private readonly options: EnvChangeRouterOptions & {
+      onError?: (error: unknown) => void;
+    }
+  ) {}
+
+  /**
+   * Registers a feed for `environmentId`, subscribing to the environment if this is
+   * its first feed.
+   *
+   * @param environmentId Environment whose change stream the feed listens to.
+   * @param filter Membership predicate deciding which records wake the feed.
+   * @param skipColumns Columns to leave out when hydrating/serializing for this feed.
+   * @returns The handle used to wait for matches and to deregister.
+   * @throws Whatever `source.subscribeToEnv` throws; no state is left behind then.
+   */
+  register(environmentId: string, filter: FeedFilter, skipColumns: string[]): FeedRegistration {
+    const env = this.#ensureEnv(environmentId);
+    const feed: Feed = {
+      filter,
+      skipColumns,
+      // Order-insensitive signature so feeds skipping the same columns share a hydrate.
+      columnSig: skipColumns.length > 0 ? [...skipColumns].sort().join(",") : "",
+      resolve: null,
+    };
+
+    env.feeds.add(feed);
+    this.#indexFeed(env, feed);
+
+    const waitForMatch = (signal: AbortSignal | undefined, timeoutMs: number) =>
+      new Promise<WaitResult>((resolve) => {
+        if (signal?.aborted) {
+          resolve({ reason: "abort", rows: [] });
+          return;
+        }
+        let settled = false;
+        let timer: ReturnType<typeof setTimeout> | undefined;
+        let onAbort: (() => void) | undefined;
+        const settle = (result: WaitResult) => {
+          if (settled) return;
+          settled = true;
+          // Only clear the slot if it still belongs to this wait, so a stale timer or
+          // abort from an earlier wait cannot wipe out a newer wait's resolver.
+          if (feed.resolve === settle) {
+            feed.resolve = null;
+          }
+          if (timer) clearTimeout(timer);
+          if (signal && onAbort) signal.removeEventListener("abort", onAbort);
+          resolve(result);
+        };
+        feed.resolve = settle;
+        timer = setTimeout(() => settle({ reason: "timeout", rows: [] }), timeoutMs);
+        // Don't let a pending long-poll timer keep the process alive.
+        timer.unref?.();
+        if (signal) {
+          onAbort = () => settle({ reason: "abort", rows: [] });
+          signal.addEventListener("abort", onAbort, { once: true });
+        }
+      });
+
+    const close = () => {
+      if (!env.feeds.has(feed)) {
+        return; // already closed
+      }
+      env.feeds.delete(feed);
+      this.#deindexFeed(env, feed);
+      // Resolve any in-flight wait so the poll doesn't hang.
+      feed.resolve?.({ reason: "abort", rows: [] });
+      feed.resolve = null;
+      if (env.feeds.size === 0) {
+        this.#envs.delete(environmentId);
+        env.unsubscribe();
+      }
+    };
+
+    return { waitForMatch, close };
+  }
+
+  /** Distinct environments currently routed (for metrics). */
+  get activeEnvCount(): number {
+    return this.#envs.size;
+  }
+
+  /** Returns the environment's state, creating it and subscribing on first use. */
+  #ensureEnv(environmentId: string): EnvState {
+    const existing = this.#envs.get(environmentId);
+    if (existing) {
+      return existing;
+    }
+    const env: EnvState = {
+      unsubscribe: () => {},
+      feeds: new Set(),
+      byRunId: new Map(),
+      byTag: new Map(),
+      byBatchId: new Map(),
+      tagFeeds: new Set(),
+    };
+    this.#envs.set(environmentId, env);
+    try {
+      env.unsubscribe = this.options.source.subscribeToEnv(environmentId, (records) => {
+        // Fire-and-forget; the notifier doesn't await us. The rejection handler is what
+        // makes errors fall through to the feeds' backstop (waiters time out into a
+        // full resolve) instead of becoming an unhandled promise rejection.
+        this.#onBatch(environmentId, env, records).catch((error: unknown) => {
+          this.#reportError(error);
+        });
+      });
+    } catch (error) {
+      // A failed subscribe must not leave an entry that looks subscribed: later
+      // registrations would reuse it and never receive a batch.
+      this.#envs.delete(environmentId);
+      throw error;
+    }
+    return env;
+  }
+
+  /** Hands an error to the optional `onError` hook; the hook itself may not throw out. */
+  #reportError(error: unknown) {
+    try {
+      this.options.onError?.(error);
+    } catch {
+      // Reporting is best-effort; a faulty hook must not break routing.
+    }
+  }
+
+  /** Adds the feed to the index that matches its filter kind. */
+  #indexFeed(env: EnvState, feed: Feed) {
+    switch (feed.filter.kind) {
+      case "run":
+        addToIndex(env.byRunId, feed.filter.runId, feed);
+        break;
+      case "batch":
+        addToIndex(env.byBatchId, feed.filter.batchId, feed);
+        break;
+      case "tag":
+        env.tagFeeds.add(feed);
+        for (const tag of feed.filter.tags) {
+          addToIndex(env.byTag, tag, feed);
+        }
+        break;
+    }
+  }
+
+  /** Inverse of `#indexFeed`. */
+  #deindexFeed(env: EnvState, feed: Feed) {
+    switch (feed.filter.kind) {
+      case "run":
+        removeFromIndex(env.byRunId, feed.filter.runId, feed);
+        break;
+      case "batch":
+        removeFromIndex(env.byBatchId, feed.filter.batchId, feed);
+        break;
+      case "tag":
+        env.tagFeeds.delete(feed);
+        for (const tag of feed.filter.tags) {
+          removeFromIndex(env.byTag, tag, feed);
+        }
+        break;
+    }
+  }
+
+  /**
+   * Processes one batch: route records to waiting feeds, hydrate the matched runs once
+   * per column set, serialize each row once, then resolve every feed that has at least
+   * one surviving row.
+   */
+  async #onBatch(environmentId: string, env: EnvState, records: ChangeRecord[]) {
+    // 1. Route each record to the held feeds it matches; collect matched runIds per feed.
+    const matchedRunIdsByFeed = new Map<Feed, Set<string>>();
+    const addMatch = (feed: Feed, runId: string) => {
+      if (!feed.resolve) {
+        // Feed isn't currently waiting (between polls). Drop — its backstop catches gaps.
+        return;
+      }
+      let set = matchedRunIdsByFeed.get(feed);
+      if (!set) {
+        set = new Set();
+        matchedRunIdsByFeed.set(feed, set);
+      }
+      set.add(runId);
+    };
+
+    for (const record of records) {
+      // run feeds: exact runId match.
+      const runFeeds = env.byRunId.get(record.runId);
+      if (runFeeds) {
+        for (const feed of runFeeds) addMatch(feed, record.runId);
+      }
+
+      // batch feeds: exact batchId match (only when the record carries one).
+      if (record.batchId) {
+        const batchFeeds = env.byBatchId.get(record.batchId);
+        if (batchFeeds) {
+          for (const feed of batchFeeds) addMatch(feed, record.runId);
+        }
+      }
+
+      // tag feeds.
+      if (record.tags !== undefined) {
+        // Full record: prune via the tag index; only feeds whose filter intersects match.
+        const seen = new Set<Feed>();
+        for (const tag of record.tags) {
+          const tagFeeds = env.byTag.get(tag);
+          if (!tagFeeds) continue;
+          for (const feed of tagFeeds) {
+            if (seen.has(feed)) continue;
+            seen.add(feed);
+            addMatch(feed, record.runId);
+          }
+        }
+      } else {
+        // Partial record (no membership data): route to every tag feed as a candidate to
+        // hydrate-and-classify (rare; the publish side emits full records in practice).
+        for (const feed of env.tagFeeds) addMatch(feed, record.runId);
+      }
+    }
+
+    if (matchedRunIdsByFeed.size === 0) {
+      return;
+    }
+
+    // 2. Batch-hydrate ONCE per column set, then 3. serialize ONCE per (runId, column set).
+    const runIdsByColumnSig = new Map<string, { skipColumns: string[]; runIds: Set<string> }>();
+    for (const [feed, runIds] of matchedRunIdsByFeed) {
+      let group = runIdsByColumnSig.get(feed.columnSig);
+      if (!group) {
+        group = { skipColumns: feed.skipColumns, runIds: new Set() };
+        runIdsByColumnSig.set(feed.columnSig, group);
+      }
+      for (const id of runIds) group.runIds.add(id);
+    }
+
+    const hydratedByColumnSig = new Map<string, Map<string, MatchedRow>>();
+    await Promise.all(
+      [...runIdsByColumnSig.entries()].map(async ([columnSig, group]) => {
+        try {
+          const rows = await this.options.hydrator.hydrateByIds(
+            environmentId,
+            [...group.runIds],
+            group.skipColumns
+          );
+          this.options.onHydrate?.(rows.length);
+          const map = new Map<string, MatchedRow>();
+          for (const row of rows) {
+            map.set(row.id, { row, value: serializeRunRow(row, group.skipColumns) });
+          }
+          hydratedByColumnSig.set(columnSig, map);
+        } catch (error) {
+          // Contain the failure to this column set: its feeds get no entry below and
+          // stay waiting, while feeds of other column sets are still resolved.
+          this.#reportError(error);
+        }
+      })
+    );
+
+    // 4. Assemble each feed's matched rows (post-filtering tag feeds against the
+    //    authoritative hydrated row) and resolve its pending wait.
+    for (const [feed, runIds] of matchedRunIdsByFeed) {
+      if (!feed.resolve) {
+        continue; // stopped waiting while we hydrated; its next poll/backstop covers it
+      }
+      const hydrated = hydratedByColumnSig.get(feed.columnSig);
+      if (!hydrated) continue; // this column set's hydrate failed
+
+      const rows: MatchedRow[] = [];
+      for (const runId of runIds) {
+        const matched = hydrated.get(runId);
+        if (!matched) continue; // run not found / left the table
+        if (feed.filter.kind === "tag" && !this.#tagRowMatches(matched.row, feed.filter)) {
+          continue; // re-confirm tags + createdAt floor against the authoritative row
+        }
+        rows.push(matched);
+      }
+
+      if (rows.length > 0) {
+        feed.resolve({ reason: "notify", rows });
+      }
+      // No surviving rows (e.g. a partial-record candidate that didn't actually match):
+      // leave the feed waiting; nothing relevant changed for it.
+    }
+  }
+
+  /** Authoritative re-check for tag feeds: the hydrated row's tags intersect the filter
+   * and its createdAt is within the feed's window. Handles partial-record candidates and
+   * guards record/row tag skew. */
+  #tagRowMatches(row: RealtimeRunRow, filter: Extract<FeedFilter, { kind: "tag" }>): boolean {
+    if (filter.createdAtFloorMs !== undefined && row.createdAt.getTime() < filter.createdAtFloorMs) {
+      return false;
+    }
+    const rowTags = row.runTags ?? [];
+    return filter.tags.some((tag) => rowTags.includes(tag));
+  }
+}
diff --git a/apps/webapp/app/services/realtime/notifierRealtimeClient.server.ts b/apps/webapp/app/services/realtime/notifierRealtimeClient.server.ts
new file mode 100644
index 00000000000..8d5d597c65b
--- /dev/null
+++ b/apps/webapp/app/services/realtime/notifierRealtimeClient.server.ts
@@ -0,0 +1,1026 @@
+import { json } from "@remix-run/server-runtime";
+import { safeParseNaturalLanguageDurationAgo } from "@trigger.dev/core/v3/isomorphic";
+import { randomUUID } from "node:crypto";
+import { API_VERSIONS, CURRENT_API_VERSION } from "~/api/versions";
+import {
+  type CachedLimitProvider,
+  type RealtimeEnvironment,
+  type RealtimeRequestOptions,
+  type RealtimeRunsParams,
+} from "../realtimeClient.server";
+import { logger } from "../logger.server";
+import {
+  buildElectricSchemaHeader,
+  buildRowsBody,
+  buildRowsBodyFromSerialized,
+  buildSnapshotBody,
+  buildUpdateBody,
+  buildUpToDateBody,
+  encodeOffset,
+  INITIAL_OFFSET,
+  parseOffsetUpdatedAtMs,
+  type RealtimeRunRow,
+  rewriteBodyForLegacyApiVersion,
+  RESERVED_COLUMNS,
+  type RowChange,
+  type SerializedRowChange,
+} from "./electricStreamProtocol.server";
+import { BoundedTtlCache } from "./boundedTtlCache";
+import {
+  type EnvChangeRouter,
+  type FeedFilter,
+  type MatchedRow,
+} from "./envChangeRouter.server";
+import { type RunHydrator, type RunListResolver } from "./runReader.server";
+import { type RealtimeConcurrencyLimiter } from "./realtimeConcurrencyLimiter.server";
+
+/** The tag-list feed resolves ids via ClickHouse, which needs org + project + env.
+ * `authentication.environment` (AuthenticatedEnvironment) provides projectId, so
+ * widening here avoids touching the Electric client's RealtimeEnvironment type. */
+export type RealtimeListEnvironment = RealtimeEnvironment & { projectId: string };
+
+/** Contract for the realtime run feeds the run routes depend on: one method per feed
+ * (single run, tag-filtered run list, batch). Both the Electric client and this notifier
+ * client satisfy it, so the routes can switch between the two behind a flag. */
+export interface RealtimeStreamClient {
+  streamRun(
+    url: URL | string,
+    environment: RealtimeEnvironment,
+    runId: string,
+    apiVersion: API_VERSIONS,
+    requestOptions?: RealtimeRequestOptions,
+    clientVersion?: string,
+    signal?: AbortSignal
+  ): Promise<Response>;
+  streamRuns(
+    url: URL | string,
+    environment: RealtimeListEnvironment,
+    params: RealtimeRunsParams,
+    apiVersion: API_VERSIONS,
+    requestOptions?: RealtimeRequestOptions,
+    clientVersion?: string,
+    signal?: AbortSignal
+  ): Promise<Response>;
+  streamBatch(
+    url: URL | string,
+    environment: RealtimeListEnvironment,
+    batchId: string,
+    apiVersion: API_VERSIONS,
+    requestOptions?: RealtimeRequestOptions,
+    clientVersion?: string,
+    signal?: AbortSignal
+  ): Promise<Response>;
+}
+
+export type WakeupReason = "notify" | "timeout" | "abort";
+
+/** How a live poll resolved, for observability:
+ *  - `fast-hydrate`: the router woke this feed with matched rows (hydrated by id, NO
+ *    ClickHouse). Non-matching changes never wake the feed, so they cost nothing.
+ *  - `full-resolve`: the backstop timeout did a ClickHouse resolve (the correctness net). */
+export type LivePollPath = "fast-hydrate" | "full-resolve";
+
+export type NotifierRealtimeClientOptions = {
+  runReader: RunHydrator;
+  /** Resolves the tag/list filter into the matching id-set (filter-only). */
+  runListResolver: RunListResolver;
+  /** Per-instance routing layer over the single env change channel. */
+  router: EnvChangeRouter;
+  limiter: RealtimeConcurrencyLimiter;
+  cachedLimitProvider: CachedLimitProvider;
+  /** Backstop wait before refetching on a live request (ms). Defaults to 20000. */
+  livePollTimeoutMs?: number;
+  /** Ceiling for the tag-list createdAt lookback window (ms). Required. */
+  maximumCreatedAtFilterAgeMs: number;
+  /** Hard cap on tag-list snapshot size. Defaults to 1000. */
+  maxListResults?: number;
+  /** TTL (ms) for the multi-run resolve+hydrate cache (initial + backstop). Defaults to 1000. */
+  runSetResolveCacheTtlMs?: number;
+  /** Max entries in the resolve+hydrate cache. Defaults to 5000. */
+  runSetResolveCacheMaxEntries?: number;
+  /** Max entries in the per-handle working-set cache. Defaults to 10000. */
+  listCacheMaxEntries?: number;
+  /** Epoch-aligned bucket (ms) the tag-list createdAt lower bound is floored to, so
+   * same-tag feeds pinned within the same bucket share a cache entry. Defaults to
+   * 60000. 0 disables bucketing. */
+  runSetCreatedAtBucketMs?: number;
+  /** When true (default), a multi-run live poll holds the connection until a real delta
+   * or the backstop, rather than returning an empty up-to-date the client would re-issue. */
+  holdOnEmpty?: boolean;
+  /** Max concurrent fresh ClickHouse resolves (cache misses) across this instance. Bounds a
+   * distinct-filter reconnect stampede so it queues instead of hammering ClickHouse. Defaults
+   * to 16; 0 disables the gate (unbounded). */
+  resolveAdmissionLimit?: number;
+  /** Observability hook: why a live request woke (notify vs timeout vs abort). */
+  onWakeup?: (reason: WakeupReason) => void;
+  /** Observability hook: how a live poll resolved (fast path vs full resolve). */
+  onLivePollPath?: (path: LivePollPath) => void;
+  /** Observability hook: whether a multi-run resolve (initial/backstop) hit the cache,
+   * coalesced onto an in-flight resolve, or missed (fresh ClickHouse + Postgres). */
+  onRunSetResolve?: (result: "hit" | "miss" | "coalesced") => void;
+  /** Observability hook: latency (ms) of the ClickHouse resolve / Postgres hydrate. */
+  onRunSetQuery?: (stage: "resolve" | "hydrate", ms: number) => void;
+  /** Observability hook: a fresh resolve had to wait `ms` for an admission permit (the gate
+   * engaged — i.e. a stampede was throttled). Not called when a permit is free. */
+  onResolveAdmissionWait?: (ms: number) => void;
+};
+
+const DEFAULT_CONCURRENCY_LIMIT = 100_000; // NOTE(review): consumer is outside this view
+// Matches Electric's ~20s live long-poll hold (jittered ±15% per request).
+const DEFAULT_LIVE_POLL_TIMEOUT_MS = 20_000;
+const DEFAULT_MAX_LIST_RESULTS = 1_000; // fallback for options.maxListResults
+const LIST_CACHE_TTL_MS = 5 * 60_000; // working-set cache entry TTL (5 minutes)
+const LIST_CACHE_MAX_ENTRIES = 10_000; // fallback for options.listCacheMaxEntries
+const DEFAULT_RUNSET_CACHE_TTL_MS = 1_000; // fallback for options.runSetResolveCacheTtlMs
+const DEFAULT_RUNSET_CACHE_MAX_ENTRIES = 5_000; // fallback for options.runSetResolveCacheMaxEntries
+const DEFAULT_RUNSET_CREATED_AT_BUCKET_MS = 60_000; // documented default of runSetCreatedAtBucketMs
+const DEFAULT_RESOLVE_ADMISSION_LIMIT = 16; // fallback for options.resolveAdmissionLimit
+
+/**
+ * FIFO counting semaphore that caps how many fresh ClickHouse resolves run at once.
+ * The client takes a permit only on a genuine cache miss (after the TTL cache and the
+ * single-flight map), so identical filters still share one resolve and one permit, while
+ * a burst of distinct filters queues here instead of all hitting ClickHouse together.
+ * The cost is some added connect latency while the gate is saturated.
+ */
+class ResolveAdmissionGate {
+  /** Permits nobody holds and nobody has been handed. */
+  #free: number;
+  /** Permits currently held by callers. */
+  #held = 0;
+  /** Wake-up callbacks of parked acquirers, oldest first. */
+  readonly #parked: Array<() => void> = [];
+
+  constructor(limit: number) {
+    this.#free = limit;
+  }
+
+  /** Permits currently held (for a metrics gauge); never exceeds the limit. */
+  get inUse(): number {
+    return this.#held;
+  }
+
+  /** Resolves once the caller owns a permit; parks in FIFO order when none is free. */
+  async acquire(): Promise<void> {
+    if (this.#free > 0) {
+      this.#free -= 1;
+    } else {
+      await new Promise<void>((wake) => this.#parked.push(wake));
+    }
+    this.#held += 1;
+  }
+
+  /** Gives a permit back: straight to the oldest parked acquirer, else to the free pool. */
+  release(): void {
+    this.#held -= 1;
+    const wake = this.#parked.shift();
+    if (wake === undefined) this.#free += 1;
+    else wake();
+  }
+}
+
+/** A multi-run feed's filter. Tag-list sets `tags` (+ pinned `createdAtAfter`);
+ * the batch feed sets `batchId`. Both resolve to an id-set via the resolver. */
+type RunSetFilter = {
+  tags?: string[];
+  batchId?: string;
+  createdAtAfter?: Date;
+};
+
+/** Per-handle working set: runId -> last-emitted updatedAt (ms), so live polls
+ * emit only rows that advanced. */
+type WorkingSet = Map<string, number>;
+
+type ResponseHeaderInput = {
+  offset: string;
+  handle: string;
+  cursor?: string;
+  schema?: string;
+};
+
+/**
+ * Notifier-backed implementation of the realtime run feeds. All three feeds are
+ * predicates over ONE per-environment change stream (the EnvChangeRouter); the router
+ * decides membership, hydrates the matched runs from a read replica, and serializes their
+ * wire values once. This client owns the snapshot, the per-handle working-set diff, the
+ * ClickHouse-backed backstop, and the wire response.
+ *
+ * Single-run (`streamRun`):
+ *  - initial (`offset=-1`): hydrate + emit `insert` + `up-to-date` (with schema).
+ *  - live: the router wakes this feed when its run changes; emit a full-row `update` when
+ *    `updatedAt` advanced past what the client has, else a bare `up-to-date`. The backstop
+ *    re-checks via `getRunById`.
+ *
+ * Multi-run feeds (`streamRuns` tag-list, `streamBatch`):
+ *  - initial: resolve the matching id-set via ClickHouse (filter-only), hydrate by-id from
+ *    Postgres, emit N `insert`s, seed the working set.
+ *  - live: the router wakes the feed with the matched runs already hydrated + serialized;
+ *    diff them on the authoritative Postgres `updatedAt` against the per-handle working
+ *    set and emit only new/advanced rows. The backstop (timeout) does a full ClickHouse
+ *    resolve — the correctness net that catches gaps and drops departed runs.
+ *
+ * Tokens are opaque: `offset` = `<maxUpdatedAtMs>_<seq>`, `handle` is per-shape, `cursor`
+ * is a live-only counter. The wire format is produced by `electricStreamProtocol`.
+ */
+export class NotifierRealtimeClient implements RealtimeStreamClient {
+  #seq = 0;
+  readonly #workingSetCache: BoundedTtlCache<WorkingSet>;
+  /** Coalescing cache for the multi-run (resolveIds -> hydrateByIds) pair used by the
+   * initial snapshot and the backstop, keyed by (env, filter, columns). Collapses a
+   * reconnect/snapshot stampede of identical filters into one shared resolve+hydrate. */
+  readonly #runSetCache: BoundedTtlCache<RealtimeRunRow[]>;
+  readonly #runSetInflight = new Map<string, Promise<RealtimeRunRow[]>>();
+  /** Bounds concurrent fresh CH resolves (undefined => unbounded). */
+  readonly #admissionGate?: ResolveAdmissionGate;
+
+  constructor(private readonly options: NotifierRealtimeClientOptions) {
+    // Working sets: fixed TTL, overridable entry cap.
+    const workingSetCap = options.listCacheMaxEntries ?? LIST_CACHE_MAX_ENTRIES;
+    this.#workingSetCache = new BoundedTtlCache(LIST_CACHE_TTL_MS, workingSetCap);
+
+    // Resolve+hydrate coalescing cache: TTL and entry cap are both overridable.
+    const runSetTtlMs = options.runSetResolveCacheTtlMs ?? DEFAULT_RUNSET_CACHE_TTL_MS;
+    const runSetCap = options.runSetResolveCacheMaxEntries ?? DEFAULT_RUNSET_CACHE_MAX_ENTRIES;
+    this.#runSetCache = new BoundedTtlCache(runSetTtlMs, runSetCap);
+
+    // Only a positive limit creates the gate; otherwise fresh resolves are unbounded.
+    const permits = options.resolveAdmissionLimit ?? DEFAULT_RESOLVE_ADMISSION_LIMIT;
+    if (permits > 0) this.#admissionGate = new ResolveAdmissionGate(permits);
+  }
+
+  /** Number of entries currently held in the per-handle working-set cache (metrics gauge). */
+  get workingSetCacheSize(): number {
+    return this.#workingSetCache.size;
+  }
+
+  /** Admission permits currently held by fresh CH resolves (metrics gauge); 0 with no gate. */
+  get resolveAdmissionInUse(): number {
+    return this.#admissionGate === undefined ? 0 : this.#admissionGate.inUse;
+  }
+
+  /** Single-run feed. Dispatches on the parsed request: live long-poll, or a snapshot of
+   * the run's current row (initial request and non-live catch-up). */
+  async streamRun(
+    url: URL | string,
+    environment: RealtimeEnvironment,
+    runId: string,
+    apiVersion: API_VERSIONS,
+    requestOptions?: RealtimeRequestOptions,
+    clientVersion?: string,
+    signal?: AbortSignal
+  ): Promise<Response> {
+    const { offset, handle, isLive, skipColumns } = this.#parseStreamRequest(url, requestOptions);
+
+    // Live long-poll: only when the client already holds an offset + handle.
+    if (offset !== INITIAL_OFFSET && handle && isLive) {
+      return this.#liveResponse({
+        environment,
+        runId,
+        offset,
+        handle,
+        skipColumns,
+        apiVersion,
+        clientVersion,
+        signal,
+      });
+    }
+
+    // Everything else is a snapshot of the current row. A first request (initial offset or
+    // no handle) passes no handle, so the snapshot mints one; a non-live catch-up keeps its
+    // handle, which makes the re-emit idempotent.
+    const row = await this.options.runReader.getRunById(environment.id, runId);
+    const keptHandle = offset === INITIAL_OFFSET || !handle ? undefined : handle;
+    return this.#snapshotResponse(runId, row, skipColumns, apiVersion, clientVersion, keptHandle);
+  }
+
+  /** Tag-list feed. The createdAt lower bound is fixed when the handle is minted and read
+   * back from the handle on every later request. */
+  async streamRuns(
+    url: URL | string,
+    environment: RealtimeListEnvironment,
+    params: RealtimeRunsParams,
+    apiVersion: API_VERSIONS,
+    requestOptions?: RealtimeRequestOptions,
+    clientVersion?: string,
+    signal?: AbortSignal
+  ): Promise<Response> {
+    const { offset, handle, isLive, skipColumns } = this.#parseStreamRequest(url, requestOptions);
+    const tags = params.tags ?? [];
+
+    if (offset === INITIAL_OFFSET || !handle) {
+      // First request: compute the createdAt floor once and pin it in a fresh handle.
+      const pinnedMs = this.#computeCreatedAtFilter(params.createdAt).getTime();
+      const initialFilter: RunSetFilter = { tags, createdAtAfter: new Date(pinnedMs) };
+      return this.#runSetSnapshotResponse(
+        environment,
+        initialFilter,
+        this.#mintListHandle(pinnedMs),
+        skipColumns,
+        apiVersion,
+        clientVersion
+      );
+    }
+
+    // Later requests: read the pinned floor back from the handle so it never drifts, and
+    // re-clamp it to the max-age ceiling so a stale or crafted handle can't widen the
+    // lookback. A handle that yields no floor falls back to recomputing it.
+    const recoveredMs = this.#filterMsFromHandle(handle);
+    const floorMs =
+      recoveredMs === undefined
+        ? this.#computeCreatedAtFilter(params.createdAt).getTime()
+        : this.#clampCreatedAtFloor(recoveredMs);
+    const filter: RunSetFilter = { tags, createdAtAfter: new Date(floorMs) };
+
+    if (!isLive) {
+      // Non-live catch-up: the full snapshot again, under the same handle.
+      return this.#runSetSnapshotResponse(
+        environment,
+        filter,
+        handle,
+        skipColumns,
+        apiVersion,
+        clientVersion
+      );
+    }
+
+    return this.#runSetLiveResponse(
+      environment,
+      filter,
+      handle,
+      offset,
+      skipColumns,
+      apiVersion,
+      clientVersion,
+      signal
+    );
+  }
+
+  /** Batch feed: live long-poll once the client holds an offset + handle, else a snapshot. */
+  async streamBatch(
+    url: URL | string,
+    environment: RealtimeListEnvironment,
+    batchId: string,
+    apiVersion: API_VERSIONS,
+    requestOptions?: RealtimeRequestOptions,
+    clientVersion?: string,
+    signal?: AbortSignal
+  ): Promise<Response> {
+    const { offset, handle, isLive, skipColumns } = this.#parseStreamRequest(url, requestOptions);
+    const filter: RunSetFilter = { batchId };
+    if (offset !== INITIAL_OFFSET && handle && isLive) {
+      return this.#runSetLiveResponse(
+        environment,
+        filter,
+        handle,
+        offset,
+        skipColumns,
+        apiVersion,
+        clientVersion,
+        signal
+      );
+    }
+
+    // Initial snapshot + non-live catch-up. The handle must be per-connection, never
+    // derived from the batchId: working sets are keyed by handle, and a shared handle
+    // lets one subscriber's emit permanently suppress the same row for another. Hence
+    // `||`, not `??`: an empty-string handle is treated as absent and a fresh one minted.
+    return this.#runSetSnapshotResponse(
+      environment,
+      filter,
+      handle || this.#mintBatchHandle(batchId),
+      skipColumns,
+      apiVersion,
+      clientVersion
+    );
+  }
+
+  /** Single-run snapshot response (body + schema header); a missing row gets the zero offset. */
+  #snapshotResponse(
+    runId: string,
+    row: Awaited<ReturnType<RunHydrator["getRunById"]>>,
+    skipColumns: string[],
+    apiVersion: API_VERSIONS,
+    clientVersion?: string,
+    existingHandle?: string
+  ): Response {
+    const body = buildSnapshotBody(row, skipColumns);
+    const updatedAtMs = row ? row.updatedAt.getTime() : 0;
+    const offset = encodeOffset(updatedAtMs, row ? this.#nextSeq() : 0);
+    const handle = existingHandle ?? this.#mintHandle(runId);
+    const schema = buildElectricSchemaHeader(skipColumns);
+    return this.#buildResponse(body, apiVersion, clientVersion, { offset, handle, schema });
+  }
+
+  /**
+   * Live long-poll for a single-run feed, run inside a concurrency slot. Registers a `run`
+   * predicate with the router and waits (jittered timeout) for a match, abort, or timeout.
+   * A matched row — or, on timeout, the row re-read via `getRunById` — is emitted as a
+   * full-row `update` only when its `updatedAt` is past the request offset; else `up-to-date`.
+   */
+  async #liveResponse(params: {
+    environment: RealtimeEnvironment;
+    runId: string;
+    offset: string;
+    handle: string;
+    skipColumns: string[];
+    apiVersion: API_VERSIONS;
+    clientVersion?: string;
+    signal?: AbortSignal;
+  }): Promise<Response> {
+    const { environment, runId, offset, handle, skipColumns, apiVersion, clientVersion, signal } =
+      params;
+
+    return this.#withConcurrencySlot(environment, async () => {
+      const lastSeenMs = parseOffsetUpdatedAtMs(offset);
+      const registration = this.options.router.register(
+        environment.id,
+        { kind: "run", runId },
+        skipColumns
+      );
+
+      try {
+        const { reason, rows } = await registration.waitForMatch(signal, this.#jitteredTimeout());
+        this.options.onWakeup?.(reason);
+
+        if (reason === "abort") {
+          return this.#buildResponse(buildUpToDateBody(), apiVersion, clientVersion, {
+            offset,
+            handle,
+            cursor: String(this.#nextSeq()),
+          });
+        }
+
+        if (reason === "notify" && rows.length > 0) {
+          // The router delivered this run with its serialized value; emit it only on advance.
+          this.options.onLivePollPath?.("fast-hydrate");
+          const matched = rows[0];
+          const updatedAtMs = matched.row.updatedAt.getTime();
+          const seq = this.#nextSeq();
+          if (updatedAtMs > lastSeenMs) {
+            return this.#buildResponse(
+              buildRowsBodyFromSerialized([
+                { runId: matched.row.id, value: matched.value, operation: "update" },
+              ]),
+              apiVersion,
+              clientVersion,
+              { offset: encodeOffset(updatedAtMs, seq), handle, cursor: String(seq) }
+            );
+          }
+          return this.#buildResponse(buildUpToDateBody(), apiVersion, clientVersion, {
+            offset,
+            handle,
+            cursor: String(seq),
+          });
+        }
+
+        // Backstop (timeout, or a notify that carried no rows): re-read the run; no ClickHouse.
+        this.options.onLivePollPath?.("full-resolve");
+        const row = await this.options.runReader.getRunById(environment.id, runId);
+        const seq = this.#nextSeq();
+        if (row && row.updatedAt.getTime() > lastSeenMs) {
+          return this.#buildResponse(buildUpdateBody(row, skipColumns), apiVersion, clientVersion, {
+            offset: encodeOffset(row.updatedAt.getTime(), seq),
+            handle,
+            cursor: String(seq),
+          });
+        }
+        return this.#buildResponse(buildUpToDateBody(), apiVersion, clientVersion, {
+          offset,
+          handle,
+          cursor: String(seq),
+        });
+      } finally {
+        registration.close();
+      }
+    });
+  }
+
+  /** Snapshot for a multi-run feed (initial request and non-live catch-up): resolve + hydrate
+   * the matching runs, record each run's updatedAt as this handle's working set, and emit
+   * every row as an `insert` together with the schema header. */
+  async #runSetSnapshotResponse(
+    environment: RealtimeListEnvironment,
+    filter: RunSetFilter,
+    handle: string,
+    skipColumns: string[],
+    apiVersion: API_VERSIONS,
+    clientVersion?: string
+  ): Promise<Response> {
+    const hydrated = await this.#resolveAndHydrate(environment, filter, skipColumns);
+    const inserts: RowChange[] = hydrated.map((row) => ({ row, operation: "insert" as const }));
+
+    // Working-set timestamps are the hydrated rows' own updatedAt (Postgres, not ClickHouse).
+    const lastEmitted: WorkingSet = new Map(
+      hydrated.map((row): [string, number] => [row.id, row.updatedAt.getTime()])
+    );
+    const newestMs = hydrated.reduce(
+      (max, row) => Math.max(max, row.updatedAt.getTime()),
+      0
+    );
+
+    this.#workingSetCache.set(this.#workingSetKey(environment.id, handle), lastEmitted);
+
+    const body = buildRowsBody(inserts, skipColumns);
+    const offset = encodeOffset(newestMs, this.#nextSeq());
+    const schema = buildElectricSchemaHeader(skipColumns);
+    return this.#buildResponse(body, apiVersion, clientVersion, { offset, handle, schema });
+  }
+
+  /**
+   * Live long-poll for a multi-run feed, run inside a concurrency slot. Loops until the
+   * jittered deadline, taking one of two paths per wakeup:
+   *  - Fast path (router notify): matched runs arrive hydrated + serialized (no ClickHouse);
+   *    diff them against the per-handle working set and emit new/advanced rows.
+   *  - Backstop (deadline reached): a full resolve + hydrate. The correctness net —
+   *    catches members missed during a gap and drops runs that left the filter.
+   * With hold-on-empty (default) a notify that yields no changes keeps the request held
+   * instead of returning an empty response; an abort answers up-to-date at the request offset.
+   */
+  async #runSetLiveResponse(
+    environment: RealtimeListEnvironment,
+    filter: RunSetFilter,
+    handle: string,
+    offset: string,
+    skipColumns: string[],
+    apiVersion: API_VERSIONS,
+    clientVersion: string | undefined,
+    signal: AbortSignal | undefined
+  ): Promise<Response> {
+    return this.#withConcurrencySlot(environment, async () => {
+      const offsetFloorMs = parseOffsetUpdatedAtMs(offset);
+      // Total time to hold this long-poll, jittered to avoid synchronized refetch herds.
+      const deadline = Date.now() + this.#jitteredTimeout();
+      const holdOnEmpty = this.options.holdOnEmpty ?? true;
+
+      // Working set we diff against: seeded from the cache (or the offset floor on a
+      // miss) and advanced on each refetch within this held request.
+      const workingSetKey = this.#workingSetKey(environment.id, handle);
+      let prevSeen = this.#workingSetCache.get(workingSetKey);
+
+      const emitFromSerialized = (changes: SerializedRowChange[], maxUpdatedAt: number): Response => {
+        const seq = this.#nextSeq();
+        return this.#buildResponse(buildRowsBodyFromSerialized(changes), apiVersion, clientVersion, {
+          offset: encodeOffset(maxUpdatedAt, seq),
+          handle,
+          cursor: String(seq),
+        });
+      };
+      const emitFromRows = (changes: RowChange[], maxUpdatedAt: number): Response => {
+        const seq = this.#nextSeq();
+        return this.#buildResponse(buildRowsBody(changes, skipColumns), apiVersion, clientVersion, {
+          offset: encodeOffset(maxUpdatedAt, seq),
+          handle,
+          cursor: String(seq),
+        });
+      };
+      const emitUpToDate = (maxUpdatedAt: number): Response => {
+        const seq = this.#nextSeq();
+        return this.#buildResponse(buildUpToDateBody(), apiVersion, clientVersion, {
+          offset: encodeOffset(maxUpdatedAt, seq),
+          handle,
+          cursor: String(seq),
+        });
+      };
+
+      const registration = this.options.router.register(
+        environment.id,
+        this.#feedFilter(filter),
+        skipColumns
+      );
+
+      try {
+        while (true) {
+          const remaining = deadline - Date.now();
+          const { reason, rows } =
+            remaining > 0
+              ? await registration.waitForMatch(signal, remaining)
+              : { reason: "timeout" as const, rows: [] as MatchedRow[] };
+          this.options.onWakeup?.(reason);
+
+          if (reason === "abort") {
+            return emitUpToDate(offsetFloorMs);
+          }
+
+          // FAST PATH: the router already confirmed membership + the createdAt window and
+          // hydrated/serialized the matched runs. Just diff against the working set.
+          if (reason === "notify") {
+            this.options.onLivePollPath?.("fast-hydrate");
+            const { changes, maxUpdatedAt, touched } = this.#diffMatched(
+              rows,
+              prevSeen,
+              offsetFloorMs
+            );
+            // Merge (not replace): the router surfaced only the changed subset. On a cache miss
+            // this seeds a partial set, so a later backstop emits the unseen rest as inserts.
+            const merged = this.#mergeWorkingSet(prevSeen, touched);
+            this.#workingSetCache.set(workingSetKey, merged);
+            prevSeen = merged;
+
+            if (changes.length > 0) {
+              return emitFromSerialized(changes, maxUpdatedAt);
+            }
+            // Matched but no row advanced (already seen). Keep holding.
+            if (holdOnEmpty) {
+              continue;
+            }
+            return emitUpToDate(maxUpdatedAt);
+          }
+
+          // BACKSTOP: full ClickHouse resolve + hydrate. Replaces the working set so runs
+          // that left the filter stop being tracked (the client keeps showing them).
+          this.options.onLivePollPath?.("full-resolve");
+          const resolved = await this.#resolveAndHydrate(environment, filter, skipColumns);
+          const { changes, maxUpdatedAt, touched } = this.#diffRows(
+            resolved,
+            prevSeen,
+            offsetFloorMs
+          );
+          this.#workingSetCache.set(workingSetKey, touched);
+          prevSeen = touched;
+
+          if (changes.length > 0) {
+            return emitFromRows(changes, maxUpdatedAt);
+          }
+          // Empty backstop diff: answer up-to-date. holdOnEmpty only affects the fast path
+          // above; the backstop always ends the held request.
+          return emitUpToDate(maxUpdatedAt);
+        }
+      } finally {
+        registration.close();
+      }
+    });
+  }
+
+  /** Map a multi-run filter onto the router's predicate: a filter carrying a batchId becomes
+   * a "batch" predicate; anything else is a "tag" predicate with the createdAt floor in ms. */
+  #feedFilter(filter: RunSetFilter): FeedFilter {
+    const { batchId, tags, createdAtAfter } = filter;
+    if (batchId === undefined) {
+      // No batchId: tag predicate (tags default to none).
+      const createdAtFloorMs = createdAtAfter?.getTime();
+      return { kind: "tag", tags: tags ?? [], createdAtFloorMs };
+    }
+    return { kind: "batch", batchId };
+  }
+
+  /** Fast-path diff. For each router-matched row decide whether this feed must emit it, and
+   * with which operation, reusing the row's already-serialized `value`. Every matched row is
+   * recorded in `touched`, emitted or not. */
+  #diffMatched(
+    matched: MatchedRow[],
+    prevSeen: WorkingSet | undefined,
+    offsetFloorMs: number
+  ): { changes: SerializedRowChange[]; maxUpdatedAt: number; touched: WorkingSet } {
+    const changes: SerializedRowChange[] = [];
+    const touched: WorkingSet = new Map();
+    let maxUpdatedAt = offsetFloorMs;
+    for (const { row, value } of matched) {
+      const rowMs = row.updatedAt.getTime();
+      touched.set(row.id, rowMs);
+      maxUpdatedAt = Math.max(maxUpdatedAt, rowMs);
+      let operation: "insert" | "update" | undefined;
+      if (prevSeen === undefined) {
+        // No working set for this handle: fall back to the client's offset as the floor.
+        if (rowMs > offsetFloorMs) operation = "update";
+      } else {
+        const lastEmittedMs = prevSeen.get(row.id);
+        if (lastEmittedMs === undefined) operation = "insert";
+        else if (rowMs > lastEmittedMs) operation = "update";
+      }
+      if (operation) changes.push({ runId: row.id, value, operation });
+    }
+    return { changes, maxUpdatedAt, touched };
+  }
+
+  /**
+   * Diff for the backstop full-resolve: freshly hydrated rows compared on the Postgres
+   * `updatedAt`. With a working set: unknown run -> `insert`, advanced run -> `update`.
+   * Without one: any row past the offset floor -> a merge-safe `update`. `touched` holds
+   * every input row, emitted or not.
+   */
+  #diffRows(
+    rows: RealtimeRunRow[],
+    prevSeen: WorkingSet | undefined,
+    offsetFloorMs: number
+  ): { changes: RowChange[]; maxUpdatedAt: number; touched: WorkingSet } {
+    const changes: RowChange[] = [];
+    const touched: WorkingSet = new Map();
+    let maxUpdatedAt = offsetFloorMs;
+    for (const row of rows) {
+      const rowMs = row.updatedAt.getTime();
+      touched.set(row.id, rowMs);
+      maxUpdatedAt = Math.max(maxUpdatedAt, rowMs);
+
+      let operation: "insert" | "update" | undefined;
+      if (prevSeen === undefined) {
+        // No working set for this handle: fall back to the client's offset as the floor.
+        if (rowMs > offsetFloorMs) operation = "update";
+      } else {
+        const lastEmittedMs = prevSeen.get(row.id);
+        if (lastEmittedMs === undefined) operation = "insert";
+        else if (rowMs > lastEmittedMs) operation = "update";
+      }
+      if (operation) changes.push({ row, operation });
+    }
+    return { changes, maxUpdatedAt, touched };
+  }
+
+  /** Overlay the fast path's touched rows on the previous working set, returning a new map.
+   * Entries the fast path did not touch are kept as-is; with no previous set the result is
+   * a copy of `touched`. */
+  #mergeWorkingSet(prevSeen: WorkingSet | undefined, touched: WorkingSet): WorkingSet {
+    // Map construction lets later entries win, so a touched timestamp replaces the
+    // previous one for the same run.
+    const previousEntries = prevSeen ?? [];
+    return new Map([...previousEntries, ...touched]);
+  }
+
+  /**
+   * Cached, single-flight resolve (ClickHouse id-set) + hydrate (Postgres rows), keyed by
+   * (env, filter, columns). Shared by the initial snapshot and the backstop. Lookup order:
+   * the short-TTL cache ("hit"), then an in-flight resolve for the same key ("coalesced"),
+   * and only then a fresh resolve ("miss") whose rows are cached on success.
+   */
+  async #resolveAndHydrate(
+    environment: RealtimeListEnvironment,
+    filter: RunSetFilter,
+    skipColumns: string[]
+  ): Promise<RealtimeRunRow[]> {
+    const key = this.#runSetCacheKey(environment.id, filter, skipColumns);
+
+    const cachedRows = this.#runSetCache.get(key);
+    if (cachedRows) {
+      this.options.onRunSetResolve?.("hit");
+      return cachedRows;
+    }
+
+    const inflight = this.#runSetInflight.get(key);
+    if (inflight) {
+      this.options.onRunSetResolve?.("coalesced");
+      return inflight;
+    }
+
+    this.options.onRunSetResolve?.("miss");
+    // The async body runs synchronously up to its first await and the promise is stored in
+    // #runSetInflight right after, so same-filter callers that arrive while this one still
+    // waits for an admission permit coalesce onto it (one permit, not N).
+    const pending = (async () => {
+      try {
+        const rows = await this.#admitAndResolveUncached(environment, filter, skipColumns);
+        this.#runSetCache.set(key, rows);
+        return rows;
+      } finally {
+        this.#runSetInflight.delete(key);
+      }
+    })();
+
+    this.#runSetInflight.set(key, pending);
+    return pending;
+  }
+
+  /** Run the uncached resolve under an admission permit. Without a gate it runs right away;
+   * with one, the permit is taken first and always released, and a measured wait above 0 ms
+   * is reported through `onResolveAdmissionWait`. */
+  async #admitAndResolveUncached(
+    environment: RealtimeListEnvironment,
+    filter: RunSetFilter,
+    skipColumns: string[]
+  ): Promise<RealtimeRunRow[]> {
+    const gate = this.#admissionGate;
+    if (gate === undefined) {
+      return this.#resolveAndHydrateUncached(environment, filter, skipColumns);
+    }
+    const queuedAt = Date.now();
+    await gate.acquire();
+    const waitedMs = Date.now() - queuedAt;
+    if (waitedMs > 0) this.options.onResolveAdmissionWait?.(waitedMs);
+    try {
+      return await this.#resolveAndHydrateUncached(environment, filter, skipColumns);
+    } finally {
+      gate.release();
+    }
+  }
+
+  /** Fresh resolve (ids) then hydrate (rows), reporting each stage's latency on success. */
+  async #resolveAndHydrateUncached(
+    environment: RealtimeListEnvironment,
+    filter: RunSetFilter,
+    skipColumns: string[]
+  ): Promise<RealtimeRunRow[]> {
+    const elapsedSince = (startedAt: number): number => Date.now() - startedAt;
+    const resolveStartedAt = Date.now();
+    const runIds = await this.#resolveIds(environment, filter);
+    this.options.onRunSetQuery?.("resolve", elapsedSince(resolveStartedAt));
+    const hydrateStartedAt = Date.now();
+    const hydrated = await this.options.runReader.hydrateByIds(environment.id, runIds, skipColumns);
+    this.options.onRunSetQuery?.("hydrate", elapsedSince(hydrateStartedAt));
+    return hydrated;
+  }
+
+  /** Cache key for the resolve+hydrate cache: env, sorted tags, batchId, createdAt floor (ms),
+   * result cap and sorted skipped columns, joined with "|". */
+  #runSetCacheKey(environmentId: string, filter: RunSetFilter, skipColumns: string[]): string {
+    // Lists are sorted (order-insensitive) and JSON-encoded rather than joined, so a value
+    // containing a separator can't collide: ["a,b"] must not key the same as ["a","b"].
+    const encodeList = (values: string[] | undefined): string =>
+      values && values.length > 0 ? JSON.stringify([...values].sort()) : "";
+    const cap = this.options.maxListResults ?? DEFAULT_MAX_LIST_RESULTS;
+    const floorMs = filter.createdAtAfter?.getTime() ?? "";
+    const batchPart = filter.batchId ?? "";
+    const parts = [environmentId, encodeList(filter.tags), batchPart, floorMs, cap, encodeList(skipColumns)];
+    return parts.join("|");
+  }
+
+  async #resolveIds(environment: RealtimeListEnvironment, filter: RunSetFilter): Promise<string[]> {
+    // Resolve the ids of runs matching `filter` in this environment, capped at the
+    // configured list size (falling back to the default cap).
+    const cap = this.options.maxListResults ?? DEFAULT_MAX_LIST_RESULTS;
+    const { tags, batchId, createdAtAfter } = filter;
+    const matched = await this.options.runListResolver.resolveMatchingRunIds({
+      organizationId: environment.organizationId,
+      projectId: environment.projectId,
+      environmentId: environment.id,
+      tags,
+      batchId,
+      createdAtAfter,
+      limit: cap,
+    });
+    // A result at the cap means the feed may be truncated; surface it in the logs.
+    const hitCap = matched.length >= cap;
+    if (hitCap) {
+      const details = { environmentId: environment.id, filter, cap };
+      logger.warn("[notifierRealtimeClient] run-set feed hit the result cap", details);
+    }
+    return matched;
+  }
+
+  #computeCreatedAtFilter(createdAt: string | undefined): Date {
+    // The lookback is capped at the maximum window, mirroring realtimeClient: a missing
+    // value means "24h", and a failed parse or a too-old bound falls back to the cap.
+    const oldestAllowed = new Date(Date.now() - this.options.maximumCreatedAtFilterAgeMs);
+    const requested = safeParseNaturalLanguageDurationAgo(createdAt ?? "24h");
+    const lowerBound = requested && !(requested < oldestAllowed) ? requested : oldestAllowed;
+    // Pin an epoch-aligned bucket start, not the exact bound: same-tag feeds in one bucket
+    // then share a filter, a coalescing cache key, and one ClickHouse + Postgres query.
+    // Flooring only moves the bound earlier (by < bucket), so no visible run is dropped.
+    const bucketedMs = this.#bucketCreatedAtMs(lowerBound.getTime());
+    return new Date(bucketedMs);
+  }
+
+  #bucketCreatedAtMs(ms: number): number {
+    const width = this.options.runSetCreatedAtBucketMs ?? DEFAULT_RUNSET_CREATED_AT_BUCKET_MS;
+    return width > 0 ? Math.floor(ms / width) * width : ms; // passes through unless width > 0
+  }
+
+  /** Raises a lower bound recovered from a handle to at least the max-age floor, so a stale
+   * or crafted handle cannot reach back past the allowed window, then re-buckets it. */
+  #clampCreatedAtFloor(ms: number): number {
+    const oldestAllowedMs = Date.now() - this.options.maximumCreatedAtFilterAgeMs;
+    return this.#bucketCreatedAtMs(Math.max(oldestAllowedMs, ms));
+  }
+
+  #mintListHandle(createdAtFilterMs: number): string {
+    // The whole-millisecond createdAt bound is embedded in the handle, so a live poll
+    // can recover the same lower bound even after a working-set cache miss.
+    return ["runs", Math.trunc(createdAtFilterMs), this.#mintUniqueSuffix()].join("_");
+  }
+
+  #mintBatchHandle(batchId: string): string {
+    return "batch_" + batchId + "_" + this.#mintUniqueSuffix(); // batch_<batchId>_<unique suffix>
+  }
+
+  #mintUniqueSuffix(): string {
+    // A per-process counter repeats across instances and restarts, and a repeated handle
+    // behind a non-sticky ALB would share one working-set cache entry; add random hex.
+    return [this.#nextSeq(), randomUUID().substring(0, 8)].join("_");
+  }
+
+  #workingSetKey(environmentId: string, handle: string): string {
+    // Handles are echoed back by clients, so the key is scoped by environment id: a
+    // handle from another tenant then cannot read or overwrite this tenant's working set.
+    return environmentId + ":" + handle;
+  }
+
+  #filterMsFromHandle(handle: string): number | undefined {
+    // List handles look like `runs_<createdAtMs>_...`; any other prefix carries no bound.
+    const [prefix, rawMs] = handle.split("_");
+    if (prefix !== "runs") return undefined;
+    const parsedMs = Number(rawMs);
+    // Reject NaN, infinities, zero and negatives.
+    return Number.isFinite(parsedMs) && parsedMs > 0 ? parsedMs : undefined;
+  }
+
+  #parseStreamRequest(
+    url: URL | string,
+    requestOptions?: RealtimeRequestOptions
+  ): { offset: string; handle: string | null; isLive: boolean; skipColumns: string[] } {
+    const parsed = new URL(url.toString());
+    const query = parsed.searchParams;
+    const offset = query.get("offset") ?? INITIAL_OFFSET;
+    const handle = query.get("handle") ?? query.get("shape_id"); // "shape_id" is the fallback name
+    const isLive = query.get("live") === "true";
+    const skipColumns = this.#resolveSkipColumns(parsed, requestOptions);
+    return { offset, handle, isLive, skipColumns };
+  }
+
+  /**
+   * Runs `work` in a per-env concurrency slot (429 if over the org limit, 500 if the limit
+   * can't be read). The slot is always released; a failed release is logged, not thrown.
+   */
+  async #withConcurrencySlot(
+    environment: RealtimeEnvironment,
+    work: () => Promise<Response>
+  ): Promise<Response> {
+    const requestId = randomUUID();
+    const concurrencyLimit = await this.options.cachedLimitProvider.getCachedLimit(
+      environment.organizationId,
+      DEFAULT_CONCURRENCY_LIMIT
+    );
+    if (concurrencyLimit == null) {
+      logger.error("[notifierRealtimeClient] Failed to get concurrency limit", {
+        organizationId: environment.organizationId,
+      });
+      return json({ error: "Failed to get concurrency limit" }, { status: 500 });
+    }
+    const canProceed = await this.options.limiter.incrementAndCheck(
+      environment.id,
+      requestId,
+      concurrencyLimit
+    );
+    if (!canProceed) {
+      return json({ error: "Too many concurrent requests" }, { status: 429 });
+    }
+    try {
+      return await work();
+    } finally {
+      try {
+        await this.options.limiter.decrement(environment.id, requestId);
+      } catch (error) {
+        logger.error("[notifierRealtimeClient] Failed to release concurrency slot", { error });
+      }
+    }
+  }
+
+  #jitteredTimeout(): number {
+    // Random factor in [0.85, 1.15) so feeds don't all time out and refetch in lockstep.
+    const configuredMs = this.options.livePollTimeoutMs ?? DEFAULT_LIVE_POLL_TIMEOUT_MS;
+    return Math.round(configuredMs * (0.85 + Math.random() * 0.3));
+  }
+
+  #buildResponse(
+    body: string,
+    apiVersion: API_VERSIONS,
+    clientVersion: string | undefined,
+    headers: ResponseHeaderInput
+  ): Response {
+    // Only the current API version gets the body verbatim; other versions get it rewritten.
+    const payload =
+      apiVersion === CURRENT_API_VERSION ? body : rewriteBodyForLegacyApiVersion(body);
+
+    // CORS travels on the response itself, as the Electric upstream does (apiCors leaves a
+    // response alone once it carries allow-origin). Cross-origin browsers can read the
+    // electric-* headers only when they are exposed; otherwise the deployed react-hooks
+    // fail with MissingHeadersError. Bearer-token requests are non-credentialed, so the
+    // wildcard is safe.
+    const out = new Headers({
+      "content-type": "application/json",
+      "cache-control": "no-store",
+      "access-control-allow-origin": "*",
+      "access-control-expose-headers": "*",
+    });
+
+    // Modern clients (1.0.14) send `x-trigger-electric-version` and read the lowercase
+    // `electric-*` names. Legacy clients (0.4.0) send no version and read
+    // `electric-shape-id`/`electric-chunk-last-offset` (case-insensitive), exactly as
+    // realtimeClient's rewriteResponseHeaders does.
+    const isModernClient = Boolean(clientVersion);
+    const offsetHeader = isModernClient ? "electric-offset" : "electric-chunk-last-offset";
+    const handleHeader = isModernClient ? "electric-handle" : "electric-shape-id";
+    out.set(offsetHeader, headers.offset);
+    out.set(handleHeader, headers.handle);
+
+    // Cursor and schema are optional and only emitted when provided.
+    if (headers.cursor !== undefined) {
+      out.set("electric-cursor", headers.cursor);
+    }
+    if (headers.schema !== undefined) {
+      out.set("electric-schema", headers.schema);
+    }
+
+    return new Response(payload, { status: 200, headers: out });
+  }
+
+  #mintHandle(runId: string): string {
+    // Deterministic per run: a single-run shape's columns never change, so the handle
+    // never has to change either and the client is never forced into a must-refetch.
+    return "run-" + runId;
+  }
+
+  #nextSeq(): number {
+    // Pre-increment, wrapping to 0 before the counter leaves the safe-integer range.
+    return (this.#seq = (this.#seq + 1) % Number.MAX_SAFE_INTEGER);
+  }
+
+  #resolveSkipColumns(url: URL, requestOptions?: RealtimeRequestOptions): string[] {
+    const listed = requestOptions?.skipColumns ?? url.searchParams.get("skipColumns")?.split(",");
+    return (listed ?? []).map((s) => s.trim()).filter((s) => s && !RESERVED_COLUMNS.includes(s));
+  }
+}
diff --git a/apps/webapp/app/services/realtime/notifierRealtimeClientInstance.server.ts b/apps/webapp/app/services/realtime/notifierRealtimeClientInstance.server.ts
new file mode 100644
index 00000000000..24d5f13b0c6
--- /dev/null
+++ b/apps/webapp/app/services/realtime/notifierRealtimeClientInstance.server.ts
@@ -0,0 +1,142 @@
+import { Counter, Gauge, Histogram } from "prom-client";
+import { $replica } from "~/db.server";
+import { env } from "~/env.server";
+import { metricsRegister } from "~/metrics.server";
+import { singleton } from "~/utils/singleton";
+import { getCachedLimit } from "../platform.v3.server";
+import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server";
+import { ClickHouseRunListResolver } from "./clickHouseRunListResolver.server";
+import { EnvChangeRouter } from "./envChangeRouter.server";
+import { NotifierRealtimeClient } from "./notifierRealtimeClient.server";
+import { RealtimeConcurrencyLimiter } from "./realtimeConcurrencyLimiter.server";
+import { getRunChangeNotifier } from "./runChangeNotifierInstance.server";
+import { RunHydrator } from "./runReader.server";
+
+/**
+ * Process-singleton wiring for the notifier-backed realtime client: registers its metrics
+ * and builds the concurrency limiter, shared RunHydrator, router, and client. Only built
+ * when a request actually routes to the notifier backend, so a disabled webapp never does.
+ */
+function initializeNotifierRealtimeClient(): NotifierRealtimeClient {
+  const wakeups = new Counter({
+    name: "realtime_notifier_wakeups_total",
+    help: "Live realtime notifier wakeups by reason. A rising 'timeout' share suggests a write site is missing its publishChangeRecord delegate.",
+    labelNames: ["reason"] as const,
+    registers: [metricsRegister],
+  });
+
+  const runSetResolves = new Counter({
+    name: "realtime_notifier_runset_resolve_total",
+    help: "Multi-run (tag-list/batch) resolve+hydrate outcomes. 'hit'/'coalesced' vs 'miss' shows how effectively concurrent same-filter feeds share a single ClickHouse + Postgres query under an env-wide wake.",
+    labelNames: ["result"] as const,
+    registers: [metricsRegister],
+  });
+
+  const runSetQueryMs = new Histogram({
+    name: "realtime_notifier_runset_query_ms",
+    help: "Latency of the multi-run resolve (ClickHouse) and hydrate (Postgres) stages.",
+    labelNames: ["stage"] as const,
+    buckets: [1, 5, 10, 25, 50, 100, 250, 500, 1_000, 2_500, 5_000],
+    registers: [metricsRegister],
+  });
+
+  const livePollPaths = new Counter({
+    name: "realtime_notifier_live_poll_total",
+    help: "How live polls resolved. 'fast-hydrate' = the router woke the feed with matched runs hydrated by id (no ClickHouse); 'full-resolve' = the backstop timeout did a ClickHouse resolve. A high fast-path share is the local-membership routing working.",
+    labelNames: ["path"] as const,
+    registers: [metricsRegister],
+  });
+
+  const routerHydrates = new Counter({
+    name: "realtime_notifier_router_hydrated_runs_total",
+    help: "Runs hydrated by the EnvChangeRouter's batch-hydrate (one query per column set per wake, shared across all feeds matching the same run — the hot-shared-tag fan-out collapse).",
+    registers: [metricsRegister],
+  });
+
+  const resolveAdmissionWaits = new Counter({
+    name: "realtime_notifier_resolve_admission_waits_total",
+    help: "Fresh ClickHouse resolves that had to queue for an admission permit. A rising count means a distinct-filter reconnect stampede is being throttled (the gate is doing its job).",
+    registers: [metricsRegister],
+  });
+
+  const limiter = new RealtimeConcurrencyLimiter({
+    keyPrefix: "tr:realtime:notifier:concurrency",
+    redis: {
+      port: env.RATE_LIMIT_REDIS_PORT,
+      host: env.RATE_LIMIT_REDIS_HOST,
+      username: env.RATE_LIMIT_REDIS_USERNAME,
+      password: env.RATE_LIMIT_REDIS_PASSWORD,
+      tlsDisabled: env.RATE_LIMIT_REDIS_TLS_DISABLED === "true",
+      clusterMode: env.RATE_LIMIT_REDIS_CLUSTER_MODE_ENABLED === "1",
+    },
+  });
+
+  // One RunHydrator shared by the router (fast-path batch-hydrate) and the client
+  // (snapshot + backstop), so its single-flight + short-TTL cache covers both.
+  const runReader = new RunHydrator({ replica: $replica });
+
+  const router = new EnvChangeRouter({
+    source: getRunChangeNotifier(),
+    hydrator: runReader,
+    onHydrate: (runCount) => routerHydrates.inc(runCount),
+  });
+
+  const client = new NotifierRealtimeClient({
+    runReader,
+    runListResolver: new ClickHouseRunListResolver({
+      getClickhouse: (organizationId) =>
+        clickhouseFactory.getClickhouseForOrganization(organizationId, "realtime"),
+      prisma: $replica,
+    }),
+    router,
+    limiter,
+    cachedLimitProvider: {
+      async getCachedLimit(organizationId, defaultValue) {
+        const result = await getCachedLimit(
+          organizationId,
+          "realtimeConcurrentConnections",
+          defaultValue
+        );
+        return result.val;
+      },
+    },
+    livePollTimeoutMs: env.REALTIME_NOTIFIER_LIVE_POLL_TIMEOUT_MS,
+    maximumCreatedAtFilterAgeMs: env.REALTIME_MAXIMUM_CREATED_AT_FILTER_AGE_IN_MS,
+    maxListResults: env.REALTIME_NOTIFIER_MAX_LIST_RESULTS,
+    runSetResolveCacheTtlMs: env.REALTIME_NOTIFIER_RUNSET_CACHE_TTL_MS,
+    runSetResolveCacheMaxEntries: env.REALTIME_NOTIFIER_RUNSET_CACHE_MAX_ENTRIES,
+    listCacheMaxEntries: env.REALTIME_NOTIFIER_WORKING_SET_MAX_ENTRIES,
+    runSetCreatedAtBucketMs: env.REALTIME_NOTIFIER_RUNSET_CREATED_AT_BUCKET_MS,
+    holdOnEmpty: env.REALTIME_NOTIFIER_HOLD_ON_EMPTY === "1",
+    resolveAdmissionLimit: env.REALTIME_NOTIFIER_RESOLVE_ADMISSION_LIMIT,
+    onWakeup: (reason) => wakeups.inc({ reason }),
+    onLivePollPath: (path) => livePollPaths.inc({ path }),
+    onRunSetResolve: (result) => runSetResolves.inc({ result }),
+    onRunSetQuery: (stage, ms) => runSetQueryMs.observe({ stage }, ms),
+    onResolveAdmissionWait: () => resolveAdmissionWaits.inc(),
+  });
+
+  new Gauge({
+    name: "realtime_notifier_working_set_size",
+    help: "Entries in the per-handle working-set cache (one per active multi-run feed session).",
+    registers: [metricsRegister],
+    collect() {
+      this.set(client.workingSetCacheSize);
+    },
+  });
+
+  new Gauge({
+    name: "realtime_notifier_resolve_admission_in_use",
+    help: "Fresh ClickHouse resolves currently holding an admission permit (live concurrency against the gate's limit).",
+    registers: [metricsRegister],
+    collect() {
+      this.set(client.resolveAdmissionInUse);
+    },
+  });
+
+  return client;
+}
+
+export function getNotifierRealtimeClient(): NotifierRealtimeClient {
+  return singleton("notifierRealtimeClient", () => initializeNotifierRealtimeClient());
+}
diff --git a/apps/webapp/app/services/realtime/realtimeConcurrencyLimiter.server.ts b/apps/webapp/app/services/realtime/realtimeConcurrencyLimiter.server.ts
new file mode 100644
index 00000000000..a935858fef0
--- /dev/null
+++ b/apps/webapp/app/services/realtime/realtimeConcurrencyLimiter.server.ts
@@ -0,0 +1,111 @@
+import { Callback, Result } from "ioredis";
+import { createRedisClient, RedisClient, RedisWithClusterOptions } from "~/redis.server";
+import { logger } from "../logger.server";
+
+export type RealtimeConcurrencyLimiterOptions = {
+  redis: RedisWithClusterOptions; // connection options handed to createRedisClient
+  keyPrefix: string; // joined with the environment id (`<keyPrefix>:<envId>`) to form each key
+  /** How long a tracked request lives before it's swept as stale (seconds). Defaults to 300. */
+  expiryTimeInSeconds?: number;
+  connectionName?: string; // defaults to "trigger:realtime:notifier:concurrency"
+};
+
+/**
+ * Caps how many realtime long-polls an environment may hold open at once.
+ *
+ * This is a standalone copy of the limiter embedded in `realtimeClient.server.ts`
+ * (Electric path), so the notifier-backed client can enforce the same per-env cap
+ * WITHOUT modifying the existing Electric client. The Lua + key shape are
+ * identical; only the key prefix differs, so the two paths track independently.
+ */
+export class RealtimeConcurrencyLimiter {
+  private redis: RedisClient;
+  private expiryTimeInSeconds: number;
+
+  constructor(private options: RealtimeConcurrencyLimiterOptions) {
+    const connectionName = options.connectionName ?? "trigger:realtime:notifier:concurrency";
+    this.redis = createRedisClient(connectionName, options.redis);
+    // Tracked requests go stale after five minutes unless configured otherwise.
+    this.expiryTimeInSeconds = options.expiryTimeInSeconds ?? 60 * 5;
+    this.#registerCommands();
+  }
+
+  async incrementAndCheck(environmentId: string, requestId: string, limit: number): Promise<boolean> {
+    const nowMs = Date.now();
+    // Entries scored at or before this instant are swept as stale by the script.
+    const staleBeforeMs = nowMs - this.expiryTimeInSeconds * 1000;
+    const admitted = await this.redis.incrementAndCheckRealtimeNotifierConcurrency(
+      this.#getKey(environmentId),
+      nowMs.toString(),
+      requestId,
+      this.expiryTimeInSeconds.toString(),
+      staleBeforeMs.toString(),
+      limit.toString()
+    );
+    // The script answers 1 when the slot was taken and 0 when the env is over its limit.
+    return admitted === 1;
+  }
+
+  async decrement(environmentId: string, requestId: string): Promise<void> {
+    // Frees the slot by dropping this request's member from the env's sorted set.
+    await this.redis.zrem(this.#getKey(environmentId), requestId);
+  }
+
+  #getKey(environmentId: string): string {
+    return this.options.keyPrefix + ":" + environmentId;
+  }
+
+  #registerCommands() {
+    // Log errors emitted by the limiter's Redis client.
+    this.redis.on("error", (error) => {
+      logger.error("[realtimeConcurrencyLimiter] redis error", { error });
+    });
+
+    this.redis.defineCommand("incrementAndCheckRealtimeNotifierConcurrency", {
+      numberOfKeys: 1,
+      lua: /* lua */ `
+        local concurrencyKey = KEYS[1]
+
+        local timestamp = tonumber(ARGV[1])
+        local requestId = ARGV[2]
+        local expiryTime = tonumber(ARGV[3])
+        local cutoffTime = tonumber(ARGV[4])
+        local limit = tonumber(ARGV[5])
+
+        -- Remove expired entries
+        redis.call('ZREMRANGEBYSCORE', concurrencyKey, '-inf', cutoffTime)
+
+        -- Add the new request to the sorted set
+        redis.call('ZADD', concurrencyKey, timestamp, requestId)
+
+        -- Set the expiry time on the key
+        redis.call('EXPIRE', concurrencyKey, expiryTime)
+
+        -- Get the total number of concurrent requests
+        local totalRequests = redis.call('ZCARD', concurrencyKey)
+
+        -- Check if the limit has been exceeded
+        if totalRequests > limit then
+            redis.call('ZREM', concurrencyKey, requestId)
+            return 0
+        end
+
+        return 1
+      `,
+    });
+  }
+}
+
+declare module "ioredis" {
+  interface RedisCommander<Context> {
+    incrementAndCheckRealtimeNotifierConcurrency(
+      key: string, // KEYS[1]: the per-environment sorted-set key
+      timestamp: string, // ARGV[1]: score stored for the new member (epoch ms)
+      requestId: string, // ARGV[2]: sorted-set member identifying this request
+      expiryTime: string, // ARGV[3]: key TTL in seconds
+      cutoffTime: string, // ARGV[4]: members scored at or below this are removed first
+      limit: string, // ARGV[5]: the request is rejected when the set grows past this
+      callback?: Callback<number>
+    ): Result<number, Context>;
+  }
+}
diff --git a/apps/webapp/app/services/realtime/resolveRealtimeStreamClient.server.ts b/apps/webapp/app/services/realtime/resolveRealtimeStreamClient.server.ts
new file mode 100644
index 00000000000..220f79f9308
--- /dev/null
+++ b/apps/webapp/app/services/realtime/resolveRealtimeStreamClient.server.ts
@@ -0,0 +1,86 @@
+import { $replica } from "~/db.server";
+import { env } from "~/env.server";
+import { FEATURE_FLAG } from "~/v3/featureFlags";
+import { makeFlag } from "~/v3/featureFlags.server";
+import { logger } from "../logger.server";
+import { type RealtimeEnvironment } from "../realtimeClient.server";
+import { realtimeClient } from "../realtimeClientGlobal.server";
+import { BoundedTtlCache } from "./boundedTtlCache";
+import { type RealtimeStreamClient } from "./notifierRealtimeClient.server";
+import { getNotifierRealtimeClient } from "./notifierRealtimeClientInstance.server";
+import { getShadowRealtimeClient } from "./shadowRealtimeClientInstance.server";
+
+type RealtimeBackend = "electric" | "notifier" | "shadow";
+
+/**
+ * How resolveRealtimeStreamClient chooses which backend serves a realtime run request.
+ *
+ * Two gates, both defaulting to the Electric path:
+ *  1. `REALTIME_NOTIFIER_ENABLED` (env master switch, read once at module load). When
+ *     off, the Electric client is returned immediately — no flag read, no notifier
+ *     client construction, byte-identical to pre-Electric-Sunset behavior.
+ *  2. the `realtimeBackend` feature flag (global + per-org, org wins), resolved per
+ *     org and cached in-process for 30s so the long-poll feed doesn't hit the DB
+ *     on every request.
+ */
+const notifierEnabled = env.REALTIME_NOTIFIER_ENABLED === "1";
+const BACKEND_CACHE_TTL_MS = 30_000;
+// Keyed by organization id, so the org count bounds it; capped anyway to rule out growth.
+const BACKEND_CACHE_MAX_ENTRIES = 50_000;
+
+const flag = makeFlag($replica);
+const backendCache = new BoundedTtlCache<RealtimeBackend>(
+  BACKEND_CACHE_TTL_MS,
+  BACKEND_CACHE_MAX_ENTRIES
+);
+
+export async function resolveRealtimeStreamClient(
+  environment: RealtimeEnvironment
+): Promise<RealtimeStreamClient> {
+  // Master switch off: serve Electric without reading the flag or building other clients.
+  if (!notifierEnabled) {
+    return realtimeClient;
+  }
+  const backend = await getRealtimeBackend(environment.organizationId);
+  if (backend === "notifier") {
+    return getNotifierRealtimeClient();
+  }
+  if (backend === "shadow") {
+    // Client is still served Electric; the notifier path is diffed in the background.
+    return getShadowRealtimeClient();
+  }
+  // "electric", and any value this function does not recognise, use the Electric client.
+  return realtimeClient;
+}
+
+async function getRealtimeBackend(organizationId: string): Promise<RealtimeBackend> {
+  // Serve from the in-process cache when it still holds an entry for this org.
+  const remembered = backendCache.get(organizationId);
+  if (remembered !== undefined) {
+    return remembered;
+  }
+
+  let resolved: RealtimeBackend;
+  try {
+    // Per-org overrides come from the organization row.
+    const organization = await $replica.organization.findFirst({
+      where: { id: organizationId },
+      select: { featureFlags: true },
+    });
+    const overrides = (organization?.featureFlags as Record<string, unknown>) ?? {};
+    resolved = await flag({
+      key: FEATURE_FLAG.realtimeBackend,
+      defaultValue: "electric",
+      overrides,
+    });
+  } catch (error) {
+    // Never let a flag lookup failure break the realtime feed — fall back to Electric.
+    const context = { organizationId, error };
+    logger.error("[resolveRealtimeStreamClient] failed to resolve realtimeBackend flag", context);
+    resolved = "electric";
+  }
+
+  // The fallback is cached too, so a failing lookup is not retried on every request.
+  backendCache.set(organizationId, resolved);
+  return resolved;
+}
diff --git a/apps/webapp/app/services/realtime/runChangeNotifier.server.ts b/apps/webapp/app/services/realtime/runChangeNotifier.server.ts
new file mode 100644
index 00000000000..f975af05723
--- /dev/null
+++ b/apps/webapp/app/services/realtime/runChangeNotifier.server.ts
@@ -0,0 +1,370 @@
+import { createRedisClient, RedisClient, RedisWithClusterOptions } from "~/redis.server";
+import { logger } from "../logger.server";
+
+export const CHANGE_RECORD_VERSION = 1; // stamped as `v` on every record the notifier publishes
+
+/**
+ * A run-change fact, published once to the run's environment channel. Self-describing:
+ *  - `envId` routes it to its channel (mandatory).
+ *  - `tags` / `batchId` let a tag/batch feed decide membership LOCALLY, without a
+ *    ClickHouse re-resolve. `tags` present (even `[]`) marks a "full" record; `tags`
+ *    absent marks a "partial" record (envId+runId only) that a tag feed must hydrate to
+ *    classify. `batchId` present only when the run is in a batch.
+ *  - `runId` lets a single-run feed match; `createdAtMs` lets a tag feed apply its
+ *    createdAt floor locally; `updatedAtMs`/`status` are hints.
+ * Row state (payload/output/...) is never on the wire — it's refetched from Postgres.
+ */
+export type ChangeRecord = {
+  v: number; // CHANGE_RECORD_VERSION when published; 0 when decoded from a frame without one
+  runId: string;
+  envId: string; // "" when decoded from a frame that carried none
+  tags?: string[];
+  batchId?: string | null;
+  createdAtMs?: number;
+  updatedAtMs?: number;
+  status?: string;
+};
+
+/** A ChangeRecord minus `v`: publish sites supply this and the notifier stamps the version. */
+export type ChangeRecordInput = Omit<ChangeRecord, "v">;
+
+export function encodeChangeRecord(record: ChangeRecord): string {
+  return JSON.stringify(record); // the wire format is the record's JSON text
+}
+
+/** Decode a wire message into a ChangeRecord. Tolerant of a bare runId (no membership
+ * data) so a malformed/legacy frame degrades to a partial record (hydrate-to-classify)
+ * rather than throwing. Optional fields of the wrong JSON type are dropped, not trusted. */
+export function decodeChangeRecord(message: string): ChangeRecord {
+  const bare: ChangeRecord = { v: 0, runId: message, envId: "" };
+  if (message[0] !== "{") return bare;
+  let parsed: Record<string, unknown>;
+  try {
+    parsed = JSON.parse(message) as Record<string, unknown>;
+  } catch {
+    return bare;
+  }
+  const { v, runId, envId, tags, batchId, createdAtMs, updatedAtMs, status } = parsed;
+  if (typeof runId !== "string") return bare;
+  const num = (x: unknown) => (typeof x === "number" && Number.isFinite(x) ? x : undefined);
+  return {
+    v: num(v) ?? 0,
+    runId,
+    envId: typeof envId === "string" ? envId : "",
+    tags: Array.isArray(tags) && tags.every((t) => typeof t === "string") ? tags : undefined,
+    batchId: typeof batchId === "string" || batchId === null ? batchId : undefined,
+    createdAtMs: num(createdAtMs),
+    updatedAtMs: num(updatedAtMs),
+    status: typeof status === "string" ? status : undefined,
+  };
+}
+
+export type RunChangeNotifierOptions = {
+  redis: RedisWithClusterOptions;
+  /** Channel name prefix; the envId is appended inside a hash-tag for slot locality. */
+  channelPrefix?: string; // defaults to "realtime:"
+  connectionName?: string; // defaults to "trigger:realtime:run-change-notifier"
+  /**
+   * Leading-edge throttle (ms) for the per-env channel: deliver the first wake
+   * immediately, then at most one more per window while changes keep arriving. Bounds the
+   * wake rate per env regardless of run throughput. Defaults to 100ms. 0 disables it.
+   */
+  envWakeCoalesceWindowMs?: number;
+  /**
+   * Use Redis sharded pub/sub (SSUBSCRIBE/SPUBLISH) instead of classic pub/sub. Only
+   * valid against a Redis Cluster (channels are hash-tagged by envId, so each lands on one
+   * shard) and requires the client built with `clusterOptions.shardedSubscribers: true`.
+   * Classic PUBLISH in a cluster broadcasts to every node, so sharded pub/sub is what
+   * actually distributes the load. Defaults to false (classic, for single-node / local).
+   */
+  shardedPubSub?: boolean;
+};
+
+const DEFAULT_CHANNEL_PREFIX = "realtime:"; // used when options.channelPrefix is not set
+const DEFAULT_ENV_WAKE_COALESCE_WINDOW_MS = 100; // used when options.envWakeCoalesceWindowMs is not set
+
+/**
+ * RunChangeNotifier — carries "run X changed" facts from write sites to the realtime
+ * feed over ONE per-environment channel.
+ *
+ * Design constraints baked in here:
+ *  - ONE channel type, `<prefix>env:{<envId>}`. A change is one fact published once; who
+ *    cares about it is a predicate evaluated by the consumer (the EnvChangeRouter), not a
+ *    second channel. Single-run, tag, and batch feeds all read this one stream.
+ *  - Minimal wire data (a self-describing `ChangeRecord` of small keys), never row
+ *    columns. Row state is always refetched from Postgres.
+ *  - ONE shared, multiplexed subscriber connection per process with a refcounted
+ *    `Map<channel, Set<listener>>`. The RunQueue pattern, deliberately NOT the
+ *    per-subscribe-connection pattern of ZodPubSub/tracePubSub (which would exhaust
+ *    ElastiCache `maxclients`).
+ *  - Connections are created lazily: a process that never publishes or subscribes (the
+ *    default, flag-off state) opens no Redis connections at all.
+ *  - `publish` is fire-and-forget and never throws; a dropped publish only costs latency
+ *    because the consumer has a timeout backstop.
+ *
+ * Channels are hash-tagged (`<prefix>env:{<envId>}`) so an env's traffic lands on one
+ * cluster slot. With `shardedPubSub` (cluster only) the feed uses SSUBSCRIBE/SPUBLISH so
+ * each env's traffic stays on one shard rather than broadcasting cluster-wide.
+ */
+export class RunChangeNotifier {
+  #publisher: RedisClient | undefined;
+  #subscriber: RedisClient | undefined;
+  readonly #listeners = new Map<string, Set<(records: ChangeRecord[]) => void>>();
+  /**
+   * Per-channel accumulator of records since the last delivery, deduped by runId. A
+   * coalesced env window collapses many publishes into one wake; this holds the batch so
+   * the wake carries every run that moved, not just the last one (latest record per run
+   * wins, keeping the freshest keys).
+   */
+  readonly #pending = new Map<string, Map<string, ChangeRecord>>();
+  readonly #channelPrefix: string;
+  readonly #connectionName: string;
+  readonly #coalesceWindowMs: number;
+  /** When true, use sharded pub/sub (SSUBSCRIBE/SPUBLISH/smessage) — see options. */
+  readonly #sharded: boolean;
+  /** Active coalescing windows per channel. */
+  readonly #coalesceTimers = new Map<string, ReturnType<typeof setTimeout>>();
+  /** Channels that received a message while their window was open (need a trailing wake). */
+  readonly #coalesceDirty = new Set<string>();
+
+  constructor(private readonly options: RunChangeNotifierOptions) {
+    this.#channelPrefix = options.channelPrefix ?? DEFAULT_CHANNEL_PREFIX;
+    this.#connectionName = options.connectionName ?? "trigger:realtime:run-change-notifier";
+    this.#coalesceWindowMs = options.envWakeCoalesceWindowMs ?? DEFAULT_ENV_WAKE_COALESCE_WINDOW_MS;
+    this.#sharded = options.shardedPubSub ?? false;
+  }
+
+  /**
+   * Fire-and-forget publish of a run-changed fact to the run's environment channel. Never
+   * throws. The notifier stamps the record version.
+   */
+  publish(input: ChangeRecordInput): void {
+    const record: ChangeRecord = { v: CHANGE_RECORD_VERSION, ...input };
+    this.#publishToChannel(this.#channelForEnv(record.envId), encodeChangeRecord(record));
+  }
+
+  /** Fire-and-forget publish of many run-changed facts. Never throws. */
+  publishMany(inputs: ChangeRecordInput[]): void {
+    for (const input of inputs) {
+      this.publish(input);
+    }
+  }
+
+  #publishToChannel(channel: string, payload: string): void {
+    try {
+      const publisher = this.#ensurePublisher();
+      // Sharded pub/sub (SPUBLISH) routes to the channel's slot owner; classic PUBLISH
+      // broadcasts cluster-wide. The channel is hash-tagged by envId.
+      const result = this.#sharded
+        ? publisher.spublish(channel, payload)
+        : publisher.publish(channel, payload);
+      if (typeof (result as Promise<number>)?.catch === "function") {
+        (result as Promise<number>).catch((error) => {
+          logger.error("[runChangeNotifier] Failed to publish run-changed notification", {
+            error,
+            channel,
+          });
+        });
+      }
+    } catch (error) {
+      logger.error("[runChangeNotifier] Failed to publish run-changed notification", {
+        error,
+        channel,
+      });
+    }
+  }
+
+  /**
+   * Subscribe (persistently) to an environment's run-change stream: `onBatch` receives the
+   * accumulated batch on every wake until the returned (idempotent) unsubscribe is called.
+   * Refcounted over the shared subscriber: the first listener for an env issues SUBSCRIBE,
+   * the last one UNSUBSCRIBE.
+   */
+  subscribeToEnv(environmentId: string, onBatch: (records: ChangeRecord[]) => void): () => void {
+    const channel = this.#channelForEnv(environmentId);
+    const subscriber = this.#ensureSubscriber();
+    // NOTE(review): a failed first SUBSCRIBE is only logged and never retried here — confirm.
+    let listeners = this.#listeners.get(channel);
+    if (!listeners) {
+      listeners = new Set();
+      this.#listeners.set(channel, listeners);
+      this.#subscribeChannel(subscriber, channel).catch((error) => {
+        logger.error("[runChangeNotifier] Failed to subscribe to run-change channel", {
+          error,
+          channel,
+        });
+      });
+    }
+    listeners.add(onBatch);
+
+    let unsubscribed = false;
+    return () => {
+      if (unsubscribed) {
+        return;
+      }
+      unsubscribed = true;
+
+      const current = this.#listeners.get(channel);
+      if (!current) {
+        return;
+      }
+      current.delete(onBatch);
+      if (current.size === 0) {
+        // Drop the channel from the map only AFTER Redis confirms UNSUBSCRIBE, and only if
+        // no new listener re-subscribed while it was in flight. The map entry's existence
+        // mirrors "subscribed (or subscribe in flight) in Redis", so the subscribe path
+        // safely reuses it without a duplicate SUBSCRIBE.
+        this.#unsubscribeChannel(subscriber, channel)
+          .then(() => {
+            const latest = this.#listeners.get(channel);
+            if (!latest) {
+              return;
+            }
+            if (latest.size === 0) {
+              this.#listeners.delete(channel);
+            } else {
+              // A listener arrived during the in-flight UNSUBSCRIBE; the channel is now
+              // unsubscribed in Redis but has live listeners. Re-subscribe so they keep
+              // receiving messages (the long-poll backstop covers the gap).
+              this.#subscribeChannel(subscriber, channel).catch((error) => {
+                logger.error("[runChangeNotifier] Failed to re-subscribe to run-change channel", {
+                  error,
+                  channel,
+                });
+              });
+            }
+          })
+          .catch((error) => {
+            // UNSUBSCRIBE failed: the channel is likely still subscribed in Redis. Keep the
+            // (empty) map entry so a future subscriber reuses it without a duplicate
+            // SUBSCRIBE and #onMessage stays consistent with Redis state.
+            logger.error("[runChangeNotifier] Failed to unsubscribe from run-change channel", {
+              error,
+              channel,
+            });
+          });
+      }
+    };
+  }
+
+  /** Env channels tracked in the listener map, pending-unsubscribe ones included (metrics). */
+  get activeSubscriptionCount(): number {
+    return this.#listeners.size;
+  }
+
+  async quit(): Promise<void> {
+    // Cancel every open coalescing window, then drop all buffered delivery state.
+    this.#coalesceTimers.forEach((timer) => clearTimeout(timer));
+    this.#coalesceTimers.clear();
+    this.#coalesceDirty.clear();
+    this.#pending.clear();
+    // Close both connections; allSettled waits for both even if one of them rejects.
+    await Promise.allSettled([this.#subscriber?.quit(), this.#publisher?.quit()]);
+    this.#subscriber = undefined;
+    this.#publisher = undefined;
+    this.#listeners.clear();
+  }
+
+  #ensurePublisher(): RedisClient {
+    // The publish connection is opened lazily on first use and then reused. It is named
+    // `<connectionName>:pub`, separate from the subscriber connection.
+    this.#publisher ??= createRedisClient(`${this.#connectionName}:pub`, this.options.redis);
+    return this.#publisher;
+  }
+
+  #ensureSubscriber(): RedisClient {
+    if (this.#subscriber) return this.#subscriber;
+    // First use: open the dedicated subscribe connection and route both delivery events
+    // into one handler. Classic pub/sub emits "message", sharded pub/sub emits "smessage",
+    // so registering both keeps the delivery path identical regardless of mode.
+    const handleMessage = (channel: string, message: string) => this.#onMessage(channel, message);
+    const client = createRedisClient(`${this.#connectionName}:sub`, this.options.redis);
+    client.on("message", handleMessage);
+    client.on("smessage", handleMessage);
+    this.#subscriber = client;
+    return client;
+  }
+
+  /** SSUBSCRIBE when sharded pub/sub (cluster-only) is enabled, else classic SUBSCRIBE. */
+  #subscribeChannel(subscriber: RedisClient, channel: string): Promise<unknown> {
+    return this.#sharded ? subscriber.ssubscribe(channel) : subscriber.subscribe(channel);
+  }
+
+  /** SUNSUBSCRIBE when sharded pub/sub (cluster-only) is enabled, else classic UNSUBSCRIBE. */
+  #unsubscribeChannel(subscriber: RedisClient, channel: string): Promise<unknown> {
+    return this.#sharded ? subscriber.sunsubscribe(channel) : subscriber.unsubscribe(channel);
+  }
+
+  #onMessage(channel: string, message: string) {
+    // Buffer the decoded record first (deduped by runId) so that a coalesced wake carries
+    // every run that moved during the window.
+    this.#addPending(channel, decodeChangeRecord(message));
+    // Without a positive coalescing window every message wakes the listeners directly.
+    if (!(this.#coalesceWindowMs > 0)) {
+      this.#deliver(channel);
+      return;
+    }
+    this.#deliverCoalesced(channel);
+  }
+
+  /** Adds a record to the channel's pending batch. The batch is keyed by runId, so a
+   * later record for the same run overwrites the earlier one (the freshest keys win). */
+  #addPending(channel: string, record: ChangeRecord) {
+    const existing = this.#pending.get(channel);
+    const batch = existing ?? new Map();
+    if (!existing) {
+      this.#pending.set(channel, batch);
+    }
+    batch.set(record.runId, record);
+  }
+
+  #deliver(channel: string) {
+    // Drain (and clear) the pending batch so a later message starts a fresh one.
+    const batch = [...(this.#pending.get(channel)?.values() ?? [])];
+    this.#pending.delete(channel);
+    const listeners = this.#listeners.get(channel);
+    if (!listeners || batch.length === 0) return;
+    for (const onBatch of [...listeners]) {
+      try {
+        onBatch(batch);
+      } catch (error) {
+        // Isolate listeners: one throwing callback must not starve the rest of the wake.
+        logger.error("[runChangeNotifier] Run-change listener threw", { error, channel });
+      }
+    }
+  }
+
+  /**
+   * Leading-edge throttle: the first wake is delivered immediately and opens a window.
+   * Messages that arrive while the window is open only mark the channel dirty; the window
+   * timer then delivers one trailing wake (re-opening while activity continues). Caps the
+   * wake rate per env to ~1/window. Lossless: the batch accumulates across the window.
+   */
+  #deliverCoalesced(channel: string) {
+    if (!this.#coalesceTimers.has(channel)) {
+      this.#deliver(channel);
+      this.#openCoalesceWindow(channel);
+      return;
+    }
+    this.#coalesceDirty.add(channel);
+  }
+
+  #openCoalesceWindow(channel: string) {
+    const onWindowElapsed = () => {
+      this.#coalesceTimers.delete(channel);
+      // Clearing the dirty flag reports whether anything arrived during the window.
+      if (!this.#coalesceDirty.delete(channel)) return;
+      this.#deliver(channel);
+      this.#openCoalesceWindow(channel);
+    };
+    const timer = setTimeout(onWindowElapsed, this.#coalesceWindowMs);
+    timer.unref?.(); // a pending coalescing window must not hold the process open at shutdown
+    this.#coalesceTimers.set(channel, timer);
+  }
+
+  // The env id is wrapped in a Redis hash tag (`...{<envId>}`) so that all of an env's
+  // traffic maps to one cluster slot (one shard) under sharded pub/sub.
+  #channelForEnv(environmentId: string): string {
+    return this.#channelPrefix + "env:{" + environmentId + "}";
+  }
+}
diff --git a/apps/webapp/app/services/realtime/runChangeNotifierHandlers.server.ts b/apps/webapp/app/services/realtime/runChangeNotifierHandlers.server.ts
new file mode 100644
index 00000000000..fa5f5681f90
--- /dev/null
+++ b/apps/webapp/app/services/realtime/runChangeNotifierHandlers.server.ts
@@ -0,0 +1,101 @@
+import { env } from "~/env.server";
+import { engine } from "~/v3/runEngine.server";
+import { logger } from "../logger.server";
+import { publishChangeRecord } from "./runChangeNotifierInstance.server";
+
+/**
+ * ChangeRecordBuilder — builds and publishes a self-describing `ChangeRecord` to the run's
+ * environment channel for the lifecycle events whose engine-bus payload already carries
+ * env + tags + batchId. One publish per change; `envId` is always present.
+ *
+ * The terminal transitions (runSucceeded/runFailed/runExpired/runCancelled),
+ * runAttemptFailed, and runMetadataUpdated publish from `runEngineHandlers.server.ts`
+ * instead — those events don't carry env/tags/batchId on the bus, but that file already
+ * re-reads the run (or resolves the env) for each, so the publish piggybacks on the
+ * existing read rather than widening the event bus. So fully disabling publishing is the
+ * env master switch (`REALTIME_NOTIFIER_ENABLED`), not just deleting this file.
+ *
+ * Coverage is intentionally not exhaustive: a dropped or uncovered transition only adds
+ * latency because the consumer has a periodic backstop full-resolve.
+ */
+export function registerRunChangeNotifierHandlers() {
+  // Return a truthy value in every path so the singleton() wrapper (which uses ??=) caches
+  // the result and never re-runs this factory — re-running would attach duplicate
+  // engine-bus listeners on each Remix dev-mode reload.
+  if (env.REALTIME_NOTIFIER_ENABLED !== "1") {
+    return true;
+  }
+
+  // Run created (trigger). The first signal a tag/batch feed gets for a brand-new run: a
+  // freshly-created run is born QUEUED with no status transition, so without this it only
+  // surfaces on the consumer's periodic backstop resolve (and not at all before ClickHouse
+  // ingests it). Routing the create record hydrates the new run by id straight from Postgres.
+  engine.eventBus.on("runCreated", ({ run, environment }) => {
+    publishChangeRecord({
+      runId: run.id,
+      envId: environment.id,
+      tags: run.runTags,
+      batchId: run.batchId,
+    });
+  });
+
+  // Status transitions (checkpoint suspend/resume, pending version, dequeue).
+  engine.eventBus.on("runStatusChanged", ({ run, environment }) => {
+    publishChangeRecord({
+      runId: run.id,
+      envId: environment.id,
+      tags: run.runTags,
+      batchId: run.batchId,
+    });
+  });
+
+  // Dequeue/lock (sets startedAt) and attempt start (DEQUEUED -> EXECUTING) — the
+  // most-watched "my run started" transitions.
+  engine.eventBus.on("runLocked", ({ run, environment }) => {
+    publishChangeRecord({
+      runId: run.id,
+      envId: environment.id,
+      tags: run.runTags,
+      batchId: run.batchId,
+    });
+  });
+  engine.eventBus.on("runAttemptStarted", ({ run, environment }) => {
+    publishChangeRecord({
+      runId: run.id,
+      envId: environment.id,
+      tags: run.runTags,
+      batchId: run.batchId,
+    });
+  });
+  // Retry scheduling (the engine's `runRetryScheduled` event); same record shape as above.
+  engine.eventBus.on("runRetryScheduled", ({ run, environment }) => {
+    publishChangeRecord({
+      runId: run.id,
+      envId: environment.id,
+      tags: run.runTags,
+      batchId: run.batchId,
+    });
+  });
+
+  // Delay lifecycle (delayUntil / queued-after-delay changes).
+  engine.eventBus.on("runDelayRescheduled", ({ run, environment }) => {
+    publishChangeRecord({
+      runId: run.id,
+      envId: environment.id,
+      tags: run.runTags,
+      batchId: run.batchId,
+    });
+  });
+  engine.eventBus.on("runEnqueuedAfterDelay", ({ run, environment }) => {
+    publishChangeRecord({
+      runId: run.id,
+      envId: environment.id,
+      tags: run.runTags,
+      batchId: run.batchId,
+    });
+  });
+
+  logger.info("[runChangeNotifier] realtime change-record builder registered");
+
+  return true;
+}
diff --git a/apps/webapp/app/services/realtime/runChangeNotifierInstance.server.ts b/apps/webapp/app/services/realtime/runChangeNotifierInstance.server.ts
new file mode 100644
index 00000000000..ed1d1ce12b2
--- /dev/null
+++ b/apps/webapp/app/services/realtime/runChangeNotifierInstance.server.ts
@@ -0,0 +1,74 @@
+import { Gauge } from "prom-client";
+import { env } from "~/env.server";
+import { metricsRegister } from "~/metrics.server";
+import { singleton } from "~/utils/singleton";
+import { RunChangeNotifier, type ChangeRecordInput } from "./runChangeNotifier.server";
+
+/**
+ * Process-singleton wiring for the RunChangeNotifier plus the thin, gated
+ * convenience functions that write sites and the realtime route delegate to.
+ *
+ * The notifier is constructed lazily (only on the first publish/subscribe when
+ * enabled), so a webapp running with `REALTIME_NOTIFIER_ENABLED=0` (the default)
+ * opens no Redis connections and registers no metrics for this subsystem.
+ */
+const notifierEnabled = env.REALTIME_NOTIFIER_ENABLED === "1";
+
+function initializeRunChangeNotifier(): RunChangeNotifier {
+  const clusterMode = env.REALTIME_RUNS_PUBSUB_REDIS_CLUSTER_MODE_ENABLED === "1";
+  // Sharded pub/sub only works against a cluster; classic pub/sub there would
+  // broadcast every message to every node, so this is what actually shards load.
+  const shardedPubSub = clusterMode && env.REALTIME_RUNS_PUBSUB_REDIS_SHARDED_ENABLED === "1";
+
+  const notifier = new RunChangeNotifier({
+    redis: {
+      host: env.REALTIME_RUNS_PUBSUB_REDIS_HOST,
+      port: env.REALTIME_RUNS_PUBSUB_REDIS_PORT,
+      username: env.REALTIME_RUNS_PUBSUB_REDIS_USERNAME,
+      password: env.REALTIME_RUNS_PUBSUB_REDIS_PASSWORD,
+      tlsDisabled: env.REALTIME_RUNS_PUBSUB_REDIS_TLS_DISABLED === "true",
+      clusterMode,
+      // One subscriber connection per shard so SSUBSCRIBE routes to the slot owner.
+      ...(shardedPubSub ? { clusterOptions: { shardedSubscribers: true } } : {}),
+    },
+    envWakeCoalesceWindowMs: env.REALTIME_NOTIFIER_ENV_WAKE_COALESCE_WINDOW_MS,
+    shardedPubSub,
+  });
+  // Sampled at scrape time. The notifier counts env channels (not runs), so say so in help.
+  new Gauge({
+    name: "realtime_run_change_notifier_active_subscriptions",
+    help: "Distinct environment channels currently subscribed for realtime change notifications",
+    collect() {
+      this.set(notifier.activeSubscriptionCount);
+    },
+    registers: [metricsRegister],
+  });
+
+  return notifier;
+}
+
+/** Returns the notifier, built by `initializeRunChangeNotifier` and memoized via `singleton`. */
+export function getRunChangeNotifier(): RunChangeNotifier {
+  return singleton("runChangeNotifier", initializeRunChangeNotifier);
+}
+
+/** Whether the notifier subsystem is enabled for this process (REALTIME_NOTIFIER_ENABLED=1). */
+export function isRunChangeNotifierEnabled(): boolean {
+  return notifierEnabled;
+}
+
+/** Fire-and-forget publish of a single run-changed record. When the notifier is disabled
+ * this does nothing (the notifier is not even constructed), so publish sites can call it
+ * unconditionally. */
+export function publishChangeRecord(input: ChangeRecordInput): void {
+  if (notifierEnabled) {
+    getRunChangeNotifier().publish(input);
+  }
+}
+
+/** Batch variant of `publishChangeRecord`; likewise a no-op when the notifier is disabled. */
+export function publishManyChangeRecords(inputs: ChangeRecordInput[]): void {
+  if (notifierEnabled) {
+    getRunChangeNotifier().publishMany(inputs);
+  }
+}
diff --git a/apps/webapp/app/services/realtime/runReader.server.ts b/apps/webapp/app/services/realtime/runReader.server.ts
new file mode 100644
index 00000000000..4135e94366b
--- /dev/null
+++ b/apps/webapp/app/services/realtime/runReader.server.ts
@@ -0,0 +1,191 @@
+import { type Prisma, type PrismaClient } from "@trigger.dev/database";
+import { BoundedTtlCache } from "./boundedTtlCache";
+import { RESERVED_COLUMNS, type RealtimeRunRow } from "./electricStreamProtocol.server";
+
+/**
+ * RunReader — the pluggable read half of the notifier-backed realtime feed.
+ *
+ * The mandate: ClickHouse is filter-only and resolves IDs,
+ * Postgres always hydrates row columns. This file owns the Postgres hydration
+ * half (`RunHydrator`, by-id) and the `RunListResolver` interface (the tag/list
+ * filter -> id-set seam, implemented over ClickHouse).
+ *
+ * Splitting hydration behind this small surface keeps the realtime feed
+ * decoupled from where runs physically live, ready for a future `TaskRunFast`
+ * table or a non-Postgres row store.
+ */
+
+/** The TaskRun columns the realtime feed projects (mirrors DEFAULT_ELECTRIC_COLUMNS). */
+export const RUN_HYDRATOR_SELECT = {
+  id: true,
+  taskIdentifier: true,
+  createdAt: true,
+  updatedAt: true,
+  startedAt: true,
+  delayUntil: true,
+  queuedAt: true,
+  expiredAt: true,
+  completedAt: true,
+  friendlyId: true,
+  number: true,
+  isTest: true,
+  status: true,
+  usageDurationMs: true,
+  costInCents: true,
+  baseCostInCents: true,
+  ttl: true,
+  payload: true,
+  payloadType: true,
+  metadata: true,
+  metadataType: true,
+  output: true,
+  outputType: true,
+  runTags: true,
+  error: true,
+  realtimeStreams: true,
+} satisfies Prisma.TaskRunSelect;
+
+/**
+ * Columns the feed needs internally regardless of the client's `skipColumns`:
+ * `id` keys the row, `updatedAt` drives the offset and the live working-set diff.
+ * Everything else can be projected away when the client skips it (see
+ * `buildHydratorSelect`), so the replica doesn't ship large `payload`/`output`/
+ * `metadata`/`error` columns the response will drop anyway.
+ */
+const ALWAYS_HYDRATED_COLUMNS = new Set<string>(["id", "updatedAt", ...RESERVED_COLUMNS]);
+
+/** Narrows `RUN_HYDRATOR_SELECT` to the columns the client did not skip, while always
+ * keeping the internally required ones. No skipped columns: the full select, unchanged. */
+export function buildHydratorSelect(skipColumns: string[] = []): Prisma.TaskRunSelect {
+  if (skipColumns.length === 0) {
+    return RUN_HYDRATOR_SELECT;
+  }
+  const skipped = new Set(skipColumns);
+  // Keep a column when it is always hydrated or when the client did not ask to drop it.
+  const kept = Object.keys(RUN_HYDRATOR_SELECT).filter(
+    (column) => ALWAYS_HYDRATED_COLUMNS.has(column) || !skipped.has(column)
+  );
+  const select: Record<string, boolean> = {};
+  kept.forEach((column) => (select[column] = true));
+  return select as Prisma.TaskRunSelect;
+}
+
+export type RunListFilter = {
+  organizationId: string;
+  projectId: string;
+  environmentId: string;
+  /** Contains-ANY tag match (OR). Omit/empty for non-tag feeds. */
+  tags?: string[];
+  /** Restrict to a single batch (internal batch id) — the batch feed. */
+  batchId?: string;
+  /** Lower bound on createdAt (the tag-list feed pins this; batch omits it). */
+  createdAtAfter?: Date;
+  /** Hard cap on the result set so a broad filter can't unbound the snapshot. */
+  limit: number;
+};
+
+/**
+ * Resolves a tag/list filter into the matching run id-set, filter-only (no row
+ * columns; rows are hydrated from Postgres by id afterward). Pluggable so the
+ * resolution source can change without touching the feed. The ClickHouse
+ * implementation lives in `clickHouseRunListResolver.server.ts`.
+ */
+export interface RunListResolver {
+  resolveMatchingRunIds(filter: RunListFilter): Promise<string[]>;
+}
+
+export type RunHydratorOptions = {
+  /** A read-replica Prisma client (`$replica`). Always Postgres. */
+  replica: Pick<PrismaClient, "taskRun">;
+  /**
+   * Read-through cache TTL (ms) to collapse duplicate refetches across a burst
+   * of live polls for the same run. Fan-in is low in practice, so this is
+   * insurance, not load-bearing. Set to 0 to disable. Defaults to 250ms.
+   */
+  cacheTtlMs?: number;
+  /** Hard cap on cache entries before expired entries are swept. */
+  maxCacheEntries?: number;
+};
+
+const DEFAULT_CACHE_TTL_MS = 250;
+const DEFAULT_MAX_CACHE_ENTRIES = 5_000;
+
+/**
+ * Hydrates runs from the read replica, projected to the realtime columns. `getRunById`
+ * single-flights concurrent refetches for the same (env, run) and collapses rapid repeats
+ * with a short TTL cache; `hydrateByIds` queries the replica directly with neither.
+ */
+export class RunHydrator {
+  readonly #inflight = new Map<string, Promise<RealtimeRunRow | null>>();
+  readonly #cache: BoundedTtlCache<RealtimeRunRow | null>;
+  readonly #cacheTtlMs: number;
+
+  constructor(private readonly options: RunHydratorOptions) {
+    this.#cacheTtlMs = options.cacheTtlMs ?? DEFAULT_CACHE_TTL_MS;
+    this.#cache = new BoundedTtlCache(
+      this.#cacheTtlMs,
+      options.maxCacheEntries ?? DEFAULT_MAX_CACHE_ENTRIES
+    );
+  }
+
+  async getRunById(environmentId: string, runId: string): Promise<RealtimeRunRow | null> {
+    const key = `${environmentId}:${runId}`;
+
+    if (this.#cacheTtlMs > 0) {
+      // A cached null is a valid "run not found" hit; only undefined is a miss.
+      const cached = this.#cache.get(key);
+      if (cached !== undefined) {
+        return cached;
+      }
+    }
+    // Single-flight: join an identical fetch that is already running.
+    const existing = this.#inflight.get(key);
+    if (existing) {
+      return existing;
+    }
+    // Registered in #inflight synchronously (before awaiting) so concurrent callers join it.
+    const promise = this.#fetch(environmentId, runId).finally(() => this.#inflight.delete(key));
+    this.#inflight.set(key, promise);
+
+    const row = await promise;
+    // Only the caller that started the fetch writes the result into the cache.
+    if (this.#cacheTtlMs > 0) {
+      this.#cache.set(key, row);
+    }
+
+    return row;
+  }
+
+  /** Hydrate many runs by id in one query (tag/list feed). Order is not guaranteed.
+   * `skipColumns` projects the SELECT so the replica doesn't ship columns the client
+   * dropped (notably the large `payload`/`output`/`metadata`/`error` columns). */
+  async hydrateByIds(
+    environmentId: string,
+    ids: string[],
+    skipColumns: string[] = []
+  ): Promise<RealtimeRunRow[]> {
+    if (ids.length === 0) {
+      return [];
+    }
+    const rows = await this.options.replica.taskRun.findMany({
+      where: {
+        runtimeEnvironmentId: environmentId,
+        id: { in: ids },
+      },
+      select: buildHydratorSelect(skipColumns),
+    });
+    return rows as unknown as RealtimeRunRow[];
+  }
+  // Single-run read behind getRunById; always selects the full RUN_HYDRATOR_SELECT.
+  async #fetch(environmentId: string, runId: string): Promise<RealtimeRunRow | null> {
+    const run = await this.options.replica.taskRun.findFirst({
+      where: {
+        id: runId,
+        runtimeEnvironmentId: environmentId,
+      },
+      select: RUN_HYDRATOR_SELECT,
+    });
+
+    return (run ?? null) as RealtimeRunRow | null;
+  }
+}
diff --git a/apps/webapp/app/services/realtime/shadowCompare.server.ts b/apps/webapp/app/services/realtime/shadowCompare.server.ts
new file mode 100644
index 00000000000..b24540bfca3
--- /dev/null
+++ b/apps/webapp/app/services/realtime/shadowCompare.server.ts
@@ -0,0 +1,297 @@
+import {
+  type ElectricColumnType,
+  RUN_ELECTRIC_COLUMNS,
+  serializeRunRow,
+} from "./electricStreamProtocol.server";
+import { type RunHydrator, type RunListFilter, type RunListResolver } from "./runReader.server";
+
+/**
+ * Dual-run shadow-compare.
+ *
+ * The client is always served the Electric response; in the background this
+ * re-derives what the notifier path WOULD emit and diffs the two, so we can prove
+ * parity on real production traffic before any cutover.
+ *
+ * Two kinds of divergence are checked:
+ *  - serialization: for each run Electric emitted, re-hydrate it and serialize via
+ *    the notifier serializer, then compare SEMANTICALLY (decode both sides per
+ *    column type) so equivalent-but-differently-encoded wire values (timestamp
+ *    format, bool t/true, number formatting) are not false positives. The compare
+ *    is gated on same-version (matching updatedAt) so a row that changed between
+ *    Electric's emit and our refetch is recorded as "skew", not a divergence.
+ *  - membership (tag/batch initial snapshot only): the set of run ids Electric
+ *    emitted vs the set the notifier resolver returns. This is where the known
+ *    tag OR-vs-AND difference shows up.
+ *
+ * Pure except for the injected RunHydrator/RunListResolver, so it's unit-testable.
+ */
+
+export type ShadowFeed = "run" | "runs" | "batch";
+
+type WireValue = Record<string, string | null>;
+
+type ShapeMessage = {
+  key?: string;
+  value?: WireValue;
+  headers: { operation?: string; control?: string };
+};
+
+const COLUMN_BY_NAME = new Map(RUN_ELECTRIC_COLUMNS.map((column) => [column.name, column]));
+
+export type ColumnDiff = {
+  runId: string;
+  column: string;
+  electric: string | null;
+  notifier: string | null;
+};
+
+export type ShadowCompareOutcome = {
+  feed: ShadowFeed;
+  /** Runs whose every emitted column matched (same-version). */
+  serializationMatched: number;
+  /** Runs with at least one semantic column divergence (same-version). */
+  serializationDiverged: number;
+  /** Runs that changed between Electric's emit and our refetch (not a divergence). */
+  serializationSkew: number;
+  /** Per-column divergences (capped) for logging. */
+  diffs: ColumnDiff[];
+  /** Set membership (tag/batch initial snapshot only). undefined when not checked. */
+  membershipMatch?: boolean;
+  missingInNotifier?: string[];
+  extraInNotifier?: string[];
+};
+
+export type ShadowCompareInput = {
+  feed: ShadowFeed;
+  /** The served Electric response body (a JSON array of messages, or "" / "[]"). */
+  electricBody: string;
+  environment: { id: string };
+  skipColumns: string[];
+  /** True when this was an initial snapshot request (offset=-1); enables membership compare. */
+  isInitialSnapshot: boolean;
+  /** When set (tag/batch initial snapshot), compare the resolved id-set. */
+  membershipFilter?: RunListFilter;
+};
+
+const MAX_DIFFS = 20;
+
+export class RealtimeShadowComparator {
+  constructor(
+    private readonly options: { runReader: RunHydrator; runListResolver: RunListResolver }
+  ) {}
+  /** Diffs one served Electric response body against what the notifier path would emit. */
+  async compare(input: ShadowCompareInput): Promise<ShadowCompareOutcome> {
+    const messages = parseBody(input.electricBody);
+    const changes = messages.filter(
+      (m): m is ShapeMessage & { value: WireValue } =>
+        typeof m.headers?.operation === "string" && !!m.value && m.headers.operation !== "delete"
+    );
+
+    const outcome: ShadowCompareOutcome = {
+      feed: input.feed,
+      serializationMatched: 0,
+      serializationDiverged: 0,
+      serializationSkew: 0,
+      diffs: [],
+    };
+    // NOTE(review): skipColumns is not forwarded to hydrateByIds, so full rows are read here.
+    // Bulk-hydrate every emitted run in one query rather than a per-message round
+    // trip, so shadow mode doesn't inflate the very replica load it's measuring.
+    const emittedIds = changes
+      .map((m) => m.value.id)
+      .filter((id): id is string => typeof id === "string");
+    const hydrated = await this.options.runReader.hydrateByIds(input.environment.id, emittedIds);
+    const rowsById = new Map(hydrated.map((row) => [row.id, row]));
+
+    for (const message of changes) {
+      const runId = message.value.id ?? undefined;
+      if (!runId) {
+        continue;
+      }
+
+      const row = rowsById.get(runId);
+      if (!row) {
+        // Run no longer readable (deleted / replica miss). Not a serialization divergence.
+        outcome.serializationSkew++;
+        continue;
+      }
+
+      const notifierValue = serializeRunRow(row, input.skipColumns);
+
+      // Only compare rows at the same version; otherwise the row advanced between
+      // Electric's emit and our refetch (timing skew, not a divergence).
+      if (!sameInstant(message.value.updatedAt, notifierValue.updatedAt)) {
+        outcome.serializationSkew++;
+        continue;
+      }
+
+      let rowDiverged = false;
+      for (const [column, electricRaw] of Object.entries(message.value)) {
+        const meta = COLUMN_BY_NAME.get(column);
+        if (!meta) {
+          continue;
+        }
+        const notifierRaw = notifierValue[column] ?? null;
+        if (!valuesEqual(electricRaw, notifierRaw, meta.type, meta.dims, column)) {
+          rowDiverged = true;
+          if (outcome.diffs.length < MAX_DIFFS) {
+            outcome.diffs.push({ runId, column, electric: electricRaw, notifier: notifierRaw });
+          }
+        }
+      }
+
+      if (rowDiverged) {
+        outcome.serializationDiverged++;
+      } else {
+        outcome.serializationMatched++;
+      }
+    }
+    // Membership compare: only for an initial snapshot that carries a resolver filter.
+    if (input.isInitialSnapshot && input.membershipFilter) {
+      const electricIds = new Set(
+        changes.map((m) => m.value.id).filter((id): id is string => typeof id === "string")
+      );
+      const notifierIds = new Set(
+        await this.options.runListResolver.resolveMatchingRunIds(input.membershipFilter)
+      );
+
+      outcome.missingInNotifier = [...electricIds].filter((id) => !notifierIds.has(id));
+      outcome.extraInNotifier = [...notifierIds].filter((id) => !electricIds.has(id));
+      outcome.membershipMatch =
+        outcome.missingInNotifier.length === 0 && outcome.extraInNotifier.length === 0;
+    }
+
+    return outcome;
+  }
+}
+
+function parseBody(body: string): ShapeMessage[] {
+  const trimmed = body.trim();
+  if (trimmed.length === 0) return [];
+  // Anything that is not a well-formed JSON array counts as "no messages".
+  let decoded: unknown;
+  try {
+    decoded = JSON.parse(trimmed);
+  } catch {
+    return [];
+  }
+  return Array.isArray(decoded) ? (decoded as ShapeMessage[]) : [];
+}
+
+/** Folds the legacy DEQUEUED status into EXECUTING: that rewrite is applied equally to
+ * both paths for non-current API versions, so the two are treated as equivalent. */
+function normalizeStatus(value: string): string {
+  return value !== "DEQUEUED" ? value : "EXECUTING";
+}
+
+function sameInstant(a: string | null | undefined, b: string | null | undefined): boolean {
+  if (a == null || b == null) return a == null && b == null;
+  // Identical text is the same instant even when it cannot be parsed: two unparseable
+  // values both yield NaN, and NaN !== NaN would otherwise be reported as skew.
+  if (a === b) return true;
+  return new Date(`${a}Z`).getTime() === new Date(`${b}Z`).getTime(); // SDK's RawShapeDate
+}
+
+function valuesEqual(
+  electricRaw: string | null,
+  notifierRaw: string | null,
+  type: ElectricColumnType,
+  dims: number | undefined,
+  column: string
+): boolean {
+  if (electricRaw == null || notifierRaw == null) {
+    return electricRaw == null && notifierRaw == null;
+  }
+  if (electricRaw === notifierRaw) return true; // identical wire text; avoids NaN !== NaN
+  if (dims && dims > 0) {
+    return arraysEqual(parsePgTextArray(electricRaw), parsePgTextArray(notifierRaw));
+  }
+  // Decode per column type so equivalent-but-differently-encoded values still match.
+  switch (type) {
+    case "timestamp":
+      return new Date(`${electricRaw}Z`).getTime() === new Date(`${notifierRaw}Z`).getTime();
+    case "bool":
+      return parseBool(electricRaw) === parseBool(notifierRaw);
+    case "int4":
+    case "int8":
+    case "float8":
+      return Number(electricRaw) === Number(notifierRaw);
+    case "jsonb":
+      return jsonEqual(electricRaw, notifierRaw);
+    case "text":
+    default:
+      if (column === "status") {
+        return normalizeStatus(electricRaw) === normalizeStatus(notifierRaw);
+      }
+      return electricRaw === notifierRaw;
+  }
+}
+
+function parseBool(value: string): boolean {
+  return ["t", "true"].includes(value); // Postgres text form `t` or the literal `true`
+}
+
+function jsonEqual(a: string, b: string): boolean {
+  try {
+    return deepEqual(JSON.parse(a), JSON.parse(b)); // structural compare of the parsed values
+  } catch {
+    return a === b; // a side failed to parse as JSON: compare the raw text instead
+  }
+}
+
+function deepEqual(a: unknown, b: unknown): boolean {
+  if (a === b) return true;
+  if (typeof a !== typeof b || a === null || b === null) return false;
+  // An array only equals another array; without this `[1]` would match `{"0":1}`.
+  if (Array.isArray(a) !== Array.isArray(b)) return false;
+  if (Array.isArray(a) && Array.isArray(b)) {
+    return a.length === b.length && a.every((v, i) => deepEqual(v, b[i]));
+  }
+  if (typeof a !== "object" || typeof b !== "object") return false;
+  // Plain objects: the sorted key lists must match, then every value must match.
+  const left = a as Record<string, unknown>;
+  const right = b as Record<string, unknown>;
+  const ak = Object.keys(left).sort();
+  const bk = Object.keys(right).sort();
+  if (ak.length !== bk.length || !ak.every((k, i) => k === bk[i])) return false;
+  return ak.every((k) => deepEqual(left[k], right[k]));
+}
+
+function arraysEqual(a: string[], b: string[]): boolean {
+  return a.length === b.length && !a.some((item, index) => item !== b[index]);
+}
+
+/** Parse a Postgres text-array literal (`{"a","b"}` / `{}`). Mirrors the client's pgArrayParser. */
+function parsePgTextArray(literal: string): string[] {
+  if (literal === "{}" || literal === "") {
+    return [];
+  }
+  const inner = literal.startsWith("{") && literal.endsWith("}") ? literal.slice(1, -1) : literal;
+  const result: string[] = [];
+  let i = 0;
+  while (i < inner.length) {
+    if (inner[i] === '"') {
+      // Quoted element: read up to the closing quote.
+      i++;
+      let s = "";
+      while (i < inner.length && inner[i] !== '"') {
+        if (inner[i] === "\\") {
+          i++; // a backslash escapes the next character, which is taken literally
+        }
+        s += inner[i];
+        i++;
+      }
+      result.push(s);
+      i++; // step over the closing quote
+      if (inner[i] === ",") i++;
+    } else {
+      // Unquoted element: read up to the next comma (or the end of the literal).
+      let s = "";
+      while (i < inner.length && inner[i] !== ",") {
+        s += inner[i];
+        i++;
+      }
+      result.push(s);
+      if (inner[i] === ",") i++;
+    }
+  }
+  return result;
+}
diff --git a/apps/webapp/app/services/realtime/shadowRealtimeClient.server.ts b/apps/webapp/app/services/realtime/shadowRealtimeClient.server.ts
new file mode 100644
index 00000000000..b66b70e7ad5
--- /dev/null
+++ b/apps/webapp/app/services/realtime/shadowRealtimeClient.server.ts
@@ -0,0 +1,194 @@
+import { API_VERSIONS } from "~/api/versions";
+import { logger } from "../logger.server";
+import {
+  type RealtimeEnvironment,
+  type RealtimeRequestOptions,
+  type RealtimeRunsParams,
+} from "../realtimeClient.server";
+import { RESERVED_COLUMNS } from "./electricStreamProtocol.server";
+import {
+  type RealtimeListEnvironment,
+  type RealtimeStreamClient,
+} from "./notifierRealtimeClient.server";
+import { type RunListFilter } from "./runReader.server";
+import {
+  type RealtimeShadowComparator,
+  type ShadowCompareOutcome,
+  type ShadowFeed,
+} from "./shadowCompare.server";
+
+export type ShadowRealtimeClientOptions = {
+  /** The path actually served to the client (Electric); its responses are returned as-is. */
+  electric: RealtimeStreamClient;
+  comparator: RealtimeShadowComparator; // diffs each 200 Electric body in the background
+  /** createdAt look-back window (ms) for the membership filter; not applied to batch feeds. */
+  maximumCreatedAtFilterAgeMs: number;
+  /** Row limit passed to the membership filter. */
+  maxListResults: number;
+  /** Metrics sink, called with every compare outcome (matching or not). */
+  onOutcome?: (outcome: ShadowCompareOutcome) => void;
+};
+
+/**
+ * Dual-run gate: a transparent wrapper that returns the Electric response unchanged
+ * and, in the background, diffs what the notifier path would emit against a clone of
+ * it. The shadow work is fire-and-forget — it never blocks or fails the client's
+ * request, and only 200 responses are compared — and it exercises the read replica
+ * so the notifier's real load can be measured before cutover.
+ */
+export class ShadowRealtimeClient implements RealtimeStreamClient {
+  constructor(private readonly options: ShadowRealtimeClientOptions) {}
+
+  async streamRun(
+    url: URL | string,
+    environment: RealtimeEnvironment,
+    runId: string,
+    apiVersion: API_VERSIONS,
+    requestOptions?: RealtimeRequestOptions,
+    clientVersion?: string,
+    signal?: AbortSignal
+  ): Promise<Response> {
+    const response = await this.options.electric.streamRun(
+      url,
+      environment,
+      runId,
+      apiVersion,
+      requestOptions,
+      clientVersion,
+      signal
+    );
+    this.#shadow("run", response, url, environment, requestOptions); // no membership argument
+    return response;
+  }
+
+  async streamRuns(
+    url: URL | string,
+    environment: RealtimeListEnvironment,
+    params: RealtimeRunsParams,
+    apiVersion: API_VERSIONS,
+    requestOptions?: RealtimeRequestOptions,
+    clientVersion?: string,
+    signal?: AbortSignal
+  ): Promise<Response> {
+    const response = await this.options.electric.streamRuns(
+      url,
+      environment,
+      params,
+      apiVersion,
+      requestOptions,
+      clientVersion,
+      signal
+    );
+    this.#shadow("runs", response, url, environment, requestOptions, { tags: params.tags ?? [] });
+    return response;
+  }
+
+  async streamBatch(
+    url: URL | string,
+    environment: RealtimeListEnvironment,
+    batchId: string,
+    apiVersion: API_VERSIONS,
+    requestOptions?: RealtimeRequestOptions,
+    clientVersion?: string,
+    signal?: AbortSignal
+  ): Promise<Response> {
+    const response = await this.options.electric.streamBatch(
+      url,
+      environment,
+      batchId,
+      apiVersion,
+      requestOptions,
+      clientVersion,
+      signal
+    );
+    this.#shadow("batch", response, url, environment, requestOptions, { batchId });
+    return response;
+  }
+
+  /** Starts the background compare; synchronous, returns nothing, and swallows its own errors. */
+  #shadow(
+    feed: ShadowFeed,
+    electricResponse: Response,
+    url: URL | string,
+    environment: RealtimeEnvironment & { projectId?: string },
+    requestOptions?: RealtimeRequestOptions,
+    membership?: { tags?: string[]; batchId?: string }
+  ): void {
+    // Clone synchronously, before the caller gets the response back and can consume its body.
+    let bodyClone: Response;
+    try {
+      if (electricResponse.status !== 200) {
+        return; // non-200 responses are never compared
+      }
+      bodyClone = electricResponse.clone();
+    } catch {
+      return; // cloning failed: skip the compare, the served response is unaffected
+    }
+
+    void this.#runShadow(feed, bodyClone, url, environment, requestOptions, membership).catch(
+      (error) => logger.debug("[shadowRealtime] compare failed", { feed, error })
+    );
+  }
+
+  async #runShadow(
+    feed: ShadowFeed,
+    bodyClone: Response,
+    url: URL | string,
+    environment: RealtimeEnvironment & { projectId?: string },
+    requestOptions: RealtimeRequestOptions | undefined,
+    membership: { tags?: string[]; batchId?: string } | undefined
+  ): Promise<void> {
+    const $url = new URL(url.toString());
+    const offset = $url.searchParams.get("offset") ?? "-1";
+    const handle = $url.searchParams.get("handle") ?? $url.searchParams.get("shape_id");
+    const isInitialSnapshot = offset === "-1" || !handle; // no offset or no handle on the URL
+    const skipColumns = resolveSkipColumns($url, requestOptions);
+    const electricBody = await bodyClone.text();
+
+    let membershipFilter: RunListFilter | undefined;
+    if (isInitialSnapshot && membership && environment.projectId) {
+      membershipFilter = {
+        organizationId: environment.organizationId,
+        projectId: environment.projectId,
+        environmentId: environment.id,
+        tags: membership.tags,
+        batchId: membership.batchId,
+        createdAtAfter: membership.batchId // batch feeds get no createdAt window
+          ? undefined
+          : new Date(Date.now() - this.options.maximumCreatedAtFilterAgeMs),
+        limit: this.options.maxListResults,
+      };
+    }
+
+    const outcome = await this.options.comparator.compare({
+      feed,
+      electricBody,
+      environment: { id: environment.id },
+      skipColumns,
+      isInitialSnapshot,
+      membershipFilter,
+    });
+
+    this.options.onOutcome?.(outcome);
+
+    if (outcome.serializationDiverged > 0 || outcome.membershipMatch === false) {
+      logger.warn("[shadowRealtime] divergence detected", {
+        feed,
+        serializationDiverged: outcome.serializationDiverged,
+        serializationMatched: outcome.serializationMatched,
+        serializationSkew: outcome.serializationSkew,
+        membershipMatch: outcome.membershipMatch,
+        missingInNotifier: outcome.missingInNotifier?.slice(0, 20),
+        extraInNotifier: outcome.extraInNotifier?.slice(0, 20),
+        // Log only which run/column diverged, never the raw cell values — they can
+        // include run payload/output/metadata and must not leak into logs.
+        diffs: outcome.diffs.map(({ runId, column }) => ({ runId, column })),
+      });
+    }
+  }
+}
+
+function resolveSkipColumns(url: URL, requestOptions?: RealtimeRequestOptions): string[] {
+  const requested = requestOptions?.skipColumns ?? url.searchParams.get("skipColumns")?.split(",");
+  return (requested ?? []).map((n) => n.trim()).filter((n) => n && !RESERVED_COLUMNS.includes(n));
+}
diff --git a/apps/webapp/app/services/realtime/shadowRealtimeClientInstance.server.ts b/apps/webapp/app/services/realtime/shadowRealtimeClientInstance.server.ts
new file mode 100644
index 00000000000..95edc82620d
--- /dev/null
+++ b/apps/webapp/app/services/realtime/shadowRealtimeClientInstance.server.ts
@@ -0,0 +1,66 @@
+import { Counter } from "prom-client";
+import { $replica } from "~/db.server";
+import { env } from "~/env.server";
+import { metricsRegister } from "~/metrics.server";
+import { clickhouseFactory } from "~/services/clickhouse/clickhouseFactoryInstance.server";
+import { singleton } from "~/utils/singleton";
+import { realtimeClient } from "../realtimeClientGlobal.server";
+import { ClickHouseRunListResolver } from "./clickHouseRunListResolver.server";
+import { RunHydrator } from "./runReader.server";
+import { RealtimeShadowComparator } from "./shadowCompare.server";
+import { ShadowRealtimeClient } from "./shadowRealtimeClient.server";
+
+/**
+ * Process-singleton wiring for the shadow-compare client: builds the Prometheus outcome
+ * counter, the comparator (run reader + ClickHouse list resolver, both given the replica
+ * client) and passes the env-configured limits through.
+ * Only constructed when an org's `realtimeBackend` flag is set to "shadow".
+ */
+function initializeShadowRealtimeClient(): ShadowRealtimeClient {
+  // One counter for every outcome; the feed/kind/result labels tell the outcomes apart.
+  const outcomeCounter = new Counter({
+    name: "realtime_shadow_compare_total",
+    help: "Dual-run shadow-compare outcomes (Electric vs notifier). kind=serialization|membership, result=match|diverge|skew.",
+    labelNames: ["feed", "kind", "result"] as const,
+    registers: [metricsRegister],
+  });
+
+  // Both comparator collaborators are handed the replica client.
+  const runReader = new RunHydrator({ replica: $replica });
+  const runListResolver = new ClickHouseRunListResolver({
+    getClickhouse: (organizationId) =>
+      clickhouseFactory.getClickhouseForOrganization(organizationId, "realtime"),
+    prisma: $replica,
+  });
+
+  return new ShadowRealtimeClient({
+    electric: realtimeClient,
+    comparator: new RealtimeShadowComparator({ runReader, runListResolver }),
+    maximumCreatedAtFilterAgeMs: env.REALTIME_MAXIMUM_CREATED_AT_FILTER_AGE_IN_MS,
+    maxListResults: env.REALTIME_NOTIFIER_MAX_LIST_RESULTS,
+    onOutcome: (outcome) => {
+      const { feed, membershipMatch } = outcome;
+      // Serialization results are counts: add each non-zero one under its own result label.
+      const serialization = [
+        ["match", outcome.serializationMatched],
+        ["diverge", outcome.serializationDiverged],
+        ["skew", outcome.serializationSkew],
+      ] as const;
+      for (const [result, count] of serialization) {
+        if (count) {
+          outcomeCounter.inc({ feed, kind: "serialization", result }, count);
+        }
+      }
+
+      // Membership is a single verdict per compare; nothing is recorded when it is undefined.
+      if (membershipMatch !== undefined) {
+        const result = membershipMatch ? "match" : "diverge";
+        outcomeCounter.inc({ feed, kind: "membership", result });
+      }
+    },
+  });
+}
+
+export function getShadowRealtimeClient(): ShadowRealtimeClient {
+  return singleton("shadowRealtimeClient", () => initializeShadowRealtimeClient());
+}
diff --git a/apps/webapp/app/v3/featureFlags.ts b/apps/webapp/app/v3/featureFlags.ts
index 9a5d75cfe25..55b30a8396e 100644
--- a/apps/webapp/app/v3/featureFlags.ts
+++ b/apps/webapp/app/v3/featureFlags.ts
@@ -10,6 +10,7 @@ export const FEATURE_FLAG = {
   hasPrivateConnections: "hasPrivateConnections",
   mollifierEnabled: "mollifierEnabled",
   workerQueueScheduledSplitEnabled: "workerQueueScheduledSplitEnabled",
+  realtimeBackend: "realtimeBackend",
 } as const;
 
 export const FeatureFlagCatalog = {
@@ -22,6 +23,10 @@ export const FeatureFlagCatalog = {
   [FEATURE_FLAG.hasPrivateConnections]: z.coerce.boolean(),
   [FEATURE_FLAG.mollifierEnabled]: z.coerce.boolean(),
   [FEATURE_FLAG.workerQueueScheduledSplitEnabled]: z.coerce.boolean(),
+  // Which backend serves the realtime run feed. Controllable
+  // globally and per-org (org wins). Defaults to "electric" when unset.
+  // "shadow" serves Electric but diffs the notifier path in the background.
+  [FEATURE_FLAG.realtimeBackend]: z.enum(["electric", "notifier", "shadow"]),
 };
 
 export type FeatureFlagKey = keyof typeof FeatureFlagCatalog;
diff --git a/apps/webapp/app/v3/runEngineHandlers.server.ts b/apps/webapp/app/v3/runEngineHandlers.server.ts
index 3277d74ba6e..7ef4efdef82 100644
--- a/apps/webapp/app/v3/runEngineHandlers.server.ts
+++ b/apps/webapp/app/v3/runEngineHandlers.server.ts
@@ -20,11 +20,12 @@ import { createExceptionPropertiesFromError } from "./eventRepository/common.ser
 import { getEventRepositoryForStore, recordRunDebugLog } from "./eventRepository/index.server";
 import { roomFromFriendlyRunId, socketIo } from "./handleSocketIo.server";
 import { engine } from "./runEngine.server";
+import { publishChangeRecord } from "~/services/realtime/runChangeNotifierInstance.server";
 import { PerformTaskRunAlertsService } from "./services/alerts/performTaskRunAlerts.server";
 import { TaskRunErrorCodes } from "@trigger.dev/core/v3";
 
 export function registerRunEngineEventBusHandlers() {
-  engine.eventBus.on("runSucceeded", async ({ time, run, organization }) => {
+  engine.eventBus.on("runSucceeded", async ({ time, run, organization, environment }) => {
     const [taskRunError, taskRun] = await tryCatch(
       $replica.taskRun.findFirstOrThrow({
         where: {
@@ -45,6 +46,11 @@ export function registerRunEngineEventBusHandlers() {
           isTest: true,
           organizationId: true,
           taskEventStore: true,
+          // Piggyback the realtime run-changed publish on this existing read so the
+          // per-env channel carries the membership keys (no separate query). No-op when
+          // the notifier is disabled.
+          runTags: true,
+          batchId: true,
         },
       })
     );
@@ -57,6 +63,13 @@ export function registerRunEngineEventBusHandlers() {
       return;
     }
 
+    publishChangeRecord({
+      runId: taskRun.id,
+      envId: environment.id,
+      tags: taskRun.runTags,
+      batchId: taskRun.batchId,
+    });
+
     const eventRepository = await getEventRepositoryForStore(
       run.taskEventStore,
       taskRun.organizationId ?? organization.id
@@ -91,7 +104,7 @@ export function registerRunEngineEventBusHandlers() {
   });
 
   // Handle events
-  engine.eventBus.on("runFailed", async ({ time, run, organization }) => {
+  engine.eventBus.on("runFailed", async ({ time, run, organization, environment }) => {
     const sanitizedError = sanitizeError(run.error);
     const exception = createExceptionPropertiesFromError(sanitizedError);
 
@@ -115,6 +128,10 @@ export function registerRunEngineEventBusHandlers() {
           isTest: true,
           organizationId: true,
           taskEventStore: true,
+          // Piggyback the realtime run-changed publish on this existing read (no-op when
+          // the notifier is disabled).
+          runTags: true,
+          batchId: true,
         },
       })
     );
@@ -127,6 +144,13 @@ export function registerRunEngineEventBusHandlers() {
       return;
     }
 
+    publishChangeRecord({
+      runId: taskRun.id,
+      envId: environment.id,
+      tags: taskRun.runTags,
+      batchId: taskRun.batchId,
+    });
+
     const eventRepository = await getEventRepositoryForStore(
       run.taskEventStore,
       taskRun.organizationId ?? organization.id
@@ -172,6 +196,10 @@ export function registerRunEngineEventBusHandlers() {
           isTest: true,
           organizationId: true,
           taskEventStore: true,
+          // Piggyback the realtime run-changed publish on this existing read (no-op when
+          // the notifier is disabled).
+          runTags: true,
+          batchId: true,
         },
       })
     );
@@ -184,6 +212,13 @@ export function registerRunEngineEventBusHandlers() {
       return;
     }
 
+    publishChangeRecord({
+      runId: taskRun.id,
+      envId: taskRun.runtimeEnvironmentId,
+      tags: taskRun.runTags,
+      batchId: taskRun.batchId,
+    });
+
     if (!taskRun.organizationId) {
       logger.error("[runAttemptFailed] Task run has no organization id", {
         runId: run.id,
@@ -328,7 +363,7 @@ export function registerRunEngineEventBusHandlers() {
     }
   );
 
-  engine.eventBus.on("runExpired", async ({ time, run, organization }) => {
+  engine.eventBus.on("runExpired", async ({ time, run, organization, environment }) => {
     if (!run.ttl) {
       return;
     }
@@ -353,6 +388,10 @@ export function registerRunEngineEventBusHandlers() {
           isTest: true,
           organizationId: true,
           taskEventStore: true,
+          // Piggyback the realtime run-changed publish on this existing read (no-op when
+          // the notifier is disabled).
+          runTags: true,
+          batchId: true,
         },
       })
     );
@@ -365,6 +404,13 @@ export function registerRunEngineEventBusHandlers() {
       return;
     }
 
+    publishChangeRecord({
+      runId: taskRun.id,
+      envId: environment.id,
+      tags: taskRun.runTags,
+      batchId: taskRun.batchId,
+    });
+
     const eventRepository = await getEventRepositoryForStore(
       taskRun.taskEventStore,
       taskRun.organizationId ?? organization.id
@@ -386,7 +432,7 @@ export function registerRunEngineEventBusHandlers() {
     }
   });
 
-  engine.eventBus.on("runCancelled", async ({ time, run, organization }) => {
+  engine.eventBus.on("runCancelled", async ({ time, run, organization, environment }) => {
     const [taskRunError, taskRun] = await tryCatch(
       $replica.taskRun.findFirstOrThrow({
         where: {
@@ -407,6 +453,10 @@ export function registerRunEngineEventBusHandlers() {
           isTest: true,
           organizationId: true,
           taskEventStore: true,
+          // Piggyback the realtime run-changed publish on this existing read (no-op when
+          // the notifier is disabled).
+          runTags: true,
+          batchId: true,
         },
       })
     );
@@ -419,6 +469,13 @@ export function registerRunEngineEventBusHandlers() {
       return;
     }
 
+    publishChangeRecord({
+      runId: taskRun.id,
+      envId: environment.id,
+      tags: taskRun.runTags,
+      batchId: taskRun.batchId,
+    });
+
     const eventRepository = await getEventRepositoryForStore(
       taskRun.taskEventStore,
       taskRun.organizationId ?? organization.id
@@ -505,15 +562,20 @@ export function registerRunEngineEventBusHandlers() {
   });
 
   engine.eventBus.on("runMetadataUpdated", async ({ time, run }) => {
-    const env = await findEnvironmentFromRun(run.id);
+    const result = await findEnvironmentFromRun(run.id);
 
-    if (!env) {
+    if (!result) {
       logger.error("[runMetadataUpdated] Failed to find environment", { runId: run.id });
       return;
     }
 
+    const { environment, runTags, batchId } = result;
+
     try {
-      await updateMetadataService.call(run.id, run.metadata, env);
+      await updateMetadataService.call(run.id, run.metadata, environment);
+      // Realtime run-changed publish, after the write so the router's hydrate sees the new
+      // row. A full record (env + tags + batchId), so feeds route by index.
+      publishChangeRecord({ runId: run.id, envId: environment.id, tags: runTags, batchId });
     } catch (e) {
       if (e instanceof MetadataTooLargeError) {
         logger.warn("[runMetadataUpdated] Failed to update metadata, too large", {
diff --git a/apps/webapp/package.json b/apps/webapp/package.json
index 162a9ede9a0..efebaf48207 100644
--- a/apps/webapp/package.json
+++ b/apps/webapp/package.json
@@ -163,7 +163,7 @@
     "humanize-duration": "^3.27.3",
     "input-otp": "^1.4.2",
     "intl-parse-accept-language": "^1.0.0",
-    "ioredis": "^5.3.2",
+    "ioredis": "~5.6.0",
     "isbot": "^3.6.5",
     "jose": "^5.4.0",
     "json-stable-stringify": "^1.3.0",
diff --git a/apps/webapp/test/realtime/boundedTtlCache.test.ts b/apps/webapp/test/realtime/boundedTtlCache.test.ts
new file mode 100644
index 00000000000..a3fb0b1e425
--- /dev/null
+++ b/apps/webapp/test/realtime/boundedTtlCache.test.ts
@@ -0,0 +1,52 @@
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { BoundedTtlCache } from "~/services/realtime/boundedTtlCache";
+
+describe("BoundedTtlCache", () => {
+  afterEach(() => {
+    vi.useRealTimers(); // undo any vi.useFakeTimers() a test switched on
+  });
+
+  it("returns a live entry within its TTL", () => {
+    vi.useFakeTimers();
+    const cache = new BoundedTtlCache<string>(1_000, 100); // used here as (ttl ms, capacity)
+    cache.set("k", "v");
+    vi.advanceTimersByTime(500);
+    expect(cache.get("k")).toBe("v");
+    expect(cache.size).toBe(1);
+  });
+
+  it("evicts an expired entry on read instead of letting it linger", () => {
+    vi.useFakeTimers();
+    const cache = new BoundedTtlCache<number>(1_000, 100);
+    cache.set("a", 1);
+    expect(cache.size).toBe(1);
+
+    vi.advanceTimersByTime(1_001); // one millisecond past the 1_000 ms TTL
+    expect(cache.get("a")).toBeUndefined();
+    // Regression guard: expired entries used to stay in the map until an at-capacity
+    // sweep; the read above must now have removed the entry.
+    expect(cache.size).toBe(0);
+  });
+
+  it("does not evict another entry when updating an existing key at capacity", () => {
+    const cache = new BoundedTtlCache<number>(60_000, 2);
+    cache.set("a", 1);
+    cache.set("b", 2);
+    // Updating an existing key doesn't grow the map, so it must not drop "b".
+    cache.set("a", 11);
+    expect(cache.get("a")).toBe(11);
+    expect(cache.get("b")).toBe(2);
+    expect(cache.size).toBe(2);
+  });
+
+  it("drops the oldest entry when full of still-live entries", () => {
+    const cache = new BoundedTtlCache<number>(60_000, 2);
+    cache.set("a", 1);
+    cache.set("b", 2);
+    cache.set("c", 3); // over capacity, none expired -> evict oldest insertion (a)
+    expect(cache.get("a")).toBeUndefined();
+    expect(cache.get("b")).toBe(2);
+    expect(cache.get("c")).toBe(3);
+    expect(cache.size).toBe(2);
+  });
+});
diff --git a/apps/webapp/test/realtime/electricStreamProtocol.test.ts b/apps/webapp/test/realtime/electricStreamProtocol.test.ts
new file mode 100644
index 00000000000..a48f4f9f8e8
--- /dev/null
+++ b/apps/webapp/test/realtime/electricStreamProtocol.test.ts
@@ -0,0 +1,304 @@
+import { SubscribeRunRawShape } from "@trigger.dev/core/v3/schemas";
+import { describe, expect, it } from "vitest";
+import {
+  buildElectricSchemaHeader,
+  buildRowsBody,
+  buildSnapshotBody,
+  buildUpdateBody,
+  buildUpToDateBody,
+  encodeOffset,
+  parseOffsetUpdatedAtMs,
+  type RealtimeRunRow,
+  rewriteBodyForLegacyApiVersion,
+  serializeRunRow,
+} from "~/services/realtime/electricStreamProtocol.server";
+
+function sampleRow(overrides: Partial<RealtimeRunRow> = {}): RealtimeRunRow {
+  const defaults: RealtimeRunRow = {
+    id: "run_abc123",
+    taskIdentifier: "my-task",
+    createdAt: new Date("2026-06-06T10:00:00.000Z"),
+    updatedAt: new Date("2026-06-06T10:05:30.123Z"),
+    startedAt: new Date("2026-06-06T10:01:00.000Z"),
+    delayUntil: null,
+    queuedAt: new Date("2026-06-06T10:00:30.000Z"),
+    expiredAt: null,
+    completedAt: null,
+    friendlyId: "run_friendly_abc",
+    number: 42,
+    isTest: true,
+    status: "EXECUTING",
+    usageDurationMs: 1234,
+    costInCents: 0.55,
+    baseCostInCents: 0.25,
+    ttl: "1h",
+    payload: '{"hello":"world"}',
+    payloadType: "application/json",
+    metadata: '{"step":1}',
+    metadataType: "application/json",
+    output: null,
+    outputType: "application/json",
+    runTags: ["user:123", "env:prod"],
+    error: null,
+    realtimeStreams: [],
+  };
+  return { ...defaults, ...overrides }; // any field in `overrides` replaces the baseline value
+}
+
+/**
+ * Decodes a wire `value` object with a faithful re-implementation of the
+ * @electric-sql/client value parser rules (defaultParser + pgArrayParser), i.e. the same
+ * way the deployed client would, so the result can be validated against the real SDK schema.
+ * Nulls stay null and columns absent from `schema` keep their raw text.
+ * Source: @electric-sql/client@1.0.14 src/parser.ts.
+ */
+function electricParse(
+  value: Record<string, string | null>,
+  schema: Record<string, { type: string; dims?: number }>
+): Record<string, unknown> {
+  // Parses one non-null cell according to its schema entry.
+  const decode = (raw: string, info: { type: string; dims?: number } | undefined): unknown => {
+    // A column missing from the schema keeps its raw text.
+    if (!info) {
+      return raw;
+    }
+    // Any dimensioned column goes through the pg array parser, whatever its element type.
+    if (info.dims && info.dims > 0) {
+      return parsePgTextArray(raw);
+    }
+
+    switch (info.type) {
+      case "bool":
+        return raw === "t" || raw === "true";
+      // int8 becomes a BigInt; the other numeric types become plain numbers.
+      case "int8":
+        return BigInt(raw);
+      case "int2":
+      case "int4":
+      case "float4":
+      case "float8":
+        return Number(raw);
+      case "json":
+      case "jsonb":
+        return JSON.parse(raw);
+      default:
+        return raw; // text/timestamp pass through as strings
+    }
+  };
+
+  const out: Record<string, unknown> = {};
+  for (const [key, raw] of Object.entries(value)) {
+    // SQL NULL short-circuits: it is never handed to a type parser.
+    out[key] = raw === null ? null : decode(raw, schema[key]);
+  }
+  return out;
+}
+
+/**
+ * Decode a Postgres text-array literal such as `{"a","b"}` into its elements.
+ * The first and last characters are always stripped, so the input is expected to be
+ * brace-wrapped. Quoted elements may backslash-escape a character; bare elements end at
+ * the next comma.
+ */
+function parsePgTextArray(literal: string): string[] {
+  if (literal === "{}") {
+    return []; // empty array literal
+  }
+  const body = literal.slice(1, -1);
+  const elements: string[] = [];
+  let pos = 0;
+  while (pos < body.length) {
+    let current = "";
+    if (body[pos] === '"') {
+      // Quoted element: start after the opening quote and read up to the closing one.
+      for (pos++; pos < body.length && body[pos] !== '"'; pos++) {
+        if (body[pos] === "\\") pos++; // escaped: take the following character as-is
+        current += body[pos];
+      }
+      pos++; // closing quote
+    } else {
+      // Bare element: everything up to the next comma.
+      for (; pos < body.length && body[pos] !== ","; pos++) {
+        current += body[pos];
+      }
+    }
+    elements.push(current);
+    if (body[pos] === ",") pos++; // element separator
+  }
+  return elements;
+}
+
+describe("electricStreamProtocol serializer", () => {
+  it("encodes each Postgres type the way the Electric client expects", () => {
+    const value = serializeRunRow(sampleRow());
+
+    // text: passed through as-is
+    expect(value.id).toBe("run_abc123");
+    expect(value.status).toBe("EXECUTING");
+    expect(value.payload).toBe('{"hello":"world"}');
+
+    // int/float: stringified
+    expect(value.number).toBe("42");
+    expect(value.usageDurationMs).toBe("1234");
+    expect(value.costInCents).toBe("0.55");
+
+    // bool: postgres "t"/"f"
+    expect(value.isTest).toBe("t");
+
+    // timestamp: ISO without trailing Z (the SDK appends Z before parsing)
+    expect(value.updatedAt).toBe("2026-06-06T10:05:30.123");
+    expect(value.createdAt).toBe("2026-06-06T10:00:00.000");
+
+    // nullable timestamp: null stays null
+    expect(value.delayUntil).toBeNull();
+    expect(value.completedAt).toBeNull();
+
+    // text[]: quoted pg array literal; empty realtimeStreams (@default([])) => {}
+    expect(value.runTags).toBe('{"user:123","env:prod"}');
+    expect(value.realtimeStreams).toBe("{}");
+
+    // jsonb: null stays null
+    expect(value.error).toBeNull();
+  });
+
+  it("encodes an empty no-default array column (runTags) as null, matching Electric", () => {
+    // runTags has no Postgres default, so an empty value is stored as SQL NULL and
+    // Electric emits `null` (not `{}`). realtimeStreams has @default([]), so its
+    // empty value is `{}`. Prisma hands us `[]` for both; we re-derive the wire form.
+    const value = serializeRunRow(sampleRow({ runTags: [], realtimeStreams: [] }));
+    expect(value.runTags).toBeNull();
+    expect(value.realtimeStreams).toBe("{}");
+  });
+
+  it("encodes jsonb error as a JSON string", () => {
+    const value = serializeRunRow(sampleRow({ error: { type: "STRING_ERROR", raw: "boom" } }));
+    expect(value.error).toBe('{"type":"STRING_ERROR","raw":"boom"}');
+  });
+
+  it("round-trips through the client parser into a valid SubscribeRunRawShape", () => {
+    const row = sampleRow({ error: { type: "STRING_ERROR", raw: "boom" } });
+    const value = serializeRunRow(row);
+    const schema = JSON.parse(buildElectricSchemaHeader()); // the header value is a JSON string
+
+    const decoded = electricParse(value, schema);
+    const parsed = SubscribeRunRawShape.parse(decoded);
+
+    expect(parsed.id).toBe("run_abc123");
+    expect(parsed.friendlyId).toBe("run_friendly_abc");
+    expect(parsed.status).toBe("EXECUTING");
+    expect(parsed.number).toBe(42);
+    expect(parsed.isTest).toBe(true);
+    expect(parsed.usageDurationMs).toBe(1234);
+    expect(parsed.costInCents).toBeCloseTo(0.55);
+    expect(parsed.runTags).toEqual(["user:123", "env:prod"]);
+    expect(parsed.realtimeStreams).toEqual([]);
+    // RawShapeDate appends "Z" and coerces to a Date equal to the source instant.
+    expect(parsed.createdAt.toISOString()).toBe("2026-06-06T10:00:00.000Z");
+    expect(parsed.updatedAt.toISOString()).toBe("2026-06-06T10:05:30.123Z");
+    expect(parsed.startedAt?.toISOString()).toBe("2026-06-06T10:01:00.000Z");
+    expect(parsed.delayUntil ?? null).toBeNull();
+    expect(parsed.error).toEqual({ type: "STRING_ERROR", raw: "boom" });
+  });
+
+  it("honors skipColumns (but never the reserved columns)", () => {
+    const value = serializeRunRow(sampleRow(), ["payload", "output", "id", "status"]);
+    expect(value.payload).toBeUndefined();
+    expect(value.output).toBeUndefined();
+    // id and status were also requested, but they are reserved and must still be emitted
+    expect(value.id).toBe("run_abc123");
+    expect(value.status).toBe("EXECUTING");
+
+    const schema = JSON.parse(buildElectricSchemaHeader(["payload"])); // skip applies here too
+    expect(schema.payload).toBeUndefined();
+    expect(schema.status).toBeDefined();
+  });
+});
+
+describe("electricStreamProtocol message bodies", () => {
+  it("emits insert + up-to-date for an initial snapshot", () => {
+    const messages = JSON.parse(buildSnapshotBody(sampleRow()));
+    expect(messages).toHaveLength(2); // [insert, up-to-date]
+    expect(messages[0].headers.operation).toBe("insert");
+    expect(messages[0].key).toBe('"public"."TaskRun"/"run_abc123"');
+    expect(messages[0].value.status).toBe("EXECUTING");
+    expect(messages[1].headers.control).toBe("up-to-date");
+  });
+
+  it("emits a bare up-to-date for an empty (missing) run snapshot", () => {
+    const messages = JSON.parse(buildSnapshotBody(null));
+    expect(messages).toHaveLength(1); // control message only, no row
+    expect(messages[0].headers.control).toBe("up-to-date");
+  });
+
+  it("emits update + up-to-date for a live change", () => {
+    const messages = JSON.parse(buildUpdateBody(sampleRow()));
+    expect(messages[0].headers.operation).toBe("update");
+    expect(messages[1].headers.control).toBe("up-to-date");
+  });
+
+  it("emits a bare up-to-date when nothing advanced", () => {
+    const messages = JSON.parse(buildUpToDateBody());
+    expect(messages).toEqual([{ headers: { control: "up-to-date" } }]);
+  });
+
+  it("uses the same merge key across insert and update so the client merges by row", () => {
+    const insert = JSON.parse(buildSnapshotBody(sampleRow()))[0]; // first message: the row
+    const update = JSON.parse(buildUpdateBody(sampleRow()))[0];
+    expect(insert.key).toBe(update.key);
+  });
+});
+
+describe("electricStreamProtocol multi-row (tag-list) bodies", () => {
+  it("emits one change message per row with per-row operation, then up-to-date", () => {
+    const a = sampleRow({ id: "run_a" });
+    const b = sampleRow({ id: "run_b", status: "QUEUED" });
+    const messages = JSON.parse(
+      buildRowsBody([
+        { row: a, operation: "insert" },
+        { row: b, operation: "update" },
+      ])
+    );
+    expect(messages).toHaveLength(3); // two change messages + the trailing up-to-date
+    expect(messages[0].headers.operation).toBe("insert");
+    expect(messages[0].key).toBe('"public"."TaskRun"/"run_a"');
+    expect(messages[1].headers.operation).toBe("update");
+    expect(messages[1].key).toBe('"public"."TaskRun"/"run_b"');
+    expect(messages[1].value.status).toBe("QUEUED");
+    expect(messages[2].headers.control).toBe("up-to-date");
+  });
+
+  it("emits a bare up-to-date for an empty change set", () => {
+    const messages = JSON.parse(buildRowsBody([]));
+    expect(messages).toEqual([{ headers: { control: "up-to-date" } }]);
+  });
+
+  it("honors skipColumns across all rows", () => {
+    const messages = JSON.parse(
+      buildRowsBody([{ row: sampleRow(), operation: "insert" }], ["payload"])
+    );
+    expect(messages[0].value.payload).toBeUndefined();
+    expect(messages[0].value.status).toBe("EXECUTING"); // columns not skipped are still present
+  });
+});
+
+describe("electricStreamProtocol tokens + legacy rewrite", () => {
+  it("encodes and parses the offset updatedAt segment", () => {
+    const offset = encodeOffset(1717667130123, 7);
+    expect(offset).toBe("1717667130123_7"); // "<first argument>_<second argument>"
+    expect(parseOffsetUpdatedAtMs(offset)).toBe(1717667130123);
+  });
+
+  it("treats the initial offset (-1) and garbage as zero", () => {
+    expect(parseOffsetUpdatedAtMs("-1")).toBe(0);
+    expect(parseOffsetUpdatedAtMs(null)).toBe(0);
+    expect(parseOffsetUpdatedAtMs("nonsense")).toBe(0);
+  });
+
+  it("rewrites DEQUEUED to EXECUTING for legacy API versions", () => {
+    const body = buildUpdateBody(sampleRow({ status: "DEQUEUED" }));
+    expect(body).toContain('"status":"DEQUEUED"'); // sanity: the unrewritten body has DEQUEUED
+    const rewritten = rewriteBodyForLegacyApiVersion(body);
+    expect(rewritten).not.toContain('"status":"DEQUEUED"');
+    expect(rewritten).toContain('"status":"EXECUTING"');
+  });
+});
diff --git a/apps/webapp/test/realtime/envChangeRouter.test.ts b/apps/webapp/test/realtime/envChangeRouter.test.ts
new file mode 100644
index 00000000000..befe0356284
--- /dev/null
+++ b/apps/webapp/test/realtime/envChangeRouter.test.ts
@@ -0,0 +1,187 @@
+import { describe, expect, it, vi } from "vitest";
+import {
+  EnvChangeRouter,
+  type EnvChangeSource,
+  type RowHydrator,
+} from "~/services/realtime/envChangeRouter.server";
+import { type ChangeRecord } from "~/services/realtime/runChangeNotifier.server";
+import { type RealtimeRunRow } from "~/services/realtime/electricStreamProtocol.server";
+
+const FLOOR_MS = Date.UTC(2026, 5, 7, 12, 0, 0); // 2026-06-07T12:00:00Z (Date.UTC months are 0-based)
+
+/** Sparse run-row fixture: only id, runTags, createdAt and updatedAt are populated. */
+function row(
+  id: string,
+  opts: { tags?: string[]; createdAtMs?: number; updatedAtMs?: number } = {}
+): RealtimeRunRow {
+  const runTags = opts.tags ?? [];
+  const createdAt = new Date(opts.createdAtMs ?? FLOOR_MS + 1_000);
+  const updatedAt = new Date(opts.updatedAtMs ?? FLOOR_MS + 5_000);
+  const sparse: unknown = { id, runTags, createdAt, updatedAt };
+  return sparse as RealtimeRunRow;
+}
+
+function record(runId: string, extra: Partial<ChangeRecord> = {}): ChangeRecord {
+  return { v: 1, runId, envId: "env_1", ...extra }; // `extra` is spread last, so it can override any default
+}
+
+/** In-memory EnvChangeSource double; `push` hands a batch to every listener of an env. */
+function fakeSource() {
+  type BatchListener = (records: ChangeRecord[]) => void;
+  const byEnv = new Map<string, Set<BatchListener>>();
+  const source: EnvChangeSource = {
+    subscribeToEnv(envId, onBatch) {
+      // Create the env's listener set lazily on first subscribe.
+      const existing = byEnv.get(envId);
+      const bucket = existing ?? new Set<BatchListener>();
+      if (!existing) byEnv.set(envId, bucket);
+      bucket.add(onBatch);
+      return () => {
+        byEnv.get(envId)?.delete(onBatch);
+      };
+    },
+  };
+  return {
+    source,
+    push(envId: string, records: ChangeRecord[]) {
+      byEnv.get(envId)?.forEach((listener) => listener(records));
+    },
+    isSubscribed(envId: string) {
+      return Boolean(byEnv.get(envId)?.size);
+    },
+  };
+}
+
+function makeRouter(rowsById: Map<string, RealtimeRunRow> = new Map()) {
+  // Spy hydrator: serves the known rows for the requested ids and skips unknown ids.
+  const pick = (ids: readonly string[]) => ids.flatMap((id) => rowsById.get(id) ?? []);
+  const hydrateSpy = vi.fn<RowHydrator["hydrateByIds"]>(async (_env, ids) => pick(ids));
+  const src = fakeSource();
+  const hydrator = { hydrateByIds: hydrateSpy };
+  return { router: new EnvChangeRouter({ source: src.source, hydrator }), src, hydrateSpy };
+}
+
+describe("EnvChangeRouter", () => {
+  it("routes a tag match to the feed (hydrated + serialized) and ignores non-matches", async () => {
+    const rows = new Map([["r1", row("r1", { tags: ["a"] })]]);
+    const { router, src, hydrateSpy } = makeRouter(rows);
+    const reg = router.register("env_1", { kind: "tag", tags: ["a"] }, []);
+    const wait = reg.waitForMatch(undefined, 1_000); // no abort signal; second argument is the timeout
+
+    // A non-matching tag is dropped (no wake); a matching tag wakes with the hydrated row.
+    src.push("env_1", [record("rX", { tags: ["b"] }), record("r1", { tags: ["a"] })]);
+
+    const result = await wait;
+    expect(result.reason).toBe("notify");
+    expect(result.rows.map((m) => m.row.id)).toEqual(["r1"]);
+    expect(result.rows[0].value.id).toBe("r1"); // serialized wire value
+    expect(hydrateSpy).toHaveBeenCalledWith("env_1", ["r1"], []);
+    reg.close();
+  });
+
+  it("batch-hydrates ONCE and shares the serialized value across feeds matching the same run", async () => {
+    const rows = new Map([["r1", row("r1", { tags: ["a"] })]]);
+    const { router, src, hydrateSpy } = makeRouter(rows);
+    const regs = [
+      router.register("env_1", { kind: "tag", tags: ["a"] }, []),
+      router.register("env_1", { kind: "tag", tags: ["a"] }, []),
+    ];
+    const waits = regs.map((r) => r.waitForMatch(undefined, 1_000));
+
+    src.push("env_1", [record("r1", { tags: ["a"] })]);
+    const results = await Promise.all(waits);
+
+    // One hydrate for the whole tick (same column set), shared by both feeds...
+    expect(hydrateSpy).toHaveBeenCalledTimes(1);
+    // ...and the same serialized value object is reused (serialize-once).
+    expect(results[0].rows[0].value).toBe(results[1].rows[0].value);
+    regs.forEach((r) => r.close());
+  });
+
+  it("routes a run feed by exact runId", async () => {
+    const rows = new Map([["r1", row("r1")]]);
+    const { router, src } = makeRouter(rows);
+    const reg = router.register("env_1", { kind: "run", runId: "r1" }, []);
+    const wait = reg.waitForMatch(undefined, 1_000);
+    src.push("env_1", [record("r2"), record("r1")]); // only the r1 record should reach this feed
+    const result = await wait;
+    expect(result.rows.map((m) => m.row.id)).toEqual(["r1"]);
+    reg.close();
+  });
+
+  it("routes a batch feed by batchId", async () => {
+    const rows = new Map([["r1", row("r1")]]);
+    const { router, src } = makeRouter(rows);
+    const reg = router.register("env_1", { kind: "batch", batchId: "batch_1" }, []);
+    const wait = reg.waitForMatch(undefined, 1_000);
+    src.push("env_1", [
+      record("rX", { batchId: "other" }),
+      record("r1", { batchId: "batch_1" }),
+    ]);
+    const result = await wait;
+    expect(result.rows.map((m) => m.row.id)).toEqual(["r1"]);
+    reg.close();
+  });
+
+  it("drops a tag match created before the feed's createdAt floor", async () => {
+    const rows = new Map([["r1", row("r1", { tags: ["a"], createdAtMs: FLOOR_MS - 10_000 })]]);
+    const { router, src } = makeRouter(rows);
+    const reg = router.register("env_1", { kind: "tag", tags: ["a"], createdAtFloorMs: FLOOR_MS }, []);
+    let settled = false;
+    const wait = reg.waitForMatch(undefined, 60).then((r) => {
+      settled = true;
+      return r;
+    });
+    src.push("env_1", [record("r1", { tags: ["a"], createdAtMs: FLOOR_MS - 10_000 })]);
+    // Hydrated but out-of-window -> not woken; falls through to the timeout.
+    const result = await wait;
+    expect(settled).toBe(true); // always true here: `wait` has already been awaited
+    expect(result.reason).toBe("timeout");
+    reg.close();
+  });
+
+  it("classifies a partial record (no tags) by hydrating and re-checking the row's tags", async () => {
+    // Partial record routes to all tag feeds as candidates; the authoritative row decides.
+    const rows = new Map([["r1", row("r1", { tags: ["a"] })]]);
+    const { router, src } = makeRouter(rows);
+    const match = router.register("env_1", { kind: "tag", tags: ["a"] }, []);
+    const noMatch = router.register("env_1", { kind: "tag", tags: ["z"] }, []);
+    const matchWait = match.waitForMatch(undefined, 1_000);
+    let noMatchSettled = false;
+    const noMatchWait = noMatch.waitForMatch(undefined, 80).then((r) => {
+      noMatchSettled = true;
+      return r;
+    });
+
+    src.push("env_1", [record("r1", { tags: undefined })]); // partial: tags absent
+
+    expect((await matchWait).rows.map((m) => m.row.id)).toEqual(["r1"]);
+    expect((await noMatchWait).reason).toBe("timeout"); // row tags ["a"] don't intersect ["z"]
+    expect(noMatchSettled).toBe(true);
+    match.close();
+    noMatch.close();
+  });
+
+  it("times out and aborts cleanly", async () => {
+    const { router, src } = makeRouter();
+    const reg = router.register("env_1", { kind: "tag", tags: ["a"] }, []);
+    expect((await reg.waitForMatch(undefined, 30)).reason).toBe("timeout");
+
+    const controller = new AbortController();
+    const wait = reg.waitForMatch(controller.signal, 5_000);
+    controller.abort(); // abort well before the 5s timeout so the reason must be "abort"
+    expect((await wait).reason).toBe("abort");
+    reg.close();
+    expect(src.isSubscribed("env_1")).toBe(false); // last feed left -> unsubscribed
+  });
+
+  it("only routes to feeds currently waiting (gaps between polls fall to the backstop)", async () => {
+    const rows = new Map([["r1", row("r1", { tags: ["a"] })]]);
+    const { router, src, hydrateSpy } = makeRouter(rows);
+    const reg = router.register("env_1", { kind: "tag", tags: ["a"] }, []);
+    // Not waiting yet: a push is dropped (no hydrate, no buffering).
+    src.push("env_1", [record("r1", { tags: ["a"] })]);
+    expect(hydrateSpy).not.toHaveBeenCalled(); // checked synchronously, right after the push
+    reg.close();
+  });
+});
diff --git a/apps/webapp/test/realtime/notifierHoldOnEmpty.test.ts b/apps/webapp/test/realtime/notifierHoldOnEmpty.test.ts
new file mode 100644
index 00000000000..e0c51d57f52
--- /dev/null
+++ b/apps/webapp/test/realtime/notifierHoldOnEmpty.test.ts
@@ -0,0 +1,192 @@
+import { setTimeout as sleep } from "node:timers/promises";
+import { CURRENT_API_VERSION } from "~/api/versions";
+import {
+  NotifierRealtimeClient,
+  type RealtimeListEnvironment,
+} from "~/services/realtime/notifierRealtimeClient.server";
+import { type RealtimeRunRow } from "~/services/realtime/electricStreamProtocol.server";
+import {
+  EnvChangeRouter,
+  type EnvChangeSource,
+} from "~/services/realtime/envChangeRouter.server";
+import { type ChangeRecord } from "~/services/realtime/runChangeNotifier.server";
+import { describe, expect, it, vi } from "vitest";
+
+const ENV: RealtimeListEnvironment = { id: "env_1", organizationId: "org_1", projectId: "proj_1" };
+
+// Fixed offset floor: a row's updatedAt above/below it produces a delta / empty diff. The
+// createdAt window resolves to this same floor (large maximumCreatedAtFilterAgeMs below).
+const FLOOR_MS = Date.UTC(2026, 5, 7, 12, 0, 0); // 2026-06-07T12:00:00Z (Date.UTC months are 0-based)
+
+/** Run-row fixture; tags default to ["t"] and createdAt to one second after FLOOR_MS. */
+function row(
+  id: string,
+  updatedAtMs: number,
+  opts: { createdAtMs?: number; tags?: string[] } = {}
+): RealtimeRunRow {
+  const runTags = opts.tags ?? ["t"];
+  const createdAt = new Date(opts.createdAtMs ?? FLOOR_MS + 1_000);
+  const updatedAt = new Date(updatedAtMs);
+  const sparse: unknown = { id, runTags, createdAt, updatedAt };
+  return sparse as RealtimeRunRow;
+}
+
+function rec(runId: string, extra: Partial<ChangeRecord> = {}): ChangeRecord {
+  return { v: 1, runId, envId: "env_1", ...extra }; // `extra` is spread last, so it can override any default
+}
+
+/** In-memory EnvChangeSource double: `push` fans a batch out to an env's subscribers. */
+function fakeSource() {
+  type BatchListener = (records: ChangeRecord[]) => void;
+  const byEnv = new Map<string, Set<BatchListener>>();
+  const source: EnvChangeSource = {
+    subscribeToEnv(envId, onBatch) {
+      // Create the env's listener set lazily on first subscribe.
+      const existing = byEnv.get(envId);
+      const bucket = existing ?? new Set<BatchListener>();
+      if (!existing) byEnv.set(envId, bucket);
+      bucket.add(onBatch);
+      return () => byEnv.get(envId)?.delete(onBatch);
+    },
+  };
+  return {
+    source,
+    push: (envId: string, records: ChangeRecord[]) => {
+      byEnv.get(envId)?.forEach((listener) => listener(records));
+    },
+    isSubscribed: (envId: string) => Boolean(byEnv.get(envId)?.size),
+  };
+}
+
+/** Builds a client over a pushable fake source; `setRows` swaps the rows the spies serve. */
+function makeClient(overrides: Record<string, unknown> = {}) {
+  let servedRows: RealtimeRunRow[] = [];
+  const src = fakeSource();
+  const resolveSpy = vi.fn(async () => servedRows.map(({ id }) => id));
+  const hydrateSpy = vi.fn(async (_env: string, ids: string[]) =>
+    servedRows.filter(({ id }) => ids.includes(id))
+  );
+  const router = new EnvChangeRouter({ source: src.source, hydrator: { hydrateByIds: hydrateSpy } });
+  const hundredYearsMs = 100 * 365 * 24 * 60 * 60 * 1000;
+  const client = new NotifierRealtimeClient({
+    runReader: { getRunById: async () => null, hydrateByIds: hydrateSpy } as any,
+    runListResolver: { resolveMatchingRunIds: resolveSpy } as any,
+    router,
+    limiter: { incrementAndCheck: async () => true, decrement: async () => {} } as any,
+    cachedLimitProvider: { getCachedLimit: async () => 100 },
+    // Large so the recovered createdAt floor isn't clamped past FLOOR_MS.
+    maximumCreatedAtFilterAgeMs: hundredYearsMs,
+    runSetResolveCacheTtlMs: 0,
+    livePollTimeoutMs: 10_000,
+    ...overrides,
+  });
+  return { client, src, hydrateSpy, resolveSpy, setRows: (rows: RealtimeRunRow[]) => (servedRows = rows) };
+}
+
+/** Starts a live tag-feed poll for tags ["t"]; the offset and handle both embed FLOOR_MS. */
+function liveRuns(client: NotifierRealtimeClient) {
+  // Query: offset=<FLOOR_MS>_1, live=true, handle=runs_<FLOOR_MS>_7.
+  const url =
+    `http://localhost:3030/realtime/v1/runs?offset=${FLOOR_MS}_1&live=true&handle=runs_${FLOOR_MS}_7`;
+  const filter = { tags: ["t"] };
+  const apiVersion = CURRENT_API_VERSION;
+  const clientVersion = "1.0.0";
+  return client.streamRuns(url, ENV, filter, apiVersion, undefined, clientVersion);
+}
+
+async function whenWaiting(src: ReturnType<typeof fakeSource>) {
+  // Subscribed (feed registered) + a tick so waitForMatch has armed feed.resolve.
+  await vi.waitFor(() => expect(src.isSubscribed("env_1")).toBe(true));
+  await sleep(15); // fixed 15ms grace period: timing-based, not a deterministic hand-off
+}
+
+/** Reads the response text and parses it as a JSON array of stream messages. */
+async function bodyOf(res: Response) {
+  type Message = { headers?: { control?: string; operation?: string }; value?: unknown };
+  const raw = await res.text();
+  return JSON.parse(raw) as Message[];
+}
+const hasRowOp = (body: Awaited<ReturnType<typeof bodyOf>>) =>
+  body.some((m) => m?.headers?.operation || (m && typeof m === "object" && "value" in m)); // any message with an operation header or a `value` key
+const isUpToDate = (body: Awaited<ReturnType<typeof bodyOf>>) =>
+  body.some((m) => m?.headers?.control === "up-to-date"); // any message carrying the up-to-date control header
+
+describe("NotifierRealtimeClient multi-run live path over the router", () => {
+  it("a matching change hydrates by id (no ClickHouse) and returns a delta", async () => {
+    const { client, src, hydrateSpy, resolveSpy, setRows } = makeClient();
+    setRows([row("run_1", FLOOR_MS + 5_000, { tags: ["t"] })]); // updatedAt is above the offset floor
+
+    const responsePromise = liveRuns(client);
+    await whenWaiting(src);
+    src.push("env_1", [rec("run_1", { tags: ["t", "x"] })]);
+
+    const res = await responsePromise;
+    expect(res.status).toBe(200);
+    expect(hasRowOp(await bodyOf(res))).toBe(true);
+    expect(resolveSpy).not.toHaveBeenCalled(); // ClickHouse skipped
+    expect(hydrateSpy).toHaveBeenCalledWith("env_1", ["run_1"], expect.anything());
+  });
+
+  it("a change that doesn't match the filter never wakes the feed (no CH, no PG); a later match does", async () => {
+    const { client, src, hydrateSpy, resolveSpy, setRows } = makeClient();
+    setRows([row("run_1", FLOOR_MS + 5_000, { tags: ["t"] })]);
+
+    const responsePromise = liveRuns(client);
+    let settled = false;
+    void responsePromise.then(() => (settled = true)); // flips once the poll has produced a response
+    await whenWaiting(src);
+
+    src.push("env_1", [rec("run_x", { tags: ["other"] })]); // doesn't intersect ["t"]
+    await sleep(50);
+    expect(settled).toBe(false);
+    expect(hydrateSpy).not.toHaveBeenCalled(); // router never routed it
+    expect(resolveSpy).not.toHaveBeenCalled();
+
+    src.push("env_1", [rec("run_1", { tags: ["t"] })]);
+    const res = await responsePromise;
+    expect(settled).toBe(true);
+    expect(hasRowOp(await bodyOf(res))).toBe(true);
+  });
+
+  it("a matching run created before the window floor is hydrated but dropped (keeps holding)", async () => {
+    // Generous backstop so the "still holding" assertion can't race a timeout in slow CI.
+    const { client, src, hydrateSpy, resolveSpy, setRows } = makeClient({ livePollTimeoutMs: 1500 });
+    setRows([row("run_1", FLOOR_MS + 5_000, { createdAtMs: FLOOR_MS - 10_000, tags: ["t"] })]);
+
+    const responsePromise = liveRuns(client);
+    let settled = false;
+    void responsePromise.then(() => (settled = true)); // flips once the poll has produced a response
+    await whenWaiting(src);
+    src.push("env_1", [rec("run_1", { tags: ["t"] })]);
+
+    await sleep(40);
+    expect(settled).toBe(false); // dropped by the createdAt floor -> held
+    expect(hydrateSpy).toHaveBeenCalledWith("env_1", ["run_1"], expect.anything());
+    expect(resolveSpy).not.toHaveBeenCalled();
+
+    await responsePromise; // drain via the backstop
+  });
+
+  it("the backstop timeout does a full ClickHouse resolve and returns up-to-date", async () => {
+    const { client, resolveSpy } = makeClient({ livePollTimeoutMs: 50 });
+    const res = await liveRuns(client); // never pushed -> backstop fires
+    expect(res.status).toBe(200);
+    expect(isUpToDate(await bodyOf(res))).toBe(true);
+    expect(resolveSpy).toHaveBeenCalled();
+  });
+
+  it("with holdOnEmpty=false, a matched-but-not-advanced change returns up-to-date without ClickHouse", async () => {
+    const { client, src, resolveSpy, setRows } = makeClient({ holdOnEmpty: false });
+    // Matches the tag and is in-window, but updatedAt is at/below the offset floor -> no delta.
+    setRows([row("run_1", FLOOR_MS - 1_000, { tags: ["t"] })]);
+
+    const responsePromise = liveRuns(client);
+    await whenWaiting(src);
+    src.push("env_1", [rec("run_1", { tags: ["t"] })]);
+
+    const res = await responsePromise;
+    expect(res.status).toBe(200);
+    expect(isUpToDate(await bodyOf(res))).toBe(true);
+    expect(resolveSpy).not.toHaveBeenCalled(); // the resolver spy stands in for ClickHouse
+  });
+});
diff --git a/apps/webapp/test/realtime/notifierRealtimeClient.test.ts b/apps/webapp/test/realtime/notifierRealtimeClient.test.ts
new file mode 100644
index 00000000000..5f7b96fc099
--- /dev/null
+++ b/apps/webapp/test/realtime/notifierRealtimeClient.test.ts
@@ -0,0 +1,108 @@
+import { CURRENT_API_VERSION } from "~/api/versions";
+import {
+  NotifierRealtimeClient,
+  type RealtimeListEnvironment,
+} from "~/services/realtime/notifierRealtimeClient.server";
+import { type RealtimeRunRow } from "~/services/realtime/electricStreamProtocol.server";
+import { EnvChangeRouter } from "~/services/realtime/envChangeRouter.server";
+import { describe, expect, it } from "vitest";
+
+function sampleRow(): RealtimeRunRow { // fully-populated row fixture, type-checked against RealtimeRunRow
+  return {
+    id: "run_1",
+    taskIdentifier: "t",
+    createdAt: new Date("2026-06-07T10:00:00.000Z"),
+    updatedAt: new Date("2026-06-07T10:00:01.000Z"), // one second after createdAt
+    startedAt: null,
+    delayUntil: null,
+    queuedAt: null,
+    expiredAt: null,
+    completedAt: null,
+    friendlyId: "run_friendly_1",
+    number: 1,
+    isTest: false,
+    status: "EXECUTING",
+    usageDurationMs: 0,
+    costInCents: 0,
+    baseCostInCents: 0,
+    ttl: null,
+    payload: "{}",
+    payloadType: "application/json",
+    metadata: null,
+    metadataType: "application/json",
+    output: null,
+    outputType: "application/json",
+    runTags: [],
+    error: null,
+    realtimeStreams: [],
+  };
+}
+
+// Builds a client whose reader/hydrator serve at most the one given row. Only the
+// initial-snapshot path is exercised here, which touches the shared #buildResponse.
+function makeClient(row: RealtimeRunRow | null) {
+  const hydrateByIds = async () => (row === null ? [] : [row]);
+  // Snapshot path only; the router (over a no-op source) is never invoked here.
+  const router = new EnvChangeRouter({
+    source: { subscribeToEnv: () => () => {} },
+    hydrator: { hydrateByIds },
+  });
+  const oneDayMs = 24 * 60 * 60 * 1000;
+  return new NotifierRealtimeClient({
+    runReader: { getRunById: async () => row, hydrateByIds } as any,
+    runListResolver: { resolveMatchingRunIds: async () => [] } as any,
+    router,
+    limiter: { incrementAndCheck: async () => true, decrement: async () => {} } as any,
+    cachedLimitProvider: { getCachedLimit: async () => 100 },
+    maximumCreatedAtFilterAgeMs: oneDayMs,
+  });
+}
+
+const ENV: RealtimeListEnvironment = { // environment fixture passed to both streamRun calls below
+  id: "env_1",
+  organizationId: "org_1",
+  projectId: "proj_1",
+};
+
+describe("NotifierRealtimeClient response headers", () => {
+  it("exposes electric headers cross-origin so browser hooks can read them", async () => {
+    const client = makeClient(sampleRow());
+    const res = await client.streamRun(
+      "http://localhost:3030/realtime/v1/runs/run_1?offset=-1", // offset=-1 => initial snapshot request
+      ENV,
+      "run_1",
+      CURRENT_API_VERSION,
+      undefined,
+      "1.0.0-beta.1" // modern client => lowercase electric-* headers
+    );
+
+    // Without these the deployed @electric-sql/client throws MissingHeadersError
+    // (it can't read the electric-* headers across origins). This regressed once.
+    expect(res.headers.get("access-control-allow-origin")).toBe("*");
+    expect(res.headers.get("access-control-expose-headers")).toBe("*");
+
+    // Initial (non-live) snapshot requires offset + handle + schema.
+    expect(res.headers.get("electric-offset")).toBeTruthy();
+    expect(res.headers.get("electric-handle")).toBeTruthy();
+    expect(res.headers.get("electric-schema")).toBeTruthy();
+    expect(res.headers.get("content-type")).toBe("application/json");
+  });
+
+  it("renames headers for legacy (0.4.0) clients", async () => {
+    const client = makeClient(sampleRow());
+    const res = await client.streamRun(
+      "http://localhost:3030/realtime/v1/runs/run_1?offset=-1",
+      ENV,
+      "run_1",
+      CURRENT_API_VERSION,
+      undefined,
+      undefined // no client version => legacy header names
+    );
+
+    expect(res.headers.get("electric-chunk-last-offset")).toBeTruthy(); // legacy name for the offset header
+    expect(res.headers.get("electric-shape-id")).toBeTruthy(); // legacy name for the handle header
+    expect(res.headers.get("electric-offset")).toBeNull();
+    expect(res.headers.get("electric-handle")).toBeNull();
+    expect(res.headers.get("access-control-expose-headers")).toBe("*");
+  });
+});
diff --git a/apps/webapp/test/realtime/notifierRunSetCache.test.ts b/apps/webapp/test/realtime/notifierRunSetCache.test.ts
new file mode 100644
index 00000000000..7a6449a9eb7
--- /dev/null
+++ b/apps/webapp/test/realtime/notifierRunSetCache.test.ts
@@ -0,0 +1,340 @@
+import { CURRENT_API_VERSION } from "~/api/versions";
+import {
+  NotifierRealtimeClient,
+  type RealtimeListEnvironment,
+} from "~/services/realtime/notifierRealtimeClient.server";
+import { type RealtimeRunRow } from "~/services/realtime/electricStreamProtocol.server";
+import { EnvChangeRouter } from "~/services/realtime/envChangeRouter.server";
+import { setTimeout as sleep } from "node:timers/promises";
+import { describe, expect, it, vi } from "vitest";
+
+const ENV: RealtimeListEnvironment = { id: "env_1", organizationId: "org_1", projectId: "proj_1" }; // shared environment fixture
+
+/** Sparse run-row fixture carrying just an id and fixed createdAt/updatedAt instants. */
+function row(id: string): RealtimeRunRow {
+  // Only id/createdAt/updatedAt are read directly; the rest serialize to null.
+  const createdAt = new Date("2026-06-07T09:00:00.000Z");
+  const updatedAt = new Date("2026-06-07T10:00:00.000Z");
+  const sparse: unknown = { id, createdAt, updatedAt };
+  return sparse as RealtimeRunRow;
+}
+
+/** Builds a client over spy resolver/hydrator doubles; `overrides` replaces any default option. */
+function makeClient(overrides: Record<string, unknown> = {}) {
+  // The spy declares its query argument so `mock.calls[n][0].createdAtAfter` type-checks in tests.
+  const resolveSpy = vi.fn(async (_query: { createdAtAfter?: Date }) => ["run_1", "run_2"]);
+  const hydrateSpy = vi.fn(async (_env: string, ids: string[]) => ids.map(row));
+  const client = new NotifierRealtimeClient({
+    runReader: { getRunById: async () => null, hydrateByIds: hydrateSpy } as any,
+    runListResolver: { resolveMatchingRunIds: resolveSpy } as any,
+    // No-op source: live polls never get a router wake, so they fall through to the
+    // backstop full-resolve — which is what the live tests below assert on.
+    router: new EnvChangeRouter({
+      source: { subscribeToEnv: () => () => {} },
+      hydrator: { hydrateByIds: hydrateSpy },
+    }),
+    limiter: { incrementAndCheck: async () => true, decrement: async () => {} } as any,
+    cachedLimitProvider: { getCachedLimit: async () => 100 },
+    maximumCreatedAtFilterAgeMs: 24 * 60 * 60 * 1000,
+    runSetResolveCacheTtlMs: 5_000,
+    ...overrides, // spread last so a test can replace any of the defaults above
+  });
+  // Both spies are returned so tests can assert call counts and inspect arguments.
+  return { client, resolveSpy, hydrateSpy };
+}
+
+// streamBatch with offset=-1 takes the snapshot path, which calls the coalescing
+// resolve+hydrate directly (no concurrency slot / subscription needed).
+function snapshot(client: NotifierRealtimeClient, batchId: string, skipColumns?: string) {
+  // An empty or missing skipColumns adds nothing to the query string.
+  let url = `http://localhost:3030/realtime/v1/batches/${batchId}?offset=-1`;
+  if (skipColumns) {
+    url += `&skipColumns=${skipColumns}`;
+  }
+  const apiVersion = CURRENT_API_VERSION;
+  const clientVersion = "1.0.0";
+  const batchResponse = client.streamBatch(url, ENV, batchId, apiVersion, undefined, clientVersion);
+  return batchResponse;
+}
+
+// Tag-list snapshot request (offset=-1); the tests below use it to exercise the createdAt
+// bucketing and the resolve cache key.
+function snapshotTag(client: NotifierRealtimeClient, tags: string[]) {
+  const url = "http://localhost:3030/realtime/v1/runs?offset=-1";
+  const filter = { tags };
+  const apiVersion = CURRENT_API_VERSION;
+  const clientVersion = "1.0.0";
+  // The fifth argument is deliberately left undefined.
+  const listResponse = client.streamRuns(url, ENV, filter, apiVersion, undefined, clientVersion);
+  return listResponse;
+}
+
+describe("NotifierRealtimeClient run-set resolve coalescing + cache", () => {
+  it("coalesces concurrent same-filter resolves into one ClickHouse + Postgres query", async () => {
+    const { client, resolveSpy, hydrateSpy } = makeClient();
+    let release!: (ids: string[]) => void; // assigned synchronously by the Promise executor below
+    const gate = new Promise<string[]>((resolve) => {
+      release = resolve;
+    });
+    resolveSpy.mockReturnValueOnce(gate); // first resolve stays pending until `release` is called
+
+    const p1 = snapshot(client, "batch_1");
+    const p2 = snapshot(client, "batch_1");
+    release(["run_1"]);
+    await Promise.all([p1, p2]);
+
+    expect(resolveSpy).toHaveBeenCalledTimes(1);
+    expect(hydrateSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it("serves a second same-filter request from the cache within the TTL", async () => {
+    const { client, resolveSpy, hydrateSpy } = makeClient();
+    await snapshot(client, "batch_1");
+    await snapshot(client, "batch_1");
+    expect(resolveSpy).toHaveBeenCalledTimes(1);
+    expect(hydrateSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it("does not share the cache across different filters", async () => {
+    const { client, resolveSpy } = makeClient();
+    await snapshot(client, "batch_1");
+    await snapshot(client, "batch_2");
+    expect(resolveSpy).toHaveBeenCalledTimes(2);
+  });
+
+  it("re-queries after the cache TTL expires", async () => {
+    vi.useFakeTimers({ toFake: ["Date"] }); // only Date is faked; real timers keep running
+    try {
+      const { client, resolveSpy } = makeClient({ runSetResolveCacheTtlMs: 1_000 });
+      await snapshot(client, "batch_1");
+      vi.advanceTimersByTime(1_001); // move the faked clock just past the 1s TTL
+      await snapshot(client, "batch_1");
+      expect(resolveSpy).toHaveBeenCalledTimes(2);
+    } finally {
+      vi.useRealTimers();
+    }
+  });
+
+  it("passes the client's skipColumns through to the hydrator (column projection)", async () => {
+    const { client, hydrateSpy } = makeClient();
+    await snapshot(client, "batch_1", "payload,output");
+    expect(hydrateSpy).toHaveBeenCalledWith("env_1", expect.any(Array), ["payload", "output"]);
+  });
+
+  it("reports resolve outcomes (miss then hit) to the metrics hook", async () => {
+    const results: string[] = [];
+    const { client } = makeClient({ onRunSetResolve: (r: string) => results.push(r) });
+    await snapshot(client, "batch_1");
+    await snapshot(client, "batch_1");
+    expect(results).toEqual(["miss", "hit"]);
+  });
+
+  it("mints a distinct batch handle per connection and echoes a client-provided one", async () => {
+    const { client } = makeClient();
+    // Two subscribers to the SAME batch must never share a handle (the working-set
+    // cache is keyed by it; sharing lets one suppress the other's deltas forever).
+    const res1 = await snapshot(client, "batch_1");
+    const res2 = await snapshot(client, "batch_1");
+    const h1 = res1.headers.get("electric-handle");
+    const h2 = res2.headers.get("electric-handle");
+    expect(h1).toBeTruthy();
+    expect(h1).not.toBe(h2);
+
+    // Catch-up under an existing handle keeps it.
+    const res3 = await client.streamBatch(
+      `http://localhost:3030/realtime/v1/batches/batch_1?offset=123_1&handle=${h1}`,
+      ENV,
+      "batch_1",
+      CURRENT_API_VERSION,
+      undefined,
+      "1.0.0"
+    );
+    expect(res3.headers.get("electric-handle")).toBe(h1);
+  });
+});
+
+describe("NotifierRealtimeClient resolve admission gate (mass-reconnect stampede)", () => {
+  // A resolver that blocks each invocation until released, so we can watch how many run
+  // concurrently. Tracks peak concurrency and exposes a release-one-at-a-time drain.
+  function gatedResolver() {
+    let active = 0;
+    let peak = 0;
+    const releases: Array<() => void> = []; // FIFO of pending invocations' release callbacks
+    const resolve = vi.fn(async () => {
+      active++;
+      peak = Math.max(peak, active);
+      await new Promise<void>((r) => releases.push(r)); // park until releaseOne() shifts this entry
+      active--;
+      return ["run_1"];
+    });
+    return {
+      resolve,
+      peak: () => peak,
+      releaseOne: () => releases.shift()?.(), // no-op when nothing is parked
+      waiting: () => releases.length,
+    };
+  }
+
+  function makeGatedClient(resolveAdmissionLimit: number, resolver: ReturnType<typeof gatedResolver>) {
+    const hydrateSpy = vi.fn(async (_env: string, ids: string[]) => ids.map(row));
+    return new NotifierRealtimeClient({
+      runReader: { getRunById: async () => null, hydrateByIds: hydrateSpy } as any,
+      runListResolver: { resolveMatchingRunIds: resolver.resolve } as any,
+      router: new EnvChangeRouter({
+        source: { subscribeToEnv: () => () => {} },
+        hydrator: { hydrateByIds: hydrateSpy },
+      }),
+      limiter: { incrementAndCheck: async () => true, decrement: async () => {} } as any,
+      cachedLimitProvider: { getCachedLimit: async () => 100 },
+      maximumCreatedAtFilterAgeMs: 24 * 60 * 60 * 1000,
+      runSetResolveCacheTtlMs: 0, // no cache -> every distinct filter is a fresh resolve
+      resolveAdmissionLimit,
+    });
+  }
+
+  it("throttles a distinct-filter stampede to the admission limit of concurrent CH resolves", async () => {
+    const resolver = gatedResolver();
+    const client = makeGatedClient(2, resolver);
+
+    // 5 distinct batchIds => 5 distinct filters => 5 fresh resolves, fired at once.
+    const polls = [0, 1, 2, 3, 4].map((i) => snapshot(client, `batch_${i}`));
+
+    // Only the limit (2) may run concurrently; the rest queue for a permit.
+    await vi.waitFor(() => expect(resolver.resolve).toHaveBeenCalledTimes(2));
+    await sleep(20);
+    expect(resolver.resolve).toHaveBeenCalledTimes(2); // 3 still queued behind the gate
+    expect(resolver.peak()).toBe(2);
+
+    // Drain: each release frees a permit, admitting exactly one queued resolve.
+    while (resolver.waiting() > 0) {
+      resolver.releaseOne();
+      await sleep(5); // give the next queued resolve time to be admitted and park itself
+    }
+    await Promise.all(polls);
+
+    expect(resolver.resolve).toHaveBeenCalledTimes(5); // all ran...
+    expect(resolver.peak()).toBe(2); // ...but never more than the limit at once
+  });
+
+  it("lets a same-filter burst through on a single permit (coalesces before the gate)", async () => {
+    const resolver = gatedResolver();
+    const client = makeGatedClient(1, resolver); // limit 1 would deadlock if each took a permit
+
+    // 5 identical filters fired at once -> single-flight collapses to one in-flight resolve.
+    const polls = [0, 1, 2, 3, 4].map(() => snapshot(client, "batch_same"));
+    await vi.waitFor(() => expect(resolver.resolve).toHaveBeenCalledTimes(1));
+    await sleep(20);
+
+    resolver.releaseOne();
+    await Promise.all(polls);
+    expect(resolver.resolve).toHaveBeenCalledTimes(1); // one resolve, one permit, no queue
+  });
+});
+
+describe("NotifierRealtimeClient tag-list createdAt bucketing", () => {
+  it("floors the resolved createdAt lower bound to the bucket boundary", async () => {
+    // Fix the clock to a non-bucket-aligned instant so the assertion is deterministic.
+    vi.useFakeTimers({ toFake: ["Date"] });
+    vi.setSystemTime(new Date("2026-06-07T10:00:30.500Z"));
+    try {
+      const { client, resolveSpy } = makeClient({ runSetCreatedAtBucketMs: 60_000 });
+      await snapshotTag(client, ["critical"]);
+      const passed = resolveSpy.mock.calls[0][0].createdAtAfter as Date; // first argument of the first resolve call
+      expect(passed.getTime() % 60_000).toBe(0);
+    } finally {
+      vi.useRealTimers();
+    }
+  });
+
+  it("lets two same-tag feeds in the same bucket share one resolve", async () => {
+    // A large bucket guarantees both windows floor to the same boundary regardless of
+    // the sub-millisecond gap between the two calls.
+    const { client, resolveSpy, hydrateSpy } = makeClient({
+      runSetCreatedAtBucketMs: 60 * 60_000,
+    });
+    await snapshotTag(client, ["critical"]);
+    await snapshotTag(client, ["critical"]);
+    expect(resolveSpy).toHaveBeenCalledTimes(1);
+    expect(hydrateSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it("does not share across different tags", async () => {
+    const { client, resolveSpy } = makeClient({ runSetCreatedAtBucketMs: 60 * 60_000 });
+    await snapshotTag(client, ["critical"]);
+    await snapshotTag(client, ["debug"]);
+    expect(resolveSpy).toHaveBeenCalledTimes(2);
+  });
+
+  it("does not collide a comma-containing tag with two separate tags", async () => {
+    const { client, resolveSpy } = makeClient({ runSetCreatedAtBucketMs: 60 * 60_000 });
+    await snapshotTag(client, ["a,b"]); // one tag "a,b"
+    await snapshotTag(client, ["a", "b"]); // two tags a OR b — a different filter
+    expect(resolveSpy).toHaveBeenCalledTimes(2);
+  });
+
+  it("keeps each feed's exact lower bound when bucketing is disabled (0)", async () => {
+    vi.useFakeTimers({ toFake: ["Date"] });
+    vi.setSystemTime(new Date("2026-06-07T10:00:30.500Z")); // 30.5s past the minute, so not 60s-aligned
+    try {
+      const { client, resolveSpy } = makeClient({ runSetCreatedAtBucketMs: 0 });
+      await snapshotTag(client, ["critical"]);
+      const passed = resolveSpy.mock.calls[0][0].createdAtAfter as Date;
+      // Exact (now - 24h) lower bound, not floored to a 60s boundary.
+      expect(passed.getTime() % 60_000).not.toBe(0);
+    } finally {
+      vi.useRealTimers();
+    }
+  });
+});
+
+describe("NotifierRealtimeClient review fixes", () => {
+  // makeClient's router has a no-op source, so the live poll never gets a wake and falls
+  // through to its backstop timeout — the full ClickHouse resolve these tests assert on
+  // (createdAt clamp / concurrency limit).
+
+  it("clamps a stale/crafted handle's createdAt up to the max-age floor", async () => {
+    const maxAge = 24 * 60 * 60 * 1000;
+    const { client, resolveSpy } = makeClient({
+      maximumCreatedAtFilterAgeMs: maxAge,
+      runSetCreatedAtBucketMs: 0,
+      livePollTimeoutMs: 50,
+    });
+    const before = Date.now(); // wall clock sampled before the request is issued
+    // Handle encodes createdAt = 1ms epoch, far older than the 24h ceiling.
+    await client.streamRuns(
+      "http://localhost:3030/realtime/v1/runs?offset=123_1&live=true&handle=runs_1_7",
+      ENV,
+      { tags: ["t"] },
+      CURRENT_API_VERSION,
+      undefined,
+      "1.0.0"
+    );
+    const passed = resolveSpy.mock.calls[0][0].createdAtAfter as Date;
+    // Clamped to ~now - maxAge, not the epoch value encoded in the handle.
+    expect(passed.getTime()).toBeGreaterThan(before - maxAge - 1_000); // 1s of slack below the floor
+  });
+
+  it("enforces a concurrency limit of 0 instead of failing with a 500", async () => {
+    let limitCheckedWith: number | undefined;
+    const { client } = makeClient({
+      cachedLimitProvider: { getCachedLimit: async () => 0 },
+      limiter: {
+        incrementAndCheck: async (_env: string, _id: string, limit: number) => {
+          limitCheckedWith = limit; // capture the limit the client hands to the limiter
+          return true;
+        },
+        decrement: async () => {},
+      },
+      livePollTimeoutMs: 50,
+    });
+    const res = await client.streamBatch(
+      "http://localhost:3030/realtime/v1/batches/batch_1?offset=123_1&live=true&handle=batch_batch_1_7_abc",
+      ENV,
+      "batch_1",
+      CURRENT_API_VERSION,
+      undefined,
+      "1.0.0"
+    );
+    expect(res.status).toBe(200);
+    expect(limitCheckedWith).toBe(0);
+  });
+});
diff --git a/apps/webapp/test/realtime/runChangeNotifier.test.ts b/apps/webapp/test/realtime/runChangeNotifier.test.ts
new file mode 100644
index 00000000000..96d7fd56a45
--- /dev/null
+++ b/apps/webapp/test/realtime/runChangeNotifier.test.ts
@@ -0,0 +1,172 @@
+import { redisTest } from "@internal/testcontainers";
+import { setTimeout as sleep } from "node:timers/promises";
+import { describe, expect, it, vi } from "vitest";
+import {
+  type ChangeRecord,
+  decodeChangeRecord,
+  encodeChangeRecord,
+  RunChangeNotifier,
+} from "~/services/realtime/runChangeNotifier.server";
+
+/**
+ * Maps the Redis connection details handed to a `redisTest` onto the `redis` option
+ * of `RunChangeNotifier`, with TLS and cluster mode switched off.
+ */
+function toRedisOptions(redisOptions: { host?: string; port?: number; password?: string }) {
+  const { host, port, password } = redisOptions;
+  // Shorthand keeps each key present even when its value is undefined.
+  return { host, port, password, tlsDisabled: true, clusterMode: false };
+}
+
+// Grace period (ms) that lets a SUBSCRIBE register server-side before a test publishes.
+const SUBSCRIBE_SETTLE_MS = 250;
+
+describe("RunChangeNotifier", () => {
+  redisTest(
+    "delivers a published change to an env subscriber",
+    { timeout: 30_000 },
+    async ({ redisOptions }) => {
+      const notifier = new RunChangeNotifier({ redis: toRedisOptions(redisOptions) });
+      try {
+        const delivered: ChangeRecord[] = [];
+        const findRun1 = () => delivered.find((record) => record.runId === "run_1");
+        const stop = notifier.subscribeToEnv("env_1", (batch) => delivered.push(...batch));
+        expect(notifier.activeSubscriptionCount).toBe(1);
+        await sleep(SUBSCRIBE_SETTLE_MS);
+        notifier.publish({ runId: "run_1", envId: "env_1", tags: ["a"], batchId: "batch_1" });
+
+        await vi.waitFor(() => expect(findRun1()).toBeDefined(), {
+          timeout: 5_000,
+          interval: 50,
+        });
+        const match = findRun1()!;
+        expect(match.tags).toEqual(["a"]);
+        expect(match.batchId).toBe("batch_1");
+
+        stop();
+        // Teardown completes only after Redis confirms UNSUBSCRIBE, so poll until the count is 0.
+        await vi.waitFor(() => expect(notifier.activeSubscriptionCount).toBe(0), {
+          timeout: 5_000,
+          interval: 50,
+        });
+      } finally {
+        await notifier.quit();
+      }
+    }
+  );
+
+  redisTest(
+    "does not deliver a change for a different env",
+    { timeout: 30_000 },
+    async ({ redisOptions }) => {
+      const notifier = new RunChangeNotifier({ redis: toRedisOptions(redisOptions) });
+      try {
+        const delivered: ChangeRecord[] = [];
+        notifier.subscribeToEnv("env_a", (batch) => delivered.push(...batch));
+
+        await sleep(SUBSCRIBE_SETTLE_MS);
+        // Publish to env_b only, then leave time for any (unexpected) delivery to env_a.
+        notifier.publish({ runId: "run_1", envId: "env_b", tags: [] });
+        await sleep(500);
+        expect(delivered).toHaveLength(0);
+      } finally {
+        await notifier.quit();
+      }
+    }
+  );
+
+  redisTest(
+    "coalesces a burst of env publishes into far fewer batches than publishes (lossless)",
+    { timeout: 30_000 },
+    async ({ redisOptions }) => {
+      const notifier = new RunChangeNotifier({
+        redis: toRedisOptions(redisOptions),
+        envWakeCoalesceWindowMs: 100,
+      });
+      try {
+        let batches = 0;
+        const runIds = new Set<string>();
+        notifier.subscribeToEnv("env_burst", (records) => {
+          batches++;
+          for (const r of records) runIds.add(r.runId);
+        });
+
+        await sleep(SUBSCRIBE_SETTLE_MS);
+        let pubs = 0;
+        const end = Date.now() + 1_000;
+        while (Date.now() < end) {
+          notifier.publish({ runId: `r${pubs++}`, envId: "env_burst", tags: [] });
+          await sleep(5);
+        }
+        await sleep(300); // longer than envWakeCoalesceWindowMs; presumably lets a last batch flush
+
+        expect(pubs).toBeGreaterThan(100);
+        expect(batches).toBeGreaterThanOrEqual(1);
+        // Coalescing: the callback must fire far less often than publish() was called...
+        expect(batches).toBeLessThan(pubs / 4);
+        // ...yet most run ids still arrive. NOTE(review): "lossless" implies === pubs — confirm.
+        expect(runIds.size).toBeGreaterThan(pubs / 2);
+      } finally {
+        await notifier.quit();
+      }
+    }
+  );
+
+  // Sharded pub/sub (SSUBSCRIBE/SPUBLISH/smessage) wiring — validated end to end on a
+  // single node (Redis 7.2 accepts these and delivers same-node). Multi-shard ROUTING
+  // needs a real cluster (the cluster fixture covers that); this proves the command path.
+  redisTest(
+    "delivers via sharded pub/sub on the env channel",
+    { timeout: 30_000 },
+    async ({ redisOptions }) => {
+      const notifier = new RunChangeNotifier({
+        redis: toRedisOptions(redisOptions),
+        shardedPubSub: true,
+      });
+      try {
+        const delivered: ChangeRecord[] = [];
+        const sawRun1 = () => delivered.some((record) => record.runId === "run_1");
+        notifier.subscribeToEnv("env_sharded", (batch) => delivered.push(...batch));
+
+        await sleep(SUBSCRIBE_SETTLE_MS);
+        notifier.publish({ runId: "run_1", envId: "env_sharded", tags: ["a"] });
+        await vi.waitFor(() => expect(sawRun1()).toBe(true), {
+          timeout: 5_000,
+          interval: 50,
+        });
+      } finally {
+        await notifier.quit();
+      }
+    }
+  );
+
+  describe("ChangeRecord codec", () => {
+    it("round-trips a full record (tags with a separator survive)", () => {
+      const wire = encodeChangeRecord({
+        v: 1,
+        runId: "run_1",
+        envId: "env_1",
+        tags: ["a", "b,c"],
+        batchId: "batch_1",
+      });
+      expect(decodeChangeRecord(wire)).toMatchObject({
+        v: 1,
+        runId: "run_1",
+        envId: "env_1",
+        tags: ["a", "b,c"],
+        batchId: "batch_1",
+      });
+    });
+
+    it("decodes a bare runId to a partial record (tags undefined)", () => {
+      // A bare/legacy frame has no tags; the consumer falls back to hydrate-to-classify.
+      const { runId, tags } = decodeChangeRecord("run_3");
+      expect(runId).toBe("run_3");
+      expect(tags).toBeUndefined();
+    });
+
+    it("falls back to a bare runId on an unparseable message", () => {
+      expect(decodeChangeRecord("{not json")).toMatchObject({ runId: "{not json" });
+    });
+  });
+});
diff --git a/apps/webapp/test/realtime/runReaderProjection.test.ts b/apps/webapp/test/realtime/runReaderProjection.test.ts
new file mode 100644
index 00000000000..07aebf92589
--- /dev/null
+++ b/apps/webapp/test/realtime/runReaderProjection.test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, it, vi } from "vitest";
+import { buildHydratorSelect, RunHydrator } from "~/services/realtime/runReader.server";
+
+describe("buildHydratorSelect", () => {
+  it("returns the full select when nothing is skipped", () => {
+    const fullSelect = buildHydratorSelect([]);
+    expect(fullSelect).toMatchObject({
+      id: true,
+      payload: true,
+      output: true,
+      metadata: true,
+      error: true,
+    });
+  });
+
+  it("keeps protocol-reserved columns even when asked to skip them", () => {
+    // The serializer always emits the reserved columns, so hydration has to keep
+    // selecting them whatever skipColumns says; otherwise the output would be
+    // null/incorrect.
+    const reserved = ["status", "taskIdentifier", "createdAt", "friendlyId"] as const;
+    const select = buildHydratorSelect([...reserved, "payload"]);
+    expect(select).toMatchObject({
+      status: true,
+      taskIdentifier: true,
+      createdAt: true,
+      friendlyId: true,
+    });
+    // A non-reserved skipped column is still dropped.
+    expect(select.payload).toBeUndefined();
+  });
+
+  it("drops skipped columns but always keeps id + updatedAt", () => {
+    const skipped = ["payload", "output", "metadata", "error"] as const;
+    const select = buildHydratorSelect([...skipped]);
+    for (const column of skipped) {
+      expect(select[column]).toBeUndefined();
+    }
+    // Needed internally regardless of skipColumns (keys the row, drives the diff/offset).
+    expect(select.id).toBe(true);
+    expect(select.updatedAt).toBe(true);
+    // A non-skipped column survives.
+    expect(select.status).toBe(true);
+  });
+});
+
+describe("RunHydrator.hydrateByIds column projection", () => {
+  // Builds a RunHydrator over a stub replica whose `taskRun.findMany` records the
+  // `select` it was called with and returns no rows.
+  function makeHydrator() {
+    let lastSelect: Record<string, boolean> | undefined;
+    const findMany = vi.fn(async ({ select }: { select: Record<string, boolean> }) => {
+      lastSelect = select;
+      return [];
+    });
+    const replica = { taskRun: { findMany } } as any;
+    return { hydrator: new RunHydrator({ replica }), getSelect: () => lastSelect };
+  }
+
+  it("projects the SELECT by skipColumns", async () => {
+    const { hydrator, getSelect } = makeHydrator();
+    await hydrator.hydrateByIds("env_1", ["run_1"], ["payload", "output"]);
+    const projected = getSelect()!;
+    expect(projected.payload).toBeUndefined();
+    expect(projected.output).toBeUndefined();
+    expect(projected.id).toBe(true);
+    expect(projected.updatedAt).toBe(true);
+  });
+
+  it("selects the full column set when no skipColumns are given", async () => {
+    const { hydrator, getSelect } = makeHydrator();
+    // With the skipColumns argument omitted, payload must still be selected.
+    await hydrator.hydrateByIds("env_1", ["run_1"]);
+    expect(getSelect()!.payload).toBe(true);
+  });
+});
diff --git a/apps/webapp/test/realtime/shadowCompare.test.ts b/apps/webapp/test/realtime/shadowCompare.test.ts
new file mode 100644
index 00000000000..e6604a02cd6
--- /dev/null
+++ b/apps/webapp/test/realtime/shadowCompare.test.ts
@@ -0,0 +1,216 @@
+import {
+  type RealtimeRunRow,
+  serializeRunRow,
+} from "~/services/realtime/electricStreamProtocol.server";
+import { type RunListFilter } from "~/services/realtime/runReader.server";
+import { RealtimeShadowComparator } from "~/services/realtime/shadowCompare.server";
+import { describe, expect, it } from "vitest";
+
+function sampleRow(overrides: Partial<RealtimeRunRow> = {}): RealtimeRunRow {
+  const defaults: RealtimeRunRow = {
+    id: "run_a",
+    taskIdentifier: "my-task",
+    createdAt: new Date("2026-06-07T09:00:00.000Z"),
+    updatedAt: new Date("2026-06-07T10:05:30.123Z"),
+    startedAt: null,
+    delayUntil: null,
+    queuedAt: null,
+    expiredAt: null,
+    completedAt: null,
+    friendlyId: "run_friendly_a",
+    number: 7,
+    isTest: true,
+    status: "EXECUTING",
+    usageDurationMs: 1234,
+    costInCents: 0.55,
+    baseCostInCents: 0.25,
+    ttl: "1h",
+    payload: '{"hello":"world"}',
+    payloadType: "application/json",
+    metadata: null,
+    metadataType: "application/json",
+    output: null,
+    outputType: "application/json",
+    runTags: ["a", "b"],
+    error: null,
+    realtimeStreams: [],
+  };
+  return { ...defaults, ...overrides }; // caller-supplied fields win over the defaults
+}
+
+const UP_TO_DATE = { headers: { control: "up-to-date" } };
+// Wraps a serialized row as an insert message whose key embeds the row's id.
+function insert(value: Record<string, string | null>) {
+  return { key: `"public"."TaskRun"/"${value.id}"`, value, headers: { operation: "insert" } };
+}
+
+function makeComparator(
+  rowsById: Record<string, RealtimeRunRow | null>,
+  resolvedIds: string[] = []
+) {
+  // Stub reader and resolver that answer from the supplied fixtures.
+  const reader = {
+    getRunById: async (_env: string, id: string) => rowsById[id] ?? null,
+    hydrateByIds: async (_env: string, ids: string[]) =>
+      ids.filter((id) => Boolean(rowsById[id])).map((id) => rowsById[id]),
+  } as any;
+  const resolver = { resolveMatchingRunIds: async (_f: RunListFilter) => resolvedIds } as any;
+  return new RealtimeShadowComparator({ runReader: reader, runListResolver: resolver });
+}
+
+describe("RealtimeShadowComparator serialization", () => {
+  it("counts a faithful re-serialization as a match", async () => {
+    const row = sampleRow();
+    const electricBody = JSON.stringify([insert(serializeRunRow(row)), UP_TO_DATE]);
+    const comparator = makeComparator({ run_a: row });
+
+    const result = await comparator.compare({
+      feed: "run",
+      electricBody,
+      environment: { id: "env_1" },
+      skipColumns: [],
+      isInitialSnapshot: true,
+    });
+
+    expect(result.serializationMatched).toBe(1);
+    expect(result.serializationDiverged).toBe(0);
+    expect(result.serializationSkew).toBe(0);
+    expect(result.diffs).toEqual([]);
+  });
+
+  it("does not flag semantically-equivalent but differently-encoded values", async () => {
+    const row = sampleRow();
+    // Electric's encoding differs on the wire: bool as "true" (notifier uses "t"), a
+    // number with a trailing zero, a timestamp without millis — all equal once decoded.
+    const electricValue = {
+      ...serializeRunRow(row),
+      isTest: "true",
+      costInCents: "0.5500",
+      createdAt: "2026-06-07T09:00:00",
+    };
+    const electricBody = JSON.stringify([insert(electricValue), UP_TO_DATE]);
+    const comparator = makeComparator({ run_a: row });
+
+    const result = await comparator.compare({
+      feed: "run",
+      electricBody,
+      environment: { id: "env_1" },
+      skipColumns: [],
+      isInitialSnapshot: true,
+    });
+
+    expect(result.serializationMatched).toBe(1);
+    expect(result.serializationDiverged).toBe(0);
+  });
+
+  it("flags a genuine column divergence (same version)", async () => {
+    const row = sampleRow();
+    const tampered = '{"hello":"TAMPERED"}';
+    const electricValue = { ...serializeRunRow(row), payload: tampered };
+    const electricBody = JSON.stringify([insert(electricValue), UP_TO_DATE]);
+    const comparator = makeComparator({ run_a: row });
+    const result = await comparator.compare({
+      feed: "run",
+      electricBody,
+      environment: { id: "env_1" },
+      skipColumns: [],
+      isInitialSnapshot: true,
+    });
+
+    expect(result.serializationDiverged).toBe(1);
+    expect(result.serializationMatched).toBe(0);
+    expect(result.diffs).toEqual([
+      { runId: "run_a", column: "payload", electric: tampered, notifier: '{"hello":"world"}' },
+    ]);
+  });
+
+  it("treats DEQUEUED/EXECUTING as equivalent (legacy status rewrite)", async () => {
+    const row = sampleRow({ status: "EXECUTING" });
+    const electricValue = { ...serializeRunRow(row), status: "DEQUEUED" };
+    const electricBody = JSON.stringify([insert(electricValue), UP_TO_DATE]);
+    const comparator = makeComparator({ run_a: row });
+
+    const result = await comparator.compare({
+      feed: "run",
+      electricBody,
+      environment: { id: "env_1" },
+      skipColumns: [],
+      isInitialSnapshot: true,
+    });
+
+    expect(result.serializationDiverged).toBe(0);
+    expect(result.serializationMatched).toBe(1);
+  });
+
+  it("records skew when the row advanced between emit and refetch", async () => {
+    const currentRow = sampleRow();
+    // Electric emitted an earlier updatedAt than the row the comparator refetches.
+    const staleValue = serializeRunRow(sampleRow({ updatedAt: new Date("2026-06-07T10:00:00.000Z") }));
+    const electricBody = JSON.stringify([insert(staleValue), UP_TO_DATE]);
+    const comparator = makeComparator({ run_a: currentRow });
+
+    const result = await comparator.compare({
+      feed: "run",
+      electricBody,
+      environment: { id: "env_1" },
+      skipColumns: [],
+      isInitialSnapshot: true,
+    });
+
+    expect(result.serializationSkew).toBe(1);
+    expect(result.serializationMatched).toBe(0);
+    expect(result.serializationDiverged).toBe(0);
+  });
+});
+
+describe("RealtimeShadowComparator membership", () => {
+  const filter: RunListFilter = {
+    organizationId: "org_1",
+    projectId: "proj_1",
+    environmentId: "env_1",
+    tags: ["t"],
+    createdAtAfter: new Date("2026-06-06T00:00:00.000Z"),
+    limit: 1000,
+  };
+
+  function bodyFor(ids: string[]) {
+    const inserts = ids.map((id) => insert(serializeRunRow(sampleRow({ id }))));
+    return JSON.stringify([...inserts, UP_TO_DATE]);
+  }
+
+  it("matches when Electric's set equals the notifier resolver's set", async () => {
+    const rows = { a: sampleRow({ id: "a" }), b: sampleRow({ id: "b" }) };
+    // Electric emits a and b; the resolver returns exactly the same ids.
+    const comparator = makeComparator(rows, ["a", "b"]);
+
+    const result = await comparator.compare({
+      feed: "runs",
+      electricBody: bodyFor(["a", "b"]),
+      environment: { id: "env_1" },
+      skipColumns: [],
+      isInitialSnapshot: true,
+      membershipFilter: filter,
+    });
+    expect(result.membershipMatch).toBe(true);
+    expect(result.missingInNotifier).toEqual([]);
+    expect(result.extraInNotifier).toEqual([]);
+  });
+
+  it("reports rows missing from / extra in the notifier resolution", async () => {
+    const rows = { a: sampleRow({ id: "a" }), b: sampleRow({ id: "b" }) };
+    // Relative to what Electric emits, the resolver is missing b and has an extra c.
+    const comparator = makeComparator(rows, ["a", "c"]);
+
+    const result = await comparator.compare({
+      feed: "runs",
+      electricBody: bodyFor(["a", "b"]),
+      environment: { id: "env_1" },
+      skipColumns: [],
+      isInitialSnapshot: true,
+      membershipFilter: filter,
+    });
+    expect(result.membershipMatch).toBe(false);
+    expect(result.missingInNotifier).toEqual(["b"]);
+    expect(result.extraInNotifier).toEqual(["c"]);
+  });
+});
diff --git a/internal-packages/redis/package.json b/internal-packages/redis/package.json
index 9c13bbf21b0..6c7d8aa2608 100644
--- a/internal-packages/redis/package.json
+++ b/internal-packages/redis/package.json
@@ -6,7 +6,7 @@
   "types": "./src/index.ts",
   "type": "module",
   "dependencies": {
-    "ioredis": "^5.3.2",
+    "ioredis": "~5.6.0",
     "@trigger.dev/core": "workspace:*"
   },
   "scripts": {
diff --git a/internal-packages/run-engine/src/engine/eventBus.ts b/internal-packages/run-engine/src/engine/eventBus.ts
index 2e4adeed4b1..bd29869d280 100644
--- a/internal-packages/run-engine/src/engine/eventBus.ts
+++ b/internal-packages/run-engine/src/engine/eventBus.ts
@@ -11,7 +11,14 @@ export type EventBusEvents = {
   runCreated: [
     {
       time: Date;
-      runId: string;
+      run: {
+        id: string;
+        runTags: string[];
+        batchId: string | null;
+      };
+      environment: {
+        id: string;
+      };
     },
   ];
   runEnqueuedAfterDelay: [
@@ -23,6 +30,8 @@ export type EventBusEvents = {
         queuedAt: Date;
         updatedAt: Date;
         createdAt: Date;
+        runTags: string[];
+        batchId: string | null;
       };
       organization: {
         id: string;
@@ -44,6 +53,8 @@ export type EventBusEvents = {
         delayUntil: Date;
         updatedAt: Date;
         createdAt: Date;
+        runTags: string[];
+        batchId: string | null;
       };
       organization: {
         id: string;
@@ -76,6 +87,8 @@ export type EventBusEvents = {
         maxDurationInSeconds?: number;
         maxAttempts?: number;
         createdAt: Date;
+        runTags: string[];
+        batchId: string | null;
       };
       organization: {
         id: string;
@@ -96,6 +109,8 @@ export type EventBusEvents = {
         status: TaskRunStatus;
         updatedAt: Date;
         createdAt: Date;
+        runTags: string[];
+        batchId: string | null;
       };
       organization: {
         id?: string;
@@ -119,6 +134,8 @@ export type EventBusEvents = {
         attemptNumber: number;
         baseCostInCents: number;
         executedAt: Date | undefined;
+        runTags: string[];
+        batchId: string | null;
       };
       organization: {
         id: string;
@@ -245,6 +262,8 @@ export type EventBusEvents = {
         createdAt: Date;
         error: TaskRunError;
         taskEventStore?: string;
+        runTags: string[];
+        batchId: string | null;
       };
       organization: {
         id: string;
diff --git a/internal-packages/run-engine/src/engine/index.ts b/internal-packages/run-engine/src/engine/index.ts
index 835ff90cc48..c3e0a5c75d0 100644
--- a/internal-packages/run-engine/src/engine/index.ts
+++ b/internal-packages/run-engine/src/engine/index.ts
@@ -1042,7 +1042,14 @@ export class RunEngine {
 
         this.eventBus.emit("runCreated", {
           time: new Date(),
-          runId: taskRun.id,
+          run: {
+            id: taskRun.id,
+            runTags: taskRun.runTags,
+            batchId: taskRun.batchId,
+          },
+          environment: {
+            id: environment.id,
+          },
         });
 
         return taskRun;
diff --git a/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts b/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts
index 384384fd8c7..6c66591e288 100644
--- a/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts
+++ b/internal-packages/run-engine/src/engine/systems/checkpointSystem.ts
@@ -147,6 +147,8 @@ export class CheckpointSystem {
           status: run.status,
           updatedAt: run.updatedAt,
           createdAt: run.createdAt,
+          runTags: run.runTags,
+          batchId: run.batchId,
         },
         organization: {
           id: run.runtimeEnvironment.organizationId,
@@ -308,6 +310,8 @@ export class CheckpointSystem {
           projectId: true,
           updatedAt: true,
           createdAt: true,
+          runTags: true,
+          batchId: true,
         },
       });
 
@@ -326,6 +330,8 @@ export class CheckpointSystem {
           status: run.status,
           updatedAt: run.updatedAt,
           createdAt: run.createdAt,
+          runTags: run.runTags,
+          batchId: run.batchId,
         },
         organization: {
           id: run.organizationId ?? undefined,
diff --git a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts
index 32ab98bad6c..10c965741cf 100644
--- a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts
+++ b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts
@@ -79,6 +79,8 @@ export class DelayedRunSystem {
               delayUntil: delayUntil,
               updatedAt: updatedRun.updatedAt,
               createdAt: updatedRun.createdAt,
+              runTags: updatedRun.runTags,
+              batchId: updatedRun.batchId,
             },
             organization: {
               id: snapshot.organizationId,
@@ -192,6 +194,8 @@ export class DelayedRunSystem {
           queuedAt,
           updatedAt: updatedRun.updatedAt,
           createdAt: updatedRun.createdAt,
+          runTags: updatedRun.runTags,
+          batchId: updatedRun.batchId,
         },
         organization: {
           id: run.runtimeEnvironment.organizationId,
diff --git a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts
index 3fe1ef072cf..7c811ebfdfc 100644
--- a/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts
+++ b/internal-packages/run-engine/src/engine/systems/dequeueSystem.ts
@@ -490,6 +490,8 @@ export class DequeueSystem {
                   maxAttempts: lockedTaskRun.maxAttempts ?? undefined,
                   updatedAt: lockedTaskRun.updatedAt,
                   createdAt: lockedTaskRun.createdAt,
+                  runTags: lockedTaskRun.runTags,
+                  batchId: lockedTaskRun.batchId,
                 },
                 organization: {
                   id: orgId,
@@ -751,6 +753,8 @@ export class DequeueSystem {
           attemptNumber: true,
           updatedAt: true,
           createdAt: true,
+          runTags: true,
+          batchId: true,
           runtimeEnvironment: {
             select: {
               id: true,
@@ -792,6 +796,8 @@ export class DequeueSystem {
           status: run.status,
           updatedAt: run.updatedAt,
           createdAt: run.createdAt,
+          runTags: run.runTags,
+          batchId: run.batchId,
         },
         organization: {
           id: run.runtimeEnvironment.project.organizationId,
diff --git a/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts b/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts
index 6d503012fbc..b46b857f02a 100644
--- a/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts
+++ b/internal-packages/run-engine/src/engine/systems/pendingVersionSystem.ts
@@ -163,6 +163,8 @@ export class PendingVersionSystem {
           status: "PENDING",
           updatedAt: run.updatedAt,
           createdAt: run.createdAt,
+          runTags: run.runTags,
+          batchId: run.batchId,
         },
         organization: {
           id: backgroundWorker.runtimeEnvironment.organizationId,
diff --git a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts
index 06c80f67f2c..02fd83a7a25 100644
--- a/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts
+++ b/internal-packages/run-engine/src/engine/systems/runAttemptSystem.ts
@@ -520,6 +520,8 @@ export class RunAttemptSystem {
               attemptNumber: nextAttemptNumber,
               baseCostInCents: updatedRun.baseCostInCents,
               executedAt: updatedRun.executedAt ?? undefined,
+              runTags: updatedRun.runTags,
+              batchId: updatedRun.batchId,
             },
             organization: {
               id: updatedRun.runtimeEnvironment.organizationId,
@@ -1052,6 +1054,8 @@ export class RunAttemptSystem {
                   error: completion.error,
                   createdAt: run.createdAt,
                   taskEventStore: run.taskEventStore,
+                  runTags: run.runTags,
+                  batchId: run.batchId,
                 },
                 organization: {
                   id: run.runtimeEnvironment.organizationId,
diff --git a/internal-packages/testcontainers/package.json b/internal-packages/testcontainers/package.json
index 4ea83344c34..b3ab7ce5dc4 100644
--- a/internal-packages/testcontainers/package.json
+++ b/internal-packages/testcontainers/package.json
@@ -16,7 +16,7 @@
     "@clickhouse/client": "^1.11.1",
     "@opentelemetry/api": "^1.9.1",
     "@trigger.dev/database": "workspace:*",
-    "ioredis": "^5.3.2"
+    "ioredis": "~5.6.0"
   },
   "devDependencies": {
     "@testcontainers/postgresql": "^11.14.0",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 782b62cf7ff..39273b2976c 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -228,8 +228,8 @@ importers:
         specifier: ^4.0.6
         version: 4.0.6
       ioredis:
-        specifier: ^5.3.2
-        version: 5.3.2
+        specifier: ~5.6.0
+        version: 5.6.1
       p-limit:
         specifier: ^6.2.0
         version: 6.2.0
@@ -664,8 +664,8 @@ importers:
         specifier: ^1.0.0
         version: 1.0.0
       ioredis:
-        specifier: ^5.3.2
-        version: 5.3.2
+        specifier: ~5.6.0
+        version: 5.6.1
       isbot:
         specifier: ^3.6.5
         version: 3.6.5
@@ -1256,8 +1256,8 @@ importers:
         specifier: workspace:*
         version: link:../../packages/core
       ioredis:
-        specifier: ^5.3.2
-        version: 5.3.2
+        specifier: ~5.6.0
+        version: 5.6.1
 
   internal-packages/replication:
     dependencies:
@@ -1404,8 +1404,8 @@ importers:
         specifier: workspace:*
         version: link:../database
       ioredis:
-        specifier: ^5.3.2
-        version: 5.3.2
+        specifier: ~5.6.0
+        version: 5.6.1
     devDependencies:
       '@testcontainers/postgresql':
         specifier: ^11.14.0
@@ -11970,8 +11970,8 @@ packages:
     resolution: {integrity: sha512-YFMSV91JNBOSjw1cOfw2tup6hDP7mkz+2AUV7W1L1AM6ntgI75qC1ZeFpjPGMrWp+upmBRTX2fJWQ8c7jsUWpA==}
     engines: {node: '>=14'}
 
-  ioredis@5.3.2:
-    resolution: {integrity: sha512-1DKMMzlIHM02eBBVOFQ1+AolGjs6+xEcM4PDL7NqOS6szq7H9jSaEkIUH6/a5Hl241LzW6JLSiAbNvTQjUupUA==}
+  ioredis@5.6.1:
+    resolution: {integrity: sha512-UxC0Yv1Y4WRJiGQxQkP0hfdL0/5/6YvdfOOClRgJ0qppSarkhneSa6UvkMkms0AkdGimSH3Ikqm+6mkMmX7vGA==}
     engines: {node: '>=12.22.0'}
 
   ip-address@10.0.1:
@@ -30048,11 +30048,11 @@ snapshots:
 
   intl-parse-accept-language@1.0.0: {}
 
-  ioredis@5.3.2:
+  ioredis@5.6.1:
     dependencies:
       '@ioredis/commands': 1.2.0
       cluster-key-slot: 1.1.2
-      debug: 4.3.7(supports-color@10.0.0)
+      debug: 4.4.3(supports-color@10.0.0)
       denque: 2.1.0
       lodash.defaults: 4.2.0
       lodash.isarguments: 3.1.0
@@ -33909,7 +33909,7 @@ snapshots:
 
   send@1.1.0(supports-color@10.0.0):
     dependencies:
-      debug: 4.3.6(supports-color@10.0.0)
+      debug: 4.4.3(supports-color@10.0.0)
       destroy: 1.2.0
       encodeurl: 2.0.0
       escape-html: 1.0.3