diff --git a/web/ee/tests/playwright/acceptance/members/index.ts b/web/ee/tests/playwright/acceptance/members/index.ts
index 360d0308af..4ea83d5ed6 100644
--- a/web/ee/tests/playwright/acceptance/members/index.ts
+++ b/web/ee/tests/playwright/acceptance/members/index.ts
@@ -75,16 +75,19 @@ const invitePendingMember = async (page: any, apiHelpers: any, uiHelpers: any):
     const basePath = apiHelpers.getProjectScopedBasePath()
     await page.goto(`${basePath}/settings`, {waitUntil: "domcontentloaded"})
     await uiHelpers.expectPath("/settings")
-    // networkidle ensures the dynamic() import for InviteUsersModal has finished loading
-    // before we click the button — avoids the race where the click fires before the
-    // modal component is mounted, leaving the dialog never visible.
-    await page.waitForLoadState("networkidle")
-    await expect(page.getByRole("button", {name: "Invite Members"})).toBeVisible({timeout: 15000})
 
-    await page.getByRole("button", {name: "Invite Members"}).click()
+    const inviteButton = page.getByRole("button", {name: "Invite Members"})
+    await expect(inviteButton).toBeVisible({timeout: 20000})
+    await inviteButton.click()
+
     const inviteModal = page.getByRole("dialog", {name: "Invite Members"})
-    await expect(inviteModal).toBeVisible({timeout: 15000})
-    await inviteModal.getByPlaceholder("member@organization.com").fill(testEmail)
+    const emailInput = inviteModal.getByPlaceholder("member@organization.com")
+    // Wait for the email input rather than just the dialog — the InviteUsersModal
+    // is a dynamic() import, so the form body can lag behind the modal wrapper.
+    // Waiting for the input guarantees the chunk has fully rendered.
+    await expect(emailInput).toBeVisible({timeout: 20000})
+    await emailInput.fill(testEmail)
+
     await Promise.all([
         waitForInviteResponse(page),
         inviteModal.getByRole("button", {name: "Invite"}).click(),
@@ -114,9 +117,8 @@ const membersTests = () => {
                 const basePath = apiHelpers.getProjectScopedBasePath()
                 await page.goto(`${basePath}/settings`, {waitUntil: "domcontentloaded"})
                 await uiHelpers.expectPath("/settings")
-                await page.waitForLoadState("networkidle")
                 await expect(page.getByRole("button", {name: "Invite Members"})).toBeVisible({
-                    timeout: 15000,
+                    timeout: 20000,
                 })
             })
 
@@ -126,10 +128,10 @@ const membersTests = () => {
                     await page.getByRole("button", {name: "Invite Members"}).click()
 
                     const inviteModal = page.getByRole("dialog", {name: "Invite Members"})
-                    await expect(inviteModal).toBeVisible({timeout: 10000})
-
                     const emailInput = inviteModal.getByPlaceholder("member@organization.com")
-                    await expect(emailInput).toBeVisible({timeout: 5000})
+                    // Wait for the input directly — the InviteUsersModal is a dynamic()
+                    // import so the form body can lag behind the modal wrapper appearing.
+                    await expect(emailInput).toBeVisible({timeout: 20000})
                     await emailInput.fill(testEmail)
 
                     // EE renders a role selector; keep the default selection
@@ -166,7 +168,9 @@ const membersTests = () => {
         "should resend an invitation and confirm success",
         {tag: lightFastTags},
         async ({page, apiHelpers, uiHelpers}) => {
-            test.setTimeout(60000)
+            // invitePendingMember runs a full invite flow as setup — give enough
+            // headroom for navigation + modal interaction + the resend action.
+            test.setTimeout(90000)
 
             await scenarios.given("the user is authenticated", async () => {
                 await expectAuthenticatedSession(page)
@@ -203,7 +207,9 @@ const membersTests = () => {
         "should remove a pending member from the workspace",
         {tag: lightFastTags},
         async ({page, apiHelpers, uiHelpers}) => {
-            test.setTimeout(60000)
+            // invitePendingMember runs a full invite flow as setup — give enough
+            // headroom for navigation + modal interaction + the remove action.
+            test.setTimeout(90000)
 
             await scenarios.given("the user is authenticated", async () => {
                 await expectAuthenticatedSession(page)
diff --git a/web/oss/tests/playwright/acceptance/app/test.ts b/web/oss/tests/playwright/acceptance/app/test.ts
index aafd428fb2..1d56c967e5 100644
--- a/web/oss/tests/playwright/acceptance/app/test.ts
+++ b/web/oss/tests/playwright/acceptance/app/test.ts
@@ -58,7 +58,11 @@ export const openCreateAppDrawerForType = async (
             .catch(() => false)
 
         if (opened) {
-            await typeSelector.click()
+            // The Popover re-renders when appTemplatesQueryAtom resolves,
+            // making the item briefly unstable. force:true dispatches the
+            // click immediately without waiting for Playwright's stability
+            // check, which otherwise retries until the 60 s test timeout.
+            await typeSelector.click({force: true})
             const drawer = page
                 .getByRole("dialog")
                 .filter({has: page.getByTestId("app-create-name-input")})
diff --git a/web/oss/tests/playwright/acceptance/deployment/index.ts b/web/oss/tests/playwright/acceptance/deployment/index.ts
index bc11808014..01d8d01e15 100644
--- a/web/oss/tests/playwright/acceptance/deployment/index.ts
+++ b/web/oss/tests/playwright/acceptance/deployment/index.ts
@@ -168,12 +168,12 @@ const deploymentTests = () => {
             const modal = page.getByRole("dialog", {name: /Deploy Development/i}).last()
             await expect(modal).toBeVisible({timeout: 10000})
 
-            const rows = modal.locator("[data-row-key]")
+            const rows = modal.locator('[data-row-key]:not([data-row-key*="skeleton"])')
             const deployBtn = modal.getByRole("button", {name: "Deploy"})
             const radioSelector =
                 '.ant-radio-wrapper, .ant-radio, [role="radio"], input[type="radio"]'
 
-            await expect(rows.first()).toBeVisible({timeout: 15000})
+            await expect(rows.first()).toBeVisible({timeout: 30000})
             await expect
                 .poll(
                     async () => {
diff --git a/web/oss/tests/playwright/acceptance/human-annotation/tests.ts b/web/oss/tests/playwright/acceptance/human-annotation/tests.ts
index 9d0343294f..03a5a74c3e 100644
--- a/web/oss/tests/playwright/acceptance/human-annotation/tests.ts
+++ b/web/oss/tests/playwright/acceptance/human-annotation/tests.ts
@@ -1,9 +1,10 @@
+import {randomUUID} from "crypto"
+
 import {test as baseTest} from "@agenta/web-tests/tests/fixtures/base.fixture"
 import {getProjectScopedBasePath} from "@agenta/web-tests/tests/fixtures/base.fixture/apiHelpers"
 import {expect} from "@agenta/web-tests/utils"
 import type {EvaluationRunForKindDetection} from "@agenta/web-tests/utils/evaluationKind"
 import type {Locator, Page} from "@playwright/test"
-import {randomUUID} from "crypto"
 
 import type {HumanEvaluationConfig, HumanEvaluationFixtures} from "./assets/types"
 
@@ -261,19 +262,25 @@ const getVisibleButtonByLabels = async (page: Page, labels: readonly (string | R
 }
 
 const getHumanEvaluationCreateButton = async (page: Page, timeout = 10000) => {
+    // Capture the button inside the poll so a re-render between the poll and a second lookup
+    // cannot lose it. `null as T` (not an annotation) stops TS narrowing the variable to `null`.
+    let foundButton = null as Awaited<ReturnType<typeof getVisibleButtonByLabels>>
+
     await expect
         .poll(
-            async () =>
-                Boolean(
-                    await getVisibleButtonByLabels(page, HUMAN_EVALUATION_CREATE_BUTTON_LABELS),
-                ),
+            async () => {
+                foundButton = await getVisibleButtonByLabels(
+                    page,
+                    HUMAN_EVALUATION_CREATE_BUTTON_LABELS,
+                )
+                return Boolean(foundButton)
+            },
             {timeout},
         )
         .toBe(true)
 
-    const createButton = await getVisibleButtonByLabels(page, HUMAN_EVALUATION_CREATE_BUTTON_LABELS)
-    if (createButton) {
-        return createButton
+    if (foundButton) {
+        return foundButton
     }
 
     throw new Error("Could not find a human evaluation create button.")
diff --git a/web/oss/tests/playwright/acceptance/observability/index.ts b/web/oss/tests/playwright/acceptance/observability/index.ts
index 7fccec8404..a1253549fc 100644
--- a/web/oss/tests/playwright/acceptance/observability/index.ts
+++ b/web/oss/tests/playwright/acceptance/observability/index.ts
@@ -57,9 +57,12 @@ const clickFirstTraceRow = async (page: any) => {
  * to the Observability page and waits for the trace row to appear.
  *
  * Traces are indexed asynchronously. The first trace in an ephemeral project can
- * take up to ~110 s to appear. The function enables auto-refresh (15 s interval)
- * so the page re-fetches automatically once the trace is available on the backend,
- * then waits up to 150 s for the [data-tour="trace-row"] element to become visible.
+ * take up to ~150 s to appear. Setup (provider check + app creation + playground run)
+ * adds another 30-60 s on top. The function enables auto-refresh (15 s interval)
+ * so the page re-fetches automatically, and also performs periodic manual refreshes
+ * every 20 s for up to 200 s total while waiting for [data-tour="trace-row"].
+ *
+ * Tests using this function must set test.setTimeout to at least 300000 (5 min).
  */
 const runPlaygroundAndGoToObservability = async (
     page: any,
@@ -111,8 +114,6 @@ const runPlaygroundAndGoToObservability = async (
 
     // Enable auto-refresh (the Switch next to "auto-refresh" label). This makes
     // the page re-fetch traces every 15 s without any manual Refresh clicks.
-    // When traces are indexed asynchronously, auto-refresh ensures they appear
-    // within ~15 s of becoming available on the backend.
     const autoRefreshSwitch = page.getByRole("switch").first()
     const isSwitchVisible = await autoRefreshSwitch.isVisible().catch(() => false)
     if (isSwitchVisible) {
@@ -129,20 +130,22 @@ const runPlaygroundAndGoToObservability = async (
     // find the wrong element or nothing at all.
     const firstDataRow = getFirstTraceRow(page)
 
-    // Wait up to 150 s for the trace to appear. With auto-refresh at 15 s intervals,
-    // the trace should appear within ~15 s of backend indexing completing.
-    const hasRow = await firstDataRow
-        .waitFor({state: "visible", timeout: 150000})
-        .then(() => true)
-        .catch(() => false)
-    if (hasRow) return
-
-    // Last resort: one manual refresh then a final short wait
-    if (await refreshButton.isVisible().catch(() => false)) {
-        await refreshButton.click()
-        await page.waitForTimeout(2000)
+    // Up to MAX_POLLS rounds: wait up to POLL_INTERVAL_MS for the row itself (returning as
+    // soon as it renders, not after a fixed sleep), then click Refresh to force a re-fetch in
+    // case auto-refresh lags. Indexing can take 60-150 s; 10 x 20 s gives comfortable headroom.
+    const POLL_INTERVAL_MS = 20000
+    const MAX_POLLS = 10
+    for (let attempt = 0; attempt < MAX_POLLS; attempt++) {
+        const appeared = await firstDataRow
+            .waitFor({state: "visible", timeout: POLL_INTERVAL_MS})
+            .then(() => true)
+            .catch(() => false)
+        if (appeared) return
+        if (await refreshButton.isVisible().catch(() => false)) await refreshButton.click()
     }
-    await expect(firstDataRow).toBeVisible({timeout: 20000})
+
+    // Final assertion — surfaces a clear failure message if trace never arrived.
+    await expect(firstDataRow).toBeVisible({timeout: 10000})
 }
 
 const observabilityTests = () => {
@@ -151,10 +154,9 @@ const observabilityTests = () => {
         "view traces",
         {tag: smokeTags},
         async ({page, uiHelpers, apiHelpers, testProviderHelpers}) => {
-            // 3 minutes: this is the first test in the suite and may be the first to
-            // generate a trace in the ephemeral project, where backend indexing can
-            // take 60-90 s before the row appears in the observability table.
-            test.setTimeout(180000)
+            // 5 minutes: setup (provider + app creation + playground run) takes 30-60 s,
+            // and backend trace indexing can take up to 150 s after the invoke completes.
+            test.setTimeout(300000)
 
             await scenarios.given("the user is authenticated", async () => {
                 await expectAuthenticatedSession(page)
@@ -193,7 +195,7 @@ const observabilityTests = () => {
         "should filter traces by date range and by app",
         {tag: lightSlowTags},
         async ({page, apiHelpers, uiHelpers, testProviderHelpers}) => {
-            test.setTimeout(180000)
+            test.setTimeout(300000)
             await runPlaygroundAndGoToObservability(
                 page,
                 apiHelpers,
@@ -231,7 +233,7 @@ const observabilityTests = () => {
         "should filter traces by span name or attribute",
         {tag: lightSlowTags},
         async ({page, apiHelpers, uiHelpers, testProviderHelpers}) => {
-            test.setTimeout(180000)
+            test.setTimeout(300000)
             await runPlaygroundAndGoToObservability(
                 page,
                 apiHelpers,
@@ -271,7 +273,7 @@ const observabilityTests = () => {
         "should open a span and drill into its attributes",
         {tag: lightSlowTags},
         async ({page, apiHelpers, uiHelpers, testProviderHelpers}) => {
-            test.setTimeout(180000)
+            test.setTimeout(300000)
             await runPlaygroundAndGoToObservability(
                 page,
                 apiHelpers,
@@ -304,7 +306,7 @@ const observabilityTests = () => {
         "should switch between trace tabs and see filtered rows",
         {tag: lightSlowTags},
         async ({page, apiHelpers, uiHelpers, testProviderHelpers}) => {
-            test.setTimeout(180000)
+            test.setTimeout(300000)
             await runPlaygroundAndGoToObservability(
                 page,
                 apiHelpers,
@@ -347,7 +349,7 @@ const observabilityTests = () => {
         "should create a trace after a Playground run",
         {tag: lightSlowTags},
         async ({page, apiHelpers, uiHelpers, testProviderHelpers}) => {
-            test.setTimeout(180000)
+            test.setTimeout(300000)
 
             // runPlaygroundAndGoToObservability handles the full flow:
             // run a variant → navigate to observability → wait for trace row (with Refresh).
diff --git a/web/oss/tests/playwright/acceptance/playground/index.ts b/web/oss/tests/playwright/acceptance/playground/index.ts
index 4c3183ab8a..431f052621 100644
--- a/web/oss/tests/playwright/acceptance/playground/index.ts
+++ b/web/oss/tests/playwright/acceptance/playground/index.ts
@@ -235,6 +235,7 @@ const playgroundTests = () => {
         "should open compare mode and display two variants side by side",
         {tag: compareTags},
         async ({page, apiHelpers, navigateToPlayground}) => {
+            basePlaygroundTest.setTimeout(120000)
             let appId = ""
 
             await scenarios.given("the user is authenticated", async () => {
@@ -252,7 +253,9 @@ const playgroundTests = () => {
                 async () => {
                     // The "Compare" button creates a local draft copy of the current revision,
                     // immediately adding a second panel without requiring variant selection.
-                    await page.getByRole("button", {name: "Compare"}).click()
+                    const compareButton = page.getByRole("button", {name: "Compare"})
+                    await expect(compareButton).toBeEnabled({timeout: 15000})
+                    await compareButton.click()
                 },
             )
 
diff --git a/web/oss/tests/playwright/acceptance/prompt-registry/index.ts b/web/oss/tests/playwright/acceptance/prompt-registry/index.ts
index e087918f4c..975cf9b91e 100644
--- a/web/oss/tests/playwright/acceptance/prompt-registry/index.ts
+++ b/web/oss/tests/playwright/acceptance/prompt-registry/index.ts
@@ -1,10 +1,3 @@
-import type {Locator, Page} from "@playwright/test"
-import {test} from "@agenta/web-tests/tests/fixtures/base.fixture"
-import {expect} from "@agenta/web-tests/utils"
-import {getProjectScopedBasePath} from "@agenta/web-tests/tests/fixtures/base.fixture/apiHelpers"
-import {expectAuthenticatedSession} from "../utils/auth"
-import {createScenarios} from "../utils/scenarios"
-import {buildAcceptanceTags} from "../utils/tags"
 import {
     TestCoverage,
     TestcaseType,
@@ -16,6 +9,14 @@ import {
     TestRoleType,
     TestSpeedType,
 } from "@agenta/web-tests/playwright/config/testTags"
+import {test} from "@agenta/web-tests/tests/fixtures/base.fixture"
+import {getProjectScopedBasePath} from "@agenta/web-tests/tests/fixtures/base.fixture/apiHelpers"
+import {expect} from "@agenta/web-tests/utils"
+import type {Locator, Page} from "@playwright/test"
+
+import {expectAuthenticatedSession} from "../utils/auth"
+import {createScenarios} from "../utils/scenarios"
+import {buildAcceptanceTags} from "../utils/tags"
 
 interface WorkflowRevision {
     id: string
@@ -28,12 +29,12 @@ interface WorkflowRevisionsResponse {
     count?: number
 }
 
-type PromptRegistryApiHelpers = {
+interface PromptRegistryApiHelpers {
     getApp: (slug: string) => Promise<{id: string}>
     waitForApiResponse: <T>(options: {route: string; method: string}) => Promise<T>
 }
 
-type PromptRegistryUiHelpers = {
+interface PromptRegistryUiHelpers {
     expectPath: (path: string) => Promise<void>
 }
 
@@ -88,13 +89,40 @@ const openFirstPublishedWorkflowRevision = async (
 
     test.skip(revisions.length === 0, "No workflow revisions found in registry")
 
-    const selectedRevision = revisions[0]
-    const revisionId = selectedRevision.id
-    const row = page.locator(`[data-row-key="${revisionId}"]`).first()
-    await expect(row).toBeVisible({timeout: 30000})
+    // The app may accumulate revisions across test runs, and the table uses
+    // virtual scrolling — so a specific revision ID from the API response may
+    // not be rendered if it is scrolled out of the viewport. Instead poll for
+    // ANY visible published revision row and click whichever appears first.
+    const publishedRevisionIds = new Set(revisions.map((r) => r.id))
+    let foundRevisionId: string | null = null
+
+    await expect
+        .poll(
+            async () => {
+                const rows = page.locator("[data-row-key]")
+                const count = await rows.count()
+                for (let i = 0; i < count; i++) {
+                    const row = rows.nth(i)
+                    const key = await row.getAttribute("data-row-key").catch(() => null)
+                    if (
+                        key &&
+                        publishedRevisionIds.has(key) &&
+                        (await row.isVisible().catch(() => false))
+                    ) {
+                        foundRevisionId = key
+                        return true
+                    }
+                }
+                return false
+            },
+            {timeout: 30000},
+        )
+        .toBe(true)
+
+    const row = page.locator(`[data-row-key="${foundRevisionId}"]`).first()
     await row.click()
 
-    return revisionId
+    return foundRevisionId!
 }
 
 const expectWorkflowRevisionDrawer = async (page: Page, appId: string, revisionId: string) => {
diff --git a/web/tests/tests/fixtures/base.fixture/apiHelpers/index.ts b/web/tests/tests/fixtures/base.fixture/apiHelpers/index.ts
index a5f70ffb13..b45f63c5a0 100644
--- a/web/tests/tests/fixtures/base.fixture/apiHelpers/index.ts
+++ b/web/tests/tests/fixtures/base.fixture/apiHelpers/index.ts
@@ -415,7 +415,8 @@ export const getApp = async (page: Page, type: APP_TYPE = "completion") => {
     const appMatchesType = (app: ListAppsItem) => {
         if (type === "chat") return !!app.flags?.is_chat
         if (type === "custom") return !!app.flags?.is_custom
-        return !app.flags?.is_chat && !app.flags?.is_custom
+        // completion: exclude evaluator apps, which also lack is_chat/is_custom
+        return !app.flags?.is_chat && !app.flags?.is_custom && !app.flags?.is_evaluator
     }
 
     let targetApp