Add `capture` config option as the single source of truth for what gets recorded.

NOTE(review): this patch was reconstructed from a copy whose line breaks had been
collapsed and whose angle-bracket spans had been stripped. Line counts were checked
against every hunk header. Two things are inferred and should be confirmed against
the real tree: (1) the placeholder restored as "<type>" in docs and in the deprecation
warning, and the type arguments restored as Record<string, unknown> in createEvent;
(2) the indentation of unchanged context lines (2-space Prettier style assumed). If a
hunk is rejected on context, retry with whitespace-tolerant matching.

NOTE(review): the CaptureRequest/NextlyticsConfig docs say `capture` defaults to
`({ fromBrowser }) => fromBrowser`, while the middleware falls through to the
deprecated isApiPath/excludeApiCalls/excludePaths path when `capture` is unset.
Confirm where (or whether) that default is applied.

diff --git a/packages/core/src/capture.test.ts b/packages/core/src/capture.test.ts
new file mode 100644
index 0000000..680103f
--- /dev/null
+++ b/packages/core/src/capture.test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, it } from "vitest";
+import { resolveCaptureType } from "./capture";
+
+const browserNav = { isBrowserSubrequest: false, isDocumentRequest: true };
+const nonBrowserGet = { isBrowserSubrequest: false, isDocumentRequest: false };
+const subrequest = { isBrowserSubrequest: true, isDocumentRequest: false };
+
+describe("resolveCaptureType", () => {
+  it("maps capture() return values to an event type or skip", () => {
+    expect(
+      resolveCaptureType(() => true, nonBrowserGet, { path: "/docs/x.md", method: "GET" })
+    ).toBe("pageView");
+    expect(
+      resolveCaptureType(() => false, nonBrowserGet, { path: "/docs/x.md", method: "GET" })
+    ).toBeNull();
+    expect(
+      resolveCaptureType(() => "apiCall", nonBrowserGet, { path: "/api/x", method: "GET" })
+    ).toBe("apiCall");
+  });
+
+  it("always skips browser sub-requests (RSC/XHR) before calling capture", () => {
+    let called = false;
+    const capture = () => {
+      called = true;
+      return true;
+    };
+    expect(
+      resolveCaptureType(capture, subrequest, { path: "/anything", method: "GET" })
+    ).toBeNull();
+    expect(called).toBe(false);
+  });
+
+  it("skips non-GET requests that aren't document navigations (HEAD, webhook POST)", () => {
+    let called = false;
+    const capture = () => {
+      called = true;
+      return true;
+    };
+    expect(resolveCaptureType(capture, nonBrowserGet, { path: "/raw", method: "HEAD" })).toBeNull();
+    expect(
+      resolveCaptureType(capture, nonBrowserGet, { path: "/hook", method: "POST" })
+    ).toBeNull();
+    expect(called).toBe(false);
+  });
+
+  it("calls capture for a document navigation regardless of method (form POST)", () => {
+    const seen: Array<{ fromBrowser: boolean; method: string }> = [];
+    const capture = (r: { fromBrowser: boolean; method: string }) => {
+      seen.push({ fromBrowser: r.fromBrowser, method: r.method });
+      return true;
+    };
+    expect(resolveCaptureType(capture, browserNav, { path: "/submit", method: "POST" })).toBe(
+      "pageView"
+    );
+    expect(seen).toEqual([{ fromBrowser: true, method: "POST" }]);
+  });
+
+  it("passes path, method, fromBrowser and userAgent through to capture", () => {
+    let received: unknown;
+    resolveCaptureType(
+      (r) => {
+        received = r;
+        return false;
+      },
+      nonBrowserGet,
+      { path: "/llms.txt", method: "GET", userAgent: "curl/8" }
+    );
+    expect(received).toEqual({
+      path: "/llms.txt",
+      method: "GET",
+      fromBrowser: false,
+      userAgent: "curl/8",
+    });
+  });
+});
diff --git a/packages/core/src/capture.ts b/packages/core/src/capture.ts
new file mode 100644
index 0000000..cc108f2
--- /dev/null
+++ b/packages/core/src/capture.ts
@@ -0,0 +1,42 @@
+import type { CaptureRequest } from "./types";
+
+/** The mechanical request facts the capture decision needs. */
+type CaptureReqInfo = {
+  /** Browser-initiated sub-request (RSC/XHR/fetch/subresource). */
+  isBrowserSubrequest: boolean;
+  /** Hard document navigation (Sec-Fetch-Dest: document / mode: navigate). */
+  isDocumentRequest: boolean;
+};
+
+/**
+ * Resolve the event type for a request under `capture` configuration, or `null`
+ * to skip. Pure and side-effect free so it can be unit-tested directly.
+ *
+ * Mechanical noise is filtered first regardless of what `capture` returns:
+ * - browser sub-requests (RSC soft-nav / XHR / fetch / subresource) — would
+ *   duplicate the client-side pageView, so never recorded here;
+ * - non-GET requests that aren't document navigations (HEAD probes, webhook
+ *   POSTs, etc.) — not page views.
+ *
+ * Everything else — a real browser navigation, or a direct GET from a
+ * non-browser client — is handed to `capture`, whose return decides:
+ * `false` → skip, `true` → "pageView", `"<type>"` → that event type.
+ */
+export function resolveCaptureType(
+  capture: (req: CaptureRequest) => boolean | string,
+  reqInfo: CaptureReqInfo,
+  req: { path: string; method: string; userAgent?: string }
+): string | null {
+  if (reqInfo.isBrowserSubrequest) return null;
+  if (req.method !== "GET" && !reqInfo.isDocumentRequest) return null;
+
+  const result = capture({
+    path: req.path,
+    method: req.method,
+    fromBrowser: reqInfo.isDocumentRequest,
+    userAgent: req.userAgent,
+  });
+
+  if (result === false) return null;
+  return result === true ? "pageView" : result;
+}
diff --git a/packages/core/src/config-helpers.ts b/packages/core/src/config-helpers.ts
index 9fcf505..50ed0a5 100644
--- a/packages/core/src/config-helpers.ts
+++ b/packages/core/src/config-helpers.ts
@@ -29,9 +29,24 @@ export interface ConfigValidationResult {
   warnings: string[];
 }
 
-export function validateConfig(_config: NextlyticsConfig): ConfigValidationResult {
-  // Currently no validations - can add backend-specific checks here
-  return { valid: true, warnings: [] };
+export function validateConfig(config: NextlyticsConfig): ConfigValidationResult {
+  const warnings: string[] = [];
+
+  const deprecated = (["isApiPath", "excludeApiCalls", "excludePaths"] as const).filter(
+    (k) => config[k] !== undefined
+  );
+
+  if (config.capture && deprecated.length > 0) {
+    warnings.push(
+      `[Nextlytics] \`capture\` is set, so the deprecated option(s) ${deprecated.join(", ")} are ignored. Move that logic into \`capture\`.`
+    );
+  } else if (deprecated.length > 0) {
+    warnings.push(
+      `[Nextlytics] ${deprecated.join(", ")} are deprecated; prefer \`capture\` (return false / true / "<type>").`
+    );
+  }
+
+  return { valid: true, warnings };
 }
 
 export function logConfigWarnings(result: ConfigValidationResult): void {
diff --git a/packages/core/src/middleware.ts b/packages/core/src/middleware.ts
index ce412e1..60002f4 100644
--- a/packages/core/src/middleware.ts
+++ b/packages/core/src/middleware.ts
@@ -14,6 +14,7 @@ import type {
 } from "./types";
 import type { NextlyticsConfigWithDefaults } from "./config-helpers";
 import { generateId, getRequestInfo, createServerContext, getNextVersion } from "./uitils";
+import { resolveCaptureType } from "./capture";
 import { resolveAnonymousUser } from "./anonymous-user";
 import {
   handleEventPost,
@@ -90,6 +91,53 @@ export function createNextlyticsMiddleware(
       return response;
     }
 
+    // capture-based config: the single source of truth for what gets recorded.
+    // When set, the deprecated isApiPath/excludeApiCalls/excludePaths are ignored.
+    if (config.capture) {
+      const eventType = resolveCaptureType(config.capture, reqInfo, {
+        path: pathname,
+        method: request.method,
+        userAgent: request.headers.get("user-agent") ?? undefined,
+      });
+      if (eventType === null) {
+        const response = NextResponse.next();
+        response.headers.set(headerNames.active, "1");
+        return response;
+      }
+
+      const pageRenderId = generateId();
+      const serverContext = createServerContext(request);
+      const response = NextResponse.next();
+      const ctx = createRequestContext(request);
+      response.cookies.set(LAST_PAGE_RENDER_ID_COOKIE, pageRenderId, { path: "/" });
+      const { anonId } = await resolveAnonymousUser({ ctx, serverContext, config, response });
+      const userContext = await getUserContext(config, ctx);
+      const extraProps = await getEventProps(config, ctx, userContext);
+      const event = createEvent(
+        pageRenderId,
+        serverContext,
+        eventType,
+        userContext,
+        anonId,
+        extraProps
+      );
+      const { clientActions, completion } = dispatchEvent(event, ctx, "on-request");
+      const actions = await clientActions;
+      const scripts = actions.items.filter(
+        (i): i is TemplatizedScriptInsertion => i.type === "script-template"
+      );
+      after(() => completion);
+      serializeServerComponentContext(response, {
+        pageRenderId,
+        pathname: request.nextUrl.pathname,
+        search: request.nextUrl.search,
+        scripts,
+      });
+      return response;
+    }
+
+    // ----- Deprecated path (no `capture`): isApiPath / excludeApiCalls / excludePaths -----
+
     // Skip browser-initiated sub-requests (RSC soft-navigations, XHR, fetch(),
     // subresources): the browser sets Sec-Fetch-Dest to something other than
     // "document" for these. A single soft navigation fires RSC fetches that
@@ -152,10 +200,10 @@ export function createNextlyticsMiddleware(
     const userContext = await getUserContext(config, ctx);
     const extraProps = await getEventProps(config, ctx, userContext);
 
-    const pageViewEvent = createPageViewEvent(
+    const pageViewEvent = createEvent(
       pageRenderId,
       serverContext,
-      isApiPath,
+      isApiPath ? "apiCall" : "pageView",
       userContext,
       anonId,
       extraProps
@@ -185,15 +233,14 @@ export function createNextlyticsMiddleware(
   };
 }
 
-function createPageViewEvent(
+function createEvent(
   pageRenderId: string,
   serverContext: ServerEventContext,
-  isApiPath: boolean,
+  eventType: string,
   userContext?: UserContext,
   anonymousUserId?: string,
   extraProps?: Record<string, unknown>
 ): NextlyticsEvent {
-  const eventType = isApiPath ? "apiCall" : "pageView";
   return {
     origin: "server",
     collectedAt: serverContext.collectedAt.toISOString(),
diff --git a/packages/core/src/types.ts b/packages/core/src/types.ts
index 088e6b6..26ae1ba 100644
--- a/packages/core/src/types.ts
+++ b/packages/core/src/types.ts
@@ -142,9 +142,40 @@ export type BackendWithConfig = {
 /** Backend config entry - either a backend directly or with config */
 export type BackendConfigEntry = NextlyticsBackend | NextlyticsBackendFactory | BackendWithConfig;
 
+/** The request `capture` decides on. Only real browser navigations and direct
+ * (non-browser) requests reach `capture`; RSC/XHR sub-requests, prefetches,
+ * static assets, and non-GET non-navigation writes are skipped before it. */
+export type CaptureRequest = {
+  /** URL pathname, e.g. "/docs/quick-start.md" */
+  path: string;
+  /** HTTP method (GET, POST, …) */
+  method: string;
+  /** True when a real browser navigated here (Sec-Fetch-Dest: document). False
+   * for programmatic clients — agents, crawlers, curl, server-to-server — which
+   * omit Sec-Fetch-* headers. */
+  fromBrowser: boolean;
+  /** User-Agent header, if present. */
+  userAgent?: string;
+};
+
 export type NextlyticsConfig = {
   /** Enable debug logging (shows backend stats for each event) */
   debug?: boolean;
+  /**
+   * Decide whether — and as what event type — to record a request.
+   *
+   * - `false` → don't record
+   * - `true` → record as the default type ("pageView")
+   * - `"<type>"` → record with this string as `event.type` (e.g. "apiCall")
+   *
+   * Real browser users are the common case; programmatic clients (agents,
+   * crawlers, curl) are identified by `fromBrowser: false`. Defaults to
+   * `({ fromBrowser }) => fromBrowser` — i.e. track browser navigations only.
+   *
+   * When set, this is the single source of truth and the deprecated
+   * `isApiPath` / `excludeApiCalls` / `excludePaths` options are ignored.
+   */
+  capture?: (req: CaptureRequest) => boolean | string;
   anonymousUsers?: {
     /** Store anonymous ID in cookies */
     useCookies?: boolean;
@@ -157,11 +188,11 @@ export type NextlyticsConfig = {
     /** Cookie max age in seconds (default: 2 years) */
     cookieMaxAge?: number;
   };
-  /** Skip tracking for API routes */
+  /** @deprecated Use `capture` instead — return `false` for API paths. Skip tracking for API routes. */
   excludeApiCalls?: boolean;
-  /** Skip tracking for specific paths */
+  /** @deprecated Use `capture` instead — return `false` for the paths you want to skip. */
   excludePaths?: (path: string) => boolean;
-  /** Determine if path is API route. Default: () => false */
+  /** @deprecated Use `capture` instead — return `"apiCall"` (or `false`) for API paths. */
   isApiPath?: (path: string) => boolean;
   /** Endpoint for client events. Default: "/api/event" */
   eventEndpoint?: string;
diff --git a/packages/website/src/nextlytics.ts b/packages/website/src/nextlytics.ts
index 1c7bb62..a28f922 100644
--- a/packages/website/src/nextlytics.ts
+++ b/packages/website/src/nextlytics.ts
@@ -94,11 +94,11 @@ export const { middleware, analytics, NextlyticsServer } = Nextlytics({
   anonymousUsers: {},
   debug: true,
   backends: buildBackends(),
-  isApiPath: (path) => path.startsWith("/api/"),
-  excludeApiCalls: true,
-  // The middleware now tracks every non-API, non-RSC request as a pageView
-  // (not just browser navigations). Keep the demo's own data clean by dropping
-  // machine/asset paths that aren't real pages.
-  excludePaths: (path) =>
-    path === "/robots.txt" || path === "/sitemap.xml" || path.startsWith("/.well-known/"),
+  // Record browser navigations and direct (non-browser) GETs as pageViews,
+  // except API routes and machine/asset paths that aren't real pages.
+  capture: ({ path }) =>
+    !path.startsWith("/api/") &&
+    path !== "/robots.txt" &&
+    path !== "/sitemap.xml" &&
+    !path.startsWith("/.well-known/"),
 });