diff --git a/e2e/test-app/src/app/raw-data/route.ts b/e2e/test-app/src/app/raw-data/route.ts new file mode 100644 index 0000000..941f202 --- /dev/null +++ b/e2e/test-app/src/app/raw-data/route.ts @@ -0,0 +1,10 @@ +// A non-API route handler that serves plain text to non-browser clients. +// Used by the e2e suite to assert that direct, non-navigation GETs (the kind +// agents/curl make for .md / .txt content) are tracked as pageViews by the +// middleware — not just browser page navigations. +export function GET() { + return new Response("raw data payload", { + status: 200, + headers: { "content-type": "text/plain; charset=utf-8" }, + }); +} diff --git a/e2e/tests/analytics.test.ts b/e2e/tests/analytics.test.ts index 0e6352e..de23c20 100644 --- a/e2e/tests/analytics.test.ts +++ b/e2e/tests/analytics.test.ts @@ -116,6 +116,62 @@ describe.each(versions)("%s", (version) => { await page.close(); }); + it("tracks a direct non-navigation route-handler GET as a pageView", async () => { + // A machine-style fetch (the test sets only Accept: text/plain, no navigation headers) to a + // non-API route handler — the kind agents/curl make for .md / .txt + // content. The middleware must record exactly one pageView at the request + // path, not skip it for not being a browser page navigation. NOTE(review): the count is asserted right after waitForEvents returns; presumably that is on the first match, so a late duplicate would go unnoticed — confirm. + const res = await fetch(`${testApp.baseUrl}/raw-data`, { + headers: { accept: "text/plain" }, + }); + expect(res.status).toBe(200); + + const events = await testApp.waitForEvents((evs) => + evs.some((e) => e.type === "pageView" && e.path === "/raw-data") + ); + const matches = events.filter((e) => e.type === "pageView" && e.path === "/raw-data"); + expect(matches.length).toBe(1); + }); + + it("does not track browser sub-requests like RSC soft-navigations", async () => { + // A browser-initiated sub-request (RSC soft navigation / XHR / fetch()) + // carries Sec-Fetch-Dest set to something other than "document". 
In Next + // 15.5+ the soft-nav RSC fetch carries no reliable "rsc" header, so + // Sec-Fetch-Dest is what identifies it. Such requests are tracked + // client-side via /api/event; the middleware must skip them to avoid a + // duplicate pageView. (Node's fetch lets the test set Sec-Fetch-* explicitly — page scripts in a browser can't — + // so this reproduces the real soft-nav request the middleware sees.) + const res = await fetch(`${testApp.baseUrl}${testApp.testPagePath}`, { + headers: { + rsc: "1", + "sec-fetch-dest": "empty", + "sec-fetch-mode": "cors", + "sec-fetch-site": "same-origin", + }, + }); + expect(res.status).toBeLessThan(500); + + await new Promise((r) => setTimeout(r, 500)); + const events = await testApp.getAnalyticsEvents(); + const matches = events.filter( + (e) => e.type === "pageView" && e.path === testApp.testPagePath + ); + expect(matches.length).toBe(0); + }); + + it("does not track non-GET requests to non-API routes (e.g. webhook POST)", async () => { + // A POST/PUT/etc. to a non-API route (a webhook or programmatic write to a + // Route Handler) is not a page view. The middleware runs regardless of + // whether the handler accepts POST, so this asserts the request is skipped + // even though /raw-data only implements GET. 
+ await fetch(`${testApp.baseUrl}/raw-data`, { method: "POST" }); + + await new Promise((r) => setTimeout(r, 500)); + const events = await testApp.getAnalyticsEvents(); + const matches = events.filter((e) => e.type === "pageView" && e.path === "/raw-data"); + expect(matches.length).toBe(0); + }); + it("captures server context (host, method, user-agent)", async () => { const page = await testApp.newPage(); diff --git a/packages/core/src/middleware.ts b/packages/core/src/middleware.ts index 6a9a9a2..1b15ed8 100644 --- a/packages/core/src/middleware.ts +++ b/packages/core/src/middleware.ts @@ -66,6 +66,8 @@ export function createNextlyticsMiddleware( integrity: request.integrity, isPrefetch: reqInfo.isPrefetch, isRsc: reqInfo.isRsc, + isDocumentRequest: reqInfo.isDocumentRequest, + isBrowserSubrequest: reqInfo.isBrowserSubrequest, isPageNavigation: reqInfo.isPageNavigation, isStaticFile: reqInfo.isStaticFile, isNextjsInternal: reqInfo.isNextjsInternal, @@ -88,9 +90,30 @@ export function createNextlyticsMiddleware( return response; } - // Skip non-page-navigation, non-API requests (e.g. RSC fetches). - // Soft navigations are tracked via the client /api/event request. - if (!reqInfo.isPageNavigation && !config.isApiPath(pathname)) { + // Skip browser-initiated sub-requests (RSC soft-navigations, XHR, fetch(), + // subresources): the browser sets Sec-Fetch-Dest to something other than + // "document" for these. A single soft navigation fires RSC fetches that + // carry no reliable RSC header in Next 15.5+, so we can't single them out by + // an "rsc" marker — but they DO carry Sec-Fetch-Dest, and they're tracked + // client-side via /api/event, so counting them here would duplicate the + // pageView. Everything else moves on to the method check below: hard document navigations, and + // requests from non-browser clients (agents, curl, bots, server-to-server) + // which omit Sec-Fetch-* — that's how route handlers serving .md/.txt/JSON + // get counted. 
API paths are exempt so they still reach the apiCall handling + // below (subject to excludeApiCalls). + if (reqInfo.isBrowserSubrequest && !config.isApiPath(pathname)) { + const response = NextResponse.next(); + response.headers.set(headerNames.active, "1"); + return response; + } + + // On non-API routes, only reads (GET/HEAD) and document navigations are + // pageViews. A non-read, non-navigation request to a non-API route — e.g. a + // webhook or programmatic POST/PUT to a Route Handler — is not a page view, + // so skip it. (A classic server-rendered form POST is a document navigation, + // so it's kept.) API paths flow through for any method → apiCall. + const isReadMethod = request.method === "GET" || request.method === "HEAD"; + if (!isReadMethod && !reqInfo.isDocumentRequest && !config.isApiPath(pathname)) { const response = NextResponse.next(); response.headers.set(headerNames.active, "1"); return response; diff --git a/packages/core/src/uitils.ts b/packages/core/src/uitils.ts index e6bff75..7e9146c 100644 --- a/packages/core/src/uitils.ts +++ b/packages/core/src/uitils.ts @@ -74,6 +74,17 @@ export type RequestInfo = { isPrefetch: boolean; /** True if this is an RSC (React Server Components) navigation */ isRsc: boolean; + /** True if this is a hard document navigation (sec-fetch-dest=document or + * sec-fetch-mode=navigate) — the strong signal, distinct from the weaker + * accepts-HTML heuristic folded into isPageNavigation. */ + isDocumentRequest: boolean; + /** True if this is a browser-initiated sub-request (RSC soft-navigation, XHR, + * fetch(), or a subresource) — Sec-Fetch-Dest is present and the request is not + * a document navigation (isDocumentRequest is false). Modern browsers always send Sec-Fetch-Dest (it's a forbidden + * header, so page scripts can't set or override it); non-browser clients (curl, agents, bots, + * server-to-server) normally omit it, though nothing stops them from sending it. Used to skip soft-navigation RSC fetches, which + * carry no reliable RSC header in Next 15.5+ yet must not be double-counted. 
*/ + isBrowserSubrequest: boolean; /** True if this is a standard document or RSC navigation */ isPageNavigation: boolean; /** True if this is a static file (ico, png, css, js, etc.) */ @@ -126,6 +137,10 @@ export function getRequestInfo(request: NextRequest): RequestInfo { const accept = headers.get("accept") || ""; const isDocumentRequest = secFetchDest === "document" || secFetchMode === "navigate"; + // A browser set Sec-Fetch-Dest but this isn't a document navigation → it's an + // RSC soft-nav / XHR / fetch() / subresource. Non-browser clients omit + // Sec-Fetch-* entirely, so they are NOT sub-requests and remain trackable. NOTE(review): assumes secFetchDest is null (not "") when the header is absent — confirm. + const isBrowserSubrequest = secFetchDest !== null && !isDocumentRequest; const acceptsHtml = accept.includes("text/html"); // Page navigation = document request OR accepts HTML. @@ -141,6 +156,8 @@ export function getRequestInfo(request: NextRequest): RequestInfo { return { isPrefetch, isRsc, + isDocumentRequest, + isBrowserSubrequest, isPageNavigation, isStaticFile, isNextjsInternal, diff --git a/packages/website/src/nextlytics.ts b/packages/website/src/nextlytics.ts index b33c98d..1c7bb62 100644 --- a/packages/website/src/nextlytics.ts +++ b/packages/website/src/nextlytics.ts @@ -96,4 +96,9 @@ export const { middleware, analytics, NextlyticsServer } = Nextlytics({ backends: buildBackends(), isApiPath: (path) => path.startsWith("/api/"), excludeApiCalls: true, + // The middleware now also counts non-API GET/HEAD requests that are not browser sub-requests + // (e.g. agents, curl, bots) as pageViews, not just browser navigations. Keep the demo's own data clean by dropping + // machine/asset paths that aren't real pages. + excludePaths: (path) => + path === "/robots.txt" || path === "/sitemap.xml" || path.startsWith("/.well-known/"), });