From 3607da8e3930c2113338f25e0a08169b98d91612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Thu, 11 Jun 2026 20:36:27 +0700 Subject: [PATCH 01/12] Add otel traces profile and OTLP ingest --- src/app_core.ts | 160 ++++- src/config.ts | 15 + src/profiles/evlog.ts | 149 ++++- src/profiles/index.ts | 15 + src/profiles/otelTraces.ts | 311 ++++++++++ src/profiles/otelTraces/normalize.ts | 730 +++++++++++++++++++++++ src/profiles/otelTraces/otlp.ts | 855 +++++++++++++++++++++++++++ src/profiles/otelTraces/schema.ts | 405 +++++++++++++ src/profiles/profile.ts | 53 ++ test/profile_otel_traces.test.ts | 461 +++++++++++++++ 10 files changed, 3148 insertions(+), 6 deletions(-) create mode 100644 src/profiles/otelTraces.ts create mode 100644 src/profiles/otelTraces/normalize.ts create mode 100644 src/profiles/otelTraces/otlp.ts create mode 100644 src/profiles/otelTraces/schema.ts create mode 100644 test/profile_otel_traces.test.ts diff --git a/src/app_core.ts b/src/app_core.ts index a92d134..7e74eab 100644 --- a/src/app_core.ts +++ b/src/app_core.ts @@ -51,10 +51,13 @@ import { parseAggregateRequestBodyResult } from "./search/aggregate"; import { StreamProfileStore, parseProfileUpdateResult, + resolveOtlpTracesCapability, resolveJsonIngestCapability, resolveTouchCapability, + type PreparedJsonRecord, type StreamTouchRoute, } from "./profiles"; +import { encodeOtlpTraceExportResponse } from "./profiles/otelTraces/otlp"; import { dsError } from "./util/ds_error.ts"; import { streamHash16Hex } from "./util/stream_paths"; @@ -150,6 +153,10 @@ function badRequest(msg: string): Response { return json(400, { error: { code: "bad_request", message: msg } }); } +function unsupportedMediaType(msg: string): Response { + return json(415, { error: { code: "unsupported_media_type", message: msg } }); +} + function notFound(msg = "not_found"): Response { return json(404, { error: { code: "not_found", message: msg } }); } @@ -1166,6 +1173,32 @@ export function 
createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { return Result.ok({ rows }); }; + const buildPreparedJsonRows = ( + stream: string, + records: PreparedJsonRecord[] + ): Result<{ rows: AppendRow[] }, { status: 400 | 500; message: string }> => { + const regRes = registry.getRegistryResult(stream); + if (Result.isError(regRes)) return Result.err({ status: 500, message: regRes.error.message }); + const reg = regRes.value; + const validator = reg.currentVersion > 0 ? registry.getValidatorForVersion(reg, reg.currentVersion) : null; + if (reg.currentVersion > 0 && !validator) { + return Result.err({ status: 500, message: "schema validator missing" }); + } + const rows: AppendRow[] = []; + for (const record of records) { + if (validator && !validator(record.value)) { + const msg = validator.errors ? validator.errors.map((e) => e.message).join("; ") : "schema validation failed"; + return Result.err({ status: 400, message: msg }); + } + rows.push({ + routingKey: keyBytesFromString(record.routingKey), + contentType: "application/json", + payload: JSON_TEXT_ENCODER.encode(JSON.stringify(record.value)), + }); + } + return Result.ok({ rows }); + }; + const buildAppendRowsResult = ( stream: string, bodyBytes: Uint8Array, @@ -1623,6 +1656,113 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { } const path = url.pathname; + const handleOtlpTracesIngest = async (stream: string, autoCreate: boolean): Promise => { + if (req.method !== "POST") return badRequest("unsupported method"); + const contentType = req.headers.get("content-type"); + if (!contentType) return badRequest("missing content-type"); + const leaveAppendPhase = memorySampler?.enter("append", { + route: "otlp_traces", + stream, + content_type: normalizeContentType(contentType) ?? 
contentType, + }); + try { + return await runWithGate(ingestGate, async () => { + let srow = db.getStream(stream); + if (!srow && autoCreate) { + srow = db.ensureStream(stream, { contentType: "application/json" }); + const profileRes = profiles.updateProfileResult(stream, srow, { kind: "otel-traces" }); + if (Result.isError(profileRes)) return badRequest(profileRes.error.message); + try { + if (profileRes.value.schemaRegistry) { + await uploadSchemaRegistry(stream, profileRes.value.schemaRegistry); + } + await uploader.publishManifest(stream); + } catch { + return json(500, { error: { code: "internal", message: "profile upload failed" } }); + } + indexer?.enqueue(stream); + notifier.notifyDetailsChanged(stream); + srow = db.getStream(stream); + } + if (!srow || db.isDeleted(srow)) return notFound(); + if (srow.expires_at_ms != null && db.nowMs() > srow.expires_at_ms) return notFound("stream expired"); + + const profileRes = profiles.getProfileResult(stream, srow); + if (Result.isError(profileRes)) return internalError("invalid stream profile"); + const capability = resolveOtlpTracesCapability(profileRes.value); + if (!capability) return badRequest("stream profile does not support OTLP traces"); + + const ab = await req.arrayBuffer(); + if (ab.byteLength > cfg.appendMaxBodyBytes) return tooLarge(`body too large (max ${cfg.appendMaxBodyBytes})`); + const bodyBytes = new Uint8Array(ab); + const decodedRes = capability.decodeExportRequestResult({ + stream, + profile: profileRes.value, + contentType, + contentEncoding: req.headers.get("content-encoding"), + body: bodyBytes, + maxDecodedBytes: cfg.appendMaxBodyBytes, + }); + if (Result.isError(decodedRes)) { + if (decodedRes.error.status === 415) return unsupportedMediaType(decodedRes.error.message); + return badRequest(decodedRes.error.message); + } + + const rowsRes = buildPreparedJsonRows(stream, decodedRes.value.records); + if (Result.isError(rowsRes)) { + if (rowsRes.error.status === 500) return 
internalError(rowsRes.error.message); + return badRequest(rowsRes.error.message); + } + const rows = rowsRes.value.rows; + let appendHeaders: Record = {}; + if (rows.length > 0) { + const appendResOrResponse = await awaitAppendWithTimeout(enqueueAppend({ + stream, + baseAppendMs: db.nowMs(), + rows, + contentType: "application/json", + close: false, + })); + if (appendResOrResponse instanceof Response) return appendResOrResponse; + const appendRes = appendResOrResponse; + if (Result.isError(appendRes)) { + if (appendRes.error.kind === "overloaded") return overloaded(); + if (appendRes.error.kind === "gone") return notFound("stream expired"); + if (appendRes.error.kind === "not_found") return notFound(); + if (appendRes.error.kind === "content_type_mismatch") return conflict("content-type mismatch"); + return json(500, { error: { code: "internal", message: "append failed" } }); + } + const appendBytes = rows.reduce((acc, row) => acc + row.payload.byteLength, 0); + recordAppendOutcome({ + stream, + lastOffset: appendRes.value.lastOffset, + appendedRows: appendRes.value.appendedRows, + metricsBytes: appendBytes, + ingestedBytes: bodyBytes.byteLength, + touched: true, + closed: appendRes.value.closed, + }); + appendHeaders = { + "stream-next-offset": encodeOffset(srow.epoch, appendRes.value.lastOffset), + }; + } + + const encoded = encodeOtlpTraceExportResponse(decodedRes.value); + const responseBody = encoded.body instanceof Uint8Array ? 
bodyBufferFromBytes(encoded.body) : encoded.body; + return new Response(responseBody, { + status: 200, + headers: withNosniff({ + "content-type": encoded.contentType, + "cache-control": "no-store", + ...appendHeaders, + }), + }); + }); + } finally { + leaveAppendPhase?.(); + } + }; + if (path === "/health") { return json(200, { ok: true }); } @@ -1635,6 +1775,11 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { if (req.method === "GET" && path === "/v1/server/_mem") { return json(200, buildServerMem()); } + if (path === "/v1/traces") { + const stream = cfg.otlpTracesStream; + if (!stream) return badRequest("DS_OTLP_TRACES_STREAM is not configured"); + return handleOtlpTracesIngest(stream, cfg.otlpAutoCreate); + } // /v1/streams if (req.method === "GET" && path === "/v1/streams") { @@ -1674,9 +1819,18 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { let isDetails = false; let isIndexStatus = false; let isRoutingKeys = false; + let isOtlpTraces = false; let pathKeyParam: string | null = null; let touchMode: StreamTouchRoute | null = null; - if (segments[segments.length - 1] === "_schema") { + if ( + segments.length >= 3 && + segments[segments.length - 3] === "_otlp" && + segments[segments.length - 2] === "v1" && + segments[segments.length - 1] === "traces" + ) { + isOtlpTraces = true; + segments.splice(segments.length - 3, 3); + } else if (segments[segments.length - 1] === "_schema") { isSchema = true; segments.pop(); } else if (segments[segments.length - 1] === "_profile") { @@ -1719,6 +1873,10 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { if (streamPart.length === 0) return badRequest("missing stream name"); const stream = decodeURIComponent(streamPart); + if (isOtlpTraces) { + return handleOtlpTracesIngest(stream, false); + } + if (isSchema) { const srow = db.getStream(stream); if (!srow || db.isDeleted(srow)) return notFound(); diff --git a/src/config.ts 
b/src/config.ts index 873f254..2364078 100644 --- a/src/config.ts +++ b/src/config.ts @@ -68,6 +68,8 @@ export type Config = { touchCheckIntervalMs: number; touchMaxBatchRows: number; touchMaxBatchBytes: number; + otlpTracesStream: string | null; + otlpAutoCreate: boolean; port: number; }; @@ -143,6 +145,8 @@ const KNOWN_DS_ENVS = new Set([ "DS_TOUCH_CHECK_MS", "DS_TOUCH_MAX_BATCH_ROWS", "DS_TOUCH_MAX_BATCH_BYTES", + "DS_OTLP_TRACES_STREAM", + "DS_OTLP_AUTO_CREATE", "DS_AUTO_TUNE_REQUESTED_MB", "DS_AUTO_TUNE_PRESET_MB", "DS_AUTO_TUNE_EFFECTIVE_MEMORY_LIMIT_MB", @@ -218,6 +222,15 @@ function envNum(name: string, def: number): number { return n; } +function envBool(name: string, def: boolean): boolean { + const v = process.env[name]; + if (v == null || v === "") return def; + const normalized = v.trim().toLowerCase(); + if (normalized === "1" || normalized === "true" || normalized === "yes") return true; + if (normalized === "0" || normalized === "false" || normalized === "no") return false; + throw dsError(`invalid ${name}: ${v}`); +} + function envBytes(name: string): number | null { const v = process.env[name]; if (!v) return null; @@ -354,6 +367,8 @@ export function loadConfig(): Config { touchCheckIntervalMs: envNum("DS_TOUCH_CHECK_MS", 250), touchMaxBatchRows: envNum("DS_TOUCH_MAX_BATCH_ROWS", 500), touchMaxBatchBytes: envNum("DS_TOUCH_MAX_BATCH_BYTES", 4 * 1024 * 1024), + otlpTracesStream: process.env.DS_OTLP_TRACES_STREAM?.trim() || null, + otlpAutoCreate: envBool("DS_OTLP_AUTO_CREATE", false), port: envNum("PORT", 8080), }; } diff --git a/src/profiles/evlog.ts b/src/profiles/evlog.ts index 6e5a4a8..215c6b3 100644 --- a/src/profiles/evlog.ts +++ b/src/profiles/evlog.ts @@ -6,6 +6,7 @@ import type { StreamProfilePersistResult, StreamProfileReadResult, StreamProfileSpec, + UnifiedTimelineItem, } from "./profile"; import { cloneStreamProfileSpec, @@ -20,6 +21,11 @@ import { buildEvlogDefaultRegistry } from "./evlog/schema"; export type EvlogStreamProfile = { 
kind: "evlog"; redactKeys?: string[]; + correlation?: { + requestIdFields?: string[]; + traceContextFields?: string[]; + parseTraceparent?: boolean; + }; }; const DEFAULT_REDACT_KEYS = ["password", "token", "secret", "authorization", "cookie", "apikey"] as const; @@ -86,17 +92,59 @@ function parseRedactKeysResult(raw: unknown, path: string): Result { + if (raw === undefined) return Result.ok(undefined); + if (!Array.isArray(raw)) return Result.err({ message: `${path} must be an array of strings` }); + if (raw.length > maxItems) return Result.err({ message: `${path} too large (max ${maxItems})` }); + const out: string[] = []; + const seen = new Set(); + for (const item of raw) { + if (typeof item !== "string") return Result.err({ message: `${path} must be an array of strings` }); + const value = item.trim(); + if (value === "") return Result.err({ message: `${path} must not contain empty strings` }); + if (seen.has(value)) continue; + seen.add(value); + out.push(value); + } + return Result.ok(out); +} + +function parseEvlogCorrelationResult(raw: unknown, path: string): Result { + if (raw === undefined) return Result.ok(undefined); + const objRes = expectPlainObjectResult(raw, path); + if (Result.isError(objRes)) return objRes; + const keyCheck = rejectUnknownKeysResult(objRes.value, ["requestIdFields", "traceContextFields", "parseTraceparent"], path); + if (Result.isError(keyCheck)) return keyCheck; + const requestIdFieldsRes = parseStringListResult(objRes.value.requestIdFields, `${path}.requestIdFields`, 64); + if (Result.isError(requestIdFieldsRes)) return requestIdFieldsRes; + const traceContextFieldsRes = parseStringListResult(objRes.value.traceContextFields, `${path}.traceContextFields`, 64); + if (Result.isError(traceContextFieldsRes)) return traceContextFieldsRes; + if (objRes.value.parseTraceparent !== undefined && typeof objRes.value.parseTraceparent !== "boolean") { + return Result.err({ message: `${path}.parseTraceparent must be boolean` }); + } + const 
correlation: NonNullable = {}; + if (requestIdFieldsRes.value) correlation.requestIdFields = requestIdFieldsRes.value; + if (traceContextFieldsRes.value) correlation.traceContextFields = traceContextFieldsRes.value; + if (objRes.value.parseTraceparent !== undefined) correlation.parseTraceparent = objRes.value.parseTraceparent; + return Result.ok(Object.keys(correlation).length > 0 ? correlation : undefined); +} + function validateEvlogProfileResult(raw: unknown, path: string): Result { const objRes = expectPlainObjectResult(raw, path); if (Result.isError(objRes)) return objRes; if (objRes.value.kind !== "evlog") { return Result.err({ message: `${path}.kind must be evlog` }); } - const keyCheck = rejectUnknownKeysResult(objRes.value, ["kind", "redactKeys"], path); + const keyCheck = rejectUnknownKeysResult(objRes.value, ["kind", "redactKeys", "correlation"], path); if (Result.isError(keyCheck)) return keyCheck; const redactKeysRes = parseRedactKeysResult(objRes.value.redactKeys, `${path}.redactKeys`); if (Result.isError(redactKeysRes)) return redactKeysRes; - return Result.ok(redactKeysRes.value ? { kind: "evlog", redactKeys: redactKeysRes.value } : { kind: "evlog" }); + const correlationRes = parseEvlogCorrelationResult(objRes.value.correlation, `${path}.correlation`); + if (Result.isError(correlationRes)) return correlationRes; + const profile: EvlogStreamProfile = { kind: "evlog" }; + if (redactKeysRes.value) profile.redactKeys = redactKeysRes.value; + if (correlationRes.value) profile.correlation = correlationRes.value; + return Result.ok(profile); } function normalizeString(value: unknown): string | null { @@ -112,6 +160,49 @@ function normalizeTraceField(input: Record, field: "traceId" | return traceContext ? 
normalizeString(traceContext[field]) : null; } +function readDottedString(input: Record, path: string): string | null { + let cur: unknown = input; + for (const part of path.split(".")) { + if (!isPlainObject(cur)) return null; + cur = cur[part]; + } + return normalizeString(cur); +} + +function normalizeRequestId(input: Record, profile: EvlogStreamProfile): string | null { + const fields = profile.correlation?.requestIdFields ?? ["requestId", "context.requestId"]; + for (const field of fields) { + const value = readDottedString(input, field); + if (value) return value; + } + return null; +} + +function normalizeConfiguredTraceField(input: Record, profile: EvlogStreamProfile, field: "traceId" | "spanId"): string | null { + const fields = profile.correlation?.traceContextFields; + if (!fields) return normalizeTraceField(input, field); + for (const path of fields) { + if (path !== field && !path.endsWith(`.${field}`)) continue; + const value = readDottedString(input, path); + if (value) return value; + } + return normalizeTraceField(input, field); +} + +function parseTraceparent(input: Record): { traceId: string; spanId: string } | null { + for (const path of ["traceparent", "traceContext.traceparent", "context.traceparent", "headers.traceparent"]) { + const value = readDottedString(input, path); + if (!value) continue; + const match = /^([0-9a-f]{2})-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})(?:-.+)?$/i.exec(value); + if (!match) continue; + const traceId = match[2].toLowerCase(); + const spanId = match[3].toLowerCase(); + if (/^0+$/.test(traceId) || /^0+$/.test(spanId)) continue; + return { traceId, spanId }; + } + return null; +} + function normalizeOptionalNumber(value: unknown): number | null { if (typeof value === "number" && Number.isFinite(value)) return value; if (typeof value === "string" && value.trim() !== "") { @@ -187,9 +278,10 @@ function normalizeEvlogRecordResult(profile: EvlogStreamProfile, value: unknown) const status = 
normalizeOptionalInteger(input.status); const duration = normalizeOptionalNumber(input.duration); const timestamp = normalizeString(input.timestamp) ?? new Date().toISOString(); - const requestId = normalizeString(input.requestId); - const traceId = normalizeTraceField(input, "traceId"); - const spanId = normalizeTraceField(input, "spanId"); + const requestId = normalizeRequestId(input, profile); + const traceparent = profile.correlation?.parseTraceparent === false ? null : parseTraceparent(input); + const traceId = normalizeConfiguredTraceField(input, profile, "traceId") ?? traceparent?.traceId ?? null; + const spanId = normalizeConfiguredTraceField(input, profile, "spanId") ?? traceparent?.spanId ?? null; const contextRes = redactValue(buildContext(input), new Set([...DEFAULT_REDACT_KEYS, ...(profile.redactKeys ?? [])])); const normalized = { @@ -221,6 +313,47 @@ function normalizeEvlogRecordResult(profile: EvlogStreamProfile, value: unknown) }); } +function evlogSeverity(record: Record): "debug" | "info" | "warn" | "error" { + const level = normalizeString(record.level)?.toLowerCase(); + if (level === "debug" || level === "info" || level === "warn" || level === "error") return level; + const status = normalizeOptionalInteger(record.status); + if (status != null && status >= 500) return "error"; + if (status != null && status >= 400) return "warn"; + return "info"; +} + +function evlogTimelineItems(args: { stream: string; offset?: string; record: unknown }): UnifiedTimelineItem[] { + if (!isPlainObject(args.record)) return []; + const record = args.record; + const timestamp = normalizeString(record.timestamp); + if (!timestamp) return []; + const message = normalizeString(record.message); + const method = normalizeString(record.method); + const path = normalizeString(record.path); + const title = message ?? 
([method, path].filter(Boolean).join(" ") || "evlog event"); + return [ + { + kind: "evlog.event", + time: timestamp, + duration: normalizeOptionalNumber(record.duration), + service: normalizeString(record.service), + title, + severity: evlogSeverity(record), + ids: { + requestId: normalizeString(record.requestId), + traceId: normalizeString(record.traceId), + spanId: normalizeString(record.spanId), + }, + source: { + stream: args.stream, + offset: args.offset, + profile: "evlog", + }, + data: record, + }, + ]; +} + export const EVLOG_STREAM_PROFILE_DEFINITION: StreamProfileDefinition = { kind: "evlog", usesStoredProfileRow: true, @@ -296,4 +429,10 @@ export const EVLOG_STREAM_PROFILE_DEFINITION: StreamProfileDefinition = { return normalizeEvlogRecordResult(profile, value); }, }, + + correlation: { + toTimelineItems(args) { + return evlogTimelineItems(args); + }, + }, }; diff --git a/src/profiles/index.ts b/src/profiles/index.ts index fee20bb..f8acd58 100644 --- a/src/profiles/index.ts +++ b/src/profiles/index.ts @@ -6,6 +6,7 @@ import { dsError } from "../util/ds_error.ts"; import { GENERIC_STREAM_PROFILE_DEFINITION } from "./generic"; import { EVLOG_STREAM_PROFILE_DEFINITION } from "./evlog"; import { METRICS_STREAM_PROFILE_DEFINITION } from "./metrics"; +import { OTEL_TRACES_STREAM_PROFILE_DEFINITION } from "./otelTraces"; import { buildStreamProfileResource, cloneStreamProfileSpec, @@ -15,6 +16,8 @@ import { type CachedStreamProfile, type StoredProfileRow, type StreamProfileJsonIngestCapability, + type StreamProfileOtlpTracesCapability, + type StreamProfileCorrelationCapability, type StreamProfileDefinition, type StreamProfileMetricsCapability, type StreamProfileReadError, @@ -29,12 +32,14 @@ export * from "./profile"; export { EVLOG_STREAM_PROFILE_DEFINITION } from "./evlog"; export { GENERIC_STREAM_PROFILE_DEFINITION } from "./generic"; export { METRICS_STREAM_PROFILE_DEFINITION } from "./metrics"; +export { OTEL_TRACES_STREAM_PROFILE_DEFINITION } from 
"./otelTraces"; export { STATE_PROTOCOL_STREAM_PROFILE_DEFINITION } from "./stateProtocol"; const STREAM_PROFILE_DEFINITIONS: Record = { [EVLOG_STREAM_PROFILE_DEFINITION.kind]: EVLOG_STREAM_PROFILE_DEFINITION, [GENERIC_STREAM_PROFILE_DEFINITION.kind]: GENERIC_STREAM_PROFILE_DEFINITION, [METRICS_STREAM_PROFILE_DEFINITION.kind]: METRICS_STREAM_PROFILE_DEFINITION, + [OTEL_TRACES_STREAM_PROFILE_DEFINITION.kind]: OTEL_TRACES_STREAM_PROFILE_DEFINITION, [STATE_PROTOCOL_STREAM_PROFILE_DEFINITION.kind]: STATE_PROTOCOL_STREAM_PROFILE_DEFINITION, }; // New built-in profiles are wired here. Core runtime paths must resolve the @@ -100,6 +105,16 @@ export function resolveMetricsCapability(profile: StreamProfileSpec | null | und return resolveStreamProfileDefinition(profile.kind)?.metrics ?? null; } +export function resolveOtlpTracesCapability(profile: StreamProfileSpec | null | undefined): StreamProfileOtlpTracesCapability | null { + if (!profile) return null; + return resolveStreamProfileDefinition(profile.kind)?.otlpTraces ?? null; +} + +export function resolveCorrelationCapability(profile: StreamProfileSpec | null | undefined): StreamProfileCorrelationCapability | null { + if (!profile) return null; + return resolveStreamProfileDefinition(profile.kind)?.correlation ?? 
null; +} + export function resolveEnabledTouchCapability( profile: StreamProfileSpec | null | undefined ): { capability: StreamTouchCapability; touchCfg: NonNullable> } | null { diff --git a/src/profiles/otelTraces.ts b/src/profiles/otelTraces.ts new file mode 100644 index 0000000..06f2eb5 --- /dev/null +++ b/src/profiles/otelTraces.ts @@ -0,0 +1,311 @@ +import { Result } from "better-result"; +import type { + CachedStreamProfile, + StreamProfileDefinition, + StreamProfilePersistResult, + StreamProfileReadResult, + StreamProfileSpec, + UnifiedTimelineItem, +} from "./profile"; +import { + cloneStreamProfileSpec, + expectPlainObjectResult, + normalizeProfileContentType, + parseStoredProfileJsonResult, + rejectUnknownKeysResult, + isPlainObject, +} from "./profile"; +import { buildOtelTracesDefaultRegistry } from "./otelTraces/schema"; +import { + DEFAULT_ATTRIBUTE_LIMITS, + DEFAULT_OTEL_TRACE_REDACT_KEYS, + DEFAULT_REQUEST_ID_ATTRIBUTES, + DEFAULT_STORE_CONFIG, + normalizeOtelTraceRecordResult, + type DbStatementMode, + type OtelTraceAttributeLimits, + type OtelTraceStoreConfig, + type OtelTracesStreamProfile, +} from "./otelTraces/normalize"; +import { decodeOtlpTraceExportRequestResult } from "./otelTraces/otlp"; + +export type { OtelTracesStreamProfile }; + +function cloneOtelTracesProfile(profile: OtelTracesStreamProfile): OtelTracesStreamProfile { + return cloneStreamProfileSpec(profile) as OtelTracesStreamProfile; +} + +function cloneOtelTracesCache(cache: CachedStreamProfile | null): CachedStreamProfile | null { + if (!cache || cache.profile.kind !== "otel-traces") return null; + return { + profile: cloneOtelTracesProfile(cache.profile as OtelTracesStreamProfile), + updatedAtMs: cache.updatedAtMs, + }; +} + +function isOtelTracesProfile(profile: StreamProfileSpec | null | undefined): profile is OtelTracesStreamProfile { + return !!profile && profile.kind === "otel-traces"; +} + +function parseStringArrayResult(raw: unknown, path: string, maxItems: number): 
Result { + if (raw === undefined) return Result.ok(undefined); + if (!Array.isArray(raw)) return Result.err({ message: `${path} must be an array of strings` }); + if (raw.length > maxItems) return Result.err({ message: `${path} too large (max ${maxItems})` }); + const out: string[] = []; + const seen = new Set(); + for (const item of raw) { + if (typeof item !== "string") return Result.err({ message: `${path} must be an array of strings` }); + const value = item.trim(); + if (value === "") return Result.err({ message: `${path} must not contain empty strings` }); + const key = value.toLowerCase(); + if (seen.has(key)) continue; + seen.add(key); + out.push(path.endsWith("redactKeys") ? key : value); + } + return Result.ok(out); +} + +function parsePositiveIntResult(raw: unknown, path: string, fallback: number): Result { + if (raw === undefined) return Result.ok(fallback); + if (typeof raw !== "number" || !Number.isFinite(raw) || !Number.isInteger(raw) || raw <= 0) { + return Result.err({ message: `${path} must be a positive integer` }); + } + return Result.ok(raw); +} + +function parseAttributeLimitsResult(raw: unknown, path: string): Result | undefined, { message: string }> { + if (raw === undefined) return Result.ok(undefined); + const objRes = expectPlainObjectResult(raw, path); + if (Result.isError(objRes)) return objRes; + const keyCheck = rejectUnknownKeysResult( + objRes.value, + ["maxAttributeValueBytes", "maxAttributesPerSpan", "maxEventsPerSpan", "maxLinksPerSpan", "maxStatementBytes"], + path + ); + if (Result.isError(keyCheck)) return keyCheck; + const out: Partial = {}; + for (const key of Object.keys(DEFAULT_ATTRIBUTE_LIMITS) as Array) { + const valueRes = parsePositiveIntResult(objRes.value[key], `${path}.${key}`, DEFAULT_ATTRIBUTE_LIMITS[key]); + if (Result.isError(valueRes)) return valueRes; + if (objRes.value[key] !== undefined) out[key] = valueRes.value; + } + return Result.ok(Object.keys(out).length > 0 ? 
out : undefined); +} + +function parseStoreResult(raw: unknown, path: string): Result | undefined, { message: string }> { + if (raw === undefined) return Result.ok(undefined); + const objRes = expectPlainObjectResult(raw, path); + if (Result.isError(objRes)) return objRes; + const keyCheck = rejectUnknownKeysResult(objRes.value, ["rawResourceAttributes", "rawSpanAttributes", "rawEvents", "rawLinks"], path); + if (Result.isError(keyCheck)) return keyCheck; + const out: Partial = {}; + for (const key of Object.keys(DEFAULT_STORE_CONFIG) as Array) { + const value = objRes.value[key]; + if (value === undefined) continue; + if (typeof value !== "boolean") return Result.err({ message: `${path}.${key} must be boolean` }); + out[key] = value; + } + return Result.ok(Object.keys(out).length > 0 ? out : undefined); +} + +function parseDbStatementModeResult(raw: unknown, path: string): Result { + if (raw === undefined) return Result.ok(undefined); + if (raw === "drop" || raw === "raw") return Result.ok(raw); + return Result.err({ message: `${path} must be drop or raw` }); +} + +function validateOtelTracesProfileResult(raw: unknown, path: string): Result { + const objRes = expectPlainObjectResult(raw, path); + if (Result.isError(objRes)) return objRes; + if (objRes.value.kind !== "otel-traces") return Result.err({ message: `${path}.kind must be otel-traces` }); + const keyCheck = rejectUnknownKeysResult( + objRes.value, + ["kind", "redactKeys", "requestIdAttributes", "attributeLimits", "store", "dbStatementMode"], + path + ); + if (Result.isError(keyCheck)) return keyCheck; + const redactKeysRes = parseStringArrayResult(objRes.value.redactKeys, `${path}.redactKeys`, 64); + if (Result.isError(redactKeysRes)) return redactKeysRes; + const requestIdAttributesRes = parseStringArrayResult(objRes.value.requestIdAttributes, `${path}.requestIdAttributes`, 64); + if (Result.isError(requestIdAttributesRes)) return requestIdAttributesRes; + const limitsRes = 
parseAttributeLimitsResult(objRes.value.attributeLimits, `${path}.attributeLimits`); + if (Result.isError(limitsRes)) return limitsRes; + const storeRes = parseStoreResult(objRes.value.store, `${path}.store`); + if (Result.isError(storeRes)) return storeRes; + const dbStatementModeRes = parseDbStatementModeResult(objRes.value.dbStatementMode, `${path}.dbStatementMode`); + if (Result.isError(dbStatementModeRes)) return dbStatementModeRes; + const profile: OtelTracesStreamProfile = { kind: "otel-traces" }; + if (redactKeysRes.value) profile.redactKeys = redactKeysRes.value; + if (requestIdAttributesRes.value) profile.requestIdAttributes = requestIdAttributesRes.value; + if (limitsRes.value) profile.attributeLimits = limitsRes.value; + if (storeRes.value) profile.store = storeRes.value; + if (dbStatementModeRes.value) profile.dbStatementMode = dbStatementModeRes.value; + return Result.ok(profile); +} + +function getString(record: Record, key: string): string | null { + const value = record[key]; + return typeof value === "string" && value.trim() !== "" ? value : null; +} + +function getNumber(record: Record, key: string): number | null { + const value = record[key]; + return typeof value === "number" && Number.isFinite(value) ? value : null; +} + +function severityForSpan(record: Record): "debug" | "info" | "warn" | "error" { + const status = isPlainObject(record.status) ? getString(record.status, "code") : null; + const error = isPlainObject(record.error) && record.error.isError === true; + return status === "error" || error ? 
"error" : "info"; +} + +function buildOtelTimelineItems(args: { stream: string; offset?: string; record: unknown }): UnifiedTimelineItem[] { + if (!isPlainObject(args.record)) return []; + const record = args.record; + const traceId = getString(record, "traceId"); + const spanId = getString(record, "spanId"); + const parentSpanId = getString(record, "parentSpanId"); + const requestId = getString(record, "requestId"); + const service = getString(record, "service"); + const title = getString(record, "name") ?? spanId ?? "span"; + const timestamp = getString(record, "timestamp"); + const endTimestamp = getString(record, "endTimestamp"); + const duration = getNumber(record, "duration"); + const severity = severityForSpan(record); + const source = { stream: args.stream, offset: args.offset, profile: "otel-traces" }; + const ids = { requestId, traceId, spanId, parentSpanId }; + const out: UnifiedTimelineItem[] = []; + if (timestamp) { + out.push({ + kind: "otel.span.start", + time: timestamp, + duration, + service, + title, + severity, + ids, + source, + data: record, + }); + } + if (Array.isArray(record.events)) { + for (const event of record.events) { + if (!isPlainObject(event)) continue; + const eventTime = getString(event, "timestamp"); + const eventName = getString(event, "name") ?? "span event"; + if (!eventTime) continue; + out.push({ + kind: eventName === "exception" ? "otel.exception" : "otel.span.event", + time: eventTime, + service, + title: eventName, + severity: eventName === "exception" ? 
"error" : severity, + ids, + source, + data: event, + }); + } + } + if (endTimestamp) { + out.push({ + kind: "otel.span.end", + time: endTimestamp, + duration, + service, + title, + severity, + ids, + source, + data: record, + }); + } + return out; +} + +export const OTEL_TRACES_STREAM_PROFILE_DEFINITION: StreamProfileDefinition = { + kind: "otel-traces", + usesStoredProfileRow: true, + + defaultProfile(): OtelTracesStreamProfile { + return { kind: "otel-traces" }; + }, + + validateResult(raw, path) { + return validateOtelTracesProfileResult(raw, path); + }, + + readProfileResult({ row, cached }): Result { + if (!row) return Result.ok({ profile: { kind: "otel-traces" }, cache: null }); + const cachedCopy = cloneOtelTracesCache(cached); + if (cachedCopy && cachedCopy.updatedAtMs === row.updated_at_ms) { + return Result.ok({ + profile: cloneOtelTracesProfile(cachedCopy.profile as OtelTracesStreamProfile), + cache: cachedCopy, + }); + } + const parsedRes = parseStoredProfileJsonResult(row.profile_json); + if (Result.isError(parsedRes)) return parsedRes; + const profileRes = validateOtelTracesProfileResult(parsedRes.value, "profile"); + if (Result.isError(profileRes)) return profileRes; + const profile = cloneOtelTracesProfile(profileRes.value); + return Result.ok({ + profile: cloneOtelTracesProfile(profile), + cache: { profile, updatedAtMs: row.updated_at_ms }, + }); + }, + + persistProfileResult({ db, registry, stream, streamRow, profile }): Result { + if (!isOtelTracesProfile(profile)) return Result.err({ kind: "bad_request", message: "invalid otel-traces profile" }); + const contentType = normalizeProfileContentType(streamRow.content_type); + if (contentType !== "application/json") { + return Result.err({ + kind: "bad_request", + message: "otel-traces profile requires application/json stream content-type", + }); + } + if (streamRow.profile !== "otel-traces" && streamRow.next_offset > 0n) { + return Result.err({ + kind: "bad_request", + message: "otel-traces profile 
must be installed before appending data", + }); + } + + const persistedProfile = cloneOtelTracesProfile(profile); + const registryRes = registry.replaceRegistryResult(stream, buildOtelTracesDefaultRegistry(stream)); + if (Result.isError(registryRes)) { + return Result.err({ kind: "bad_request", message: registryRes.error.message }); + } + db.updateStreamProfile(stream, persistedProfile.kind); + db.upsertStreamProfile(stream, JSON.stringify(persistedProfile)); + db.deleteStreamTouchState(stream); + const row = db.getStreamProfile(stream); + return Result.ok({ + profile: cloneOtelTracesProfile(persistedProfile), + cache: { + profile: persistedProfile, + updatedAtMs: row?.updated_at_ms ?? db.nowMs(), + }, + schemaRegistry: registryRes.value, + }); + }, + + jsonIngest: { + prepareRecordResult({ profile, value }) { + if (!isOtelTracesProfile(profile)) return Result.err({ message: "invalid otel-traces profile" }); + return normalizeOtelTraceRecordResult(profile, value); + }, + }, + + otlpTraces: { + decodeExportRequestResult({ profile, stream, contentType, contentEncoding, body, maxDecodedBytes }) { + if (!isOtelTracesProfile(profile)) return Result.err({ status: 400, message: "invalid otel-traces profile" }); + return decodeOtlpTraceExportRequestResult({ stream, profile, contentType, contentEncoding, body, maxDecodedBytes }); + }, + }, + + correlation: { + toTimelineItems(args) { + return buildOtelTimelineItems(args); + }, + }, +}; diff --git a/src/profiles/otelTraces/normalize.ts b/src/profiles/otelTraces/normalize.ts new file mode 100644 index 0000000..218a859 --- /dev/null +++ b/src/profiles/otelTraces/normalize.ts @@ -0,0 +1,730 @@ +import { createHash } from "node:crypto"; +import { Result } from "better-result"; +import type { PreparedJsonRecord } from "../profile"; +import { expectPlainObjectResult, isPlainObject } from "../profile"; + +export type OTelSpanKind = "unspecified" | "internal" | "server" | "client" | "producer" | "consumer"; +export type 
OTelStatusCode = "unset" | "ok" | "error"; +export type DbStatementMode = "drop" | "raw"; + +export type OtelTraceAttributeLimits = { + maxAttributeValueBytes: number; + maxAttributesPerSpan: number; + maxEventsPerSpan: number; + maxLinksPerSpan: number; + maxStatementBytes: number; +}; + +export type OtelTraceStoreConfig = { + rawResourceAttributes: boolean; + rawSpanAttributes: boolean; + rawEvents: boolean; + rawLinks: boolean; +}; + +export type OtelTracesStreamProfile = { + kind: "otel-traces"; + redactKeys?: string[]; + requestIdAttributes?: string[]; + attributeLimits?: Partial; + store?: Partial; + dbStatementMode?: DbStatementMode; +}; + +export type DecodedOtelEvent = { + timeUnixNano: string | null; + name: string; + attributes: Record; + droppedAttributesCount?: number; +}; + +export type DecodedOtelLink = { + traceId: string; + spanId: string; + traceState: string | null; + attributes: Record; + droppedAttributesCount?: number; +}; + +export type DecodedOtelSpan = { + traceId: string; + spanId: string; + parentSpanId?: string | null; + traceState?: string | null; + traceFlags?: number | null; + name: string; + kind?: number | string | null; + startUnixNano?: string | null; + endUnixNano?: string | null; + timestamp?: string | null; + status?: { + code?: number | string | null; + message?: string | null; + }; + resourceSchemaUrl?: string | null; + resourceAttributes: Record; + instrumentationScope?: { + name?: string | null; + version?: string | null; + schemaUrl?: string | null; + attributes?: Record; + }; + attributes: Record; + events: DecodedOtelEvent[]; + links: DecodedOtelLink[]; + droppedAttributesCount?: number; + droppedEventsCount?: number; + droppedLinksCount?: number; + requestId?: string | null; +}; + +export type CanonicalOtelSpan = { + schemaVersion: 1; + signal: "trace.span"; + timestamp: string; + endTimestamp: string | null; + startUnixNano: string | null; + endUnixNano: string | null; + duration: number | null; + traceId: string; + 
spanId: string; + parentSpanId: string | null; + traceState: string | null; + traceFlags: { + sampled: boolean; + raw: number | null; + }; + name: string; + kind: OTelSpanKind; + status: { + code: OTelStatusCode; + message: string | null; + }; + service: string | null; + serviceNamespace: string | null; + serviceInstanceId: string | null; + environment: string | null; + version: string | null; + region: string | null; + requestId: string | null; + http: { + method: string | null; + route: string | null; + path: string | null; + target: string | null; + url: string | null; + statusCode: number | null; + userAgent: string | null; + }; + db: { + system: string | null; + name: string | null; + operation: string | null; + statement: string | null; + }; + rpc: { + system: string | null; + service: string | null; + method: string | null; + }; + messaging: { + system: string | null; + destination: string | null; + operation: string | null; + }; + error: { + isError: boolean; + type: string | null; + message: string | null; + stacktrace: string | null; + }; + instrumentationScope: { + name: string | null; + version: string | null; + schemaUrl: string | null; + attributes: Record; + }; + resource: { + schemaUrl: string | null; + attributes: Record; + }; + attributes: Record; + events: Array<{ + timestamp: string | null; + timeUnixNano: string | null; + name: string; + attributes: Record; + droppedAttributesCount?: number; + }>; + eventNames: string[]; + links: Array<{ + traceId: string; + spanId: string; + traceState: string | null; + attributes: Record; + droppedAttributesCount?: number; + }>; + dropped: { + attributes: number; + events: number; + links: number; + }; + redaction: { + keys: string[]; + }; + identity: { + spanKey: string; + dedupeKey: string; + }; +}; + +const TEXT_ENCODER = new TextEncoder(); +const TEXT_DECODER = new TextDecoder(); +const REDACTED_VALUE = "[REDACTED]"; + +export const DEFAULT_OTEL_TRACE_REDACT_KEYS = [ + "password", + "token", + "secret", + 
"authorization", + "cookie", + "apikey", + "api_key", + "set-cookie", + "x-api-key", +] as const; + +export const DEFAULT_REQUEST_ID_ATTRIBUTES = [ + "request.id", + "http.request_id", + "http.request.header.x_request_id", + "http.request.header.x-request-id", + "http.request.header.x_correlation_id", + "http.request.header.x-correlation-id", + "correlation.id", +] as const; + +export const DEFAULT_ATTRIBUTE_LIMITS: OtelTraceAttributeLimits = { + maxAttributeValueBytes: 8192, + maxAttributesPerSpan: 256, + maxEventsPerSpan: 128, + maxLinksPerSpan: 128, + maxStatementBytes: 4096, +}; + +export const DEFAULT_STORE_CONFIG: OtelTraceStoreConfig = { + rawResourceAttributes: true, + rawSpanAttributes: true, + rawEvents: true, + rawLinks: true, +}; + +function normalizeString(value: unknown): string | null { + if (typeof value !== "string") return null; + const trimmed = value.trim(); + return trimmed === "" ? null : trimmed; +} + +function normalizeNumber(value: unknown): number | null { + if (typeof value === "number" && Number.isFinite(value)) return value; + if (typeof value === "bigint") return Number(value); + if (typeof value === "string" && value.trim() !== "") { + const parsed = Number(value); + if (Number.isFinite(parsed)) return parsed; + } + return null; +} + +function normalizeInteger(value: unknown): number | null { + const n = normalizeNumber(value); + return n != null && Number.isInteger(n) ? n : null; +} + +function normalizeNanoString(value: unknown): string | null { + if (value == null) return null; + if (typeof value === "bigint") return value >= 0n ? 
value.toString() : null; + if (typeof value === "number" && Number.isFinite(value) && Number.isInteger(value) && value >= 0) { + return BigInt(value).toString(); + } + if (typeof value === "string") { + const trimmed = value.trim(); + if (/^(0|[1-9][0-9]*)$/.test(trimmed)) return trimmed; + } + return null; +} + +function isoFromUnixNano(nanoString: string | null): string | null { + if (!nanoString) return null; + try { + const ms = BigInt(nanoString) / 1_000_000n; + const date = new Date(Number(ms)); + if (Number.isNaN(date.getTime())) return null; + return date.toISOString(); + } catch { + return null; + } +} + +function durationMs(startUnixNano: string | null, endUnixNano: string | null): Result { + if (!startUnixNano || !endUnixNano) return Result.ok(null); + const start = BigInt(startUnixNano); + const end = BigInt(endUnixNano); + if (end < start) return Result.err({ message: "endTimeUnixNano must be greater than or equal to startTimeUnixNano" }); + return Result.ok(Number(end - start) / 1_000_000); +} + +function normalizeHexIdResult(raw: unknown, chars: number, field: string): Result { + const value = normalizeString(raw)?.toLowerCase() ?? 
""; + if (!new RegExp(`^[0-9a-f]{${chars}}$`).test(value)) { + return Result.err({ message: `${field} must be ${chars} lowercase hex characters` }); + } + if (/^0+$/.test(value)) return Result.err({ message: `${field} must not be all zeroes` }); + return Result.ok(value); +} + +function normalizeParentSpanIdResult(raw: unknown): Result { + const value = normalizeString(raw); + if (!value) return Result.ok(null); + const lowered = value.toLowerCase(); + if (/^0+$/.test(lowered)) return Result.ok(null); + return normalizeHexIdResult(lowered, 16, "parentSpanId"); +} + +function normalizeSpanKind(value: unknown): OTelSpanKind { + if (typeof value === "number") { + if (value === 1) return "internal"; + if (value === 2) return "server"; + if (value === 3) return "client"; + if (value === 4) return "producer"; + if (value === 5) return "consumer"; + return "unspecified"; + } + const raw = normalizeString(value)?.toLowerCase().replace(/^span_kind_/, ""); + if (raw === "internal" || raw === "server" || raw === "client" || raw === "producer" || raw === "consumer") return raw; + return "unspecified"; +} + +function normalizeStatusCode(value: unknown): OTelStatusCode { + if (typeof value === "number") { + if (value === 1) return "ok"; + if (value === 2) return "error"; + return "unset"; + } + const raw = normalizeString(value)?.toLowerCase().replace(/^status_code_/, ""); + if (raw === "ok" || raw === "error") return raw; + return "unset"; +} + +function truncateUtf8(value: string, maxBytes: number): string { + const bytes = TEXT_ENCODER.encode(value); + if (bytes.byteLength <= maxBytes) return value; + return TEXT_DECODER.decode(bytes.slice(0, Math.max(0, maxBytes))); +} + +function sanitizeAttributeValue(value: unknown, redactKeys: Set, path: string, maxBytes: number): { value: unknown; redacted: string[] } { + if (typeof value === "string") return { value: truncateUtf8(value, maxBytes), redacted: [] }; + if (typeof value === "number") return { value: Number.isFinite(value) ? 
value : null, redacted: [] }; + if (typeof value === "boolean" || value === null) return { value, redacted: [] }; + if (typeof value === "bigint") return { value: value.toString(), redacted: [] }; + if (value instanceof Uint8Array) return { value: Buffer.from(value).toString("base64"), redacted: [] }; + if (Array.isArray(value)) { + const out: unknown[] = []; + const redacted: string[] = []; + for (let i = 0; i < value.length; i++) { + const child = sanitizeAttributeValue(value[i], redactKeys, `${path}.${i}`, maxBytes); + out.push(child.value); + redacted.push(...child.redacted); + } + return { value: out, redacted }; + } + if (!isPlainObject(value)) return { value: null, redacted: [] }; + const out: Record = {}; + const redacted: string[] = []; + for (const [key, childValue] of Object.entries(value)) { + const childPath = path === "" ? key : `${path}.${key}`; + if (redactKeys.has(key.toLowerCase())) { + out[key] = REDACTED_VALUE; + redacted.push(childPath); + continue; + } + const child = sanitizeAttributeValue(childValue, redactKeys, childPath, maxBytes); + out[key] = child.value; + redacted.push(...child.redacted); + } + return { value: out, redacted }; +} + +function limitAttributes( + attrs: Record, + args: { + maxAttributes: number; + maxAttributeValueBytes: number; + dropped: number; + redactKeys: Set; + path: string; + } +): { attributes: Record; dropped: number; redacted: string[] } { + const out: Record = {}; + const redacted: string[] = []; + let count = 0; + let dropped = Math.max(0, Math.trunc(args.dropped)); + for (const [key, value] of Object.entries(attrs)) { + if (count >= args.maxAttributes) { + dropped += 1; + continue; + } + count += 1; + const keyPath = args.path === "" ? 
key : `${args.path}.${key}`; + if (args.redactKeys.has(key.toLowerCase())) { + out[key] = REDACTED_VALUE; + redacted.push(keyPath); + continue; + } + const sanitized = sanitizeAttributeValue(value, args.redactKeys, keyPath, args.maxAttributeValueBytes); + out[key] = sanitized.value; + redacted.push(...sanitized.redacted); + } + return { attributes: out, dropped, redacted }; +} + +function getString(attrs: Record, ...keys: string[]): string | null { + for (const key of keys) { + const value = normalizeString(attrs[key]); + if (value) return value; + } + return null; +} + +function getInteger(attrs: Record, ...keys: string[]): number | null { + for (const key of keys) { + const value = normalizeInteger(attrs[key]); + if (value != null) return value; + } + return null; +} + +function getRequestId(attrs: Record, direct: string | null, requestIdAttributes: readonly string[]): string | null { + if (direct) return direct; + for (const key of requestIdAttributes) { + const value = normalizeString(attrs[key]); + if (value) return value; + } + return null; +} + +function extractExceptionFromEvents(events: DecodedOtelEvent[]): { type: string | null; message: string | null; stacktrace: string | null } { + for (const event of events) { + if (event.name !== "exception") continue; + return { + type: getString(event.attributes, "exception.type"), + message: getString(event.attributes, "exception.message"), + stacktrace: getString(event.attributes, "exception.stacktrace"), + }; + } + return { type: null, message: null, stacktrace: null }; +} + +function sha256Hex(value: string): string { + return createHash("sha256").update(value).digest("hex"); +} + +export function normalizeOtelDecodedSpanResult( + profile: OtelTracesStreamProfile, + input: DecodedOtelSpan +): Result { + const traceIdRes = normalizeHexIdResult(input.traceId, 32, "traceId"); + if (Result.isError(traceIdRes)) return traceIdRes; + const spanIdRes = normalizeHexIdResult(input.spanId, 16, "spanId"); + if 
(Result.isError(spanIdRes)) return spanIdRes; + const parentSpanIdRes = normalizeParentSpanIdResult(input.parentSpanId); + if (Result.isError(parentSpanIdRes)) return parentSpanIdRes; + + const limits = { ...DEFAULT_ATTRIBUTE_LIMITS, ...(profile.attributeLimits ?? {}) }; + const store = { ...DEFAULT_STORE_CONFIG, ...(profile.store ?? {}) }; + const redactKeys = new Set([...DEFAULT_OTEL_TRACE_REDACT_KEYS, ...(profile.redactKeys ?? [])].map((key) => key.toLowerCase())); + const requestIdAttributes = profile.requestIdAttributes ?? [...DEFAULT_REQUEST_ID_ATTRIBUTES]; + + const resourceRes = limitAttributes(input.resourceAttributes, { + maxAttributes: limits.maxAttributesPerSpan, + maxAttributeValueBytes: limits.maxAttributeValueBytes, + dropped: 0, + redactKeys, + path: "resource.attributes", + }); + const scopeRes = limitAttributes(input.instrumentationScope?.attributes ?? {}, { + maxAttributes: limits.maxAttributesPerSpan, + maxAttributeValueBytes: limits.maxAttributeValueBytes, + dropped: 0, + redactKeys, + path: "instrumentationScope.attributes", + }); + const attrsRes = limitAttributes(input.attributes, { + maxAttributes: limits.maxAttributesPerSpan, + maxAttributeValueBytes: limits.maxAttributeValueBytes, + dropped: input.droppedAttributesCount ?? 0, + redactKeys, + path: "attributes", + }); + + const startUnixNano = normalizeNanoString(input.startUnixNano); + const endUnixNano = normalizeNanoString(input.endUnixNano); + const durationRes = durationMs(startUnixNano, endUnixNano); + if (Result.isError(durationRes)) return durationRes; + const timestamp = isoFromUnixNano(startUnixNano) ?? normalizeString(input.timestamp) ?? new Date().toISOString(); + const endTimestamp = isoFromUnixNano(endUnixNano); + + const normalizedEvents: CanonicalOtelSpan["events"] = []; + let droppedEvents = Math.max(0, Math.trunc(input.droppedEventsCount ?? 
0)); + const eventNames: string[] = []; + for (const event of input.events) { + if (normalizedEvents.length >= limits.maxEventsPerSpan) { + droppedEvents += 1; + continue; + } + const eventAttrs = limitAttributes(event.attributes, { + maxAttributes: limits.maxAttributesPerSpan, + maxAttributeValueBytes: limits.maxAttributeValueBytes, + dropped: event.droppedAttributesCount ?? 0, + redactKeys, + path: `events.${normalizedEvents.length}.attributes`, + }); + const eventName = normalizeString(event.name) ?? ""; + eventNames.push(eventName); + normalizedEvents.push({ + timestamp: isoFromUnixNano(normalizeNanoString(event.timeUnixNano)), + timeUnixNano: normalizeNanoString(event.timeUnixNano), + name: eventName, + attributes: store.rawEvents ? eventAttrs.attributes : {}, + droppedAttributesCount: eventAttrs.dropped, + }); + resourceRes.redacted.push(...eventAttrs.redacted); + } + + const normalizedLinks: CanonicalOtelSpan["links"] = []; + let droppedLinks = Math.max(0, Math.trunc(input.droppedLinksCount ?? 0)); + for (const link of input.links) { + if (normalizedLinks.length >= limits.maxLinksPerSpan) { + droppedLinks += 1; + continue; + } + const linkTraceIdRes = normalizeHexIdResult(link.traceId, 32, "links.traceId"); + if (Result.isError(linkTraceIdRes)) { + droppedLinks += 1; + continue; + } + const linkSpanIdRes = normalizeHexIdResult(link.spanId, 16, "links.spanId"); + if (Result.isError(linkSpanIdRes)) { + droppedLinks += 1; + continue; + } + const linkAttrs = limitAttributes(link.attributes, { + maxAttributes: limits.maxAttributesPerSpan, + maxAttributeValueBytes: limits.maxAttributeValueBytes, + dropped: link.droppedAttributesCount ?? 0, + redactKeys, + path: `links.${normalizedLinks.length}.attributes`, + }); + normalizedLinks.push({ + traceId: linkTraceIdRes.value, + spanId: linkSpanIdRes.value, + traceState: normalizeString(link.traceState), + attributes: store.rawLinks ? 
linkAttrs.attributes : {}, + droppedAttributesCount: linkAttrs.dropped, + }); + resourceRes.redacted.push(...linkAttrs.redacted); + } + + const resourceAttrs = resourceRes.attributes; + const spanAttrs = attrsRes.attributes; + const service = getString(resourceAttrs, "service.name"); + const statusCode = normalizeStatusCode(input.status?.code); + const exception = extractExceptionFromEvents(normalizedEvents); + const attrErrorType = getString(spanAttrs, "exception.type", "error.type"); + const attrErrorMessage = getString(spanAttrs, "exception.message", "error.message"); + const attrErrorStack = getString(spanAttrs, "exception.stacktrace", "error.stacktrace"); + const httpStatusCode = getInteger(spanAttrs, "http.response.status_code", "http.status_code"); + const errorMessage = attrErrorMessage ?? exception.message ?? normalizeString(input.status?.message); + const traceFlagsRaw = normalizeInteger(input.traceFlags); + const dbStatementRaw = getString(spanAttrs, "db.statement", "db.query.text"); + const dbStatement = + profile.dbStatementMode === "raw" && dbStatementRaw + ? truncateUtf8(dbStatementRaw, limits.maxStatementBytes) + : null; + + const canonical: CanonicalOtelSpan = { + schemaVersion: 1, + signal: "trace.span", + timestamp, + endTimestamp, + startUnixNano, + endUnixNano, + duration: durationRes.value, + traceId: traceIdRes.value, + spanId: spanIdRes.value, + parentSpanId: parentSpanIdRes.value, + traceState: normalizeString(input.traceState), + traceFlags: { + sampled: traceFlagsRaw == null ? false : (traceFlagsRaw & 1) === 1, + raw: traceFlagsRaw, + }, + name: normalizeString(input.name) ?? 
"", + kind: normalizeSpanKind(input.kind), + status: { + code: statusCode, + message: normalizeString(input.status?.message), + }, + service, + serviceNamespace: getString(resourceAttrs, "service.namespace"), + serviceInstanceId: getString(resourceAttrs, "service.instance.id"), + environment: getString(resourceAttrs, "deployment.environment.name", "deployment.environment"), + version: getString(resourceAttrs, "service.version"), + region: getString(resourceAttrs, "cloud.region"), + requestId: getRequestId(spanAttrs, normalizeString(input.requestId), requestIdAttributes), + http: { + method: getString(spanAttrs, "http.request.method", "http.method"), + route: getString(spanAttrs, "http.route"), + path: getString(spanAttrs, "url.path", "http.target"), + target: getString(spanAttrs, "http.target"), + url: getString(spanAttrs, "url.full", "http.url"), + statusCode: httpStatusCode, + userAgent: getString(spanAttrs, "user_agent.original", "http.user_agent"), + }, + db: { + system: getString(spanAttrs, "db.system"), + name: getString(spanAttrs, "db.name", "db.namespace"), + operation: getString(spanAttrs, "db.operation", "db.operation.name"), + statement: dbStatement, + }, + rpc: { + system: getString(spanAttrs, "rpc.system"), + service: getString(spanAttrs, "rpc.service"), + method: getString(spanAttrs, "rpc.method"), + }, + messaging: { + system: getString(spanAttrs, "messaging.system"), + destination: getString(spanAttrs, "messaging.destination", "messaging.destination.name"), + operation: getString(spanAttrs, "messaging.operation", "messaging.operation.name"), + }, + error: { + isError: statusCode === "error" || (httpStatusCode != null && httpStatusCode >= 500) || !!attrErrorType || !!exception.type, + type: attrErrorType ?? exception.type, + message: errorMessage, + stacktrace: attrErrorStack ?? 
exception.stacktrace, + }, + instrumentationScope: { + name: normalizeString(input.instrumentationScope?.name), + version: normalizeString(input.instrumentationScope?.version), + schemaUrl: normalizeString(input.instrumentationScope?.schemaUrl), + attributes: scopeRes.attributes, + }, + resource: { + schemaUrl: normalizeString(input.resourceSchemaUrl), + attributes: store.rawResourceAttributes ? resourceAttrs : {}, + }, + attributes: store.rawSpanAttributes ? spanAttrs : {}, + events: store.rawEvents ? normalizedEvents : [], + eventNames, + links: store.rawLinks ? normalizedLinks : [], + dropped: { + attributes: attrsRes.dropped, + events: droppedEvents, + links: droppedLinks, + }, + redaction: { + keys: [...resourceRes.redacted, ...scopeRes.redacted, ...attrsRes.redacted].sort(), + }, + identity: { + spanKey: `${traceIdRes.value}:${spanIdRes.value}`, + dedupeKey: sha256Hex(`${traceIdRes.value}\0${spanIdRes.value}\0${startUnixNano ?? ""}\0${service ?? ""}\0${normalizeString(input.name) ?? ""}`), + }, + }; + + return Result.ok(canonical); +} + +function objectFromUnknown(value: unknown): Record { + return isPlainObject(value) ? structuredClone(value) : {}; +} + +function eventFromCanonical(value: unknown): DecodedOtelEvent | null { + if (!isPlainObject(value)) return null; + return { + timeUnixNano: normalizeNanoString(value.timeUnixNano), + name: normalizeString(value.name) ?? "", + attributes: objectFromUnknown(value.attributes), + droppedAttributesCount: normalizeInteger(value.droppedAttributesCount) ?? 
0, + }; +} + +function linkFromCanonical(value: unknown): DecodedOtelLink | null { + if (!isPlainObject(value)) return null; + const traceId = normalizeString(value.traceId); + const spanId = normalizeString(value.spanId); + if (!traceId || !spanId) return null; + return { + traceId, + spanId, + traceState: normalizeString(value.traceState), + attributes: objectFromUnknown(value.attributes), + droppedAttributesCount: normalizeInteger(value.droppedAttributesCount) ?? 0, + }; +} + +function decodedSpanFromCanonicalLikeResult(value: unknown): Result { + const objRes = expectPlainObjectResult(value, "otel-traces record"); + if (Result.isError(objRes)) return objRes; + const obj = objRes.value; + const traceId = normalizeString(obj.traceId); + const spanId = normalizeString(obj.spanId); + if (!traceId) return Result.err({ message: "traceId is required" }); + if (!spanId) return Result.err({ message: "spanId is required" }); + const resource = isPlainObject(obj.resource) ? obj.resource : {}; + const scope = isPlainObject(obj.instrumentationScope) ? obj.instrumentationScope : {}; + const status = isPlainObject(obj.status) ? obj.status : {}; + const traceFlags = isPlainObject(obj.traceFlags) ? obj.traceFlags : {}; + return Result.ok({ + traceId, + spanId, + parentSpanId: normalizeString(obj.parentSpanId), + traceState: normalizeString(obj.traceState), + traceFlags: normalizeInteger(traceFlags.raw), + name: normalizeString(obj.name) ?? 
"", + kind: obj.kind as number | string | null | undefined, + startUnixNano: normalizeNanoString(obj.startUnixNano), + endUnixNano: normalizeNanoString(obj.endUnixNano), + timestamp: normalizeString(obj.timestamp), + status: { + code: status.code as number | string | null | undefined, + message: normalizeString(status.message), + }, + resourceSchemaUrl: normalizeString(resource.schemaUrl), + resourceAttributes: objectFromUnknown(resource.attributes), + instrumentationScope: { + name: normalizeString(scope.name), + version: normalizeString(scope.version), + schemaUrl: normalizeString(scope.schemaUrl), + attributes: objectFromUnknown(scope.attributes), + }, + attributes: objectFromUnknown(obj.attributes), + events: Array.isArray(obj.events) ? obj.events.map(eventFromCanonical).filter((event): event is DecodedOtelEvent => !!event) : [], + links: Array.isArray(obj.links) ? obj.links.map(linkFromCanonical).filter((link): link is DecodedOtelLink => !!link) : [], + droppedAttributesCount: isPlainObject(obj.dropped) ? (normalizeInteger(obj.dropped.attributes) ?? 0) : 0, + droppedEventsCount: isPlainObject(obj.dropped) ? (normalizeInteger(obj.dropped.events) ?? 0) : 0, + droppedLinksCount: isPlainObject(obj.dropped) ? (normalizeInteger(obj.dropped.links) ?? 
0) : 0, + requestId: normalizeString(obj.requestId), + }); +} + +export function normalizeOtelTraceRecordResult( + profile: OtelTracesStreamProfile, + value: unknown +): Result { + const decodedRes = decodedSpanFromCanonicalLikeResult(value); + if (Result.isError(decodedRes)) return decodedRes; + const normalizedRes = normalizeOtelDecodedSpanResult(profile, decodedRes.value); + if (Result.isError(normalizedRes)) return normalizedRes; + return Result.ok({ + value: normalizedRes.value, + routingKey: normalizedRes.value.traceId, + }); +} diff --git a/src/profiles/otelTraces/otlp.ts b/src/profiles/otelTraces/otlp.ts new file mode 100644 index 0000000..0811472 --- /dev/null +++ b/src/profiles/otelTraces/otlp.ts @@ -0,0 +1,855 @@ +import { gunzipSync } from "node:zlib"; +import { Result } from "better-result"; +import type { OtlpTraceExportError, OtlpTraceExportResult } from "../profile"; +import { + normalizeOtelDecodedSpanResult, + type DecodedOtelEvent, + type DecodedOtelLink, + type DecodedOtelSpan, + type OtelTracesStreamProfile, +} from "./normalize"; + +const JSON_TEXT_DECODER = new TextDecoder(); +const JSON_CONTENT_TYPE = "application/json"; +const PROTOBUF_CONTENT_TYPE = "application/x-protobuf"; + +type ResourceSpansDecoded = { + resourceAttributes: Record; + resourceSchemaUrl: string | null; + scopeSpans: ScopeSpansDecoded[]; +}; + +type ScopeSpansDecoded = { + scope: { + name: string | null; + version: string | null; + schemaUrl: string | null; + attributes: Record; + }; + spans: Array>; +}; + +function baseContentType(value: string): string { + return value.split(";")[0]?.trim().toLowerCase() ?? 
""; +} + +function hexFromBytes(bytes: Uint8Array): string { + return Array.from(bytes) + .map((byte) => byte.toString(16).padStart(2, "0")) + .join(""); +} + +function isPlainObject(value: unknown): value is Record { + return !!value && typeof value === "object" && !Array.isArray(value); +} + +function normalizeString(value: unknown): string | null { + if (typeof value !== "string") return null; + const trimmed = value.trim(); + return trimmed === "" ? null : trimmed; +} + +function normalizeNanoString(value: unknown): string | null { + if (value == null) return null; + if (typeof value === "bigint") return value >= 0n ? value.toString() : null; + if (typeof value === "number" && Number.isFinite(value) && Number.isInteger(value) && value >= 0) return BigInt(value).toString(); + if (typeof value === "string") { + const trimmed = value.trim(); + if (/^(0|[1-9][0-9]*)$/.test(trimmed)) return trimmed; + } + return null; +} + +function anyValueFromJson(raw: unknown): unknown { + if (!isPlainObject(raw)) return structuredClone(raw); + if (Object.prototype.hasOwnProperty.call(raw, "stringValue")) return normalizeString(raw.stringValue) ?? ""; + if (Object.prototype.hasOwnProperty.call(raw, "boolValue")) return raw.boolValue === true; + if (Object.prototype.hasOwnProperty.call(raw, "intValue")) { + const value = raw.intValue; + if (typeof value === "string" && /^-?(0|[1-9][0-9]*)$/.test(value.trim())) return value.trim(); + if (typeof value === "number" && Number.isFinite(value)) return Math.trunc(value); + return null; + } + if (Object.prototype.hasOwnProperty.call(raw, "doubleValue")) return typeof raw.doubleValue === "number" ? raw.doubleValue : Number(raw.doubleValue); + if (Object.prototype.hasOwnProperty.call(raw, "bytesValue")) return normalizeString(raw.bytesValue) ?? 
""; + if (isPlainObject(raw.arrayValue) && Array.isArray(raw.arrayValue.values)) { + return raw.arrayValue.values.map(anyValueFromJson); + } + if (isPlainObject(raw.kvlistValue) && Array.isArray(raw.kvlistValue.values)) { + return keyValuesFromJson(raw.kvlistValue.values); + } + return structuredClone(raw); +} + +function keyValuesFromJson(raw: unknown): Record { + const out: Record = {}; + if (!Array.isArray(raw)) return out; + for (const item of raw) { + if (!isPlainObject(item)) continue; + const key = normalizeString(item.key); + if (!key) continue; + out[key] = anyValueFromJson(item.value); + } + return out; +} + +function eventFromJson(raw: unknown): DecodedOtelEvent | null { + if (!isPlainObject(raw)) return null; + return { + timeUnixNano: normalizeNanoString(raw.timeUnixNano), + name: normalizeString(raw.name) ?? "", + attributes: keyValuesFromJson(raw.attributes), + droppedAttributesCount: typeof raw.droppedAttributesCount === "number" ? raw.droppedAttributesCount : Number(raw.droppedAttributesCount ?? 0), + }; +} + +function linkFromJson(raw: unknown): DecodedOtelLink | null { + if (!isPlainObject(raw)) return null; + const traceId = normalizeString(raw.traceId); + const spanId = normalizeString(raw.spanId); + if (!traceId || !spanId) return null; + return { + traceId, + spanId, + traceState: normalizeString(raw.traceState), + attributes: keyValuesFromJson(raw.attributes), + droppedAttributesCount: typeof raw.droppedAttributesCount === "number" ? raw.droppedAttributesCount : Number(raw.droppedAttributesCount ?? 0), + }; +} + +function spanFromJson(raw: unknown): Omit | null { + if (!isPlainObject(raw)) return null; + const traceId = normalizeString(raw.traceId); + const spanId = normalizeString(raw.spanId); + if (!traceId || !spanId) return null; + const status = isPlainObject(raw.status) ? 
/**
 * Decodes an OTLP/JSON `ExportTraceServiceRequest` body into a flat list of spans.
 *
 * Each returned span carries the attributes/schema URL of the resource it was nested
 * under and the instrumentation scope it was nested under. Both the current
 * (`scopeSpans` / `scope`) and the legacy (`instrumentationLibrarySpans` /
 * `instrumentationLibrary`) property names are read; `scope` wins when both are present.
 *
 * Entries that are not plain objects, and spans that `spanFromJson` rejects, are
 * skipped silently — they are not reported as errors.
 *
 * @param body Raw request bytes; decoded with the module's JSON text decoder.
 * @returns The decoded spans, or an error when the body is not parseable JSON or the
 *          top-level JSON value is not an object.
 */
function decodeJsonExportResult(body: Uint8Array): Result<DecodedOtelSpan[], { message: string }> {
  let root: unknown;
  try {
    const text = JSON_TEXT_DECODER.decode(body);
    root = JSON.parse(text);
  } catch {
    return Result.err({ message: "invalid OTLP JSON" });
  }
  if (!isPlainObject(root)) {
    return Result.err({ message: "OTLP JSON request must be an object" });
  }

  // Anything that is not an array is treated as "no entries".
  const listOf = (candidate: unknown): unknown[] => (Array.isArray(candidate) ? candidate : []);
  const decoded: DecodedOtelSpan[] = [];

  for (const resourceEntry of listOf(root.resourceSpans)) {
    if (!isPlainObject(resourceEntry)) continue;

    const resource = isPlainObject(resourceEntry.resource) ? resourceEntry.resource : {};
    const resourceAttributes = keyValuesFromJson(resource.attributes);
    const resourceSchemaUrl = normalizeString(resourceEntry.schemaUrl);

    // Current-name entries first, legacy-name entries after them.
    const scopeEntries = [
      ...listOf(resourceEntry.scopeSpans),
      ...listOf(resourceEntry.instrumentationLibrarySpans),
    ];

    for (const scopeEntry of scopeEntries) {
      if (!isPlainObject(scopeEntry)) continue;

      const scopeRaw = isPlainObject(scopeEntry.scope)
        ? scopeEntry.scope
        : isPlainObject(scopeEntry.instrumentationLibrary)
          ? scopeEntry.instrumentationLibrary
          : {};
      // One scope object is built per entry and shared by every span beneath it.
      const instrumentationScope = {
        name: normalizeString(scopeRaw.name),
        version: normalizeString(scopeRaw.version),
        schemaUrl: normalizeString(scopeEntry.schemaUrl),
        attributes: keyValuesFromJson(scopeRaw.attributes),
      };

      for (const spanEntry of listOf(scopeEntry.spans)) {
        const span = spanFromJson(spanEntry);
        if (!span) continue;
        decoded.push({
          ...span,
          resourceAttributes,
          resourceSchemaUrl,
          instrumentationScope,
        });
      }
    }
  }

  return Result.ok(decoded);
}
Result.ok(out); + shift += 7n; + } + return Result.err({ message: "protobuf varint too long" }); + } + + readFixed32(): Result { + if (this.pos + 4 > this.bytes.byteLength) return Result.err({ message: "truncated protobuf fixed32" }); + const view = new DataView(this.bytes.buffer, this.bytes.byteOffset + this.pos, 4); + this.pos += 4; + return Result.ok(view.getUint32(0, true)); + } + + readFixed64(): Result { + if (this.pos + 8 > this.bytes.byteLength) return Result.err({ message: "truncated protobuf fixed64" }); + const view = new DataView(this.bytes.buffer, this.bytes.byteOffset + this.pos, 8); + this.pos += 8; + return Result.ok(view.getBigUint64(0, true)); + } + + readDouble(): Result { + if (this.pos + 8 > this.bytes.byteLength) return Result.err({ message: "truncated protobuf double" }); + const view = new DataView(this.bytes.buffer, this.bytes.byteOffset + this.pos, 8); + this.pos += 8; + return Result.ok(view.getFloat64(0, true)); + } + + readBytes(): Result { + const lenRes = this.readVarint(); + if (Result.isError(lenRes)) return lenRes; + const len = Number(lenRes.value); + if (!Number.isSafeInteger(len) || len < 0 || this.pos + len > this.bytes.byteLength) { + return Result.err({ message: "truncated protobuf bytes" }); + } + const out = this.bytes.slice(this.pos, this.pos + len); + this.pos += len; + return Result.ok(out); + } + + readString(): Result { + const bytesRes = this.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + return Result.ok(JSON_TEXT_DECODER.decode(bytesRes.value)); + } + + skip(wire: number): Result { + if (wire === 0) { + const res = this.readVarint(); + return Result.isError(res) ? res : Result.ok(undefined); + } + if (wire === 1) { + const res = this.readFixed64(); + return Result.isError(res) ? res : Result.ok(undefined); + } + if (wire === 2) { + const res = this.readBytes(); + return Result.isError(res) ? 
/**
 * Reinterprets a raw varint payload as a two's-complement signed 64-bit integer and
 * returns it as a decimal string.
 *
 * Only the low 64 bits are considered. The varint reader in this file accepts up to
 * ten bytes (70 payload bits), so an over-long encoding can hand this function a value
 * of 2^64 or more; truncating to 64 bits keeps the result inside the int64 range
 * instead of producing an out-of-range decimal.
 *
 * @param value Unsigned integer as read from the wire.
 * @returns Decimal text in the range [-2^63, 2^63 - 1].
 */
function signedInt64(value: bigint): string {
  return BigInt.asIntN(64, value).toString();
}
(Result.isError(skipRes)) return skipRes; + } + } + return Result.ok(value); +} + +function decodeArrayValue(bytes: Uint8Array): Result { + const reader = new ProtoReader(bytes); + const out: unknown[] = []; + while (!reader.eof()) { + const tagRes = reader.readTag(); + if (Result.isError(tagRes)) return tagRes; + if (tagRes.value.field === 1 && tagRes.value.wire === 2) { + const bytesRes = reader.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + const valueRes = decodeAnyValue(bytesRes.value); + if (Result.isError(valueRes)) return valueRes; + out.push(valueRes.value); + } else { + const skipRes = reader.skip(tagRes.value.wire); + if (Result.isError(skipRes)) return skipRes; + } + } + return Result.ok(out); +} + +function decodeKeyValue(bytes: Uint8Array): Result<{ key: string; value: unknown } | null, { message: string }> { + const reader = new ProtoReader(bytes); + let key = ""; + let value: unknown = null; + while (!reader.eof()) { + const tagRes = reader.readTag(); + if (Result.isError(tagRes)) return tagRes; + const { field, wire } = tagRes.value; + if (field === 1 && wire === 2) { + const keyRes = reader.readString(); + if (Result.isError(keyRes)) return keyRes; + key = keyRes.value; + } else if (field === 2 && wire === 2) { + const bytesRes = reader.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + const valueRes = decodeAnyValue(bytesRes.value); + if (Result.isError(valueRes)) return valueRes; + value = valueRes.value; + } else { + const skipRes = reader.skip(wire); + if (Result.isError(skipRes)) return skipRes; + } + } + return Result.ok(key === "" ? 
/**
 * Decodes a protobuf-encoded OTLP `InstrumentationScope` message.
 *
 * Recognised fields (all length-delimited): 1 = name, 2 = version, 3 = attributes.
 * Field 3 is a repeated `KeyValue`: every occurrence on the wire carries exactly one
 * key/value pair, so each one is decoded with `decodeKeyValue` and merged into
 * `attributes` (a later duplicate key overwrites an earlier one). This matches how
 * span, event and link attributes are decoded elsewhere in this file.
 *
 * `schemaUrl` is never set here and is returned as `null`. Unknown fields, and known
 * field numbers with an unexpected wire type, are skipped.
 *
 * @param bytes The serialized scope message.
 * @returns The decoded scope, or the first reader/decoder error encountered.
 */
function decodeScope(bytes: Uint8Array): Result<ScopeSpansDecoded["scope"], { message: string }> {
  const reader = new ProtoReader(bytes);
  const scope: ScopeSpansDecoded["scope"] = { name: null, version: null, schemaUrl: null, attributes: {} };
  while (!reader.eof()) {
    const tagRes = reader.readTag();
    if (Result.isError(tagRes)) return tagRes;
    const { field, wire } = tagRes.value;
    if (field === 1 && wire === 2) {
      const res = reader.readString();
      if (Result.isError(res)) return res;
      scope.name = res.value;
    } else if (field === 2 && wire === 2) {
      const res = reader.readString();
      if (Result.isError(res)) return res;
      scope.version = res.value;
    } else if (field === 3 && wire === 2) {
      // One KeyValue per occurrence. Decoding this payload as a KeyValueList would
      // treat the key string's bytes as a nested message and corrupt or reject it.
      const bytesRes = reader.readBytes();
      if (Result.isError(bytesRes)) return bytesRes;
      const kvRes = decodeKeyValue(bytesRes.value);
      if (Result.isError(kvRes)) return kvRes;
      // decodeKeyValue yields null for an entry with an empty key; such entries are dropped.
      if (kvRes.value) scope.attributes[kvRes.value.key] = kvRes.value.value;
    } else {
      const skipRes = reader.skip(wire);
      if (Result.isError(skipRes)) return skipRes;
    }
  }
  return Result.ok(scope);
}
/**
 * Decodes one protobuf-encoded span event into a `DecodedOtelEvent`.
 *
 * Recognised fields:
 *  - 1: event time, read as fixed64 (a varint encoding is tolerated) and stored as decimal text
 *  - 2: name
 *  - 3: one attribute key/value pair per occurrence (entries with an empty key are dropped)
 *  - 4: dropped attribute count
 *
 * Every other field — including a known field number with an unexpected wire type —
 * is skipped. The first reader/decoder error aborts decoding and is returned.
 */
function decodeEvent(bytes: Uint8Array): Result<DecodedOtelEvent, { message: string }> {
  const reader = new ProtoReader(bytes);
  const decoded: DecodedOtelEvent = { timeUnixNano: null, name: "", attributes: {}, droppedAttributesCount: 0 };

  while (!reader.eof()) {
    const tag = reader.readTag();
    if (Result.isError(tag)) return tag;
    const { field, wire } = tag.value;

    if (field === 1 && (wire === 0 || wire === 1)) {
      const time = wire === 1 ? reader.readFixed64() : reader.readVarint();
      if (Result.isError(time)) return time;
      decoded.timeUnixNano = time.value.toString();
      continue;
    }

    if (field === 2 && wire === 2) {
      const name = reader.readString();
      if (Result.isError(name)) return name;
      decoded.name = name.value;
      continue;
    }

    if (field === 3 && wire === 2) {
      const payload = reader.readBytes();
      if (Result.isError(payload)) return payload;
      const pair = decodeKeyValue(payload.value);
      if (Result.isError(pair)) return pair;
      if (pair.value) decoded.attributes[pair.value.key] = pair.value.value;
      continue;
    }

    if (field === 4 && wire === 0) {
      const dropped = reader.readVarint();
      if (Result.isError(dropped)) return dropped;
      decoded.droppedAttributesCount = Number(dropped.value);
      continue;
    }

    // Unknown field or unexpected wire type: step over its payload.
    const skipped = reader.skip(wire);
    if (Result.isError(skipped)) return skipped;
  }

  return Result.ok(decoded);
}
&& wire === 0) { + const res = reader.readVarint(); + if (Result.isError(res)) return res; + link.droppedAttributesCount = Number(res.value); + } else { + const skipRes = reader.skip(wire); + if (Result.isError(skipRes)) return skipRes; + } + } + return Result.ok(link); +} + +function decodeSpan(bytes: Uint8Array): Result, { message: string }> { + const reader = new ProtoReader(bytes); + const span: Omit = { + traceId: "", + spanId: "", + parentSpanId: null, + traceState: null, + traceFlags: null, + name: "", + kind: 0, + startUnixNano: null, + endUnixNano: null, + status: { code: 0, message: null }, + attributes: {}, + events: [], + links: [], + droppedAttributesCount: 0, + droppedEventsCount: 0, + droppedLinksCount: 0, + }; + while (!reader.eof()) { + const tagRes = reader.readTag(); + if (Result.isError(tagRes)) return tagRes; + const { field, wire } = tagRes.value; + if (field === 1 && wire === 2) { + const res = reader.readBytes(); + if (Result.isError(res)) return res; + span.traceId = hexFromBytes(res.value); + } else if (field === 2 && wire === 2) { + const res = reader.readBytes(); + if (Result.isError(res)) return res; + span.spanId = hexFromBytes(res.value); + } else if (field === 3 && wire === 2) { + const res = reader.readString(); + if (Result.isError(res)) return res; + span.traceState = res.value; + } else if (field === 4 && wire === 2) { + const res = reader.readBytes(); + if (Result.isError(res)) return res; + span.parentSpanId = res.value.byteLength === 0 ? null : hexFromBytes(res.value); + } else if (field === 5 && wire === 2) { + const res = reader.readString(); + if (Result.isError(res)) return res; + span.name = res.value; + } else if (field === 6 && wire === 0) { + const res = reader.readVarint(); + if (Result.isError(res)) return res; + span.kind = Number(res.value); + } else if ((field === 7 || field === 8) && (wire === 1 || wire === 0)) { + const res = wire === 1 ? 
reader.readFixed64() : reader.readVarint(); + if (Result.isError(res)) return res; + if (field === 7) span.startUnixNano = res.value.toString(); + else span.endUnixNano = res.value.toString(); + } else if (field === 9 && wire === 2) { + const bytesRes = reader.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + const kvRes = decodeKeyValue(bytesRes.value); + if (Result.isError(kvRes)) return kvRes; + if (kvRes.value) span.attributes[kvRes.value.key] = kvRes.value.value; + } else if (field === 10 && wire === 0) { + const res = reader.readVarint(); + if (Result.isError(res)) return res; + span.droppedAttributesCount = Number(res.value); + } else if (field === 11 && wire === 2) { + const bytesRes = reader.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + const eventRes = decodeEvent(bytesRes.value); + if (Result.isError(eventRes)) return eventRes; + span.events.push(eventRes.value); + } else if (field === 12 && wire === 0) { + const res = reader.readVarint(); + if (Result.isError(res)) return res; + span.droppedEventsCount = Number(res.value); + } else if (field === 13 && wire === 2) { + const bytesRes = reader.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + const linkRes = decodeLink(bytesRes.value); + if (Result.isError(linkRes)) return linkRes; + span.links.push(linkRes.value); + } else if (field === 14 && wire === 0) { + const res = reader.readVarint(); + if (Result.isError(res)) return res; + span.droppedLinksCount = Number(res.value); + } else if (field === 15 && wire === 2) { + const bytesRes = reader.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + const statusRes = decodeStatus(bytesRes.value); + if (Result.isError(statusRes)) return statusRes; + span.status = statusRes.value; + } else if (field === 16 && (wire === 5 || wire === 0)) { + if (wire === 5) { + const res = reader.readFixed32(); + if (Result.isError(res)) return res; + span.traceFlags = res.value; + } else { + const res = reader.readVarint(); 
+ if (Result.isError(res)) return res; + span.traceFlags = Number(res.value); + } + } else { + const skipRes = reader.skip(wire); + if (Result.isError(skipRes)) return skipRes; + } + } + return Result.ok(span); +} + +function decodeScopeSpans(bytes: Uint8Array): Result { + const reader = new ProtoReader(bytes); + const out: ScopeSpansDecoded = { + scope: { name: null, version: null, schemaUrl: null, attributes: {} }, + spans: [], + }; + while (!reader.eof()) { + const tagRes = reader.readTag(); + if (Result.isError(tagRes)) return tagRes; + const { field, wire } = tagRes.value; + if ((field === 1 || field === 1000) && wire === 2) { + const bytesRes = reader.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + const scopeRes = decodeScope(bytesRes.value); + if (Result.isError(scopeRes)) return scopeRes; + out.scope = { ...out.scope, ...scopeRes.value }; + } else if (field === 2 && wire === 2) { + const bytesRes = reader.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + const spanRes = decodeSpan(bytesRes.value); + if (Result.isError(spanRes)) return spanRes; + out.spans.push(spanRes.value); + } else if (field === 3 && wire === 2) { + const res = reader.readString(); + if (Result.isError(res)) return res; + out.scope.schemaUrl = res.value; + } else { + const skipRes = reader.skip(wire); + if (Result.isError(skipRes)) return skipRes; + } + } + return Result.ok(out); +} + +function decodeResourceSpans(bytes: Uint8Array): Result { + const reader = new ProtoReader(bytes); + const out: ResourceSpansDecoded = { resourceAttributes: {}, resourceSchemaUrl: null, scopeSpans: [] }; + while (!reader.eof()) { + const tagRes = reader.readTag(); + if (Result.isError(tagRes)) return tagRes; + const { field, wire } = tagRes.value; + if (field === 1 && wire === 2) { + const bytesRes = reader.readBytes(); + if (Result.isError(bytesRes)) return bytesRes; + const resourceRes = decodeResource(bytesRes.value); + if (Result.isError(resourceRes)) return resourceRes; 
/**
 * Undoes the request's content-encoding, enforces the decoded-size limit and dispatches
 * to the JSON or protobuf OTLP decoder based on the content type.
 *
 * Gzip bodies are inflated with an output cap equal to `maxDecodedBytes`, so a small
 * compressed payload cannot force a huge allocation before the size check runs
 * (decompression bomb). The explicit size check afterwards is kept: it covers
 * uncompressed bodies and runtimes that ignore the cap.
 *
 * @param args.contentType     Request content type; only its base type is compared.
 * @param args.contentEncoding Request content-encoding; empty/absent, `identity` and `gzip` are accepted.
 * @param args.body            Raw request bytes.
 * @param args.maxDecodedBytes Maximum allowed size of the body after decompression.
 * @returns The decoded spans plus the encoding the response should use, or an error with
 *          status 415 (unsupported encoding / content type) or 400 (bad gzip, body too
 *          large, undecodable payload).
 */
function decodeBody(args: {
  contentType: string;
  contentEncoding: string | null;
  body: Uint8Array;
  maxDecodedBytes: number;
}): Result<{ spans: DecodedOtelSpan[]; responseEncoding: "protobuf" | "json" }, OtlpTraceExportError> {
  const tooLarge = (): OtlpTraceExportError => ({
    status: 400,
    message: `decoded OTLP body too large (max ${args.maxDecodedBytes})`,
  });

  let body = args.body;
  const encoding = args.contentEncoding?.trim().toLowerCase() ?? "";
  if (encoding !== "" && encoding !== "identity" && encoding !== "gzip") {
    return Result.err({ status: 415, message: "unsupported content-encoding" });
  }
  if (encoding === "gzip") {
    const limit = args.maxDecodedBytes;
    // zlib rejects a maxOutputLength outside [1, buffer.kMaxLength]; 0x3fffffff is the
    // smallest kMaxLength Node documents (32-bit builds), so only limits in this range are
    // passed through. Other limits fall back to the previous unbounded inflate + post-check.
    const canBound = Number.isSafeInteger(limit) && limit >= 1 && limit <= 0x3fffffff;
    try {
      body = new Uint8Array(canBound ? gunzipSync(body, { maxOutputLength: limit }) : gunzipSync(body));
    } catch (err: unknown) {
      const code = typeof err === "object" && err !== null ? (err as { code?: unknown }).code : undefined;
      // Raised by zlib when the inflated output would exceed maxOutputLength.
      if (code === "ERR_BUFFER_TOO_LARGE") return Result.err(tooLarge());
      return Result.err({ status: 400, message: "invalid gzip body" });
    }
  }
  if (body.byteLength > args.maxDecodedBytes) {
    return Result.err(tooLarge());
  }

  const contentType = baseContentType(args.contentType);
  if (contentType === JSON_CONTENT_TYPE) {
    const spansRes = decodeJsonExportResult(body);
    if (Result.isError(spansRes)) return Result.err({ status: 400, message: spansRes.error.message });
    return Result.ok({ spans: spansRes.value, responseEncoding: "json" });
  }
  if (contentType === PROTOBUF_CONTENT_TYPE) {
    const spansRes = decodeProtobufExportResult(body);
    if (Result.isError(spansRes)) return Result.err({ status: 400, message: spansRes.error.message });
    return Result.ok({ spans: spansRes.value, responseEncoding: "protobuf" });
  }
  return Result.err({ status: 415, message: "OTLP traces require application/x-protobuf or application/json" });
}
/**
 * Appends `value` to `out` as a protobuf base-128 varint (7 payload bits per byte,
 * least-significant group first, high bit set on every byte except the last).
 *
 * A negative value is written as its 64-bit two's-complement bit pattern, which is
 * always ten bytes long, rather than pushing a negative number into the byte list.
 */
function writeVarint(out: number[], value: bigint): void {
  let rest = value < 0n ? BigInt.asUintN(64, value) : value;
  while (rest >= 0x80n) {
    out.push(Number((rest & 0x7fn) | 0x80n));
    rest >>= 7n;
  }
  out.push(Number(rest));
}

/** Appends a field tag: the field number shifted left by three bits, OR-ed with the wire type. */
function writeTag(out: number[], field: number, wire: number): void {
  writeVarint(out, BigInt((field << 3) | wire));
}

/** Appends a length-delimited (wire type 2) field holding `value` encoded as UTF-8. */
function writeString(out: number[], field: number, value: string): void {
  const bytes = new TextEncoder().encode(value);
  writeTag(out, field, 2);
  writeVarint(out, BigInt(bytes.byteLength));
  // Pushed one byte at a time: spreading a large array into push() can exceed the
  // engine's argument-count limit and throw a RangeError.
  for (const byte of bytes) out.push(byte);
}

/** Appends a varint (wire type 0) field holding `value`. */
function writeInt64(out: number[], field: number, value: bigint): void {
  writeTag(out, field, 0);
  writeVarint(out, value);
}

/** Appends a length-delimited (wire type 2) field whose payload is the already-encoded `body`. */
function writeMessage(out: number[], field: number, body: number[]): void {
  writeTag(out, field, 2);
  writeVarint(out, BigInt(body.length));
  for (const byte of body) out.push(byte);
}
`: ${result.warnings.join("; ")}` : ""}` + : ""; + if (result.responseEncoding === "json") { + if (result.rejectedSpans === 0) return { contentType: "application/json; charset=utf-8", body: "{}" }; + return { + contentType: "application/json; charset=utf-8", + body: JSON.stringify({ + partialSuccess: { + rejectedSpans: result.rejectedSpans, + errorMessage: message, + }, + }), + }; + } + if (result.rejectedSpans === 0) return { contentType: PROTOBUF_CONTENT_TYPE, body: new Uint8Array() }; + const partial: number[] = []; + writeInt64(partial, 1, BigInt(result.rejectedSpans)); + writeString(partial, 2, message); + const response: number[] = []; + writeMessage(response, 1, partial); + return { contentType: PROTOBUF_CONTENT_TYPE, body: new Uint8Array(response) }; +} diff --git a/src/profiles/otelTraces/schema.ts b/src/profiles/otelTraces/schema.ts new file mode 100644 index 0000000..b9bce34 --- /dev/null +++ b/src/profiles/otelTraces/schema.ts @@ -0,0 +1,405 @@ +import { + SCHEMA_REGISTRY_API_VERSION, + type SchemaRegistry, + type SearchConfig, + type SearchFieldConfig, +} from "../../schema/registry"; + +const NULLABLE_STRING = { type: ["string", "null"] } as const; +const NULLABLE_NUMBER = { type: ["number", "null"] } as const; +const ATTRIBUTES_SCHEMA = { type: "object", additionalProperties: true } as const; + +export const OTEL_TRACES_CANONICAL_SCHEMA = { + type: "object", + additionalProperties: false, + properties: { + schemaVersion: { type: "integer", enum: [1] }, + signal: { type: "string", enum: ["trace.span"] }, + timestamp: { type: "string" }, + endTimestamp: NULLABLE_STRING, + startUnixNano: NULLABLE_STRING, + endUnixNano: NULLABLE_STRING, + duration: NULLABLE_NUMBER, + traceId: { type: "string" }, + spanId: { type: "string" }, + parentSpanId: NULLABLE_STRING, + traceState: NULLABLE_STRING, + traceFlags: { + type: "object", + additionalProperties: false, + properties: { + sampled: { type: "boolean" }, + raw: { type: ["integer", "null"] }, + }, + required: 
["sampled", "raw"], + }, + name: { type: "string" }, + kind: { type: "string", enum: ["unspecified", "internal", "server", "client", "producer", "consumer"] }, + status: { + type: "object", + additionalProperties: false, + properties: { + code: { type: "string", enum: ["unset", "ok", "error"] }, + message: NULLABLE_STRING, + }, + required: ["code", "message"], + }, + service: NULLABLE_STRING, + serviceNamespace: NULLABLE_STRING, + serviceInstanceId: NULLABLE_STRING, + environment: NULLABLE_STRING, + version: NULLABLE_STRING, + region: NULLABLE_STRING, + requestId: NULLABLE_STRING, + http: { + type: "object", + additionalProperties: false, + properties: { + method: NULLABLE_STRING, + route: NULLABLE_STRING, + path: NULLABLE_STRING, + target: NULLABLE_STRING, + url: NULLABLE_STRING, + statusCode: { type: ["integer", "null"] }, + userAgent: NULLABLE_STRING, + }, + required: ["method", "route", "path", "target", "url", "statusCode", "userAgent"], + }, + db: { + type: "object", + additionalProperties: false, + properties: { + system: NULLABLE_STRING, + name: NULLABLE_STRING, + operation: NULLABLE_STRING, + statement: NULLABLE_STRING, + }, + required: ["system", "name", "operation", "statement"], + }, + rpc: { + type: "object", + additionalProperties: false, + properties: { + system: NULLABLE_STRING, + service: NULLABLE_STRING, + method: NULLABLE_STRING, + }, + required: ["system", "service", "method"], + }, + messaging: { + type: "object", + additionalProperties: false, + properties: { + system: NULLABLE_STRING, + destination: NULLABLE_STRING, + operation: NULLABLE_STRING, + }, + required: ["system", "destination", "operation"], + }, + error: { + type: "object", + additionalProperties: false, + properties: { + isError: { type: "boolean" }, + type: NULLABLE_STRING, + message: NULLABLE_STRING, + stacktrace: NULLABLE_STRING, + }, + required: ["isError", "type", "message", "stacktrace"], + }, + instrumentationScope: { + type: "object", + additionalProperties: false, + 
properties: { + name: NULLABLE_STRING, + version: NULLABLE_STRING, + schemaUrl: NULLABLE_STRING, + attributes: ATTRIBUTES_SCHEMA, + }, + required: ["name", "version", "schemaUrl", "attributes"], + }, + resource: { + type: "object", + additionalProperties: false, + properties: { + schemaUrl: NULLABLE_STRING, + attributes: ATTRIBUTES_SCHEMA, + }, + required: ["schemaUrl", "attributes"], + }, + attributes: ATTRIBUTES_SCHEMA, + events: { + type: "array", + items: { + type: "object", + additionalProperties: false, + properties: { + timestamp: NULLABLE_STRING, + timeUnixNano: NULLABLE_STRING, + name: { type: "string" }, + attributes: ATTRIBUTES_SCHEMA, + droppedAttributesCount: { type: "integer" }, + }, + required: ["timestamp", "timeUnixNano", "name", "attributes"], + }, + }, + eventNames: { + type: "array", + items: { type: "string" }, + }, + links: { + type: "array", + items: { + type: "object", + additionalProperties: false, + properties: { + traceId: { type: "string" }, + spanId: { type: "string" }, + traceState: NULLABLE_STRING, + attributes: ATTRIBUTES_SCHEMA, + droppedAttributesCount: { type: "integer" }, + }, + required: ["traceId", "spanId", "traceState", "attributes"], + }, + }, + dropped: { + type: "object", + additionalProperties: false, + properties: { + attributes: { type: "integer" }, + events: { type: "integer" }, + links: { type: "integer" }, + }, + required: ["attributes", "events", "links"], + }, + redaction: { + type: "object", + additionalProperties: false, + properties: { + keys: { + type: "array", + items: { type: "string" }, + }, + }, + required: ["keys"], + }, + identity: { + type: "object", + additionalProperties: false, + properties: { + spanKey: { type: "string" }, + dedupeKey: { type: "string" }, + }, + required: ["spanKey", "dedupeKey"], + }, + }, + required: [ + "schemaVersion", + "signal", + "timestamp", + "endTimestamp", + "startUnixNano", + "endUnixNano", + "duration", + "traceId", + "spanId", + "parentSpanId", + "traceState", + 
"traceFlags", + "name", + "kind", + "status", + "service", + "serviceNamespace", + "serviceInstanceId", + "environment", + "version", + "region", + "requestId", + "http", + "db", + "rpc", + "messaging", + "error", + "instrumentationScope", + "resource", + "attributes", + "events", + "eventNames", + "links", + "dropped", + "redaction", + "identity", + ], +} as const; + +const exactKeyword = (jsonPointer: string, aggregatable = false): SearchFieldConfig => { + const field: SearchFieldConfig = { + kind: "keyword", + bindings: [{ version: 1, jsonPointer }], + exact: true, + prefix: true, + exists: true, + sortable: true, + }; + if (aggregatable) field.aggregatable = true; + return field; +}; + +const lowercaseKeyword = (jsonPointer: string, aggregatable = false): SearchFieldConfig => ({ + ...exactKeyword(jsonPointer, aggregatable), + normalizer: "lowercase_v1", +}); + +const textField = (jsonPointer: string): SearchFieldConfig => ({ + kind: "text", + bindings: [{ version: 1, jsonPointer }], + analyzer: "unicode_word_v1", + exists: true, + positions: true, +}); + +export const OTEL_TRACES_DEFAULT_SEARCH_CONFIG: SearchConfig = { + profile: "otel-traces", + primaryTimestampField: "timestamp", + aliases: { + db: "db.system", + duration_ms: "duration", + error: "error.isError", + method: "http.method", + op: "name", + parent: "parentSpanId", + req: "requestId", + route: "http.route", + span: "spanId", + status: "http.statusCode", + svc: "service", + time: "timestamp", + trace: "traceId", + ts: "timestamp", + }, + defaultFields: [ + { field: "name", boost: 2 }, + { field: "error.message", boost: 2 }, + { field: "status.message", boost: 1.5 }, + { field: "events.name", boost: 1.2 }, + { field: "db.statement", boost: 0.5 }, + ], + fields: { + timestamp: { + kind: "date", + bindings: [{ version: 1, jsonPointer: "/timestamp" }], + exact: true, + column: true, + exists: true, + sortable: true, + aggregatable: true, + }, + endTimestamp: { + kind: "date", + bindings: [{ version: 1, 
jsonPointer: "/endTimestamp" }], + exact: true, + column: true, + exists: true, + sortable: true, + aggregatable: true, + }, + duration: { + kind: "float", + bindings: [{ version: 1, jsonPointer: "/duration" }], + exact: true, + column: true, + exists: true, + sortable: true, + aggregatable: true, + }, + traceId: exactKeyword("/traceId"), + spanId: exactKeyword("/spanId"), + parentSpanId: exactKeyword("/parentSpanId"), + requestId: exactKeyword("/requestId"), + service: lowercaseKeyword("/service", true), + serviceNamespace: lowercaseKeyword("/serviceNamespace", true), + serviceInstanceId: exactKeyword("/serviceInstanceId"), + environment: lowercaseKeyword("/environment", true), + version: exactKeyword("/version"), + region: lowercaseKeyword("/region", true), + name: { + kind: "keyword", + bindings: [{ version: 1, jsonPointer: "/name" }], + exact: true, + prefix: true, + exists: true, + sortable: true, + aggregatable: true, + }, + kind: lowercaseKeyword("/kind", true), + "status.code": lowercaseKeyword("/status/code", true), + "status.message": textField("/status/message"), + "error.isError": { + kind: "bool", + bindings: [{ version: 1, jsonPointer: "/error/isError" }], + exact: true, + column: true, + exists: true, + sortable: true, + aggregatable: true, + }, + "error.type": exactKeyword("/error/type", true), + "error.message": textField("/error/message"), + "error.stacktrace": textField("/error/stacktrace"), + "http.method": lowercaseKeyword("/http/method", true), + "http.route": exactKeyword("/http/route", true), + "http.path": exactKeyword("/http/path"), + "http.statusCode": { + kind: "integer", + bindings: [{ version: 1, jsonPointer: "/http/statusCode" }], + exact: true, + column: true, + exists: true, + sortable: true, + aggregatable: true, + }, + "db.system": lowercaseKeyword("/db/system", true), + "db.operation": lowercaseKeyword("/db/operation", true), + "db.statement": textField("/db/statement"), + "rpc.system": lowercaseKeyword("/rpc/system", true), + 
"rpc.service": exactKeyword("/rpc/service", true), + "rpc.method": exactKeyword("/rpc/method", true), + "messaging.system": lowercaseKeyword("/messaging/system", true), + "messaging.destination": exactKeyword("/messaging/destination", true), + "messaging.operation": lowercaseKeyword("/messaging/operation", true), + "events.name": textField("/eventNames"), + }, + rollups: { + spans: { + dimensions: ["service", "kind", "status.code"], + intervals: ["1m", "5m", "1h"], + measures: { + spans: { kind: "count" }, + latency: { kind: "summary", field: "duration", histogram: "log2_v1" }, + }, + }, + http_server: { + dimensions: ["service", "http.method", "http.route", "http.statusCode"], + intervals: ["1m", "5m", "1h"], + measures: { + requests: { kind: "count" }, + latency: { kind: "summary", field: "duration", histogram: "log2_v1" }, + }, + }, + }, +}; + +export function buildOtelTracesDefaultRegistry(stream: string): SchemaRegistry { + return { + apiVersion: SCHEMA_REGISTRY_API_VERSION, + schema: stream, + currentVersion: 1, + search: structuredClone(OTEL_TRACES_DEFAULT_SEARCH_CONFIG), + boundaries: [{ offset: 0, version: 1 }], + schemas: { + "1": structuredClone(OTEL_TRACES_CANONICAL_SCHEMA), + }, + lenses: {}, + }; +} diff --git a/src/profiles/profile.ts b/src/profiles/profile.ts index 1d762b6..0b1c108 100644 --- a/src/profiles/profile.ts +++ b/src/profiles/profile.ts @@ -72,6 +72,42 @@ export type PreparedJsonRecord = { routingKey: string | null; }; +export type OtlpTraceExportResponseEncoding = "protobuf" | "json"; + +export type OtlpTraceExportResult = { + records: PreparedJsonRecord[]; + acceptedSpans: number; + rejectedSpans: number; + warnings: string[]; + responseEncoding: OtlpTraceExportResponseEncoding; +}; + +export type OtlpTraceExportError = { + message: string; + status?: 400 | 415; +}; + +export type UnifiedTimelineItem = { + kind: "evlog.event" | "otel.span.start" | "otel.span.end" | "otel.span.event" | "otel.exception"; + time: string; + duration?: 
number | null; + service?: string | null; + title: string; + severity: "debug" | "info" | "warn" | "error"; + ids: { + requestId?: string | null; + traceId?: string | null; + spanId?: string | null; + parentSpanId?: string | null; + }; + source: { + stream: string; + offset?: string; + profile: string; + }; + data: unknown; +}; + export type MetricsCompanionRecord = { metric: string; unit: string; @@ -128,6 +164,21 @@ export interface StreamProfileJsonIngestCapability { prepareRecordResult(args: { stream: string; profile: StreamProfileSpec; value: unknown }): Result; } +export interface StreamProfileOtlpTracesCapability { + decodeExportRequestResult(args: { + stream: string; + profile: StreamProfileSpec; + contentType: string; + contentEncoding: string | null; + body: Uint8Array; + maxDecodedBytes: number; + }): Result; +} + +export interface StreamProfileCorrelationCapability { + toTimelineItems(args: { stream: string; offset?: string; record: unknown }): UnifiedTimelineItem[]; +} + export interface StreamProfileMetricsCapability { normalizeRecordResult(args: { stream: string; @@ -145,6 +196,8 @@ export interface StreamProfileDefinition { persistProfileResult(args: PersistProfileArgs): Result; touch?: StreamTouchCapability; jsonIngest?: StreamProfileJsonIngestCapability; + otlpTraces?: StreamProfileOtlpTracesCapability; + correlation?: StreamProfileCorrelationCapability; metrics?: StreamProfileMetricsCapability; } diff --git a/test/profile_otel_traces.test.ts b/test/profile_otel_traces.test.ts new file mode 100644 index 0000000..554315b --- /dev/null +++ b/test/profile_otel_traces.test.ts @@ -0,0 +1,461 @@ +import { describe, expect, test } from "bun:test"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { gzipSync } from "node:zlib"; +import { createProfileTestApp, fetchJsonApp } from "./profile_test_utils"; + +const TRACE_ID = "5b8efff798038103d269b633813fc60c"; +const SPAN_ID = 
"086e83747d0e381e"; +const CHILD_SPAN_ID = "186e83747d0e381f"; + +async function createOtelTraceStream(app: ReturnType["app"], stream: string, profile: Record = {}) { + await app.fetch( + new Request(`http://local/v1/stream/${stream}`, { + method: "PUT", + headers: { "content-type": "application/json" }, + }) + ); + return fetchJsonApp(app, `http://local/v1/stream/${stream}/_profile`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + apiVersion: "durable.streams/profile/v1", + profile: { + kind: "otel-traces", + ...profile, + }, + }), + }); +} + +function otlpJsonSpan(overrides: Record = {}) { + return { + traceId: TRACE_ID, + spanId: SPAN_ID, + name: "GET /checkout", + kind: 2, + startTimeUnixNano: "1772020800000000000", + endTimeUnixNano: "1772020800123000000", + attributes: [ + { key: "request.id", value: { stringValue: "req_otel_1" } }, + { key: "http.request.method", value: { stringValue: "GET" } }, + { key: "http.route", value: { stringValue: "/checkout" } }, + { key: "http.response.status_code", value: { intValue: "500" } }, + { key: "authorization", value: { stringValue: "Bearer secret" } }, + { key: "db.system", value: { stringValue: "postgresql" } }, + { key: "db.statement", value: { stringValue: "SELECT * FROM users WHERE email = 'a@example.com'" } }, + ], + events: [ + { + timeUnixNano: "1772020800100000000", + name: "exception", + attributes: [ + { key: "exception.type", value: { stringValue: "Error" } }, + { key: "exception.message", value: { stringValue: "checkout failed" } }, + { key: "token", value: { stringValue: "secret-token" } }, + ], + }, + ], + status: { code: 2, message: "failed" }, + ...overrides, + }; +} + +function otlpJsonRequest(spans = [otlpJsonSpan()]) { + return { + resourceSpans: [ + { + resource: { + attributes: [ + { key: "service.name", value: { stringValue: "checkout" } }, + { key: "deployment.environment.name", value: { stringValue: "prod" } }, + { key: "service.version", value: { 
stringValue: "1.2.3" } }, + ], + }, + scopeSpans: [ + { + scope: { name: "test-sdk", version: "1.0.0" }, + spans, + }, + ], + }, + ], + }; +} + +function writeVarint(out: number[], value: bigint): void { + let n = value; + while (n >= 0x80n) { + out.push(Number((n & 0x7fn) | 0x80n)); + n >>= 7n; + } + out.push(Number(n)); +} + +function writeTag(out: number[], field: number, wire: number): void { + writeVarint(out, BigInt((field << 3) | wire)); +} + +function writeString(out: number[], field: number, value: string): void { + const bytes = new TextEncoder().encode(value); + writeTag(out, field, 2); + writeVarint(out, BigInt(bytes.byteLength)); + out.push(...bytes); +} + +function writeBytes(out: number[], field: number, bytes: Uint8Array): void { + writeTag(out, field, 2); + writeVarint(out, BigInt(bytes.byteLength)); + out.push(...bytes); +} + +function writeMessage(out: number[], field: number, body: number[]): void { + writeBytes(out, field, new Uint8Array(body)); +} + +function writeFixed64(out: number[], field: number, value: bigint): void { + writeTag(out, field, 1); + const bytes = new Uint8Array(8); + new DataView(bytes.buffer).setBigUint64(0, value, true); + out.push(...bytes); +} + +function hexBytes(hex: string): Uint8Array { + const bytes = new Uint8Array(hex.length / 2); + for (let i = 0; i < bytes.length; i++) bytes[i] = Number.parseInt(hex.slice(i * 2, i * 2 + 2), 16); + return bytes; +} + +function anyString(value: string): number[] { + const out: number[] = []; + writeString(out, 1, value); + return out; +} + +function kvString(key: string, value: string): number[] { + const out: number[] = []; + writeString(out, 1, key); + writeMessage(out, 2, anyString(value)); + return out; +} + +function statusMessage(code: number, message: string): number[] { + const out: number[] = []; + writeString(out, 2, message); + writeTag(out, 3, 0); + writeVarint(out, BigInt(code)); + return out; +} + +function makeOtlpProtoRequest(): Uint8Array { + const span: number[] 
= []; + writeBytes(span, 1, hexBytes(TRACE_ID)); + writeBytes(span, 2, hexBytes(CHILD_SPAN_ID)); + writeBytes(span, 4, hexBytes(SPAN_ID)); + writeString(span, 5, "SELECT cart"); + writeTag(span, 6, 0); + writeVarint(span, 1n); + writeFixed64(span, 7, 1772020800010000000n); + writeFixed64(span, 8, 1772020800018000000n); + writeMessage(span, 9, kvString("request.id", "req_proto_1")); + writeMessage(span, 9, kvString("db.system", "postgresql")); + writeMessage(span, 9, kvString("db.operation", "SELECT")); + writeMessage(span, 15, statusMessage(1, "ok")); + + const scope: number[] = []; + writeString(scope, 1, "proto-test"); + + const scopeSpans: number[] = []; + writeMessage(scopeSpans, 1, scope); + writeMessage(scopeSpans, 2, span); + + const resource: number[] = []; + writeMessage(resource, 1, kvString("service.name", "checkout")); + writeMessage(resource, 1, kvString("deployment.environment.name", "prod")); + + const resourceSpans: number[] = []; + writeMessage(resourceSpans, 1, resource); + writeMessage(resourceSpans, 2, scopeSpans); + + const request: number[] = []; + writeMessage(request, 1, resourceSpans); + return new Uint8Array(request); +} + +describe("otel-traces profile", () => { + test("installs on json streams and exposes canonical schema/search defaults", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-install-")); + const { app } = createProfileTestApp(root); + try { + const res = await createOtelTraceStream(app, "otel-install", { + redactKeys: ["sessionToken"], + requestIdAttributes: ["request.id", "x-request-id"], + attributeLimits: { maxAttributesPerSpan: 32 }, + store: { rawLinks: false }, + dbStatementMode: "raw", + }); + expect(res.status).toBe(200); + expect(res.body?.profile).toEqual({ + kind: "otel-traces", + redactKeys: ["sessiontoken"], + requestIdAttributes: ["request.id", "x-request-id"], + attributeLimits: { maxAttributesPerSpan: 32 }, + store: { rawLinks: false }, + dbStatementMode: "raw", + }); + + const 
schemaRes = await fetchJsonApp(app, "http://local/v1/stream/otel-install/_schema", { method: "GET" }); + expect(schemaRes.status).toBe(200); + expect(schemaRes.body?.search?.profile).toBe("otel-traces"); + expect(schemaRes.body?.search?.aliases?.trace).toBe("traceId"); + expect(schemaRes.body?.search?.aliases?.status).toBe("http.statusCode"); + expect(schemaRes.body?.search?.fields?.traceId?.kind).toBe("keyword"); + expect(schemaRes.body?.search?.fields?.duration?.kind).toBe("float"); + expect(schemaRes.body?.search?.fields?.["events.name"]?.bindings?.[0]?.jsonPointer).toBe("/eventNames"); + expect(schemaRes.body?.search?.rollups?.spans?.measures?.latency?.field).toBe("duration"); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("rejects non-json streams, invalid config, and late profile install", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-validate-")); + const { app } = createProfileTestApp(root); + try { + await app.fetch(new Request("http://local/v1/stream/otel-non-json", { method: "PUT", headers: { "content-type": "text/plain" } })); + const nonJsonRes = await fetchJsonApp(app, "http://local/v1/stream/otel-non-json/_profile", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ profile: { kind: "otel-traces" } }), + }); + expect(nonJsonRes.status).toBe(400); + expect(nonJsonRes.body?.error?.message).toContain("application/json"); + + await app.fetch(new Request("http://local/v1/stream/otel-invalid", { method: "PUT", headers: { "content-type": "application/json" } })); + const invalidRes = await fetchJsonApp(app, "http://local/v1/stream/otel-invalid/_profile", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ profile: { kind: "otel-traces", dbStatementMode: "redact_literals" } }), + }); + expect(invalidRes.status).toBe(400); + expect(invalidRes.body?.error?.message).toContain("dbStatementMode"); + 
+ await app.fetch(new Request("http://local/v1/stream/otel-late", { method: "PUT", headers: { "content-type": "application/json" } })); + await app.fetch( + new Request("http://local/v1/stream/otel-late", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ ok: true }), + }) + ); + const lateRes = await fetchJsonApp(app, "http://local/v1/stream/otel-late/_profile", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ profile: { kind: "otel-traces" } }), + }); + expect(lateRes.status).toBe(400); + expect(lateRes.body?.error?.message).toContain("before appending data"); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("normalizes canonical JSON appends, redacts attributes, and supports search aliases", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-json-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await createOtelTraceStream(app, "otel-json", { dbStatementMode: "raw" }); + const appendRes = await app.fetch( + new Request("http://local/v1/stream/otel-json", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + traceId: TRACE_ID, + spanId: SPAN_ID, + parentSpanId: null, + name: "GET /checkout", + kind: "server", + startUnixNano: "1772020800000000000", + endUnixNano: "1772020800123000000", + status: { code: "error", message: "failed" }, + resource: { + attributes: { + "service.name": "checkout", + "deployment.environment.name": "prod", + }, + }, + attributes: { + "request.id": "req_json_1", + "http.request.method": "GET", + "http.route": "/checkout", + "http.response.status_code": 500, + authorization: "Bearer secret", + "db.system": "postgresql", + "db.statement": "SELECT 1", + }, + events: [ + { + timeUnixNano: "1772020800100000000", + name: "exception", + attributes: { + "exception.type": "Error", + 
"exception.message": "checkout failed", + token: "secret-token", + }, + }, + ], + }), + }) + ); + expect([200, 204]).toContain(appendRes.status); + + const readRes = await fetchJsonApp(app, "http://local/v1/stream/otel-json?format=json", { method: "GET" }); + expect(readRes.status).toBe(200); + expect(readRes.body).toHaveLength(1); + const span = readRes.body[0]; + expect(span).toMatchObject({ + schemaVersion: 1, + signal: "trace.span", + traceId: TRACE_ID, + spanId: SPAN_ID, + requestId: "req_json_1", + service: "checkout", + environment: "prod", + duration: 123, + http: { method: "GET", route: "/checkout", statusCode: 500 }, + db: { system: "postgresql", statement: "SELECT 1" }, + error: { isError: true, type: "Error", message: "checkout failed" }, + eventNames: ["exception"], + }); + expect(span.attributes.authorization).toBe("[REDACTED]"); + expect(span.events[0].attributes.token).toBe("[REDACTED]"); + expect(span.redaction.keys).toEqual(expect.arrayContaining(["attributes.authorization", "events.0.attributes.token"])); + + const searchRes = await fetchJsonApp(app, "http://local/v1/stream/otel-json/_search", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + q: `trace:${TRACE_ID} req:req_json_1 status:>=500`, + sort: ["timestamp:asc", "spanId:asc"], + }), + }); + expect(searchRes.status).toBe(200); + expect(searchRes.body?.hits).toHaveLength(1); + expect(searchRes.body?.hits?.[0]?.fields).toMatchObject({ + traceId: TRACE_ID, + requestId: "req_json_1", + service: "checkout", + "http.statusCode": 500, + }); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("ingests OTLP JSON over the default endpoint with gzip and auto-create", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-otlp-json-")); + const { app } = createProfileTestApp(root, { + otlpTracesStream: "auto-traces", + otlpAutoCreate: true, + searchWalOverlayQuietPeriodMs: 0, + }); + try { + 
const body = gzipSync(JSON.stringify(otlpJsonRequest())); + const res = await fetchJsonApp(app, "http://local/v1/traces", { + method: "POST", + headers: { + "content-type": "application/json", + "content-encoding": "gzip", + }, + body, + }); + expect(res.status).toBe(200); + expect(res.body).toEqual({}); + expect(app.deps.db.getStream("auto-traces")?.profile).toBe("otel-traces"); + + const readRes = await fetchJsonApp(app, "http://local/v1/stream/auto-traces?format=json", { method: "GET" }); + expect(readRes.status).toBe(200); + expect(readRes.body).toHaveLength(1); + expect(readRes.body[0]).toMatchObject({ + traceId: TRACE_ID, + spanId: SPAN_ID, + requestId: "req_otel_1", + service: "checkout", + http: { method: "GET", route: "/checkout", statusCode: 500 }, + db: { system: "postgresql", statement: null }, + }); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("ingests OTLP protobuf on explicit stream endpoint", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-otlp-proto-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await createOtelTraceStream(app, "proto-traces"); + const res = await app.fetch( + new Request("http://local/v1/stream/proto-traces/_otlp/v1/traces", { + method: "POST", + headers: { "content-type": "application/x-protobuf" }, + body: makeOtlpProtoRequest(), + }) + ); + expect(res.status).toBe(200); + expect(res.headers.get("content-type")).toBe("application/x-protobuf"); + expect((await res.arrayBuffer()).byteLength).toBe(0); + + const readRes = await fetchJsonApp(app, "http://local/v1/stream/proto-traces?format=json", { method: "GET" }); + expect(readRes.status).toBe(200); + expect(readRes.body).toHaveLength(1); + expect(readRes.body[0]).toMatchObject({ + traceId: TRACE_ID, + spanId: CHILD_SPAN_ID, + parentSpanId: SPAN_ID, + requestId: "req_proto_1", + service: "checkout", + db: { system: "postgresql", operation: "SELECT" }, + 
status: { code: "ok", message: "ok" }, + }); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("returns OTLP partial success for rejected spans without dropping valid spans", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-partial-")); + const { app } = createProfileTestApp(root); + try { + await createOtelTraceStream(app, "partial-traces"); + const res = await fetchJsonApp(app, "http://local/v1/stream/partial-traces/_otlp/v1/traces", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify( + otlpJsonRequest([ + otlpJsonSpan(), + otlpJsonSpan({ + traceId: "00000000000000000000000000000000", + spanId: "0000000000000000", + }), + ]) + ), + }); + expect(res.status).toBe(200); + expect(res.body?.partialSuccess?.rejectedSpans).toBe(1); + expect(res.body?.partialSuccess?.errorMessage).toContain("traceId"); + + const readRes = await fetchJsonApp(app, "http://local/v1/stream/partial-traces?format=json", { method: "GET" }); + expect(readRes.body).toHaveLength(1); + expect(readRes.body[0]?.traceId).toBe(TRACE_ID); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); +}); From d1f37039025b57d1babf535f3d8d2ed8b8139143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Thu, 11 Jun 2026 20:41:11 +0700 Subject: [PATCH 02/12] Add request observability correlation API --- src/app_core.ts | 277 +++++++++++++++++- src/observe/request.ts | 535 +++++++++++++++++++++++++++++++++++ test/observe_request.test.ts | 261 +++++++++++++++++ 3 files changed, 1072 insertions(+), 1 deletion(-) create mode 100644 src/observe/request.ts create mode 100644 test/observe_request.test.ts diff --git a/src/app_core.ts b/src/app_core.ts index 7e74eab..427891e 100644 --- a/src/app_core.ts +++ b/src/app_core.ts @@ -4,7 +4,7 @@ import type { Config } from "./config"; import { SqliteDurableStore, type StreamRow } from "./db/db"; import { 
IngestQueue, type ProducerInfo, type AppendRow, type AppendResult } from "./ingest"; import type { ObjectStore } from "./objectstore/interface"; -import type { StreamReader, ReadBatch, ReaderError, SearchResultBatch } from "./reader"; +import type { StreamReader, ReadBatch, ReaderError, SearchHit, SearchResultBatch } from "./reader"; import { StreamNotifier } from "./notifier"; import { encodeOffset, parseOffsetResult, offsetToSeqOrNeg1, canonicalizeOffset, type ParsedOffset } from "./offset"; import { parseDurationMsResult } from "./util/duration"; @@ -51,6 +51,7 @@ import { parseAggregateRequestBodyResult } from "./search/aggregate"; import { StreamProfileStore, parseProfileUpdateResult, + resolveCorrelationCapability, resolveOtlpTracesCapability, resolveJsonIngestCapability, resolveTouchCapability, @@ -58,6 +59,17 @@ import { type StreamTouchRoute, } from "./profiles"; import { encodeOtlpTraceExportResponse } from "./profiles/otelTraces/otlp"; +import { + buildObserveSummary, + buildTimeSearchClauses, + buildTraceDetails, + choosePrimaryEvent, + combineSearchClauses, + parseObserveRequestResult, + quoteSearchValue, + sortTimeline, + summarizeSearchCoverage, +} from "./observe/request"; import { dsError } from "./util/ds_error.ts"; import { streamHash16Hex } from "./util/stream_paths"; @@ -229,6 +241,10 @@ function normalizeContentType(value: string | null): string | null { return base ? 
base : null; } +function isRecord(value: unknown): value is Record { + return !!value && typeof value === "object" && !Array.isArray(value); +} + function isJsonContentType(value: string | null): boolean { return normalizeContentType(value) === "application/json"; } @@ -1763,6 +1779,262 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { } }; + const handleObserveRequest = async (): Promise => { + if (req.method !== "POST") return badRequest("unsupported method"); + let body: unknown; + try { + body = await req.json(); + } catch { + return badRequest("observe request must be valid JSON"); + } + const requestRes = parseObserveRequestResult(body); + if (Result.isError(requestRes)) return badRequest(requestRes.error.message); + const observeReq = requestRes.value; + + const loadCorrelationCapability = (stream: string): ReturnType | Response => { + const srow = db.getStream(stream); + if (!srow || db.isDeleted(srow)) return notFound(); + if (srow.expires_at_ms != null && db.nowMs() > srow.expires_at_ms) return notFound("stream expired"); + const profileRes = profiles.getProfileResult(stream, srow); + if (Result.isError(profileRes)) return internalError("invalid stream profile"); + const capability = resolveCorrelationCapability(profileRes.value); + if (!capability) return badRequest(`stream ${stream} profile does not support observability correlation`); + const regRes = registry.getRegistryResult(stream); + if (Result.isError(regRes)) return internalError(regRes.error.message); + if (!regRes.value.search) return badRequest(`stream ${stream} does not have search configured`); + return capability; + }; + + const eventCorrelation = + observeReq.include.events && observeReq.streams.events ? loadCorrelationCapability(observeReq.streams.events) : null; + if (eventCorrelation instanceof Response) return eventCorrelation; + const traceCorrelation = + observeReq.include.trace && observeReq.streams.traces ? 
loadCorrelationCapability(observeReq.streams.traces) : null; + if (traceCorrelation instanceof Response) return traceCorrelation; + + const runPagedSearch = async ( + stream: string, + q: string, + limit: number, + sort: string[] + ): Promise<{ hits: SearchHit[]; batches: SearchResultBatch[]; limitReached: boolean } | Response> => { + const regRes = registry.getRegistryResult(stream); + if (Result.isError(regRes)) return internalError(regRes.error.message); + const hits: SearchHit[] = []; + const batches: SearchResultBatch[] = []; + const seenOffsets = new Set(); + let searchAfter: unknown[] | null = null; + let limitReached = false; + while (hits.length < limit) { + const size = Math.min(500, limit - hits.length); + const requestBody: Record = { + q, + size, + sort, + timeout_ms: SEARCH_REQUEST_TIMEOUT_MS, + }; + if (searchAfter) requestBody.search_after = searchAfter; + const parsedRes = parseSearchRequestBodyResult(regRes.value, requestBody); + if (Result.isError(parsedRes)) return badRequest(parsedRes.error.message); + const request = { + ...parsedRes.value, + timeoutMs: clampSearchRequestTimeoutMs(parsedRes.value.timeoutMs), + }; + const searchRes = await runForegroundWithGate(searchGate, () => reader.searchResult({ stream, request })); + if (Result.isError(searchRes)) return readerErrorResponse(searchRes.error); + batches.push(searchRes.value); + for (const hit of searchRes.value.hits) { + if (seenOffsets.has(hit.offset)) continue; + seenOffsets.add(hit.offset); + hits.push(hit); + if (hits.length >= limit) break; + } + if (!searchRes.value.nextSearchAfter || searchRes.value.hits.length === 0) break; + searchAfter = searchRes.value.nextSearchAfter; + if (hits.length >= limit) { + limitReached = true; + break; + } + } + return { hits, batches, limitReached }; + }; + + const timeClauses = buildTimeSearchClauses(observeReq.time); + const lookupClause = (field: "req" | "trace" | "span", value: string) => `${field}:${quoteSearchValue(value)}`; + const eventSort = 
["timestamp:desc", "offset:desc"]; + const traceSort = ["timestamp:asc", "spanId:asc"]; + let eventHits: SearchHit[] = []; + let eventBatches: SearchResultBatch[] = []; + let eventLimitReached = false; + let traceHits: SearchHit[] = []; + let traceBatches: SearchResultBatch[] = []; + let traceLimitReached = false; + const candidateTraceIds = new Set(); + const addTraceIdsFromHits = (hits: SearchHit[]) => { + for (const hit of hits) { + if (!isRecord(hit.source)) continue; + const traceId = typeof hit.source.traceId === "string" ? hit.source.traceId : null; + if (traceId) candidateTraceIds.add(traceId); + } + }; + const appendSearch = ( + target: "events" | "traces", + result: { hits: SearchHit[]; batches: SearchResultBatch[]; limitReached: boolean } + ) => { + if (target === "events") { + const seen = new Set(eventHits.map((hit) => hit.offset)); + for (const hit of result.hits) { + if (seen.has(hit.offset)) continue; + seen.add(hit.offset); + eventHits.push(hit); + } + eventBatches.push(...result.batches); + eventLimitReached = eventLimitReached || result.limitReached || eventHits.length >= observeReq.limits.events && !!result.batches.at(-1)?.nextSearchAfter; + } else { + const seen = new Set(traceHits.map((hit) => hit.offset)); + for (const hit of result.hits) { + if (seen.has(hit.offset)) continue; + seen.add(hit.offset); + traceHits.push(hit); + } + traceBatches.push(...result.batches); + traceLimitReached = traceLimitReached || result.limitReached || traceHits.length >= observeReq.limits.spans && !!result.batches.at(-1)?.nextSearchAfter; + } + }; + + const searchEvents = async (field: "req" | "trace" | "span", value: string): Promise => { + if (!observeReq.include.events || !observeReq.streams.events) return null; + if (eventHits.length >= observeReq.limits.events) return null; + const q = combineSearchClauses(lookupClause(field, value), ...timeClauses); + const result = await runPagedSearch(observeReq.streams.events, q, observeReq.limits.events - 
eventHits.length, eventSort); + if (result instanceof Response) return result; + appendSearch("events", result); + addTraceIdsFromHits(result.hits); + return null; + }; + + const searchTraces = async (field: "req" | "trace" | "span", value: string): Promise => { + if (!observeReq.include.trace || !observeReq.streams.traces) return null; + if (traceHits.length >= observeReq.limits.spans) return null; + const q = combineSearchClauses(lookupClause(field, value), ...timeClauses); + const result = await runPagedSearch(observeReq.streams.traces, q, observeReq.limits.spans - traceHits.length, traceSort); + if (result instanceof Response) return result; + appendSearch("traces", result); + addTraceIdsFromHits(result.hits); + return null; + }; + + if (observeReq.lookup.requestId) { + const eventResponse = await searchEvents("req", observeReq.lookup.requestId); + if (eventResponse) return eventResponse; + if (candidateTraceIds.size > 0) { + for (const traceId of candidateTraceIds) { + const traceResponse = await searchTraces("trace", traceId); + if (traceResponse) return traceResponse; + } + } else { + const traceResponse = await searchTraces("req", observeReq.lookup.requestId); + if (traceResponse) return traceResponse; + } + } else if (observeReq.lookup.traceId) { + candidateTraceIds.add(observeReq.lookup.traceId); + const traceResponse = await searchTraces("trace", observeReq.lookup.traceId); + if (traceResponse) return traceResponse; + const eventResponse = await searchEvents("trace", observeReq.lookup.traceId); + if (eventResponse) return eventResponse; + } else if (observeReq.lookup.spanId) { + const traceResponse = await searchTraces("span", observeReq.lookup.spanId); + if (traceResponse) return traceResponse; + if (candidateTraceIds.size > 0) { + for (const traceId of Array.from(candidateTraceIds)) { + const fullTraceResponse = await searchTraces("trace", traceId); + if (fullTraceResponse) return fullTraceResponse; + const eventResponse = await searchEvents("trace", 
traceId); + if (eventResponse) return eventResponse; + } + } else { + const eventResponse = await searchEvents("span", observeReq.lookup.spanId); + if (eventResponse) return eventResponse; + } + } + + const eventCoverage = summarizeSearchCoverage(eventBatches, eventHits, eventLimitReached); + const traceCoverage = summarizeSearchCoverage(traceBatches, traceHits, traceLimitReached); + const trace = buildTraceDetails( + traceHits.map((hit) => hit.source), + { spanLimitReached: traceCoverage.limit_reached, coverageComplete: traceCoverage.complete } + ); + const primaryEventHit = choosePrimaryEvent(eventHits, trace.traceId ?? observeReq.lookup.traceId); + const primaryEvent = primaryEventHit && isRecord(primaryEventHit.source) ? primaryEventHit.source : null; + const timeline: unknown[] = []; + if (observeReq.include.timeline) { + const items: any[] = []; + if (eventCorrelation && observeReq.streams.events) { + for (const hit of eventHits) { + items.push(...eventCorrelation.toTimelineItems({ stream: observeReq.streams.events, offset: hit.offset, record: hit.source })); + } + } + if (traceCorrelation && observeReq.streams.traces) { + for (const hit of traceHits) { + items.push(...traceCorrelation.toTimelineItems({ stream: observeReq.streams.traces, offset: hit.offset, record: hit.source })); + } + } + timeline.push(...sortTimeline(items)); + } + + const warnings: string[] = []; + if (observeReq.include.trace && traceHits.length === 0) warnings.push("no trace spans found"); + if (observeReq.include.events && eventHits.length === 0) warnings.push("no evlog events found"); + if (eventCoverage.limit_reached) warnings.push("event limit reached"); + if (traceCoverage.limit_reached) warnings.push("span limit reached"); + if (!eventCoverage.complete && eventCoverage.searched) warnings.push("event search coverage incomplete"); + if (!traceCoverage.complete && traceCoverage.searched) warnings.push("trace search coverage incomplete"); + if (trace.missingParents.length > 0) 
warnings.push("trace has missing parent spans"); + + return json(200, { + lookup: { + requestId: + observeReq.lookup.requestId ?? + (primaryEvent && typeof primaryEvent.requestId === "string" ? primaryEvent.requestId : null) ?? + null, + traceId: observeReq.lookup.traceId ?? trace.traceId, + spanId: observeReq.lookup.spanId, + }, + summary: buildObserveSummary({ lookup: observeReq.lookup, primaryEvent, trace }), + evlog: observeReq.include.events + ? { + stream: observeReq.streams.events ?? null, + primary: primaryEvent, + matches: eventHits.map((hit) => ({ + offset: hit.offset, + source: hit.source, + })), + } + : null, + trace: observeReq.include.trace + ? { + stream: observeReq.streams.traces ?? null, + traceId: trace.traceId, + rootSpanId: trace.rootSpanId, + spans: trace.spans, + tree: trace.tree, + serviceMap: trace.serviceMap, + criticalPath: trace.criticalPath, + errors: trace.errors, + partial: trace.partial, + missingParents: trace.missingParents, + duplicateSpans: trace.duplicateSpans, + } + : null, + timeline, + coverage: { + events: eventCoverage, + traces: traceCoverage, + warnings, + }, + }); + }; + if (path === "/health") { return json(200, { ok: true }); } @@ -1780,6 +2052,9 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { if (!stream) return badRequest("DS_OTLP_TRACES_STREAM is not configured"); return handleOtlpTracesIngest(stream, cfg.otlpAutoCreate); } + if (path === "/v1/observe/request") { + return handleObserveRequest(); + } // /v1/streams if (req.method === "GET" && path === "/v1/streams") { diff --git a/src/observe/request.ts b/src/observe/request.ts new file mode 100644 index 0000000..72446cb --- /dev/null +++ b/src/observe/request.ts @@ -0,0 +1,535 @@ +import { Result } from "better-result"; +import type { SearchHit, SearchResultBatch } from "../reader"; +import type { UnifiedTimelineItem } from "../profiles"; + +export type ObserveRequestInput = { + streams: { + events?: string; + traces?: string; + }; + 
lookup: { + requestId: string | null; + traceId: string | null; + spanId: string | null; + }; + time: { + from: string | null; + to: string | null; + paddingMs: number; + }; + include: { + events: boolean; + trace: boolean; + timeline: boolean; + raw: boolean; + }; + limits: { + events: number; + spans: number; + }; +}; + +export type ObserveSearchCoverage = { + searched: boolean; + complete: boolean; + timed_out: boolean; + limit_reached: boolean; + hits: number; + total: { value: number; relation: "eq" | "gte" }; + index_families_used: string[]; + scanned_tail_docs: number; + scanned_segments: number; + possible_missing_events_upper_bound: number; +}; + +export type TraceTreeNode = { + spanId: string; + parentSpanId: string | null; + children: TraceTreeNode[]; + depth: number; + service: string | null; + name: string; + kind: string; + startTime: string; + endTime: string | null; + duration: number | null; + statusCode: "unset" | "ok" | "error"; +}; + +export type ServiceEdge = { + from: string; + to: string; + count: number; + errorCount: number; + latency: { + count: number; + sum: number; + min: number | null; + max: number | null; + }; +}; + +export type TraceError = { + spanId: string; + service: string | null; + name: string; + time: string | null; + type: string | null; + message: string | null; +}; + +export type TraceDetails = { + traceId: string | null; + rootSpanId: string | null; + spans: Record[]; + tree: TraceTreeNode[]; + serviceMap: ServiceEdge[]; + criticalPath: string[]; + errors: TraceError[]; + partial: boolean; + missingParents: string[]; + duplicateSpans: number; +}; + +function isPlainObject(value: unknown): value is Record { + return !!value && typeof value === "object" && !Array.isArray(value); +} + +function stringField(record: Record, field: string): string | null { + const value = record[field]; + return typeof value === "string" && value.trim() !== "" ? 
value : null; +} + +function numberField(record: Record, field: string): number | null { + const value = record[field]; + return typeof value === "number" && Number.isFinite(value) ? value : null; +} + +function nestedObject(record: Record, field: string): Record { + const value = record[field]; + return isPlainObject(value) ? value : {}; +} + +function parseOptionalString(raw: unknown, path: string): Result { + if (raw === undefined || raw === null) return Result.ok(null); + if (typeof raw !== "string") return Result.err({ message: `${path} must be a string` }); + const trimmed = raw.trim(); + return Result.ok(trimmed === "" ? null : trimmed); +} + +function parseBoolean(raw: unknown, fallback: boolean, path: string): Result { + if (raw === undefined) return Result.ok(fallback); + if (typeof raw !== "boolean") return Result.err({ message: `${path} must be boolean` }); + return Result.ok(raw); +} + +function parseLimit(raw: unknown, fallback: number, max: number, path: string): Result { + if (raw === undefined) return Result.ok(fallback); + if (typeof raw !== "number" || !Number.isFinite(raw) || !Number.isInteger(raw) || raw <= 0 || raw > max) { + return Result.err({ message: `${path} must be an integer between 1 and ${max}` }); + } + return Result.ok(raw); +} + +function parseTime(raw: unknown): Result { + if (raw === undefined) return Result.ok({ from: null, to: null, paddingMs: 0 }); + if (!isPlainObject(raw)) return Result.err({ message: "time must be an object" }); + const fromRes = parseOptionalString(raw.from, "time.from"); + if (Result.isError(fromRes)) return fromRes; + const toRes = parseOptionalString(raw.to, "time.to"); + if (Result.isError(toRes)) return toRes; + for (const [path, value] of [ + ["time.from", fromRes.value], + ["time.to", toRes.value], + ] as const) { + if (value != null && Number.isNaN(Date.parse(value))) return Result.err({ message: `${path} must be an ISO timestamp` }); + } + const paddingRaw = raw.paddingMs ?? 
raw.padding_ms; + if (paddingRaw === undefined) return Result.ok({ from: fromRes.value, to: toRes.value, paddingMs: 0 }); + if (typeof paddingRaw !== "number" || !Number.isFinite(paddingRaw) || paddingRaw < 0 || paddingRaw > 86_400_000) { + return Result.err({ message: "time.paddingMs must be a non-negative number no greater than 86400000" }); + } + return Result.ok({ from: fromRes.value, to: toRes.value, paddingMs: Math.trunc(paddingRaw) }); +} + +export function parseObserveRequestResult(raw: unknown): Result { + if (!isPlainObject(raw)) return Result.err({ message: "observe request must be an object" }); + const streamsRaw = raw.streams; + if (!isPlainObject(streamsRaw)) return Result.err({ message: "streams must be an object" }); + const eventsStreamRes = parseOptionalString(streamsRaw.events, "streams.events"); + if (Result.isError(eventsStreamRes)) return eventsStreamRes; + const tracesStreamRes = parseOptionalString(streamsRaw.traces, "streams.traces"); + if (Result.isError(tracesStreamRes)) return tracesStreamRes; + + const lookupRaw = raw.lookup; + if (!isPlainObject(lookupRaw)) return Result.err({ message: "lookup must be an object" }); + const requestIdRes = parseOptionalString(lookupRaw.requestId, "lookup.requestId"); + if (Result.isError(requestIdRes)) return requestIdRes; + const traceIdRes = parseOptionalString(lookupRaw.traceId, "lookup.traceId"); + if (Result.isError(traceIdRes)) return traceIdRes; + const spanIdRes = parseOptionalString(lookupRaw.spanId, "lookup.spanId"); + if (Result.isError(spanIdRes)) return spanIdRes; + const lookupCount = [requestIdRes.value, traceIdRes.value, spanIdRes.value].filter((value) => value != null).length; + if (lookupCount !== 1) return Result.err({ message: "lookup must include exactly one of requestId, traceId, or spanId" }); + + const includeRaw = isPlainObject(raw.include) ? 
raw.include : {}; + const includeEventsRes = parseBoolean(includeRaw.events, true, "include.events"); + if (Result.isError(includeEventsRes)) return includeEventsRes; + const includeTraceRes = parseBoolean(includeRaw.trace, true, "include.trace"); + if (Result.isError(includeTraceRes)) return includeTraceRes; + const includeTimelineRes = parseBoolean(includeRaw.timeline, true, "include.timeline"); + if (Result.isError(includeTimelineRes)) return includeTimelineRes; + const includeRawRes = parseBoolean(includeRaw.raw, false, "include.raw"); + if (Result.isError(includeRawRes)) return includeRawRes; + if (includeEventsRes.value && !eventsStreamRes.value) return Result.err({ message: "streams.events is required when include.events is true" }); + if (includeTraceRes.value && !tracesStreamRes.value) return Result.err({ message: "streams.traces is required when include.trace is true" }); + + const limitsRaw = isPlainObject(raw.limits) ? raw.limits : {}; + const eventLimitRes = parseLimit(limitsRaw.events, 100, 500, "limits.events"); + if (Result.isError(eventLimitRes)) return eventLimitRes; + const spanLimitRes = parseLimit(limitsRaw.spans, 5000, 10_000, "limits.spans"); + if (Result.isError(spanLimitRes)) return spanLimitRes; + const timeRes = parseTime(raw.time); + if (Result.isError(timeRes)) return timeRes; + + return Result.ok({ + streams: { + events: eventsStreamRes.value ?? undefined, + traces: tracesStreamRes.value ?? 
undefined, + }, + lookup: { + requestId: requestIdRes.value, + traceId: traceIdRes.value, + spanId: spanIdRes.value, + }, + time: timeRes.value, + include: { + events: includeEventsRes.value, + trace: includeTraceRes.value, + timeline: includeTimelineRes.value, + raw: includeRawRes.value, + }, + limits: { + events: eventLimitRes.value, + spans: spanLimitRes.value, + }, + }); +} + +export function quoteSearchValue(value: string): string { + return `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`; +} + +export function buildTimeSearchClauses(time: ObserveRequestInput["time"]): string[] { + const out: string[] = []; + if (time.from) { + const from = new Date(Date.parse(time.from) - time.paddingMs).toISOString(); + out.push(`timestamp:>=${quoteSearchValue(from)}`); + } + if (time.to) { + const to = new Date(Date.parse(time.to) + time.paddingMs).toISOString(); + out.push(`timestamp:<=${quoteSearchValue(to)}`); + } + return out; +} + +export function combineSearchClauses(...clauses: Array): string { + return clauses.filter((clause): clause is string => !!clause && clause.trim() !== "").join(" "); +} + +function statusCode(record: Record): "unset" | "ok" | "error" { + const status = nestedObject(record, "status"); + const code = stringField(status, "code"); + return code === "ok" || code === "error" ? code : "unset"; +} + +function spanIsError(record: Record): boolean { + if (statusCode(record) === "error") return true; + const error = nestedObject(record, "error"); + return error.isError === true; +} + +function toTraceNode(record: Record, depth: number): TraceTreeNode { + return { + spanId: stringField(record, "spanId") ?? "", + parentSpanId: stringField(record, "parentSpanId"), + children: [], + depth, + service: stringField(record, "service"), + name: stringField(record, "name") ?? "", + kind: stringField(record, "kind") ?? "unspecified", + startTime: stringField(record, "timestamp") ?? 
"", + endTime: stringField(record, "endTimestamp"), + duration: numberField(record, "duration"), + statusCode: statusCode(record), + }; +} + +function compareSpans(left: Record, right: Record): number { + const leftTs = stringField(left, "timestamp") ?? ""; + const rightTs = stringField(right, "timestamp") ?? ""; + if (leftTs !== rightTs) return leftTs < rightTs ? -1 : 1; + const leftDuration = numberField(left, "duration") ?? -1; + const rightDuration = numberField(right, "duration") ?? -1; + if (leftDuration !== rightDuration) return rightDuration - leftDuration; + return (stringField(left, "name") ?? "").localeCompare(stringField(right, "name") ?? ""); +} + +function sortTree(nodes: TraceTreeNode[]): void { + nodes.sort((left, right) => { + if (left.startTime !== right.startTime) return left.startTime < right.startTime ? -1 : 1; + if ((left.duration ?? -1) !== (right.duration ?? -1)) return (right.duration ?? -1) - (left.duration ?? -1); + return left.name.localeCompare(right.name); + }); + for (const node of nodes) sortTree(node.children); +} + +function cloneNodeAtDepth(node: TraceTreeNode, depth: number): TraceTreeNode { + return { + ...node, + depth, + children: node.children.map((child) => cloneNodeAtDepth(child, depth + 1)), + }; +} + +function buildCriticalPath(rootNodes: TraceTreeNode[]): string[] { + if (rootNodes.length === 0) return []; + const score = (node: TraceTreeNode): number => (node.duration ?? 0) + (node.statusCode === "error" ? 
1_000_000 : 0); + let current = [...rootNodes].sort((a, b) => score(b) - score(a))[0]!; + const out: string[] = []; + while (current) { + out.push(current.spanId); + if (current.children.length === 0) break; + current = [...current.children].sort((a, b) => score(b) - score(a))[0]!; + } + return out; +} + +function buildServiceMap(spans: Record[], bySpanId: Map>): ServiceEdge[] { + const edges = new Map(); + for (const span of spans) { + const parentSpanId = stringField(span, "parentSpanId"); + if (!parentSpanId) continue; + const parent = bySpanId.get(parentSpanId); + if (!parent) continue; + const from = stringField(parent, "service"); + const to = stringField(span, "service"); + if (!from || !to || from === to) continue; + const key = `${from}\0${to}`; + let edge = edges.get(key); + if (!edge) { + edge = { + from, + to, + count: 0, + errorCount: 0, + latency: { count: 0, sum: 0, min: null, max: null }, + }; + edges.set(key, edge); + } + edge.count += 1; + if (spanIsError(span)) edge.errorCount += 1; + const duration = numberField(span, "duration"); + if (duration != null) { + edge.latency.count += 1; + edge.latency.sum += duration; + edge.latency.min = edge.latency.min == null ? duration : Math.min(edge.latency.min, duration); + edge.latency.max = edge.latency.max == null ? duration : Math.max(edge.latency.max, duration); + } + } + return Array.from(edges.values()).sort((a, b) => `${a.from}:${a.to}`.localeCompare(`${b.from}:${b.to}`)); +} + +function buildTraceErrors(spans: Record[]): TraceError[] { + const errors: TraceError[] = []; + for (const span of spans) { + if (!spanIsError(span)) continue; + const error = nestedObject(span, "error"); + errors.push({ + spanId: stringField(span, "spanId") ?? "", + service: stringField(span, "service"), + name: stringField(span, "name") ?? "", + time: stringField(span, "timestamp"), + type: stringField(error, "type"), + message: stringField(error, "message") ?? (isPlainObject(span.status) ? 
stringField(span.status, "message") : null), + }); + } + return errors; +} + +export function buildTraceDetails(spansRaw: unknown[], args?: { spanLimitReached?: boolean; coverageComplete?: boolean }): TraceDetails { + const input = spansRaw.filter(isPlainObject).sort(compareSpans); + const unique = new Map>(); + let duplicateSpans = 0; + for (const span of input) { + const traceId = stringField(span, "traceId"); + const spanId = stringField(span, "spanId"); + if (!traceId || !spanId) continue; + const key = `${traceId}:${spanId}`; + if (unique.has(key)) { + duplicateSpans += 1; + continue; + } + unique.set(key, span); + } + const spans = Array.from(unique.values()).sort(compareSpans); + const bySpanId = new Map>(); + for (const span of spans) { + const spanId = stringField(span, "spanId"); + if (spanId) bySpanId.set(spanId, span); + } + const nodeBySpanId = new Map(); + for (const span of spans) { + const spanId = stringField(span, "spanId"); + if (spanId) nodeBySpanId.set(spanId, toTraceNode(span, 0)); + } + + const roots: TraceTreeNode[] = []; + const missingParents = new Set(); + for (const span of spans) { + const spanId = stringField(span, "spanId"); + if (!spanId) continue; + const node = nodeBySpanId.get(spanId); + if (!node) continue; + const parentSpanId = stringField(span, "parentSpanId"); + if (!parentSpanId) { + roots.push(node); + continue; + } + const parent = nodeBySpanId.get(parentSpanId); + if (!parent) { + missingParents.add(parentSpanId); + roots.push(node); + continue; + } + parent.children.push(node); + } + + const setDepth = (node: TraceTreeNode, depth: number): TraceTreeNode => { + node.depth = depth; + node.children = node.children.map((child) => setDepth(child, depth + 1)); + return node; + }; + const tree = roots.map((root) => setDepth(root, 0)).map((root) => cloneNodeAtDepth(root, 0)); + sortTree(tree); + + return { + traceId: spans.length > 0 ? stringField(spans[0]!, "traceId") : null, + rootSpanId: tree[0]?.spanId ?? 
null, + spans, + tree, + serviceMap: buildServiceMap(spans, bySpanId), + criticalPath: buildCriticalPath(tree), + errors: buildTraceErrors(spans), + partial: (args?.spanLimitReached ?? false) || args?.coverageComplete === false || missingParents.size > 0, + missingParents: Array.from(missingParents).sort(), + duplicateSpans, + }; +} + +export function summarizeSearchCoverage(batches: SearchResultBatch[], hits: SearchHit[], limitReached: boolean): ObserveSearchCoverage { + const families = new Set(); + let complete = batches.length > 0; + let timedOut = false; + let scannedTailDocs = 0; + let scannedSegments = 0; + let possibleMissing = 0; + let totalValue = 0; + let totalRelation: "eq" | "gte" = "eq"; + for (const batch of batches) { + complete = complete && batch.coverage.complete; + timedOut = timedOut || batch.timedOut; + scannedTailDocs += batch.coverage.scannedTailDocs; + scannedSegments += batch.coverage.scannedSegments; + possibleMissing += batch.coverage.possibleMissingEventsUpperBound; + totalValue += batch.total.value; + if (batch.total.relation === "gte") totalRelation = "gte"; + for (const family of batch.coverage.indexFamiliesUsed) families.add(family); + } + if (batches.length === 0) complete = true; + return { + searched: batches.length > 0, + complete: complete && !timedOut && !limitReached, + timed_out: timedOut, + limit_reached: limitReached, + hits: hits.length, + total: { value: totalValue, relation: limitReached ? "gte" : totalRelation }, + index_families_used: Array.from(families).sort(), + scanned_tail_docs: scannedTailDocs, + scanned_segments: scannedSegments, + possible_missing_events_upper_bound: possibleMissing, + }; +} + +export function sortTimeline(items: UnifiedTimelineItem[]): UnifiedTimelineItem[] { + return [...items].sort((left, right) => { + if (left.time !== right.time) return left.time < right.time ? 
-1 : 1; + return left.kind.localeCompare(right.kind); + }); +} + +function evlogLevel(record: Record): "debug" | "info" | "warn" | "error" | null { + const level = stringField(record, "level"); + return level === "debug" || level === "info" || level === "warn" || level === "error" ? level : null; +} + +function firstString(...values: Array): string | null { + return values.find((value) => value != null) ?? null; +} + +function firstNumber(...values: Array): number | null { + return values.find((value) => value != null) ?? null; +} + +export function buildObserveSummary(args: { + lookup: ObserveRequestInput["lookup"]; + primaryEvent: Record | null; + trace: TraceDetails; +}): Record { + const rootSpan = args.trace.spans.find((span) => stringField(span, "spanId") === args.trace.rootSpanId) ?? args.trace.spans[0] ?? null; + const event = args.primaryEvent; + const http = rootSpan ? nestedObject(rootSpan, "http") : {}; + const error = rootSpan ? nestedObject(rootSpan, "error") : {}; + const eventStatus = event ? numberField(event, "status") : null; + const spanStatus = numberField(http, "statusCode"); + const level = event ? evlogLevel(event) : null; + const method = firstString(event ? stringField(event, "method") : null, stringField(http, "method")); + const path = firstString(event ? stringField(event, "path") : null, stringField(http, "path")); + const route = stringField(http, "route"); + const rootStart = rootSpan ? stringField(rootSpan, "timestamp") : null; + const rootEnd = rootSpan ? stringField(rootSpan, "endTimestamp") : null; + const eventMessage = event ? stringField(event, "message") : null; + const spanName = rootSpan ? stringField(rootSpan, "name") : null; + return { + title: + eventMessage ?? + ([method, route ?? path].filter(Boolean).join(" ") || spanName || args.lookup.requestId || args.lookup.traceId || args.lookup.spanId || "request"), + service: firstString(event ? stringField(event, "service") : null, rootSpan ? 
stringField(rootSpan, "service") : null), + environment: firstString(event ? stringField(event, "environment") : null, rootSpan ? stringField(rootSpan, "environment") : null), + method, + path, + route, + status: firstNumber(eventStatus, spanStatus), + level, + duration: firstNumber(event ? numberField(event, "duration") : null, rootSpan ? numberField(rootSpan, "duration") : null), + startTime: firstString(event ? stringField(event, "timestamp") : null, rootStart), + endTime: rootEnd, + error: { + isError: level === "error" || eventStatus != null && eventStatus >= 500 || args.trace.errors.length > 0 || error.isError === true, + type: stringField(error, "type"), + message: firstString(stringField(error, "message"), event ? stringField(event, "message") : null), + why: event ? stringField(event, "why") : null, + fix: event ? stringField(event, "fix") : null, + link: event ? stringField(event, "link") : null, + }, + }; +} + +export function choosePrimaryEvent(events: SearchHit[], traceId: string | null): SearchHit | null { + if (events.length === 0) return null; + if (traceId) { + const matching = events.find((hit) => isPlainObject(hit.source) && stringField(hit.source, "traceId") === traceId); + if (matching) return matching; + } + return events[0]!; +} diff --git a/test/observe_request.test.ts b/test/observe_request.test.ts new file mode 100644 index 0000000..f1effa9 --- /dev/null +++ b/test/observe_request.test.ts @@ -0,0 +1,261 @@ +import { describe, expect, test } from "bun:test"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { createProfileTestApp, fetchJsonApp } from "./profile_test_utils"; + +const TRACE_ID = "5b8efff798038103d269b633813fc60c"; +const ROOT_SPAN_ID = "086e83747d0e381e"; +const DB_SPAN_ID = "186e83747d0e381f"; +const CLIENT_SPAN_ID = "286e83747d0e3820"; + +async function createJsonStream(app: ReturnType["app"], stream: string, profile: Record) { + await app.fetch( + 
new Request(`http://local/v1/stream/${stream}`, { + method: "PUT", + headers: { "content-type": "application/json" }, + }) + ); + const res = await fetchJsonApp(app, `http://local/v1/stream/${stream}/_profile`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + apiVersion: "durable.streams/profile/v1", + profile, + }), + }); + expect(res.status).toBe(200); +} + +function span(args: { + spanId: string; + parentSpanId?: string | null; + service: string; + name: string; + kind?: string; + start: string; + end: string; + statusCode?: "unset" | "ok" | "error"; + httpStatus?: number; + requestId?: string; + errorMessage?: string; +}) { + return { + traceId: TRACE_ID, + spanId: args.spanId, + parentSpanId: args.parentSpanId ?? null, + name: args.name, + kind: args.kind ?? "internal", + startUnixNano: args.start, + endUnixNano: args.end, + status: { code: args.statusCode ?? "unset", message: args.errorMessage ?? null }, + resource: { + attributes: { + "service.name": args.service, + "deployment.environment.name": "prod", + }, + }, + attributes: { + ...(args.requestId ? { "request.id": args.requestId } : {}), + ...(args.httpStatus ? { "http.response.status_code": args.httpStatus } : {}), + ...(args.name.startsWith("GET") ? { "http.request.method": "GET", "http.route": "/checkout" } : {}), + ...(args.name.startsWith("SELECT") ? { "db.system": "postgresql", "db.operation": "SELECT" } : {}), + }, + events: args.errorMessage + ? 
[ + { + timeUnixNano: args.end, + name: "exception", + attributes: { + "exception.type": "Error", + "exception.message": args.errorMessage, + }, + }, + ] + : [], + }; +} + +async function seedObservabilityStreams(app: ReturnType["app"]) { + await createJsonStream(app, "app-events", { kind: "evlog" }); + await createJsonStream(app, "app-traces", { kind: "otel-traces" }); + + const eventRes = await app.fetch( + new Request("http://local/v1/stream/app-events", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + timestamp: "2026-03-27T10:00:00.250Z", + level: "error", + service: "checkout", + environment: "prod", + requestId: "req_obs_1", + traceId: TRACE_ID, + spanId: ROOT_SPAN_ID, + method: "GET", + path: "/checkout", + status: 502, + duration: 260, + message: "Checkout failed", + why: "Payment provider returned 502", + fix: "Retry after provider recovery", + }), + }) + ); + expect([200, 204]).toContain(eventRes.status); + + const traceRes = await app.fetch( + new Request("http://local/v1/stream/app-traces", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify([ + span({ + spanId: ROOT_SPAN_ID, + service: "checkout", + name: "GET /checkout", + kind: "server", + start: "1772020800000000000", + end: "1772020800260000000", + statusCode: "error", + httpStatus: 502, + requestId: "req_obs_1", + errorMessage: "provider unavailable", + }), + span({ + spanId: DB_SPAN_ID, + parentSpanId: ROOT_SPAN_ID, + service: "checkout", + name: "SELECT cart", + start: "1772020800030000000", + end: "1772020800040000000", + statusCode: "ok", + }), + span({ + spanId: CLIENT_SPAN_ID, + parentSpanId: ROOT_SPAN_ID, + service: "payments", + name: "POST payment", + kind: "client", + start: "1772020800100000000", + end: "1772020800250000000", + statusCode: "error", + errorMessage: "provider unavailable", + }), + ]), + }) + ); + expect([200, 204]).toContain(traceRes.status); +} + +function observeBody(lookup: 
Record, extra: Record = {}) { + return { + streams: { events: "app-events", traces: "app-traces" }, + lookup, + include: { events: true, trace: true, timeline: true }, + limits: { events: 20, spans: 100 }, + ...extra, + }; +} + +describe("observe request API", () => { + test("looks up by requestId and returns evlog context with trace tree", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-observe-request-id-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await seedObservabilityStreams(app); + const res = await fetchJsonApp(app, "http://local/v1/observe/request", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(observeBody({ requestId: "req_obs_1" })), + }); + + expect(res.status).toBe(200); + expect(res.body.lookup).toEqual({ requestId: "req_obs_1", traceId: TRACE_ID, spanId: null }); + expect(res.body.summary).toMatchObject({ + title: "Checkout failed", + service: "checkout", + environment: "prod", + method: "GET", + path: "/checkout", + route: "/checkout", + status: 502, + level: "error", + duration: 260, + error: { + isError: true, + why: "Payment provider returned 502", + fix: "Retry after provider recovery", + }, + }); + expect(res.body.evlog.primary.requestId).toBe("req_obs_1"); + expect(res.body.evlog.matches).toHaveLength(1); + expect(res.body.trace.traceId).toBe(TRACE_ID); + expect(res.body.trace.rootSpanId).toBe(ROOT_SPAN_ID); + expect(res.body.trace.spans).toHaveLength(3); + expect(res.body.trace.tree).toHaveLength(1); + expect(res.body.trace.tree[0].children.map((child: any) => child.spanId).sort()).toEqual([CLIENT_SPAN_ID, DB_SPAN_ID].sort()); + expect(res.body.trace.serviceMap).toEqual([ + { + from: "checkout", + to: "payments", + count: 1, + errorCount: 1, + latency: { count: 1, sum: 150, min: 150, max: 150 }, + }, + ]); + expect(res.body.trace.errors.map((error: any) => error.spanId).sort()).toEqual([CLIENT_SPAN_ID, ROOT_SPAN_ID].sort()); + 
expect(res.body.trace.criticalPath).toContain(ROOT_SPAN_ID); + expect(res.body.timeline.length).toBeGreaterThanOrEqual(7); + expect(res.body.coverage.events.searched).toBe(true); + expect(res.body.coverage.traces.searched).toBe(true); + expect(res.body.coverage.warnings).toEqual([]); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("looks up by spanId, expands to the full trace, and correlates evlog by traceId", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-observe-span-id-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await seedObservabilityStreams(app); + const res = await fetchJsonApp(app, "http://local/v1/observe/request", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(observeBody({ spanId: CLIENT_SPAN_ID })), + }); + + expect(res.status).toBe(200); + expect(res.body.lookup).toEqual({ requestId: "req_obs_1", traceId: TRACE_ID, spanId: CLIENT_SPAN_ID }); + expect(res.body.trace.spans.map((item: any) => item.spanId).sort()).toEqual([ROOT_SPAN_ID, DB_SPAN_ID, CLIENT_SPAN_ID].sort()); + expect(res.body.evlog.primary.traceId).toBe(TRACE_ID); + expect(res.body.trace.partial).toBe(false); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("reports partial trace when span limit is reached", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-observe-limit-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await seedObservabilityStreams(app); + const res = await fetchJsonApp(app, "http://local/v1/observe/request", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(observeBody({ traceId: TRACE_ID }, { limits: { events: 20, spans: 1 } })), + }); + + expect(res.status).toBe(200); + expect(res.body.trace.spans).toHaveLength(1); + expect(res.body.trace.partial).toBe(true); 
+ expect(res.body.coverage.traces.limit_reached).toBe(true); + expect(res.body.coverage.warnings).toContain("span limit reached"); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); +}); From c658e712748414ba46f4ef90782234b6cee47c76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Thu, 11 Jun 2026 20:44:42 +0700 Subject: [PATCH 03/12] Document otel traces observability APIs --- docs/architecture.md | 23 ++- docs/durable-streams-spec.md | 109 +++++++++++++- docs/index.md | 4 + docs/overview.md | 13 ++ docs/profile-evlog.md | 31 ++++ docs/profile-otel-traces.md | 227 ++++++++++++++++++++++++++++ docs/request-observability.md | 268 ++++++++++++++++++++++++++++++++++ docs/schemas.md | 10 +- docs/stream-profiles.md | 24 +++ 9 files changed, 703 insertions(+), 6 deletions(-) create mode 100644 docs/profile-otel-traces.md create mode 100644 docs/request-observability.md diff --git a/docs/architecture.md b/docs/architecture.md index 295914a..04d128a 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -32,14 +32,17 @@ Implemented built-ins today: - `evlog` - `generic` - `metrics` +- `otel-traces` - `state-protocol` `generic` adds no canonical payload envelope and leaves schema management to the user. `evlog` owns canonical wide-event normalization, redaction, and its default schema/search/rollup registry on JSON append. `metrics` owns canonical metrics interval normalization, its default schema/search/rollup registry, and -the metrics-block companion family. `state-protocol` owns the live `/touch/*` -surface and its touch configuration. +the metrics-block companion family. `otel-traces` owns canonical span +normalization, OTLP trace export decoding, redaction, backend-side trace limits, +and its default schema/search/rollup registry. `state-protocol` owns the live +`/touch/*` surface and its touch configuration. See [stream-profiles.md](./stream-profiles.md) for the normative model. 
@@ -51,6 +54,8 @@ See [stream-profiles.md](./stream-profiles.md) for the normative model. - Implements long-poll reads without busy loops. - Resolves the stream profile definition before handling profile-owned metadata or routes. +- Uses profile capabilities for OTLP trace ingestion and correlation timeline + conversion instead of hard-coding profile branches in the core route path. - Admits ingest, read, and search work through bounded in-process concurrency gates instead of a direct memory-based reject path. @@ -151,6 +156,20 @@ Today, `metrics` uses the same model to own: - bundled per-segment `PSCIX2` `.cix` search companions for metrics-serving state +Today, `otel-traces` uses the same model to own: + +- canonical OpenTelemetry span normalization on JSON append +- OTLP JSON/protobuf trace export decoding on `POST /v1/traces` and + `POST /v1/stream/{name}/_otlp/v1/traces` +- pre-append redaction and backend-side attribute/event/link limits +- routing-key defaults from `traceId` +- default schema-owned `search` and `search.rollups` installation + +The cross-stream request observability API is a query layer over `evlog` and +`otel-traces` streams. It uses stream search results and profile correlation +capabilities to build summaries, trace trees, service edges, and timelines; it +does not create a separate mutable observability store. + ## Control-Plane Metadata Per stream, SQLite stores: diff --git a/docs/durable-streams-spec.md b/docs/durable-streams-spec.md index 25da70b..a4f794c 100644 --- a/docs/durable-streams-spec.md +++ b/docs/durable-streams-spec.md @@ -59,12 +59,19 @@ implementation. 
- `GET /v1/stream/{name}/_routing_keys` list routing keys alphabetically - `GET /v1/stream/{name}/_index_status` get per-stream index status - `GET /v1/stream/{name}/_details` get combined stream details +- `POST /v1/stream/{name}/_otlp/v1/traces` ingest OTLP traces into an + `otel-traces` stream -### 2.5 Streams collection +### 2.5 Observability resources + +- `POST /v1/traces` ingest OTLP traces into `DS_OTLP_TRACES_STREAM` +- `POST /v1/observe/request` correlate request events and trace spans + +### 2.6 Streams collection - `GET /v1/streams` list streams -### 2.6 Server inspection +### 2.7 Server inspection - `GET /v1/server/_details` get server-scoped configured limits and live runtime state @@ -211,6 +218,104 @@ Optional fields: - `group_by` - `measures` +### 4.4 OTLP trace ingestion + +`POST /v1/traces` accepts OTLP trace export requests and writes accepted spans +to the stream named by `DS_OTLP_TRACES_STREAM`. + +Rules: + +- `DS_OTLP_TRACES_STREAM` must be configured, otherwise the endpoint returns + `400`. +- If the target stream does not exist and `DS_OTLP_AUTO_CREATE=true`, the + server creates an `application/json` stream, installs the `otel-traces` + profile, uploads the profile-owned schema registry, publishes a manifest, and + then appends accepted spans. +- If the target stream does not exist and auto-create is not enabled, the + endpoint returns `404`. +- The target stream must have the `otel-traces` profile. + +`POST /v1/stream/{name}/_otlp/v1/traces` accepts the same OTLP payloads for an +explicit stream. The stream must already exist and have the `otel-traces` +profile. + +Supported request content types: + +- `application/x-protobuf` +- `application/json` + +Supported content encodings: + +- no encoding / `identity` +- `gzip` + +Successful full acceptance returns HTTP `200` with an empty OTLP +`ExportTraceServiceResponse` for protobuf or `{}` for JSON. 
Partial acceptance +also returns HTTP `200` and includes OTLP partial-success information with the +number of rejected spans and an error message. Clients must not retry spans +rejected by a partial-success response. + +Malformed payloads return `400`. Unsupported content types or content encodings +return `415`. Accepted spans are appended as canonical JSON span records using +`traceId` as the routing key. + +### 4.5 Request observability + +`POST /v1/observe/request` correlates an event stream and a trace stream at +query time. It does not append data and does not create a new stream profile. + +Request body: + +```json +{ + "streams": { + "events": "app-events", + "traces": "app-traces" + }, + "lookup": { + "requestId": "req_123" + }, + "time": { + "from": "2026-03-27T00:00:00.000Z", + "to": "2026-03-28T00:00:00.000Z", + "paddingMs": 5000 + }, + "include": { + "events": true, + "trace": true, + "timeline": true, + "raw": false + }, + "limits": { + "events": 100, + "spans": 5000 + } +} +``` + +`lookup` must contain exactly one of `requestId`, `traceId`, or `spanId`. +`streams.events` is required when `include.events=true`; `streams.traces` is +required when `include.trace=true`. + +The endpoint uses the configured `_search` registries for the referenced +streams. Event and trace streams must expose the profile correlation capability. +The response contains: + +- `lookup` +- `summary` +- `evlog` +- `trace` +- `timeline` +- `coverage` + +The trace response deduplicates returned spans by `traceId:spanId` for the +tree, service map, errors, and critical path. Duplicate span records remain in +the underlying append-only stream. + +The endpoint returns `400` for invalid request bodies, unsupported profile +combinations, or streams without search configuration. Missing streams return +`404`. + --- ## 5. Offsets diff --git a/docs/index.md b/docs/index.md index c9d39d3..9715380 100644 --- a/docs/index.md +++ b/docs/index.md @@ -25,8 +25,12 @@ and tooling. 
- [stream-profiles.md](./stream-profiles.md) — stream/profile/schema model and profile subresource - [profile-generic.md](./profile-generic.md) — reference for the baseline `generic` profile - [profile-metrics.md](./profile-metrics.md) — reference for the built-in `metrics` profile +- [profile-otel-traces.md](./profile-otel-traces.md) — reference for the built-in + `otel-traces` profile and OTLP trace ingestion - [profile-state-protocol.md](./profile-state-protocol.md) — reference for the `state-protocol` profile - [profile-evlog.md](./profile-evlog.md) — design and reference for the `evlog` profile +- [request-observability.md](./request-observability.md) — cross-stream request + lookup that correlates `evlog` events and `otel-traces` spans - [schemas.md](./schemas.md) — schema registry and lens behavior - [durable-lens-v1-schema.md](./durable-lens-v1-schema.md) — reference schema for `durable.lens/v1` - [sqlite-schema.md](./sqlite-schema.md) — SQLite schema, invariants, and migration expectations diff --git a/docs/overview.md b/docs/overview.md index fcaa6eb..11abdae 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -15,11 +15,17 @@ Every stream has a profile. validation. - `metrics` is the built-in metrics profile for canonical interval summaries, default search/rollups, and object-store-native metrics companions. +- `otel-traces` is the built-in OpenTelemetry trace profile for one canonical + JSON span per record, OTLP trace ingestion, trace search/rollups, and request + correlation with `evlog`. - `state-protocol` is the built-in live/touch profile for JSON State Protocol streams. - Profiles define stream semantics; schemas define payload shape. See [stream-profiles.md](./stream-profiles.md). +See [profile-otel-traces.md](./profile-otel-traces.md) and +[request-observability.md](./request-observability.md) for trace ingestion and +cross-stream request lookup. 
This repository currently contains two server modes: @@ -180,6 +186,13 @@ Optional flags: - `--bootstrap-from-r2` - `--auto-tune[=MB]` +Optional OTLP trace receiver configuration: + +- `DS_OTLP_TRACES_STREAM=` enables the default `POST /v1/traces` + receiver target +- `DS_OTLP_AUTO_CREATE=true` lets `/v1/traces` create and profile that stream + as `otel-traces` before accepting spans + ### Object Store Configuration Local MockR2: diff --git a/docs/profile-evlog.md b/docs/profile-evlog.md index 4366e1b..ab99765 100644 --- a/docs/profile-evlog.md +++ b/docs/profile-evlog.md @@ -19,6 +19,9 @@ The v1 profile: - support request-centric lookup through the existing routing-key path The v1 profile does not introduce a separate observability storage engine. +It also does not store OpenTelemetry span graphs; spans belong in +[`otel-traces`](./profile-otel-traces.md) streams and are correlated at query +time. ## Stream Contract @@ -31,8 +34,35 @@ The v1 profile does not introduce a separate observability storage engine. default `search` and `search.rollups` config - the profile provides a default routing key from `requestId`, with `traceId` fallback +- optional correlation settings can define request/trace field aliases and + `traceparent` parsing for better joins with `otel-traces` - reads continue to use the normal durable stream APIs +Supported profile shape: + +```json +{ + "kind": "evlog", + "redactKeys": ["sessiontoken"], + "correlation": { + "requestIdFields": ["requestId", "context.requestId"], + "traceContextFields": [ + "traceId", + "spanId", + "traceContext.traceId", + "traceContext.spanId" + ], + "parseTraceparent": true + } +} +``` + +`correlation` only affects how the evlog canonical envelope derives +`requestId`, `traceId`, and `spanId`; it does not make evlog accept spans. 
+When `parseTraceparent` is not false, the profile reads W3C `traceparent` from +`traceparent`, `traceContext.traceparent`, `context.traceparent`, or +`headers.traceparent` if explicit trace fields are absent. + ## Canonical Envelope Each stored event should use this stable top-level shape: @@ -196,6 +226,7 @@ Current evlog query surfaces: - `POST /v1/stream/{name}/_search` - `GET /v1/stream/{name}/_search?q=...` - `POST /v1/stream/{name}/_aggregate` +- `POST /v1/observe/request` when paired with an `otel-traces` stream ## UI Integration diff --git a/docs/profile-otel-traces.md b/docs/profile-otel-traces.md new file mode 100644 index 0000000..0afcd76 --- /dev/null +++ b/docs/profile-otel-traces.md @@ -0,0 +1,227 @@ +# OpenTelemetry Traces Profile + +This document defines the v1 `otel-traces` profile. + +`otel-traces` is the built-in profile for storing OpenTelemetry trace spans as +ordinary Prisma Streams records. It keeps trace semantics separate from +`evlog`: an `evlog` stream stores one wide request-centric event, while an +`otel-traces` stream stores one canonical JSON record per span. + +## Stream Contract + +`otel-traces` means: + +- the stream content type must be `application/json` +- the profile must be installed before the stream has appended data +- JSON appends are normalized into the canonical span envelope +- OTLP trace exports can be ingested through profile-owned OTLP endpoints +- redaction and backend-side limits run before durable append +- installing the profile auto-installs canonical schema version `1` +- installing the profile auto-installs default `search` fields and rollups +- the default routing key is `traceId` +- reads continue to use the normal durable stream APIs + +The profile does not introduce a mutable local span table. The stream remains +the durable source of truth, and search/aggregate behavior uses the existing +schema-owned companion index system. 
+ +## Profile Resource + +Install the profile with: + +```http +POST /v1/stream/app-traces/_profile +Content-Type: application/json + +{ + "apiVersion": "durable.streams/profile/v1", + "profile": { + "kind": "otel-traces", + "redactKeys": ["authorization", "cookie", "password", "token", "secret"], + "requestIdAttributes": ["request.id", "http.request.header.x-request-id"], + "attributeLimits": { + "maxAttributeValueBytes": 8192, + "maxAttributesPerSpan": 256, + "maxEventsPerSpan": 128, + "maxLinksPerSpan": 128, + "maxStatementBytes": 4096 + }, + "store": { + "rawResourceAttributes": true, + "rawSpanAttributes": true, + "rawEvents": true, + "rawLinks": true + }, + "dbStatementMode": "drop" + } +} +``` + +Supported `dbStatementMode` values: + +- `drop` stores `db.statement` as `null` +- `raw` stores the statement after normal attribute value truncation + +There is no `redact_literals` mode in the shipped implementation. + +## Canonical Span Envelope + +Each stored span is normalized to a stable JSON object with: + +- identity fields: `traceId`, `spanId`, `parentSpanId`, `identity.spanKey` +- timestamps: `timestamp`, `endTimestamp`, `startUnixNano`, `endUnixNano`, + `duration` +- span semantics: `name`, `kind`, `status`, `traceState`, `traceFlags` +- resource fields: `service`, `serviceNamespace`, `serviceInstanceId`, + `environment`, `version`, `region` +- correlation field: `requestId` +- semantic convention groups: `http`, `db`, `rpc`, `messaging`, `error` +- raw retained data: `resource.attributes`, `instrumentationScope.attributes`, + `attributes`, `events`, `links` +- derived `eventNames` for searchable span event names +- dropped/limited counters and `redaction.keys` + +Trace IDs must be 32-character lowercase hex strings and span IDs must be +16-character lowercase hex strings. All-zero trace and span IDs are rejected. + +Nanosecond timestamps are preserved as decimal strings. 
`timestamp`, +`endTimestamp`, and `duration` are derived for search, sort, aggregation, and +UI rendering. + +## OTLP Ingestion + +Two endpoints accept OTLP trace exports. + +Default endpoint: + +```http +POST /v1/traces +Content-Type: application/x-protobuf +``` + +or: + +```http +POST /v1/traces +Content-Type: application/json +``` + +`/v1/traces` writes to `DS_OTLP_TRACES_STREAM`. If +`DS_OTLP_AUTO_CREATE=true` and the stream does not exist, the server creates an +`application/json` stream, installs `otel-traces`, uploads the schema/profile +metadata, publishes a manifest, and then appends accepted spans. + +Explicit stream endpoint: + +```http +POST /v1/stream/app-traces/_otlp/v1/traces +Content-Type: application/x-protobuf +``` + +The explicit endpoint requires the target stream to already have the +`otel-traces` profile. + +Both endpoints support: + +- `application/x-protobuf` +- `application/json` +- `Content-Encoding: gzip` + +Malformed payloads return `400`. Unsupported media types or encodings return +`415`. A successful full acceptance returns OTLP success. Partial acceptance +returns HTTP `200` with OTLP `partialSuccess` / `partial_success` information; +clients should not retry rejected spans from that response. + +## JSON Appends + +Normal JSON appends to an `otel-traces` stream are also normalized by the +profile. This is intended for tests, local tools, and direct integrations that +already have canonical span-shaped JSON. + +```http +POST /v1/stream/app-traces +Content-Type: application/json + +{ + "traceId": "5b8efff798038103d269b633813fc60c", + "spanId": "086e83747d0e381e", + "name": "GET /checkout", + "kind": "server", + "startUnixNano": "1772020800000000000", + "endUnixNano": "1772020800123000000", + "resource": { "attributes": { "service.name": "checkout" } }, + "attributes": { "request.id": "req_123" } +} +``` + +The stored record is the canonical span envelope, not the input object. 
+ +## Search Defaults + +The profile installs schema-owned search fields including: + +- exact/prefix: `traceId`, `spanId`, `parentSpanId`, `requestId`, `service`, + `environment`, `name`, `kind`, `status.code`, HTTP/DB/RPC/messaging fields +- typed columns: `timestamp`, `endTimestamp`, `duration`, + `http.statusCode`, `error.isError` +- text: `status.message`, `error.message`, `error.stacktrace`, + `db.statement`, `events.name` + +Aliases include: + +- `trace` -> `traceId` +- `span` -> `spanId` +- `parent` -> `parentSpanId` +- `req` -> `requestId` +- `svc` -> `service` +- `op` -> `name` +- `route` -> `http.route` +- `method` -> `http.method` +- `status` -> `http.statusCode` +- `error` -> `error.isError` +- `db` -> `db.system` +- `duration_ms` -> `duration` +- `time` / `ts` -> `timestamp` + +Default rollups: + +- `spans` over `service`, `kind`, and `status.code` +- `http_server` over `service`, `http.method`, `http.route`, and + `http.statusCode` + +Each rollup includes count and `duration` summary measures. Filtered count +measures are not part of the shipped rollup schema. + +## Request Correlation + +Applications should copy the evlog request ID into the active root/server span +as `request.id`. The profile also checks these request ID attributes by +default: + +- `request.id` +- `http.request_id` +- `http.request.header.x_request_id` +- `http.request.header.x-request-id` +- `http.request.header.x_correlation_id` +- `http.request.header.x-correlation-id` +- `correlation.id` + +The cross-stream request view is implemented by +[`request-observability.md`](./request-observability.md), not by merging +`evlog` and spans into one profile. + +## Security And Privacy + +Redaction is case-insensitive and happens before durable append. 
It applies to: + +- resource attributes +- span attributes +- instrumentation scope attributes +- span event attributes +- link attributes + +Default sensitive keys include `password`, `token`, `secret`, `authorization`, +`cookie`, `apikey`, `api_key`, `set-cookie`, and `x-api-key`. + +Use separate streams for data with different retention, access, or tenant +boundaries. Do not rely on UI filters for tenant isolation. diff --git a/docs/request-observability.md b/docs/request-observability.md new file mode 100644 index 0000000..9824e24 --- /dev/null +++ b/docs/request-observability.md @@ -0,0 +1,268 @@ +# Request Observability API + +This document describes the backend API that correlates `evlog` request events +with `otel-traces` spans. It is a query layer over streams, not a stream +profile and not a custom UI. + +The API is designed for request detail views, trace waterfall renderers, and +debugging tools that need one response containing: + +- the best evlog request event +- matching trace spans +- a parent/child trace tree +- service edges +- errors +- a combined timeline +- search coverage and partial-result warnings + +## Endpoint + +```http +POST /v1/observe/request +Content-Type: application/json +``` + +Request: + +```json +{ + "streams": { + "events": "app-events", + "traces": "app-traces" + }, + "lookup": { + "requestId": "req_123" + }, + "time": { + "from": "2026-03-27T00:00:00.000Z", + "to": "2026-03-28T00:00:00.000Z", + "paddingMs": 5000 + }, + "include": { + "events": true, + "trace": true, + "timeline": true, + "raw": false + }, + "limits": { + "events": 100, + "spans": 5000 + } +} +``` + +`lookup` must contain exactly one of: + +- `requestId` +- `traceId` +- `spanId` + +`streams.events` is required when `include.events` is true. `streams.traces` is +required when `include.trace` is true. 
+ +Limits: + +- `limits.events`: 1 to 500, default 100 +- `limits.spans`: 1 to 10000, default 5000 + +The implementation pages internally through `_search` because `_search` pages +are capped at 500 hits. + +## Lookup Behavior + +### Request ID + +For `{ "lookup": { "requestId": "req_123" } }`: + +1. Search the evlog stream with `req:"req_123"`. +2. Extract candidate `traceId`s from matching evlog events. +3. If trace IDs exist, search the trace stream by each `trace:"..."`. +4. If no trace ID is found from evlog, search the trace stream with + `req:"req_123"`. +5. Build the trace tree and combined timeline. + +### Trace ID + +For `{ "lookup": { "traceId": "..." } }`: + +1. Search the trace stream with `trace:"..."`. +2. Search the evlog stream with `trace:"..."`. +3. Build the same response shape. + +### Span ID + +For `{ "lookup": { "spanId": "..." } }`: + +1. Search the trace stream with `span:"..."`. +2. Extract the trace ID from matching span records. +3. Search the full trace by `trace:"..."`. +4. Search the evlog stream by the resolved trace ID. + +If a span lookup cannot resolve a trace ID, the API falls back to searching the +event stream by `span:"..."`. + +## Response Shape + +The response has this top-level shape: + +```json +{ + "lookup": { + "requestId": "req_123", + "traceId": "5b8efff798038103d269b633813fc60c", + "spanId": null + }, + "summary": {}, + "evlog": {}, + "trace": {}, + "timeline": [], + "coverage": { + "events": {}, + "traces": {}, + "warnings": [] + } +} +``` + +`summary` is a best-effort request header synthesized from evlog first and +root/server span data second. It includes method/path/route/status, service, +environment, duration, start/end time, level, and error fields. + +`evlog` is null when `include.events=false`. Otherwise it contains: + +- `stream` +- `primary` +- `matches` + +The primary event prefers a match with the selected trace ID, otherwise the +first event result. + +`trace` is null when `include.trace=false`. 
Otherwise it contains: + +- `stream` +- `traceId` +- `rootSpanId` +- `spans` +- `tree` +- `serviceMap` +- `criticalPath` +- `errors` +- `partial` +- `missingParents` +- `duplicateSpans` + +Spans are deduplicated by `traceId:spanId` for the trace view. The underlying +stream remains append-only and keeps duplicate deliveries. + +## Trace Tree + +Tree nodes contain: + +```json +{ + "spanId": "086e83747d0e381e", + "parentSpanId": null, + "children": [], + "depth": 0, + "service": "checkout", + "name": "GET /checkout", + "kind": "server", + "startTime": "2026-03-27T10:00:00.000Z", + "endTime": "2026-03-27T10:00:00.260Z", + "duration": 260, + "statusCode": "error" +} +``` + +Parents are linked by `parentSpanId`. Spans without a parent, or whose parent +was not found in the returned span set, become roots. Missing parent span IDs +are reported in `trace.missingParents`, and `trace.partial` becomes true. + +Children are sorted by start time, then duration descending, then name. + +## Timeline + +The timeline merges profile-owned timeline items: + +- `evlog.event` +- `otel.span.start` +- `otel.span.end` +- `otel.span.event` +- `otel.exception` + +Each item includes time, title, service, severity, IDs, source stream/profile, +and source data. + +This response is intended for custom UI rendering, but no custom UI is shipped +with this feature. + +## Coverage + +`coverage.events` and `coverage.traces` summarize the `_search` calls used by +the request: + +- `searched` +- `complete` +- `timed_out` +- `limit_reached` +- `hits` +- `total` +- `index_families_used` +- `scanned_tail_docs` +- `scanned_segments` +- `possible_missing_events_upper_bound` + +Warnings are emitted for missing evlog events, missing trace spans, hit limits, +incomplete search coverage, and missing parent spans. A UI should surface these +warnings instead of presenting an incomplete response as authoritative. 
+ +## Examples + +Lookup by request ID: + +```json +{ + "streams": { + "events": "app-events", + "traces": "app-traces" + }, + "lookup": { + "requestId": "req_123" + } +} +``` + +Lookup by trace ID: + +```json +{ + "streams": { + "events": "app-events", + "traces": "app-traces" + }, + "lookup": { + "traceId": "5b8efff798038103d269b633813fc60c" + }, + "limits": { + "spans": 5000 + } +} +``` + +Lookup by span ID without event data: + +```json +{ + "streams": { + "traces": "app-traces" + }, + "lookup": { + "spanId": "086e83747d0e381e" + }, + "include": { + "events": false, + "trace": true, + "timeline": true + } +} +``` diff --git a/docs/schemas.md b/docs/schemas.md index 9793f22..d7e18a0 100644 --- a/docs/schemas.md +++ b/docs/schemas.md @@ -77,11 +77,15 @@ Notes: fields for schema updates, routing-key updates, and search updates. - Profile-owned live/touch configuration belongs in `/_profile`, not `/_schema`. -One profile-owned exception exists in the current shipped system: +Profile-owned exceptions exist for built-in canonical profiles: - installing the `evlog` profile auto-installs its canonical schema version `1` and default `search` registry, so the default evlog path does not require a separate manual `/_schema` call +- installing the `metrics` profile auto-installs its canonical metrics schema + version `1` and default `search`/`search.rollups` registry +- installing the `otel-traces` profile auto-installs its canonical span schema + version `1` and default `search`/`search.rollups` registry Accepted POST shapes: @@ -168,8 +172,10 @@ If `routingKey` is configured: Schemas do not define: -- whether a stream is `generic`, `evlog`, `metrics`, or `state-protocol` +- whether a stream is `generic`, `evlog`, `metrics`, `otel-traces`, or + `state-protocol` - profile-owned endpoints or runtime hooks +- OTLP trace ingestion or cross-stream request correlation Schemas do define payload-owned field extraction, including routing keys and schema-owned search field 
declarations and rollups. diff --git a/docs/stream-profiles.md b/docs/stream-profiles.md index 5807ca5..e7822c8 100644 --- a/docs/stream-profiles.md +++ b/docs/stream-profiles.md @@ -45,6 +45,7 @@ Current built-ins: - `evlog` - `generic` - `metrics` +- `otel-traces` - `state-protocol` ## `generic` @@ -96,6 +97,26 @@ It means: See [profile-metrics.md](./profile-metrics.md) and [metrics.md](./metrics.md) for the detailed contract. +## `otel-traces` + +`otel-traces` is the built-in profile for OpenTelemetry trace spans. + +It means: + +- the stream content type must be `application/json` +- JSON appends are normalized into the canonical span envelope +- OTLP trace exports are accepted through `POST /v1/traces` and + `POST /v1/stream/{name}/_otlp/v1/traces` +- installing the profile auto-installs the canonical span schema registry, + search fields, and default rollups +- the canonical routing key is `traceId` +- request correlation with `evlog` is provided by the cross-stream + `/v1/observe/request` API, not by mixing spans into `evlog` + +See [profile-otel-traces.md](./profile-otel-traces.md) and +[request-observability.md](./request-observability.md) for the detailed +contract. 
+ ## `state-protocol` `state-protocol` is the built-in profile for streams that carry State Protocol @@ -126,6 +147,8 @@ Examples: - `/touch/*` availability: profile - touch configuration: profile - metrics canonicalization and `.mblk` enablement: profile +- OpenTelemetry span normalization and OTLP trace ingestion: profile +- cross-stream request lookup and timeline construction: query/API layer - JSON validation: schema - version boundaries and lenses: schema - routing-key extraction: schema @@ -189,6 +212,7 @@ What does **not** belong in `/_schema`: - State Protocol runtime behavior - evlog envelope normalization or redaction - metrics interval normalization and `.mblk` enablement +- OpenTelemetry span normalization or OTLP trace ingestion ## Supported API Rules From 394deaa2047b2bed1186d6fe936d5023c70eb60d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Thu, 11 Jun 2026 20:45:37 +0700 Subject: [PATCH 04/12] Add observe request scale coverage --- test/observe_request.test.ts | 67 ++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/test/observe_request.test.ts b/test/observe_request.test.ts index f1effa9..e0c8d14 100644 --- a/test/observe_request.test.ts +++ b/test/observe_request.test.ts @@ -258,4 +258,71 @@ describe("observe request API", () => { rmSync(root, { recursive: true, force: true }); } }); + + test("paginates trace lookup across more than one _search page", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-observe-scale-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await createJsonStream(app, "scale-traces", { kind: "otel-traces" }); + const rootSpanId = "0000000000000001"; + const spans = [ + span({ + spanId: rootSpanId, + service: "api", + name: "GET /bulk", + kind: "server", + start: "1772020800000000000", + end: "1772020802000000000", + statusCode: "ok", + requestId: "req_scale_1", + }), + ]; + for (let i = 2; i <= 1200; 
i++) { + const spanId = i.toString(16).padStart(16, "0"); + spans.push( + span({ + spanId, + parentSpanId: rootSpanId, + service: i % 2 === 0 ? "api" : "worker", + name: `child ${i}`, + start: String(1772020800000000000n + BigInt(i) * 1_000_000n), + end: String(1772020800000000000n + BigInt(i + 1) * 1_000_000n), + statusCode: "ok", + }) + ); + } + + const appendRes = await app.fetch( + new Request("http://local/v1/stream/scale-traces", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(spans), + }) + ); + expect([200, 204]).toContain(appendRes.status); + + const res = await fetchJsonApp(app, "http://local/v1/observe/request", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + streams: { traces: "scale-traces" }, + lookup: { traceId: TRACE_ID }, + include: { events: false, trace: true, timeline: false }, + limits: { spans: 1300 }, + }), + }); + + expect(res.status).toBe(200); + expect(res.body.trace.spans).toHaveLength(1200); + expect(res.body.trace.tree).toHaveLength(1); + expect(res.body.trace.tree[0].children).toHaveLength(1199); + expect(res.body.trace.partial).toBe(false); + expect(res.body.coverage.traces.hits).toBe(1200); + expect(res.body.coverage.traces.limit_reached).toBe(false); + expect(res.body.coverage.warnings).toEqual([]); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); }); From dd6c8c299a3691283afe58b286d753509fd86c4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Thu, 11 Jun 2026 20:48:24 +0700 Subject: [PATCH 05/12] Await bootstrap test app shutdown --- test/bootstrap_from_r2.test.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/bootstrap_from_r2.test.ts b/test/bootstrap_from_r2.test.ts index a556820..adb2682 100644 --- a/test/bootstrap_from_r2.test.ts +++ b/test/bootstrap_from_r2.test.ts @@ -212,7 +212,7 @@ describe("bootstrap from R2", () => { 
expectedPublishedLogicalSize = sourceRow && sourceRow.logical_size_bytes > unpublishedTailBytes ? sourceRow.logical_size_bytes - unpublishedTailBytes : 0n; } finally { - app.close(); + await app.close(); } const cfg2 = makeConfig(root2, { @@ -367,7 +367,7 @@ describe("bootstrap from R2", () => { const detailsBody = await detailsRes.json(); expect(detailsBody.stream.total_size_bytes).toBe(expectedPublishedLogicalSize.toString()); } finally { - app2.close(); + await app2.close(); rmSync(root, { recursive: true, force: true }); rmSync(root2, { recursive: true, force: true }); } @@ -407,7 +407,7 @@ describe("bootstrap from R2", () => { ); expect(delRes.status).toBe(204); } finally { - app.close(); + await app.close(); } const cfg2 = makeConfig(root2, { @@ -429,7 +429,7 @@ describe("bootstrap from R2", () => { const list = (await listRes.json()) as Array<{ name: string }>; expect(list.find((x) => x.name === stream)).toBeUndefined(); } finally { - app2.close(); + await app2.close(); rmSync(root, { recursive: true, force: true }); rmSync(root2, { recursive: true, force: true }); } @@ -489,7 +489,7 @@ describe("bootstrap from R2", () => { sourceRow && sourceRow.logical_size_bytes > unpublishedTailBytes ? 
sourceRow.logical_size_bytes - unpublishedTailBytes : 0n; expect(expectedPublishedLogicalSize).toBeGreaterThan(0n); } finally { - app.close(); + await app.close(); } const manifestKey = manifestObjectKey(streamHash16Hex(stream)); @@ -532,7 +532,7 @@ describe("bootstrap from R2", () => { const detailsBody = await detailsRes.json(); expect(detailsBody.stream.total_size_bytes).toBe(expectedPublishedLogicalSize.toString()); } finally { - app2.close(); + await app2.close(); rmSync(root, { recursive: true, force: true }); rmSync(root2, { recursive: true, force: true }); } From 40bbd35f6758ef2ddf4c4f5e35bb8162cc000d33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Thu, 11 Jun 2026 21:06:01 +0700 Subject: [PATCH 06/12] Stabilize app shutdown in tests --- src/app.ts | 12 +++--- src/app_core.ts | 2 +- src/app_local.ts | 2 +- src/index/indexer.ts | 45 ++++++++++++++++---- src/index/lexicon_indexer.ts | 43 ++++++++++++++++--- src/index/secondary_indexer.ts | 47 ++++++++++++++++++--- src/search/companion_manager.ts | 46 +++++++++++++++++--- test/aggregate_http.test.ts | 12 +++--- test/assumptions.test.ts | 40 +++++++++--------- test/chaos_restart_bootstrap.test.ts | 8 ++-- test/companion_backfill.test.ts | 22 +++++----- test/compute/demo_site.test.ts | 4 +- test/conformance.test.ts | 4 +- test/exact_index_backfill.test.ts | 6 +-- test/fault_injection.test.ts | 4 +- test/gharchive_demo.test.ts | 22 +++++----- test/http_behavior.test.ts | 14 +++--- test/index_compaction.test.ts | 2 +- test/index_runs.test.ts | 2 +- test/ingest_busy_retry.test.ts | 2 +- test/ingest_queue_drain.test.ts | 2 +- test/large_index_filter.test.ts | 2 +- test/live_stream2_read_perf.test.ts | 2 +- test/observe_request.test.ts | 8 ++-- test/poison_stream.test.ts | 2 +- test/profile_evlog.test.ts | 14 +++--- test/profile_generic.test.ts | 10 ++--- test/profile_metrics.test.ts | 8 ++-- test/profile_otel_traces.test.ts | 12 +++--- test/profile_state_protocol.test.ts | 16 +++---- 
test/queue_limits.test.ts | 2 +- test/restart.test.ts | 4 +- test/routing_key_lexicon.test.ts | 20 ++++----- test/schema_evolution.test.ts | 2 +- test/search_http.test.ts | 16 +++---- test/search_perf_repro.test.ts | 16 +++---- test/secondary_indexer.test.ts | 4 +- test/segment_meta.test.ts | 2 +- test/segment_recovery.test.ts | 4 +- test/touch_memory_journal.test.ts | 22 +++++----- test/touch_processor.test.ts | 12 +++--- test/touch_wait_timeout_reliability.test.ts | 2 +- 42 files changed, 324 insertions(+), 197 deletions(-) diff --git a/src/app.ts b/src/app.ts index 2041ee0..9e3898e 100644 --- a/src/app.ts +++ b/src/app.ts @@ -40,11 +40,13 @@ class CombinedIndexController implements StreamIndexLookup { this.lexiconIndex.start(); } - stop(): void { - this.routingIndex.stop(); - this.secondaryIndex.stop(); - this.companionIndex.stop(); - this.lexiconIndex.stop(); + async stop(): Promise { + await Promise.all([ + this.routingIndex.stop(), + this.secondaryIndex.stop(), + this.companionIndex.stop(), + this.lexiconIndex.stop(), + ]); } enqueue(stream: string): void { diff --git a/src/app_core.ts b/src/app_core.ts index 427891e..0b3d749 100644 --- a/src/app_core.ts +++ b/src/app_core.ts @@ -3301,7 +3301,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { await touch.stop(); await segmenter.stop(true); uploader.stop(true); - indexer?.stop(); + await indexer?.stop(); metricsEmitter.stop(); expirySweeper.stop(); streamSizeReconciler.stop(); diff --git a/src/app_local.ts b/src/app_local.ts index 8b7927c..f1578eb 100644 --- a/src/app_local.ts +++ b/src/app_local.ts @@ -33,7 +33,7 @@ class LocalIndexLookup implements StreamIndexLookup { start(): void {} - stop(): void {} + async stop(): Promise {} enqueue(_stream: string): void {} diff --git a/src/index/indexer.ts b/src/index/indexer.ts index c56b422..a775b8c 100644 --- a/src/index/indexer.ts +++ b/src/index/indexer.ts @@ -37,7 +37,7 @@ export type CompanionSectionLookupStats = { export type 
StreamIndexLookup = { start(): void; - stop(): void; + stop(): Promise; enqueue(stream: string): void; candidateSegmentsForRoutingKey(stream: string, keyBytes: Uint8Array): Promise; candidateSegmentsForSecondaryIndex(stream: string, indexName: string, keyBytes: Uint8Array): Promise; @@ -95,6 +95,8 @@ export class IndexManager { private timer: any | null = null; private wakeTimer: any | null = null; private running = false; + private stopped = false; + private tickPromise: Promise | null = null; private readonly publishManifest?: (stream: string) => Promise; private readonly onMetadataChanged?: (stream: string) => void; private readonly memorySampler?: RuntimeMemorySampler; @@ -149,20 +151,24 @@ export class IndexManager { start(): void { if (this.span <= 0) return; if (this.timer) return; + this.stopped = false; this.timer = setInterval(() => { - void this.tick(); + if (!this.stopped) this.runTick(); }, this.cfg.indexCheckIntervalMs); } - stop(): void { + async stop(): Promise { + this.stopped = true; if (this.timer) clearInterval(this.timer); if (this.wakeTimer) clearTimeout(this.wakeTimer); this.timer = null; this.wakeTimer = null; + while (this.tickPromise) await this.tickPromise; + this.firstQueuedAtMs = null; } enqueue(stream: string): void { - if (this.span <= 0) return; + if (this.span <= 0 || this.stopped) return; if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.queue.add(stream); if (shouldDeferEnqueuedIndexWork(this.cfg)) { @@ -173,9 +179,10 @@ export class IndexManager { } private scheduleTick(delayMs = 0): void { - if (!this.timer || this.wakeTimer) return; + if (this.stopped || !this.timer || this.wakeTimer) return; this.wakeTimer = setTimeout(() => { this.wakeTimer = null; + if (this.stopped) return; if ( shouldWaitForLowMemoryIndexQuiet( this.cfg, @@ -190,11 +197,32 @@ export class IndexManager { this.scheduleTick(250); return; } - void this.tick(); + this.runTick(); }, delayMs); (this.wakeTimer as { unref?: () => void 
}).unref?.(); } + private runTick(): void { + if (this.tickPromise) return; + const promise = this.tick() + .catch((e) => { + const lower = errorMessage(e).toLowerCase(); + const shutdownError = + lower.includes("database has closed") || + lower.includes("closed database") || + lower.includes("statement has finalized") || + lower.includes("disk i/o error"); + if (!this.stopped || !shutdownError) { + // eslint-disable-next-line no-console + console.error("index tick failed", e); + } + }) + .finally(() => { + if (this.tickPromise === promise) this.tickPromise = null; + }); + this.tickPromise = promise; + } + async candidateSegmentsForRoutingKey(stream: string, keyBytes: Uint8Array): Promise { if (this.span <= 0) return null; if (!this.isRoutingConfigured(stream)) return null; @@ -277,7 +305,7 @@ export class IndexManager { } private async tick(): Promise { - if (this.running) return; + if (this.running || this.stopped) return; this.running = true; try { if (this.metrics) { @@ -287,6 +315,7 @@ export class IndexManager { const streams = Array.from(this.queue); this.queue.clear(); for (const stream of streams) { + if (this.stopped) break; if (!this.isRoutingConfigured(stream)) { const hadRoutingState = !!this.db.getIndexState(stream) || this.db.listIndexRunsAll(stream).length > 0; if (hadRoutingState) { @@ -331,7 +360,7 @@ export class IndexManager { this.recordCacheStats(); } finally { this.running = false; - if (this.queue.size > 0) { + if (!this.stopped && this.queue.size > 0) { if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? 
LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0); } else { diff --git a/src/index/lexicon_indexer.ts b/src/index/lexicon_indexer.ts index 1ff732a..f744bc1 100644 --- a/src/index/lexicon_indexer.ts +++ b/src/index/lexicon_indexer.ts @@ -95,6 +95,8 @@ export class LexiconIndexManager { private timer: any | null = null; private wakeTimer: any | null = null; private running = false; + private stopped = false; + private tickPromise: Promise | null = null; private firstQueuedAtMs: number | null = null; constructor( @@ -131,21 +133,25 @@ export class LexiconIndexManager { start(): void { if (this.span <= 0 || this.timer) return; + this.stopped = false; this.timer = setInterval(() => { - void this.tick(); + if (!this.stopped) this.runTick(); }, this.cfg.indexCheckIntervalMs); } - stop(): void { + async stop(): Promise { + this.stopped = true; if (this.timer) clearInterval(this.timer); if (this.wakeTimer) clearTimeout(this.wakeTimer); this.timer = null; this.wakeTimer = null; + while (this.tickPromise) await this.tickPromise; + this.firstQueuedAtMs = null; this.fileCache?.clearMapped(); } enqueue(stream: string): void { - if (this.span <= 0) return; + if (this.span <= 0 || this.stopped) return; if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.queue.add(stream); if (shouldDeferEnqueuedIndexWork(this.cfg)) { @@ -156,9 +162,10 @@ export class LexiconIndexManager { } private scheduleTick(delayMs = 0): void { - if (!this.timer || this.wakeTimer) return; + if (this.stopped || !this.timer || this.wakeTimer) return; this.wakeTimer = setTimeout(() => { this.wakeTimer = null; + if (this.stopped) return; if ( shouldWaitForLowMemoryIndexQuiet( this.cfg, @@ -173,11 +180,32 @@ export class LexiconIndexManager { this.scheduleTick(250); return; } - void this.tick(); + this.runTick(); }, delayMs); (this.wakeTimer as { unref?: () => void }).unref?.(); } + private runTick(): void { + if (this.tickPromise) return; + const promise = this.tick() + .catch((e) => { + const lower = 
errorMessage(e).toLowerCase(); + const shutdownError = + lower.includes("database has closed") || + lower.includes("closed database") || + lower.includes("statement has finalized") || + lower.includes("disk i/o error"); + if (!this.stopped || !shutdownError) { + // eslint-disable-next-line no-console + console.error("lexicon tick failed", e); + } + }) + .finally(() => { + if (this.tickPromise === promise) this.tickPromise = null; + }); + this.tickPromise = promise; + } + getLocalCacheBytes(stream: string): number { return this.fileCache?.bytesForObjectKeyPrefix(`streams/${streamHash16Hex(stream)}/lexicon/`) ?? 0; } @@ -247,12 +275,13 @@ export class LexiconIndexManager { } private async tick(): Promise { - if (this.running) return; + if (this.running || this.stopped) return; this.running = true; try { const streams = Array.from(this.queue); this.queue.clear(); for (const stream of streams) { + if (this.stopped) break; if (!this.isRoutingLexiconConfigured(stream)) { const hadState = this.db.getLexiconIndexState(stream, ROUTING_KEY_SOURCE_KIND, ROUTING_KEY_SOURCE_NAME) != null || @@ -287,7 +316,7 @@ export class LexiconIndexManager { } } finally { this.running = false; - if (this.queue.size > 0) { + if (!this.stopped && this.queue.size > 0) { if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0); } else { diff --git a/src/index/secondary_indexer.ts b/src/index/secondary_indexer.ts index de7f3d6..249454a 100644 --- a/src/index/secondary_indexer.ts +++ b/src/index/secondary_indexer.ts @@ -53,6 +53,10 @@ function binarySearch(values: bigint[], needle: bigint): number { return -1; } +function errorMessage(e: unknown): string { + return String((e as any)?.message ?? 
e); +} + const PAYLOAD_DECODER = new TextDecoder(); const TERM_ENCODER = new TextEncoder(); export class SecondaryIndexManager { @@ -77,6 +81,8 @@ export class SecondaryIndexManager { private timer: any | null = null; private wakeTimer: any | null = null; private running = false; + private stopped = false; + private tickPromise: Promise | null = null; private readonly publishManifest?: (stream: string) => Promise; private readonly onMetadataChanged?: (stream: string) => void; private readonly memorySampler?: RuntimeMemorySampler; @@ -131,21 +137,25 @@ export class SecondaryIndexManager { start(): void { if (this.span <= 0) return; if (this.timer) return; + this.stopped = false; this.timer = setInterval(() => { - void this.tick(); + if (!this.stopped) this.runTick(); }, this.cfg.indexCheckIntervalMs); } - stop(): void { + async stop(): Promise { + this.stopped = true; if (this.timer) clearInterval(this.timer); if (this.wakeTimer) clearTimeout(this.wakeTimer); this.timer = null; this.wakeTimer = null; + while (this.tickPromise) await this.tickPromise; this.streamIdleTicks.clear(); + this.firstQueuedAtMs = null; } enqueue(stream: string): void { - if (this.span <= 0) return; + if (this.span <= 0 || this.stopped) return; if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.queue.add(stream); if (shouldDeferEnqueuedIndexWork(this.cfg)) { @@ -156,9 +166,10 @@ export class SecondaryIndexManager { } private scheduleTick(delayMs = 0): void { - if (!this.timer || this.wakeTimer) return; + if (this.stopped || !this.timer || this.wakeTimer) return; this.wakeTimer = setTimeout(() => { this.wakeTimer = null; + if (this.stopped) return; if ( shouldWaitForLowMemoryIndexQuiet( this.cfg, @@ -173,11 +184,32 @@ export class SecondaryIndexManager { this.scheduleTick(250); return; } - void this.tick(); + this.runTick(); }, delayMs); (this.wakeTimer as { unref?: () => void }).unref?.(); } + private runTick(): void { + if (this.tickPromise) return; + const promise = 
this.tick() + .catch((e) => { + const lower = errorMessage(e).toLowerCase(); + const shutdownError = + lower.includes("database has closed") || + lower.includes("closed database") || + lower.includes("statement has finalized") || + lower.includes("disk i/o error"); + if (!this.stopped || !shutdownError) { + // eslint-disable-next-line no-console + console.error("secondary index tick failed", e); + } + }) + .finally(() => { + if (this.tickPromise === promise) this.tickPromise = null; + }); + this.tickPromise = promise; + } + async candidateSegmentsForSecondaryIndex( stream: string, indexName: string, @@ -250,12 +282,13 @@ export class SecondaryIndexManager { } private async tick(): Promise { - if (this.running) return; + if (this.running || this.stopped) return; this.running = true; try { const streams = Array.from(this.queue); this.queue.clear(); for (const stream of streams) { + if (this.stopped) break; const regRes = this.registry.getRegistryResult(stream); if (Result.isError(regRes)) continue; if (this.shouldPauseExactBackgroundWork(stream)) { @@ -305,7 +338,7 @@ export class SecondaryIndexManager { } } finally { this.running = false; - if (this.queue.size > 0) { + if (!this.stopped && this.queue.size > 0) { if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? 
LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0); } else { diff --git a/src/search/companion_manager.ts b/src/search/companion_manager.ts index 956717d..4405db9 100644 --- a/src/search/companion_manager.ts +++ b/src/search/companion_manager.ts @@ -66,6 +66,10 @@ function invalidCompanionBuild(message: string): Result | null = null; private firstQueuedAtMs: number | null = null; constructor( @@ -255,20 +261,25 @@ export class SearchCompanionManager { start(): void { if (this.timer) return; + this.stopped = false; this.timer = setInterval(() => { - void this.tick(); + if (!this.stopped) this.runTick(); }, this.cfg.indexCheckIntervalMs); } - stop(): void { + async stop(): Promise { + this.stopped = true; if (this.timer) clearInterval(this.timer); if (this.wakeTimer) clearTimeout(this.wakeTimer); this.timer = null; this.wakeTimer = null; + while (this.tickPromise) await this.tickPromise; + this.firstQueuedAtMs = null; this.fileCache.clearMapped(); } enqueue(stream: string): void { + if (this.stopped) return; if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.queue.add(stream); if (shouldDeferEnqueuedIndexWork(this.cfg)) { @@ -279,9 +290,10 @@ export class SearchCompanionManager { } private scheduleTick(delayMs = 0): void { - if (!this.timer || this.wakeTimer) return; + if (this.stopped || !this.timer || this.wakeTimer) return; this.wakeTimer = setTimeout(() => { this.wakeTimer = null; + if (this.stopped) return; if ( shouldWaitForLowMemoryIndexQuiet( this.cfg, @@ -296,11 +308,32 @@ export class SearchCompanionManager { this.scheduleTick(250); return; } - void this.tick(); + this.runTick(); }, delayMs); (this.wakeTimer as { unref?: () => void }).unref?.(); } + private runTick(): void { + if (this.tickPromise) return; + const promise = this.tick() + .catch((e) => { + const lower = errorMessage(e).toLowerCase(); + const shutdownError = + lower.includes("database has closed") || + lower.includes("closed database") || + lower.includes("statement has finalized") 
|| + lower.includes("disk i/o error"); + if (!this.stopped || !shutdownError) { + // eslint-disable-next-line no-console + console.error("bundled companion tick failed", e); + } + }) + .finally(() => { + if (this.tickPromise === promise) this.tickPromise = null; + }); + this.tickPromise = promise; + } + async getColSegmentCompanion(stream: string, segmentIndex: number): Promise { return (await this.getSectionCompanion(stream, segmentIndex, "col")) ?? null; } @@ -499,7 +532,7 @@ export class SearchCompanionManager { } private async tick(): Promise { - if (this.running) return; + if (this.running || this.stopped) return; this.running = true; try { if (this.metrics) { @@ -509,6 +542,7 @@ export class SearchCompanionManager { const streams = Array.from(new Set([...this.db.listSearchCompanionPlanStreams(), ...this.queue])); this.queue.clear(); for (const stream of streams) { + if (this.stopped) break; try { const buildRes = await this.buildPendingSegmentsResult(stream); if (Result.isError(buildRes)) { @@ -522,7 +556,7 @@ export class SearchCompanionManager { } } finally { this.running = false; - if (this.queue.size > 0) { + if (!this.stopped && this.queue.size > 0) { if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? 
LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0); } else { diff --git a/test/aggregate_http.test.ts b/test/aggregate_http.test.ts index b7fdf7c..c63a4fc 100644 --- a/test/aggregate_http.test.ts +++ b/test/aggregate_http.test.ts @@ -283,7 +283,7 @@ describe("_aggregate http", () => { expect(fallbackBody.buckets).toHaveLength(1); expect(fallbackBody.buckets[0].groups[0].measures.requests).toEqual({ count: 1 }); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -392,7 +392,7 @@ describe("_aggregate http", () => { }, ]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -463,7 +463,7 @@ describe("_aggregate http", () => { } await waitForAggFamily(app, 20_000); - app.close(); + await app.close(); app = createApp(queryCfg, store); const counters = instrumentAggregateCompanionCounters(app); @@ -486,7 +486,7 @@ describe("_aggregate http", () => { expect(counters.aggCalls).toBe(1); expect(counters.colCalls).toBe(0); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -562,7 +562,7 @@ describe("_aggregate http", () => { expect(body.buckets).toHaveLength(1); expect(body.buckets[0].groups[0].measures.requests).toEqual({ count: 2 }); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -640,7 +640,7 @@ describe("_aggregate http", () => { expect(body.coverage.possible_missing_events_upper_bound).toBeGreaterThan(0); expect(body.buckets).toEqual([]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, diff --git a/test/assumptions.test.ts b/test/assumptions.test.ts index 4e4f197..388bd86 100644 --- a/test/assumptions.test.ts +++ b/test/assumptions.test.ts @@ -104,7 +104,7 @@ describe("assumptions", () => { expect(r.status).toBe(404); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); 
@@ -129,7 +129,7 @@ describe("assumptions", () => { expect(r.status).toBe(404); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -154,7 +154,7 @@ describe("assumptions", () => { expect(arr.length).toBe(2); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -177,7 +177,7 @@ describe("assumptions", () => { expect(schemaRes.status).toBe(200); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -210,7 +210,7 @@ describe("assumptions", () => { expect(schemaRes.status).toBe(400); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -236,7 +236,7 @@ describe("assumptions", () => { expect(schemaRes.status).toBe(200); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -262,7 +262,7 @@ describe("assumptions", () => { expect(schemaRes.status).toBe(400); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -289,7 +289,7 @@ describe("assumptions", () => { expect(r.status).toBe(400); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -309,7 +309,7 @@ describe("assumptions", () => { expect(r.status).toBe(204); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -329,7 +329,7 @@ describe("assumptions", () => { expect(r.status).toBe(400); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -358,7 +358,7 @@ describe("assumptions", () => { expect(r.status).toBe(400); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -379,7 +379,7 @@ describe("assumptions", () => { expect(r.status).toBe(400); server.stop(); - app.close(); + await app.close(); 
rmSync(root, { recursive: true, force: true }); }); @@ -395,7 +395,7 @@ describe("assumptions", () => { expect(r.status).toBe(400); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -414,7 +414,7 @@ describe("assumptions", () => { expect(r.status).toBe(400); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -450,7 +450,7 @@ describe("assumptions", () => { expect(text).toBe("b"); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -473,7 +473,7 @@ describe("assumptions", () => { if (p.kind === "seq") expect(p.epoch).toBe(0); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -504,7 +504,7 @@ describe("assumptions", () => { expectAccelerationStateCleared(app.deps.db, "del"); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); @@ -524,7 +524,7 @@ describe("assumptions", () => { expect(app.deps.db.listLexiconIndexStates("stale")).toHaveLength(1); expect(app.deps.db.getSearchCompanionPlan("stale")).not.toBeNull(); } finally { - app.close(); + await app.close(); } const restarted = createApp(cfg, new MockR2Store()); @@ -534,7 +534,7 @@ describe("assumptions", () => { expect(deletedRow && restarted.deps.db.isDeleted(deletedRow)).toBe(true); expectAccelerationStateCleared(restarted.deps.db, "stale"); } finally { - restarted.close(); + await restarted.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -566,7 +566,7 @@ describe("assumptions", () => { expect(ok.status).toBe(204); server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); }); }); diff --git a/test/chaos_restart_bootstrap.test.ts b/test/chaos_restart_bootstrap.test.ts index dfa847c..a627e2a 100644 --- a/test/chaos_restart_bootstrap.test.ts +++ b/test/chaos_restart_bootstrap.test.ts @@ 
-188,7 +188,7 @@ describe("chaos restart + bootstrap", () => { expected.get(stream)!.push(value); if (rand() < 0.2) { - app.close(); + await app.close(); app = createApp(cfg, chaosStore); } if (rand() < 0.3) await sleep(5); @@ -218,7 +218,7 @@ describe("chaos restart + bootstrap", () => { expect(values).toEqual(expected.get(s)); } - app.close(); + await app.close(); // Snapshot a self-consistent bootstrap source-of-truth from manifests. const { store: bootstrapStore, uploadedCounts } = await snapshotBootstrapStore(chaosStore, streams); @@ -242,11 +242,11 @@ describe("chaos restart + bootstrap", () => { expect(meta).not.toBeNull(); } } finally { - app2.close(); + await app2.close(); } } finally { try { - app.close(); + await app.close(); } catch { // ignore } diff --git a/test/companion_backfill.test.ts b/test/companion_backfill.test.ts index 9425fc1..3e36d84 100644 --- a/test/companion_backfill.test.ts +++ b/test/companion_backfill.test.ts @@ -207,7 +207,7 @@ describe("bundled companions and backfill", () => { expect(app.deps.db.listSearchSegmentCompanions(STREAM)).toHaveLength(0); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -275,7 +275,7 @@ describe("bundled companions and backfill", () => { expect((await store.list(`streams/${streamHash}/agg/segments/`)).length).toBe(0); expect((await store.list(`streams/${streamHash}/mblk/segments/`)).length).toBe(0); } finally { - app.close(); + await app.close(); } const pausedCfg = makeConfig(root, { @@ -362,7 +362,7 @@ describe("bundled companions and backfill", () => { expect(filterBody).toHaveLength(1); expect(filterBody[0].status).toBe(505); } finally { - app.close(); + await app.close(); } app = createApp(buildCfg, store); @@ -396,7 +396,7 @@ describe("bundled companions and backfill", () => { expect(searchBody.hits.length).toBeGreaterThan(0); expect(searchBody.coverage.index_families_used).toEqual(expect.arrayContaining(["fts"])); } finally { - app.close(); + 
await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -458,7 +458,7 @@ describe("bundled companions and backfill", () => { await waitForCompanionGeneration(app, 1); } finally { - app.close(); + await app.close(); } const pausedCfg = makeConfig(root, { @@ -504,7 +504,7 @@ describe("bundled companions and backfill", () => { ]) ); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -613,7 +613,7 @@ describe("bundled companions and backfill", () => { expect(searchBody.hits[0]?.source?.title).toBe("constructor push keeps building"); expect(searchBody.coverage.index_families_used).toEqual(expect.arrayContaining(["fts"])); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -764,7 +764,7 @@ describe("bundled companions and backfill", () => { expect(String((aggError as { message?: string } | null)?.message ?? aggError).length).toBeGreaterThan(0); expect(store.stats().gets).toBe(1); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -855,7 +855,7 @@ describe("bundled companions and backfill", () => { const localCachePath = join(root, "cache/companions", row.object_key); expect(existsSync(localCachePath)).toBeTrue(); - app.close(); + await app.close(); app = createApp(cfg, store); store.resetStats(); @@ -864,7 +864,7 @@ describe("bundled companions and backfill", () => { expect(ftsCompanion?.getField("message")).not.toBeNull(); expect(store.stats().gets).toBe(0); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -997,7 +997,7 @@ describe("bundled companions and backfill", () => { expect(yieldCount).toBeGreaterThan(0); expect(yieldedTimerWhileBuildPending).toBeTrue(); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, diff --git a/test/compute/demo_site.test.ts b/test/compute/demo_site.test.ts index 
cda7855..5079c61 100644 --- a/test/compute/demo_site.test.ts +++ b/test/compute/demo_site.test.ts @@ -129,7 +129,7 @@ describe("compute demo site", () => { expect(assetResponse.headers.get("content-type")).toContain("image/svg+xml"); } finally { site.close(); - streamsApp.close(); + await streamsApp.close(); } }); @@ -225,7 +225,7 @@ describe("compute demo site", () => { ); } finally { site.close(); - streamsApp.close(); + await streamsApp.close(); } }); }); diff --git a/test/conformance.test.ts b/test/conformance.test.ts index 738c4fb..10c9e31 100644 --- a/test/conformance.test.ts +++ b/test/conformance.test.ts @@ -31,14 +31,14 @@ describe("durable streams (Bun+TS rewrite)", () => { root = mkdtempSync(join(tmpdir(), "ds-bun-ts-")); }); - afterEach(() => { + afterEach(async () => { try { server?.stop?.(); } catch { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } diff --git a/test/exact_index_backfill.test.ts b/test/exact_index_backfill.test.ts index d0ccea0..50c24a1 100644 --- a/test/exact_index_backfill.test.ts +++ b/test/exact_index_backfill.test.ts @@ -164,7 +164,7 @@ describe("exact secondary index backfill", () => { markAppendIdle(app); await waitForExactIndex(app, EXACT_HASH_V1); } finally { - app.close(); + await app.close(); } const pausedCfg = makeConfig(root, { @@ -205,7 +205,7 @@ describe("exact secondary index backfill", () => { expect(filterBody).toHaveLength(3); expect(filterBody.every((entry: any) => entry.tagB === "current-b")).toBe(true); } finally { - app.close(); + await app.close(); } app = createApp(buildCfg, store); @@ -235,7 +235,7 @@ describe("exact secondary index backfill", () => { expect(filterBody).toHaveLength(3); expect(filterBody.every((entry: any) => entry.tagB === "current-b")).toBe(true); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, diff --git a/test/fault_injection.test.ts b/test/fault_injection.test.ts index 9e932e0..a1a8993 100644 --- 
a/test/fault_injection.test.ts +++ b/test/fault_injection.test.ts @@ -108,7 +108,7 @@ describe("fault injection", () => { expect(keys.some((k) => k.includes("/segments/") && k.endsWith(".bin"))).toBe(true); server.stop(); - app.close(); + await app.close(); } finally { rmSync(root, { recursive: true, force: true }); } @@ -149,7 +149,7 @@ describe("fault injection", () => { expect(new Set(os.attemptedSegmentIndexes)).toEqual(new Set([0])); server.stop(); - app.close(); + await app.close(); } finally { rmSync(root, { recursive: true, force: true }); } diff --git a/test/gharchive_demo.test.ts b/test/gharchive_demo.test.ts index 3a6c1f5..0c2c0c8 100644 --- a/test/gharchive_demo.test.ts +++ b/test/gharchive_demo.test.ts @@ -420,7 +420,7 @@ describe("gharchive demo", () => { expect(ftsDetails.index_status.exact_indexes).toEqual([]); expect(ftsDetails.index_status.search_families.map((entry: { family: string }) => entry.family)).toEqual(["fts"]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -497,7 +497,7 @@ describe("gharchive demo", () => { expect(summary.appendBackoffWaitMs).toBe(0); expect(appendOrder.slice(0, 3)).toEqual(["exact:timeout", "exact:ok", "fts:ok"]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -564,7 +564,7 @@ describe("gharchive demo", () => { expect(summary.appendBackoffWaitMs).toBe(0); expect(appendOrder.slice(0, 3)).toEqual(["exact:server-timeout", "exact:ok", "fts:ok"]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -629,7 +629,7 @@ describe("gharchive demo", () => { expect(sawCreateBackoff).toBe(true); expect(createOrder.slice(0, 3)).toEqual(["exact:create-backoff", "exact:create-ok", "fts:create-ok"]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -676,7 +676,7 @@ describe("gharchive demo", () => { 
expect(details.index_status.search_families).toEqual([]); expect(details.index_status.bundled_companions.object_count).toBe(0); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -722,7 +722,7 @@ describe("gharchive demo", () => { expect(details.index_status.exact_indexes).toEqual([]); expect(details.index_status.search_families).toEqual([]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -771,7 +771,7 @@ describe("gharchive demo", () => { expect(details.index_status.exact_indexes).toEqual([]); expect(details.index_status.search_families).toEqual([]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -872,7 +872,7 @@ describe("gharchive demo", () => { expect(summary.startHour).toBe("2020-01-01-13"); expect(summary.streams).toEqual(["golden-stream-2"]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -936,7 +936,7 @@ describe("gharchive demo", () => { expect(stderrLines.join("")).toContain("skipping ahead 10h to 2011-02-12-10"); } finally { process.stderr.write = realStderrWrite; - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -1000,7 +1000,7 @@ describe("gharchive demo", () => { expect(stderrLines.join("")).toContain("skipping ahead 12h"); } finally { process.stderr.write = realStderrWrite; - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -1039,7 +1039,7 @@ describe("gharchive demo", () => { }) ).rejects.toThrow("no GH Archive hours were available"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, diff --git a/test/http_behavior.test.ts b/test/http_behavior.test.ts index 426aa56..ce1f6cf 100644 --- a/test/http_behavior.test.ts +++ b/test/http_behavior.test.ts @@ -66,7 +66,7 @@ async function withServer( return 
await fn({ baseUrl }); } finally { server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } } @@ -930,7 +930,7 @@ describe("http behavior", () => { (app.deps.memory as any).isOverLimit = originalIsOverLimit; } finally { server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -972,7 +972,7 @@ describe("http behavior", () => { }, }); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -1005,7 +1005,7 @@ describe("http behavior", () => { const root = mkdtempSync(join(tmpdir(), "ds-http-close-")); const app = createApp(makeConfig(root), new MockR2Store()); try { - app.close(); + await app.close(); const r = await app.fetch(new Request("http://local/health")); expect(r.status).toBe(503); expect(r.headers.get("retry-after")).toBe("5"); @@ -1159,7 +1159,7 @@ describe("http behavior", () => { expect(store.getCalls.filter((call) => call.key.endsWith(".bin"))).toHaveLength(0); } finally { server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } } @@ -1223,7 +1223,7 @@ describe("http behavior", () => { ]); expect(findSegmentCalls).toBeLessThanOrEqual(8); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } } @@ -1283,7 +1283,7 @@ describe("http behavior", () => { } expect(findSegmentCalls).toBeLessThanOrEqual(4); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } } diff --git a/test/index_compaction.test.ts b/test/index_compaction.test.ts index f0f50b9..e4c90d7 100644 --- a/test/index_compaction.test.ts +++ b/test/index_compaction.test.ts @@ -101,7 +101,7 @@ describe("index compaction", () => { expect(active[0].level).toBe(1); expect(retired.length).toBe(2); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, diff --git a/test/index_runs.test.ts 
b/test/index_runs.test.ts index 9460467..80fc63d 100644 --- a/test/index_runs.test.ts +++ b/test/index_runs.test.ts @@ -119,7 +119,7 @@ describe("index runs", () => { expect(res!.segments.has(1)).toBe(true); expect(res!.segments.has(0)).toBe(false); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, 20_000); diff --git a/test/ingest_busy_retry.test.ts b/test/ingest_busy_retry.test.ts index bb5b967..64a6474 100644 --- a/test/ingest_busy_retry.test.ts +++ b/test/ingest_busy_retry.test.ts @@ -62,7 +62,7 @@ describe("ingest busy retry", () => { const res = await appendPromise; expect(Result.isOk(res)).toBe(true); - app.close(); + await app.close(); } finally { rmSync(root, { recursive: true, force: true }); } diff --git a/test/ingest_queue_drain.test.ts b/test/ingest_queue_drain.test.ts index aa02d9f..4dbba4c 100644 --- a/test/ingest_queue_drain.test.ts +++ b/test/ingest_queue_drain.test.ts @@ -53,7 +53,7 @@ describe("ingest queue drain", () => { const res = await appendPromise; expect(Result.isOk(res)).toBe(true); - app.close(); + await app.close(); } finally { rmSync(root, { recursive: true, force: true }); } diff --git a/test/large_index_filter.test.ts b/test/large_index_filter.test.ts index dc5ff96..4d493fb 100644 --- a/test/large_index_filter.test.ts +++ b/test/large_index_filter.test.ts @@ -598,7 +598,7 @@ describe("large indexed filter integration", () => { `fullScanMBps=${scanThroughputMBps.toFixed(2)} batches=${fullScan.batches} limitHits=${fullScan.limitHitBatches}` ); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, diff --git a/test/live_stream2_read_perf.test.ts b/test/live_stream2_read_perf.test.ts index d52e854..ae5a70b 100644 --- a/test/live_stream2_read_perf.test.ts +++ b/test/live_stream2_read_perf.test.ts @@ -58,7 +58,7 @@ async function withFixtureApp(fn: (app: ReturnType) => Prom try { return await fn(app); } finally { - app.close(); + await 
app.close(); rmSync(root, { recursive: true, force: true }); } } diff --git a/test/observe_request.test.ts b/test/observe_request.test.ts index e0c8d14..099fdf3 100644 --- a/test/observe_request.test.ts +++ b/test/observe_request.test.ts @@ -210,7 +210,7 @@ describe("observe request API", () => { expect(res.body.coverage.traces.searched).toBe(true); expect(res.body.coverage.warnings).toEqual([]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -232,7 +232,7 @@ describe("observe request API", () => { expect(res.body.evlog.primary.traceId).toBe(TRACE_ID); expect(res.body.trace.partial).toBe(false); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -254,7 +254,7 @@ describe("observe request API", () => { expect(res.body.coverage.traces.limit_reached).toBe(true); expect(res.body.coverage.warnings).toContain("span limit reached"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -321,7 +321,7 @@ describe("observe request API", () => { expect(res.body.coverage.traces.limit_reached).toBe(false); expect(res.body.coverage.warnings).toEqual([]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); diff --git a/test/poison_stream.test.ts b/test/poison_stream.test.ts index 381c778..b19c338 100644 --- a/test/poison_stream.test.ts +++ b/test/poison_stream.test.ts @@ -92,7 +92,7 @@ describe("poison stream isolation", () => { expect(poisonKeys.length).toBe(0); server.stop(); - app.close(); + await app.close(); } finally { console.error = origError; rmSync(root, { recursive: true, force: true }); diff --git a/test/profile_evlog.test.ts b/test/profile_evlog.test.ts index d8de817..517b7e6 100644 --- a/test/profile_evlog.test.ts +++ b/test/profile_evlog.test.ts @@ -90,7 +90,7 @@ describe("evlog profile", () => { expect(schemaRes.body?.schemas?.["1"]).toBeDefined(); 
expect(app.deps.db.getSchemaRegistry("evlog-install")).not.toBeNull(); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -163,7 +163,7 @@ describe("evlog profile", () => { expect(lateInstallRes.status).toBe(400); expect(lateInstallRes.body?.error?.message).toContain("before appending data"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -290,7 +290,7 @@ describe("evlog profile", () => { ); expect(invalidRes.status).toBe(400); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -335,7 +335,7 @@ describe("evlog profile", () => { expect(byTraceIdRes.body[0]?.spanId).toBe("span_only_1"); expect(byTraceIdRes.body[0]?.level).toBe("info"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -453,7 +453,7 @@ describe("evlog profile", () => { expect(detailsRes.body?.index_status?.search_families).toEqual(indexStatusRes.body?.search_families); expect(detailsRes.body?.index_status?.exact_indexes).toEqual(indexStatusRes.body?.exact_indexes); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -514,10 +514,10 @@ describe("evlog profile", () => { expect(listRes.status).toBe(200); expect(listRes.body.find((row: any) => row.name === "evlog-bootstrap")?.profile).toBe("evlog"); } finally { - app2.close(); + await app2.close(); } } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); rmSync(root2, { recursive: true, force: true }); } diff --git a/test/profile_generic.test.ts b/test/profile_generic.test.ts index 8307eea..5f2cf9f 100644 --- a/test/profile_generic.test.ts +++ b/test/profile_generic.test.ts @@ -34,7 +34,7 @@ describe("generic profile", () => { expect(app.deps.db.getStreamProfile("generic-default")).toBeNull(); 
expect(app.deps.db.getStreamTouchState("generic-default")).toBeNull(); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -64,7 +64,7 @@ describe("generic profile", () => { expect(res.status).toBe(400); expect(res.body?.error?.message).toContain("profile.touch"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -118,7 +118,7 @@ describe("generic profile", () => { expect(app.deps.db.getStreamProfile("generic-switch")).toBeNull(); expect(app.deps.db.getStreamTouchState("generic-switch")).toBeNull(); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -136,7 +136,7 @@ describe("generic profile", () => { ); await app.deps.uploader.publishManifest("generic-bootstrap"); } finally { - app.close(); + await app.close(); } const cfg2 = makeProfileTestConfig(root2, { segmentCacheMaxBytes: 0, segmentFooterCacheEntries: 0 }); @@ -158,7 +158,7 @@ describe("generic profile", () => { expect(listRes.status).toBe(200); expect(listRes.body.find((entry: any) => entry.name === "generic-bootstrap")?.profile).toBe("generic"); } finally { - app2.close(); + await app2.close(); rmSync(root, { recursive: true, force: true }); rmSync(root2, { recursive: true, force: true }); } diff --git a/test/profile_metrics.test.ts b/test/profile_metrics.test.ts index ce9c00b..5818548 100644 --- a/test/profile_metrics.test.ts +++ b/test/profile_metrics.test.ts @@ -130,7 +130,7 @@ describe("metrics profile", () => { expect(app.deps.db.getSearchCompanionPlan("__stream_metrics__")).toBeNull(); expect(app.deps.db.listSearchSegmentCompanions("__stream_metrics__")).toEqual([]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -200,7 +200,7 @@ describe("metrics profile", () => { null ); } finally { - first.app.close(); + await first.app.close(); } const second = createProfileTestApp(root, { 
metricsFlushIntervalMs: 0 }); @@ -218,7 +218,7 @@ describe("metrics profile", () => { expect(second.app.deps.db.getSearchCompanionPlan("__stream_metrics__")).toBeNull(); expect(second.app.deps.db.listSearchSegmentCompanions("__stream_metrics__")).toEqual([]); } finally { - second.app.close(); + await second.app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -392,7 +392,7 @@ describe("metrics profile", () => { }) ); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); diff --git a/test/profile_otel_traces.test.ts b/test/profile_otel_traces.test.ts index 554315b..2224b81 100644 --- a/test/profile_otel_traces.test.ts +++ b/test/profile_otel_traces.test.ts @@ -215,7 +215,7 @@ describe("otel-traces profile", () => { expect(schemaRes.body?.search?.fields?.["events.name"]?.bindings?.[0]?.jsonPointer).toBe("/eventNames"); expect(schemaRes.body?.search?.rollups?.spans?.measures?.latency?.field).toBe("duration"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -258,7 +258,7 @@ describe("otel-traces profile", () => { expect(lateRes.status).toBe(400); expect(lateRes.body?.error?.message).toContain("before appending data"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -351,7 +351,7 @@ describe("otel-traces profile", () => { "http.statusCode": 500, }); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -389,7 +389,7 @@ describe("otel-traces profile", () => { db: { system: "postgresql", statement: null }, }); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -423,7 +423,7 @@ describe("otel-traces profile", () => { status: { code: "ok", message: "ok" }, }); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -454,7 +454,7 @@ 
describe("otel-traces profile", () => { expect(readRes.body).toHaveLength(1); expect(readRes.body[0]?.traceId).toBe(TRACE_ID); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); diff --git a/test/profile_state_protocol.test.ts b/test/profile_state_protocol.test.ts index 26192f7..3b66a39 100644 --- a/test/profile_state_protocol.test.ts +++ b/test/profile_state_protocol.test.ts @@ -52,7 +52,7 @@ describe("state-protocol profile", () => { expect(app.deps.db.getStreamProfile("state-install")).not.toBeNull(); expect(app.deps.db.getStreamTouchState("state-install")).not.toBeNull(); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -92,7 +92,7 @@ describe("state-protocol profile", () => { expect(app.deps.db.getStreamProfile("state-non-json")).toBeNull(); expect(app.deps.db.getStreamTouchState("state-non-json")).toBeNull(); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -139,7 +139,7 @@ describe("state-protocol profile", () => { expect(invalidTouchField.status).toBe(400); expect(invalidTouchField.body?.error?.message).toContain("profile.touch.storage"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -182,7 +182,7 @@ describe("state-protocol profile", () => { const touchMetaRes = await app.fetch(new Request("http://local/v1/stream/state-disabled/touch/meta", { method: "GET" })); expect(touchMetaRes.status).toBe(404); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -247,10 +247,10 @@ describe("state-protocol profile", () => { const touchMetaRes = await app2.fetch(new Request("http://local/v1/stream/state-bootstrap/touch/meta", { method: "GET" })); expect(touchMetaRes.status).toBe(200); } finally { - app2.close(); + await app2.close(); } } finally { - app.close(); + await app.close(); rmSync(root, { 
recursive: true, force: true }); rmSync(root2, { recursive: true, force: true }); } @@ -326,7 +326,7 @@ describe("state-protocol profile", () => { }, ]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -427,7 +427,7 @@ describe("state-protocol profile", () => { expect(String(appendRes.body?.error?.message ?? ""), tc.name).toContain(tc.message); } } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); diff --git a/test/queue_limits.test.ts b/test/queue_limits.test.ts index e2baa62..87e90c6 100644 --- a/test/queue_limits.test.ts +++ b/test/queue_limits.test.ts @@ -66,7 +66,7 @@ describe("queue limits", () => { expect([200, 204, 408]).toContain(r2.status); server.stop(); - app.close(); + await app.close(); } finally { rmSync(root, { recursive: true, force: true }); } diff --git a/test/restart.test.ts b/test/restart.test.ts index e1d5318..b225a60 100644 --- a/test/restart.test.ts +++ b/test/restart.test.ts @@ -29,7 +29,7 @@ describe("restart recovery", () => { }); server.stop(); - app.close(); + await app.close(); app = createApp(cfg, os); server = Bun.serve({ port: 0, fetch: app.fetch }); @@ -39,7 +39,7 @@ describe("restart recovery", () => { expect(new TextDecoder().decode(bytes)).toBe("hello"); server.stop(); - app.close(); + await app.close(); } finally { rmSync(root, { recursive: true, force: true }); } diff --git a/test/routing_key_lexicon.test.ts b/test/routing_key_lexicon.test.ts index a926b66..26ea0d7 100644 --- a/test/routing_key_lexicon.test.ts +++ b/test/routing_key_lexicon.test.ts @@ -90,7 +90,7 @@ describe("routing key lexicon", () => { expect(res.status).toBe(400); expect(res.body?.error?.message).toBe("invalid limit"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -125,7 +125,7 @@ describe("routing key lexicon", () => { expect(res.body?.coverage?.indexed_segments).toBe(0); 
expect(res.body?.coverage?.scanned_uploaded_segments).toBeGreaterThanOrEqual(1); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -177,7 +177,7 @@ describe("routing key lexicon", () => { expect(second.body?.keys).toEqual(["delta/repo", "gamma/repo"]); expect(second.body?.next_after).toBe("gamma/repo"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -216,7 +216,7 @@ describe("routing key lexicon", () => { expect(app.deps.db.getLexiconIndexState(stream, "routing_key", "")).toBeNull(); expect(app.deps.db.listLexiconIndexRuns(stream, "routing_key", "")).toHaveLength(0); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -244,7 +244,7 @@ describe("routing key lexicon", () => { return (state?.indexed_through ?? 0) >= 1; }); - app.deps.indexer?.stop(); + await app.deps.indexer?.stop(); await sleep(50); store.resetStats(); @@ -276,7 +276,7 @@ describe("routing key lexicon", () => { expect([...res.body!.keys].sort()).toEqual(res.body?.keys); expect(store.stats().gets).toBe(0); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -316,7 +316,7 @@ describe("routing key lexicon", () => { return uploaded >= 18 && (state?.indexed_through ?? 
0) >= uploaded; }, 20_000); - app.deps.indexer?.stop(); + await app.deps.indexer?.stop(); await appendRepoBatchEvents( app, stream, @@ -335,7 +335,7 @@ describe("routing key lexicon", () => { expect(res.body?.timing?.fallback_wal_scan_ms).toBeLessThan(100); expect(res.body?.coverage?.scanned_uploaded_segments).toBe(0); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -382,7 +382,7 @@ describe("routing key lexicon", () => { }); await app.deps.uploader.publishManifest(stream); } finally { - app.close(); + await app.close(); } const cfg2 = makeProfileTestConfig(root2, { @@ -407,7 +407,7 @@ describe("routing key lexicon", () => { expect(indexStatus.body?.routing_key_lexicon?.configured).toBe(true); expect(indexStatus.body?.routing_key_lexicon?.active_run_count).toBeGreaterThanOrEqual(1); } finally { - app2.close(); + await app2.close(); rmSync(root, { recursive: true, force: true }); rmSync(root2, { recursive: true, force: true }); } diff --git a/test/schema_evolution.test.ts b/test/schema_evolution.test.ts index e2e60eb..f322e45 100644 --- a/test/schema_evolution.test.ts +++ b/test/schema_evolution.test.ts @@ -32,7 +32,7 @@ async function withServer( return await fn({ baseUrl }); } finally { server.stop(); - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } } diff --git a/test/search_http.test.ts b/test/search_http.test.ts index 4c8bff1..6164091 100644 --- a/test/search_http.test.ts +++ b/test/search_http.test.ts @@ -213,7 +213,7 @@ describe("_search http", () => { expect(body.coverage.indexed_segments).toBeGreaterThan(0); expect(body.hits.length).toBeGreaterThan(0); } finally { - app.close(); + await app.close(); rmSync(root, { force: true, recursive: true }); } }); @@ -424,7 +424,7 @@ describe("_search http", () => { expect(body.hits).toHaveLength(1); expect(body.hits[0].fields.requestId).toBe("req_2"); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: 
true, force: true }); } }, @@ -582,7 +582,7 @@ describe("_search http", () => { }, }); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -681,7 +681,7 @@ describe("_search http", () => { expect(body.hits[0].fields.requestId).toBe("req_6"); expect(body.coverage.indexed_segments + body.coverage.scanned_segments + Math.min(body.coverage.scanned_tail_docs, 1)).toBe(1); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -723,7 +723,7 @@ describe("_search http", () => { // Keep this test focused on search behavior while companions are not // caught up. Enqueued work normally wakes the managers promptly. - app.deps.indexer?.stop(); + await app.deps.indexer?.stop(); for (const event of [ { @@ -778,7 +778,7 @@ describe("_search http", () => { expect(body.coverage.possible_missing_wal_rows).toBe(0); expect(body.total).toEqual({ value: 2, relation: "eq" }); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -892,7 +892,7 @@ describe("_search http", () => { expect(body.coverage.visible_through_primary_timestamp_max).toEqual(expect.any(String)); expect(body.total).toEqual({ value: 2, relation: "gte" }); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, @@ -1007,7 +1007,7 @@ describe("_search http", () => { expect(body.total.value).toBeGreaterThan(0); expect(["eq", "gte"]).toContain(body.total.relation); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, diff --git a/test/search_perf_repro.test.ts b/test/search_perf_repro.test.ts index dcc4d90..1c8feb2 100644 --- a/test/search_perf_repro.test.ts +++ b/test/search_perf_repro.test.ts @@ -330,7 +330,7 @@ async function buildFixture(args: { appendSeedRows(buildApp, args.stream, totalRows, args.payloadBytes, APPEND_BATCH_ROWS); await waitForUploadedCompanions(buildApp, 
args.stream, args.segments, TIMEOUT_MS); } finally { - buildApp?.close(); + await buildApp?.close(); buildApp = null; } @@ -415,7 +415,7 @@ async function buildExactOnlyFixture(args: { stream: string; rows: number; paylo appendExactOnlyRows(buildApp, args.stream, args.rows, args.payloadBytes, APPEND_BATCH_ROWS); await waitForUploadedCompanions(buildApp, args.stream, 1, TIMEOUT_MS); } finally { - buildApp?.close(); + await buildApp?.close(); buildApp = null; } @@ -517,7 +517,7 @@ describe("search performance repro cases", () => { expect(result.body.coverage.index_families_used).toContain("fts"); expectMultiSecondRuntime("default timestamp sort broad filter", result.elapsedMs); } finally { - fixture.app.close(); + await fixture.app.close(); rmSync(fixture.root, { recursive: true, force: true }); } }, @@ -547,7 +547,7 @@ describe("search performance repro cases", () => { expect(result.body.coverage.index_families_used).toContain("fts"); expectMultiSecondRuntime("offset-desc newest segment decode", result.elapsedMs); } finally { - fixture.app.close(); + await fixture.app.close(); rmSync(fixture.root, { recursive: true, force: true }); } }, @@ -579,7 +579,7 @@ describe("search performance repro cases", () => { expect(result.parseCalls).toBeLessThanOrEqual(DEFAULT_SORT_ROWS_PER_SEGMENT + 128); expectMultiSecondRuntime("explicit timestamp-desc broad filter", result.elapsedMs); } finally { - fixture.app.close(); + await fixture.app.close(); rmSync(fixture.root, { recursive: true, force: true }); } }, @@ -612,7 +612,7 @@ describe("search performance repro cases", () => { expect(fixture.app.deps.db.listSecondaryIndexRuns(fixture.stream, "environment")).toHaveLength(0); expectMultiSecondRuntime("small stream below exact L0 span", result.elapsedMs); } finally { - fixture.app.close(); + await fixture.app.close(); rmSync(fixture.root, { recursive: true, force: true }); } }, @@ -644,7 +644,7 @@ describe("search performance repro cases", () => { 
expect(warm.body.coverage.scanned_tail_docs).toBe(1); expect(warm.elapsedMs).toBeLessThan(cold.elapsedMs); } finally { - fixture.app.close(); + await fixture.app.close(); rmSync(fixture.root, { recursive: true, force: true }); } }, @@ -672,7 +672,7 @@ describe("search performance repro cases", () => { expect(result.body.coverage.candidate_doc_ids).toBe(1); expect(result.parseCalls).toBeLessThanOrEqual(8); } finally { - fixture.app.close(); + await fixture.app.close(); rmSync(fixture.root, { recursive: true, force: true }); } }, diff --git a/test/secondary_indexer.test.ts b/test/secondary_indexer.test.ts index 7733629..89e8c91 100644 --- a/test/secondary_indexer.test.ts +++ b/test/secondary_indexer.test.ts @@ -125,7 +125,7 @@ describe("secondary indexer", () => { }); expect((manager as any).shouldPauseExactBackgroundWork("evlog")).toBe(true); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); @@ -249,7 +249,7 @@ describe("secondary indexer", () => { expect(Array.from(apiSegments!.segments).sort((a, b) => a - b)).toEqual([0]); expect(Array.from(workerSegments!.segments).sort((a, b) => a - b)).toEqual([1]); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }, 30_000); diff --git a/test/segment_meta.test.ts b/test/segment_meta.test.ts index 6dd6183..61d57b7 100644 --- a/test/segment_meta.test.ts +++ b/test/segment_meta.test.ts @@ -82,7 +82,7 @@ describe("segment meta", () => { expect(manifest.segment_count).toBe(segs.length); expect(manifest.uploaded_through).toBe(segs.length); } finally { - app.close(); + await app.close(); rmSync(root, { recursive: true, force: true }); } }); diff --git a/test/segment_recovery.test.ts b/test/segment_recovery.test.ts index 6a8393c..b950672 100644 --- a/test/segment_recovery.test.ts +++ b/test/segment_recovery.test.ts @@ -43,7 +43,7 @@ describe("segment recovery", () => { }); server1.stop(); - app1.close(); + await app1.close(); const db 
= new SqliteDurableStore(cfg.dbPath); db.db.query(`UPDATE streams SET segment_in_progress=1 WHERE stream=?;`).run(stream); @@ -71,7 +71,7 @@ describe("segment recovery", () => { expect(text).toBe("hello"); server2.stop(); - app2.close(); + await app2.close(); } finally { rmSync(root, { recursive: true, force: true }); } diff --git a/test/touch_memory_journal.test.ts b/test/touch_memory_journal.test.ts index 624cbd7..9eee691 100644 --- a/test/touch_memory_journal.test.ts +++ b/test/touch_memory_journal.test.ts @@ -112,7 +112,7 @@ describe("touch storage=memory (journal cursors)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -151,7 +151,7 @@ describe("touch storage=memory (journal cursors)", () => { // "Restart" by creating a new app+server on the same sqlite state. server1.stop(); - app1.close(); + await app1.close(); server1 = null; app1 = null; @@ -187,12 +187,12 @@ describe("touch storage=memory (journal cursors)", () => { // ignore } try { - app1?.close?.(); + await app1?.close?.(); } catch { // ignore } try { - app2?.close?.(); + await app2?.close?.(); } catch { // ignore } @@ -260,7 +260,7 @@ describe("touch storage=memory (journal cursors)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -417,7 +417,7 @@ describe("touch storage=memory (journal cursors)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -581,7 +581,7 @@ describe("touch storage=memory (journal cursors)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -672,7 +672,7 @@ describe("touch storage=memory (journal cursors)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -774,7 +774,7 @@ describe("touch storage=memory (journal cursors)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -879,7 +879,7 @@ describe("touch storage=memory 
(journal cursors)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -985,7 +985,7 @@ describe("touch storage=memory (journal cursors)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } diff --git a/test/touch_processor.test.ts b/test/touch_processor.test.ts index 7153a13..0db3eb3 100644 --- a/test/touch_processor.test.ts +++ b/test/touch_processor.test.ts @@ -146,7 +146,7 @@ describe("live touches (state protocol)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -242,7 +242,7 @@ describe("live touches (state protocol)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -351,7 +351,7 @@ describe("live touches (state protocol)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -397,7 +397,7 @@ describe("live touches (state protocol)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -549,7 +549,7 @@ describe("live touches (state protocol)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } @@ -719,7 +719,7 @@ describe("live touches (state protocol)", () => { // ignore } try { - app?.close?.(); + await app?.close?.(); } catch { // ignore } diff --git a/test/touch_wait_timeout_reliability.test.ts b/test/touch_wait_timeout_reliability.test.ts index 2146c06..9c4a02d 100644 --- a/test/touch_wait_timeout_reliability.test.ts +++ b/test/touch_wait_timeout_reliability.test.ts @@ -90,7 +90,7 @@ describe("/touch/wait timeout reliability", () => { expect(within / N).toBeGreaterThanOrEqual(0.99); } finally { try { - app.close(); + await app.close(); } catch { // ignore } From d2dfab8605c7e8715fd3c80899c17c1c44cb8985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Fri, 12 Jun 2026 14:51:36 +0700 Subject: [PATCH 07/12] Expose request observability 
pairings --- docs/overview.md | 4 +- docs/profile-evlog.md | 27 +++++++++++- docs/profile-otel-traces.md | 25 ++++++++++- docs/request-observability.md | 30 ++++++++++++++ docs/stream-profiles.md | 3 ++ src/app_core.ts | 62 +++++++++++++++++----------- src/observe/pairing.ts | 61 +++++++++++++++++++++++++++ src/profiles/evlog.ts | 41 +++++++++++++++++- src/profiles/otelTraces.ts | 36 +++++++++++++++- src/profiles/otelTraces/normalize.ts | 5 +++ test/profile_evlog.test.ts | 51 ++++++++++++++++++++++- test/profile_otel_traces.test.ts | 45 ++++++++++++++++++++ 12 files changed, 359 insertions(+), 31 deletions(-) create mode 100644 src/observe/pairing.ts diff --git a/docs/overview.md b/docs/overview.md index 11abdae..6eaf37d 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -25,7 +25,9 @@ Every stream has a profile. See [stream-profiles.md](./stream-profiles.md). See [profile-otel-traces.md](./profile-otel-traces.md) and [request-observability.md](./request-observability.md) for trace ingestion and -cross-stream request lookup. +cross-stream request lookup. UIs should use the explicit +`observability.request` descriptor from `GET /v1/streams` or +`GET /v1/stream/{name}/_details` to pair `evlog` and `otel-traces` streams. This repository currently contains two server modes: diff --git a/docs/profile-evlog.md b/docs/profile-evlog.md index ab99765..5365416 100644 --- a/docs/profile-evlog.md +++ b/docs/profile-evlog.md @@ -53,6 +53,11 @@ Supported profile shape: "traceContext.spanId" ], "parseTraceparent": true + }, + "observability": { + "request": { + "tracesStream": "app-traces" + } } } ``` @@ -63,6 +68,24 @@ When `parseTraceparent` is not false, the profile reads W3C `traceparent` from `traceparent`, `traceContext.traceparent`, `context.traceparent`, or `headers.traceparent` if explicit trace fields are absent. +`observability.request.tracesStream` declares the explicit `otel-traces` +counterpart for request-observability clients. 
When it is present, +`GET /v1/streams` and `GET /v1/stream/{name}/_details` expose: + +```json +{ + "observability": { + "request": { + "events_stream": "app-events", + "traces_stream": "app-traces" + } + } +} +``` + +Clients must use this descriptor instead of guessing the trace stream from +other stream names or profiles. + ## Canonical Envelope Each stored event should use this stable top-level shape: @@ -237,7 +260,9 @@ record/detail surface. Recommended integration flow: 1. Create the stream with `application/json`. -2. Install the `evlog` profile with `POST /v1/stream/{name}/_profile`. +2. Install the `evlog` profile with `POST /v1/stream/{name}/_profile`. Include + `observability.request.tracesStream` when this stream has a known + `otel-traces` counterpart. 3. Read `GET /v1/stream/{name}/_details` when the UI needs the combined stream/profile/schema/index descriptor. 4. Read `GET /v1/stream/{name}/_index_status` for dedicated indexing progress diff --git a/docs/profile-otel-traces.md b/docs/profile-otel-traces.md index 0afcd76..e2d3df3 100644 --- a/docs/profile-otel-traces.md +++ b/docs/profile-otel-traces.md @@ -52,7 +52,12 @@ Content-Type: application/json "rawEvents": true, "rawLinks": true }, - "dbStatementMode": "drop" + "dbStatementMode": "drop", + "observability": { + "request": { + "eventsStream": "app-events" + } + } } } ``` @@ -210,6 +215,24 @@ The cross-stream request view is implemented by [`request-observability.md`](./request-observability.md), not by merging `evlog` and spans into one profile. +`observability.request.eventsStream` declares the explicit `evlog` counterpart +for request-observability clients. When it is present, `GET /v1/streams` and +`GET /v1/stream/{name}/_details` expose: + +```json +{ + "observability": { + "request": { + "events_stream": "app-events", + "traces_stream": "app-traces" + } + } +} +``` + +Clients must use this descriptor instead of selecting the first `evlog` stream +they find. 
+ ## Security And Privacy Redaction is case-insensitive and happens before durable append. It applies to: diff --git a/docs/request-observability.md b/docs/request-observability.md index 9824e24..8ed4036 100644 --- a/docs/request-observability.md +++ b/docs/request-observability.md @@ -68,6 +68,36 @@ Limits: The implementation pages internally through `_search` because `_search` pages are capped at 500 hits. +## Pairing Descriptor + +Clients should discover request-observability pairs from stream metadata before +calling this endpoint. `GET /v1/streams` and +`GET /v1/stream/{name}/_details` expose `observability.request` when a stream +profile declares its counterpart: + +```json +{ + "name": "app-events", + "profile": "evlog", + "observability": { + "request": { + "events_stream": "app-events", + "traces_stream": "app-traces" + } + } +} +``` + +For an `evlog` stream, the descriptor comes from +`profile.observability.request.tracesStream`. +For an `otel-traces` stream, it comes from +`profile.observability.request.eventsStream`. + +The descriptor is the supported way to choose the counterpart stream. Clients +must not pick the first stream with the opposite profile. If a descriptor is +absent, clients may still call this endpoint with only the active stream and +set the missing side's include flag to false. 
+ ## Lookup Behavior ### Request ID diff --git a/docs/stream-profiles.md b/docs/stream-profiles.md index e7822c8..2cf4245 100644 --- a/docs/stream-profiles.md +++ b/docs/stream-profiles.md @@ -112,6 +112,9 @@ It means: - the canonical routing key is `traceId` - request correlation with `evlog` is provided by the cross-stream `/v1/observe/request` API, not by mixing spans into `evlog` +- request-observability clients discover explicit pairs through + `observability.request` on `GET /v1/streams` or + `GET /v1/stream/{name}/_details`, not by guessing from stream names See [profile-otel-traces.md](./profile-otel-traces.md) and [request-observability.md](./request-observability.md) for the detailed diff --git a/src/app_core.ts b/src/app_core.ts index 0b3d749..a956b74 100644 --- a/src/app_core.ts +++ b/src/app_core.ts @@ -56,6 +56,7 @@ import { resolveJsonIngestCapability, resolveTouchCapability, type PreparedJsonRecord, + type StreamProfileSpec, type StreamTouchRoute, } from "./profiles"; import { encodeOtlpTraceExportResponse } from "./profiles/otelTraces/otlp"; @@ -70,6 +71,7 @@ import { sortTimeline, summarizeSearchCoverage, } from "./observe/request"; +import { buildRequestObservabilityPairingDescriptor } from "./observe/pairing"; import { dsError } from "./util/ds_error.ts"; import { streamHash16Hex } from "./util/stream_paths"; @@ -1319,30 +1321,38 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { return Result.ok(Buffer.concat(parts)); }; - const buildStreamSummary = (stream: string, row: StreamRow, profileKind: string) => ({ - name: stream, - content_type: normalizeContentType(row.content_type) ?? 
row.content_type, - profile: profileKind, - created_at: timestampToIsoString(row.created_at_ms), - updated_at: timestampToIsoString(row.updated_at_ms), - expires_at: timestampToIsoString(row.expires_at_ms), - ttl_seconds: row.ttl_seconds, - stream_seq: row.stream_seq, - closed: row.closed !== 0, - epoch: row.epoch, - next_offset: row.next_offset.toString(), - sealed_through: row.sealed_through.toString(), - uploaded_through: row.uploaded_through.toString(), - segment_count: db.countSegmentsForStream(stream), - uploaded_segment_count: db.countUploadedSegments(stream), - pending_rows: row.pending_rows.toString(), - pending_bytes: row.pending_bytes.toString(), - total_size_bytes: row.logical_size_bytes.toString(), - wal_rows: row.wal_rows.toString(), - wal_bytes: row.wal_bytes.toString(), - last_append_at: timestampToIsoString(row.last_append_ms), - last_segment_cut_at: timestampToIsoString(row.last_segment_cut_ms), - }); + const buildStreamSummary = ( + stream: string, + row: StreamRow, + profile: StreamProfileSpec + ) => { + const observability = buildRequestObservabilityPairingDescriptor(stream, profile); + return { + name: stream, + content_type: normalizeContentType(row.content_type) ?? row.content_type, + profile: profile.kind, + ...(observability ? 
{ observability } : {}), + created_at: timestampToIsoString(row.created_at_ms), + updated_at: timestampToIsoString(row.updated_at_ms), + expires_at: timestampToIsoString(row.expires_at_ms), + ttl_seconds: row.ttl_seconds, + stream_seq: row.stream_seq, + closed: row.closed !== 0, + epoch: row.epoch, + next_offset: row.next_offset.toString(), + sealed_through: row.sealed_through.toString(), + uploaded_through: row.uploaded_through.toString(), + segment_count: db.countSegmentsForStream(stream), + uploaded_segment_count: db.countUploadedSegments(stream), + pending_rows: row.pending_rows.toString(), + pending_bytes: row.pending_bytes.toString(), + total_size_bytes: row.logical_size_bytes.toString(), + wal_rows: row.wal_rows.toString(), + wal_bytes: row.wal_bytes.toString(), + last_append_at: timestampToIsoString(row.last_append_ms), + last_segment_cut_at: timestampToIsoString(row.last_segment_cut_ms), + }; + }; const buildIndexLagMs = (stream: string, headRow: StreamRow, coveredSegmentCount: number): string | null => { if (coveredSegmentCount <= 0) return null; @@ -1619,7 +1629,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { mode === "index_status" ? 
indexStatus : { - stream: buildStreamSummary(stream, srow, profileKind), + stream: buildStreamSummary(stream, srow, profileRes.value.profile), profile: profileRes.value, schema: regRes.value, index_status: indexStatus, @@ -2066,6 +2076,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { const profileRes = profiles.getProfileResult(r.stream, r); if (Result.isError(profileRes)) return internalError("invalid stream profile"); const profile = profileRes.value; + const observability = buildRequestObservabilityPairingDescriptor(r.stream, profile); out.push({ name: r.stream, created_at: new Date(Number(r.created_at_ms)).toISOString(), @@ -2075,6 +2086,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { sealed_through: r.sealed_through.toString(), uploaded_through: r.uploaded_through.toString(), profile: profile.kind, + ...(observability ? { observability } : {}), }); } return json(200, out); diff --git a/src/observe/pairing.ts b/src/observe/pairing.ts new file mode 100644 index 0000000..f8d4fae --- /dev/null +++ b/src/observe/pairing.ts @@ -0,0 +1,61 @@ +import type { StreamProfileSpec } from "../profiles"; + +export type RequestObservabilityPairingDescriptor = { + request: { + events_stream: string; + traces_stream: string; + }; +}; + +function readRequestPairing( + profile: StreamProfileSpec +): Record | null { + const observability = profile.observability; + if ( + !observability || + typeof observability !== "object" || + Array.isArray(observability) + ) { + return null; + } + const request = (observability as Record).request; + return request && typeof request === "object" && !Array.isArray(request) + ? (request as Record) + : null; +} + +function nonEmptyString(value: unknown): string | null { + return typeof value === "string" && value.trim() !== "" ? 
value.trim() : null; +} + +export function buildRequestObservabilityPairingDescriptor( + stream: string, + profile: StreamProfileSpec +): RequestObservabilityPairingDescriptor | null { + const request = readRequestPairing(profile); + if (!request) return null; + + if (profile.kind === "evlog") { + const tracesStream = nonEmptyString(request.tracesStream); + if (!tracesStream) return null; + return { + request: { + events_stream: stream, + traces_stream: tracesStream, + }, + }; + } + + if (profile.kind === "otel-traces") { + const eventsStream = nonEmptyString(request.eventsStream); + if (!eventsStream) return null; + return { + request: { + events_stream: eventsStream, + traces_stream: stream, + }, + }; + } + + return null; +} diff --git a/src/profiles/evlog.ts b/src/profiles/evlog.ts index 215c6b3..8552fde 100644 --- a/src/profiles/evlog.ts +++ b/src/profiles/evlog.ts @@ -26,6 +26,11 @@ export type EvlogStreamProfile = { traceContextFields?: string[]; parseTraceparent?: boolean; }; + observability?: { + request?: { + tracesStream: string; + }; + }; }; const DEFAULT_REDACT_KEYS = ["password", "token", "secret", "authorization", "cookie", "apikey"] as const; @@ -129,21 +134,55 @@ function parseEvlogCorrelationResult(raw: unknown, path: string): Result 0 ? 
correlation : undefined); } +function parseStreamNameResult(raw: unknown, path: string): Result { + if (raw === undefined) return Result.ok(undefined); + if (typeof raw !== "string") return Result.err({ message: `${path} must be a string` }); + const value = raw.trim(); + if (value === "") return Result.err({ message: `${path} must not be empty` }); + return Result.ok(value); +} + +function parseEvlogObservabilityResult(raw: unknown, path: string): Result { + if (raw === undefined) return Result.ok(undefined); + const objRes = expectPlainObjectResult(raw, path); + if (Result.isError(objRes)) return objRes; + const keyCheck = rejectUnknownKeysResult(objRes.value, ["request"], path); + if (Result.isError(keyCheck)) return keyCheck; + + if (objRes.value.request === undefined) return Result.ok(undefined); + const requestRes = expectPlainObjectResult(objRes.value.request, `${path}.request`); + if (Result.isError(requestRes)) return requestRes; + const requestKeyCheck = rejectUnknownKeysResult(requestRes.value, ["tracesStream"], `${path}.request`); + if (Result.isError(requestKeyCheck)) return requestKeyCheck; + const tracesStreamRes = parseStreamNameResult(requestRes.value.tracesStream, `${path}.request.tracesStream`); + if (Result.isError(tracesStreamRes)) return tracesStreamRes; + if (!tracesStreamRes.value) return Result.ok(undefined); + + return Result.ok({ + request: { + tracesStream: tracesStreamRes.value, + }, + }); +} + function validateEvlogProfileResult(raw: unknown, path: string): Result { const objRes = expectPlainObjectResult(raw, path); if (Result.isError(objRes)) return objRes; if (objRes.value.kind !== "evlog") { return Result.err({ message: `${path}.kind must be evlog` }); } - const keyCheck = rejectUnknownKeysResult(objRes.value, ["kind", "redactKeys", "correlation"], path); + const keyCheck = rejectUnknownKeysResult(objRes.value, ["kind", "redactKeys", "correlation", "observability"], path); if (Result.isError(keyCheck)) return keyCheck; const 
redactKeysRes = parseRedactKeysResult(objRes.value.redactKeys, `${path}.redactKeys`); if (Result.isError(redactKeysRes)) return redactKeysRes; const correlationRes = parseEvlogCorrelationResult(objRes.value.correlation, `${path}.correlation`); if (Result.isError(correlationRes)) return correlationRes; + const observabilityRes = parseEvlogObservabilityResult(objRes.value.observability, `${path}.observability`); + if (Result.isError(observabilityRes)) return observabilityRes; const profile: EvlogStreamProfile = { kind: "evlog" }; if (redactKeysRes.value) profile.redactKeys = redactKeysRes.value; if (correlationRes.value) profile.correlation = correlationRes.value; + if (observabilityRes.value) profile.observability = observabilityRes.value; return Result.ok(profile); } diff --git a/src/profiles/otelTraces.ts b/src/profiles/otelTraces.ts index 06f2eb5..5f298b6 100644 --- a/src/profiles/otelTraces.ts +++ b/src/profiles/otelTraces.ts @@ -114,13 +114,44 @@ function parseDbStatementModeResult(raw: unknown, path: string): Result { + if (raw === undefined) return Result.ok(undefined); + if (typeof raw !== "string") return Result.err({ message: `${path} must be a string` }); + const value = raw.trim(); + if (value === "") return Result.err({ message: `${path} must not be empty` }); + return Result.ok(value); +} + +function parseOtelTracesObservabilityResult(raw: unknown, path: string): Result { + if (raw === undefined) return Result.ok(undefined); + const objRes = expectPlainObjectResult(raw, path); + if (Result.isError(objRes)) return objRes; + const keyCheck = rejectUnknownKeysResult(objRes.value, ["request"], path); + if (Result.isError(keyCheck)) return keyCheck; + + if (objRes.value.request === undefined) return Result.ok(undefined); + const requestRes = expectPlainObjectResult(objRes.value.request, `${path}.request`); + if (Result.isError(requestRes)) return requestRes; + const requestKeyCheck = rejectUnknownKeysResult(requestRes.value, ["eventsStream"], 
`${path}.request`); + if (Result.isError(requestKeyCheck)) return requestKeyCheck; + const eventsStreamRes = parseStreamNameResult(requestRes.value.eventsStream, `${path}.request.eventsStream`); + if (Result.isError(eventsStreamRes)) return eventsStreamRes; + if (!eventsStreamRes.value) return Result.ok(undefined); + + return Result.ok({ + request: { + eventsStream: eventsStreamRes.value, + }, + }); +} + function validateOtelTracesProfileResult(raw: unknown, path: string): Result { const objRes = expectPlainObjectResult(raw, path); if (Result.isError(objRes)) return objRes; if (objRes.value.kind !== "otel-traces") return Result.err({ message: `${path}.kind must be otel-traces` }); const keyCheck = rejectUnknownKeysResult( objRes.value, - ["kind", "redactKeys", "requestIdAttributes", "attributeLimits", "store", "dbStatementMode"], + ["kind", "redactKeys", "requestIdAttributes", "attributeLimits", "store", "dbStatementMode", "observability"], path ); if (Result.isError(keyCheck)) return keyCheck; @@ -134,12 +165,15 @@ function validateOtelTracesProfileResult(raw: unknown, path: string): Result; store?: Partial; dbStatementMode?: DbStatementMode; + observability?: { + request?: { + eventsStream: string; + }; + }; }; export type DecodedOtelEvent = { diff --git a/test/profile_evlog.test.ts b/test/profile_evlog.test.ts index 517b7e6..a2dd62a 100644 --- a/test/profile_evlog.test.ts +++ b/test/profile_evlog.test.ts @@ -53,6 +53,11 @@ describe("evlog profile", () => { apiVersion: "durable.streams/profile/v1", profile: { kind: "evlog", + observability: { + request: { + tracesStream: "app-traces", + }, + }, redactKeys: ["sessionToken"], }, }), @@ -62,6 +67,11 @@ describe("evlog profile", () => { apiVersion: "durable.streams/profile/v1", profile: { kind: "evlog", + observability: { + request: { + tracesStream: "app-traces", + }, + }, redactKeys: ["sessiontoken"], }, }); @@ -70,14 +80,35 @@ describe("evlog profile", () => { expect(getRes.status).toBe(200); 
expect(getRes.body?.profile?.kind).toBe("evlog"); expect(getRes.body?.profile?.redactKeys).toEqual(["sessiontoken"]); + expect(getRes.body?.profile?.observability).toEqual({ + request: { + tracesStream: "app-traces", + }, + }); const listRes = await fetchJsonApp(app, "http://local/v1/streams", { method: "GET" }); expect(listRes.status).toBe(200); - expect(listRes.body.find((row: any) => row.name === "evlog-install")?.profile).toBe("evlog"); + const listRow = listRes.body.find((row: any) => row.name === "evlog-install"); + expect(listRow?.profile).toBe("evlog"); + expect(listRow?.observability).toEqual({ + request: { + events_stream: "evlog-install", + traces_stream: "app-traces", + }, + }); expect(app.deps.db.getStream("evlog-install")?.profile).toBe("evlog"); expect(app.deps.db.getStreamProfile("evlog-install")).not.toBeNull(); + const detailsRes = await fetchJsonApp(app, "http://local/v1/stream/evlog-install/_details", { method: "GET" }); + expect(detailsRes.status).toBe(200); + expect(detailsRes.body?.stream?.observability).toEqual({ + request: { + events_stream: "evlog-install", + traces_stream: "app-traces", + }, + }); + const schemaRes = await fetchJsonApp(app, "http://local/v1/stream/evlog-install/_schema", { method: "GET" }); expect(schemaRes.status).toBe(200); expect(schemaRes.body?.currentVersion).toBe(1); @@ -138,6 +169,24 @@ describe("evlog profile", () => { expect(invalidConfigRes.status).toBe(400); expect(invalidConfigRes.body?.error?.message).toContain("profile.extra"); + const invalidPairingRes = await fetchJsonApp(app, "http://local/v1/stream/evlog-invalid/_profile", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + apiVersion: "durable.streams/profile/v1", + profile: { + kind: "evlog", + observability: { + request: { + tracesStream: "", + }, + }, + }, + }), + }); + expect(invalidPairingRes.status).toBe(400); + 
expect(invalidPairingRes.body?.error?.message).toContain("profile.observability.request.tracesStream"); + await app.fetch( new Request("http://local/v1/stream/evlog-late", { method: "PUT", diff --git a/test/profile_otel_traces.test.ts b/test/profile_otel_traces.test.ts index 2224b81..892d78c 100644 --- a/test/profile_otel_traces.test.ts +++ b/test/profile_otel_traces.test.ts @@ -194,6 +194,11 @@ describe("otel-traces profile", () => { attributeLimits: { maxAttributesPerSpan: 32 }, store: { rawLinks: false }, dbStatementMode: "raw", + observability: { + request: { + eventsStream: "app-events", + }, + }, }); expect(res.status).toBe(200); expect(res.body?.profile).toEqual({ @@ -203,6 +208,11 @@ describe("otel-traces profile", () => { attributeLimits: { maxAttributesPerSpan: 32 }, store: { rawLinks: false }, dbStatementMode: "raw", + observability: { + request: { + eventsStream: "app-events", + }, + }, }); const schemaRes = await fetchJsonApp(app, "http://local/v1/stream/otel-install/_schema", { method: "GET" }); @@ -214,6 +224,24 @@ describe("otel-traces profile", () => { expect(schemaRes.body?.search?.fields?.duration?.kind).toBe("float"); expect(schemaRes.body?.search?.fields?.["events.name"]?.bindings?.[0]?.jsonPointer).toBe("/eventNames"); expect(schemaRes.body?.search?.rollups?.spans?.measures?.latency?.field).toBe("duration"); + + const listRes = await fetchJsonApp(app, "http://local/v1/streams", { method: "GET" }); + expect(listRes.status).toBe(200); + expect(listRes.body.find((row: any) => row.name === "otel-install")?.observability).toEqual({ + request: { + events_stream: "app-events", + traces_stream: "otel-install", + }, + }); + + const detailsRes = await fetchJsonApp(app, "http://local/v1/stream/otel-install/_details", { method: "GET" }); + expect(detailsRes.status).toBe(200); + expect(detailsRes.body?.stream?.observability).toEqual({ + request: { + events_stream: "app-events", + traces_stream: "otel-install", + }, + }); } finally { await app.close(); 
rmSync(root, { recursive: true, force: true }); @@ -242,6 +270,23 @@ describe("otel-traces profile", () => { expect(invalidRes.status).toBe(400); expect(invalidRes.body?.error?.message).toContain("dbStatementMode"); + const invalidPairingRes = await fetchJsonApp(app, "http://local/v1/stream/otel-invalid/_profile", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + profile: { + kind: "otel-traces", + observability: { + request: { + eventsStream: "", + }, + }, + }, + }), + }); + expect(invalidPairingRes.status).toBe(400); + expect(invalidPairingRes.body?.error?.message).toContain("profile.observability.request.eventsStream"); + await app.fetch(new Request("http://local/v1/stream/otel-late", { method: "PUT", headers: { "content-type": "application/json" } })); await app.fetch( new Request("http://local/v1/stream/otel-late", { From 61999d5f11702cd3106328f5fba00b710525d7ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Fri, 12 Jun 2026 15:55:29 +0700 Subject: [PATCH 08/12] Harden otel traces observability --- src/app_core.ts | 20 +- src/observe/request.ts | 121 ++++++++++- src/profiles/otelTraces.ts | 40 +++- src/profiles/otelTraces/normalize.ts | 179 +++++++++++++++- src/profiles/otelTraces/otlp.ts | 75 +++++-- src/profiles/otelTraces/schema.ts | 3 + src/profiles/profile.ts | 2 +- src/schema/registry.ts | 21 +- src/search/aggregate.ts | 22 +- test/observe_request.test.ts | 102 ++++++++++ test/profile_otel_traces.test.ts | 292 ++++++++++++++++++++++++++- 11 files changed, 823 insertions(+), 54 deletions(-) diff --git a/src/app_core.ts b/src/app_core.ts index a956b74..e8454d9 100644 --- a/src/app_core.ts +++ b/src/app_core.ts @@ -1731,6 +1731,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { }); if (Result.isError(decodedRes)) { if (decodedRes.error.status === 415) return unsupportedMediaType(decodedRes.error.message); + if (decodedRes.error.status === 413) 
return tooLarge(decodedRes.error.message); return badRequest(decodedRes.error.message); } @@ -1891,21 +1892,24 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { target: "events" | "traces", result: { hits: SearchHit[]; batches: SearchResultBatch[]; limitReached: boolean } ) => { + const stream = result.batches[0]?.stream ?? ""; if (target === "events") { - const seen = new Set(eventHits.map((hit) => hit.offset)); + const seen = new Set(eventHits.map((hit) => `${(hit as SearchHit & { stream?: string }).stream ?? ""}\0${hit.offset}`)); for (const hit of result.hits) { - if (seen.has(hit.offset)) continue; - seen.add(hit.offset); - eventHits.push(hit); + const key = `${stream}\0${hit.offset}`; + if (seen.has(key)) continue; + seen.add(key); + eventHits.push({ ...hit, stream } as SearchHit); } eventBatches.push(...result.batches); eventLimitReached = eventLimitReached || result.limitReached || eventHits.length >= observeReq.limits.events && !!result.batches.at(-1)?.nextSearchAfter; } else { - const seen = new Set(traceHits.map((hit) => hit.offset)); + const seen = new Set(traceHits.map((hit) => `${(hit as SearchHit & { stream?: string }).stream ?? 
""}\0${hit.offset}`)); for (const hit of result.hits) { - if (seen.has(hit.offset)) continue; - seen.add(hit.offset); - traceHits.push(hit); + const key = `${stream}\0${hit.offset}`; + if (seen.has(key)) continue; + seen.add(key); + traceHits.push({ ...hit, stream } as SearchHit); } traceBatches.push(...result.batches); traceLimitReached = traceLimitReached || result.limitReached || traceHits.length >= observeReq.limits.spans && !!result.batches.at(-1)?.nextSearchAfter; diff --git a/src/observe/request.ts b/src/observe/request.ts index 72446cb..2a66da5 100644 --- a/src/observe/request.ts +++ b/src/observe/request.ts @@ -35,6 +35,9 @@ export type ObserveSearchCoverage = { timed_out: boolean; limit_reached: boolean; hits: number; + unique_hits: number; + query_count: number; + batch_count: number; total: { value: number; relation: "eq" | "gte" }; index_families_used: string[]; scanned_tail_docs: number; @@ -292,15 +295,103 @@ function cloneNodeAtDepth(node: TraceTreeNode, depth: number): TraceTreeNode { }; } -function buildCriticalPath(rootNodes: TraceTreeNode[]): string[] { +function parseTimeMs(value: string | null): number | null { + if (!value) return null; + const parsed = Date.parse(value); + return Number.isFinite(parsed) ? parsed : null; +} + +function intervalDurationMs(node: TraceTreeNode): number | null { + const start = parseTimeMs(node.startTime); + const end = parseTimeMs(node.endTime); + if (start == null || end == null || end < start) return node.duration; + return end - start; +} + +function exclusiveDurationMs(node: TraceTreeNode): number { + const total = intervalDurationMs(node) ?? node.duration ?? 
0; + const start = parseTimeMs(node.startTime); + const end = parseTimeMs(node.endTime); + if (start == null || end == null || end <= start || node.children.length === 0) return Math.max(0, total); + const intervals = node.children + .map((child) => { + const childStart = parseTimeMs(child.startTime); + const childEnd = parseTimeMs(child.endTime); + if (childStart == null || childEnd == null || childEnd <= childStart) return null; + return [Math.max(start, childStart), Math.min(end, childEnd)] as const; + }) + .filter((interval): interval is readonly [number, number] => !!interval && interval[1] > interval[0]) + .sort((left, right) => left[0] - right[0]); + let covered = 0; + let currentStart: number | null = null; + let currentEnd: number | null = null; + for (const [left, right] of intervals) { + if (currentStart == null || currentEnd == null) { + currentStart = left; + currentEnd = right; + continue; + } + if (left <= currentEnd) { + currentEnd = Math.max(currentEnd, right); + continue; + } + covered += currentEnd - currentStart; + currentStart = left; + currentEnd = right; + } + if (currentStart != null && currentEnd != null) covered += currentEnd - currentStart; + return Math.max(0, total - covered); +} + +function criticalPathScore(node: TraceTreeNode, memo: Map): number { + const cached = memo.get(node.spanId); + if (cached != null) return cached; + const score = + node.children.length === 0 + ? exclusiveDurationMs(node) + : exclusiveDurationMs(node) + Math.max(...node.children.map((child) => criticalPathScore(child, memo))); + memo.set(node.spanId, score); + return score; +} + +function rootSelectionScore(node: TraceTreeNode, record: Record | undefined): number { + const http = record ? nestedObject(record, "http") : {}; + const hasHttp = + stringField(http, "method") != null || + stringField(http, "route") != null || + stringField(http, "path") != null || + numberField(http, "statusCode") != null; + return ( + (node.parentSpanId == null ? 
10_000 : 0) + + (node.kind === "server" ? 2_000 : 0) + + (hasHttp ? 1_000 : 0) + + (record && stringField(record, "requestId") ? 500 : 0) + + Math.min(node.duration ?? 0, 60_000) / 10 + ); +} + +function selectRootSpanId(rootNodes: TraceTreeNode[], bySpanId: Map>): string | null { + if (rootNodes.length === 0) return null; + return [...rootNodes] + .sort((left, right) => { + const scoreDiff = rootSelectionScore(right, bySpanId.get(right.spanId)) - rootSelectionScore(left, bySpanId.get(left.spanId)); + if (scoreDiff !== 0) return scoreDiff; + if (left.startTime !== right.startTime) return left.startTime < right.startTime ? -1 : 1; + return left.spanId.localeCompare(right.spanId); + })[0]?.spanId ?? null; +} + +function buildCriticalPath(rootNodes: TraceTreeNode[], rootSpanId: string | null): string[] { if (rootNodes.length === 0) return []; - const score = (node: TraceTreeNode): number => (node.duration ?? 0) + (node.statusCode === "error" ? 1_000_000 : 0); - let current = [...rootNodes].sort((a, b) => score(b) - score(a))[0]!; + const memo = new Map(); + let current = + rootNodes.find((node) => node.spanId === rootSpanId) ?? + [...rootNodes].sort((a, b) => criticalPathScore(b, memo) - criticalPathScore(a, memo))[0]!; const out: string[] = []; while (current) { out.push(current.spanId); if (current.children.length === 0) break; - current = [...current.children].sort((a, b) => score(b) - score(a))[0]!; + current = [...current.children].sort((a, b) => criticalPathScore(b, memo) - criticalPathScore(a, memo))[0]!; } return out; } @@ -412,14 +503,15 @@ export function buildTraceDetails(spansRaw: unknown[], args?: { spanLimitReached }; const tree = roots.map((root) => setDepth(root, 0)).map((root) => cloneNodeAtDepth(root, 0)); sortTree(tree); + const rootSpanId = selectRootSpanId(tree, bySpanId); return { traceId: spans.length > 0 ? stringField(spans[0]!, "traceId") : null, - rootSpanId: tree[0]?.spanId ?? 
null, + rootSpanId, spans, tree, serviceMap: buildServiceMap(spans, bySpanId), - criticalPath: buildCriticalPath(tree), + criticalPath: buildCriticalPath(tree, rootSpanId), errors: buildTraceErrors(spans), partial: (args?.spanLimitReached ?? false) || args?.coverageComplete === false || missingParents.size > 0, missingParents: Array.from(missingParents).sort(), @@ -429,31 +521,40 @@ export function buildTraceDetails(spansRaw: unknown[], args?: { spanLimitReached export function summarizeSearchCoverage(batches: SearchResultBatch[], hits: SearchHit[], limitReached: boolean): ObserveSearchCoverage { const families = new Set(); + const uniqueHitKeys = new Set(); let complete = batches.length > 0; let timedOut = false; let scannedTailDocs = 0; let scannedSegments = 0; let possibleMissing = 0; - let totalValue = 0; let totalRelation: "eq" | "gte" = "eq"; + const batchStreams = new Set(batches.map((batch) => batch.stream)); + const fallbackStream = batchStreams.size === 1 ? Array.from(batchStreams)[0]! : ""; + for (const hit of hits) { + const stream = typeof (hit as SearchHit & { stream?: unknown }).stream === "string" ? 
(hit as SearchHit & { stream: string }).stream : fallbackStream; + uniqueHitKeys.add(`${stream}\0${hit.offset}`); + } for (const batch of batches) { complete = complete && batch.coverage.complete; timedOut = timedOut || batch.timedOut; scannedTailDocs += batch.coverage.scannedTailDocs; scannedSegments += batch.coverage.scannedSegments; possibleMissing += batch.coverage.possibleMissingEventsUpperBound; - totalValue += batch.total.value; if (batch.total.relation === "gte") totalRelation = "gte"; for (const family of batch.coverage.indexFamiliesUsed) families.add(family); } if (batches.length === 0) complete = true; + const exactUniqueTotal = !limitReached && !timedOut && complete && totalRelation === "eq"; return { searched: batches.length > 0, complete: complete && !timedOut && !limitReached, timed_out: timedOut, limit_reached: limitReached, - hits: hits.length, - total: { value: totalValue, relation: limitReached ? "gte" : totalRelation }, + hits: uniqueHitKeys.size, + unique_hits: uniqueHitKeys.size, + query_count: batches.length, + batch_count: batches.length, + total: { value: uniqueHitKeys.size, relation: exactUniqueTotal ? "eq" : "gte" }, index_families_used: Array.from(families).sort(), scanned_tail_docs: scannedTailDocs, scanned_segments: scannedSegments, diff --git a/src/profiles/otelTraces.ts b/src/profiles/otelTraces.ts index 5f298b6..9d1a203 100644 --- a/src/profiles/otelTraces.ts +++ b/src/profiles/otelTraces.ts @@ -18,12 +18,12 @@ import { import { buildOtelTracesDefaultRegistry } from "./otelTraces/schema"; import { DEFAULT_ATTRIBUTE_LIMITS, - DEFAULT_OTEL_TRACE_REDACT_KEYS, - DEFAULT_REQUEST_ID_ATTRIBUTES, + DEFAULT_OTLP_LIMITS, DEFAULT_STORE_CONFIG, normalizeOtelTraceRecordResult, type DbStatementMode, type OtelTraceAttributeLimits, + type OtelTraceOtlpLimits, type OtelTraceStoreConfig, type OtelTracesStreamProfile, } from "./otelTraces/normalize"; @@ -92,6 +92,25 @@ function parseAttributeLimitsResult(raw: unknown, path: string): Result 0 ? 
out : undefined); } +function parseOtlpLimitsResult(raw: unknown, path: string): Result | undefined, { message: string }> { + if (raw === undefined) return Result.ok(undefined); + const objRes = expectPlainObjectResult(raw, path); + if (Result.isError(objRes)) return objRes; + const keyCheck = rejectUnknownKeysResult( + objRes.value, + ["maxCompressedBytes", "maxDecodedBytes", "maxResourceSpansPerRequest", "maxScopeSpansPerRequest", "maxSpansPerRequest"], + path + ); + if (Result.isError(keyCheck)) return keyCheck; + const out: Partial = {}; + for (const key of Object.keys(DEFAULT_OTLP_LIMITS) as Array) { + const valueRes = parsePositiveIntResult(objRes.value[key], `${path}.${key}`, DEFAULT_OTLP_LIMITS[key]); + if (Result.isError(valueRes)) return valueRes; + if (objRes.value[key] !== undefined) out[key] = valueRes.value; + } + return Result.ok(Object.keys(out).length > 0 ? out : undefined); +} + function parseStoreResult(raw: unknown, path: string): Result | undefined, { message: string }> { if (raw === undefined) return Result.ok(undefined); const objRes = expectPlainObjectResult(raw, path); @@ -151,7 +170,7 @@ function validateOtelTracesProfileResult(raw: unknown, path: string): Result): "debug" | "info" | "w return status === "error" || error ? "error" : "info"; } +function spanEventIsException(event: Record): boolean { + const eventName = getString(event, "name")?.toLowerCase() ?? ""; + if (eventName === "exception") return true; + const attributes = isPlainObject(event.attributes) ? 
event.attributes : {}; + return getString(attributes, "exception.type") != null || getString(attributes, "exception.message") != null; +} + function buildOtelTimelineItems(args: { stream: string; offset?: string; record: unknown }): UnifiedTimelineItem[] { if (!isPlainObject(args.record)) return []; const record = args.record; @@ -228,12 +257,13 @@ function buildOtelTimelineItems(args: { stream: string; offset?: string; record: const eventTime = getString(event, "timestamp"); const eventName = getString(event, "name") ?? "span event"; if (!eventTime) continue; + const isException = spanEventIsException(event); out.push({ - kind: eventName === "exception" ? "otel.exception" : "otel.span.event", + kind: isException ? "otel.exception" : "otel.span.event", time: eventTime, service, title: eventName, - severity: eventName === "exception" ? "error" : severity, + severity: isException ? "error" : severity, ids, source, data: event, diff --git a/src/profiles/otelTraces/normalize.ts b/src/profiles/otelTraces/normalize.ts index 87cf36e..3f91234 100644 --- a/src/profiles/otelTraces/normalize.ts +++ b/src/profiles/otelTraces/normalize.ts @@ -15,6 +15,14 @@ export type OtelTraceAttributeLimits = { maxStatementBytes: number; }; +export type OtelTraceOtlpLimits = { + maxCompressedBytes: number; + maxDecodedBytes: number; + maxResourceSpansPerRequest: number; + maxScopeSpansPerRequest: number; + maxSpansPerRequest: number; +}; + export type OtelTraceStoreConfig = { rawResourceAttributes: boolean; rawSpanAttributes: boolean; @@ -29,6 +37,7 @@ export type OtelTracesStreamProfile = { attributeLimits?: Partial; store?: Partial; dbStatementMode?: DbStatementMode; + otlpLimits?: Partial; observability?: { request?: { eventsStream: string; @@ -217,6 +226,14 @@ export const DEFAULT_ATTRIBUTE_LIMITS: OtelTraceAttributeLimits = { maxStatementBytes: 4096, }; +export const DEFAULT_OTLP_LIMITS: OtelTraceOtlpLimits = { + maxCompressedBytes: 4 * 1024 * 1024, + maxDecodedBytes: 16 * 1024 * 1024, 
+ maxResourceSpansPerRequest: 1024, + maxScopeSpansPerRequest: 4096, + maxSpansPerRequest: 50_000, +}; + export const DEFAULT_STORE_CONFIG: OtelTraceStoreConfig = { rawResourceAttributes: true, rawSpanAttributes: true, @@ -326,6 +343,36 @@ function truncateUtf8(value: string, maxBytes: number): string { return TEXT_DECODER.decode(bytes.slice(0, Math.max(0, maxBytes))); } +function redactionKeyCandidates(key: string): Set { + const lowered = key.trim().toLowerCase(); + const out = new Set(); + if (lowered === "") return out; + out.add(lowered); + + const dotted = lowered.split(".").filter((part) => part !== ""); + for (let i = 0; i < dotted.length; i++) out.add(dotted.slice(i).join(".")); + const terminal = dotted.at(-1) ?? lowered; + out.add(terminal); + out.add(terminal.replace(/[-_]/g, "")); + + const tokens = lowered.split(/[._-]+/).filter((part) => part !== ""); + for (let length = 1; length <= Math.min(4, tokens.length); length++) { + const suffix = tokens.slice(tokens.length - length); + out.add(suffix.join(".")); + out.add(suffix.join("-")); + out.add(suffix.join("_")); + out.add(suffix.join("")); + } + return out; +} + +function shouldRedactAttributeKey(key: string, redactKeys: Set): boolean { + for (const candidate of redactionKeyCandidates(key)) { + if (redactKeys.has(candidate)) return true; + } + return false; +} + function sanitizeAttributeValue(value: unknown, redactKeys: Set, path: string, maxBytes: number): { value: unknown; redacted: string[] } { if (typeof value === "string") return { value: truncateUtf8(value, maxBytes), redacted: [] }; if (typeof value === "number") return { value: Number.isFinite(value) ? value : null, redacted: [] }; @@ -347,7 +394,7 @@ function sanitizeAttributeValue(value: unknown, redactKeys: Set, path: s const redacted: string[] = []; for (const [key, childValue] of Object.entries(value)) { const childPath = path === "" ? 
key : `${path}.${key}`; - if (redactKeys.has(key.toLowerCase())) { + if (shouldRedactAttributeKey(key, redactKeys)) { out[key] = REDACTED_VALUE; redacted.push(childPath); continue; @@ -380,7 +427,7 @@ function limitAttributes( } count += 1; const keyPath = args.path === "" ? key : `${args.path}.${key}`; - if (args.redactKeys.has(key.toLowerCase())) { + if (shouldRedactAttributeKey(key, args.redactKeys)) { out[key] = REDACTED_VALUE; redacted.push(keyPath); continue; @@ -419,11 +466,14 @@ function getRequestId(attrs: Record, direct: string | null, req function extractExceptionFromEvents(events: DecodedOtelEvent[]): { type: string | null; message: string | null; stacktrace: string | null } { for (const event of events) { - if (event.name !== "exception") continue; + const type = getString(event.attributes, "exception.type"); + const message = getString(event.attributes, "exception.message"); + const stacktrace = getString(event.attributes, "exception.stacktrace"); + if ((normalizeString(event.name)?.toLowerCase() ?? "") !== "exception" && !type && !message) continue; return { - type: getString(event.attributes, "exception.type"), - message: getString(event.attributes, "exception.message"), - stacktrace: getString(event.attributes, "exception.stacktrace"), + type, + message, + stacktrace, }; } return { type: null, message: null, stacktrace: null }; @@ -675,6 +725,118 @@ function linkFromCanonical(value: unknown): DecodedOtelLink | null { }; } +function canonicalString(value: unknown, fallback: string | null): string | null { + const normalized = normalizeString(value); + return normalized ?? fallback; +} + +function canonicalNumber(value: unknown, fallback: number | null): number | null { + const normalized = normalizeNumber(value); + return normalized ?? fallback; +} + +function canonicalInteger(value: unknown, fallback: number | null): number | null { + const normalized = normalizeInteger(value); + return normalized ?? 
fallback; +} + +function canonicalBoolean(value: unknown, fallback: boolean): boolean { + return typeof value === "boolean" ? value : fallback; +} + +function preserveCanonicalEventNames(value: unknown, fallback: string[]): string[] { + const out = new Set(fallback); + if (Array.isArray(value)) { + for (const item of value) { + const normalized = normalizeString(item); + if (normalized) out.add(normalized); + } + } + return Array.from(out); +} + +function preserveRedactionKeys(value: unknown, fallback: string[]): string[] { + const out = new Set(fallback); + if (isPlainObject(value) && Array.isArray(value.keys)) { + for (const item of value.keys) { + const normalized = normalizeString(item); + if (normalized) out.add(normalized); + } + } + return Array.from(out).sort(); +} + +function preserveCanonicalDerivedFields(canonical: CanonicalOtelSpan, raw: Record): CanonicalOtelSpan { + if (raw.schemaVersion !== 1 || raw.signal !== "trace.span") return canonical; + const out: CanonicalOtelSpan = structuredClone(canonical); + + out.duration = canonicalNumber(raw.duration, out.duration); + out.service = canonicalString(raw.service, out.service); + out.serviceNamespace = canonicalString(raw.serviceNamespace, out.serviceNamespace); + out.serviceInstanceId = canonicalString(raw.serviceInstanceId, out.serviceInstanceId); + out.environment = canonicalString(raw.environment, out.environment); + out.version = canonicalString(raw.version, out.version); + out.region = canonicalString(raw.region, out.region); + out.requestId = canonicalString(raw.requestId, out.requestId); + + const http = isPlainObject(raw.http) ? 
raw.http : {}; + out.http = { + method: canonicalString(http.method, out.http.method), + route: canonicalString(http.route, out.http.route), + path: canonicalString(http.path, out.http.path), + target: canonicalString(http.target, out.http.target), + url: canonicalString(http.url, out.http.url), + statusCode: canonicalInteger(http.statusCode, out.http.statusCode), + userAgent: canonicalString(http.userAgent, out.http.userAgent), + }; + + const db = isPlainObject(raw.db) ? raw.db : {}; + out.db = { + system: canonicalString(db.system, out.db.system), + name: canonicalString(db.name, out.db.name), + operation: canonicalString(db.operation, out.db.operation), + statement: canonicalString(db.statement, out.db.statement), + }; + + const rpc = isPlainObject(raw.rpc) ? raw.rpc : {}; + out.rpc = { + system: canonicalString(rpc.system, out.rpc.system), + service: canonicalString(rpc.service, out.rpc.service), + method: canonicalString(rpc.method, out.rpc.method), + }; + + const messaging = isPlainObject(raw.messaging) ? raw.messaging : {}; + out.messaging = { + system: canonicalString(messaging.system, out.messaging.system), + destination: canonicalString(messaging.destination, out.messaging.destination), + operation: canonicalString(messaging.operation, out.messaging.operation), + }; + + const error = isPlainObject(raw.error) ? raw.error : {}; + out.error = { + isError: canonicalBoolean(error.isError, out.error.isError), + type: canonicalString(error.type, out.error.type), + message: canonicalString(error.message, out.error.message), + stacktrace: canonicalString(error.stacktrace, out.error.stacktrace), + }; + + out.eventNames = preserveCanonicalEventNames(raw.eventNames, out.eventNames); + out.redaction.keys = preserveRedactionKeys(raw.redaction, out.redaction.keys); + + const dropped = isPlainObject(raw.dropped) ? raw.dropped : {}; + out.dropped = { + attributes: canonicalInteger(dropped.attributes, out.dropped.attributes) ?? 
0, + events: canonicalInteger(dropped.events, out.dropped.events) ?? 0, + links: canonicalInteger(dropped.links, out.dropped.links) ?? 0, + }; + + out.identity = { + spanKey: `${out.traceId}:${out.spanId}`, + dedupeKey: sha256Hex(`${out.traceId}\0${out.spanId}\0${out.startUnixNano ?? ""}\0${out.service ?? ""}\0${out.name}`), + }; + return out; +} + function decodedSpanFromCanonicalLikeResult(value: unknown): Result { const objRes = expectPlainObjectResult(value, "otel-traces record"); if (Result.isError(objRes)) return objRes; @@ -728,8 +890,9 @@ export function normalizeOtelTraceRecordResult( if (Result.isError(decodedRes)) return decodedRes; const normalizedRes = normalizeOtelDecodedSpanResult(profile, decodedRes.value); if (Result.isError(normalizedRes)) return normalizedRes; + const normalized = preserveCanonicalDerivedFields(normalizedRes.value, isPlainObject(value) ? value : {}); return Result.ok({ - value: normalizedRes.value, - routingKey: normalizedRes.value.traceId, + value: normalized, + routingKey: normalized.traceId, }); } diff --git a/src/profiles/otelTraces/otlp.ts b/src/profiles/otelTraces/otlp.ts index 0811472..aca352f 100644 --- a/src/profiles/otelTraces/otlp.ts +++ b/src/profiles/otelTraces/otlp.ts @@ -2,10 +2,12 @@ import { gunzipSync } from "node:zlib"; import { Result } from "better-result"; import type { OtlpTraceExportError, OtlpTraceExportResult } from "../profile"; import { + DEFAULT_OTLP_LIMITS, normalizeOtelDecodedSpanResult, type DecodedOtelEvent, type DecodedOtelLink, type DecodedOtelSpan, + type OtelTraceOtlpLimits, type OtelTracesStreamProfile, } from "./normalize"; @@ -146,7 +148,24 @@ function spanFromJson(raw: unknown): Omit { +type OtlpDecodeCounters = { + resourceSpans: number; + scopeSpans: number; + spans: number; +}; + +function incrementLimitCounter( + counters: OtlpDecodeCounters, + key: keyof OtlpDecodeCounters, + max: number, + label: string +): Result { + counters[key] += 1; + if (counters[key] > max) return Result.err({ 
message: `too many ${label} in OTLP request (max ${max})` }); + return Result.ok(undefined); +} + +function decodeJsonExportResult(body: Uint8Array, limits: OtelTraceOtlpLimits): Result { let parsed: unknown; try { parsed = JSON.parse(JSON_TEXT_DECODER.decode(body)); @@ -155,8 +174,11 @@ function decodeJsonExportResult(body: Uint8Array): Result { +function decodeScopeSpans(bytes: Uint8Array, limits: OtelTraceOtlpLimits, counters: OtlpDecodeCounters): Result { const reader = new ProtoReader(bytes); const out: ScopeSpansDecoded = { scope: { name: null, version: null, schemaUrl: null, attributes: {} }, @@ -647,6 +673,8 @@ function decodeScopeSpans(bytes: Uint8Array): Result { +function decodeResourceSpans(bytes: Uint8Array, limits: OtelTraceOtlpLimits, counters: OtlpDecodeCounters): Result { const reader = new ProtoReader(bytes); const out: ResourceSpansDecoded = { resourceAttributes: {}, resourceSchemaUrl: null, scopeSpans: [] }; while (!reader.eof()) { @@ -678,9 +706,11 @@ function decodeResourceSpans(bytes: Uint8Array): Result { +function decodeProtobufExportResult(body: Uint8Array, limits: OtelTraceOtlpLimits): Result { const reader = new ProtoReader(body); const out: DecodedOtelSpan[] = []; + const counters: OtlpDecodeCounters = { resourceSpans: 0, scopeSpans: 0, spans: 0 }; while (!reader.eof()) { const tagRes = reader.readTag(); if (Result.isError(tagRes)) return tagRes; if (tagRes.value.field === 1 && tagRes.value.wire === 2) { + const resourceLimitRes = incrementLimitCounter(counters, "resourceSpans", limits.maxResourceSpansPerRequest, "resourceSpans"); + if (Result.isError(resourceLimitRes)) return resourceLimitRes; const bytesRes = reader.readBytes(); if (Result.isError(bytesRes)) return bytesRes; - const resourceSpansRes = decodeResourceSpans(bytesRes.value); + const resourceSpansRes = decodeResourceSpans(bytesRes.value, limits, counters); if (Result.isError(resourceSpansRes)) return resourceSpansRes; for (const scopeSpans of 
resourceSpansRes.value.scopeSpans) { for (const span of scopeSpans.spans) { @@ -729,31 +762,40 @@ function decodeBody(args: { contentEncoding: string | null; body: Uint8Array; maxDecodedBytes: number; + limits: OtelTraceOtlpLimits; }): Result<{ spans: DecodedOtelSpan[]; responseEncoding: "protobuf" | "json" }, OtlpTraceExportError> { let body = args.body; + const maxDecodedBytes = Math.min(args.maxDecodedBytes, args.limits.maxDecodedBytes); const encoding = args.contentEncoding?.trim().toLowerCase() ?? ""; if (encoding !== "" && encoding !== "identity" && encoding !== "gzip") { return Result.err({ status: 415, message: "unsupported content-encoding" }); } if (encoding === "gzip") { + if (body.byteLength > args.limits.maxCompressedBytes) { + return Result.err({ status: 413, message: `compressed OTLP body too large (max ${args.limits.maxCompressedBytes})` }); + } try { - body = new Uint8Array(gunzipSync(body)); - } catch { + body = new Uint8Array(gunzipSync(body, { maxOutputLength: maxDecodedBytes })); + } catch (error) { + const code = typeof error === "object" && error && "code" in error ? 
String((error as { code?: unknown }).code) : ""; + if (code === "ERR_BUFFER_TOO_LARGE") { + return Result.err({ status: 413, message: `decoded OTLP body too large (max ${maxDecodedBytes})` }); + } return Result.err({ status: 400, message: "invalid gzip body" }); } } - if (body.byteLength > args.maxDecodedBytes) { - return Result.err({ status: 400, message: `decoded OTLP body too large (max ${args.maxDecodedBytes})` }); + if (body.byteLength > maxDecodedBytes) { + return Result.err({ status: 413, message: `decoded OTLP body too large (max ${maxDecodedBytes})` }); } const contentType = baseContentType(args.contentType); if (contentType === JSON_CONTENT_TYPE) { - const spansRes = decodeJsonExportResult(body); + const spansRes = decodeJsonExportResult(body, args.limits); if (Result.isError(spansRes)) return Result.err({ status: 400, message: spansRes.error.message }); return Result.ok({ spans: spansRes.value, responseEncoding: "json" }); } if (contentType === PROTOBUF_CONTENT_TYPE) { - const spansRes = decodeProtobufExportResult(body); + const spansRes = decodeProtobufExportResult(body, args.limits); if (Result.isError(spansRes)) return Result.err({ status: 400, message: spansRes.error.message }); return Result.ok({ spans: spansRes.value, responseEncoding: "protobuf" }); } @@ -768,7 +810,8 @@ export function decodeOtlpTraceExportRequestResult(args: { body: Uint8Array; maxDecodedBytes: number; }): Result { - const decodedRes = decodeBody(args); + const limits = { ...DEFAULT_OTLP_LIMITS, ...(args.profile.otlpLimits ?? 
{}) }; + const decodedRes = decodeBody({ ...args, limits }); if (Result.isError(decodedRes)) return decodedRes; const records: OtlpTraceExportResult["records"] = []; const warnings: string[] = []; diff --git a/src/profiles/otelTraces/schema.ts b/src/profiles/otelTraces/schema.ts index b9bce34..f1f1acd 100644 --- a/src/profiles/otelTraces/schema.ts +++ b/src/profiles/otelTraces/schema.ts @@ -376,14 +376,17 @@ export const OTEL_TRACES_DEFAULT_SEARCH_CONFIG: SearchConfig = { intervals: ["1m", "5m", "1h"], measures: { spans: { kind: "count" }, + errors: { kind: "count", include: "error:true" }, latency: { kind: "summary", field: "duration", histogram: "log2_v1" }, }, }, http_server: { + include: "kind:server", dimensions: ["service", "http.method", "http.route", "http.statusCode"], intervals: ["1m", "5m", "1h"], measures: { requests: { kind: "count" }, + errors: { kind: "count", include: "error:true" }, latency: { kind: "summary", field: "duration", histogram: "log2_v1" }, }, }, diff --git a/src/profiles/profile.ts b/src/profiles/profile.ts index 0b1c108..85f5b21 100644 --- a/src/profiles/profile.ts +++ b/src/profiles/profile.ts @@ -84,7 +84,7 @@ export type OtlpTraceExportResult = { export type OtlpTraceExportError = { message: string; - status?: 400 | 415; + status?: 400 | 413 | 415; }; export type UnifiedTimelineItem = { diff --git a/src/schema/registry.ts b/src/schema/registry.ts index 82e01f1..b554aa3 100644 --- a/src/schema/registry.ts +++ b/src/schema/registry.ts @@ -45,7 +45,7 @@ export type SearchFieldConfig = { }; export type SearchRollupMeasureConfig = - | { kind: "count" } + | { kind: "count"; include?: string } | { kind: "summary"; field: string; histogram?: "log2_v1" } | { kind: "summary_parts"; @@ -58,6 +58,7 @@ export type SearchRollupMeasureConfig = export type SearchRollupConfig = { timestampField?: string; + include?: string; dimensions?: string[]; intervals: string[]; measures: Record; @@ -299,9 +300,12 @@ function parseSearchRollupMeasureResult( ): 
Result { if (!isPlainObject(raw)) return Result.err({ message: `${path} must be an object` }); if (raw.kind === "count") { - const keyCheck = rejectUnknownKeysResult(raw, ["kind"], path); + const keyCheck = rejectUnknownKeysResult(raw, ["kind", "include"], path); if (Result.isError(keyCheck)) return keyCheck; - return Result.ok({ kind: "count" }); + if (raw.include !== undefined && (typeof raw.include !== "string" || raw.include.trim() === "")) { + return Result.err({ message: `${path}.include must be a non-empty string` }); + } + return Result.ok({ kind: "count", include: typeof raw.include === "string" ? raw.include.trim() : undefined }); } if (raw.kind === "summary") { const keyCheck = rejectUnknownKeysResult(raw, ["kind", "field", "histogram"], path); @@ -360,7 +364,7 @@ function parseSearchRollupConfigResult( primaryTimestampField: string ): Result { if (!isPlainObject(raw)) return Result.err({ message: `${path} must be an object` }); - const keyCheck = rejectUnknownKeysResult(raw, ["timestampField", "dimensions", "intervals", "measures"], path); + const keyCheck = rejectUnknownKeysResult(raw, ["timestampField", "include", "dimensions", "intervals", "measures"], path); if (Result.isError(keyCheck)) return keyCheck; const timestampFieldRaw = raw.timestampField === undefined ? 
primaryTimestampField : raw.timestampField; @@ -371,6 +375,14 @@ function parseSearchRollupConfigResult( if (!timestampField) return Result.err({ message: `${path}.timestampField must reference a declared field` }); if (timestampField.kind !== "date") return Result.err({ message: `${path}.timestampField must reference a date field` }); + let include: string | undefined; + if (raw.include !== undefined) { + if (typeof raw.include !== "string" || raw.include.trim() === "") { + return Result.err({ message: `${path}.include must be a non-empty string` }); + } + include = raw.include.trim(); + } + let dimensions: string[] | undefined; if (raw.dimensions !== undefined) { if (!Array.isArray(raw.dimensions)) return Result.err({ message: `${path}.dimensions must be an array` }); @@ -421,6 +433,7 @@ function parseSearchRollupConfigResult( return Result.ok({ timestampField: timestampFieldRes.value, + include, dimensions, intervals, measures, diff --git a/src/search/aggregate.ts b/src/search/aggregate.ts index 7e01de3..b58f49c 100644 --- a/src/search/aggregate.ts +++ b/src/search/aggregate.ts @@ -13,6 +13,7 @@ import { } from "./schema"; import { collectPositiveSearchExactClauses, + evaluateSearchQueryResult, parseSearchQueryResult, type CompiledSearchQuery, } from "./query"; @@ -289,6 +290,20 @@ export function rollupRequiredFieldNames(registry: SchemaRegistry, rollup: Searc return Array.from(fields); } +function matchesRollupIncludeResult( + registry: SchemaRegistry, + offset: bigint, + value: unknown, + include: string | undefined +): Result { + if (!include) return Result.ok(true); + const queryRes = parseSearchQueryResult(registry, include); + if (Result.isError(queryRes)) return queryRes; + const evalRes = evaluateSearchQueryResult(registry, offset, queryRes.value, value); + if (Result.isError(evalRes)) return evalRes; + return Result.ok(evalRes.value.matched); +} + export function extractRollupContributionResult( registry: SchemaRegistry, rollup: SearchRollupConfig, @@ 
-297,6 +312,9 @@ export function extractRollupContributionResult( precomputedRawValues?: Map ): Result<{ timestampMs: number; dimensions: Record; measures: Record } | null, { message: string }> { if (!isPlainObject(value)) return Result.ok(null); + const rollupIncludeRes = matchesRollupIncludeResult(registry, offset, value, rollup.include); + if (Result.isError(rollupIncludeRes)) return rollupIncludeRes; + if (!rollupIncludeRes.value) return Result.ok(null); const rawValuesRes = precomputedRawValues ? Result.ok(precomputedRawValues) : extractRawSearchValuesForFieldsResult(registry, offset, value, rollupRequiredFieldNames(registry, rollup)); @@ -331,7 +349,9 @@ export function extractRollupContributionResult( const measures: Record = {}; for (const [measureName, measure] of Object.entries(rollup.measures)) { if (measure.kind === "count") { - measures[measureName] = { kind: "count", value: 1 }; + const includeRes = matchesRollupIncludeResult(registry, offset, value, measure.include); + if (Result.isError(includeRes)) return includeRes; + measures[measureName] = { kind: "count", value: includeRes.value ? 
1 : 0 }; continue; } if (measure.kind === "summary") { diff --git a/test/observe_request.test.ts b/test/observe_request.test.ts index 099fdf3..b372581 100644 --- a/test/observe_request.test.ts +++ b/test/observe_request.test.ts @@ -2,6 +2,8 @@ import { describe, expect, test } from "bun:test"; import { mkdtempSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; +import { buildTraceDetails, summarizeSearchCoverage } from "../src/observe/request"; +import type { SearchHit, SearchResultBatch } from "../src/reader"; import { createProfileTestApp, fetchJsonApp } from "./profile_test_utils"; const TRACE_ID = "5b8efff798038103d269b633813fc60c"; @@ -158,6 +160,106 @@ function observeBody(lookup: Record, extra: Record { + test("selects the best request root without dropping other root spans", () => { + const trace = buildTraceDetails([ + { + traceId: TRACE_ID, + spanId: "aaaaaaaaaaaaaaaa", + parentSpanId: null, + name: "background flush", + kind: "internal", + timestamp: "2026-02-25T12:00:00.000Z", + endTimestamp: "2026-02-25T12:00:10.000Z", + duration: 10_000, + status: { code: "unset", message: null }, + }, + { + traceId: TRACE_ID, + spanId: ROOT_SPAN_ID, + parentSpanId: null, + name: "GET /checkout", + kind: "server", + timestamp: "2026-02-25T12:00:01.000Z", + endTimestamp: "2026-02-25T12:00:01.260Z", + duration: 260, + requestId: "req_obs_1", + http: { method: "GET", route: "/checkout", statusCode: 200 }, + status: { code: "ok", message: null }, + }, + ]); + + expect(trace.rootSpanId).toBe(ROOT_SPAN_ID); + expect(trace.tree.map((node) => node.spanId).sort()).toEqual(["aaaaaaaaaaaaaaaa", ROOT_SPAN_ID].sort()); + }); + + test("deduplicates overlapping request-observe coverage totals by stream and offset", () => { + const baseCoverage: SearchResultBatch["coverage"] = { + mode: "complete", + complete: true, + streamHeadOffset: "1", + visibleThroughOffset: "1", + visibleThroughPrimaryTimestampMax: null, + oldestOmittedAppendAt: 
null, + possibleMissingEventsUpperBound: 0, + possibleMissingUploadedSegments: 0, + possibleMissingSealedRows: 0, + possibleMissingWalRows: 0, + indexedSegments: 0, + indexedSegmentTimeMs: 0, + ftsSectionGetMs: 0, + ftsDecodeMs: 0, + ftsClauseEstimateMs: 0, + scannedSegments: 0, + scannedSegmentTimeMs: 0, + scannedTailDocs: 0, + scannedTailTimeMs: 0, + exactCandidateTimeMs: 0, + candidateDocIds: 0, + decodedRecords: 0, + jsonParseTimeMs: 0, + segmentPayloadBytesFetched: 0, + sortTimeMs: 0, + peakHitsHeld: 0, + indexFamiliesUsed: ["exact"], + }; + const batches: SearchResultBatch[] = [ + { + stream: "app-traces", + snapshotEndOffset: "1", + tookMs: 1, + timedOut: false, + timeoutMs: null, + coverage: baseCoverage, + total: { value: 2, relation: "eq" }, + hits: [], + nextSearchAfter: null, + }, + { + stream: "app-traces", + snapshotEndOffset: "1", + tookMs: 1, + timedOut: false, + timeoutMs: null, + coverage: baseCoverage, + total: { value: 2, relation: "eq" }, + hits: [], + nextSearchAfter: null, + }, + ]; + const hits: Array = [ + { stream: "app-traces", offset: "0", score: 1, sort: [], fields: {}, source: {} }, + { stream: "app-traces", offset: "1", score: 1, sort: [], fields: {}, source: {} }, + { stream: "app-traces", offset: "1", score: 1, sort: [], fields: {}, source: {} }, + ]; + + expect(summarizeSearchCoverage(batches, hits, false)).toMatchObject({ + hits: 2, + unique_hits: 2, + query_count: 2, + total: { value: 2, relation: "eq" }, + }); + }); + test("looks up by requestId and returns evlog context with trace tree", async () => { const root = mkdtempSync(join(tmpdir(), "ds-observe-request-id-")); const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); diff --git a/test/profile_otel_traces.test.ts b/test/profile_otel_traces.test.ts index 892d78c..8a10b5c 100644 --- a/test/profile_otel_traces.test.ts +++ b/test/profile_otel_traces.test.ts @@ -165,6 +165,7 @@ function makeOtlpProtoRequest(): Uint8Array { const scope: number[] = []; 
writeString(scope, 1, "proto-test"); + writeMessage(scope, 3, kvString("telemetry.sdk.language", "javascript")); const scopeSpans: number[] = []; writeMessage(scopeSpans, 1, scope); @@ -194,6 +195,11 @@ describe("otel-traces profile", () => { attributeLimits: { maxAttributesPerSpan: 32 }, store: { rawLinks: false }, dbStatementMode: "raw", + otlpLimits: { + maxCompressedBytes: 1024, + maxDecodedBytes: 2048, + maxSpansPerRequest: 100, + }, observability: { request: { eventsStream: "app-events", @@ -208,6 +214,11 @@ describe("otel-traces profile", () => { attributeLimits: { maxAttributesPerSpan: 32 }, store: { rawLinks: false }, dbStatementMode: "raw", + otlpLimits: { + maxCompressedBytes: 1024, + maxDecodedBytes: 2048, + maxSpansPerRequest: 100, + }, observability: { request: { eventsStream: "app-events", @@ -224,6 +235,9 @@ describe("otel-traces profile", () => { expect(schemaRes.body?.search?.fields?.duration?.kind).toBe("float"); expect(schemaRes.body?.search?.fields?.["events.name"]?.bindings?.[0]?.jsonPointer).toBe("/eventNames"); expect(schemaRes.body?.search?.rollups?.spans?.measures?.latency?.field).toBe("duration"); + expect(schemaRes.body?.search?.rollups?.spans?.measures?.errors).toEqual({ kind: "count", include: "error:true" }); + expect(schemaRes.body?.search?.rollups?.http_server?.include).toBe("kind:server"); + expect(schemaRes.body?.search?.rollups?.http_server?.measures?.errors).toEqual({ kind: "count", include: "error:true" }); const listRes = await fetchJsonApp(app, "http://local/v1/streams", { method: "GET" }); expect(listRes.status).toBe(200); @@ -338,6 +352,11 @@ describe("otel-traces profile", () => { "http.route": "/checkout", "http.response.status_code": 500, authorization: "Bearer secret", + "http.request.header.authorization": "Bearer header secret", + "http.request.header.cookie": "session=secret", + "http.response.header.set-cookie": "session=secret", + "http.request.header.x-api-key": "api-key-secret", + 
"rpc.request.metadata.authorization": "Basic secret", "db.system": "postgresql", "db.statement": "SELECT 1", }, @@ -376,8 +395,23 @@ describe("otel-traces profile", () => { eventNames: ["exception"], }); expect(span.attributes.authorization).toBe("[REDACTED]"); + expect(span.attributes["http.request.header.authorization"]).toBe("[REDACTED]"); + expect(span.attributes["http.request.header.cookie"]).toBe("[REDACTED]"); + expect(span.attributes["http.response.header.set-cookie"]).toBe("[REDACTED]"); + expect(span.attributes["http.request.header.x-api-key"]).toBe("[REDACTED]"); + expect(span.attributes["rpc.request.metadata.authorization"]).toBe("[REDACTED]"); expect(span.events[0].attributes.token).toBe("[REDACTED]"); - expect(span.redaction.keys).toEqual(expect.arrayContaining(["attributes.authorization", "events.0.attributes.token"])); + expect(span.redaction.keys).toEqual( + expect.arrayContaining([ + "attributes.authorization", + "attributes.http.request.header.authorization", + "attributes.http.request.header.cookie", + "attributes.http.response.header.set-cookie", + "attributes.http.request.header.x-api-key", + "attributes.rpc.request.metadata.authorization", + "events.0.attributes.token", + ]) + ); const searchRes = await fetchJsonApp(app, "http://local/v1/stream/otel-json/_search", { method: "POST", @@ -395,6 +429,112 @@ describe("otel-traces profile", () => { service: "checkout", "http.statusCode": 500, }); + + const eventNameSearchRes = await fetchJsonApp(app, "http://local/v1/stream/otel-json/_search", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ q: "events.name:exception" }), + }); + expect(eventNameSearchRes.status).toBe(200); + expect(eventNameSearchRes.body?.hits).toHaveLength(1); + + const bareExceptionSearchRes = await fetchJsonApp(app, "http://local/v1/stream/otel-json/_search", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ q: "exception" }), + }); + 
expect(bareExceptionSearchRes.status).toBe(200); + expect(bareExceptionSearchRes.body?.hits).toHaveLength(1); + } finally { + await app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("preserves canonical derived fields when raw attributes were dropped", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-canonical-preserve-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await createOtelTraceStream(app, "otel-canonical-preserve", { + store: { + rawResourceAttributes: false, + rawSpanAttributes: false, + rawEvents: false, + rawLinks: false, + }, + }); + const appendRes = await app.fetch( + new Request("http://local/v1/stream/otel-canonical-preserve", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + traceId: TRACE_ID, + spanId: SPAN_ID, + name: "GET /checkout", + kind: "server", + startUnixNano: "1772020800000000000", + endUnixNano: "1772020800123000000", + status: { code: "error", message: "failed" }, + resource: { + attributes: { + "service.name": "checkout", + "deployment.environment.name": "prod", + }, + }, + attributes: { + "request.id": "req_preserve_1", + "http.request.method": "GET", + "http.route": "/checkout", + "http.response.status_code": 500, + }, + events: [ + { + timeUnixNano: "1772020800100000000", + name: "exception", + attributes: { + "exception.message": "checkout failed", + }, + }, + ], + }), + }) + ); + expect([200, 204]).toContain(appendRes.status); + + const firstReadRes = await fetchJsonApp(app, "http://local/v1/stream/otel-canonical-preserve?format=json", { method: "GET" }); + expect(firstReadRes.status).toBe(200); + const canonical = firstReadRes.body[0]; + expect(canonical.attributes).toEqual({}); + expect(canonical.resource.attributes).toEqual({}); + expect(canonical.events).toEqual([]); + expect(canonical).toMatchObject({ + service: "checkout", + environment: "prod", + requestId: 
"req_preserve_1", + http: { method: "GET", route: "/checkout", statusCode: 500 }, + error: { isError: true, message: "failed" }, + eventNames: ["exception"], + }); + + const reappendRes = await app.fetch( + new Request("http://local/v1/stream/otel-canonical-preserve", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(canonical), + }) + ); + expect([200, 204]).toContain(reappendRes.status); + + const secondReadRes = await fetchJsonApp(app, "http://local/v1/stream/otel-canonical-preserve?format=json", { method: "GET" }); + expect(secondReadRes.status).toBe(200); + expect(secondReadRes.body[1]).toMatchObject({ + service: "checkout", + environment: "prod", + requestId: "req_preserve_1", + http: { method: "GET", route: "/checkout", statusCode: 500 }, + error: { isError: true, message: "failed" }, + eventNames: ["exception"], + }); } finally { await app.close(); rmSync(root, { recursive: true, force: true }); @@ -464,6 +604,10 @@ describe("otel-traces profile", () => { parentSpanId: SPAN_ID, requestId: "req_proto_1", service: "checkout", + instrumentationScope: { + name: "proto-test", + attributes: { "telemetry.sdk.language": "javascript" }, + }, db: { system: "postgresql", operation: "SELECT" }, status: { code: "ok", message: "ok" }, }); @@ -503,4 +647,150 @@ describe("otel-traces profile", () => { rmSync(root, { recursive: true, force: true }); } }); + + test("rejects OTLP requests that exceed compressed, decoded, or span-count limits", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-limits-")); + const { app } = createProfileTestApp(root); + try { + await createOtelTraceStream(app, "limited-traces", { + otlpLimits: { + maxCompressedBytes: 48, + maxDecodedBytes: 4096, + maxSpansPerRequest: 1, + }, + }); + + const compressedTooLargeRes = await fetchJsonApp(app, "http://local/v1/stream/limited-traces/_otlp/v1/traces", { + method: "POST", + headers: { + "content-type": "application/json", + 
"content-encoding": "gzip", + }, + body: gzipSync(JSON.stringify(otlpJsonRequest())), + }); + expect(compressedTooLargeRes.status).toBe(413); + expect(compressedTooLargeRes.body?.error?.message).toContain("compressed OTLP body too large"); + + await createOtelTraceStream(app, "decoded-limited-traces", { + otlpLimits: { + maxCompressedBytes: 4096, + maxDecodedBytes: 64, + }, + }); + const decodedTooLargeRes = await fetchJsonApp(app, "http://local/v1/stream/decoded-limited-traces/_otlp/v1/traces", { + method: "POST", + headers: { + "content-type": "application/json", + "content-encoding": "gzip", + }, + body: gzipSync(JSON.stringify(otlpJsonRequest())), + }); + expect(decodedTooLargeRes.status).toBe(413); + expect(decodedTooLargeRes.body?.error?.message).toContain("decoded OTLP body too large"); + + const tooManySpansRes = await fetchJsonApp(app, "http://local/v1/stream/limited-traces/_otlp/v1/traces", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(otlpJsonRequest([otlpJsonSpan(), otlpJsonSpan({ spanId: CHILD_SPAN_ID })])), + }); + expect(tooManySpansRes.status).toBe(400); + expect(tooManySpansRes.body?.error?.message).toContain("too many spans"); + } finally { + await app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("otel-traces rollups filter http_server to server spans and count errors", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-rollups-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await createOtelTraceStream(app, "rollup-traces"); + const appendRes = await app.fetch( + new Request("http://local/v1/stream/rollup-traces", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify([ + { + traceId: TRACE_ID, + spanId: SPAN_ID, + name: "GET /checkout", + kind: "server", + startUnixNano: "1772020800000000000", + endUnixNano: "1772020800123000000", + status: { code: "error", 
message: "failed" }, + resource: { attributes: { "service.name": "checkout" } }, + attributes: { + "http.request.method": "GET", + "http.route": "/checkout", + "http.response.status_code": 500, + }, + }, + { + traceId: TRACE_ID, + spanId: CHILD_SPAN_ID, + parentSpanId: SPAN_ID, + name: "SELECT cart", + kind: "internal", + startUnixNano: "1772020800010000000", + endUnixNano: "1772020800018000000", + status: { code: "error", message: "db failed" }, + resource: { attributes: { "service.name": "checkout" } }, + attributes: { + "db.system": "postgresql", + "db.operation": "SELECT", + }, + }, + ]), + }) + ); + expect([200, 204]).toContain(appendRes.status); + + const httpAggregateRes = await fetchJsonApp(app, "http://local/v1/stream/rollup-traces/_aggregate", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + rollup: "http_server", + from: "2026-02-25T12:00:00.000Z", + to: "2026-02-25T12:01:00.000Z", + interval: "1m", + group_by: ["service", "http.method", "http.route", "http.statusCode"], + }), + }); + expect(httpAggregateRes.status).toBe(200); + expect(httpAggregateRes.body?.buckets?.[0]?.groups).toEqual([ + { + key: { + service: "checkout", + "http.method": "get", + "http.route": "/checkout", + "http.statusCode": "500", + }, + measures: { + errors: { count: 1 }, + latency: expect.objectContaining({ count: 1, sum: 123 }), + requests: { count: 1 }, + }, + }, + ]); + + const spansAggregateRes = await fetchJsonApp(app, "http://local/v1/stream/rollup-traces/_aggregate", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + rollup: "spans", + from: "2026-02-25T12:00:00.000Z", + to: "2026-02-25T12:01:00.000Z", + interval: "1m", + group_by: ["service"], + }), + }); + expect(spansAggregateRes.status).toBe(200); + expect(spansAggregateRes.body?.buckets?.[0]?.groups?.[0]?.measures?.spans).toEqual({ count: 2 }); + 
expect(spansAggregateRes.body?.buckets?.[0]?.groups?.[0]?.measures?.errors).toEqual({ count: 2 }); + } finally { + await app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); }); From 30e938dbf01d5fa37096b527b91c9edf7d56a99f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Fri, 12 Jun 2026 15:55:39 +0700 Subject: [PATCH 09/12] Document otel observability safeguards --- README.md | 20 +++++++++++++++++++- docs/durable-streams-spec.md | 19 ++++++++++++++++--- docs/profile-otel-traces.md | 35 ++++++++++++++++++++++++++++------- docs/request-observability.md | 19 +++++++++++++++++++ docs/schemas.md | 7 ++++++- 5 files changed, 88 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 95a6a2b..4abc353 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ Current built-ins: - `evlog` - `generic` - `metrics` +- `otel-traces` - `state-protocol` Planned next built-ins: @@ -189,6 +190,20 @@ It means: The internal `__stream_metrics__` stream is created with this profile automatically. +### `otel-traces` + +`otel-traces` is the built-in profile for OpenTelemetry trace spans. 
+ +It means: + +- the stream content type must be `application/json` +- JSON appends are normalized into the canonical span envelope +- OTLP trace exports are accepted through `POST /v1/traces` and + `POST /v1/stream/{name}/_otlp/v1/traces` +- installing the profile also installs the canonical trace schema/search and + default rollups +- the canonical routing key is `traceId` + ## Profile Versus Schema What belongs in a profile: @@ -407,11 +422,14 @@ Not implemented today: The supported behavior is: -- use `/_profile` to choose `generic`, `state-protocol`, or `evlog` +- use `/_profile` to choose a built-in profile, including `generic`, `evlog`, + `metrics`, `otel-traces`, and `state-protocol` - use `/_schema` only for schema validation, routing-key config, and schema evolution - use `/touch/*` only on `state-protocol` streams with touch enabled - use normal JSON appends on `evlog` streams to store canonical evlog events +- use OTLP trace endpoints only on `otel-traces` streams, or on `/v1/traces` + when `DS_OTLP_TRACES_STREAM` is configured Legacy compatibility branches are intentionally not part of the supported surface. diff --git a/docs/durable-streams-spec.md b/docs/durable-streams-spec.md index a4f794c..435b445 100644 --- a/docs/durable-streams-spec.md +++ b/docs/durable-streams-spec.md @@ -255,9 +255,11 @@ also returns HTTP `200` and includes OTLP partial-success information with the number of rejected spans and an error message. Clients must not retry spans rejected by a partial-success response. -Malformed payloads return `400`. Unsupported content types or content encodings -return `415`. Accepted spans are appended as canonical JSON span records using -`traceId` as the routing key. +Malformed payloads and requests that exceed resource-span, scope-span, or span +count limits return `400`. Compressed or decoded OTLP bodies that exceed the +configured byte limits return `413`. Unsupported content types or content +encodings return `415`. 
Accepted spans are appended as canonical JSON span +records using `traceId` as the routing key. ### 4.5 Request observability @@ -312,6 +314,17 @@ The trace response deduplicates returned spans by `traceId:spanId` for the tree, service map, errors, and critical path. Duplicate span records remain in the underlying append-only stream. +`trace.rootSpanId` is selected from all returned root candidates by preferring +likely request roots: no parent, server kind, HTTP fields, request ID, and then +duration. Other roots remain in `trace.tree`. `trace.criticalPath` is an +interval-aware highlighted path from the selected root when one exists. + +`coverage.events` and `coverage.traces` de-duplicate `hits`, `unique_hits`, and +`total.value` by stream and offset across overlapping lookup searches. +`query_count` and `batch_count` report the number of underlying `_search` +batches used. `total.relation` is `gte` when limits, timeouts, incomplete +coverage, or underlying lower-bound totals prevent an exact unique total. + The endpoint returns `400` for invalid request bodies, unsupported profile combinations, or streams without search configuration. Missing streams return `404`. diff --git a/docs/profile-otel-traces.md b/docs/profile-otel-traces.md index e2d3df3..fabcd6c 100644 --- a/docs/profile-otel-traces.md +++ b/docs/profile-otel-traces.md @@ -53,6 +53,13 @@ Content-Type: application/json "rawLinks": true }, "dbStatementMode": "drop", + "otlpLimits": { + "maxCompressedBytes": 4194304, + "maxDecodedBytes": 16777216, + "maxResourceSpansPerRequest": 1024, + "maxScopeSpansPerRequest": 4096, + "maxSpansPerRequest": 50000 + }, "observability": { "request": { "eventsStream": "app-events" @@ -69,6 +76,13 @@ Supported `dbStatementMode` values: There is no `redact_literals` mode in the shipped implementation. +Redaction matches configured keys case-insensitively and also checks dotted +header/metadata suffixes. 
For example, the built-in `authorization`, `cookie`, +`set-cookie`, and `x-api-key` entries redact attributes such as +`http.request.header.authorization`, `http.request.header.cookie`, +`http.response.header.set-cookie`, `http.request.header.x-api-key`, and +`rpc.request.metadata.authorization`. + ## Canonical Span Envelope Each stored span is normalized to a stable JSON object with: @@ -93,6 +107,11 @@ Nanosecond timestamps are preserved as decimal strings. `timestamp`, `endTimestamp`, and `duration` are derived for search, sort, aggregation, and UI rendering. +When an already-canonical span record is appended again, top-level canonical +fields such as service, environment, request ID, HTTP fields, error fields, +duration, and `eventNames` are preserved even if the raw attributes or raw +events were not retained in the stored record. + ## OTLP Ingestion Two endpoints accept OTLP trace exports. @@ -132,10 +151,12 @@ Both endpoints support: - `application/json` - `Content-Encoding: gzip` -Malformed payloads return `400`. Unsupported media types or encodings return -`415`. A successful full acceptance returns OTLP success. Partial acceptance -returns HTTP `200` with OTLP `partialSuccess` / `partial_success` information; -clients should not retry rejected spans from that response. +Malformed payloads and requests that exceed resource-span, scope-span, or span +count limits return `400`. Payloads that exceed compressed or decoded byte +limits return `413`. Unsupported media types or encodings return `415`. A +successful full acceptance returns OTLP success. Partial acceptance returns +HTTP `200` with OTLP `partialSuccess` / `partial_success` information; clients +should not retry rejected spans from that response. 
## JSON Appends @@ -192,10 +213,10 @@ Default rollups: - `spans` over `service`, `kind`, and `status.code` - `http_server` over `service`, `http.method`, `http.route`, and - `http.statusCode` + `http.statusCode`, filtered to `kind:server` -Each rollup includes count and `duration` summary measures. Filtered count -measures are not part of the shipped rollup schema. +Each rollup includes a count measure, an `errors` count measure filtered to +`error:true`, and a `duration` summary measure. ## Request Correlation diff --git a/docs/request-observability.md b/docs/request-observability.md index 8ed4036..2cdb2d1 100644 --- a/docs/request-observability.md +++ b/docs/request-observability.md @@ -184,6 +184,16 @@ first event result. Spans are deduplicated by `traceId:spanId` for the trace view. The underlying stream remains append-only and keeps duplicate deliveries. +`rootSpanId` is selected from the returned root candidates by scoring likely +request roots first: no parent, server kind, HTTP fields, request ID, and then +duration. Other root spans remain in `trace.tree`; the selected root only +drives summary fields and the highlighted path. + +`criticalPath` is an interval-aware highlighted span path that starts at the +selected root when one exists. Child selection uses each subtree's exclusive +time plus its longest descendant contribution, so overlapping sibling spans do +not simply add together. + ## Trace Tree Tree nodes contain: @@ -236,12 +246,21 @@ the request: - `timed_out` - `limit_reached` - `hits` +- `unique_hits` +- `query_count` +- `batch_count` - `total` - `index_families_used` - `scanned_tail_docs` - `scanned_segments` - `possible_missing_events_upper_bound` +`hits`, `unique_hits`, and `total.value` are de-duplicated by stream and offset +across overlapping lookup searches. `query_count` / `batch_count` show how many +underlying `_search` batches were used. 
`total.relation` is `gte` whenever a +limit, timeout, incomplete coverage, or any underlying lower-bound total means +the exact unique total is not known. + Warnings are emitted for missing evlog events, missing trace spans, hit limits, incomplete search coverage, and missing parent spans. A UI should surface these warnings instead of presenting an incomplete response as authoritative. diff --git a/docs/schemas.md b/docs/schemas.md index d7e18a0..74e90c6 100644 --- a/docs/schemas.md +++ b/docs/schemas.md @@ -68,6 +68,11 @@ Notes: and `sortable`. - `search.rollups` is optional. When configured, the server builds schema-owned `.agg` rollup companions and enables `POST /v1/stream/{name}/_aggregate`. +- A rollup may set `include` to a normal search query string. Records that do + not match that query do not contribute to that rollup. +- A `count` rollup measure may also set `include`. Matching records contribute + `1`; non-matching records contribute `0` to that measure while still + contributing to the rest of the rollup row. 
## HTTP API @@ -110,7 +115,7 @@ Accepted POST shapes: 4) Search update with rollups: ```json -{"search": {"primaryTimestampField": "eventTime", "fields": {"eventTime": {"kind": "date", "bindings": [{"version": 1, "jsonPointer": "/eventTime"}], "exact": true, "column": true, "exists": true, "sortable": true}, "service": {"kind": "keyword", "bindings": [{"version": 1, "jsonPointer": "/service"}], "exact": true, "prefix": true, "exists": true}, "duration": {"kind": "float", "bindings": [{"version": 1, "jsonPointer": "/duration"}], "exact": true, "column": true, "exists": true, "sortable": true, "aggregatable": true}}, "rollups": {"requests": {"dimensions": ["service"], "intervals": ["1m"], "measures": {"requests": {"kind": "count"}, "latency": {"kind": "summary", "field": "duration", "histogram": "log2_v1"}}}}}} +{"search": {"primaryTimestampField": "eventTime", "fields": {"eventTime": {"kind": "date", "bindings": [{"version": 1, "jsonPointer": "/eventTime"}], "exact": true, "column": true, "exists": true, "sortable": true}, "service": {"kind": "keyword", "bindings": [{"version": 1, "jsonPointer": "/service"}], "exact": true, "prefix": true, "exists": true}, "kind": {"kind": "keyword", "bindings": [{"version": 1, "jsonPointer": "/kind"}], "exact": true, "exists": true}, "error": {"kind": "bool", "bindings": [{"version": 1, "jsonPointer": "/error"}], "exact": true, "column": true, "exists": true}, "duration": {"kind": "float", "bindings": [{"version": 1, "jsonPointer": "/duration"}], "exact": true, "column": true, "exists": true, "sortable": true, "aggregatable": true}}, "rollups": {"requests": {"include": "kind:server", "dimensions": ["service"], "intervals": ["1m"], "measures": {"requests": {"kind": "count"}, "errors": {"kind": "count", "include": "error:true"}, "latency": {"kind": "summary", "field": "duration", "histogram": "log2_v1"}}}}}} ``` Important rule: From 50d6d54f96c53918707cd055b46cb77689225ea7 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Fri, 12 Jun 2026 18:22:54 +0700 Subject: [PATCH 10/12] Harden otel policy and observability diagnostics --- docs/durable-streams-spec.md | 21 ++- docs/profile-otel-traces.md | 35 +++- docs/request-observability.md | 16 +- src/app_core.ts | 16 +- src/observe/request.ts | 47 ++++- src/profiles/otelTraces.ts | 23 ++- src/profiles/otelTraces/normalize.ts | 114 ++++++++---- src/profiles/otelTraces/otlp.ts | 260 +++++++++++++++++++-------- test/observe_request.test.ts | 32 +++- test/profile_otel_traces.test.ts | 194 +++++++++++++++++++- 10 files changed, 615 insertions(+), 143 deletions(-) diff --git a/docs/durable-streams-spec.md b/docs/durable-streams-spec.md index 435b445..c9f6fe3 100644 --- a/docs/durable-streams-spec.md +++ b/docs/durable-streams-spec.md @@ -255,11 +255,13 @@ also returns HTTP `200` and includes OTLP partial-success information with the number of rejected spans and an error message. Clients must not retry spans rejected by a partial-success response. -Malformed payloads and requests that exceed resource-span, scope-span, or span -count limits return `400`. Compressed or decoded OTLP bodies that exceed the -configured byte limits return `413`. Unsupported content types or content -encodings return `415`. Accepted spans are appended as canonical JSON span -records using `traceId` as the routing key. +Malformed payloads and requests that exceed resource-span or scope-span limits +return `400`. Compressed or decoded OTLP bodies that exceed the configured byte +limits return `413`. Unsupported content types or content encodings return +`415`. If a decodable request exceeds the configured span-count limit, the +server accepts the first spans up to the limit and returns HTTP `200` with OTLP +partial-success information for the rejected overflow. Accepted spans are +appended as canonical JSON span records using `traceId` as the routing key. 
### 4.5 Request observability @@ -316,14 +318,17 @@ the underlying append-only stream. `trace.rootSpanId` is selected from all returned root candidates by preferring likely request roots: no parent, server kind, HTTP fields, request ID, and then -duration. Other roots remain in `trace.tree`. `trace.criticalPath` is an -interval-aware highlighted path from the selected root when one exists. +duration. Other roots remain in `trace.tree`. `trace.criticalPath` is a +best-effort interval-aware latency path from the selected root when one exists. `coverage.events` and `coverage.traces` de-duplicate `hits`, `unique_hits`, and `total.value` by stream and offset across overlapping lookup searches. `query_count` and `batch_count` report the number of underlying `_search` batches used. `total.relation` is `gte` when limits, timeouts, incomplete -coverage, or underlying lower-bound totals prevent an exact unique total. +coverage, or underlying lower-bound totals prevent an exact unique total. Each +coverage object also includes `queries`, preserving per-query diagnostics such +as `q`, returned `hits`, backend `total`, page count, timeout state, and limit +state. The endpoint returns `400` for invalid request bodies, unsupported profile combinations, or streams without search configuration. 
Missing streams return diff --git a/docs/profile-otel-traces.md b/docs/profile-otel-traces.md index fabcd6c..720a764 100644 --- a/docs/profile-otel-traces.md +++ b/docs/profile-otel-traces.md @@ -53,12 +53,16 @@ Content-Type: application/json "rawLinks": true }, "dbStatementMode": "drop", + "urlMode": "drop_query", "otlpLimits": { "maxCompressedBytes": 4194304, "maxDecodedBytes": 16777216, "maxResourceSpansPerRequest": 1024, "maxScopeSpansPerRequest": 4096, - "maxSpansPerRequest": 50000 + "maxSpansPerRequest": 50000, + "maxAnyValueDepth": 16, + "maxArrayValuesPerAnyValue": 256, + "maxKvListValuesPerAnyValue": 256 }, "observability": { "request": { @@ -72,10 +76,15 @@ Content-Type: application/json Supported `dbStatementMode` values: - `drop` stores `db.statement` as `null` -- `raw` stores the statement after normal attribute value truncation +- `raw` stores the statement after `maxStatementBytes` truncation There is no `redact_literals` mode in the shipped implementation. +Supported `urlMode` values: + +- `drop_query` stores `http.url` / `url.full` without query or fragment +- `raw` stores the URL after normal attribute value truncation + Redaction matches configured keys case-insensitively and also checks dotted header/metadata suffixes. For example, the built-in `authorization`, `cookie`, `set-cookie`, and `x-api-key` entries redact attributes such as @@ -110,7 +119,10 @@ UI rendering. When an already-canonical span record is appended again, top-level canonical fields such as service, environment, request ID, HTTP fields, error fields, duration, and `eventNames` are preserved even if the raw attributes or raw -events were not retained in the stored record. +events were not retained in the stored record. Preservation still applies the +current profile policy: `dbStatementMode` can drop `db.statement`, `urlMode` +can remove URL query/fragment data, and preserved strings are truncated by the +active attribute limits. 
## OTLP Ingestion @@ -151,12 +163,17 @@ Both endpoints support: - `application/json` - `Content-Encoding: gzip` -Malformed payloads and requests that exceed resource-span, scope-span, or span -count limits return `400`. Payloads that exceed compressed or decoded byte -limits return `413`. Unsupported media types or encodings return `415`. A -successful full acceptance returns OTLP success. Partial acceptance returns -HTTP `200` with OTLP `partialSuccess` / `partial_success` information; clients -should not retry rejected spans from that response. +Malformed payloads and requests that exceed resource-span or scope-span limits +return `400`. Payloads that exceed compressed or decoded byte limits return +`413`. Unsupported media types or encodings return `415`. If a decodable batch +exceeds `maxSpansPerRequest`, the first spans up to the limit are accepted and +the response is HTTP `200` with OTLP `partialSuccess` / `partial_success` +information for the rejected overflow. Clients should not retry rejected spans +from that response. + +OTLP `AnyValue` decoding is bounded by `maxAnyValueDepth`, +`maxArrayValuesPerAnyValue`, and `maxKvListValuesPerAnyValue` for both JSON +and protobuf requests. ## JSON Appends diff --git a/docs/request-observability.md b/docs/request-observability.md index 2cdb2d1..4a646e0 100644 --- a/docs/request-observability.md +++ b/docs/request-observability.md @@ -189,10 +189,11 @@ request roots first: no parent, server kind, HTTP fields, request ID, and then duration. Other root spans remain in `trace.tree`; the selected root only drives summary fields and the highlighted path. -`criticalPath` is an interval-aware highlighted span path that starts at the +`criticalPath` is a best-effort interval-aware latency path that starts at the selected root when one exists. Child selection uses each subtree's exclusive time plus its longest descendant contribution, so overlapping sibling spans do -not simply add together. +not simply add together. 
It is intended for UI highlighting and debugging, not +as a mathematically exact causal critical path. ## Trace Tree @@ -254,6 +255,7 @@ the request: - `scanned_tail_docs` - `scanned_segments` - `possible_missing_events_upper_bound` +- `queries` `hits`, `unique_hits`, and `total.value` are de-duplicated by stream and offset across overlapping lookup searches. `query_count` / `batch_count` show how many @@ -261,6 +263,16 @@ underlying `_search` batches were used. `total.relation` is `gte` whenever a limit, timeout, incomplete coverage, or any underlying lower-bound total means the exact unique total is not known. +`queries` preserves per-query diagnostics for UI debug panels: + +- `q` +- `hits` +- `total` +- `pages` +- `complete` +- `timed_out` +- `limit_reached` + Warnings are emitted for missing evlog events, missing trace spans, hit limits, incomplete search coverage, and missing parent spans. A UI should surface these warnings instead of presenting an incomplete response as authoritative. 
diff --git a/src/app_core.ts b/src/app_core.ts index e8454d9..c0cfacd 100644 --- a/src/app_core.ts +++ b/src/app_core.ts @@ -70,6 +70,8 @@ import { quoteSearchValue, sortTimeline, summarizeSearchCoverage, + summarizeSearchQueryCoverage, + type ObserveSearchQueryCoverage, } from "./observe/request"; import { buildRequestObservabilityPairingDescriptor } from "./observe/pairing"; import { dsError } from "./util/ds_error.ts"; @@ -1828,7 +1830,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { q: string, limit: number, sort: string[] - ): Promise<{ hits: SearchHit[]; batches: SearchResultBatch[]; limitReached: boolean } | Response> => { + ): Promise<{ hits: SearchHit[]; batches: SearchResultBatch[]; limitReached: boolean; query: ObserveSearchQueryCoverage } | Response> => { const regRes = registry.getRegistryResult(stream); if (Result.isError(regRes)) return internalError(regRes.error.message); const hits: SearchHit[] = []; @@ -1867,7 +1869,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { break; } } - return { hits, batches, limitReached }; + return { hits, batches, limitReached, query: summarizeSearchQueryCoverage(q, batches, hits, limitReached) }; }; const timeClauses = buildTimeSearchClauses(observeReq.time); @@ -1876,9 +1878,11 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { const traceSort = ["timestamp:asc", "spanId:asc"]; let eventHits: SearchHit[] = []; let eventBatches: SearchResultBatch[] = []; + const eventQueries: ObserveSearchQueryCoverage[] = []; let eventLimitReached = false; let traceHits: SearchHit[] = []; let traceBatches: SearchResultBatch[] = []; + const traceQueries: ObserveSearchQueryCoverage[] = []; let traceLimitReached = false; const candidateTraceIds = new Set(); const addTraceIdsFromHits = (hits: SearchHit[]) => { @@ -1890,7 +1894,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { }; const appendSearch = ( target: 
"events" | "traces", - result: { hits: SearchHit[]; batches: SearchResultBatch[]; limitReached: boolean } + result: { hits: SearchHit[]; batches: SearchResultBatch[]; limitReached: boolean; query: ObserveSearchQueryCoverage } ) => { const stream = result.batches[0]?.stream ?? ""; if (target === "events") { @@ -1902,6 +1906,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { eventHits.push({ ...hit, stream } as SearchHit); } eventBatches.push(...result.batches); + eventQueries.push(result.query); eventLimitReached = eventLimitReached || result.limitReached || eventHits.length >= observeReq.limits.events && !!result.batches.at(-1)?.nextSearchAfter; } else { const seen = new Set(traceHits.map((hit) => `${(hit as SearchHit & { stream?: string }).stream ?? ""}\0${hit.offset}`)); @@ -1912,6 +1917,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { traceHits.push({ ...hit, stream } as SearchHit); } traceBatches.push(...result.batches); + traceQueries.push(result.query); traceLimitReached = traceLimitReached || result.limitReached || traceHits.length >= observeReq.limits.spans && !!result.batches.at(-1)?.nextSearchAfter; } }; @@ -1972,8 +1978,8 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { } } - const eventCoverage = summarizeSearchCoverage(eventBatches, eventHits, eventLimitReached); - const traceCoverage = summarizeSearchCoverage(traceBatches, traceHits, traceLimitReached); + const eventCoverage = summarizeSearchCoverage(eventBatches, eventHits, eventLimitReached, eventQueries); + const traceCoverage = summarizeSearchCoverage(traceBatches, traceHits, traceLimitReached, traceQueries); const trace = buildTraceDetails( traceHits.map((hit) => hit.source), { spanLimitReached: traceCoverage.limit_reached, coverageComplete: traceCoverage.complete } diff --git a/src/observe/request.ts b/src/observe/request.ts index 2a66da5..9e8795f 100644 --- a/src/observe/request.ts +++ 
b/src/observe/request.ts @@ -43,6 +43,17 @@ export type ObserveSearchCoverage = { scanned_tail_docs: number; scanned_segments: number; possible_missing_events_upper_bound: number; + queries: ObserveSearchQueryCoverage[]; +}; + +export type ObserveSearchQueryCoverage = { + q: string; + hits: number; + total: { value: number; relation: "eq" | "gte" }; + pages: number; + complete: boolean; + timed_out: boolean; + limit_reached: boolean; }; export type TraceTreeNode = { @@ -519,7 +530,40 @@ export function buildTraceDetails(spansRaw: unknown[], args?: { spanLimitReached }; } -export function summarizeSearchCoverage(batches: SearchResultBatch[], hits: SearchHit[], limitReached: boolean): ObserveSearchCoverage { +export function summarizeSearchQueryCoverage( + q: string, + batches: SearchResultBatch[], + hits: SearchHit[], + limitReached: boolean +): ObserveSearchQueryCoverage { + let complete = batches.length > 0; + let timedOut = false; + let totalValue = 0; + let totalRelation: "eq" | "gte" = "eq"; + for (const batch of batches) { + complete = complete && batch.coverage.complete; + timedOut = timedOut || batch.timedOut; + totalValue = Math.max(totalValue, batch.total.value); + if (batch.total.relation === "gte") totalRelation = "gte"; + } + if (batches.length === 0) complete = true; + return { + q, + hits: hits.length, + total: { value: totalValue, relation: totalRelation }, + pages: batches.length, + complete: complete && !timedOut && !limitReached, + timed_out: timedOut, + limit_reached: limitReached, + }; +} + +export function summarizeSearchCoverage( + batches: SearchResultBatch[], + hits: SearchHit[], + limitReached: boolean, + queries: ObserveSearchQueryCoverage[] = [] +): ObserveSearchCoverage { const families = new Set(); const uniqueHitKeys = new Set(); let complete = batches.length > 0; @@ -559,6 +603,7 @@ export function summarizeSearchCoverage(batches: SearchResultBatch[], hits: Sear scanned_tail_docs: scannedTailDocs, scanned_segments: scannedSegments, 
possible_missing_events_upper_bound: possibleMissing, + queries, }; } diff --git a/src/profiles/otelTraces.ts b/src/profiles/otelTraces.ts index 9d1a203..d809b50 100644 --- a/src/profiles/otelTraces.ts +++ b/src/profiles/otelTraces.ts @@ -26,6 +26,7 @@ import { type OtelTraceOtlpLimits, type OtelTraceStoreConfig, type OtelTracesStreamProfile, + type UrlMode, } from "./otelTraces/normalize"; import { decodeOtlpTraceExportRequestResult } from "./otelTraces/otlp"; @@ -98,7 +99,16 @@ function parseOtlpLimitsResult(raw: unknown, path: string): Result { + if (raw === undefined) return Result.ok(undefined); + if (raw === "drop_query" || raw === "raw") return Result.ok(raw); + return Result.err({ message: `${path} must be drop_query or raw` }); +} + function parseStreamNameResult(raw: unknown, path: string): Result { if (raw === undefined) return Result.ok(undefined); if (typeof raw !== "string") return Result.err({ message: `${path} must be a string` }); @@ -170,7 +186,7 @@ function validateOtelTracesProfileResult(raw: unknown, path: string): Result; store?: Partial; dbStatementMode?: DbStatementMode; + urlMode?: UrlMode; otlpLimits?: Partial; observability?: { request?: { @@ -232,6 +237,9 @@ export const DEFAULT_OTLP_LIMITS: OtelTraceOtlpLimits = { maxResourceSpansPerRequest: 1024, maxScopeSpansPerRequest: 4096, maxSpansPerRequest: 50_000, + maxAnyValueDepth: 16, + maxArrayValuesPerAnyValue: 256, + maxKvListValuesPerAnyValue: 256, }; export const DEFAULT_STORE_CONFIG: OtelTraceStoreConfig = { @@ -241,6 +249,8 @@ export const DEFAULT_STORE_CONFIG: OtelTraceStoreConfig = { rawLinks: true, }; +export const DEFAULT_URL_MODE: UrlMode = "drop_query"; + function normalizeString(value: unknown): string | null { if (typeof value !== "string") return null; const trimmed = value.trim(); @@ -343,6 +353,24 @@ function truncateUtf8(value: string, maxBytes: number): string { return TEXT_DECODER.decode(bytes.slice(0, Math.max(0, maxBytes))); } +function truncateNullableString(value: 
string | null, maxBytes: number): string | null { + return value == null ? null : truncateUtf8(value, maxBytes); +} + +function stripUrlQueryAndFragment(value: string): string { + const fragmentStart = value.indexOf("#"); + const withoutFragment = fragmentStart >= 0 ? value.slice(0, fragmentStart) : value; + const queryStart = withoutFragment.indexOf("?"); + return queryStart >= 0 ? withoutFragment.slice(0, queryStart) : withoutFragment; +} + +function sanitizeUrl(value: string | null, urlMode: UrlMode, maxBytes: number): string | null { + if (!value) return null; + const sanitized = urlMode === "raw" ? value : stripUrlQueryAndFragment(value); + const normalized = normalizeString(sanitized); + return normalized ? truncateUtf8(normalized, maxBytes) : null; +} + function redactionKeyCandidates(key: string): Set { const lowered = key.trim().toLowerCase(); const out = new Set(); @@ -496,6 +524,7 @@ export function normalizeOtelDecodedSpanResult( const limits = { ...DEFAULT_ATTRIBUTE_LIMITS, ...(profile.attributeLimits ?? {}) }; const store = { ...DEFAULT_STORE_CONFIG, ...(profile.store ?? {}) }; + const urlMode = profile.urlMode ?? DEFAULT_URL_MODE; const redactKeys = new Set([...DEFAULT_OTEL_TRACE_REDACT_KEYS, ...(profile.redactKeys ?? [])].map((key) => key.toLowerCase())); const requestIdAttributes = profile.requestIdAttributes ?? [...DEFAULT_REQUEST_ID_ATTRIBUTES]; @@ -598,7 +627,8 @@ export function normalizeOtelDecodedSpanResult( const attrErrorMessage = getString(spanAttrs, "exception.message", "error.message"); const attrErrorStack = getString(spanAttrs, "exception.stacktrace", "error.stacktrace"); const httpStatusCode = getInteger(spanAttrs, "http.response.status_code", "http.status_code"); - const errorMessage = attrErrorMessage ?? exception.message ?? normalizeString(input.status?.message); + const statusMessage = truncateNullableString(normalizeString(input.status?.message), limits.maxAttributeValueBytes); + const errorMessage = attrErrorMessage ?? 
exception.message ?? statusMessage; const traceFlagsRaw = normalizeInteger(input.traceFlags); const dbStatementRaw = getString(spanAttrs, "db.statement", "db.query.text"); const dbStatement = @@ -626,7 +656,7 @@ export function normalizeOtelDecodedSpanResult( kind: normalizeSpanKind(input.kind), status: { code: statusCode, - message: normalizeString(input.status?.message), + message: statusMessage, }, service, serviceNamespace: getString(resourceAttrs, "service.namespace"), @@ -640,7 +670,7 @@ export function normalizeOtelDecodedSpanResult( route: getString(spanAttrs, "http.route"), path: getString(spanAttrs, "url.path", "http.target"), target: getString(spanAttrs, "http.target"), - url: getString(spanAttrs, "url.full", "http.url"), + url: sanitizeUrl(getString(spanAttrs, "url.full", "http.url"), urlMode, limits.maxAttributeValueBytes), statusCode: httpStatusCode, userAgent: getString(spanAttrs, "user_agent.original", "http.user_agent"), }, @@ -664,7 +694,7 @@ export function normalizeOtelDecodedSpanResult( isError: statusCode === "error" || (httpStatusCode != null && httpStatusCode >= 500) || !!attrErrorType || !!exception.type, type: attrErrorType ?? exception.type, message: errorMessage, - stacktrace: attrErrorStack ?? exception.stacktrace, + stacktrace: truncateNullableString(attrErrorStack ?? exception.stacktrace, limits.maxAttributeValueBytes), }, instrumentationScope: { name: normalizeString(input.instrumentationScope?.name), @@ -730,6 +760,10 @@ function canonicalString(value: unknown, fallback: string | null): string | null return normalized ?? fallback; } +function canonicalLimitedString(value: unknown, fallback: string | null, maxBytes: number): string | null { + return truncateNullableString(canonicalString(value, fallback), maxBytes); +} + function canonicalNumber(value: unknown, fallback: number | null): number | null { const normalized = normalizeNumber(value); return normalized ?? 
fallback; @@ -766,58 +800,73 @@ function preserveRedactionKeys(value: unknown, fallback: string[]): string[] { return Array.from(out).sort(); } -function preserveCanonicalDerivedFields(canonical: CanonicalOtelSpan, raw: Record): CanonicalOtelSpan { +function preserveCanonicalDerivedFields( + canonical: CanonicalOtelSpan, + raw: Record, + profile: OtelTracesStreamProfile, + limits: OtelTraceAttributeLimits +): CanonicalOtelSpan { if (raw.schemaVersion !== 1 || raw.signal !== "trace.span") return canonical; const out: CanonicalOtelSpan = structuredClone(canonical); + const urlMode = profile.urlMode ?? DEFAULT_URL_MODE; out.duration = canonicalNumber(raw.duration, out.duration); - out.service = canonicalString(raw.service, out.service); - out.serviceNamespace = canonicalString(raw.serviceNamespace, out.serviceNamespace); - out.serviceInstanceId = canonicalString(raw.serviceInstanceId, out.serviceInstanceId); - out.environment = canonicalString(raw.environment, out.environment); - out.version = canonicalString(raw.version, out.version); - out.region = canonicalString(raw.region, out.region); - out.requestId = canonicalString(raw.requestId, out.requestId); + out.service = canonicalLimitedString(raw.service, out.service, limits.maxAttributeValueBytes); + out.serviceNamespace = canonicalLimitedString(raw.serviceNamespace, out.serviceNamespace, limits.maxAttributeValueBytes); + out.serviceInstanceId = canonicalLimitedString(raw.serviceInstanceId, out.serviceInstanceId, limits.maxAttributeValueBytes); + out.environment = canonicalLimitedString(raw.environment, out.environment, limits.maxAttributeValueBytes); + out.version = canonicalLimitedString(raw.version, out.version, limits.maxAttributeValueBytes); + out.region = canonicalLimitedString(raw.region, out.region, limits.maxAttributeValueBytes); + out.requestId = canonicalLimitedString(raw.requestId, out.requestId, limits.maxAttributeValueBytes); + + const status = isPlainObject(raw.status) ? 
raw.status : {}; + out.status = { + code: out.status.code, + message: canonicalLimitedString(status.message, out.status.message, limits.maxAttributeValueBytes), + }; const http = isPlainObject(raw.http) ? raw.http : {}; out.http = { - method: canonicalString(http.method, out.http.method), - route: canonicalString(http.route, out.http.route), - path: canonicalString(http.path, out.http.path), - target: canonicalString(http.target, out.http.target), - url: canonicalString(http.url, out.http.url), + method: canonicalLimitedString(http.method, out.http.method, limits.maxAttributeValueBytes), + route: canonicalLimitedString(http.route, out.http.route, limits.maxAttributeValueBytes), + path: canonicalLimitedString(http.path, out.http.path, limits.maxAttributeValueBytes), + target: canonicalLimitedString(http.target, out.http.target, limits.maxAttributeValueBytes), + url: sanitizeUrl(canonicalString(http.url, out.http.url), urlMode, limits.maxAttributeValueBytes), statusCode: canonicalInteger(http.statusCode, out.http.statusCode), - userAgent: canonicalString(http.userAgent, out.http.userAgent), + userAgent: canonicalLimitedString(http.userAgent, out.http.userAgent, limits.maxAttributeValueBytes), }; const db = isPlainObject(raw.db) ? raw.db : {}; out.db = { - system: canonicalString(db.system, out.db.system), - name: canonicalString(db.name, out.db.name), - operation: canonicalString(db.operation, out.db.operation), - statement: canonicalString(db.statement, out.db.statement), + system: canonicalLimitedString(db.system, out.db.system, limits.maxAttributeValueBytes), + name: canonicalLimitedString(db.name, out.db.name, limits.maxAttributeValueBytes), + operation: canonicalLimitedString(db.operation, out.db.operation, limits.maxAttributeValueBytes), + statement: + profile.dbStatementMode === "raw" + ? canonicalLimitedString(db.statement, out.db.statement, limits.maxStatementBytes) + : null, }; const rpc = isPlainObject(raw.rpc) ? 
raw.rpc : {}; out.rpc = { - system: canonicalString(rpc.system, out.rpc.system), - service: canonicalString(rpc.service, out.rpc.service), - method: canonicalString(rpc.method, out.rpc.method), + system: canonicalLimitedString(rpc.system, out.rpc.system, limits.maxAttributeValueBytes), + service: canonicalLimitedString(rpc.service, out.rpc.service, limits.maxAttributeValueBytes), + method: canonicalLimitedString(rpc.method, out.rpc.method, limits.maxAttributeValueBytes), }; const messaging = isPlainObject(raw.messaging) ? raw.messaging : {}; out.messaging = { - system: canonicalString(messaging.system, out.messaging.system), - destination: canonicalString(messaging.destination, out.messaging.destination), - operation: canonicalString(messaging.operation, out.messaging.operation), + system: canonicalLimitedString(messaging.system, out.messaging.system, limits.maxAttributeValueBytes), + destination: canonicalLimitedString(messaging.destination, out.messaging.destination, limits.maxAttributeValueBytes), + operation: canonicalLimitedString(messaging.operation, out.messaging.operation, limits.maxAttributeValueBytes), }; const error = isPlainObject(raw.error) ? 
raw.error : {}; out.error = { isError: canonicalBoolean(error.isError, out.error.isError), - type: canonicalString(error.type, out.error.type), - message: canonicalString(error.message, out.error.message), - stacktrace: canonicalString(error.stacktrace, out.error.stacktrace), + type: canonicalLimitedString(error.type, out.error.type, limits.maxAttributeValueBytes), + message: canonicalLimitedString(error.message, out.error.message, limits.maxAttributeValueBytes), + stacktrace: canonicalLimitedString(error.stacktrace, out.error.stacktrace, limits.maxAttributeValueBytes), }; out.eventNames = preserveCanonicalEventNames(raw.eventNames, out.eventNames); @@ -890,7 +939,8 @@ export function normalizeOtelTraceRecordResult( if (Result.isError(decodedRes)) return decodedRes; const normalizedRes = normalizeOtelDecodedSpanResult(profile, decodedRes.value); if (Result.isError(normalizedRes)) return normalizedRes; - const normalized = preserveCanonicalDerivedFields(normalizedRes.value, isPlainObject(value) ? value : {}); + const limits = { ...DEFAULT_ATTRIBUTE_LIMITS, ...(profile.attributeLimits ?? {}) }; + const normalized = preserveCanonicalDerivedFields(normalizedRes.value, isPlainObject(value) ? value : {}, profile, limits); return Result.ok({ value: normalized, routingKey: normalized.traceId, diff --git a/src/profiles/otelTraces/otlp.ts b/src/profiles/otelTraces/otlp.ts index aca352f..db8a6ea 100644 --- a/src/profiles/otelTraces/otlp.ts +++ b/src/profiles/otelTraces/otlp.ts @@ -31,6 +31,12 @@ type ScopeSpansDecoded = { spans: Array>; }; +type DecodedExport = { + spans: DecodedOtelSpan[]; + rejectedSpans: number; + warnings: string[]; +}; + function baseContentType(value: string): string { return value.split(";")[0]?.trim().toLowerCase() ?? 
""; } @@ -62,70 +68,132 @@ function normalizeNanoString(value: unknown): string | null { return null; } -function anyValueFromJson(raw: unknown): unknown { - if (!isPlainObject(raw)) return structuredClone(raw); - if (Object.prototype.hasOwnProperty.call(raw, "stringValue")) return normalizeString(raw.stringValue) ?? ""; - if (Object.prototype.hasOwnProperty.call(raw, "boolValue")) return raw.boolValue === true; +function appendWarning(warnings: string[], message: string): void { + if (warnings.includes(message)) return; + if (warnings.length < 8) warnings.push(message); +} + +function checkAnyValueDepthResult(depth: number, limits: OtelTraceOtlpLimits): Result { + if (depth > limits.maxAnyValueDepth) { + return Result.err({ message: `OTLP AnyValue nesting too deep (max ${limits.maxAnyValueDepth})` }); + } + return Result.ok(undefined); +} + +function anyValueFromJsonResult(raw: unknown, limits: OtelTraceOtlpLimits, depth = 0): Result { + const depthRes = checkAnyValueDepthResult(depth, limits); + if (Result.isError(depthRes)) return depthRes; + if (!isPlainObject(raw)) return Result.ok(structuredClone(raw)); + if (Object.prototype.hasOwnProperty.call(raw, "stringValue")) return Result.ok(normalizeString(raw.stringValue) ?? ""); + if (Object.prototype.hasOwnProperty.call(raw, "boolValue")) return Result.ok(raw.boolValue === true); if (Object.prototype.hasOwnProperty.call(raw, "intValue")) { const value = raw.intValue; - if (typeof value === "string" && /^-?(0|[1-9][0-9]*)$/.test(value.trim())) return value.trim(); - if (typeof value === "number" && Number.isFinite(value)) return Math.trunc(value); - return null; + if (typeof value === "string" && /^-?(0|[1-9][0-9]*)$/.test(value.trim())) return Result.ok(value.trim()); + if (typeof value === "number" && Number.isFinite(value)) return Result.ok(Math.trunc(value)); + return Result.ok(null); } - if (Object.prototype.hasOwnProperty.call(raw, "doubleValue")) return typeof raw.doubleValue === "number" ? 
raw.doubleValue : Number(raw.doubleValue); - if (Object.prototype.hasOwnProperty.call(raw, "bytesValue")) return normalizeString(raw.bytesValue) ?? ""; + if (Object.prototype.hasOwnProperty.call(raw, "doubleValue")) { + return Result.ok(typeof raw.doubleValue === "number" ? raw.doubleValue : Number(raw.doubleValue)); + } + if (Object.prototype.hasOwnProperty.call(raw, "bytesValue")) return Result.ok(normalizeString(raw.bytesValue) ?? ""); if (isPlainObject(raw.arrayValue) && Array.isArray(raw.arrayValue.values)) { - return raw.arrayValue.values.map(anyValueFromJson); + if (raw.arrayValue.values.length > limits.maxArrayValuesPerAnyValue) { + return Result.err({ message: `OTLP AnyValue array too large (max ${limits.maxArrayValuesPerAnyValue})` }); + } + const out: unknown[] = []; + for (const item of raw.arrayValue.values) { + const valueRes = anyValueFromJsonResult(item, limits, depth + 1); + if (Result.isError(valueRes)) return valueRes; + out.push(valueRes.value); + } + return Result.ok(out); } if (isPlainObject(raw.kvlistValue) && Array.isArray(raw.kvlistValue.values)) { - return keyValuesFromJson(raw.kvlistValue.values); + return keyValuesFromJsonResult(raw.kvlistValue.values, limits, depth + 1, true); } - return structuredClone(raw); + return Result.ok(structuredClone(raw)); } -function keyValuesFromJson(raw: unknown): Record { +function keyValuesFromJsonResult( + raw: unknown, + limits: OtelTraceOtlpLimits, + depth: number, + enforceCollectionLimit: boolean +): Result, { message: string }> { + const depthRes = checkAnyValueDepthResult(depth, limits); + if (Result.isError(depthRes)) return depthRes; const out: Record = {}; - if (!Array.isArray(raw)) return out; + if (!Array.isArray(raw)) return Result.ok(out); + if (enforceCollectionLimit && raw.length > limits.maxKvListValuesPerAnyValue) { + return Result.err({ message: `OTLP AnyValue kvlist too large (max ${limits.maxKvListValuesPerAnyValue})` }); + } for (const item of raw) { if (!isPlainObject(item)) 
continue; const key = normalizeString(item.key); if (!key) continue; - out[key] = anyValueFromJson(item.value); + const valueRes = anyValueFromJsonResult(item.value, limits, depth); + if (Result.isError(valueRes)) return valueRes; + out[key] = valueRes.value; } - return out; + return Result.ok(out); } -function eventFromJson(raw: unknown): DecodedOtelEvent | null { - if (!isPlainObject(raw)) return null; - return { +function eventFromJsonResult(raw: unknown, limits: OtelTraceOtlpLimits): Result { + if (!isPlainObject(raw)) return Result.ok(null); + const attrsRes = keyValuesFromJsonResult(raw.attributes, limits, 0, false); + if (Result.isError(attrsRes)) return attrsRes; + return Result.ok({ timeUnixNano: normalizeNanoString(raw.timeUnixNano), name: normalizeString(raw.name) ?? "", - attributes: keyValuesFromJson(raw.attributes), + attributes: attrsRes.value, droppedAttributesCount: typeof raw.droppedAttributesCount === "number" ? raw.droppedAttributesCount : Number(raw.droppedAttributesCount ?? 0), - }; + }); } -function linkFromJson(raw: unknown): DecodedOtelLink | null { - if (!isPlainObject(raw)) return null; +function linkFromJsonResult(raw: unknown, limits: OtelTraceOtlpLimits): Result { + if (!isPlainObject(raw)) return Result.ok(null); const traceId = normalizeString(raw.traceId); const spanId = normalizeString(raw.spanId); - if (!traceId || !spanId) return null; - return { + if (!traceId || !spanId) return Result.ok(null); + const attrsRes = keyValuesFromJsonResult(raw.attributes, limits, 0, false); + if (Result.isError(attrsRes)) return attrsRes; + return Result.ok({ traceId, spanId, traceState: normalizeString(raw.traceState), - attributes: keyValuesFromJson(raw.attributes), + attributes: attrsRes.value, droppedAttributesCount: typeof raw.droppedAttributesCount === "number" ? raw.droppedAttributesCount : Number(raw.droppedAttributesCount ?? 
0), - }; + }); } -function spanFromJson(raw: unknown): Omit | null { - if (!isPlainObject(raw)) return null; +function spanFromJsonResult( + raw: unknown, + limits: OtelTraceOtlpLimits +): Result | null, { message: string }> { + if (!isPlainObject(raw)) return Result.ok(null); const traceId = normalizeString(raw.traceId); const spanId = normalizeString(raw.spanId); - if (!traceId || !spanId) return null; + if (!traceId || !spanId) return Result.ok(null); const status = isPlainObject(raw.status) ? raw.status : {}; - return { + const attrsRes = keyValuesFromJsonResult(raw.attributes, limits, 0, false); + if (Result.isError(attrsRes)) return attrsRes; + const events: DecodedOtelEvent[] = []; + if (Array.isArray(raw.events)) { + for (const eventRaw of raw.events) { + const eventRes = eventFromJsonResult(eventRaw, limits); + if (Result.isError(eventRes)) return eventRes; + if (eventRes.value) events.push(eventRes.value); + } + } + const links: DecodedOtelLink[] = []; + if (Array.isArray(raw.links)) { + for (const linkRaw of raw.links) { + const linkRes = linkFromJsonResult(linkRaw, limits); + if (Result.isError(linkRes)) return linkRes; + if (linkRes.value) links.push(linkRes.value); + } + } + return Result.ok({ traceId, spanId, parentSpanId: normalizeString(raw.parentSpanId), @@ -139,24 +207,26 @@ function spanFromJson(raw: unknown): Omit !!event) : [], - links: Array.isArray(raw.links) ? raw.links.map(linkFromJson).filter((link): link is DecodedOtelLink => !!link) : [], + attributes: attrsRes.value, + events, + links, droppedAttributesCount: typeof raw.droppedAttributesCount === "number" ? raw.droppedAttributesCount : Number(raw.droppedAttributesCount ?? 0), droppedEventsCount: typeof raw.droppedEventsCount === "number" ? raw.droppedEventsCount : Number(raw.droppedEventsCount ?? 0), droppedLinksCount: typeof raw.droppedLinksCount === "number" ? raw.droppedLinksCount : Number(raw.droppedLinksCount ?? 
0), - }; + }); } type OtlpDecodeCounters = { resourceSpans: number; scopeSpans: number; spans: number; + rejectedSpans: number; + warnings: string[]; }; function incrementLimitCounter( counters: OtlpDecodeCounters, - key: keyof OtlpDecodeCounters, + key: "resourceSpans" | "scopeSpans", max: number, label: string ): Result { @@ -165,7 +235,15 @@ function incrementLimitCounter( return Result.ok(undefined); } -function decodeJsonExportResult(body: Uint8Array, limits: OtelTraceOtlpLimits): Result { +function acceptSpanForDecode(counters: OtlpDecodeCounters, limits: OtelTraceOtlpLimits): boolean { + counters.spans += 1; + if (counters.spans <= limits.maxSpansPerRequest) return true; + counters.rejectedSpans += 1; + appendWarning(counters.warnings, `too many spans in OTLP request (max ${limits.maxSpansPerRequest})`); + return false; +} + +function decodeJsonExportResult(body: Uint8Array, limits: OtelTraceOtlpLimits): Result { let parsed: unknown; try { parsed = JSON.parse(JSON_TEXT_DECODER.decode(body)); @@ -174,14 +252,16 @@ function decodeJsonExportResult(body: Uint8Array, limits: OtelTraceOtlpLimits): } if (!isPlainObject(parsed)) return Result.err({ message: "OTLP JSON request must be an object" }); const out: DecodedOtelSpan[] = []; - const counters: OtlpDecodeCounters = { resourceSpans: 0, scopeSpans: 0, spans: 0 }; + const counters: OtlpDecodeCounters = { resourceSpans: 0, scopeSpans: 0, spans: 0, rejectedSpans: 0, warnings: [] }; const resourceSpans = Array.isArray(parsed.resourceSpans) ? parsed.resourceSpans : []; for (const resourceSpanRaw of resourceSpans) { const resourceLimitRes = incrementLimitCounter(counters, "resourceSpans", limits.maxResourceSpansPerRequest, "resourceSpans"); if (Result.isError(resourceLimitRes)) return resourceLimitRes; if (!isPlainObject(resourceSpanRaw)) continue; const resource = isPlainObject(resourceSpanRaw.resource) ? 
resourceSpanRaw.resource : {}; - const resourceAttributes = keyValuesFromJson(resource.attributes); + const resourceAttributesRes = keyValuesFromJsonResult(resource.attributes, limits, 0, false); + if (Result.isError(resourceAttributesRes)) return resourceAttributesRes; + const resourceAttributes = resourceAttributesRes.value; const resourceSchemaUrl = normalizeString(resourceSpanRaw.schemaUrl); const scopeSpans = [ ...(Array.isArray(resourceSpanRaw.scopeSpans) ? resourceSpanRaw.scopeSpans : []), @@ -192,17 +272,20 @@ function decodeJsonExportResult(body: Uint8Array, limits: OtelTraceOtlpLimits): if (Result.isError(scopeLimitRes)) return scopeLimitRes; if (!isPlainObject(scopeSpanRaw)) continue; const scopeRaw = isPlainObject(scopeSpanRaw.scope) ? scopeSpanRaw.scope : isPlainObject(scopeSpanRaw.instrumentationLibrary) ? scopeSpanRaw.instrumentationLibrary : {}; + const scopeAttrsRes = keyValuesFromJsonResult(scopeRaw.attributes, limits, 0, false); + if (Result.isError(scopeAttrsRes)) return scopeAttrsRes; const scope = { name: normalizeString(scopeRaw.name), version: normalizeString(scopeRaw.version), schemaUrl: normalizeString(scopeSpanRaw.schemaUrl), - attributes: keyValuesFromJson(scopeRaw.attributes), + attributes: scopeAttrsRes.value, }; const spans = Array.isArray(scopeSpanRaw.spans) ? 
scopeSpanRaw.spans : []; for (const spanRaw of spans) { - const spanLimitRes = incrementLimitCounter(counters, "spans", limits.maxSpansPerRequest, "spans"); - if (Result.isError(spanLimitRes)) return spanLimitRes; - const span = spanFromJson(spanRaw); + if (!acceptSpanForDecode(counters, limits)) continue; + const spanRes = spanFromJsonResult(spanRaw, limits); + if (Result.isError(spanRes)) return spanRes; + const span = spanRes.value; if (!span) continue; out.push({ ...span, @@ -213,7 +296,7 @@ function decodeJsonExportResult(body: Uint8Array, limits: OtelTraceOtlpLimits): } } } - return Result.ok(out); + return Result.ok({ spans: out, rejectedSpans: counters.rejectedSpans, warnings: counters.warnings }); } class ProtoReader { @@ -310,7 +393,9 @@ function signedInt64(value: bigint): string { return value > 9_223_372_036_854_775_807n ? (value - 18_446_744_073_709_551_616n).toString() : value.toString(); } -function decodeAnyValue(bytes: Uint8Array): Result { +function decodeAnyValue(bytes: Uint8Array, limits: OtelTraceOtlpLimits, depth = 0): Result { + const depthRes = checkAnyValueDepthResult(depth, limits); + if (Result.isError(depthRes)) return depthRes; const reader = new ProtoReader(bytes); let value: unknown = null; while (!reader.eof()) { @@ -336,13 +421,13 @@ function decodeAnyValue(bytes: Uint8Array): Result } else if (field === 5 && wire === 2) { const bytesRes = reader.readBytes(); if (Result.isError(bytesRes)) return bytesRes; - const arrayRes = decodeArrayValue(bytesRes.value); + const arrayRes = decodeArrayValue(bytesRes.value, limits, depth + 1); if (Result.isError(arrayRes)) return arrayRes; value = arrayRes.value; } else if (field === 6 && wire === 2) { const bytesRes = reader.readBytes(); if (Result.isError(bytesRes)) return bytesRes; - const kvRes = decodeKeyValueList(bytesRes.value); + const kvRes = decodeKeyValueList(bytesRes.value, limits, depth + 1, true); if (Result.isError(kvRes)) return kvRes; value = kvRes.value; } else if (field === 7 && 
wire === 2) { @@ -357,16 +442,21 @@ function decodeAnyValue(bytes: Uint8Array): Result return Result.ok(value); } -function decodeArrayValue(bytes: Uint8Array): Result { +function decodeArrayValue(bytes: Uint8Array, limits: OtelTraceOtlpLimits, depth: number): Result { + const depthRes = checkAnyValueDepthResult(depth, limits); + if (Result.isError(depthRes)) return depthRes; const reader = new ProtoReader(bytes); const out: unknown[] = []; while (!reader.eof()) { const tagRes = reader.readTag(); if (Result.isError(tagRes)) return tagRes; if (tagRes.value.field === 1 && tagRes.value.wire === 2) { + if (out.length >= limits.maxArrayValuesPerAnyValue) { + return Result.err({ message: `OTLP AnyValue array too large (max ${limits.maxArrayValuesPerAnyValue})` }); + } const bytesRes = reader.readBytes(); if (Result.isError(bytesRes)) return bytesRes; - const valueRes = decodeAnyValue(bytesRes.value); + const valueRes = decodeAnyValue(bytesRes.value, limits, depth); if (Result.isError(valueRes)) return valueRes; out.push(valueRes.value); } else { @@ -377,7 +467,7 @@ function decodeArrayValue(bytes: Uint8Array): Result { +function decodeKeyValue(bytes: Uint8Array, limits: OtelTraceOtlpLimits, depth: number): Result<{ key: string; value: unknown } | null, { message: string }> { const reader = new ProtoReader(bytes); let key = ""; let value: unknown = null; @@ -392,7 +482,7 @@ function decodeKeyValue(bytes: Uint8Array): Result<{ key: string; value: unknown } else if (field === 2 && wire === 2) { const bytesRes = reader.readBytes(); if (Result.isError(bytesRes)) return bytesRes; - const valueRes = decodeAnyValue(bytesRes.value); + const valueRes = decodeAnyValue(bytesRes.value, limits, depth); if (Result.isError(valueRes)) return valueRes; value = valueRes.value; } else { @@ -403,16 +493,28 @@ function decodeKeyValue(bytes: Uint8Array): Result<{ key: string; value: unknown return Result.ok(key === "" ? 
null : { key, value }); } -function decodeKeyValueList(bytes: Uint8Array): Result, { message: string }> { +function decodeKeyValueList( + bytes: Uint8Array, + limits: OtelTraceOtlpLimits, + depth: number, + enforceCollectionLimit: boolean +): Result, { message: string }> { + const depthRes = checkAnyValueDepthResult(depth, limits); + if (Result.isError(depthRes)) return depthRes; const reader = new ProtoReader(bytes); const out: Record = {}; + let count = 0; while (!reader.eof()) { const tagRes = reader.readTag(); if (Result.isError(tagRes)) return tagRes; if (tagRes.value.field === 1 && tagRes.value.wire === 2) { + count += 1; + if (enforceCollectionLimit && count > limits.maxKvListValuesPerAnyValue) { + return Result.err({ message: `OTLP AnyValue kvlist too large (max ${limits.maxKvListValuesPerAnyValue})` }); + } const bytesRes = reader.readBytes(); if (Result.isError(bytesRes)) return bytesRes; - const kvRes = decodeKeyValue(bytesRes.value); + const kvRes = decodeKeyValue(bytesRes.value, limits, depth); if (Result.isError(kvRes)) return kvRes; if (kvRes.value) out[kvRes.value.key] = kvRes.value.value; } else { @@ -423,11 +525,11 @@ function decodeKeyValueList(bytes: Uint8Array): Result, return Result.ok(out); } -function decodeResource(bytes: Uint8Array): Result, { message: string }> { - return decodeKeyValueList(bytes); +function decodeResource(bytes: Uint8Array, limits: OtelTraceOtlpLimits): Result, { message: string }> { + return decodeKeyValueList(bytes, limits, 0, false); } -function decodeScope(bytes: Uint8Array): Result { +function decodeScope(bytes: Uint8Array, limits: OtelTraceOtlpLimits): Result { const reader = new ProtoReader(bytes); const scope: ScopeSpansDecoded["scope"] = { name: null, version: null, schemaUrl: null, attributes: {} }; while (!reader.eof()) { @@ -445,7 +547,7 @@ function decodeScope(bytes: Uint8Array): Result { +function decodeEvent(bytes: Uint8Array, limits: OtelTraceOtlpLimits): Result { const reader = new ProtoReader(bytes); 
const event: DecodedOtelEvent = { timeUnixNano: null, name: "", attributes: {}, droppedAttributesCount: 0 }; while (!reader.eof()) { @@ -497,7 +599,7 @@ function decodeEvent(bytes: Uint8Array): Result { +function decodeLink(bytes: Uint8Array, limits: OtelTraceOtlpLimits): Result { const reader = new ProtoReader(bytes); const link: DecodedOtelLink = { traceId: "", spanId: "", traceState: null, attributes: {}, droppedAttributesCount: 0 }; while (!reader.eof()) { @@ -534,7 +636,7 @@ function decodeLink(bytes: Uint8Array): Result, { message: string }> { +function decodeSpan( + bytes: Uint8Array, + limits: OtelTraceOtlpLimits +): Result, { message: string }> { const reader = new ProtoReader(bytes); const span: Omit = { traceId: "", @@ -605,7 +710,7 @@ function decodeSpan(bytes: Uint8Array): Result { +function decodeProtobufExportResult(body: Uint8Array, limits: OtelTraceOtlpLimits): Result { const reader = new ProtoReader(body); const out: DecodedOtelSpan[] = []; - const counters: OtlpDecodeCounters = { resourceSpans: 0, scopeSpans: 0, spans: 0 }; + const counters: OtlpDecodeCounters = { resourceSpans: 0, scopeSpans: 0, spans: 0, rejectedSpans: 0, warnings: [] }; while (!reader.eof()) { const tagRes = reader.readTag(); if (Result.isError(tagRes)) return tagRes; @@ -754,7 +858,7 @@ function decodeProtobufExportResult(body: Uint8Array, limits: OtelTraceOtlpLimit if (Result.isError(skipRes)) return skipRes; } } - return Result.ok(out); + return Result.ok({ spans: out, rejectedSpans: counters.rejectedSpans, warnings: counters.warnings }); } function decodeBody(args: { @@ -763,7 +867,7 @@ function decodeBody(args: { body: Uint8Array; maxDecodedBytes: number; limits: OtelTraceOtlpLimits; -}): Result<{ spans: DecodedOtelSpan[]; responseEncoding: "protobuf" | "json" }, OtlpTraceExportError> { +}): Result { let body = args.body; const maxDecodedBytes = Math.min(args.maxDecodedBytes, args.limits.maxDecodedBytes); const encoding = args.contentEncoding?.trim().toLowerCase() ?? 
""; @@ -792,12 +896,12 @@ function decodeBody(args: { if (contentType === JSON_CONTENT_TYPE) { const spansRes = decodeJsonExportResult(body, args.limits); if (Result.isError(spansRes)) return Result.err({ status: 400, message: spansRes.error.message }); - return Result.ok({ spans: spansRes.value, responseEncoding: "json" }); + return Result.ok({ ...spansRes.value, responseEncoding: "json" }); } if (contentType === PROTOBUF_CONTENT_TYPE) { const spansRes = decodeProtobufExportResult(body, args.limits); if (Result.isError(spansRes)) return Result.err({ status: 400, message: spansRes.error.message }); - return Result.ok({ spans: spansRes.value, responseEncoding: "protobuf" }); + return Result.ok({ ...spansRes.value, responseEncoding: "protobuf" }); } return Result.err({ status: 415, message: "OTLP traces require application/x-protobuf or application/json" }); } @@ -814,8 +918,8 @@ export function decodeOtlpTraceExportRequestResult(args: { const decodedRes = decodeBody({ ...args, limits }); if (Result.isError(decodedRes)) return decodedRes; const records: OtlpTraceExportResult["records"] = []; - const warnings: string[] = []; - let rejectedSpans = 0; + const warnings: string[] = [...decodedRes.value.warnings]; + let rejectedSpans = decodedRes.value.rejectedSpans; for (const span of decodedRes.value.spans) { const normalizedRes = normalizeOtelDecodedSpanResult(args.profile, span); if (Result.isError(normalizedRes)) { diff --git a/test/observe_request.test.ts b/test/observe_request.test.ts index b372581..4a655a3 100644 --- a/test/observe_request.test.ts +++ b/test/observe_request.test.ts @@ -2,7 +2,7 @@ import { describe, expect, test } from "bun:test"; import { mkdtempSync, rmSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { buildTraceDetails, summarizeSearchCoverage } from "../src/observe/request"; +import { buildTraceDetails, summarizeSearchCoverage, summarizeSearchQueryCoverage } from "../src/observe/request"; import 
type { SearchHit, SearchResultBatch } from "../src/reader"; import { createProfileTestApp, fetchJsonApp } from "./profile_test_utils"; @@ -252,11 +252,21 @@ describe("observe request API", () => { { stream: "app-traces", offset: "1", score: 1, sort: [], fields: {}, source: {} }, ]; - expect(summarizeSearchCoverage(batches, hits, false)).toMatchObject({ + const query = summarizeSearchQueryCoverage("trace:abc", batches, hits.slice(0, 2), false); + expect(query).toMatchObject({ + q: "trace:abc", + hits: 2, + total: { value: 2, relation: "eq" }, + pages: 2, + complete: true, + }); + + expect(summarizeSearchCoverage(batches, hits, false, [query])).toMatchObject({ hits: 2, unique_hits: 2, query_count: 2, total: { value: 2, relation: "eq" }, + queries: [query], }); }); @@ -310,6 +320,18 @@ describe("observe request API", () => { expect(res.body.timeline.length).toBeGreaterThanOrEqual(7); expect(res.body.coverage.events.searched).toBe(true); expect(res.body.coverage.traces.searched).toBe(true); + expect(res.body.coverage.events.queries[0]).toMatchObject({ + hits: 1, + pages: 1, + complete: true, + }); + expect(res.body.coverage.events.queries[0].q).toContain("req:"); + expect(res.body.coverage.traces.queries[0]).toMatchObject({ + hits: 3, + pages: 1, + complete: true, + }); + expect(res.body.coverage.traces.queries[0].q).toContain("trace:"); expect(res.body.coverage.warnings).toEqual([]); } finally { await app.close(); @@ -421,6 +443,12 @@ describe("observe request API", () => { expect(res.body.trace.partial).toBe(false); expect(res.body.coverage.traces.hits).toBe(1200); expect(res.body.coverage.traces.limit_reached).toBe(false); + expect(res.body.coverage.traces.queries[0]).toMatchObject({ + hits: 1200, + pages: 3, + complete: true, + }); + expect(res.body.coverage.traces.queries[0].q).toBe(`trace:"${TRACE_ID}"`); expect(res.body.coverage.warnings).toEqual([]); } finally { await app.close(); diff --git a/test/profile_otel_traces.test.ts b/test/profile_otel_traces.test.ts 
index 8a10b5c..6f5ad71 100644 --- a/test/profile_otel_traces.test.ts +++ b/test/profile_otel_traces.test.ts @@ -140,6 +140,21 @@ function kvString(key: string, value: string): number[] { return out; } +function anyArray(values: number[][]): number[] { + const arrayValue: number[] = []; + for (const value of values) writeMessage(arrayValue, 1, value); + const out: number[] = []; + writeMessage(out, 5, arrayValue); + return out; +} + +function kvAny(key: string, value: number[]): number[] { + const out: number[] = []; + writeString(out, 1, key); + writeMessage(out, 2, value); + return out; +} + function statusMessage(code: number, message: string): number[] { const out: number[] = []; writeString(out, 2, message); @@ -148,7 +163,7 @@ function statusMessage(code: number, message: string): number[] { return out; } -function makeOtlpProtoRequest(): Uint8Array { +function makeOtlpProtoRequest(extraSpanAttributes: number[][] = []): Uint8Array { const span: number[] = []; writeBytes(span, 1, hexBytes(TRACE_ID)); writeBytes(span, 2, hexBytes(CHILD_SPAN_ID)); @@ -161,6 +176,7 @@ function makeOtlpProtoRequest(): Uint8Array { writeMessage(span, 9, kvString("request.id", "req_proto_1")); writeMessage(span, 9, kvString("db.system", "postgresql")); writeMessage(span, 9, kvString("db.operation", "SELECT")); + for (const attr of extraSpanAttributes) writeMessage(span, 9, attr); writeMessage(span, 15, statusMessage(1, "ok")); const scope: number[] = []; @@ -195,10 +211,14 @@ describe("otel-traces profile", () => { attributeLimits: { maxAttributesPerSpan: 32 }, store: { rawLinks: false }, dbStatementMode: "raw", + urlMode: "raw", otlpLimits: { maxCompressedBytes: 1024, maxDecodedBytes: 2048, maxSpansPerRequest: 100, + maxAnyValueDepth: 8, + maxArrayValuesPerAnyValue: 16, + maxKvListValuesPerAnyValue: 16, }, observability: { request: { @@ -214,10 +234,14 @@ describe("otel-traces profile", () => { attributeLimits: { maxAttributesPerSpan: 32 }, store: { rawLinks: false }, 
dbStatementMode: "raw", + urlMode: "raw", otlpLimits: { maxCompressedBytes: 1024, maxDecodedBytes: 2048, maxSpansPerRequest: 100, + maxAnyValueDepth: 8, + maxArrayValuesPerAnyValue: 16, + maxKvListValuesPerAnyValue: 16, }, observability: { request: { @@ -284,6 +308,14 @@ describe("otel-traces profile", () => { expect(invalidRes.status).toBe(400); expect(invalidRes.body?.error?.message).toContain("dbStatementMode"); + const invalidUrlModeRes = await fetchJsonApp(app, "http://local/v1/stream/otel-invalid/_profile", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ profile: { kind: "otel-traces", urlMode: "redact_query" } }), + }); + expect(invalidUrlModeRes.status).toBe(400); + expect(invalidUrlModeRes.body?.error?.message).toContain("urlMode"); + const invalidPairingRes = await fetchJsonApp(app, "http://local/v1/stream/otel-invalid/_profile", { method: "POST", headers: { "content-type": "application/json" }, @@ -350,6 +382,8 @@ describe("otel-traces profile", () => { "request.id": "req_json_1", "http.request.method": "GET", "http.route": "/checkout", + "url.full": "https://checkout.test/checkout?token=secret#fragment", + "user_agent.original": "test-agent", "http.response.status_code": 500, authorization: "Bearer secret", "http.request.header.authorization": "Bearer header secret", @@ -394,6 +428,8 @@ describe("otel-traces profile", () => { error: { isError: true, type: "Error", message: "checkout failed" }, eventNames: ["exception"], }); + expect(span.http.url).toBe("https://checkout.test/checkout"); + expect(span.http.userAgent).toBe("test-agent"); expect(span.attributes.authorization).toBe("[REDACTED]"); expect(span.attributes["http.request.header.authorization"]).toBe("[REDACTED]"); expect(span.attributes["http.request.header.cookie"]).toBe("[REDACTED]"); @@ -541,6 +577,85 @@ describe("otel-traces profile", () => { } }); + test("canonical re-append preserves derived fields through active privacy policy", async () => 
{ + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-canonical-policy-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + const canonical = { + schemaVersion: 1, + signal: "trace.span", + traceId: TRACE_ID, + spanId: SPAN_ID, + name: "GET /checkout", + kind: "server", + startUnixNano: "1772020800000000000", + endUnixNano: "1772020800123000000", + status: { code: "error", message: "status-message-that-is-long" }, + service: "checkout-service-name-that-is-long", + http: { + method: "GET", + route: "/checkout", + url: "https://x.io/cb?token=secret#fragment", + userAgent: "u".repeat(64), + statusCode: 500, + }, + db: { + system: "postgresql", + statement: "SELECT * FROM users WHERE email = 'secret@example.com'", + }, + error: { + isError: true, + message: "m".repeat(64), + stacktrace: "s".repeat(64), + }, + }; + + await createOtelTraceStream(app, "otel-policy-drop", { + attributeLimits: { maxAttributeValueBytes: 16, maxStatementBytes: 8 }, + }); + const dropAppendRes = await app.fetch( + new Request("http://local/v1/stream/otel-policy-drop", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(canonical), + }) + ); + expect([200, 204]).toContain(dropAppendRes.status); + const dropReadRes = await fetchJsonApp(app, "http://local/v1/stream/otel-policy-drop?format=json", { method: "GET" }); + expect(dropReadRes.status).toBe(200); + const dropped = dropReadRes.body[0]; + expect(dropped.db.statement).toBe(null); + expect(dropped.http.url).toBe("https://x.io/cb"); + expect(dropped.http.userAgent.length).toBeLessThanOrEqual(16); + expect(dropped.status.message.length).toBeLessThanOrEqual(16); + expect(dropped.error.message.length).toBeLessThanOrEqual(16); + expect(dropped.error.stacktrace.length).toBeLessThanOrEqual(16); + + await createOtelTraceStream(app, "otel-policy-raw", { + dbStatementMode: "raw", + urlMode: "raw", + attributeLimits: { maxAttributeValueBytes: 128, 
maxStatementBytes: 8 }, + }); + const rawAppendRes = await app.fetch( + new Request("http://local/v1/stream/otel-policy-raw", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ ...canonical, spanId: CHILD_SPAN_ID }), + }) + ); + expect([200, 204]).toContain(rawAppendRes.status); + const rawReadRes = await fetchJsonApp(app, "http://local/v1/stream/otel-policy-raw?format=json", { method: "GET" }); + expect(rawReadRes.status).toBe(200); + const raw = rawReadRes.body[0]; + expect(raw.db.statement).toBe("SELECT *"); + expect(raw.db.statement.length).toBeLessThanOrEqual(8); + expect(raw.http.url).toBe("https://x.io/cb?token=secret#fragment"); + } finally { + await app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + test("ingests OTLP JSON over the default endpoint with gzip and auto-create", async () => { const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-otlp-json-")); const { app } = createProfileTestApp(root, { @@ -648,7 +763,7 @@ describe("otel-traces profile", () => { } }); - test("rejects OTLP requests that exceed compressed, decoded, or span-count limits", async () => { + test("handles OTLP requests that exceed compressed, decoded, or span-count limits", async () => { const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-limits-")); const { app } = createProfileTestApp(root); try { @@ -693,8 +808,79 @@ describe("otel-traces profile", () => { headers: { "content-type": "application/json" }, body: JSON.stringify(otlpJsonRequest([otlpJsonSpan(), otlpJsonSpan({ spanId: CHILD_SPAN_ID })])), }); - expect(tooManySpansRes.status).toBe(400); - expect(tooManySpansRes.body?.error?.message).toContain("too many spans"); + expect(tooManySpansRes.status).toBe(200); + expect(tooManySpansRes.body?.partialSuccess?.rejectedSpans).toBe(1); + expect(tooManySpansRes.body?.partialSuccess?.errorMessage).toContain("too many spans"); + + const limitedReadRes = await fetchJsonApp(app, 
"http://local/v1/stream/limited-traces?format=json", { method: "GET" }); + expect(limitedReadRes.status).toBe(200); + expect(limitedReadRes.body).toHaveLength(1); + expect(limitedReadRes.body[0]?.spanId).toBe(SPAN_ID); + } finally { + await app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("rejects OTLP JSON nested AnyValue beyond configured depth", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-anyvalue-json-")); + const { app } = createProfileTestApp(root); + try { + await createOtelTraceStream(app, "anyvalue-json-limited", { + otlpLimits: { maxAnyValueDepth: 2 }, + }); + const nestedSpan = otlpJsonSpan({ + attributes: [ + { + key: "nested", + value: { + arrayValue: { + values: [ + { + arrayValue: { + values: [ + { + arrayValue: { + values: [{ stringValue: "x" }], + }, + }, + ], + }, + }, + ], + }, + }, + }, + ], + }); + const res = await fetchJsonApp(app, "http://local/v1/stream/anyvalue-json-limited/_otlp/v1/traces", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(otlpJsonRequest([nestedSpan])), + }); + expect(res.status).toBe(400); + expect(res.body?.error?.message).toContain("AnyValue nesting too deep"); + } finally { + await app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("rejects OTLP protobuf nested AnyValue beyond configured depth", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-profile-otel-anyvalue-proto-")); + const { app } = createProfileTestApp(root); + try { + await createOtelTraceStream(app, "anyvalue-proto-limited", { + otlpLimits: { maxAnyValueDepth: 2 }, + }); + const nested = kvAny("nested", anyArray([anyArray([anyArray([anyString("x")])])])); + const res = await fetchJsonApp(app, "http://local/v1/stream/anyvalue-proto-limited/_otlp/v1/traces", { + method: "POST", + headers: { "content-type": "application/x-protobuf" }, + body: makeOtlpProtoRequest([nested]), + }); + expect(res.status).toBe(400); 
+ expect(res.body?.error?.message).toContain("AnyValue nesting too deep"); } finally { await app.close(); rmSync(root, { recursive: true, force: true }); From e3389b00f916b006d9baf0ae262590481b77382a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Fri, 12 Jun 2026 18:28:07 +0700 Subject: [PATCH 11/12] Preserve otel event-derived errors when raw events are dropped --- README.md | 5 +++++ src/profiles/otelTraces/normalize.ts | 9 ++++++++- test/profile_otel_traces.test.ts | 7 ++++--- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4abc353..be78910 100644 --- a/README.md +++ b/README.md @@ -204,6 +204,11 @@ It means: default rollups - the canonical routing key is `traceId` +See [docs/profile-otel-traces.md](./docs/profile-otel-traces.md) for the +profile and OTLP receiver contract, and +[docs/request-observability.md](./docs/request-observability.md) for +cross-stream lookup over `evlog` events and `otel-traces` spans. + ## Profile Versus Schema What belongs in a profile: diff --git a/src/profiles/otelTraces/normalize.ts b/src/profiles/otelTraces/normalize.ts index dafcb75..43541d9 100644 --- a/src/profiles/otelTraces/normalize.ts +++ b/src/profiles/otelTraces/normalize.ts @@ -558,6 +558,7 @@ export function normalizeOtelDecodedSpanResult( const endTimestamp = isoFromUnixNano(endUnixNano); const normalizedEvents: CanonicalOtelSpan["events"] = []; + const eventDerivationInput: DecodedOtelEvent[] = []; let droppedEvents = Math.max(0, Math.trunc(input.droppedEventsCount ?? 0)); const eventNames: string[] = []; for (const event of input.events) { @@ -574,6 +575,12 @@ export function normalizeOtelDecodedSpanResult( }); const eventName = normalizeString(event.name) ?? 
""; eventNames.push(eventName); + eventDerivationInput.push({ + timeUnixNano: normalizeNanoString(event.timeUnixNano), + name: eventName, + attributes: eventAttrs.attributes, + droppedAttributesCount: eventAttrs.dropped, + }); normalizedEvents.push({ timestamp: isoFromUnixNano(normalizeNanoString(event.timeUnixNano)), timeUnixNano: normalizeNanoString(event.timeUnixNano), @@ -622,7 +629,7 @@ export function normalizeOtelDecodedSpanResult( const spanAttrs = attrsRes.attributes; const service = getString(resourceAttrs, "service.name"); const statusCode = normalizeStatusCode(input.status?.code); - const exception = extractExceptionFromEvents(normalizedEvents); + const exception = extractExceptionFromEvents(eventDerivationInput); const attrErrorType = getString(spanAttrs, "exception.type", "error.type"); const attrErrorMessage = getString(spanAttrs, "exception.message", "error.message"); const attrErrorStack = getString(spanAttrs, "exception.stacktrace", "error.stacktrace"); diff --git a/test/profile_otel_traces.test.ts b/test/profile_otel_traces.test.ts index 6f5ad71..13187e3 100644 --- a/test/profile_otel_traces.test.ts +++ b/test/profile_otel_traces.test.ts @@ -371,7 +371,7 @@ describe("otel-traces profile", () => { kind: "server", startUnixNano: "1772020800000000000", endUnixNano: "1772020800123000000", - status: { code: "error", message: "failed" }, + status: { code: "error" }, resource: { attributes: { "service.name": "checkout", @@ -528,6 +528,7 @@ describe("otel-traces profile", () => { timeUnixNano: "1772020800100000000", name: "exception", attributes: { + "exception.type": "Error", "exception.message": "checkout failed", }, }, @@ -548,7 +549,7 @@ describe("otel-traces profile", () => { environment: "prod", requestId: "req_preserve_1", http: { method: "GET", route: "/checkout", statusCode: 500 }, - error: { isError: true, message: "failed" }, + error: { isError: true, type: "Error", message: "checkout failed" }, eventNames: ["exception"], }); @@ -568,7 +569,7 
@@ describe("otel-traces profile", () => { environment: "prod", requestId: "req_preserve_1", http: { method: "GET", route: "/checkout", statusCode: 500 }, - error: { isError: true, message: "failed" }, + error: { isError: true, type: "Error", message: "checkout failed" }, eventNames: ["exception"], }); } finally { From c5a16aef5f6c64cc3afb7f676f3b7eab77f7b9f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B8ren=20Bramer=20Schmidt?= Date: Fri, 12 Jun 2026 19:05:32 +0700 Subject: [PATCH 12/12] Harden observe request response contract --- docs/durable-streams-spec.md | 10 ++++ docs/request-observability.md | 21 ++++++-- src/app_core.ts | 36 ++++++++++---- src/observe/request.ts | 91 +++++++++++++++++++++++++++++++++++ test/observe_request.test.ts | 71 +++++++++++++++++++++++++++ 5 files changed, 216 insertions(+), 13 deletions(-) diff --git a/docs/durable-streams-spec.md b/docs/durable-streams-spec.md index c9f6fe3..481c35b 100644 --- a/docs/durable-streams-spec.md +++ b/docs/durable-streams-spec.md @@ -300,9 +300,19 @@ Request body: `lookup` must contain exactly one of `requestId`, `traceId`, or `spanId`. `streams.events` is required when `include.events=true`; `streams.traces` is required when `include.trace=true`. +The supported request-observability pairing is `streams.events` with profile +`evlog` and `streams.traces` with profile `otel-traces`. The endpoint uses the configured `_search` registries for the referenced streams. Event and trace streams must expose the profile correlation capability. +`include.raw` defaults to `false`. With `raw=false`, `evlog.primary`, +`evlog.matches[].source`, and `trace.spans[]` contain compact normalized records +that keep IDs, timestamps, service/request fields, status/error fields, and +safe request/operation summaries while omitting raw context, attributes, +resources, span events, links, statements, URLs, stack traces, redaction +metadata, and identity internals. Timeline items omit `data`. 
With `raw=true`, +those response fields include the full profile-normalized source records and +timeline source data. The response contains: - `lookup` diff --git a/docs/request-observability.md b/docs/request-observability.md index 4a646e0..93e522e 100644 --- a/docs/request-observability.md +++ b/docs/request-observability.md @@ -59,6 +59,9 @@ Request: `streams.events` is required when `include.events` is true. `streams.traces` is required when `include.trace` is true. +The supported pairing is an `evlog` stream for `streams.events` and an +`otel-traces` stream for `streams.traces`; swapped or unsupported profile roles +return `400`. Limits: @@ -68,6 +71,14 @@ Limits: The implementation pages internally through `_search` because `_search` pages are capped at 500 hits. +`include.raw` defaults to false. With `raw=false`, the response keeps compact +normalized event/span records for request detail rendering but omits raw source +payload fields such as evlog `context`, span `attributes`, `resource`, +`instrumentationScope`, raw span `events`, `links`, raw statements, URLs, stack +traces, redaction metadata, identity internals, and timeline `data`. With +`raw=true`, `evlog.primary`, `evlog.matches[].source`, `trace.spans[]`, and +timeline items include the full profile-normalized source payloads. + ## Pairing Descriptor Clients should discover request-observability pairs from stream metadata before @@ -165,7 +176,8 @@ environment, duration, start/end time, level, and error fields. - `matches` The primary event prefers a match with the selected trace ID, otherwise the -first event result. +first event result. With `include.raw=false`, `primary` and `matches[].source` +are compact evlog records rather than full source records. `trace` is null when `include.trace=false`. Otherwise it contains: @@ -182,7 +194,9 @@ first event result. - `duplicateSpans` Spans are deduplicated by `traceId:spanId` for the trace view. 
The underlying -stream remains append-only and keeps duplicate deliveries. +stream remains append-only and keeps duplicate deliveries. With +`include.raw=false`, `spans` contains compact span records; the tree, service +map, errors, and critical path are still computed from the full returned spans. `rootSpanId` is selected from the returned root candidates by scoring likely request roots first: no parent, server kind, HTTP fields, request ID, and then @@ -232,7 +246,8 @@ The timeline merges profile-owned timeline items: - `otel.exception` Each item includes time, title, service, severity, IDs, source stream/profile, -and source data. +and source stream/profile. Timeline source `data` is included only when +`include.raw=true`. This response is intended for custom UI rendering, but no custom UI is shipped with this feature. diff --git a/src/app_core.ts b/src/app_core.ts index c0cfacd..ce6da79 100644 --- a/src/app_core.ts +++ b/src/app_core.ts @@ -65,6 +65,9 @@ import { buildTimeSearchClauses, buildTraceDetails, choosePrimaryEvent, + compactEvlogRecord, + compactTimelineItem, + compactTraceSpanRecord, combineSearchClauses, parseObserveRequestResult, quoteSearchValue, @@ -1804,12 +1807,21 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { if (Result.isError(requestRes)) return badRequest(requestRes.error.message); const observeReq = requestRes.value; - const loadCorrelationCapability = (stream: string): ReturnType | Response => { + const loadCorrelationCapability = ( + stream: string, + role: "events" | "traces" + ): ReturnType | Response => { const srow = db.getStream(stream); if (!srow || db.isDeleted(srow)) return notFound(); if (srow.expires_at_ms != null && db.nowMs() > srow.expires_at_ms) return notFound("stream expired"); const profileRes = profiles.getProfileResult(stream, srow); if (Result.isError(profileRes)) return internalError("invalid stream profile"); + if (role === "events" && profileRes.value.kind !== "evlog") { + return 
badRequest(`streams.events must reference an evlog stream; ${stream} has profile ${profileRes.value.kind}`); + } + if (role === "traces" && profileRes.value.kind !== "otel-traces") { + return badRequest(`streams.traces must reference an otel-traces stream; ${stream} has profile ${profileRes.value.kind}`); + } const capability = resolveCorrelationCapability(profileRes.value); if (!capability) return badRequest(`stream ${stream} profile does not support observability correlation`); const regRes = registry.getRegistryResult(stream); @@ -1819,10 +1831,10 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { }; const eventCorrelation = - observeReq.include.events && observeReq.streams.events ? loadCorrelationCapability(observeReq.streams.events) : null; + observeReq.include.events && observeReq.streams.events ? loadCorrelationCapability(observeReq.streams.events, "events") : null; if (eventCorrelation instanceof Response) return eventCorrelation; const traceCorrelation = - observeReq.include.trace && observeReq.streams.traces ? loadCorrelationCapability(observeReq.streams.traces) : null; + observeReq.include.trace && observeReq.streams.traces ? loadCorrelationCapability(observeReq.streams.traces, "traces") : null; if (traceCorrelation instanceof Response) return traceCorrelation; const runPagedSearch = async ( @@ -2001,6 +2013,13 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { } timeline.push(...sortTimeline(items)); } + const responsePrimaryEvent = observeReq.include.raw ? primaryEvent : compactEvlogRecord(primaryEvent); + const responseEventMatches = eventHits.map((hit) => ({ + offset: hit.offset, + source: observeReq.include.raw ? hit.source : compactEvlogRecord(hit.source), + })); + const responseTraceSpans = observeReq.include.raw ? trace.spans : trace.spans.map((span) => compactTraceSpanRecord(span)); + const responseTimeline = observeReq.include.raw ? 
timeline : timeline.map((item) => compactTimelineItem(item)); const warnings: string[] = []; if (observeReq.include.trace && traceHits.length === 0) warnings.push("no trace spans found"); @@ -2024,11 +2043,8 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { evlog: observeReq.include.events ? { stream: observeReq.streams.events ?? null, - primary: primaryEvent, - matches: eventHits.map((hit) => ({ - offset: hit.offset, - source: hit.source, - })), + primary: responsePrimaryEvent, + matches: responseEventMatches, } : null, trace: observeReq.include.trace @@ -2036,7 +2052,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { stream: observeReq.streams.traces ?? null, traceId: trace.traceId, rootSpanId: trace.rootSpanId, - spans: trace.spans, + spans: responseTraceSpans, tree: trace.tree, serviceMap: trace.serviceMap, criticalPath: trace.criticalPath, @@ -2046,7 +2062,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { duplicateSpans: trace.duplicateSpans, } : null, - timeline, + timeline: responseTimeline, coverage: { events: eventCoverage, traces: traceCoverage, diff --git a/src/observe/request.ts b/src/observe/request.ts index 9e8795f..f7a5f62 100644 --- a/src/observe/request.ts +++ b/src/observe/request.ts @@ -124,6 +124,23 @@ function nestedObject(record: Record, field: string): Record, fields: readonly string[]): Record { + const out: Record = {}; + for (const field of fields) { + if (Object.prototype.hasOwnProperty.call(record, field)) out[field] = structuredClone(record[field]); + } + return out; +} + +function nonEmptyRecord(record: Record): Record | null { + return Object.keys(record).length === 0 ? 
null : record; +} + +function compactNested(record: Record, field: string, fields: readonly string[]): Record | null { + const nested = nestedObject(record, field); + return nonEmptyRecord(pickFields(nested, fields)); +} + function parseOptionalString(raw: unknown, path: string): Result { if (raw === undefined || raw === null) return Result.ok(null); if (typeof raw !== "string") return Result.err({ message: `${path} must be a string` }); @@ -679,3 +696,77 @@ export function choosePrimaryEvent(events: SearchHit[], traceId: string | null): } return events[0]!; } + +export function compactEvlogRecord(record: unknown): unknown { + if (!isPlainObject(record)) return record; + return pickFields(record, [ + "timestamp", + "level", + "service", + "environment", + "version", + "region", + "requestId", + "traceId", + "spanId", + "method", + "path", + "status", + "duration", + "message", + "why", + "fix", + "link", + ]); +} + +export function compactTraceSpanRecord(record: unknown): unknown { + if (!isPlainObject(record)) return record; + const out = pickFields(record, [ + "schemaVersion", + "signal", + "timestamp", + "endTimestamp", + "startUnixNano", + "endUnixNano", + "duration", + "traceId", + "spanId", + "parentSpanId", + "name", + "kind", + "service", + "serviceNamespace", + "serviceInstanceId", + "environment", + "version", + "region", + "requestId", + "eventNames", + "dropped", + ]); + + const traceFlags = nestedObject(record, "traceFlags"); + if (Object.prototype.hasOwnProperty.call(traceFlags, "sampled")) out.traceFlags = { sampled: traceFlags.sampled }; + + const status = compactNested(record, "status", ["code", "message"]); + if (status) out.status = status; + const http = compactNested(record, "http", ["method", "route", "path", "statusCode"]); + if (http) out.http = http; + const db = compactNested(record, "db", ["system", "name", "operation"]); + if (db) out.db = db; + const rpc = compactNested(record, "rpc", ["system", "service", "method"]); + if (rpc) out.rpc 
= rpc; + const messaging = compactNested(record, "messaging", ["system", "destination", "operation"]); + if (messaging) out.messaging = messaging; + const error = compactNested(record, "error", ["isError", "type", "message"]); + if (error) out.error = error; + + return out; +} + +export function compactTimelineItem(item: unknown): unknown { + if (!isPlainObject(item)) return item; + const { data: _data, ...rest } = item; + return rest; +} diff --git a/test/observe_request.test.ts b/test/observe_request.test.ts index 4a655a3..2f2f6ce 100644 --- a/test/observe_request.test.ts +++ b/test/observe_request.test.ts @@ -101,6 +101,11 @@ async function seedObservabilityStreams(app: ReturnType { } }); + test("rejects swapped event and trace stream profile roles", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-observe-swapped-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await seedObservabilityStreams(app); + const res = await fetchJsonApp(app, "http://local/v1/observe/request", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + streams: { events: "app-traces", traces: "app-events" }, + lookup: { requestId: "req_obs_1" }, + include: { events: true, trace: true, timeline: true }, + }), + }); + + expect(res.status).toBe(400); + expect(res.body.error.message).toContain("streams.events must reference an evlog stream"); + } finally { + await app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + + test("honors include.raw by compacting source payloads unless explicitly requested", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-observe-raw-")); + const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 }); + try { + await seedObservabilityStreams(app); + const compactRes = await fetchJsonApp(app, "http://local/v1/observe/request", { + method: "POST", + headers: { "content-type": "application/json" }, + body: 
JSON.stringify(observeBody({ requestId: "req_obs_1" }, { include: { events: true, trace: true, timeline: true, raw: false } })), + }); + + expect(compactRes.status).toBe(200); + expect(compactRes.body.evlog.primary.requestId).toBe("req_obs_1"); + expect(compactRes.body.evlog.matches[0].source.requestId).toBe("req_obs_1"); + expect(compactRes.body.evlog.matches[0].source.context).toBeUndefined(); + const compactRootSpan = compactRes.body.trace.spans.find((item: any) => item.spanId === ROOT_SPAN_ID); + expect(compactRootSpan.http).toMatchObject({ method: "GET", route: "/checkout", statusCode: 502 }); + expect(compactRootSpan.status).toMatchObject({ code: "error", message: "provider unavailable" }); + expect(compactRootSpan.attributes).toBeUndefined(); + expect(compactRootSpan.resource).toBeUndefined(); + expect(compactRootSpan.events).toBeUndefined(); + expect(compactRes.body.timeline.length).toBeGreaterThan(0); + expect(compactRes.body.timeline.some((item: any) => Object.prototype.hasOwnProperty.call(item, "data"))).toBe(false); + + const rawRes = await fetchJsonApp(app, "http://local/v1/observe/request", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(observeBody({ requestId: "req_obs_1" }, { include: { events: true, trace: true, timeline: true, raw: true } })), + }); + + expect(rawRes.status).toBe(200); + expect(rawRes.body.evlog.matches[0].source.context.provider).toBe("stripe"); + const rawRootSpan = rawRes.body.trace.spans.find((item: any) => item.spanId === ROOT_SPAN_ID); + expect(rawRootSpan.attributes["request.id"]).toBe("req_obs_1"); + expect(rawRootSpan.resource.attributes["service.name"]).toBe("checkout"); + expect(rawRootSpan.events.length).toBeGreaterThan(0); + expect(rawRes.body.timeline.some((item: any) => Object.prototype.hasOwnProperty.call(item, "data"))).toBe(true); + } finally { + await app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + test("looks up by spanId, expands to the 
full trace, and correlates evlog by traceId", async () => { const root = mkdtempSync(join(tmpdir(), "ds-observe-span-id-")); const { app } = createProfileTestApp(root, { searchWalOverlayQuietPeriodMs: 0 });