From 6b6a803e3432bb5f76d2547290d25e0527a4b41f Mon Sep 17 00:00:00 2001 From: glasstiger Date: Wed, 13 May 2026 18:34:33 +0100 Subject: [PATCH 01/44] QWP HA docs --- .../client-failover/concepts.md | 246 ++++++++++++++ .../client-failover/configuration.md | 162 +++++++++ .../store-and-forward/concepts.md | 297 +++++++++++++++++ .../store-and-forward/configuration.md | 205 ++++++++++++ .../store-and-forward/operating-and-tuning.md | 309 ++++++++++++++++++ .../store-and-forward/when-to-use.md | 221 +++++++++++++ documentation/sidebars.js | 18 + 7 files changed, 1458 insertions(+) create mode 100644 documentation/high-availability/client-failover/concepts.md create mode 100644 documentation/high-availability/client-failover/configuration.md create mode 100644 documentation/high-availability/store-and-forward/concepts.md create mode 100644 documentation/high-availability/store-and-forward/configuration.md create mode 100644 documentation/high-availability/store-and-forward/operating-and-tuning.md create mode 100644 documentation/high-availability/store-and-forward/when-to-use.md diff --git a/documentation/high-availability/client-failover/concepts.md b/documentation/high-availability/client-failover/concepts.md new file mode 100644 index 000000000..8a64a104f --- /dev/null +++ b/documentation/high-availability/client-failover/concepts.md @@ -0,0 +1,246 @@ +--- +title: Client failover concepts +sidebar_label: Concepts +description: + How QuestDB clients detect a failed primary and transparently switch to a + healthy peer using multi-host addr lists, host-health classification, role + filtering, and zone-aware selection. +--- + +import { EnterpriseNote } from "@site/src/components/EnterpriseNote" + + + Client failover is most useful with QuestDB Enterprise primary-replica + replication. OSS users with a single instance gain limited benefit from + multi-host configuration. 
+ + +:::note Java-only today + +Client-side failover support is currently available in the Java client. +Additional language clients are on the roadmap. + +::: + +When a QuestDB cluster fails over from one primary to another — whether through +a planned promotion, a rolling upgrade, or an unplanned outage — clients with a +single hard-coded address must be reconfigured and restarted. A failover-aware +client instead carries the full list of peers and walks that list automatically +when the current connection breaks. + +This page explains the model. The user-facing knobs and worked examples live in +the [Configuration](/docs/high-availability/client-failover/configuration/) +page. + +## What failover does + +You give the client a comma-separated list of endpoints: + +``` +addr=node-a:9000,node-b:9000,node-c:9000 +``` + +The client picks one, connects, and uses it until that connection breaks. When +it breaks, the client walks the rest of the list, classifies what it found at +each host, and either reconnects or surfaces a failure to your code. The exact +loop that drives this depends on whether you are ingesting (long-lived +background reconnect) or querying (per-request retry budget). Both loops share +the same primitives described here. + +## Host health model + +For every entry in `addr`, the client tracks two attributes: a **state** and a +**zone tier**. + +### State + +The state records what the client most recently observed when it tried that +host. + +| State | When the client moves a host here | +|---|---| +| `Healthy` | The last connect attempt succeeded. | +| `Unknown` | The host has not been tried in this round, or its classification was reset. | +| `TransientReject` | The server returned `421` with `X-QuestDB-Role: PRIMARY_CATCHUP` — it is a primary that is still catching up after promotion. Expected to recover. 
| +| `TransportError` | TCP/TLS handshake failed, an HTTP upgrade returned a transient error code, or an established connection broke mid-stream. | +| `TopologyReject` | The server returned `421` with a role that cannot satisfy the requested `target=` filter — for example, a `REPLICA` when you asked for `target=primary`. The host will not become writable without a topology change. | + +A state nearer the top of the table above (closer to `Healthy`) is preferred +when the client picks the next host to try. + +### Zone tier + +Each host is also classified relative to the client's configured `zone=`: + +| Zone tier | Meaning | +|---|---| +| `Same` | Server's advertised zone matches the client's `zone=` (case-insensitive), or `zone=` is unset, or `target=primary`. | +| `Unknown` | Server has not advertised a zone yet. | +| `Other` | Server advertised a different zone. | + +Zone information is advertised by the server on a successful upgrade and +(starting in QWP v2) on `421` rejects. The client remembers it for the lifetime +of the connection. + +`target=primary` collapses every host's zone tier to `Same` — writers must +follow the primary regardless of geography. Ingress is currently zone-blind in +both storage modes, so the `zone=` key is silently accepted on ingress +connections and only takes effect on egress. + +### Selection priority + +When the client needs to pick the next host, it sorts by the tuple `(state, +zone_tier)` lexicographically — state first, zone second. So a known-good host +in another zone wins against an untried local host. Within a tied bucket, the +order in your `addr=` list is preserved verbatim. + +The client does **not** shuffle, randomise, or load-balance across peers. +Cluster-level load balancing is the responsibility of QuestDB's server-side +coordinators. If you need a different first-pick distribution across many +simultaneously-starting clients, rotate the connect string at deployment time.
+ +## Sticky-Healthy across rounds + +Once the client lands on a `Healthy` host, that host stays the priority pick on +the next round of failover — provided its zone tier is still `Same`. This +avoids unnecessary churn after a short blip: a momentary network glitch +doesn't promote a different node into the active slot just because it +happened to be probed first. + +`Healthy` hosts in another zone are reset to `Unknown` between outages rather +than kept sticky. Otherwise a once-healthy cross-zone host would lock the +client out of probing local hosts after they recover. + +## Role filter (`target=`) + +The `target=` key controls which server role the client is willing to bind to: + +| `target=` | STANDALONE | PRIMARY | REPLICA | PRIMARY_CATCHUP | +|---|---|---|---|---| +| `any` (default) | accept | accept | accept | accept (transient) | +| `primary` | accept | accept | reject (topology) | accept (transient) | +| `replica` | reject (topology) | reject (topology) | accept | reject (topology) | + +`PRIMARY_CATCHUP` is a primary that has been promoted but has not yet caught +up to its predecessor's WAL — the client treats it as transient and retries +the same host (with a fresh round, no exponential backoff) until it either +becomes a full `PRIMARY` or the outage budget expires. + +A `421 Misdirected Request` response **without** an `X-QuestDB-Role` header +is treated as a generic transport error, not a role reject — the client walks +to the next host but does not pin the rejecting host as topology-unreachable. + +`target=replica` is intended for read-side workloads that explicitly want to +spread query load across read-only peers (see the egress flow below). + +## Two failover contexts + +Failover applies to both directions of QWP traffic, but the two contexts have +very different goals. + +### Ingress (writes) + +The ingress reconnect loop sits inside the store-and-forward I/O thread. 
It +runs continuously in the background, retrying through outages while the +producer keeps appending to the local buffer. The defaults are tuned for +throughput-oriented workloads that can tolerate minutes of server unavailability: + +- Initial backoff: `100 ms` +- Maximum backoff: `5 s` +- Per-outage budget: `5 minutes` (`reconnect_max_duration_millis`) +- Jitter: **equal-jitter** `[base, 2·base)` — non-zero lower bound damps + reconnect storms when many producers share a cluster +- Inter-host pause within a round: **none** — the client walks the full + address list as fast as `auth_timeout_ms` allows, paying one backoff + sleep at round exhaustion + +See the [store-and-forward concepts](/docs/high-availability/store-and-forward/concepts/) +page for how the reconnect loop interacts with the disk-backed segment ring. + +### Egress (queries) + +The egress failover loop wraps each `Execute()` call on the read-side query +client. It is interactive: a slow failover is worse than a clear error, so +the budget is short: + +- Initial backoff: `50 ms` +- Maximum backoff: `1 s` +- Total wall-clock budget: `30 s` (`failover_max_duration_ms`) +- Attempt cap: `8` (`failover_max_attempts`) +- Jitter: **full-jitter** `[0, base)` — a single-user query benefits from the + lowest expected recovery time, and one client per workload removes the + thundering-herd concern + +The egress loop also respects the `target=` role filter and prefers same-zone +hosts when `zone=` is set. + +## Error classification + +Every error the client encounters falls into one of three buckets, which drives +the loop's response: + +### Terminal — bypass failover + +The client surfaces the error to your code immediately. Retrying every host +will not help. + +| Condition | Why terminal | +|---|---| +| HTTP `401` / `403` on upgrade | Credentials are cluster-wide; retrying floods server logs without recovery. | +| Server-status reject (SF) | Application-layer reject; replay reproduces the same response. 
| + +### Topology — handled inside the round + +The host is demoted in the priority lattice; the client walks to the next host +within the same round. No exponential backoff is consumed. + +- `421` + `X-QuestDB-Role: PRIMARY_CATCHUP` → `TransientReject` +- `421` + any other recognised role → `TopologyReject` +- `SERVER_INFO.Role` does not match the requested `target=` + +If every host in a round role-rejects, ingress pays one fixed backoff sleep +(reset to `InitialBackoff`, no doubling) and starts a fresh round; egress +fails the current `Execute()` call. + +### Transient — enter backoff + +Everything else: TCP/TLS errors, `auth_timeout_ms` expiry, mid-stream send or +receive failures, `404` / `426` / `503` on upgrade, version mismatches +(per-endpoint — a rolling upgrade in flight does not lock out compatible +peers), and generic frame-decode errors. The client records `TransportError` +and walks to the next host. + +When a round exhausts with transient errors, the client sleeps for the +backoff interval (clamped to the remaining outage budget) and starts the +next round. + +## Mid-stream demotion + +If a connection breaks mid-stream — for example, the receive pump throws after +a successful upgrade — the client marks the failed host as `TransportError` +**before** picking the next host. Without this ordering, the sticky-Healthy +rule would re-pick the same just-failed host as the priority candidate, and +the next attempt would target the broken node again. + +This invariant only matters when you are reading client source code or +debugging a custom implementation. As a user, you observe it as "failover +moves off a broken node on the very next attempt, with no exponential delay +when at least one peer is healthy." + +## Authentication is cluster-wide + +A `401` or `403` on the HTTP upgrade is terminal — the client does not retry +other hosts. 
The assumption is that auth credentials are configured +identically across the cluster, so a credential failure against one node is +a credential failure against all of them. Retrying would spam every peer's +audit log without recovering. + +If your deployment has per-host credentials, that is unsupported and outside +the failover model — split the workload into one connect string per credential. + +## Next steps + +- [Configuration](/docs/high-availability/client-failover/configuration/) — + the connect-string keys and worked examples for each context. +- [Store-and-forward concepts](/docs/high-availability/store-and-forward/concepts/) — + how the ingress failover loop interacts with the disk-backed substrate. diff --git a/documentation/high-availability/client-failover/configuration.md b/documentation/high-availability/client-failover/configuration.md new file mode 100644 index 000000000..6da5a703f --- /dev/null +++ b/documentation/high-availability/client-failover/configuration.md @@ -0,0 +1,162 @@ +--- +title: Client failover configuration +sidebar_label: Configuration +description: + Connect-string keys that configure multi-host failover for QuestDB clients, + including addr lists, zone preference, role filtering, and the ingress and + egress retry budgets. +--- + +:::note Java-only today + +Client-side failover support is currently available in the Java client. +Additional language clients are on the roadmap. + +::: + +This page is the configuration reference for client failover. For the model +behind these keys — host-health states, zone tiers, role filtering, and the +two retry loops — read [Concepts](/docs/high-availability/client-failover/concepts/) +first. + +## Common keys + +These keys apply to every WS / WSS / HTTP / HTTPS client. They are documented +in full on the +[connect-string reference](/docs/client-configuration/connect-string#failover-keys); +the table below summarises the failover-relevant subset. 
+ +| Key | Type | Default | Notes | +|---|---|---|---| +| `addr` | `host:port[,host:port…]` | required | Comma-separated peer list. The two syntactic forms (`addr=h1,h2` and repeated `addr=h1;addr=h2`) accumulate. Empty entries are rejected. | +| `zone` | string | unset | Client's zone identifier (opaque, case-insensitive — `eu-west-1a`, `dc-amsterdam`, etc.). Egress prefers same-zone peers when `target` is `any` or `replica`. Silently ignored on ingress. | +| `target` | `any` \| `primary` \| `replica` | `any` | Which server role the client accepts. See [Role filter](/docs/high-availability/client-failover/concepts/#role-filter-target) for the role table. | +| `auth_timeout_ms` | int (ms) | `15000` | Upper bound on the HTTP-upgrade response read per host. Does **not** cover the TCP connect or TLS handshake — those use the OS default. Set lower if you have well-known network paths and want faster failover; set higher only if upgrade is genuinely slow. | + +`addr` syntax — both of these are equivalent and produce the same three-peer +list: + +``` +addr=node-a:9000,node-b:9000,node-c:9000 +addr=node-a:9000;addr=node-b:9000;addr=node-c:9000 +``` + +## Ingress (write) + +The ingress reconnect loop is driven by store-and-forward connect-string +keys. See +[Store-and-forward configuration](/docs/high-availability/store-and-forward/configuration/#reconnect-keys) +and the +[connect-string reference](/docs/client-configuration/connect-string#sf-keys) +for the full list. The failover-relevant keys are: + +| Key | Type | Default | Notes | +|---|---|---|---| +| `reconnect_max_duration_millis` | int (ms) | `300000` (5 min) | Per-outage wall-clock budget. Resets on every successful reconnect. Size this to span your largest expected failover window, but short enough to surface permanent topology issues. | +| `reconnect_initial_backoff_millis` | int (ms) | `100` | Starting backoff sleep at round exhaustion. Doubles up to `reconnect_max_backoff_millis`. 
| +| `reconnect_max_backoff_millis` | int (ms) | `5000` | Cap on the exponential backoff. With equal-jitter, the actual sleep lands in `[max, 2·max)` once the base saturates. | +| `initial_connect_retry` | `off` \| `on` \| `async` | `off` | Whether to apply the same retry loop to the very first connect attempt. See below. | + +### `initial_connect_retry` + +By default, the first connect failure is **terminal** — typically the first +attempt failing means a misconfiguration (wrong host, wrong port, no +network), and retrying for five minutes only hides it. + +| Value | Behaviour | +|---|---| +| `off` (default; alias `false`) | First-connect failure is terminal. The producer's call to build the sender throws immediately. | +| `on` (aliases `sync`, `true`) | First-connect failures enter the same reconnect loop as mid-stream failures. The constructor blocks until success or the per-outage budget expires. | +| `async` | The constructor returns immediately; the background I/O thread drives the reconnect loop. The producer experiences backpressure if it tries to publish before the connection comes up. Intended for unattended producers where the SF directory may already carry segments from a prior process and the server may come up later. | + +## Egress (query) + +The egress failover loop wraps each `Execute()` call on the read-side query +client. The full key list lives on the +[connect-string reference](/docs/client-configuration/connect-string#egress-flow); +the user-visible knobs are: + +| Key | Type | Default | Notes | +|---|---|---|---| +| `failover` | `on` \| `off` | `on` | Global on/off. With `failover=off`, a single failed `Execute()` call surfaces the underlying error without walking the address list. | +| `failover_max_attempts` | int | `8` | Hard cap on attempts within a single `Execute()` call. | +| `failover_max_duration_ms` | int (ms) | `30000` | Wall-clock budget for failover eligibility. 
Bounds **when failover stops**, not the wall-clock of `Execute()` itself — a final `WalkTracker` round can still cost up to `hostCount × auth_timeout_ms` after the budget expires. | +| `failover_backoff_initial_ms` | int (ms) | `50` | Starting backoff sleep. Doubles up to the cap. | +| `failover_backoff_max_ms` | int (ms) | `1000` | Cap on the exponential backoff. With full-jitter, the actual sleep lands in `[0, max)`. | + +## Worked examples + +### Three-node Enterprise cluster, default failover + +Most users need only the `addr` list — defaults cover the rest. + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=node-a:9000,node-b:9000,node-c:9000;sf_dir=/var/lib/qdb-sender;")) { + sender.table("events") + .symbol("source", "edge-42") + .longColumn("count", 1) + .atNow(); +} +``` + +The `ws::` scheme picks the QWP WebSocket transport. `sf_dir` enables the +disk-backed store-and-forward substrate, which keeps unacked data across +sender restarts; see +[Store-and-forward concepts](/docs/high-availability/store-and-forward/concepts/). + +### Zone-aware read replicas + +For read-only queries spread across same-zone replicas, with a primary as +final fallback: + +```java +try (QueryClient client = QueryClient.fromConfig( + "ws::addr=replica-eu-1a:9000,replica-eu-1b:9000,primary:9000;" + + "zone=eu-west-1a;target=any;")) { + try (ResultSet rs = client.execute("SELECT * FROM trades WHERE ts > now() - 1h")) { + // ... + } +} +``` + +Setting `target=replica` would skip the primary entirely; `target=any` is +usually preferable so the query still completes after a replica outage. 
+ +### Long-tolerated ingest with async first connect + +Useful for unattended ingest processes (edge sensors, ETL jobs) that may +restart before the server comes up: + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=primary:9000;sf_dir=/var/lib/qdb-sender;" + + "initial_connect_retry=async;" + + "reconnect_max_duration_millis=1800000;")) { + // appendBlocking() will absorb up to sf_max_total_bytes of writes + // while the I/O thread retries the initial connect. +} +``` + +The 30-minute reconnect budget gives a wide failover window; the `async` +initial-connect policy lets the producer thread proceed immediately. + +### Tight egress failover for an interactive dashboard + +```java +try (QueryClient client = QueryClient.fromConfig( + "ws::addr=node-a:9000,node-b:9000;" + + "failover_max_duration_ms=5000;failover_max_attempts=3;")) { + // Surfaces an error within a few seconds if the cluster is unreachable. +} +``` + +## Where each key is documented + +| Key | Concept | Reference | +|---|---|---| +| `addr`, `zone`, `target`, `auth_timeout_ms` | Host selection, role filter | [connect-string #failover-keys](/docs/client-configuration/connect-string#failover-keys) | +| `reconnect_*`, `initial_connect_retry` | Ingress retry budget | [connect-string #reconnect-keys](/docs/client-configuration/connect-string#reconnect-keys) | +| `failover`, `failover_*` | Egress retry budget | [connect-string #egress-flow](/docs/client-configuration/connect-string#egress-flow) | +| `username` / `password` / `token` | Authentication | [connect-string #auth](/docs/client-configuration/connect-string#auth) | +| `tls_*` | TLS configuration | [connect-string #tls](/docs/client-configuration/connect-string#tls) | diff --git a/documentation/high-availability/store-and-forward/concepts.md b/documentation/high-availability/store-and-forward/concepts.md new file mode 100644 index 000000000..7f070d33d --- /dev/null +++ b/documentation/high-availability/store-and-forward/concepts.md @@ 
-0,0 +1,297 @@ +--- +title: Store-and-forward concepts +sidebar_label: Concepts +description: + How the QuestDB store-and-forward client substrate decouples the producer + from the wire, masks network outages and server restarts, and replays + unacknowledged frames against a fresh connection. +--- + +:::note Java-only today + +Client-side store-and-forward support is currently available in the Java +client. Additional language clients are on the roadmap. + +::: + +Store-and-forward (SF) is the client-side substrate that sits between your +application code and the QWP wire transport. It absorbs publishes into a +local ring of fixed-size segments, drains them over a WebSocket connection +on a dedicated I/O thread, and replays any unacknowledged frames after a +disconnect or restart. + +The goal is **producer-never-blocks-on-the-wire**. Your call to `flush()` +returns as soon as data is published into the substrate. Acknowledgements +arrive asynchronously. A network outage, a server restart, even a JVM +crash leaves your producer code unaffected — the I/O thread quietly +reconnects and replays what remains. + +## Two modes + +SF runs in either of two modes selected by the connect string: + +| Aspect | Memory mode | SF mode | +|---|---|---| +| Trigger | `sf_dir` is **unset** | `sf_dir` is set | +| Storage | malloc'd ring in process RAM | mmap'd files under `<sf_dir>/<sender_id>/` | +| Default capacity | `128 MiB` | `10 GiB` | +| Survives JVM exit | No | Yes | +| Survives JVM crash | No | Yes — replay on next start | +| Tolerates transient network blips | Yes | Yes | +| Tolerates multi-minute server outages | Bounded by RAM cap | Bounded by disk cap | +| Recovers another sender's stale slot | n/a | Opt-in via `drain_orphans=on` | + +Both modes share the same reconnect loop, the same backoff and retry +budgets, and the same on-the-wire behaviour. The only difference is +where unacked data lives.
+ +## What "frame" means here + +A **frame** is one encoded QWP message — typically a batch of rows for one +or more tables. The SF substrate treats frames as opaque payloads with two +properties: a length, and a CRC32C checksum. The append protocol writes the +payload first, the checksum last, and a partial write left behind by a +crash is detected and discarded by the recovery scanner on next start. + +Frames in SF mode are **self-sufficient**: every frame carries the full +schema for every table it touches and the full symbol-dictionary delta +from id 0. That makes a frame replayable against any server connection, +weeks or months later, even after a process restart that wiped all +in-memory schema state. The cost is a small per-batch overhead which is +accepted for correctness. + +## The FSN model + +Two distinct counters track frame identity: + +- **FSN** (frame-sequence-number) — a monotonic counter assigned when a + frame is appended to the substrate. FSN survives reconnects and (in SF + mode) restarts. It is the substrate's permanent identifier for a frame. +- **wireSeq** — the per-connection counter the server uses for + deduplication, reset to `0` on every successful WebSocket upgrade. + +On every (re)connect the relationship is pinned: + +``` +fsn = fsnAtZero + wireSeq +``` + +where `fsnAtZero` is `ackedFsn + 1` (i.e. the next un-acked FSN). The +client streams frames from disk to the wire in strict FSN order, one frame +per WebSocket binary message, incrementing `wireSeq`. The server echoes +back the same `wireSeq` in its OK frames, and the client maps that back to +the original FSN to advance the trim watermark. + +Two consequences: + +- Frames **must** be sent in strict order. The wire format does not + serialise `wireSeq` — the server assigns it implicitly from receive + order. Reordering breaks the FSN mapping. +- After a reconnect, the server sees the **same payloads** at new + `wireSeq` values. 
Server-side dedup keys off `messageSequence` inside + the payload, not `wireSeq`, so replay does not produce double-writes. + +## Trim: how unacked data is reclaimed + +The substrate holds frames until the server confirms it has received and +processed them. Each confirmation advances the **acked FSN**, which +allows the manager thread to unlink sealed segment files (in SF mode) or +release ring memory (in memory mode) up to that watermark. + +Two trim drivers exist: + +### Default — OK-driven trim + +Each successful batch produces an **OK frame** carrying the highest +`wireSeq` it acknowledges and the per-table `seqTxn` watermarks that +batch updated. On receipt: + +1. The substrate translates `wireSeq` back to FSN. +2. `ackedFsn` advances to the new value. +3. Any segment whose last FSN is `≤ ackedFsn` is unlinked and its bytes + returned to the available pool. + +This is the default and is sufficient when "data is in the server's WAL" +is the durability bar you need. + +### `request_durable_ack=on` — WAL-durable trim + +When the connect string sets `request_durable_ack=on`, trim is driven by +a separate frame: `STATUS_DURABLE_ACK`. These carry per-table watermarks +for data the server has **already uploaded from the WAL to the configured +object store** (S3, Azure Blob, GCS, or NFS). + +- OK frames still arrive on every batch, but they no longer advance the + trim watermark. Instead, they are stashed alongside their per-table + `seqTxn` values. +- A `STATUS_DURABLE_ACK` frame names tables and their durable `seqTxn` + watermarks. The client matches the head of the OK queue against these + watermarks; each fully-covered head entry pops, and `ackedFsn` + advances to the highest covered wireSeq. +- The client opt-in is mandatory — the connect fails loudly if the server + does not echo `X-QWP-Durable-Ack: enabled` on the upgrade response. + This avoids the silent failure mode where the producer waits forever + for ack frames that will never arrive. 
+ +Durable-ack mode is the right choice when "data is in the object store" +is the durability bar, but it has two costs: a longer time-to-trim (so +larger steady-state disk usage in SF mode), and a small WebSocket PING +sent every `durable_ack_keepalive_interval_millis` to nudge the server's +flush path when the client is idle but has pending confirmations. + +See [When to use](/docs/high-availability/store-and-forward/when-to-use/) +for the decision. + +## Reconnect and replay + +When the wire connection breaks — for any reason — the I/O thread enters +the reconnect loop documented in +[Client failover concepts](/docs/high-availability/client-failover/concepts/). +The producer is **not notified**: it keeps publishing into the substrate, +bounded by `sf_max_total_bytes` (see backpressure below). + +On every successful (re)connect: + +1. `fsnAtZero = ackedFsn + 1`. +2. `wireSeq` resets to `0`. +3. The read cursor rewinds to the first un-acked frame on disk (or in + memory). +4. Frames stream to the wire in FSN order. The server's dedup window + absorbs any frames that landed before the disconnect. +5. New frames appended by the producer during replay are picked up + automatically — the I/O loop watches a volatile `publishedFsn` + cursor. + +Frames sent before the disconnect and re-sent after a reconnect count +in the `getTotalFramesReplayed` observability counter. + +## Backpressure + +The substrate enforces `sf_max_total_bytes` as a hard cap on resident +storage. When the cap is hit, the producer's `appendBlocking` call +busy-spins (with cooperative yield) up to `sf_append_deadline_millis` +waiting for ACK-driven trim to free space. If the deadline fires, the +call throws a typed exception. + +The exception message distinguishes the two scenarios: + +- **Backpressure while the wire is publishing** — the server is acking + but the producer is faster than the server can absorb. Solutions: + raise `sf_max_total_bytes`, slow the producer, or scale the server. 
| +- **Backpressure while reconnecting** — the I/O loop is in the retry + loop and the substrate is filling. The message includes attempt count + and outage start time. Solutions: address the cluster outage, raise + `sf_max_total_bytes`, or accept that the producer will start throwing + once the cap is exhausted. + +## Close and shutdown + +`close()` waits up to `close_flush_timeout_millis` (default 5 s) for +`ackedFsn` to reach `publishedFsn` — i.e. for the server to acknowledge +everything the producer has handed in. If the wait succeeds, all data is +acked. If the timeout fires, a `WARN` is logged and: + +- in **SF mode**, the un-acked tail is left on disk and recovered by the + next sender on the same slot; +- in **memory mode**, the un-acked tail is lost. + +Setting `close_flush_timeout_millis=0` (or `-1`) skips the drain wait +entirely — useful for fast shutdown paths where you do not want to block. +Even in this branch, the slot lock is released and segments are unmapped +cleanly, and a non-blocking safety-net check rethrows any latched +terminal error that has not already been delivered through an async +handler or a synchronous producer call. + +## Crash recovery (SF mode) + +When the engine opens an SF-mode sender, it scans the slot directory: + +1. **Acquire the slot lock.** Two senders pointing at the same + `<sf_dir>/<sender_id>/` will collide here and the second one fails to + start, naming the holder's PID in the error message. +2. **Validate every segment file.** Headers are checked, frames are walked + forward verifying each CRC. The first invalid or torn frame becomes + the file's end-of-data; anything past it is discarded. +3. **Reconcile gaps.** Segments are sorted by their `baseSeq` and adjacent + pairs must satisfy `prev.baseSeq + prev.frameCount == curr.baseSeq`. + A gap is a fatal recovery error — the engine refuses to start. +4.
**Seed the ack watermark.** Either from `.ack-watermark` (if your + client maintains it; see below) or from the lowest surviving FSN minus + one. +5. **Bump the connection generation** so the I/O loop, on first connect, + replays from disk against a fresh wireSeq window. + +After recovery the producer publishes new frames as normal; the I/O +thread replays the un-acked tail and then drains forward. + +### `.ack-watermark` + +An optional 16-byte file under the slot directory persists the cumulative +durable-ack FSN across process restarts. Without it, recovery seeds the +ack watermark from the lowest surviving segment's `baseSeq - 1` — which +guarantees no data loss, but cannot distinguish which frames inside that +lowest segment the previous sender had already received durable acks +for. Replay therefore re-sends every frame in that segment, producing +row-level duplicates against a still-alive server unless deduplication is +enabled on the target table. + +With `.ack-watermark`, recovery clamps the seed to the higher of the +on-disk and watermarked values, so already-durable-acked frames inside +the lowest surviving segment are not re-replayed. + +The file is **optional** — a conformant client may choose not to maintain +it. The Java reference client does. + +## Orphan adoption + +When the foreground sender's connect string sets `drain_orphans=on`, the +engine scans `<sf_dir>/*` at startup for **sibling slot directories** — +other `sender_id`s under the same group root that contain unacked data +and are not marked `.failed`. For each one, up to +`max_background_drainers` at a time, a background drainer spawns, +acquires the orphan slot's lock (skipping if another process holds it), +opens a separate WebSocket connection, runs the same recovery + replay +flow, and exits when the orphan is fully drained. + +This is the rescue path for a sender that died without draining cleanly +— a JVM crash, an OOM kill, a host reboot.
The replacement process picks +the orphan's slot lock and clears its disk footprint. Without +`drain_orphans=on` the dead sender's data persists on disk indefinitely +until an operator intervenes. + +The orphan flow is opt-in because in a multi-tenant deployment with +shared `sf_dir`, blindly draining unknown slots may be surprising. + +## Error frames + +Not every server response is an OK. Server errors fall into six +categories, each with a default policy: + +| Category | Default | Meaning | +|---|---|---| +| `SCHEMA_MISMATCH` | `DROP_AND_CONTINUE` | The batch's schema doesn't match the server. Replay won't help — the substrate logs and advances trim past the rejected span. | +| `WRITE_ERROR` | `DROP_AND_CONTINUE` | Per-batch write failure (e.g. table is not currently accepting writes). | +| `PARSE_ERROR` | `HALT` | Almost certainly a client bug. The substrate preserves on-disk frames for postmortem. | +| `INTERNAL_ERROR` | `HALT` | Catch-all server fault. | +| `SECURITY_ERROR` | `HALT` | Cluster-wide auth / authorization failure. | +| `PROTOCOL_VIOLATION` | `HALT` (forced) | Connection is gone after a terminal WebSocket close code; no choice. | + +Errors are also delivered to an **error inbox** — a bounded queue +consumed by a daemon dispatcher that invokes your registered handler. +Overflow drops the oldest entry rather than the newest (watermarks are +monotonic; the latest entry is the most informative). The default +handler logs every received error: silence is forbidden by the contract, +because a buggy or no-op handler would hide data loss +indistinguishably from a healthy connection. + +## Next steps + +- [When to use](/docs/high-availability/store-and-forward/when-to-use/) — + decision guide for memory vs SF mode, and when to opt into + durable-ack and orphan adoption. +- [Operating and tuning](/docs/high-availability/store-and-forward/operating-and-tuning/) — + slot directory layout, lock semantics, sizing, observability. 
+- [Configuration](/docs/high-availability/store-and-forward/configuration/) — + connect-string key reference. +- [Client failover concepts](/docs/high-availability/client-failover/concepts/) — + how the reconnect loop selects hosts and classifies errors. diff --git a/documentation/high-availability/store-and-forward/configuration.md b/documentation/high-availability/store-and-forward/configuration.md new file mode 100644 index 000000000..db13ff71b --- /dev/null +++ b/documentation/high-availability/store-and-forward/configuration.md @@ -0,0 +1,205 @@ +--- +title: Store-and-forward configuration +sidebar_label: Configuration +description: + Connect-string keys that configure the QuestDB store-and-forward client + substrate — storage, reconnect, durable-ack, and error-handling. +--- + +:::note Java-only today + +Client-side store-and-forward support is currently available in the Java +client. Additional language clients are on the roadmap. + +::: + +This page is the configuration reference for the SF connect-string keys. +For the model behind each knob, read +[Concepts](/docs/high-availability/store-and-forward/concepts/); for +operational guidance read +[Operating and tuning](/docs/high-availability/store-and-forward/operating-and-tuning/). + +Shared keys (authentication, TLS, address list) are documented on the +[connect-string reference](/docs/client-configuration/connect-string). +The keys below are the SF-specific subset. + +## Storage keys + +These keys select between memory mode and SF mode and govern on-disk +layout. The single switch is `sf_dir`: unset → memory mode, set → SF +mode. + +| Key | Type | Default | Description | +|---|---|---|---| +| `sf_dir` | path | unset | Group root directory. When set, the slot lives at `//` and unacked data is durable across process restarts. When unset, the substrate runs in memory mode. | +| `sender_id` | string | `default` | Slot subdirectory name. 
Two senders sharing the same `sender_id` and `sf_dir` will collide on the slot lock. Must not contain path separators or be empty. |
+| `sf_max_bytes` | size | `4M` | Per-segment file size; rotation threshold. |
+| `sf_max_total_bytes` | size | `128M` (memory) / `10G` (SF) | Hard cap on resident SF storage. Triggers producer backpressure when full. |
+| `sf_durability` | enum | `memory` | Reserved for future per-batch / per-frame fsync modes. Only `memory` is currently implemented; `flush` and `append` parse but are rejected at build time. |
+| `sf_append_deadline_millis` | int (ms) | `30000` | How long a producer `appendBlocking` call waits for ACK-driven trim to free space before throwing. |
+| `drain_orphans` | bool | `off` | Scan `<sf_dir>/*` at startup and spawn drainers for sibling slots that contain unacked data. See [orphan adoption](/docs/high-availability/store-and-forward/concepts/#orphan-adoption). |
+| `max_background_drainers` | int | `4` | Cap on concurrent orphan drainers. |
+
+Size values accept integer bytes or unit suffixes (`K`, `M`, `G`, `T`)
+using binary multipliers.
+
+These keys are also documented on the central
+[connect-string reference](/docs/client-configuration/connect-string#sf-keys).
+
+## Reconnect keys
+
+Govern the in-flight reconnect loop after the wire breaks. Backoff math
+and host-walk semantics are documented in
+[Client failover concepts](/docs/high-availability/client-failover/concepts/).
+
+| Key | Type | Default | Description |
+|---|---|---|---|
+| `reconnect_max_duration_millis` | int (ms) | `300000` (5 min) | Per-outage wall-clock budget. Resets on every successful reconnect. |
+| `reconnect_initial_backoff_millis` | int (ms) | `100` | Initial backoff sleep at round exhaustion. |
+| `reconnect_max_backoff_millis` | int (ms) | `5000` | Cap on the exponential backoff. With equal-jitter the actual sleep lands in `[max, 2·max)`.
| +| `initial_connect_retry` | enum | `off` | `off` (alias `false`): first-connect failure is terminal. `on` (aliases `sync`, `true`): same retry loop as reconnect, blocking the constructor. `async`: same retry loop in the I/O thread, non-blocking. | +| `close_flush_timeout_millis` | int (ms) | `5000` | `close()` blocks up to this long waiting for `ackedFsn ≥ publishedFsn`. `0` or `-1` skips the drain wait. The safety-net `checkError()` still runs. | + +Cross-reference: +[connect-string #reconnect-keys](/docs/client-configuration/connect-string#reconnect-keys). + +## Durable-ack keys + +Opt in to object-store-durable trim. See +[Durable-ack: when to opt in](/docs/high-availability/store-and-forward/when-to-use/#durable-ack-when-to-opt-in). + +| Key | Type | Default | Description | +|---|---|---|---| +| `request_durable_ack` | bool | `off` | Opt-in via the upgrade header `X-QWP-Request-Durable-Ack: true`. Trim is then driven by `STATUS_DURABLE_ACK` frames only; OK frames no longer advance the trim watermark. Connect fails loudly if the server does not echo `X-QWP-Durable-Ack: enabled`. WebSocket transports only. | +| `durable_ack_keepalive_interval_millis` | int (ms) | `200` | Cadence of WebSocket PING the I/O loop sends while there are pending durable confirmations and the producer is idle. `0` or negative disables. | + +## Error-handling keys + +| Key | Type | Default | Description | +|---|---|---|---| +| `error_inbox_capacity` | int (≥16) | `256` | Bounded SPSC queue capacity for async error notifications. Overflow drops the oldest entry and increments `getDroppedErrorNotifications`. | +| `on_server_error`, `on_schema_error`, `on_parse_error`, `on_internal_error`, `on_security_error`, `on_write_error` | enum | per category | Override the default policy (`HALT` or `DROP_AND_CONTINUE`) for a category. Reserved in the spec but not yet recognised by the Java connect-string parser — use the fluent `LineSenderBuilder` API today. 
| + +The per-category defaults are documented in +[Concepts § Error frames](/docs/high-availability/store-and-forward/concepts/#error-frames). +`PROTOCOL_VIOLATION` and `UNKNOWN` are forced `HALT` and not user-overridable. + +## Other relevant keys + +These keys are not SF-specific but affect SF behaviour. See the +[connect-string reference](/docs/client-configuration/connect-string) for the +canonical entries. + +| Key | Type | Default | Description | +|---|---|---|---| +| `addr` | `host[:port][,host[:port]…]` | required | Multi-host failover list. See [Client failover configuration](/docs/high-availability/client-failover/configuration/). | +| `username` / `password` | string | unset | HTTP Basic auth on the upgrade request. | +| `token` | string | unset | Bearer token on the upgrade request. | +| `tls_verify` | enum | `on` | `on` or `unsafe_off`. Applies to `wss::` / TLS connections. | +| `tls_roots` | path | system trust | Custom CA trust store. | +| `tls_roots_password` | string | unset | Trust store password. | +| `auto_flush` | bool | `on` | Global on/off for auto-flush triggers. | +| `auto_flush_rows` | int / `off` | `1000` | Row-count flush trigger. | +| `auto_flush_bytes` | int / `off` | `0` (off) | Byte-size flush trigger. | +| `auto_flush_interval` | int (ms) / `off` | `100` | Time-since-first-row flush trigger. | +| `init_buf_size` | size | `64K` | Initial encode buffer capacity. | +| `max_buf_size` | size | `100M` | Max encode buffer capacity. | +| `max_name_len` | int | `127` | Local validation cap for table / column names. | +| `max_schemas_per_connection` | int | `65535` | Per-connection schema-id ceiling. | + +## Validation + +The parser rejects: + +- Unknown keys (forward compatibility is via the spec, not silent + acceptance). +- `sf_durability` values other than `memory`, `flush`, `append`. `flush` + and `append` parse but are rejected at build time today. +- `sender_id` containing path separators or empty. 
+- `request_durable_ack=on` on non-WebSocket transports. + +## Worked examples + +### Single-node memory-mode producer + +```java +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { + sender.table("events") + .stringColumn("source", "edge-42") + .longColumn("count", 1) + .atNow(); +} +``` + +No `sf_dir`, so memory mode. The default `128 MiB` cap absorbs short +network blips. A JVM crash loses the unacked tail. + +### Single-node durable producer + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=localhost:9000;sf_dir=/var/lib/qdb-sender;")) { + // ... +} +``` + +Same producer code; SF mode is enabled by the one extra key. Unacked +data persists at `/var/lib/qdb-sender/default/` across crashes. + +### Multi-host with object-store durability + +```java +try (Sender sender = Sender.fromConfig( + "wss::addr=node-a:9000,node-b:9000,node-c:9000;" + + "sf_dir=/var/lib/qdb-sender;sender_id=ingest-svc;" + + "request_durable_ack=on;" + + "username=ingest;password=…;")) { + // ... +} +``` + +`wss::` for TLS, three-host failover, durable-ack opt-in. Slot lives at +`/var/lib/qdb-sender/ingest-svc/`. The connect fails loudly if any peer +returns an upgrade without `X-QWP-Durable-Ack: enabled`. + +### Multi-tenant host with orphan rescue + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=node-a:9000;sf_dir=/var/lib/qdb-sender;" + + "sender_id=worker-" + workerInstanceId + ";" + + "drain_orphans=on;max_background_drainers=8;")) { + // ... +} +``` + +Each worker instance has a unique `sender_id`. When a worker crashes and +a new instance comes up under a different `sender_id`, the new +instance's foreground sender adopts the dead worker's slot in the +background and drains it. 
+ +### Long-outage tolerance for unattended ingest + +```java +try (Sender sender = Sender.fromConfig( + "ws::addr=primary:9000;sf_dir=/var/lib/qdb-sender;" + + "sf_max_total_bytes=50G;" + + "reconnect_max_duration_millis=3600000;" + + "initial_connect_retry=async;")) { + // ... +} +``` + +50 GB of buffer space, a one-hour reconnect budget, async initial +connect so the constructor returns immediately even if the server is +down. Suitable for edge / IoT producers on unreliable links. + +## Where each key is documented + +| Group | Connect-string reference | +|---|---| +| Storage (`sf_dir`, `sender_id`, …) | [#sf-keys](/docs/client-configuration/connect-string#sf-keys) | +| Reconnect (`reconnect_*`, `initial_connect_retry`, `close_flush_timeout_millis`) | [#reconnect-keys](/docs/client-configuration/connect-string#reconnect-keys) | +| Failover (`addr`, `zone`, `target`, `auth_timeout_ms`) | [#failover-keys](/docs/client-configuration/connect-string#failover-keys) | +| Auth (`username`, `password`, `token`) | [#auth](/docs/client-configuration/connect-string#auth) | +| TLS (`tls_*`) | [#tls](/docs/client-configuration/connect-string#tls) | diff --git a/documentation/high-availability/store-and-forward/operating-and-tuning.md b/documentation/high-availability/store-and-forward/operating-and-tuning.md new file mode 100644 index 000000000..8a0d3e481 --- /dev/null +++ b/documentation/high-availability/store-and-forward/operating-and-tuning.md @@ -0,0 +1,309 @@ +--- +title: Operating and tuning store-and-forward +sidebar_label: Operating & tuning +description: + Operational guidance for QuestDB store-and-forward producers — slot + directory layout, locks, capacity sizing, recovery, backpressure, + observability, and orphan adoption. +--- + +:::note Java-only today + +Client-side store-and-forward support is currently available in the Java +client. Additional language clients are on the roadmap. 
+ +::: + +This page is the operator-facing guide for SF in production: how to +provision the slot directory, what to watch, and how to tune the limits. +For the underlying model see +[Concepts](/docs/high-availability/store-and-forward/concepts/); for the +choice between memory mode and SF mode see +[When to use](/docs/high-availability/store-and-forward/when-to-use/). + +## Slot directory layout + +In SF mode every sender owns one **slot directory**: + +``` +// +├── .lock # advisory exclusive lock (kernel-released on process exit) +├── .lock.pid # UTF-8 text: holder PID + '\n' (diagnostic only) +├── .failed # optional drainer-failure sentinel (UTF-8 reason text) +├── .ack-watermark # optional 16-byte durable-ack high-water mark +├── sf-0000000000000001.sfa +├── sf-0000000000000002.sfa +└── ... +``` + +`` is the **group root** — the directory you point the connect +string at. `` is the slot subdirectory; it defaults to +`default` but should be set explicitly when more than one sender shares +the host. + +### `.lock` and `.lock.pid` + +The `.lock` file is held under an advisory exclusive lock for the engine's +lifetime — POSIX clients use `flock` / `fcntl`, Windows uses +`LockFileEx`. The lock is released automatically when the file descriptor +closes, including on hard process exit (kernel cleanup). + +A second sender pointing at the same slot directory will fail to start +with an error that names the holder's PID, read from `.lock.pid`. The +PID file is overwritten on every successful acquire; an absent or empty +`.lock.pid` reports `holder=unknown` rather than failing the lookup. + +Neither `.lock` nor `.lock.pid` is deleted on clean shutdown. Stale +files are harmless — the next acquirer silently overwrites them. + +**Cross-platform interop:** a POSIX client and a Windows client must +**not** share a slot on a network filesystem. Their lock primitives are +incompatible. 
+ +### `.failed` + +Present iff a previous drainer attempt gave up on the slot — reconnect +budget exhausted, terminal auth failure, or irrecoverable corruption. +The file contents are a UTF-8 reason for human operators; the **presence** +is the signal that the orphan scanner uses to exclude the slot from +auto-drain on subsequent scans. + +**Operator action:** read the reason, fix the underlying cause (rotate +credentials, restore the missing peer, etc.), then delete `.failed`. The +next sender that scans `` will pick the slot up again. + +### Segment files + +Segments are named `sf-.sfa` where `` is a 16-character +zero-padded hexadecimal generation counter. The number reflects +allocation order, **not** the FSN range — that lives in the file header +and is read at recovery time. + +Pre-allocation reserves real disk blocks at file creation. On Linux this +is `posix_fallocate`; on macOS, `F_PREALLOCATE` / `F_ALLOCATEALL`. The +substrate refuses to fall back silently to `ftruncate` on filesystems +where these are unsupported — sparse files would risk a `SIGBUS` later +when the mmap'd region writes into a hole. On filesystems where the +native layer **must** fall back to `ftruncate`, size `sf_max_bytes` +conservatively against free space. + +## Lock collisions in practice + +Two `sender_id`s in the same `sf_dir` never collide — they are +independent slots. The same `sender_id` started twice **will** collide, +and the second start fails loudly. + +A common cause is a redeploy where the old process hasn't fully exited +when the new one comes up. Solutions: + +- Wait for the old process to release the lock (the kernel releases on + exit; `kill -9` is sufficient). +- Use a deployment unit that orders shutdown before startup. +- For containerised deployments, set `sender_id` from a per-pod stable + identity so two pods with the same template name don't collide. + +`drain_orphans=on` does **not** override the lock — a busy orphan slot +is skipped, not stolen. 
+ +## Sizing capacity + +Two limits matter: + +### `sf_max_bytes` — per-segment file size (default `4 MiB`) + +This is the rotation threshold and the unit of trim. Segments that are +smaller release disk faster but waste more space on the active tail; +larger segments waste less on the active tail but hold acked frames in +the same file as the still-unacked tail until every frame in the segment +is acked. + +For most workloads `4 MiB` is fine. Raise it if you are appending very +large batches and pre-allocation cost matters; lower it if you observe +disk usage staying high under slow ack cadence. + +### `sf_max_total_bytes` — slot capacity (default `128 MiB` memory / `10 GiB` SF) + +This is the **hard cap** on resident SF storage — sealed segments plus +the active segment. When this fills, producer `appendBlocking` calls +block (with cooperative yield) for up to `sf_append_deadline_millis` +waiting for ACK-driven trim to free space; on timeout the call throws. + +Size this against your **worst expected outage** times your ingest +rate: + +``` +sf_max_total_bytes ≥ ingest_rate × max_tolerated_outage +``` + +A 5-minute reconnect budget at 10 MB/s of compressed frames implies at +least 3 GB. Add safety margin for trim latency — in particular, +`request_durable_ack=on` extends time-to-trim by the WAL→object-store +upload window. + +In memory mode the default `128 MiB` is deliberately small: it forces +you to think about backpressure rather than letting an outage silently +balloon process RSS. + +## Backpressure observability + +`appendBlocking` distinguishes two reasons it can stall: + +- **Wire-publishing backpressure.** The server is acking but the + producer is faster than ack throughput. The exception message names + this state. Solutions: scale the server, slow the producer, or raise + `sf_max_total_bytes`. +- **Reconnect backpressure.** The I/O loop is in the retry loop and the + substrate is filling. 
The exception message includes the attempt
+  count and outage start time. Solutions: address the cluster outage,
+  raise `sf_max_total_bytes`, or accept that the producer will start
+  throwing once the cap is exhausted.
+
+The `getTotalBackpressureStalls()` counter (see Observability below)
+records every producer thread that hit the cap.
+
+## Recovery on restart
+
+When an SF-mode sender opens, it runs this sequence:
+
+1. Acquire `<sf_dir>/<sender_id>/.lock`. Fail loudly on contention.
+2. Scan every `*.sfa` file:
+   - Validate magic, version, header.
+   - Walk frames forward verifying each CRC32C-Castagnoli.
+   - The first invalid frame becomes end-of-data; any non-zero bytes
+     past that point are logged as a torn-tail count.
+3. Sort segments by `baseSeq` and verify no gaps. A gap is a fatal
+   recovery error.
+4. Open `.ack-watermark` (if present) and read the cumulative
+   durable-ack FSN. Reject a watermark that exceeds the on-disk
+   ceiling — it would seed `ackedFsn` past every un-acked frame and
+   silently drop the un-acked tail.
+5. Seed `ackedFsn = max(lowestBaseSeq - 1, watermark)`.
+6. Allocate the next segment generation as `max(existing-gen) + 1`.
+7. Bump the connection generation so the I/O loop replays from disk
+   against a fresh wireSeq window.
+
+A clean shutdown that drained everything is indistinguishable from a
+fresh start: no segments, no replay.
+
+### Recovery failures
+
+| Symptom | Likely cause | Operator action |
+|---|---|---|
+| "Slot held by PID `<pid>`" | Two processes claiming the same `sender_id`. | Stop the duplicate. The lock releases on its exit. |
+| "Gap between segments" | Corruption — a segment was deleted out of band. | Restore from backup or accept data loss; the substrate refuses to start. |
+| "Watermark exceeds publishedFsn" | `.ack-watermark` is corrupt; the engine falls back to the no-watermark seed. | Logged as `WARN`. Replay will re-send the lowest segment's frames; rely on server deduplication.
| +| Torn tail count > 0 | The previous process crashed mid-frame-write. | Informational; the CRC + zero-fill design discards the partial frame. | + +## Close and shutdown + +`close()` semantics depend on `close_flush_timeout_millis`: + +| Value | Behaviour | +|---|---| +| `5000` (default) | Block up to 5 s waiting for `ackedFsn ≥ publishedFsn`. Log `WARN` on timeout; un-acked tail stays on disk (SF) or is lost (memory). | +| `0` or `-1` | Skip the drain wait. Pending data persists on disk (SF) for the next sender, or is lost (memory). | +| any other positive value | That timeout in milliseconds. | + +In every branch `close()`: + +- Performs a non-blocking safety-net check that rethrows any latched + terminal error not already delivered through an async handler or a + synchronous producer call. +- Releases the slot lock and unmaps segment files. + +The safety-net check is what makes "close-and-forget" callers safe: if +the only API your code uses is `close()`, terminal errors still surface +rather than silently sinking into a no-op handler. + +## Orphan adoption in operations + +With `drain_orphans=on`, the foreground sender — after acquiring its own +lock — scans `/*` for siblings that: + +- are not its own `sender_id`, +- contain at least one `*.sfa` file, +- do not have a `.failed` sentinel. + +Up to `max_background_drainers` drainers run concurrently. Each drainer +opens its own engine and WebSocket connection, runs recovery + replay, +and exits when the orphan's `ackedFsn ≥ publishedFsn`. + +### Drainer failure modes + +- **Reconnect budget exhausted.** Drainer writes `.failed` with reason, + releases the lock, exits. +- **Auth-terminal upgrade error.** Same. +- **Irrecoverable corruption.** Same. + +`.failed` slots are excluded from auto-drain on subsequent scans — +operator action is required to clear the sentinel. 
+ +### Observing drainers + +- `getActiveBackgroundDrainers()` — count of currently-running drainers + (best-effort: a just-finished drainer may still count for a few ms). +- `getTotalBackgroundDrainersSucceeded()` / `…Failed()` — cumulative + outcomes since process start. +- The `BackgroundDrainerListener` callback delivers per-drainer + events (progress watermark, durable-ack-mismatch escalation, terminal + outcome) for richer dashboards. +- On-disk `.failed` sentinels are the canonical record of giveup + events surviving sender restart. + +## Observability counters + +A conformant client exposes at minimum: + +| Counter | What it tells you | +|---|---| +| `getTotalReconnectAttempts()` | How often the wire has broken across the sender's lifetime. | +| `getTotalReconnectsSucceeded()` | How many of those recovered. | +| `getTotalFramesReplayed()` | Volume re-sent after reconnects. A spike usually means a fresh outage; sustained growth means a flapping wire. | +| `getTotalServerErrors()` | Count of error frames received (any category). | +| `getDroppedErrorNotifications()` | Error-inbox overflow count. Non-zero means a busy error stream or a slow handler. | +| `getTotalErrorNotificationsDelivered()` | Errors delivered to the user handler. | +| `getTotalBackpressureStalls()` | Producer threads that hit `sf_max_total_bytes`. | +| `getLastTerminalError()` | The latched `SenderError`, or null. | +| `getActiveBackgroundDrainers()` | Running orphan drainers right now. | +| `getTotalBackgroundDrainersSucceeded()` / `…Failed()` | Cumulative drainer outcomes. | + +### Suggested dashboards + +- **Reconnect health:** `reconnect_attempts - reconnect_succeeded` over + time. A non-zero difference for more than a few seconds means the + wire is currently down. Alert if it stays elevated past your + `reconnect_max_duration_millis`. +- **Replay volume:** `frames_replayed` rate. Bursts are expected; + sustained replay means a chronic instability. 
+- **Backpressure:** `backpressure_stalls` rate. Any non-zero rate is a
+  capacity signal.
+- **Error rate by category:** instrument your error handler to bucket
+  by category. Background `SCHEMA_MISMATCH` is usually a schema-drift
+  symptom worth alerting on.
+
+The default error handler logs every received `SenderError` —
+`ERROR`-level for HALT, `WARN`-level for DROP. Replace it only if you
+are also routing the errors somewhere else (Sentry, structured logs):
+silence is forbidden by the contract.
+
+## Multi-sender deployments
+
+When several senders share a host and a `sf_dir`:
+
+- Give each one a unique `sender_id`. The default `sender_id=default`
+  is fine for a single-sender host but collides for any second
+  sender.
+- Consider `drain_orphans=on` if dynamic sender identities mean dead
+  instances can leave permanent orphans.
+- Size `sf_max_total_bytes × number_of_senders` against available disk.
+- Plan for the worst-case lock-collision recovery: a misconfigured
+  fleet that all share `sender_id=default` will leave only one sender
+  alive on each host. That is the design — fail loudly rather than
+  silently corrupt overlapping slots.
+
+## Next steps
+
+- [Configuration](/docs/high-availability/store-and-forward/configuration/) —
+  the full connect-string key reference.
+- [Client failover concepts](/docs/high-availability/client-failover/concepts/) —
+  what the reconnect loop does between disconnects.
diff --git a/documentation/high-availability/store-and-forward/when-to-use.md b/documentation/high-availability/store-and-forward/when-to-use.md
new file mode 100644
index 000000000..931831e42
--- /dev/null
+++ b/documentation/high-availability/store-and-forward/when-to-use.md
@@ -0,0 +1,221 @@
+---
+title: When to use store-and-forward
+sidebar_label: When to use
+description:
+  Decision guide for choosing between memory mode and disk-backed
+  store-and-forward, when to opt into durable-ack trim, and when to enable
+  orphan adoption.
+--- + +:::note Java-only today + +Client-side store-and-forward support is currently available in the Java +client. Additional language clients are on the roadmap. + +::: + +The QWP WebSocket transport always uses a store-and-forward (SF) substrate. +What changes between deployments is **where** that substrate keeps unacked +data and **what durability bar** it acknowledges against. This page is the +decision guide. + +If you are new to SF, start with +[Concepts](/docs/high-availability/store-and-forward/concepts/). + +## Memory mode vs SF mode + +The single switch that decides this is whether you set `sf_dir` in the +connect string. + +### Memory mode — `sf_dir` unset + +Unacked frames live in a malloc'd ring in process memory. Default cap is +`128 MiB`. + +**Choose memory mode when:** + +- The producer process is short-lived or ephemeral (a CLI job, a CI + worker, a serverless function). +- A process restart is acceptable as a fresh start — you don't need + in-flight data to survive a crash. +- You only need to tolerate **transient** network blips and short server + outages (think: rolling upgrades, brief network partitions). +- Your data volume comfortably fits in RAM during the longest outage you + care about. + +### SF mode — `sf_dir=/path/to/slot-root` + +Unacked frames are written to mmap'd files under +`//`. Default cap is `10 GiB`. + +**Choose SF mode when:** + +- The producer process is long-running and outage budgets are measured + in minutes (the default `reconnect_max_duration_millis` is 5 minutes + for a reason). +- You need in-flight data to survive process restarts — JVM crash, OOM + kill, host reboot, planned redeploy. +- You ingest at rates where minutes of buffering exceeds RAM you can + spare. +- You operate unattended at the edge (sensors, ETL jobs) where the + server may sometimes be unreachable for extended periods. 
+ +Both modes share the same wire behaviour, the same failover loop, and +the same connect-string keys for everything other than storage. You can +switch between them without changing application code — only the connect +string. + +## Comparison at a glance + +| Question | Memory mode | SF mode | +|---|---|---| +| Where is buffered data? | Process RAM | Disk (`//`) | +| Default capacity | `128 MiB` | `10 GiB` | +| Survives a JVM crash? | No | Yes | +| Survives `kill -9`? | No | Yes | +| Survives a host reboot? | No | Yes (if the disk does) | +| Cross-sender rescue (orphan adoption) | n/a | Yes (opt-in) | +| Setup cost | Zero | Provisioning a writable directory | +| Operational cost | Zero | Sizing, monitoring, lock collisions | + +## Durable-ack: when to opt in + +By default the substrate trims unacked data on OK ack from the server. +That means the substrate releases a frame once the server has acknowledged +it into the WAL. The frame is durable on the **primary's** disk; whether +it has been replicated to the object store or to replicas is a separate +matter. + +When the connect string sets `request_durable_ack=on`, trim is held back +until a separate `STATUS_DURABLE_ACK` frame confirms the data has been +uploaded from the WAL to the **configured object store** (S3, Azure Blob, +GCS, or NFS). + +### Choose durable-ack when + +- You require object-store durability before considering a write + acknowledged — e.g. compliance requirements, end-to-end exactly-once + pipelines with cross-region recovery. +- Loss of an entire primary node (and its local disk) must not lose + in-flight data — replicas haven't downloaded the WAL yet, only the + object store has. +- You are willing to trade later trim (and so larger steady-state SF + disk usage) for the stronger guarantee. + +### Stay on the default OK trim when + +- WAL-local durability on the primary is sufficient. +- You want minimum steady-state disk usage. 
+- You are running OSS or a build that does not support durable-ack. + (The handshake fails loudly if you opt in but the server cannot + deliver — see below.) + +### Caveats + +- **Server support is required.** The client sends + `X-QWP-Request-Durable-Ack: true` on the upgrade. The server must echo + back `X-QWP-Durable-Ack: enabled`. If it does not — OSS build, + uninitialised primary, missing registry, hitting a replica — the + connect **fails loudly**, by design. Silently waiting for ack frames + that never arrive would let the SF disk fill up. +- **Idle keepalive.** The OSS server only flushes pending durable-ack + frames during inbound recv events. The client sends a WebSocket PING + every `durable_ack_keepalive_interval_millis` (default 200 ms) when + there are pending confirmations and the producer is idle. +- **Disk pressure.** Steady-state SF disk usage is roughly + `ingest_rate × time_to_object_store_durability`. Size + `sf_max_total_bytes` accordingly. + +## Orphan adoption: when to enable + +A sender that exits without draining its slot leaves unacked data on +disk. If another process restarts under the same `sender_id` and same +`sf_dir`, it picks up the orphan automatically as part of normal +recovery. But if no process ever uses that `sender_id` again, the data +sits on disk forever. + +Setting `drain_orphans=on` tells the **foreground sender** to scan +`/*` at startup for sibling `sender_id`s with unacked data and +spawn background drainers to clear them. + +### Enable orphan adoption when + +- You have a fleet of senders writing to a shared `sf_dir` (multi-tenant + host, container restart) and want any survivor to rescue dead + siblings' data. +- Your deployment can dynamically allocate `sender_id` (e.g. one per + process instance), so dead instances leave permanent orphans that no + natural restart will adopt. +- You prefer "automatic eventual delivery" over "operator manually + reattaches the slot." 
+ +### Leave it off when + +- Each `sender_id` is statically pinned to a specific process — there + are no orphans by construction; a restart of the same process + recovers its own slot. +- You want explicit operator control over data movement in a shared + `sf_dir`. +- You run a single producer per host. + +Drainer concurrency is capped by `max_background_drainers` (default +`4`). Each drainer opens its own connection — they share the network +path but not the WebSocket. + +`drain_orphans=on` does not interfere with regular recovery: the +foreground sender still recovers its own `sender_id` first, then +drainers spawn for sibling slots. + +## Migrating from HTTP/TCP ILP + +If you are currently using HTTP or TCP ILP ingest, the comparison is: + +| Capability | HTTP ILP | TCP ILP | QWP WebSocket + SF | +|---|---|---|---| +| Non-blocking producer | No (request waits) | No (TCP backpressure) | Yes (buffer absorbs publishes) | +| Survives process crash | No | No | Yes (SF mode) | +| Server outage tolerance | Best-effort retry | None | Reconnect loop with multi-minute budget | +| Multi-host failover | Yes (HTTP only) | No | Yes | +| Cross-region durability ack | No | No | Yes (`request_durable_ack=on`) | +| Cluster-wide ordering | Best-effort | Best-effort | FSN-driven, server-deduplicated | + +The transition is application-transparent — `Sender.fromConfig` accepts +a `ws::` or `wss::` connect string and the public builder API is the +same. The most common migration is HTTP ILP → QWP WS+SF, with `sf_dir` +set, retaining HTTP for backward compatibility while the QWP path +becomes the primary. + +For specifically the multi-host HA path on HTTP ILP, see the existing +[ILP overview "Multiple URLs for High Availability"](/docs/ingestion/ilp/overview/#multiple-urls-for-high-availability) +section. QWP failover (documented in +[Client failover concepts](/docs/high-availability/client-failover/concepts/)) +replaces and extends it. 
+ +## Decision flowchart + +```mermaid +graph TD + Q1{Will the producer outlive any single outage you care about?} + Q2{Does data need to survive a JVM crash or kill -9?} + Q3{Is object-store durability required before ack?} + Q4{Multiple senders share sf_dir, with dynamic sender_id?} + + Q1 -->|"No (ephemeral job)"| Memory[Memory mode — leave sf_dir unset] + Q1 -->|"Yes (long-running service)"| Q2 + Q2 -->|No| Memory + Q2 -->|Yes| SF[SF mode — set sf_dir] + SF --> Q3 + Q3 -->|Yes| Durable[Add request_durable_ack=on] + Q3 -->|No| Q4 + Durable --> Q4 + Q4 -->|Yes| Orphans[Add drain_orphans=on] + Q4 -->|No| Done[Configuration complete] + Orphans --> Done +``` + +## Next steps + +- [Configuration](/docs/high-availability/store-and-forward/configuration/) — + the connect-string keys. +- [Operating and tuning](/docs/high-availability/store-and-forward/operating-and-tuning/) — + slot layout, sizing, observability. diff --git a/documentation/sidebars.js b/documentation/sidebars.js index f5bd297a2..fe4972542 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -654,6 +654,24 @@ module.exports = { type: "doc", label: "WAL Cleanup", }, + { + type: "category", + label: "Client Failover", + items: [ + "high-availability/client-failover/concepts", + "high-availability/client-failover/configuration", + ], + }, + { + type: "category", + label: "Store-and-Forward", + items: [ + "high-availability/store-and-forward/concepts", + "high-availability/store-and-forward/when-to-use", + "high-availability/store-and-forward/operating-and-tuning", + "high-availability/store-and-forward/configuration", + ], + }, ], }, From 66d0d9d4b48d486ff31c31777f19124dce6ab1fc Mon Sep 17 00:00:00 2001 From: Vlad Ilyushchenko Date: Wed, 13 May 2026 21:33:09 +0100 Subject: [PATCH 02/44] docs: introduce Connect section, connect-string reference, QWP stubs Consolidate how-applications-talk-to-QuestDB content under a single Connect supersection. 
Rewrite the ingestion overview as QWP-native "Connect to QuestDB", add a comprehensive connect-string reference at documentation/client-configuration/, and scaffold the Wire Protocols sub-section under documentation/protocols/. Co-Authored-By: Claude Opus 4.7 (1M context) --- ONBOARDING.md | 181 +++++ .../client-configuration/connect-string.md | 620 ++++++++++++++++++ .../getting-started/capacity-planning.md | 2 +- .../clients/date-to-timestamp-conversion.md | 4 +- documentation/ingestion/overview.md | 193 +++--- .../operations/monitoring-alerting.md | 2 +- documentation/protocols/overview.md | 54 ++ .../protocols/qwp-egress-websocket.md | 75 +++ documentation/protocols/qwp-ingress-udp.md | 55 ++ .../protocols/qwp-ingress-websocket.md | 75 +++ documentation/query/datatypes/overview.md | 2 +- documentation/sidebars.js | 211 +++--- src/components/Resources/index.tsx | 2 +- 13 files changed, 1275 insertions(+), 201 deletions(-) create mode 100644 ONBOARDING.md create mode 100644 documentation/client-configuration/connect-string.md create mode 100644 documentation/protocols/overview.md create mode 100644 documentation/protocols/qwp-egress-websocket.md create mode 100644 documentation/protocols/qwp-ingress-udp.md create mode 100644 documentation/protocols/qwp-ingress-websocket.md diff --git a/ONBOARDING.md b/ONBOARDING.md new file mode 100644 index 000000000..b07b0bfd7 --- /dev/null +++ b/ONBOARDING.md @@ -0,0 +1,181 @@ +# QWP Documentation Project — Onboarding + +A coordinated three-person effort to document QuestDB's new wire protocols (QWP ingress + egress), client failover, and store-and-forward. This is your starting point. + +## The project in one paragraph + +We're shipping documentation for: a new public ingress wire protocol (QWP, with a WebSocket and a UDP variant), a new public egress wire protocol (QWP query result streaming), a comprehensive client failover system, and a store-and-forward client substrate. 
The specs in `questdb-enterprise/questdb/docs/qwp/` are the source of truth. The reference client is `java-questdb-client`. We document **Java-only on day one** — other languages follow later. + +## Setup + +Clone these three repos as siblings (parent directory doesn't matter, but they share one): + +``` +parent/ +├── documentation/ ← this repo, where docs land +├── questdb-enterprise/ ← spec source: docs/qwp/*.md +└── java-questdb-client/ ← reference implementation +``` + +You need access to the enterprise repo — ping in your team channel if you don't have it. + +Local dev server (from `documentation/`): + +``` +yarn install +yarn start # http://localhost:3001 +``` + +See `CLAUDE.md` in this repo for Docusaurus conventions, admonition syntax, custom components, and the railroad-diagram workflow. + +## Bundle assignments (proposed — swap if needed) + +| Bundle | Person | Scope | Files (exclusive ownership) | +|---|---|---|---| +| **A — Wire Protocols** | **Javier** | 4 new pages: Overview, QWP Ingress (WS), QWP Ingress (UDP), QWP Egress (WS). Audience is third-party client implementers. | `documentation/protocols/**` (all new) | +| **B — Client Configuration + central wiring** | **Vlad** | New top-level connect-string reference, 3 patches to existing pages, sole owner of `sidebars.js`. | `documentation/client-configuration/**`, the 3 patch files below, `documentation/sidebars.js` | +| **C — Client Reliability** | **Imre** | 6 new pages: 2 client failover, 4 store-and-forward. Lives under the Connect section (cross-linked from the existing High Availability section for server-side context). 
| `documentation/ingestion/clients/failover/**`, `documentation/ingestion/clients/store-and-forward/**` | + +Bundle B's three patch files: +- `documentation/ingestion/ilp/overview.md` — shorten "Multiple URLs for HA" → link to Bundle C +- `documentation/ingestion/clients/java.md` — shorten "Configuring multiple URLs" → link to Bundle C +- `documentation/ingestion/clients/configuration-string.md` — redirect to new location + +## Don't-trip-over-each-other rules + +1. **`sidebars.js` is single-writer.** Only Bundle B edits it. A and C: send your entries in PR descriptions; B commits them in one go. +2. **The connect-string page is single-writer.** Only B edits `documentation/client-configuration/connect-string.md`. C delivers SF / failover / reconnect key documentation as draft markdown snippets to B for inclusion. +3. **Day 1 — B lands the skeleton first.** Empty connect-string page with stable anchor IDs (`#auth`, `#tls`, `#failover-keys`, `#sf-keys`, `#reconnect-keys`, `#egress-flow`) + 4 Protocols stub pages + `sidebars.js` entries. Until this lands, A and C should not commit new pages — internal links would 404. +4. **File scopes are hard.** No bundle edits files outside its scope. Disputed patches belong to B. +5. **B's patches land last.** They replace shallow content with links into C's new pages, so they wait until C's pages are live. + +## Source specs + +Located in `questdb-enterprise/questdb/docs/qwp/`. These are normative — if a doc page contradicts the spec, the spec wins. 
+ +| Spec file | Used by | +|---|---| +| `wire-ingress.md` | A (Ingress WS page) | +| `wire-egress.md` | A (Egress WS page) | +| `wire-udp.md` | A (Ingress UDP page) | +| `failover.md` | C (failover pages), B (failover keys section) | +| `sf-client.md` | C (SF pages), B (SF + reconnect keys sections) | +| `README.md` | A (Overview page), all (audience matrix) | + +Reference implementation paths in `java-questdb-client/`: +- `core/src/main/java/io/questdb/client/cutlass/qwp/` — QWP client +- `core/src/main/java/io/questdb/client/cutlass/qwp/client/sf/` — store-and-forward +- `core/src/main/java/io/questdb/client/impl/ConfStringParser.java` — canonical list of connect-string options +- `core/src/main/java/io/questdb/client/Sender.java` — public builder API + +## Using Claude Code on this project + +### Start a session + +From the `documentation/` clone: + +``` +claude +``` + +`CLAUDE.md` is loaded automatically — Claude already knows about Docusaurus conventions and dev commands. + +### High-value patterns for this work + +**Hand the spec to Claude — don't paraphrase.** +``` +Read ../questdb-enterprise/questdb/docs/qwp/wire-ingress.md. +We'll write documentation/protocols/qwp-ingress-websocket.md from it. +Audience: third-party client implementers. +``` + +**Use plan mode for any new page.** Press the plan-mode shortcut (or type `/plan`) before drafting so you can review structure and approach before content is written. + +**Delegate broad searches to subagents.** "Where is the existing failover documentation in this repo?" — Claude will spawn an Explore subagent instead of grepping in the foreground. + +**Cross-check against the reference impl.** When documenting an option: +``` +Before I write up reconnect_max_duration_millis, check +ConfStringParser.java in ../java-questdb-client for the actual default +and behavior. +``` + +**Run `/review` on your branch** before opening a PR. + +### Project-specific tips + +- Spec paths are relative to `documentation/`. 
Tell Claude they're sibling clones: `../questdb-enterprise/questdb/docs/qwp/...`.
- Docusaurus admonitions (`:::note`, `:::tip`, `:::warning`), code fences with `questdb-sql` for syntax highlighting, custom MDX components such as `<EnterpriseNote />` — all covered in `CLAUDE.md`.
- For grammar railroad diagrams in protocol pages, see the `scripts/railroad.py` workflow in `CLAUDE.md`.
- **Java-only callout** belongs at the top of every failover and SF page:
  > Client-side support is currently available in the Java client. Additional language clients are on the roadmap.
- Always run `yarn build` locally before opening a PR — it catches broken internal links.

### First-prompt templates

**Javier — Bundle A (Wire Protocols):**
```
I'm documenting the QWP wire protocols for third-party client implementers.

Read ../questdb-enterprise/questdb/docs/qwp/README.md for the audience matrix,
then ../questdb-enterprise/questdb/docs/qwp/wire-ingress.md.

Help me draft documentation/protocols/qwp-ingress-websocket.md.
Audience: someone writing a non-Java client from scratch. They need framing,
type codes, schema/null encoding, close/error codes, versioning, and a
pointer to the reference impl (java-questdb-client at a pinned commit).

Use plan mode first.
```

**Vlad — Bundle B (Client Configuration + central wiring):**
```
I'm promoting documentation/ingestion/clients/configuration-string.md to a
new top-level "Client Configuration" section. The same connect-string now
drives ILP, QWP ingress, QWP egress, failover, and store-and-forward.

Read the existing page, then ../java-questdb-client/core/src/main/java/io/
questdb/client/impl/ConfStringParser.java for the canonical option list.

Today's goal is a skeleton with stable anchor IDs (#auth, #tls,
#failover-keys, #sf-keys, #reconnect-keys, #egress-flow) so my
collaborators can deep-link while I flesh out the body.
Also add the new +top-level entry in sidebars.js and 4 stub pages under documentation/ +protocols/ (Overview, Ingress WS, Ingress UDP, Egress WS). + +Use plan mode first. +``` + +**Imre — Bundle C (Client Reliability):** +``` +I'm writing client-side reliability documentation under the Connect +section. The files live under documentation/ingestion/clients/ in two +sub-folders: failover/ and store-and-forward/. + +Read ../questdb-enterprise/questdb/docs/qwp/failover.md and +../questdb-enterprise/questdb/docs/qwp/sf-client.md. + +Six pages to write: +- ingestion/clients/failover/concepts.md +- ingestion/clients/failover/configuration.md +- ingestion/clients/store-and-forward/concepts.md +- ingestion/clients/store-and-forward/when-to-use.md +- ingestion/clients/store-and-forward/operating.md +- ingestion/clients/store-and-forward/configuration.md + +Start with the failover concepts page. Audience is end users on QuestDB +Enterprise. Java-only callout at the top of every page. Cross-link to +the existing High Availability section for server-side HA context. + +Use plan mode first. +``` + +## When you're stuck + +- **Spec ambiguity** — ask the spec author before improvising. Specs are normative. +- **Cross-bundle question** — post in the project channel. Don't solve it by editing someone else's files. +- **Claude Code question** — type `/help` in a session. + +--- + +Good luck. The structure is designed so each bundle can drive to PR independently after Day 1. diff --git a/documentation/client-configuration/connect-string.md b/documentation/client-configuration/connect-string.md new file mode 100644 index 000000000..93715309d --- /dev/null +++ b/documentation/client-configuration/connect-string.md @@ -0,0 +1,620 @@ +--- +title: Connect string reference +description: + Configuration knobs for QuestDB native clients (QWP over WebSocket). + Drives ingress, egress, multi-host failover, and store-and-forward. 
+--- + +The QuestDB native client is configured with a single connect string. The +same string format drives QWP ingress, QWP egress, multi-host failover, and +the store-and-forward substrate. Per-language clients accept the same +options under the same names, so configuration is portable across +implementations. + +For legacy InfluxDB Line Protocol (ILP) transports (`http`, `https`, `tcp`, +`tcps`), see the [ILP overview](/docs/ingestion/ilp/overview/). + +**On this page:** + +- [Syntax](#syntax) +- [Common patterns](#common-patterns) +- [Recipes](#recipes) +- [Protocols and transports](#protocols-and-transports) +- [Authentication](#auth) +- [TLS](#tls) +- [Auto-flushing](#auto-flush) +- [Buffer sizing](#buffer) +- [Multi-host failover](#failover-keys) +- [Store-and-forward](#sf-keys) +- [Reconnect and failover](#reconnect-keys) +- [Durable ACK](#egress-flow) +- [Error handling](#error-handling) +- [Key index](#key-index) + +## Syntax {#syntax} + +A connect string has the form: + +``` +schema::key1=value1;key2=value2; +``` + +The `schema` selects the wire protocol and transport. The remaining +`key=value` pairs configure it. The trailing semicolon is optional but +recommended. + +For example: + +``` +http::addr=localhost:9000;username=admin;password=secret; +``` + +This selects the HTTP transport, connects to `localhost:9000`, and provides +basic-auth credentials. + +For the list of supported schemas, see +[Protocols and transports](#protocols-and-transports). + +### Grammar + +- **Schema** — alphanumeric ASCII characters and underscore. Terminated by + `::`. +- **Key** — alphanumeric ASCII characters and underscore. Terminated by `=`. + Keys are case-sensitive; the canonical form is lowercase `snake_case`. +- **Value** — any character except control characters + (U+0000–U+001F, U+007F–U+009F). Terminated by `;`. +- **Escaping** — to include a literal `;` in a value, double it (`;;`). 
+ +Example with an escaped semicolon in a password (the actual password value +is `p;ssw;rd`): + +``` +http::addr=localhost:9000;username=admin;password=p;;ssw;;rd; +``` + +### Loading a connect string + +The Java client accepts a connect string in three ways: + +- From a string literal: + + ```java + Sender sender = Sender.fromConfig("http::addr=localhost:9000;"); + ``` + +- From an environment variable (reads `QDB_CLIENT_CONF`): + + ```java + Sender sender = Sender.fromEnv(); + ``` + +- From the builder, which accepts the same option keys programmatically: + + ```java + Sender sender = Sender.builder(Transport.HTTP) + .address("localhost:9000") + .build(); + ``` + +Other language clients expose equivalent entry points; see each +[client library page](/docs/ingestion/overview/#client-libraries) for the +per-language syntax. + +## Common patterns {#common-patterns} + +Canonical shapes for typical deployments. Each can be extended with +auth, failover, or store-and-forward options from the sections below. + +### Local development (no auth, no TLS) + +``` +ws::addr=localhost:9000; +``` + +### Production with basic auth (TLS) + +``` +wss::addr=questdb.example.com:443;username=admin;password=secret; +``` + +### Production with a custom trust store + +``` +wss::addr=questdb.example.com:443;username=admin;password=secret;tls_roots=/etc/ssl/truststore.jks;tls_roots_password=changeit; +``` + +### Ingest with store-and-forward across multiple nodes + +``` +ws::addr=node-a:9000,node-b:9000;sf_dir=/var/lib/myapp/qdb-sf;sender_id=ingest-1; +``` + +### Query (egress) preferring a replica in your zone + +``` +wss::addr=node-a:443,node-b:443;target=replica;zone=eu-west-1a; +``` + +## Recipes {#recipes} + +Goal-to-keys mapping. For complete connect-string templates, see +[Common patterns](#common-patterns). For per-key details (type, default, +caveats), follow the section links from the [Key index](#key-index). 

| Goal | Direction | Required keys | Optional / related |
|---|---|---|---|
| Minimal connect string | both | `addr` | — |
| Enable TLS | both | `addr` with `wss` schema | `tls_verify`, `tls_roots`, `tls_roots_password` |
| Basic-auth credentials | both | `username`, `password` | `auth_timeout_ms` |
| Bearer-token credentials | both | `token` | `auth_timeout_ms` |
| Multi-host failover | both | `addr=h1,h2,…` | `target`, `zone`, `reconnect_*` (ingress), `failover_*` (egress) |
| Query only the primary (freshest data) | egress | `target=primary` | — |
| Query only replicas (offload primary) | egress | `target=replica` | — |
| Zone-aware routing with DR last-resort | egress | `zone=<zone-id>` | `target` |
| Tune ingest batching | ingress | — | `auto_flush_rows`, `auto_flush_interval`, `auto_flush_bytes` |
| Disable auto-flush (manual `flush()` only) | ingress | `auto_flush=off` | — |
| Memory-buffered ingest (no disk durability) | ingress | (omit `sf_dir`) | `init_buf_size`, `max_buf_size` |
| Durable store-and-forward ingest | ingress | `sf_dir` | `sender_id`, `sf_max_bytes`, `sf_max_total_bytes`, `sf_append_deadline_millis` |
| Run multiple senders sharing one `sf_dir` | ingress | `sf_dir`, `sender_id` | unique `sender_id` per sender |
| Orphan recovery for crashed senders | ingress | `drain_orphans=on` | `max_background_drainers` |
| End-to-end durable acknowledgement | ingress | `request_durable_ack=on` | `durable_ack_keepalive_interval_millis` |
| Tune ingress reconnect budget | ingress | — | `reconnect_initial_backoff_millis`, `reconnect_max_backoff_millis`, `reconnect_max_duration_millis` |
| Retry initial connect | ingress | `initial_connect_retry=on` or `=async` | `reconnect_*` |
| Fast `close()` without drain | ingress | `close_flush_timeout_millis=0` | — |
| Disable per-query egress failover | egress | `failover=off` | — |
| Tune per-query egress failover | egress | — | `failover_max_attempts`, `failover_backoff_initial_ms`, `failover_backoff_max_ms`, `failover_max_duration_ms` |
| Configure async error inbox | both | — | `error_inbox_capacity` |

## Protocols and transports {#protocols-and-transports}

*Applies to: ingress and egress.*

The schema prefix selects the QWP transport.

| Schema | Transport | Default port | Notes |
|---|---|---|---|
| `ws` | WebSocket | `9000` | QWP over plain WebSocket. Use for development or trusted networks. |
| `wss` | WebSocket + TLS | `9000` | QWP over TLS-secured WebSocket. Recommended for production. |
| `udp` | UDP | `9007` | Fire-and-forget metrics ingest, single table per datagram. See [QWP Ingress (UDP)](/docs/protocols/qwp-ingress-udp/). |

The default port is applied when `addr` omits `:port`. Note that `wss` does
**not** default to `443`: both `ws` and `wss` use `9000` unless overridden.

QWP negotiates its protocol version during the WebSocket upgrade — clients
do not need to configure it.

## Authentication {#auth}

*Applies to: ingress and egress.*

QWP runs over WebSocket and uses HTTP-style credentials sent on the
WebSocket upgrade request.

- `username` — username for HTTP basic authentication.
- `password` — password for HTTP basic authentication.
- `token` — bearer token sent as `Authorization: Bearer <token>`. Mutually
  exclusive with `username` / `password`.
- `auth_timeout_ms` — per-host upper bound on the upgrade response read.
  Does not cover TCP connect, TLS handshake, or post-upgrade frame reads —
  those use OS or hard-coded defaults. Default: `15000` (15 s).

For mutual TLS, configure trusted roots in the [TLS](#tls) section.
+ +## TLS {#tls} + +*Applies to: ingress and egress.* + +TLS is enabled by selecting the `wss` schema. + +- `tls_verify` — controls server certificate verification. Options: `on`, + `unsafe_off`. Default: `on`. `unsafe_off` disables verification; **use + only for testing** — bypassing verification makes the connection + vulnerable to MITM attacks. +- `tls_roots` — path to a Java keystore (`.jks`) containing trusted root + certificates. If omitted, the system default trust store is used. +- `tls_roots_password` — password for the keystore file. Required when + `tls_roots` is set. + +See also the [server-side TLS configuration](/docs/security/tls/). + +## Auto-flushing {#auto-flush} + +*Applies to: ingress.* + +The client buffers rows in memory and flushes them to the server in batches. +Auto-flushing controls when the buffer is sent without an explicit +`flush()` call. + +- `auto_flush` — global enable. Options: `on`, `off`. Default: `on`. + When `off`, the application must call `flush()` explicitly to send + buffered rows. +- `auto_flush_rows` — flush when the buffered row count reaches this + threshold. Set to `off` to disable. Default: `1000`. +- `auto_flush_interval` — flush when this many milliseconds have elapsed + since the first buffered row. Evaluated on the next `at()` / `flush()` + call (not driven by a wall-clock timer). Set to `off` to disable. + Default: `100` (100 ms). +- `auto_flush_bytes` — flush when the encode buffer reaches this byte + size. Set to `off` to disable. Default: `0` (off). Accepts + [size suffixes](#size-suffixes). + +## Buffer sizing {#buffer} + +*Applies to: ingress (encode buffer). `max_schemas_per_connection` also +applies to egress.* + +These keys control the in-memory row buffer that the client uses before +flushing. + +- `init_buf_size` — initial buffer size in bytes. Default: `65536` + (64 KiB). Accepts [size suffixes](#size-suffixes). +- `max_buf_size` — maximum buffer size; the buffer grows up to this cap. 
+ Default: `104857600` (100 MiB). Accepts size suffixes. +- `max_name_len` — maximum allowed length of a table or column name in + bytes. Default: `127`. +- `max_schemas_per_connection` — per-connection ceiling on the number of + distinct schema IDs the client can register. WebSocket / QWP only. + Default: `65535`. +- `max_datagram_size` — UDP only. Maximum datagram size; defaults to a + value below typical Ethernet MTU. + +### Size suffixes {#size-suffixes} + +Size-typed values (`init_buf_size`, `max_buf_size`, `sf_max_bytes`, +`sf_max_total_bytes`) accept JVM-style unit suffixes. Suffixes are +case-insensitive and 1024-based, matching `-Xmx` conventions: + +| Suffix | Meaning | Example | +| -------------- | ----------------- | ------------ | +| *(none)* | bytes | `65536` | +| `k` or `kb` | KiB (× 1024) | `64k` | +| `m` or `mb` | MiB (× 1024²) | `4m`, `4mb` | +| `g` or `gb` | GiB (× 1024³) | `1g`, `10gb` | +| `t` or `tb` | TiB (× 1024⁴) | `1t` | + +## Multi-host failover {#failover-keys} + +*Applies to: ingress and egress. The [Role filter and zone preference](#role-filter-and-zone-preference) +sub-section is egress only.* + +:::note QuestDB Enterprise + +Multi-host failover requires QuestDB Enterprise. OSS is single-node — there +is no secondary server to fail over to. + +::: + +The connect string accepts multiple `host:port` pairs in `addr`. Two +syntaxes are accepted and accumulate: + +``` +ws::addr=node-a:9000,node-b:9000,node-c:9000; +``` + +``` +ws::addr=node-a:9000;addr=node-b:9000;addr=node-c:9000; +``` + +Empty entries (`,,`, or leading / trailing commas) are rejected. + +The I/O loop rotates through the endpoints on every reconnect attempt +within a single outage budget. When the server rejects the connection +because the current host is in the wrong role, the client treats it as +failover input and immediately tries the next endpoint without waiting for +backoff. 
+ +### Role filter and zone preference + +Both `target` and `zone` apply to **egress only**. QuestDB is currently a +single-primary cluster: ingress automatically follows the primary across +the host list and adapts when the primary moves to another node. These +keys are silently accepted on ingress but have no effect. + +- `target` — server-role filter applied per endpoint after the upgrade + reads `SERVER_INFO`. Options: + - `any` (default) — no preference; route to any healthy endpoint. + - `primary` — route only to the writer. Use when queries must see the + most recent data; replicas are eventually consistent and may lag the + primary. + - `replica` — route only to replicas. Use for historical or analytical + queries to avoid contending with the ingest traffic the primary is + handling. + + Endpoints whose role does not match the filter are skipped. + +- `zone` — client zone identifier (opaque, case-insensitive — e.g. + `eu-west-1a`, `dc-amsterdam`). When set, egress prefers endpoints whose + server-advertised `zone_id` matches the client's. Mismatched-zone + endpoints — typically a remote DR replica — drop to a lower priority + tier; the client routes to them only as a last resort, when every + same-zone endpoint is unhealthy. With `target=primary`, zone preference + collapses: the writer is followed regardless of zone. + +The full behavioural model — host picker policy, host-health states, error +classification, and backoff schedule — is documented under the Connect +section (Client failover, coming with Bundle C). Server-side HA is covered +separately under the +[High Availability section](/docs/high-availability/overview/). + +Related: [Reconnect and failover](#reconnect-keys), +[Store-and-forward](#sf-keys). + +:::warning Enable DEDUP on tables ingested through failover + +On unplanned failover — when the primary dies before issuing a durable +ACK — the client replays unacknowledged frames against the new primary. 
+Without [DEDUP](/docs/concepts/deduplication/) on the target table, those +replays can produce duplicate rows. Tables ingested through a multi-host +failover connect string **must** declare `DEDUP UPSERT KEYS(...)` covering +row identity. + +::: + +## Store-and-forward {#sf-keys} + +*Applies to: ingress.* + +Store-and-forward (SF) is an opt-in durability substrate available on QWP / +WebSocket. The client persists outgoing frames to disk before sending; the +server's cumulative ACK trims acknowledged segments. If the connection drops +or the client process restarts, the I/O thread silently reconnects and +replays whatever is still on disk. + +To enable SF mode, set `sf_dir`. Without it, the client runs a memory-only +equivalent — same architecture, no durability across restarts. + +### Storage + +- `sf_dir` — parent directory under which the slot lives. The slot path is + `//`. Required for SF mode; omit for memory-only mode. + Path handling: + - Taken verbatim. Absolute paths recommended for production; relative + paths resolve against the process working directory. + - Shell-style expansions like `~` are **not** expanded by the client. + - The leaf directory is created automatically if missing, but its parent + must already exist — the client does not create paths recursively. +- `sender_id` — slot identity. The slot lives at `//`, + used verbatim as the directory name. Allowed characters: letters, + digits, `_`, `-`. No path separators, no `.`, no spaces. Two senders + sharing the same `sender_id` collide on the slot lock — the second one + fails fast. Default: `default`. +- `sf_durability` — disk durability mode. Currently only `memory` is + shipping. (`flush` and `append` per-write fsync modes are planned.) +- `sf_max_bytes` — per-segment rotation threshold. Must be ≥ the largest + single flushed frame. Default: `4 MiB` (`4m`). Accepts + [size suffixes](#size-suffixes). +- `sf_max_total_bytes` — hard cap on per-slot storage. 
When the cap is + reached, append blocks until ACKs trim space (see + `sf_append_deadline_millis`). Defaults: `10 GiB` (`10g`) in SF mode, + `128 MiB` (`128m`) in memory mode. Accepts size suffixes. + +### Sender restart and replay + +SF persists outgoing frames and the durable-ack watermark to disk under +`//`. + +**Recovery is triggered at Sender creation.** When the application +instantiates a new sender — `Sender.fromConfig(...)`, `Sender.fromEnv()`, +or the builder — the client analyses the on-disk state under `sf_dir` +before returning control. There is no background daemon; replay is part +of the Sender lifecycle. + +To resume from the previous session's buffer after a restart — clean +exit, SIGKILL, host crash, or reboot — instantiate a new sender with the +**same** `sf_dir` and `sender_id`: + +1. The new sender acquires the slot's POSIX `flock` (`LockFileEx` on + Windows). If the previous process is still alive and holds the lock, + the new sender fails fast with `sf slot already in use`. The kernel + releases the lock on process exit, even after SIGKILL, so a crashed + sender does not leave the slot stuck. +2. Recovery reads the persisted ack watermark and replays every on-disk + segment past it against the server. Replay runs on the I/O thread in + parallel with the application's new `append()` calls — the application + is not blocked. + +If `sf_dir` is a relative path, ensure the process resolves it the same +way after restart (typically: use an absolute path). + +For an **abandoned** slot to be picked up by a *different* sender — the +original is never coming back — see [Orphan recovery](#orphan-recovery) +below. + +**At-least-once delivery.** Replay can re-send frames the server already +accepted but did not durable-acknowledge before the previous sender died. +To prevent duplicate rows in the target table, declare +[DEDUP](/docs/concepts/deduplication/) `UPSERT KEYS(...)` covering row +identity. 
### Backpressure

- `sf_append_deadline_millis` — maximum time `append()` waits for trim to
  free space when the cap is hit. If the deadline fires, the call throws.
  Default: `30000` (30 s).

### Orphan recovery

When `drain_orphans=on`, the new sender scans `<sf_dir>/*` at startup for
sibling slots that are unlocked and contain unacked data. The scan runs
as part of Sender creation (alongside the same-slot recovery above). Each
orphan slot is locked, drained on its own dedicated connection, and
released — **multiple orphans drain in parallel**, up to
`max_background_drainers` concurrent drains.

- `drain_orphans` — `on` enables the orphan drainer pool. Default: `off`.
- `max_background_drainers` — maximum concurrent drainers. Default: `4`.

For delivery semantics, architecture, and tradeoffs (at-least-once
guarantees, DEDUP requirements, segment-granular trim), see the
Store-and-forward concepts page under Connect (coming with Bundle C).

## Reconnect and failover {#reconnect-keys}

*Applies to: ingress and egress (separate key families).*

QWP / WebSocket has two distinct recovery loops, each with its own knob
family. The **ingress** cursor-engine reconnect loop runs continuously for
the lifetime of the sender. The **egress** per-`Execute()` failover loop
runs once per query.

### Ingress reconnect

These keys control the cursor-engine reconnect loop used by QWP ingest.
SF mode and memory-only mode share the same loop.

- `reconnect_initial_backoff_millis` — initial wait between reconnect
  attempts. Backoff grows exponentially up to `reconnect_max_backoff_millis`.
  Default: `100`.
- `reconnect_max_backoff_millis` — cap on per-attempt backoff. (Alias:
  `max_backoff_millis`.) Default: `5000` (5 s).
- `reconnect_max_duration_millis` — total time budget for a single outage.
  Once exceeded, the I/O loop gives up and surfaces a terminal error.
  Default: `300000` (5 min).
+- `initial_connect_retry` — whether the initial connect attempt is retried + on failure. The same loop drives the retry. + - `off` (default, alias `false`) — fail fast on initial connect failure. + - `on` (aliases `sync`, `true`) — retry synchronously on the user + thread. + - `async` — return the `Sender` immediately; the I/O thread retries in + the background, surfacing terminal failures via the error inbox. +- `close_flush_timeout_millis` — `close()` blocks up to this many + milliseconds waiting for buffered frames to drain. Default: `5000` (5 s). + Set to `0` or `-1` for fast close (skip the drain). + +Auth failures during reconnect (authentication rejected, version mismatch, +durable-ack mismatch, non-101 upgrade without a role hint) are immediately +terminal — the loop does not retry them. + +### Egress failover + +These keys control the per-`Execute()` reconnect loop on the QWP query +client. Each query has its own budget; the loop resets between queries. +Requires QuestDB Enterprise (multi-host). + +- `failover` — master switch. `on` (default) or `off`. When `off`, + transport errors surface directly through `onError` without retry. +- `failover_max_attempts` — cap on reconnects per `Execute()` (initial + attempt + `N − 1` failovers). Default: `8`. +- `failover_backoff_initial_ms` — first post-failure sleep. Default: `50`. +- `failover_backoff_max_ms` — cap on per-attempt sleep. Default: `1000` + (1 s). +- `failover_max_duration_ms` — total wall-clock budget per `Execute()`. + Default: `30000` (30 s). Set to `0` for unbounded. + +## Durable ACK {#egress-flow} + +*Applies to: ingress.* + +:::note QuestDB Enterprise + +Durable ACK requires QuestDB Enterprise. OSS is single-node and does not +ship WALs off-box, so the server-side durability-acknowledgement signal +that drives this protocol is enterprise-only. + +::: + +QuestDB Enterprise ships Write-Ahead Logs (WALs) from the primary to an +object store or another file system — typically over the network. 
Once a +WAL is durably shipped, the server emits a `STATUS_DURABLE_ACK` frame to +the store-and-forward client; the client marks that frame's FSN as durable +only after this acknowledgement arrives. + +The benefit: if the primary dies before shipping a WAL, the client still +holds the corresponding frames in its SF buffer and replays them against +the new primary on failover — closing the data-loss window that a +transport-level OK ACK alone cannot close. + +- `request_durable_ack` — when `on`, the client gates trim on + `STATUS_DURABLE_ACK` frames from the server, suppressing OK-driven trim. + Default: `off`. +- `durable_ack_keepalive_interval_millis` — interval at which the client + emits keepalive PINGs while waiting for durable-ack frames. Required + because the server only flushes pending durable acks on inbound recv + events. Default: `200` (ms). Set to `0` or a negative value to disable. + +See the [QWP Egress (WebSocket)](/docs/protocols/qwp-egress-websocket/) +wire protocol for the underlying mechanism. + +## Error handling {#error-handling} + +*Applies to: ingress and egress.* + +The QWP / WebSocket I/O loop reports errors via an asynchronous inbox +consumed by the application. + +- `error_inbox_capacity` — bounded capacity for async error notifications. + Must be ≥ `16`. Overflow drops the oldest entry and bumps a + `droppedErrorNotifications` counter. Default: `256`. + +The following per-category override keys are **reserved by the spec but +not yet recognised by the Java connect-string parser** — today they are +wired only via the fluent builder API. New client implementations should +accept them in the connect string per the spec; precedence rules are +documented in the [QWP store-and-forward spec](https://github.com/questdb/questdb-enterprise/blob/main/questdb/docs/qwp/sf-client.md) +§14. + +- `on_server_error` — handler for server-reject status frames. +- `on_schema_error` — handler for schema-validation errors. 
+- `on_parse_error` — handler for client-side parse errors. +- `on_internal_error` — handler for unexpected client-side errors. +- `on_security_error` — handler for auth / TLS errors. +- `on_write_error` — handler for transport write failures. + +## Key index {#key-index} + +Alphabetical list of every option. The Section column links to the full +description and behaviour notes. + +| Key | Type | Default | Section | +| --------------------------------------- | ----------------------------- | ----------------------------- | ------------------------------------------------------------- | +| `addr` | `host:port[,host:port…]` | required | [Multi-host failover](#failover-keys) | +| `auth_timeout_ms` | int (ms) | `15000` | [Authentication](#auth) | +| `auto_flush` | enum (`on` / `off`) | `on` | [Auto-flushing](#auto-flush) | +| `auto_flush_bytes` | size | `0` (off) | [Auto-flushing](#auto-flush) | +| `auto_flush_interval` | int (ms) / `off` | `100` (100 ms) | [Auto-flushing](#auto-flush) | +| `auto_flush_rows` | int / `off` | `1000` | [Auto-flushing](#auto-flush) | +| `close_flush_timeout_millis` | int (ms) | `5000` | [Ingress reconnect](#reconnect-keys) | +| `drain_orphans` | enum (`on` / `off`) | `off` | [Store-and-forward](#sf-keys) | +| `durable_ack_keepalive_interval_millis` | int (ms) | `200` | [Durable ACK](#egress-flow) | +| `error_inbox_capacity` | int (≥ 16) | `256` | [Error handling](#error-handling) | +| `failover` | enum (`on` / `off`) | `on` | [Egress failover](#reconnect-keys) | +| `failover_backoff_initial_ms` | int (ms) | `50` | [Egress failover](#reconnect-keys) | +| `failover_backoff_max_ms` | int (ms) | `1000` | [Egress failover](#reconnect-keys) | +| `failover_max_attempts` | int | `8` | [Egress failover](#reconnect-keys) | +| `failover_max_duration_ms` | int (ms) | `30000` | [Egress failover](#reconnect-keys) | +| `init_buf_size` | size | `65536` (64 KiB) | [Buffer sizing](#buffer) | +| `initial_connect_retry` | enum (`off` / `on` / `async`) | `off` 
| [Ingress reconnect](#reconnect-keys) | +| `max_background_drainers` | int | `4` | [Store-and-forward](#sf-keys) | +| `max_buf_size` | size | `104857600` (100 MiB) | [Buffer sizing](#buffer) | +| `max_datagram_size` | size | (UDP) below typical MTU | [Buffer sizing](#buffer) | +| `max_name_len` | int | `127` | [Buffer sizing](#buffer) | +| `max_schemas_per_connection` | int | `65535` | [Buffer sizing](#buffer) | +| `on_internal_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_parse_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_schema_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_security_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_server_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `on_write_error` * | enum | — (reserved) | [Error handling](#error-handling) | +| `password` | string | unset | [Authentication](#auth) | +| `reconnect_initial_backoff_millis` | int (ms) | `100` | [Ingress reconnect](#reconnect-keys) | +| `reconnect_max_backoff_millis` | int (ms) | `5000` | [Ingress reconnect](#reconnect-keys) | +| `reconnect_max_duration_millis` | int (ms) | `300000` (5 min) | [Ingress reconnect](#reconnect-keys) | +| `request_durable_ack` | enum (`on` / `off`) | `off` | [Durable ACK](#egress-flow) | +| `sender_id` | string | `default` | [Store-and-forward](#sf-keys) | +| `sf_append_deadline_millis` | int (ms) | `30000` (30 s) | [Store-and-forward](#sf-keys) | +| `sf_dir` | path | unset (memory mode) | [Store-and-forward](#sf-keys) | +| `sf_durability` | enum (`memory`) | `memory` | [Store-and-forward](#sf-keys) | +| `sf_max_bytes` | size | `4 MiB` | [Store-and-forward](#sf-keys) | +| `sf_max_total_bytes` | size | `128 MiB` mem / `10 GiB` SF | [Store-and-forward](#sf-keys) | +| `target` | enum (`any` / `primary` / `replica`) | `any` | [Multi-host failover](#failover-keys) | +| `tls_roots` | path | system trust store | 
[TLS](#tls) | +| `tls_roots_password` | string | — (required if `tls_roots`) | [TLS](#tls) | +| `tls_verify` | enum (`on` / `unsafe_off`) | `on` | [TLS](#tls) | +| `token` | string | unset | [Authentication](#auth) | +| `username` | string | unset | [Authentication](#auth) | +| `zone` | string | unset | [Multi-host failover](#failover-keys) | + +\* Reserved by the spec; the Java connect-string parser does not yet +recognise these — they are currently wired only via the fluent builder +API. New client implementations should accept them. See +[Error handling](#error-handling). diff --git a/documentation/getting-started/capacity-planning.md b/documentation/getting-started/capacity-planning.md index 0257762a5..ace91f78f 100644 --- a/documentation/getting-started/capacity-planning.md +++ b/documentation/getting-started/capacity-planning.md @@ -209,7 +209,7 @@ As of QuestDB 7.4.2, InfluxDB Line Protocol operates over HTTP instead of TCP. As such, ILP is optimal out-of-the box. -See your [ILP client](/docs/ingestion/overview/#first-party-clients) for +See your [ILP client](/docs/ingestion/overview/#client-libraries) for language-specific configurations. ### Postgres Wire Protocol diff --git a/documentation/ingestion/clients/date-to-timestamp-conversion.md b/documentation/ingestion/clients/date-to-timestamp-conversion.md index 9293f8cf3..3d0e266b4 100644 --- a/documentation/ingestion/clients/date-to-timestamp-conversion.md +++ b/documentation/ingestion/clients/date-to-timestamp-conversion.md @@ -1,7 +1,9 @@ --- title: Date to Timestamp Conversion in Different Programming Languages sidebar_label: Date to Timestamp -description: Python, Go, JAVA, JavaScript, C/C++, Rust, .Net, PHP, or Ruby. +description: + How to convert language-native date/time values into QuestDB timestamp + columns, for each supported client library. --- Most languages have a dedicated type for dates or timestamps, with the notable exception of C. 
In this guide, we show how to convert from a literal string representing a date into the native `Date` type, and then diff --git a/documentation/ingestion/overview.md b/documentation/ingestion/overview.md index f050359b8..e1ed8a626 100644 --- a/documentation/ingestion/overview.md +++ b/documentation/ingestion/overview.md @@ -1,124 +1,103 @@ --- -title: Ingestion overview +title: Connect to QuestDB +sidebar_label: Overview description: - Learn how to ingest data into QuestDB, whether through the InfluxDB Line - Protocol, PostgreSQL Wire Protocol, or through a service like Apache Kafka, - Apache Spark, and more. + How to send data to QuestDB and run queries. Choose between native client + libraries, compatibility protocols (ILP, PGWire, REST), or the wire-protocol + specifications. --- -import Screenshot from "@theme/Screenshot" - import { Clients } from "../../src/components/Clients" -For high-throughput data ingestion, use our **first-party clients** with the -**InfluxDB Line Protocol (ILP)**. This is the recommended method for production -workloads. - -## First-party clients - -Our first-party clients are **the fastest way to insert data**. They excel -with high-throughput, low-latency data streaming and are the recommended choice -for production deployments. - -To start quickly, select your language: +QuestDB exposes several ways for applications to send data and run queries. +Pick the path that matches your environment. 
+ +## Choose your path + +| Your situation | Use | +| ----------------------------------------------------------------------- | ---------------------------------------------------------------- | +| Greenfield app — want the best throughput, durability, and feature set | [**Client Libraries**](#client-libraries) | +| Existing InfluxDB collectors, Telegraf, or Kafka / Flink pipelines | [Compatibility → ILP](/docs/ingestion/ilp/overview/) | +| Postgres-shaped data layer, BI tools, ORMs | [Compatibility → PGWire](/docs/query/pgwire/overview/) | +| HTTP scripts, ad-hoc `curl`, or CSV imports | [Compatibility → REST API](/docs/query/rest-api/) | +| Building a new QuestDB client library (QWP spec) | [Wire Protocols](/docs/protocols/overview/) | + +## Client Libraries + +The first-party libraries for **Java, Python, Go, Rust, Node.js, C & C++, and +.NET** are the recommended way to talk to QuestDB. They speak the +**QuestDB Wire Protocol (QWP)** and unify ingest and query under one +configuration and one connection. + +### QWP support + +QWP ships in the libraries below. The remaining language clients are being +updated — until they ship a QWP build, they continue to use ILP for ingestion +and PGWire for queries. + +| Language | QWP support | +| --------- | ----------- | +| Java | ✓ | +| C & C++ | ✓ | +| Rust | ✓ | +| Go | ✓ | +| .NET | ✓ | +| Python | Planned | +| Node.js | Planned | + +Highlights: + +- **Binary on the wire** — roughly half the size of ILP or HTTP. +- **Streaming both directions** — sustained 800 MiB/s ingress, up to + 2.5 GiB/s egress on a single connection. +- **Automatic failover** — ingress and egress fail over without application + intervention. +- **Store-and-forward** — survives server outages, including full server + destruction. Sub-200 ns offload latency. +- **One configuration** — a single + [connect string](/docs/client-configuration/connect-string/) drives every + option, portable across all languages. 
+- **Schema-flexible** — automatic table creation and on-the-fly column + additions. + +Pick a language: -Our clients utitilize the InfluxDB Line Protocol (ILP) which is an insert-only -protocol that bypasses SQL `INSERT` statements, thus achieving significantly -higher throughput. It also provides some key benefits: - -- **Automatic table creation**: No need to define your schema upfront. -- **Concurrent schema changes**: Seamlessly handle multiple data streams with - on-the-fly schema modifications -- **Optimized batching**: Use strong defaults or curate the size of your batches -- **Health checks and feedback**: Ensure your system's integrity with built-in - health monitoring -- **Automatic write retries**: Reuse connections and retry after interruptions - -An example of "data-in" - via the line - appears as: - -```shell -trades,symbol=ETH-USD,side=sell price=2615.54,amount=0.00044 1646762637609765000\n -trades,symbol=BTC-USD,side=sell price=39269.98,amount=0.001 1646762637710419000\n -trades,symbol=ETH-USD,side=buy price=2615.4,amount=0.002 1646762637764098000\n -``` - -Once inside of QuestDB, it's yours to manipulate and query via extended SQL. Please note that table and column names -must follow the QuestDB [naming rules](/docs/query/sql/create-table/#table-name). - -### Ingestion characteristics - -QuestDB is optimized for both throughput and latency. Send data when you have -it - there's no need to artificially batch on the client side. - -| Mode | Throughput (per connection) | -|------|----------------------------| -| Batched writes | ~400k rows/sec | -| Single-row writes | ~60-80k rows/sec | - -Clients control batching via explicit `flush()` calls. Each flush ends a batch -and sends it to the server. If your data arrives one row at a time, send it one -row at a time - QuestDB handles this efficiently. If data arrives in bursts, -batch it naturally and flush when ready. - -Server-side, WAL processing is asynchronous. 
Transactions are grouped into -segments that roll based on size or row count, requiring no client-side tuning. - -## Message brokers and queues - -If you already have Kafka, Flink, or another streaming platform in your stack, -QuestDB integrates seamlessly. - -See our integration guides: - -- [Flink](/docs/ingestion/message-brokers/flink) -- [Kafka](/docs/ingestion/message-brokers/kafka) -- [Redpanda](/docs/ingestion/message-brokers/redpanda) -- [Telegraf](/docs/ingestion/message-brokers/telegraf) - -## CSV import - -For bulk imports or one-time data loads, use the -[Import CSV tab](/docs/getting-started/web-console/import-csv) in the [Web Console](/docs/getting-started/web-console/overview/): - - - -For all CSV import methods, including using the APIs directly, see the -[CSV Import Guide](/docs/ingestion/import-csv/). - -## Create new data +## Compatibility protocols -No data yet? Just starting? No worries. We've got you covered. +Use these if you have existing tooling that speaks them, or if a native client +library isn't a fit for your environment. -There are several quick scaffolding options: +- **[InfluxDB Line Protocol (ILP)](/docs/ingestion/ilp/overview/)** — the + text-based ingest protocol used by InfluxDB. Works with Telegraf, Kafka, + Redpanda, Flink, and any collector that already emits ILP. +- **[PostgreSQL Wire Protocol (PGWire)](/docs/query/pgwire/overview/)** — query + QuestDB from any Postgres-compatible driver (psycopg, JDBC, pgx, …), BI + tools (Tableau, Grafana, Metabase), and ORMs. +- **[REST API](/docs/query/rest-api/)** — HTTP / JSON endpoints for ad-hoc + queries, scripting, and bulk [CSV import](/docs/ingestion/import-csv/). -1. [QuestDB demo instance](https://demo.questdb.io): Hosted, fully loaded and - ready to go. Quickly explore the [Web Console](/docs/getting-started/web-console/overview/) and SQL syntax. -2. 
[Create my first data set guide](/docs/getting-started/create-database/): Create - tables, use `rnd_` functions and make your own data. -3. [Sample dataset repos](https://github.com/questdb/sample-datasets): IoT, - e-commerce, finance or git logs? Check them out! -4. [Quick start repos](https://github.com/questdb/questdb-quickstart): - Code-based quick starts that cover ingestion, querying and data visualization - using common programming languages and use cases. Also, a cat in a tracksuit. -5. [Time series streaming analytics template](https://github.com/questdb/time-series-streaming-analytics-template): - A handy template for near real-time analytics using open source technologies. +These remain fully supported. They are grouped as *compatibility* because they +predate QWP and exist primarily to integrate with tooling that already speaks +them. -## Next step - queries +## Wire protocols -Depending on your infrastructure, it should now be apparent which ingestion -method is worth pursuing. +The byte-on-the-wire specifications for the **QuestDB Wire Protocol (QWP)**, +including WebSocket variants for ingress and egress and a UDP variant for +fire-and-forget metrics. Read these if you are **building a new QuestDB +client library** in a language we don't yet support, or embedding QuestDB +connectivity into an existing framework. -Of course, ingestion (data-in) is only half the battle. +See the [Wire Protocols reference](/docs/protocols/overview/). -> **Your next best step? Learn how to query and explore data-out from the -> [Query & SQL Overview](/docs/query/overview/).** +## Next steps -It might also be a solid bet to review -[timestamp basics](/docs/concepts/timestamps-timezones/). +- Pick a language above and follow its quick-start. +- For SQL syntax, functions, and operators, see the + [SQL Reference](/docs/query/overview/). +- New to QuestDB? 
Try the [demo instance](https://demo.questdb.io), or follow + the [first-data-set guide](/docs/getting-started/create-database/). +- Background on time-series fundamentals: + [timestamp basics](/docs/concepts/timestamps-timezones/). diff --git a/documentation/operations/monitoring-alerting.md b/documentation/operations/monitoring-alerting.md index 940fb43d6..e61c8aff8 100644 --- a/documentation/operations/monitoring-alerting.md +++ b/documentation/operations/monitoring-alerting.md @@ -200,7 +200,7 @@ WHERE walEnabled **Resolution:** -- Use the [official client libraries](/docs/ingestion/overview/#first-party-clients) +- Use the [official client libraries](/docs/ingestion/overview/#client-libraries) which handle batching automatically - For custom ILP clients, configure auto-flush by row count or time interval rather than flushing after each row diff --git a/documentation/protocols/overview.md b/documentation/protocols/overview.md new file mode 100644 index 000000000..5baa309f5 --- /dev/null +++ b/documentation/protocols/overview.md @@ -0,0 +1,54 @@ +--- +title: Wire protocols overview +description: + QuestDB's wire-protocol specifications for client implementers. +--- + +:::note Page in draft + +This is the day-one skeleton for the Protocols section. Content is being +filled in. + +::: + +:::info Audience + +This section documents QuestDB's wire protocols at the byte-on-the-wire +level for **client implementers** — engineers building a new QuestDB client +from scratch. End users should see the +[language client guides](/docs/ingestion/overview) and the +[connect string reference](/docs/client-configuration/connect-string). + +::: + +## QWP — QuestWire Protocol + +QWP is QuestDB's native wire protocol for both ingest and query traffic. The +specifications below are normative — if a client's behaviour conflicts with +a spec, the spec wins. 
+ +| Protocol | Transport | Purpose | +| --- | --- | --- | +| [QWP Ingress (WebSocket)](/docs/protocols/qwp-ingress-websocket) | WebSocket | Columnar binary ingest with optional store-and-forward | +| [QWP Ingress (UDP)](/docs/protocols/qwp-ingress-udp) | UDP | Fire-and-forget metrics ingest, MTU-bounded | +| [QWP Egress (WebSocket)](/docs/protocols/qwp-egress-websocket) | WebSocket | Streaming SQL query results | + +## Versioning + + + +## Reference implementation + +The reference client implementation is the Java client +([`java-questdb-client`](https://github.com/questdb/java-questdb-client)). +Each protocol page below pins the reference-implementation commit that +matches the documented version. + + + +## Source specifications + +The canonical specs live in the QuestDB Enterprise repository under +`docs/qwp/`. The pages in this section are the public expression of those +specs; the specs themselves remain the source of truth. diff --git a/documentation/protocols/qwp-egress-websocket.md b/documentation/protocols/qwp-egress-websocket.md new file mode 100644 index 000000000..1089be5ad --- /dev/null +++ b/documentation/protocols/qwp-egress-websocket.md @@ -0,0 +1,75 @@ +--- +title: QWP Egress (WebSocket) +description: + Wire-protocol specification for QuestDB's WebSocket-based streaming + query-result protocol. +--- + +:::note Page in draft + +This is the day-one skeleton. Content will be filled in from +`questdb-enterprise/questdb/docs/qwp/wire-egress.md`. This page documents +the **Phase 1** surface — Phase 2 features (row iterator, unbounded CANCEL, +lazy decode, multi-query, prepared statements) are tracked separately and +not yet public. + +::: + +:::info Audience + +This is a **wire-protocol specification** intended for client implementers +building a new QuestDB query client. End users see the +[language client guides](/docs/query/overview) and the +[connect string reference](/docs/client-configuration/connect-string). 
+ +::: + +## Overview {#overview} + + + +## Versioning {#versioning} + + + +## Connection lifecycle {#lifecycle} + + + +## Query submission {#submission} + + + +## Result framing {#results} + + + +## Schema messages {#schema} + + + +## Flow control {#flow-control} + + + +## Durable ACK {#durable-ack} + + + +## Error codes {#errors} + + + +## Close codes {#close-codes} + + + +## Reference implementation {#reference} + + diff --git a/documentation/protocols/qwp-ingress-udp.md b/documentation/protocols/qwp-ingress-udp.md new file mode 100644 index 000000000..29044cd47 --- /dev/null +++ b/documentation/protocols/qwp-ingress-udp.md @@ -0,0 +1,55 @@ +--- +title: QWP Ingress (UDP) +description: + Wire-protocol specification for QuestDB's UDP-based fire-and-forget + ingest variant. +--- + +:::note Page in draft + +This is the day-one skeleton. Content will be filled in from +`questdb-enterprise/questdb/docs/qwp/wire-udp.md`. + +::: + +:::info Audience + +This is a **wire-protocol specification** intended for client implementers +building a UDP-based ingest agent (typically a metrics collector). End users +see the [language client guides](/docs/ingestion/overview) and the +[connect string reference](/docs/client-configuration/connect-string). + +::: + +## Overview {#overview} + + + +## Versioning {#versioning} + + + +## Datagram layout {#layout} + + + +## MTU sizing {#mtu} + + + +## Single-table constraint {#single-table} + + + +## Type codes and encoding {#types} + + + +## Loss semantics {#loss} + + + +## Reference implementation {#reference} + + diff --git a/documentation/protocols/qwp-ingress-websocket.md b/documentation/protocols/qwp-ingress-websocket.md new file mode 100644 index 000000000..c9542ee41 --- /dev/null +++ b/documentation/protocols/qwp-ingress-websocket.md @@ -0,0 +1,75 @@ +--- +title: QWP Ingress (WebSocket) +description: + Wire-protocol specification for QuestDB's WebSocket-based columnar binary + ingest protocol. 
+--- + +:::note Page in draft + +This is the day-one skeleton. Content will be filled in from +`questdb-enterprise/questdb/docs/qwp/wire-ingress.md`. + +::: + +:::info Audience + +This is a **wire-protocol specification** intended for client implementers +building a new QuestDB ingest client. End users see the +[language client guides](/docs/ingestion/overview) and the +[connect string reference](/docs/client-configuration/connect-string). + +::: + +## Overview {#overview} + + + +## Versioning {#versioning} + + + +## Connection lifecycle {#lifecycle} + + + +## Frame structure {#framing} + + + +## Schema messages {#schema} + + + +## Type codes and column encoding {#types} + + + +## Null encoding {#nulls} + + + +## Error codes {#errors} + + + +## Close codes {#close-codes} + + + +## Store-and-forward interaction {#sf} + + + +Client-side store-and-forward behaviour will be documented under the +Connect section (coming with Bundle C). + +## Reference implementation {#reference} + + diff --git a/documentation/query/datatypes/overview.md b/documentation/query/datatypes/overview.md index a645b2598..14bda43dd 100644 --- a/documentation/query/datatypes/overview.md +++ b/documentation/query/datatypes/overview.md @@ -170,7 +170,7 @@ PreparedStatement ps = connection.prepareStatement("INSERT INTO my_table VALUES ps.setObject(1, uuid); ``` -[QuestDB Client Libraries](/docs/ingestion/overview/#first-party-clients) can +[QuestDB Client Libraries](/docs/ingestion/overview/#client-libraries) can send `UUIDs` as `strings` to be converted to UUIDs by the server. 
## IPv4 diff --git a/documentation/sidebars.js b/documentation/sidebars.js index f5bd297a2..aefc6f5c6 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -45,26 +45,35 @@ module.exports = { }, // =================== - // INGESTION REFERENCE + // CONNECT // =================== { type: "category", - label: "Ingestion Reference", + label: "Connect", items: [ { id: "ingestion/overview", type: "doc", label: "Overview", }, + { + id: "client-configuration/connect-string", + type: "doc", + label: "Connect string", + }, + { + id: "ingestion/clients/date-to-timestamp-conversion", + type: "doc", + label: "Date to Timestamp", + }, { type: "category", - label: "Language Clients", - collapsed: true, + label: "Client Libraries", items: [ { - id: "ingestion/clients/configuration-string", + id: "ingestion/clients/java", type: "doc", - label: "Configuration String", + label: "Java", }, { id: "ingestion/clients/python", @@ -76,11 +85,6 @@ module.exports = { type: "doc", label: "Go", }, - { - id: "ingestion/clients/java", - type: "doc", - label: "Java", - }, { id: "ingestion/clients/rust", type: "doc", @@ -101,32 +105,16 @@ module.exports = { type: "doc", label: ".NET", }, - { - id: "ingestion/clients/date-to-timestamp-conversion", - type: "doc", - label: "Date to Timestamp", - }, - ], - }, - { - type: "category", - label: "Message Brokers", - collapsed: true, - items: [ - "ingestion/message-brokers/kafka", - "ingestion/message-brokers/telegraf", - "ingestion/message-brokers/redpanda", - "ingestion/message-brokers/flink", ], }, { type: "category", - label: "Protocols", - collapsed: true, + label: "Compatibility Protocols", items: [ { type: "category", label: "InfluxDB Line Protocol (ILP)", + collapsed: true, items: [ { id: "ingestion/ilp/overview", @@ -143,91 +131,136 @@ module.exports = { type: "doc", label: "Advanced Settings", }, + { + id: "ingestion/java-embedded", + type: "doc", + label: "Java Embedded", + }, + { + type: "category", + label: "Message 
Brokers", + collapsed: true, + items: [ + "ingestion/message-brokers/kafka", + "ingestion/message-brokers/telegraf", + "ingestion/message-brokers/redpanda", + "ingestion/message-brokers/flink", + ], + }, ], }, { - id: "ingestion/java-embedded", - type: "doc", - label: "Java Embedded", + type: "category", + label: "PostgreSQL Wire Protocol (PGWire)", + collapsed: true, + items: [ + { + id: "query/pgwire/overview", + type: "doc", + label: "Overview", + }, + { + id: "query/pgwire/large-result-sets", + type: "doc", + label: "Large Result Sets", + }, + { + id: "query/pgwire/python", + type: "doc", + label: "Python", + }, + { + id: "query/pgwire/go", + type: "doc", + label: "Go", + }, + { + id: "query/pgwire/java", + type: "doc", + label: "Java", + }, + { + id: "query/pgwire/rust", + type: "doc", + label: "Rust", + }, + { + id: "query/pgwire/nodejs", + type: "doc", + label: "Node.js", + }, + { + id: "query/pgwire/dotnet", + type: "doc", + label: ".NET", + }, + { + id: "query/pgwire/php", + type: "doc", + label: "PHP", + }, + { + id: "query/pgwire/r", + type: "doc", + label: "R", + }, + { + id: "query/pgwire/c-and-cpp", + type: "doc", + label: "C/C++", + }, + ], + }, + { + type: "category", + label: "REST API", + collapsed: true, + items: [ + "query/rest-api", + "ingestion/import-csv", + "query/export-parquet", + ], }, ], }, - "ingestion/import-csv", - ], - }, - - // =================== - // QUERY & SQL REFERENCE - // =================== - { - type: "category", - label: "Query & SQL Reference", - items: [ - "query/overview", { + label: "Wire Protocols", type: "category", - label: "PostgreSQL Wire Protocol", collapsed: true, items: [ { - id: "query/pgwire/overview", + id: "protocols/overview", type: "doc", label: "Overview", }, { - id: "query/pgwire/large-result-sets", - type: "doc", - label: "Large Result Sets", - }, - { - id: "query/pgwire/python", - type: "doc", - label: "Python", - }, - { - id: "query/pgwire/go", - type: "doc", - label: "Go", - }, - { - id: 
"query/pgwire/java", - type: "doc", - label: "Java", - }, - { - id: "query/pgwire/rust", - type: "doc", - label: "Rust", - }, - { - id: "query/pgwire/nodejs", + id: "protocols/qwp-ingress-websocket", type: "doc", - label: "Node.js", + label: "QWP Ingress (WebSocket)", }, { - id: "query/pgwire/dotnet", + id: "protocols/qwp-ingress-udp", type: "doc", - label: ".NET", + label: "QWP Ingress (UDP)", }, { - id: "query/pgwire/php", + id: "protocols/qwp-egress-websocket", type: "doc", - label: "PHP", - }, - { - id: "query/pgwire/r", - type: "doc", - label: "R", - }, - { - id: "query/pgwire/c-and-cpp", - type: "doc", - label: "C/C++", + label: "QWP Egress (WebSocket)", }, ], }, - "query/rest-api", - "query/export-parquet", + ], + }, + + // =================== + // SQL REFERENCE + // =================== + { + type: "category", + label: "SQL Reference", + items: [ + "query/overview", { type: "category", label: "Data Types", diff --git a/src/components/Resources/index.tsx b/src/components/Resources/index.tsx index e2fa9f143..96d2f1d4c 100644 --- a/src/components/Resources/index.tsx +++ b/src/components/Resources/index.tsx @@ -41,7 +41,7 @@ const resources: Array = [ }, }, { - href: '/docs/ingestion/overview/#first-party-clients', + href: '/docs/ingestion/overview/#client-libraries', name: 'Language clients', description: 'Explore our language clients and how to use them to ingest data into QuestDB.', From a9bb3e9cd58273601e6605406171202db19cb868 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 00:31:55 +0200 Subject: [PATCH 03/44] first version of qwp ingress websocket reference --- .../protocols/qwp-ingress-websocket.md | 1004 ++++++++++++++++- 1 file changed, 965 insertions(+), 39 deletions(-) diff --git a/documentation/protocols/qwp-ingress-websocket.md b/documentation/protocols/qwp-ingress-websocket.md index c9542ee41..96c916dd0 100644 --- a/documentation/protocols/qwp-ingress-websocket.md +++ b/documentation/protocols/qwp-ingress-websocket.md @@ -1,75 +1,1001 
@@ --- -title: QWP Ingress (WebSocket) +title: QWP ingress (WebSocket) description: Wire-protocol specification for QuestDB's WebSocket-based columnar binary ingest protocol. --- -:::note Page in draft +:::info Audience -This is the day-one skeleton. Content will be filled in from -`questdb-enterprise/questdb/docs/qwp/wire-ingress.md`. +This is a **wire-protocol specification** for client implementers building a +new QuestDB ingest client from scratch. End users should see the +[language client guides](/docs/ingestion/overview) and the +[connect string reference](/docs/client-configuration/connect-string). ::: -:::info Audience +QuestWire Protocol (QWP) is QuestDB's columnar binary protocol for +high-throughput data ingestion over WebSocket. Each message carries one or more +table blocks, where every column's values are stored contiguously. Batched +messages, schema references, and Gorilla-compressed timestamps reduce wire +overhead for sustained streaming workloads. -This is a **wire-protocol specification** intended for client implementers -building a new QuestDB ingest client. End users see the -[language client guides](/docs/ingestion/overview) and the -[connect string reference](/docs/client-configuration/connect-string). +This page covers WebSocket ingress only. Related specifications: +[QWP ingress (UDP)](/docs/protocols/qwp-ingress-udp/) for fire-and-forget +datagram ingestion, and +[QWP egress (WebSocket)](/docs/protocols/qwp-egress-websocket/) for streaming +query results back to clients. + +## Overview + +QWP encodes data in a column-major layout: all values for a single column are +packed together before the next column begins. This allows the server to +decompress and commit each column independently, avoiding row-by-row +deserialization. + +Design goals: + +- **Column-oriented**: values for each column are contiguous in the message. +- **Batch-oriented**: a single message can carry rows for multiple tables. 
+- **Schema-referencing**: after the first batch, subsequent batches reference a + previously sent schema by numeric ID, avoiding redundant column definitions. +- **Timestamp compression**: designated timestamp columns can use + Gorilla delta-of-delta encoding, reducing 8 bytes per timestamp to as + little as 1 bit for steady-rate streams. + +Every QWP message begins with a 4-byte magic: + +| Magic | Hex value | Description | +|--------|----------------|-----------------------| +| `QWP1` | `0x31505751` | Standard data message | + +## Transport and versioning + +### WebSocket endpoints + +The client initiates an HTTP GET request to either `/write/v4` or `/api/v4/write` +with standard [WebSocket](https://datatracker.ietf.org/doc/html/rfc6455) upgrade +headers. After the server responds with `101 Switching Protocols`, all +communication uses binary WebSocket frames. + +### Version negotiation + +During the HTTP upgrade, the client and server negotiate the protocol version +using custom headers. + +**Client request headers:** + +| Header | Required | Description | +|---------------------|----------|--------------------------------------------------------------------------------------| +| `X-QWP-Max-Version` | No | Maximum QWP version the client supports (positive integer). Defaults to 1 if absent. | +| `X-QWP-Client-Id` | No | Free-form client identifier (e.g., `java/1.0.2`, `zig/0.1.0`). | + +**Server response header:** + +| Header | Description | +|-----------------|-----------------------------------------------| +| `X-QWP-Version` | The QWP version selected for this connection. | + +The server selects the version as `min(clientMax, serverMax)`. The selected +version is never higher than either side's maximum. + +### Connection-level contract + +All messages on a connection must carry the negotiated version in the version +byte (offset 4) of the message header. 
The server validates every incoming +message against the negotiated version and rejects mismatches with a parse +error. + +### Current version + +Ingress is pinned to version 1. No v2 ingest semantics exist. Ingress +clients advertise `X-QWP-Max-Version: 1`. + +## Authentication + +Authentication is handled at the HTTP level during the WebSocket upgrade +handshake, before any QWP binary frames are exchanged. + +Supported methods: + +- **HTTP basic auth** (OSS and Enterprise): see + [Authentication in QuestDB Open Source](/docs/query/rest-api/#authentication-in-questdb-open-source). +- **Token-based auth** (Enterprise only): see + [Authentication (RBAC)](/docs/query/rest-api/#authentication-rbac). +- **OIDC** (Enterprise only): see [OpenID Connect](/docs/security/oidc/). + +A failed authentication results in a `401` or `403` HTTP response before the +WebSocket connection is established. No QWP-level auth handshake exists. + +## Encoding primitives + +### Byte ordering + +All multi-byte numeric values are **little-endian**. Variable-length integers +use unsigned LEB128 (see below). + +### Variable-length integer encoding (varint) + +:::note LEB128 + +LEB128 (Little Endian Base 128) is a variable-length integer encoding from the +[DWARF debugging format](https://en.wikipedia.org/wiki/LEB128), also used by +Protocol Buffers and WebAssembly. It encodes small values in fewer bytes than +fixed-width integers. + +::: + +QWP uses **unsigned LEB128** for variable-length integers. Values are split into +7-bit groups, least significant first. The high bit of each byte is a +continuation flag: set (1) means more bytes follow, clear (0) means this is the +last byte. A 64-bit value requires at most 10 bytes. 
+ +**Encoding:** + +```python +while (value & ~0x7F) != 0: + output_byte((value & 0x7F) | 0x80) + value >>= 7 +output_byte(value) +``` + +**Decoding:** + +```python +result = 0 +shift = 0 +while True: + b = read_byte() + result |= (b & 0x7F) << shift + shift += 7 + if (b & 0x80) == 0: + break +return result +``` + +**Examples:** + +| Value | Encoded bytes | +|-------|--------------------| +| 0 | `0x00` | +| 1 | `0x01` | +| 127 | `0x7F` | +| 128 | `0x80 0x01` | +| 255 | `0xFF 0x01` | +| 300 | `0xAC 0x02` | +| 16384 | `0x80 0x80 0x01` | + +### ZigZag encoding + +:::note ZigZag encoding + +ZigZag encoding maps signed integers to unsigned integers so that values with +small absolute values produce small varints. It was popularized by +[Protocol Buffers](https://protobuf.dev/programming-guides/encoding/#signed-ints). + +::: + +```python +def zigzag_encode(n): + return (n << 1) ^ (n >> 63) + +def zigzag_decode(n): + return (n >> 1) ^ -(n & 1) +``` + +| Signed | Unsigned | +|--------|----------| +| 0 | 0 | +| -1 | 1 | +| 1 | 2 | +| -2 | 3 | +| 2 | 4 | + +## Message structure + +### Message header (12 bytes, fixed) + +```text +Offset Size Type Field Description +------ ---- ------ ------------- -------------------------------- +0 4 int32 magic "QWP1" (0x31505751) +4 1 uint8 version Protocol version (0x01) +5 1 uint8 flags Encoding flags +6 2 uint16 table_count Number of table blocks +8 4 uint32 payload_length Payload size in bytes +``` + +**Total message size** = 12 + payload_length. 
+ +### Flags byte + +| Bit | Mask | Name | Description | +|-----|--------|----------------------------|-------------------------------------------------------| +| 0-1 | | Reserved | Must be 0 | +| 2 | `0x04` | `FLAG_GORILLA` | Gorilla delta-of-delta encoding for timestamp columns | +| 3 | `0x08` | `FLAG_DELTA_SYMBOL_DICT` | Delta symbol dictionary mode enabled | +| 4-7 | | Reserved | Must be 0 | + +### Complete message layout + +```text ++---------------------------------------------+ +| Message Header (12 bytes) | ++---------------------------------------------+ +| Payload (variable) | +| +- [Delta Symbol Dictionary] (if 0x08) | +| +- Table Block 0 | +| +- Table Block 1 | +| +- ... Table Block N-1 | ++---------------------------------------------+ +``` + +### Delta symbol dictionary + +Present only when `FLAG_DELTA_SYMBOL_DICT` (0x08) is set. Appears at the start +of the payload, before any table blocks. + +```text ++------------------------------------------------------------+ +| delta_start: varint Starting global ID for this delta | +| delta_count: varint Number of new entries | +| For each new entry: | +| name_length: varint UTF-8 byte length | +| name_bytes: bytes UTF-8 encoded symbol string | ++------------------------------------------------------------+ +``` + +The client maintains a global symbol dictionary mapping symbol strings to +sequential integer IDs starting from 0. On each batch, only newly added +symbols (the "delta") are transmitted. The server accumulates these entries +across batches for the lifetime of the connection. + +WebSocket clients set `FLAG_DELTA_SYMBOL_DICT` on every message and use global +delta dictionaries exclusively. Symbol columns then contain varint-encoded +global IDs instead of per-column dictionaries. + +On connection loss, both sides reset the dictionary. + +## Table blocks + +Each table block contains data for a single table. 
+ +```text ++----------------------------------+ +| Table Header (variable) | ++----------------------------------+ +| Schema Section (variable) | ++----------------------------------+ +| Column Data (variable) | +| +- Column 0 data | +| +- Column 1 data | +| +- ... Column N-1 data | ++----------------------------------+ +``` + +### Table header + +| Field | Type | Description | +|--------------|--------|------------------------------------| +| name_length | varint | Table name length in bytes | +| name | UTF-8 | Table name (max 127 bytes) | +| row_count | varint | Number of rows in this block | +| column_count | varint | Number of columns | + +## Schema definition + +The schema section immediately follows the table header and defines the columns +in the block. + +### Schema mode byte + +| Value | Mode | Description | +|--------|-----------|------------------------------------------------| +| `0x00` | Full | Schema ID + complete column definitions inline | +| `0x01` | Reference | Schema ID only (lookup from registry) | + +### Full schema mode (0x00) + +Sent the first time a table's schema appears on a connection, or whenever the +column set changes. + +```text ++----------------------------------+ +| mode_byte: 0x00 | ++----------------------------------+ +| schema_id: varint | ++----------------------------------+ +| Column Definition 0 | +| +- name_length: varint | +| +- name: UTF-8 bytes | +| +- type_code: uint8 | ++----------------------------------+ +| Column Definition 1 ... | ++----------------------------------+ +``` + +Schema IDs are non-negative integers assigned by the client and scoped to the +lifetime of a single connection. They are global across all tables on the +connection (not per-table). Clients typically assign them sequentially starting +at 0, but the server does not require any particular ordering. 
+ +A column with an **empty name** (length 0) and type TIMESTAMP denotes the +[designated timestamp](/docs/concepts/designated-timestamp/) column, the +per-table column that QuestDB uses for time-based partitioning and ordering. + +### Reference schema mode (0x01) + +Used for subsequent batches when the server has already registered the schema. + +```text ++-------------------------+ +| mode_byte: 0x01 | ++-------------------------+ +| schema_id: varint | ++-------------------------+ +``` + +The server looks up the schema by its ID in the per-connection schema registry. + +### Schema registry lifecycle + +1. First batch for a table: full schema mode with a new schema ID. +2. Subsequent batches with the same columns: reference mode with the same ID. +3. When a table gains a column, the client assigns a new schema ID and sends + it in full mode. +4. Full-mode schemas may re-register an existing ID; the server accepts any ID + within the per-connection schema-ID limit. +5. On reconnect, both sides reset: the client reassigns IDs from 0 and the + server clears its registry. 
+ +## Column types + +| Code | Hex | Type | Size | Description | +|------|--------|-----------------|---------|------------------------------------| +| 1 | `0x01` | BOOLEAN | 1 bit | Bit-packed boolean | +| 2 | `0x02` | BYTE | 1 | Signed 8-bit integer | +| 3 | `0x03` | SHORT | 2 | Signed 16-bit integer | +| 4 | `0x04` | INT | 4 | Signed 32-bit integer | +| 5 | `0x05` | LONG | 8 | Signed 64-bit integer | +| 6 | `0x06` | FLOAT | 4 | IEEE 754 single precision | +| 7 | `0x07` | DOUBLE | 8 | IEEE 754 double precision | +| 9 | `0x09` | SYMBOL | var | Dictionary-encoded string | +| 10 | `0x0A` | TIMESTAMP | 8 | Microseconds since Unix epoch | +| 11 | `0x0B` | DATE | 8 | Milliseconds since Unix epoch | +| 12 | `0x0C` | UUID | 16 | RFC 4122 UUID | +| 13 | `0x0D` | LONG256 | 32 | 256-bit integer | +| 14 | `0x0E` | GEOHASH | var | Geospatial hash | +| 15 | `0x0F` | VARCHAR | var | Length-prefixed UTF-8 | +| 16 | `0x10` | TIMESTAMP_NANOS | 8 | Nanoseconds since Unix epoch | +| 17 | `0x11` | DOUBLE_ARRAY | var | N-dimensional double array | +| 18 | `0x12` | LONG_ARRAY | var | N-dimensional long array | +| 19 | `0x13` | DECIMAL64 | 8 | Decimal (18 digits precision) | +| 20 | `0x14` | DECIMAL128 | 16 | Decimal (38 digits precision) | +| 21 | `0x15` | DECIMAL256 | 32 | Decimal (77 digits precision) | +| 22 | `0x16` | CHAR | 2 | Single UTF-16 code unit | +| 23 | `0x17` | BINARY | var | Length-prefixed opaque bytes | +| 24 | `0x18` | IPv4 | 4 | 32-bit IPv4 address | + +Code `0x08` is unassigned. It was previously STRING, which has been removed. +Use VARCHAR (`0x0F`) for text columns. + +TIMESTAMP and TIMESTAMP_NANOS may use Gorilla encoding when `FLAG_GORILLA` is +set. See [Timestamp encoding](#timestamp-encoding) below. + +## Null handling + +Each column's data section begins with a 1-byte **null flag**. The flag tells +the decoder how nulls are represented in the data that follows. + +### Sentinel mode (null flag = 0x00) + +No bitmap follows. 
The column data contains one value per row (`row_count` +values total). Null rows are represented by a reserved marker value (a +"sentinel") that falls outside the column's valid range. For example, `0x00` +for BYTE or `0x0000` for SHORT. The decoder recognizes these values as null +rather than as real data. + +Sentinel mode requires the type to have a dedicated null representation. Types +whose full value range is meaningful payload (e.g., VARCHAR, SYMBOL) cannot use +sentinel mode. + +### Bitmap mode (null flag != 0x00) + +A null bitmap follows immediately after the flag byte. The column data then +contains only non-null values, densely packed +(`value_count = row_count - null_count`). + +**Bitmap format:** + +- **Size**: `ceil(row_count / 8)` bytes +- **Bit order**: LSB first within each byte +- **Semantics**: bit = 1 means the row is NULL, bit = 0 means the row has a value + +```text +Byte 0: [row7][row6][row5][row4][row3][row2][row1][row0] +Byte 1: [row15][row14][row13][row12][row11][row10][row9][row8] +... +``` + +**Accessing null status:** + +```python +byte_index = row_index // 8 +bit_index = row_index % 8 +is_null = (bitmap[byte_index] & (1 << bit_index)) != 0 +``` + +**Example:** 10 rows where rows 0, 2, and 9 are null: + +```text +Byte 0: 0b00000101 = 0x05 (bits 0 and 2 set) +Byte 1: 0b00000010 = 0x02 (bit 1 set = row 9) +``` + +### Complete column data layout + +```text ++------------------------------------------------------------+ +| null_flag: uint8 0 = sentinel, nonzero = bitmap | +| [null bitmap: ceil(row_count/8) bytes if flag != 0] | +| Column values: | +| flag == 0 : row_count entries (null rows use sentinels) | +| flag != 0 : value_count non-null entries, densely packed | +| (value_count = row_count - null_count) | ++------------------------------------------------------------+ +``` + +The encoder chooses the strategy per column. The decoder must support both. 
+ +### Sentinel values + +When the reference implementation emits sentinel mode (null flag = 0x00), null +rows are encoded as: + +| Type | Sentinel | +|---------|-----------------------------------------------------------------------------------| +| BOOLEAN | bit `0` (false) | +| BYTE | `0x00` | +| SHORT | `0x0000` | +| CHAR | `0x0000` | +| GEOHASH | All-ones (`0xFF...FF`), truncated to `ceil(precision_bits / 8)` bytes | + +### Reference implementation null strategy + +The reference Java client uses these strategies per type: + +| Strategy | Types | +|----------|-------------------------------------------------------------------------------------------------------------------------------------------| +| Sentinel | BOOLEAN, BYTE, SHORT, CHAR, GEOHASH | +| Bitmap | INT, LONG, FLOAT, DOUBLE, VARCHAR, SYMBOL, TIMESTAMP, TIMESTAMP_NANOS, DATE, UUID, LONG256, DECIMAL64, DECIMAL128, DECIMAL256, DOUBLE_ARRAY, LONG_ARRAY | + +Alternative implementations may make different per-column choices as long as +the null flag accurately describes the data that follows. A column with no null +rows produces identical output under either strategy (null flag = 0x00, +`row_count` values). + +## Column data encoding + +### Fixed-width types + +For BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, DATE, CHAR, and IPv4: values are +written as contiguous arrays of their respective sizes in little-endian byte +order. + +```text ++------------------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++------------------------------------------------------+ +| Values: | +| value[0], value[1], ... value[N-1] | +| N = row_count if null_flag == 0 | +| N = row_count - null_count if null_flag != 0 | ++------------------------------------------------------+ +``` + +### Boolean + +Values are bit-packed, 8 per byte, LSB-first. `ceil(N/8)` bytes are written +where `N = row_count` in sentinel mode or `N = row_count - null_count` in +bitmap mode. 
The reference implementation uses sentinel mode for BOOLEAN: null
+rows appear as bit `0` (false).
+
+```text
+Values [true, false, true, true, false, false, false, true]:
+  0b10001101 = 0x8D
+```
+
+### VARCHAR and BINARY
+
+VARCHAR and BINARY share the same wire format:
+
+```text
++------------------------------------------------+
+| [Null flag + bitmap (see Null handling)]       |
++------------------------------------------------+
+| Offset array: (value_count + 1) x uint32       |
+|   offset[0] = 0                                |
+|   offset[i+1] = end of value[i]                |
++------------------------------------------------+
+| Data: concatenated bytes                       |
++------------------------------------------------+
+```
+
+- `value_count = row_count - null_count`
+- Offsets are uint32, little-endian
+- Value `i` spans bytes `[offset[i], offset[i+1])`
+- For VARCHAR, the bytes are valid UTF-8. For BINARY, the bytes are opaque.
+- The uint32 offsets bound individual values to 2^31 - 1 bytes.
+
+### Symbol
+
+Dictionary-encoded strings for low-cardinality columns. The wire format depends
+on the dictionary mode.
+
+#### Per-table dictionary mode
+
+Used by UDP because datagrams cannot rely on a connection-scoped dictionary
+persisting across messages.
+
+```text
++----------------------------------------------+
+| [Null flag + bitmap (see Null handling)]     |
++----------------------------------------------+
+| dictionary_size: varint                      |
++----------------------------------------------+
+| Dictionary entries:                          |
+|   For each entry:                            |
+|     entry_length: varint                     |
+|     entry_data: UTF-8 bytes                  |
++----------------------------------------------+
+| Value indices:                               |
+|   For each non-null row:                     |
+|     dict_index: varint                       |
++----------------------------------------------+
+```
+
+Dictionary indices are 0-based. When a null bitmap is present, only non-null
+rows have indices written.
+ +#### Global delta dictionary mode (WebSocket) + +When `FLAG_DELTA_SYMBOL_DICT` (0x08) is set, symbol columns use global integer +IDs instead of per-table dictionaries. The dictionary entries are sent in the +message-level [delta symbol dictionary](#delta-symbol-dictionary) section. +Column data consists of varint-encoded global IDs only: + +```text ++--------------------------------------------+ +| For each non-null row: | +| global_id: varint Global symbol ID | ++--------------------------------------------+ +``` + +WebSocket clients set `FLAG_DELTA_SYMBOL_DICT` on every message and use this +mode exclusively. + +### Timestamp encoding + +:::note Gorilla compression + +Gorilla is a time-series compression scheme from the +[Facebook/Meta Gorilla paper](https://www.vldb.org/pvldb/vol8/p1816-teller.pdf) +(Pelkonen et al., VLDB 2015). It exploits the regularity of timestamps in +time-series data by encoding the delta-of-deltas between consecutive values, +which are often zero or very small. 
::: -## Overview {#overview} +When `FLAG_GORILLA` (0x04) is **not** set, timestamp columns are written as +plain int64 arrays with no encoding flag: + +```text ++----------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++----------------------------------------------+ +| Timestamp values (non-null only): | +| value_count x int64 | ++----------------------------------------------+ +``` + +When `FLAG_GORILLA` (0x04) **is** set, a 1-byte encoding flag follows the null +handling section: + +| Flag | Mode | Description | +|--------|--------------|------------------------------------------------| +| `0x00` | Uncompressed | Array of int64 values (non-null only) | +| `0x01` | Gorilla | Delta-of-delta compressed | + +**Uncompressed mode (0x00):** + +```text ++----------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++----------------------------------------------+ +| encoding_flag: uint8 (0x00) | ++----------------------------------------------+ +| Timestamp values (non-null only): | +| value_count x int64 | ++----------------------------------------------+ +``` + +**Gorilla mode (0x01):** + +```text ++----------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++----------------------------------------------+ +| encoding_flag: uint8 (0x01) | ++----------------------------------------------+ +| first_timestamp: int64 | ++----------------------------------------------+ +| second_timestamp: int64 | ++----------------------------------------------+ +| Bit-packed delta-of-deltas: | +| For timestamps 3..N | ++----------------------------------------------+ +``` + +#### Gorilla delta-of-delta algorithm + +```python +delta_i = t[i] - t[i - 1] +dod_i = delta_i - delta_prev +``` + +Encoding buckets (bits are written LSB-first): + +| Condition | Prefix | Value bits | Total bits | +|----------------------|--------|-------------|------------| +| DoD == 0 | `0` | 0 | 1 | +| DoD in 
[-64, 63] | `10` | 7 (signed) | 9 | +| DoD in [-256, 255] | `110` | 9 (signed) | 12 | +| DoD in [-2048, 2047] | `1110` | 12 (signed) | 16 | +| Otherwise | `1111` | 32 (signed) | 36 | + +The bit stream is padded to a byte boundary at the end. If any DoD value +exceeds the 32-bit signed integer range, the encoder falls back to +uncompressed mode. + +### UUID + +16 bytes per value: 8 bytes for the low 64 bits, then 8 bytes for the high +64 bits, both little-endian. + +### LONG256 + +32 bytes per value: four int64 values, least significant first, all +little-endian. + +### GeoHash + +```text ++------------------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++------------------------------------------------------+ +| precision_bits: varint (1-60) | ++------------------------------------------------------+ +| Packed geohash values: | +| bytes_per_value = ceil(precision_bits / 8) | +| total = bytes_per_value x N | +| N = row_count if null_flag == 0 | +| N = row_count - null_count if null_flag != 0 | ++------------------------------------------------------+ +``` + +The reference implementation uses sentinel mode for GEOHASH: null rows are +encoded as all-ones truncated to `bytes_per_value`. + +### Array types (DOUBLE_ARRAY, LONG_ARRAY) + +N-dimensional arrays, row-major order: + +```text ++------------------------------------------------------+ +| For each non-null row: | +| n_dims: uint8 Number of dimensions | +| dim_lengths: n_dims x int32 Length per dimension | +| values: product(dims) x element | +| (float64 for DOUBLE_ARRAY, | +| int64 for LONG_ARRAY) | ++------------------------------------------------------+ +``` + +### Decimal types (DECIMAL64, DECIMAL128, DECIMAL256) + +Decimal values are stored as two's complement integers. A 1-byte scale prefix +is shared by all values in the column. 
+ +```text ++----------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++----------------------------------------------+ +| scale: uint8 | ++----------------------------------------------+ +| Unscaled values: | +| DECIMAL64: 8 bytes x value_count | +| DECIMAL128: 16 bytes x value_count | +| DECIMAL256: 32 bytes x value_count | ++----------------------------------------------+ +``` + +| Type | Value size | Precision | +|-------------|------------|------------| +| DECIMAL64 | 8 bytes | 18 digits | +| DECIMAL128 | 16 bytes | 38 digits | +| DECIMAL256 | 32 bytes | 77 digits | + +## Server responses + +Every response starts with a 1-byte status code. OK and error responses include +an 8-byte sequence number that correlates the response with the original +request. + +### OK response + +```text ++------------------------------------------------------+ +| status: uint8 (0x00) | +| sequence: int64 Request sequence number | +| tableCount: uint16 Number of table entries | +| Repeated tableCount times: | +| nameLen: uint16 Table name length | +| name: bytes UTF-8 table name | +| seqTxn: int64 Sequencer txn for table | ++------------------------------------------------------+ +``` + +The per-table entries report the +[sequencer transaction](/docs/query/functions/meta/#wal_tables) assigned to each +table that committed data in the acknowledged batch. `tableCount` is 0 when no +[WAL](/docs/concepts/write-ahead-log/) (Write-Ahead Log) tables committed +(e.g., non-WAL tables or empty batches). 
+ +### Error response + +```text ++-----------------------------------------------------+ +| status: uint8 Status code | +| sequence: int64 Request sequence number | +| msg_len: uint16 Error message length | +| msg_bytes: bytes UTF-8 error message | ++-----------------------------------------------------+ +``` + +### Status codes + +| Code | Hex | Name | Description | +|------|--------|-----------------|--------------------------------------------------| +| 0 | `0x00` | OK | Batch accepted (written to WAL) | +| 2 | `0x02` | DURABLE_ACK | Batch WAL uploaded to object store (Enterprise) | +| 3 | `0x03` | SCHEMA_MISMATCH | Column type incompatible with existing table | +| 5 | `0x05` | PARSE_ERROR | Malformed message | +| 6 | `0x06` | INTERNAL_ERROR | Server-side error | +| 8 | `0x08` | SECURITY_ERROR | Authorization failure | +| 9 | `0x09` | WRITE_ERROR | Write failure (e.g., table not accepting writes) | + +### Durable acknowledgement + +:::note Enterprise + +Durable acknowledgement (status code 0x02) is available in QuestDB Enterprise +with primary replication configured. Open source QuestDB returns OK (0x00) or +error responses only. + +::: + +A standard OK confirms the batch was committed to the server's local WAL. To +receive a second acknowledgement after the WAL has been durably uploaded to the +configured object store, include `X-QWP-Request-Durable-Ack: true` +(case-insensitive) in the WebSocket upgrade request. + +If the server accepts the opt-in, it echoes `X-QWP-Durable-Ack: enabled` in +the 101 response. Clients that opt in **must** verify this header is present +and fail the connect attempt if it is absent. 
+ +**Durable-ack response format:** + +```text ++------------------------------------------------------+ +| status: uint8 (0x02) | +| tableCount: uint16 Number of table entries | +| Repeated tableCount times: | +| nameLen: uint16 Table name length | +| name: bytes UTF-8 table name | +| seqTxn: int64 Durably-uploaded seqTxn | ++------------------------------------------------------+ +``` + +The durable-ack has no sequence field. It carries cumulative per-table +watermarks that advance as uploads complete. Only tables whose durable +watermark advanced since the last durable-ack are included. + +Servers without replication silently ignore the request header and never emit +durable-ack frames. There is no durable-failure status; persistent upload +failures surface only as absence of a durable-ack frame. + +## Protocol limits + +| Limit | Default value | +|-------------------------------|---------------| +| Max batch size | 16 MB | +| Max tables per connection | 10,000 | +| Max rows per table block | 1,000,000 | +| Max columns per table | 2,048 | +| Max table name length | 127 bytes | +| Max column name length | 127 bytes | +| Max in-flight batches | 128 | +| Max symbol dictionary entries | 1,000,000 | + +The header's `table_count` field is a uint16, so the protocol ceiling for +tables per message is 65,535 regardless of the configured limit. Individual +string values have no dedicated length limit; they are bounded only by the max +batch size. + +The symbol dictionary limit applies per column in per-table dictionary mode and +per connection in global delta dictionary mode. Exceeding it causes the server +to reject the message with `PARSE_ERROR`. + +## Examples + +### Single table with three columns + +Table `sensors`, 2 rows, 3 columns: `id` (LONG), `value` (DOUBLE), `ts` +(TIMESTAMP). No nulls, no Gorilla compression, no delta symbol dictionary. 
+ +```text +# Header (12 bytes) +51 57 50 31 # Magic: "QWP1" +01 # Version: 1 +00 # Flags: none +01 00 # Table count: 1 +XX XX XX XX # Payload length + +# Table Block +07 # Table name length: 7 +73 65 6E 73 6F 72 73 # "sensors" UTF-8 +02 # Row count: 2 +03 # Column count: 3 + +# Schema (full mode) +00 # Schema mode: full +00 # Schema ID: 0 + +# Column 0: id (LONG) +02 # Name length: 2 +69 64 # "id" UTF-8 +05 # Type: LONG + +# Column 1: value (DOUBLE) +05 # Name length: 5 +76 61 6C 75 65 # "value" UTF-8 +07 # Type: DOUBLE + +# Column 2: ts (TIMESTAMP, designated) +00 # Name length: 0 (designated timestamp) +0A # Type: TIMESTAMP + +# Column 0 data (LONG, 2 values) +00 # null_flag: 0x00 (no bitmap) +01 00 00 00 00 00 00 00 # id = 1 +02 00 00 00 00 00 00 00 # id = 2 + +# Column 1 data (DOUBLE, 2 values) +00 # null_flag: 0x00 (no bitmap) +CD CC CC CC CC CC F4 3F # value = 1.3 +9A 99 99 99 99 99 01 40 # value = 2.2 + +# Column 2 data (TIMESTAMP, uncompressed, 2 values) +00 # null_flag: 0x00 (no bitmap) +00 E4 0B 54 02 00 00 00 # ts = 10000000000 microseconds +80 1A 06 00 00 00 00 00 # ts = 400000 microseconds +``` + +### Nullable VARCHAR column + +4 rows where row 1 is null: + +```text +# Null flag + bitmap +01 # null_flag: nonzero = bitmap follows +02 # 0b00000010 (bit 1 set = row 1 is null) + +# Offset array (3 non-null values = 4 offsets) +00 00 00 00 # offset[0] = 0 (start of "foo") +03 00 00 00 # offset[1] = 3 (end of "foo") +06 00 00 00 # offset[2] = 6 (end of "bar") +09 00 00 00 # offset[3] = 9 (end of "baz") - +# String data (concatenated UTF-8) +66 6F 6F # "foo" (row 0) +62 61 72 # "bar" (row 2) +62 61 7A # "baz" (row 3) +``` -## Versioning {#versioning} +### Symbol column with per-table dictionary - +3 rows with values: "us", "eu", "us": -## Connection lifecycle {#lifecycle} +```text +# Null flag +00 # null_flag: 0x00 (no nulls) - +# Dictionary +02 # Dictionary size: 2 entries -## Frame structure {#framing} +02 # Entry 0 length: 2 +75 73 # "us" - +02 # Entry 1 
length: 2 +65 75 # "eu" -## Schema messages {#schema} +# Value indices +00 # Row 0: index 0 ("us") +01 # Row 1: index 1 ("eu") +00 # Row 2: index 0 ("us") +``` - +### Gorilla timestamps with delta symbol dictionary -## Type codes and column encoding {#types} +Table `sensors`, 2 rows, 3 columns: `host` (SYMBOL), `temp` (DOUBLE), +designated TIMESTAMP. Both `FLAG_GORILLA` and `FLAG_DELTA_SYMBOL_DICT` are set. - +```text +# Header (12 bytes) +51 57 50 31 # Magic: "QWP1" +01 # Version: 1 +0C # Flags: 0x04 (Gorilla) | 0x08 (Delta Symbol Dict) +01 00 # Table count: 1 +XX XX XX XX # Payload length -## Null encoding {#nulls} +# Delta Symbol Dictionary +00 # delta_start = 0 +02 # delta_count = 2 +07 73 65 72 76 65 72 31 # "server1" (length = 7) +07 73 65 72 76 65 72 32 # "server2" (length = 7) - +# Table Block +07 73 65 6E 73 6F 72 73 # Table name "sensors" (length = 7) +02 # row_count = 2 +03 # column_count = 3 -## Error codes {#errors} +# Schema (full mode) +00 # schema_mode = FULL +00 # schema_id = 0 +04 68 6F 73 74 09 # "host" : SYMBOL +04 74 65 6D 70 07 # "temp" : DOUBLE +00 0A # "" : TIMESTAMP (designated) - +# Column 0 (SYMBOL, global delta IDs) +00 # null_flag: no nulls +00 # Row 0: global ID 0 +01 # Row 1: global ID 1 -## Close codes {#close-codes} +# Column 1 (DOUBLE, 2 values) +00 # null_flag: no nulls +66 66 66 66 66 E6 56 40 # 91.6 +9A 99 99 99 99 19 57 40 # 92.4 - +# Column 2 (TIMESTAMP, Gorilla) +00 # null_flag: no nulls +01 # encoding = Gorilla +[8 bytes: first timestamp] +[8 bytes: second timestamp] +# (only 2 values, so no delta-of-delta bit stream follows) +``` -## Store-and-forward interaction {#sf} +## Reference implementation - +The reference client implementation is +[`java-questdb-client`](https://github.com/questdb/java-questdb-client) +at commit +[`67bb5e4`](https://github.com/questdb/java-questdb-client/commit/67bb5e49feea7e63b813ea08189c23ea11486131). 
-Client-side store-and-forward behaviour will be documented under the -Connect section (coming with Bundle C). +The server-side protocol parser lives in the QuestDB server repository under +`core/src/main/java/io/questdb/cutlass/qwp/protocol/`. -## Reference implementation {#reference} +## Version history - +| Version | Description | +|------------|---------------------------------| +| 1 (`0x01`) | Initial binary protocol release | From 2a78c240598f8b3b9321194447c4648cdbda7ede Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 00:58:37 +0200 Subject: [PATCH 04/44] added client operation section --- .../protocols/qwp-ingress-websocket.md | 87 ++++++++++++++++--- 1 file changed, 76 insertions(+), 11 deletions(-) diff --git a/documentation/protocols/qwp-ingress-websocket.md b/documentation/protocols/qwp-ingress-websocket.md index 96c916dd0..030c79c02 100644 --- a/documentation/protocols/qwp-ingress-websocket.md +++ b/documentation/protocols/qwp-ingress-websocket.md @@ -5,6 +5,8 @@ description: ingest protocol. --- +import QwpMessageHeader from "../partials/_qwp.message-header.partial.mdx" + :::info Audience This is a **wire-protocol specification** for client implementers building a @@ -195,17 +197,7 @@ def zigzag_decode(n): ### Message header (12 bytes, fixed) -```text -Offset Size Type Field Description ------- ---- ------ ------------- -------------------------------- -0 4 int32 magic "QWP1" (0x31505751) -4 1 uint8 version Protocol version (0x01) -5 1 uint8 flags Encoding flags -6 2 uint16 table_count Number of table blocks -8 4 uint32 payload_length Payload size in bytes -``` - -**Total message size** = 12 + payload_length. + ### Flags byte @@ -836,6 +828,79 @@ The symbol dictionary limit applies per column in per-table dictionary mode and per connection in global delta dictionary mode. Exceeding it causes the server to reject the message with `PARSE_ERROR`. 
+## Client operation + +This section describes the high-level batching and I/O behavior a client +implements. The full client-side substrate (on-disk store-and-forward, frame +sequence numbers, ACK-driven trim, reconnect/replay semantics) is specified in +the [connect string reference](/docs/client-configuration/connect-string). + +### Double-buffered async I/O + +The client uses double-buffered microbatches: + +1. The user thread writes rows to the **active** buffer. +2. When a buffer reaches its threshold (row count, byte size, or age), the + client seals it and enqueues it for sending. +3. A dedicated I/O thread sends batches over the WebSocket. +4. The client swaps to the other buffer so writing can continue without + blocking. + +### Auto-flush triggers + +| Trigger | Default | +|----------------------|------------| +| Row count | 1,000 rows | +| Byte size | disabled | +| Time since first row | 100 ms | + +### Failover and high availability + +Ingress senders use a reconnect loop regardless of whether store-and-forward +is configured. The two storage modes share identical failover semantics; they +differ only in where unacknowledged data lives: + +- **`sf_dir` set** (store-and-forward): segments are memory-mapped files under + `sf_dir`. Unacknowledged data survives sender restarts and is replayed by + the next sender bound to the same slot. +- **`sf_dir` unset** (memory mode): segments are allocated in process memory. + Unacknowledged data is lost if the sender process dies. The reconnect loop + still spans transient server outages such as rolling upgrades, but the RAM + buffer caps how much data can accumulate during the outage. 
+ +Connect-string keys that control ingress failover are documented in the +[reconnect and failover](/docs/client-configuration/connect-string#reconnect-keys) +section of the connect string reference: + +| Key | Default | Description | +|----------------------------------|-----------|-------------------------------------------| +| `reconnect_max_duration_millis` | `300000` | Total outage budget before giving up. | +| `reconnect_initial_backoff_millis` | `100` | First post-failure sleep. | +| `reconnect_max_backoff_millis` | `5000` | Cap on per-attempt sleep. | +| `initial_connect_retry` | `off` | Retry on first connect (`on`, `sync`, `async`). | + +Key behaviors: + +- **Ingress is zone-blind.** It pins QWP v1 and never reads `SERVER_INFO`, so + every host's zone tier is equivalent and selection is based on health state + only. The `zone=` connect-string key is accepted but silently ignored, so a + connect string shared with egress clients works unchanged on ingress. +- **Authentication errors are terminal** at any host (`401`/`403`). The + reconnect loop does not continue past them. +- **`421 + X-QuestDB-Role`** is a role reject: transient if the role is + `PRIMARY_CATCHUP`, topology-level otherwise. +- **All other upgrade errors are transient** and feed into the reconnect loop, + including `404`, `426`, `503`, generic 4xx/5xx, TCP/TLS failures, + mid-stream send/recv errors, and an upgrade response that advertises a QWP + version outside the client's supported range (per-endpoint, so a host on a + rolling upgrade does not lock the client out of compatible peers). + +:::note Enterprise + +Multi-host failover with automatic reconnect requires QuestDB Enterprise. + +::: + ## Examples ### Single table with three columns From 9967b1903d88254b33c3546b6dfa2cc4759b29a9 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 00:59:57 +0200 Subject: [PATCH 05/44] added qwp egress reference. 
Extracting common parts to partial --- .../partials/_qwp.message-header.partial.mdx | 11 + .../protocols/qwp-egress-websocket.md | 855 +++++++++++++++++- 2 files changed, 826 insertions(+), 40 deletions(-) create mode 100644 documentation/partials/_qwp.message-header.partial.mdx diff --git a/documentation/partials/_qwp.message-header.partial.mdx b/documentation/partials/_qwp.message-header.partial.mdx new file mode 100644 index 000000000..7f8187bbd --- /dev/null +++ b/documentation/partials/_qwp.message-header.partial.mdx @@ -0,0 +1,11 @@ +```text +Offset Size Type Field Description +------ ---- ------ ------------- -------------------------------- +0 4 int32 magic "QWP1" (0x31505751) +4 1 uint8 version Protocol version (0x01) +5 1 uint8 flags Encoding flags +6 2 uint16 table_count Number of table blocks +8 4 uint32 payload_length Payload size in bytes +``` + +**Total message size** = 12 + payload_length. diff --git a/documentation/protocols/qwp-egress-websocket.md b/documentation/protocols/qwp-egress-websocket.md index 1089be5ad..249ee81e3 100644 --- a/documentation/protocols/qwp-egress-websocket.md +++ b/documentation/protocols/qwp-egress-websocket.md @@ -1,75 +1,850 @@ --- -title: QWP Egress (WebSocket) +title: QWP egress (WebSocket) description: Wire-protocol specification for QuestDB's WebSocket-based streaming query-result protocol. --- -:::note Page in draft - -This is the day-one skeleton. Content will be filled in from -`questdb-enterprise/questdb/docs/qwp/wire-egress.md`. This page documents -the **Phase 1** surface — Phase 2 features (row iterator, unbounded CANCEL, -lazy decode, multi-query, prepared statements) are tracked separately and -not yet public. - -::: +import QwpMessageHeader from "../partials/_qwp.message-header.partial.mdx" :::info Audience -This is a **wire-protocol specification** intended for client implementers -building a new QuestDB query client. 
End users see the +This is a **wire-protocol specification** for client implementers building a +new QuestDB query client from scratch. End users should see the [language client guides](/docs/query/overview) and the [connect string reference](/docs/client-configuration/connect-string). ::: -## Overview {#overview} +QWP egress streams SQL query results to clients over +[WebSocket](https://datatracker.ietf.org/doc/html/rfc6455), reusing the same +columnar binary encoding as +[QWP ingress](/docs/protocols/qwp-ingress-websocket/). The column types, null +handling, and per-column data encodings are identical. Egress adds a message +kind byte at the start of each payload, eight new message kinds for the +request/response lifecycle, and byte-credit flow control. + +Related specifications: +[QWP ingress (WebSocket)](/docs/protocols/qwp-ingress-websocket/) for data +ingestion, and +[QWP ingress (UDP)](/docs/protocols/qwp-ingress-udp/) for fire-and-forget +datagram ingestion. + +## Overview + +Key properties: + +- **Columnar result batches.** Each batch is a single QWP table block (schema + section followed by per-column data with null bitmaps). The decoder is the + same code path as ingress. +- **Server-driven schemas.** The server assigns connection-scoped schema IDs. + Full mode (0x00) on the first batch of a query; reference mode (0x01) on + subsequent batches with the same column set. +- **Per-connection symbol dictionary.** The server accumulates symbol entries + across all queries on the connection. Repeated queries reuse prior IDs + without retransmitting the strings. +- **Byte-credit flow control.** The client grants the server permission to + send up to N bytes of result data. The server pauses once the credit window + is exhausted. A row floor guarantees forward progress. +- **One result set per request.** One `QUERY_REQUEST` produces zero or more + `RESULT_BATCH` frames followed by exactly one terminator (`RESULT_END`, + `EXEC_DONE`, or `QUERY_ERROR`). 
+ +## Transport and versioning + +### Endpoint + +Egress uses a dedicated endpoint, separate from ingress: + +```text +GET /read/v1 +``` + +This separation lets operators route, scale, and authorize ingest and query +workloads independently. Mixed-mode clients open one connection per direction. + +### Version negotiation + +Version and compression are negotiated at the HTTP upgrade: + +**Client request headers:** + +| Header | Required | Description | +|-------------------------|----------|-----------------------------------------------------------------------------| +| `X-QWP-Max-Version` | No | Maximum QWP version the client supports. Defaults to 1 if absent. | +| `X-QWP-Client-Id` | No | Free-form client identifier (e.g., `java-egress/1.0.0`). | +| `X-QWP-Accept-Encoding` | No | Comma-separated list of acceptable result batch body encodings (see below). | +| `X-QWP-Max-Batch-Rows` | No | Client-preferred per-batch row cap. `0` or absent = server default. | + +**Server response headers:** + +| Header | Description | +|--------------------------|--------------------------------------------------------------------------| +| `X-QWP-Version` | Negotiated version = `min(clientMax, serverMax)`. | +| `X-QWP-Content-Encoding` | Server's selected encoding from the client's accept list. Absent = raw. | + +The connection-level contract from the ingress spec applies: every message's +header version byte must equal the negotiated version. + +### Authentication + +Authentication is handled at the HTTP level during the WebSocket upgrade, +identical to ingress. See the +[ingress authentication section](/docs/protocols/qwp-ingress-websocket/#authentication) +for supported methods. + +### Batch body compression + +`X-QWP-Accept-Encoding` is a comma-separated list of tokens. First match wins. + +| Token | Description | +|----------|---------------------------------------------------------------------------------| +| `raw` | No compression (also accepted as `identity`). 
| +| `zstd` | Whole-batch zstd compression. Optional `level=N` hint; server clamps to [1,9]. | + +When `zstd` is negotiated, individual `RESULT_BATCH` frames set `FLAG_ZSTD` +on a per-batch basis. A batch whose compressed form is larger than raw ships +uncompressed. The region before the payload (msg_kind + request_id + +batch_seq) is never compressed so the client can dispatch frames without +decompressing first. + +Absent `X-QWP-Accept-Encoding`, the server defaults to `raw`. + +### Current version + +Version 1 is the initial egress release. Version 2 adds an unsolicited +`SERVER_INFO` frame (see [SERVER_INFO](#server_info-0x18)) delivered as the first +WebSocket frame after the upgrade. A v1 client never sees it. + +## Message structure + +The egress header is byte-identical to the +[ingress header](/docs/protocols/qwp-ingress-websocket/#message-structure) +(12 bytes, little-endian): + + + +The **first byte of the payload** is the message kind. The remaining payload +depends on the kind. + +```text ++------------------------------------------+ +| Header (12 bytes) | ++------------------------------------------+ +| Payload | +| msg_kind: uint8 | +| (kind-specific body) | ++------------------------------------------+ +``` + +Placing `msg_kind` in the payload (rather than the header) keeps the header +codec shared with ingress. Endpoint disambiguation is sufficient because +connections are direction-pure. + +### Flags byte + +For `RESULT_BATCH` frames, the flags byte uses the ingress bit definitions +plus one egress-specific bit: + +| Bit | Name | Description | +|--------|--------------------------|-----------------------------------------------------------------------| +| `0x04` | `FLAG_GORILLA` | Gorilla delta-of-delta encoding on timestamp columns. | +| `0x08` | `FLAG_DELTA_SYMBOL_DICT` | Connection-scoped delta symbol dictionary section present. | +| `0x10` | `FLAG_ZSTD` | Payload after msg_kind/request_id/batch_seq is zstd-compressed. 
| + +`FLAG_GORILLA` and `FLAG_DELTA_SYMBOL_DICT` are always set on `RESULT_BATCH` +frames in the current implementation. When `FLAG_GORILLA` is set, every +TIMESTAMP, TIMESTAMP_NANOS, and DATE column carries a 1-byte encoding flag +before its value region: `0x00` = raw int64 values, `0x01` = Gorilla +bitstream. The server picks Gorilla when the column has at least three +non-null values and the delta-of-delta bitstream is smaller than +`nonNullCount * 8` bytes; unordered or jumpy columns fall back to raw. + +## Message kinds + +| Code | Name | Direction | Description | +|--------|---------------|-----------|-----------------------------------------| +| `0x10` | QUERY_REQUEST | C -> S | SQL query plus bind parameters | +| `0x11` | RESULT_BATCH | S -> C | One table block of result rows | +| `0x12` | RESULT_END | S -> C | Cursor exhausted (success) | +| `0x13` | QUERY_ERROR | S -> C | Mid-stream or parse-time error | +| `0x14` | CANCEL | C -> S | Stop a running query | +| `0x15` | CREDIT | C -> S | Extend the byte-credit window | +| `0x16` | EXEC_DONE | S -> C | Non-SELECT statement acknowledgement | +| `0x17` | CACHE_RESET | S -> C | Clear connection-scoped caches | +| `0x18` | SERVER_INFO | S -> C | Server role and identity (v2 only) | + +Codes `0x00` and `0x01` are the ingress DATA_BATCH and RESPONSE kinds +(not used on the egress endpoint). Codes `0x19` through `0x1F` are reserved +for future egress kinds. `0x20+` is reserved for protocol extensions. + +## QUERY_REQUEST (0x10) + +Client to server. Initiates a new query cursor. 
+ +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x10 | +| request_id: int64 Client-assigned, unique | +| within the connection | +| sql_length: varint UTF-8 byte length | +| sql_bytes: bytes SQL text | +| initial_credit: varint Bytes; 0 = unbounded | +| bind_count: varint Number of bind parameters | +| For each bind parameter (in declaration order): | +| type_code: uint8 Column type code | +| bind_block: column_data Ingress column encoding | +| with row_count = 1 | ++----------------------------------------------------------+ +``` + +### request_id + +64-bit client-assigned identifier. It is echoed back by every server-to-client +frame related to the query (`RESULT_BATCH`, `RESULT_END`, `QUERY_ERROR`). The +client may reuse a `request_id` only after observing the terminator for the +previous use. + +### Bind parameters + +A bind parameter is encoded exactly as a one-row column under the +[ingress column data encoding](/docs/protocols/qwp-ingress-websocket/#column-data-encoding). +Each block begins with a `type_code` (uint8), followed by the standard +`null_flag` byte and either zero or one value. + +A NULL bind parameter is: `type_code` + `null_flag = 0x01` + bitmap byte +`0x01`, with no value bytes following. + +DECIMAL binds carry the 1-byte scale prefix. ARRAY binds carry the per-row +dimension header. Symbol bind parameters are encoded as VARCHAR (no dictionary +for a single value). + +:::note Server leniency + +The current server decoder accepts a SYMBOL wire type code for a bind +parameter and treats it identically to VARCHAR. Compliant clients should still +send VARCHAR. A future revision may reject SYMBOL bind type codes. + +::: + +### Concurrency + +:::note Phase 1 limitation + +The current implementation supports a single in-flight query per connection. +The server rejects a second `QUERY_REQUEST` before the active query terminates. 
+The wire protocol allows multiple in-flight queries (demultiplexed by +`request_id`); multi-query support is planned for a future release. + +::: + +## RESULT_BATCH (0x11) + +Server to client. Carries one table block of result rows. + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x11 | +| request_id: int64 From the originating | +| QUERY_REQUEST | +| batch_seq: varint Monotonic per request, | +| starting at 0 | +| (rest of payload: optional delta symbol dictionary, | +| then exactly one table block) | ++----------------------------------------------------------+ +``` + +The header's `table_count` is `1`. The table block format is identical to +ingress: schema section followed by per-column data. The table name is empty +(`name_length = 0`); result sets have no table name. + +**Schema handling:** + +- First batch for a query: schema mode 0x00 (full) with a server-assigned + schema_id. +- Subsequent batches with the same columns: schema mode 0x01 (reference). + +If the result set is empty, the server still sends one `RESULT_BATCH` with +`row_count = 0` so the client receives the schema, followed by `RESULT_END`. + +## RESULT_END (0x12) + +Server to client. Signals successful end of stream. + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x12 | +| request_id: int64 | +| final_seq: varint Sequence of last RESULT_BATCH | +| (or 0 if none) | +| total_rows: varint Total rows produced; 0 if not | +| tracked by the server | ++----------------------------------------------------------+ +``` + +The header's `table_count` is `0`. After `RESULT_END`, the server has no +further state for this `request_id` and the client may reuse it. + +## QUERY_ERROR (0x13) + +Server to client. Signals failure at any point in the lifecycle: before any +`RESULT_BATCH` (parse or security failure) or mid-stream (storage failure, +cancellation, server shutdown). 
+ +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x13 | +| request_id: int64 | +| status: uint8 See Status codes below | +| msg_length: uint16 UTF-8 byte length | +| msg_bytes: bytes Human-readable error message | ++----------------------------------------------------------+ +``` + +The header's `table_count` is `0`. `QUERY_ERROR` is terminal: the client must +not expect any further frames for this `request_id`. + +## CANCEL (0x14) + +Client to server. Requests termination of a running query. + +```text ++---------------------------+ +| msg_kind: uint8 0x14 | +| request_id: int64 | ++---------------------------+ +``` + +The server acknowledges by emitting either `RESULT_END` (if the cursor +finished first) or `QUERY_ERROR` with status `CANCELLED`. The client must +continue to drain any in-flight `RESULT_BATCH` frames the server sent before +processing the cancel; the terminator is the synchronization point. + +If `request_id` does not refer to an active query, the server silently drops +the cancel. + +## CREDIT (0x15) + +Client to server. Extends the byte-credit window for a specific query. + +```text ++----------------------------------------------+ +| msg_kind: uint8 0x15 | +| request_id: int64 | +| additional_bytes: varint Bytes to add | ++----------------------------------------------+ +``` + +See [Flow control](#flow-control) for the credit model. + +## EXEC_DONE (0x16) + +Server to client. Terminates a non-SELECT `QUERY_REQUEST` (DDL, INSERT, +UPDATE, ALTER, DROP, TRUNCATE, CREATE TABLE, CREATE MATERIALIZED VIEW). No +`RESULT_BATCH` frames are sent for these statements. + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x16 | +| request_id: int64 | +| op_type: uint8 Statement type discriminator | +| rows_affected: varint Row count for INSERT/UPDATE; | +| 0 for DDL | ++----------------------------------------------------------+ +``` + +The header's `table_count` is `0`. 
`EXEC_DONE` is terminal: the client must +not expect any further frames for this `request_id`. If the statement fails, +the server sends `QUERY_ERROR` instead. + +## CACHE_RESET (0x17) + +Server to client. Instructs the client to clear one or both connection-scoped +caches: the symbol delta dictionary and the schema registry. Emitted at a +query boundary (between the previous query's terminator and the next query's +first `RESULT_BATCH` or `EXEC_DONE`); never mid-stream. + +```text ++----------------------------------------------+ +| msg_kind: uint8 0x17 | +| reset_mask: uint8 Bit 0 = symbol dict | +| Bit 1 = schema cache | +| Bits 2-7 reserved (0) | ++----------------------------------------------+ +``` + +The header's `table_count` is `0`. No `request_id`: the frame targets +connection state, not a specific query. + +**Semantics by bit:** + +- **Bit 0 (RESET_MASK_DICT)**: clear the connection-scoped symbol dictionary. + After the reset, the dictionary is empty. The next `RESULT_BATCH` with + `FLAG_DELTA_SYMBOL_DICT` must start its delta section at `deltaStart = 0`. +- **Bit 1 (RESET_MASK_SCHEMAS)**: clear the connection-scoped schema + registry. All previously assigned schema IDs are discarded. The next + `RESULT_BATCH` must use full schema mode (0x00) with freshly allocated IDs. + +Both bits may be set in the same frame. Clients must ignore unknown reserved +bits. + +**Default soft caps:** + +| Cap | Default | Triggers | +|----------------------------------|-----------|--------------------| +| Symbol dict entries | 100,000 | `RESET_MASK_DICT` | +| Symbol dict UTF-8 heap bytes | 8 MiB | `RESET_MASK_DICT` | +| Distinct registered schemas | 4,096 | `RESET_MASK_SCHEMAS` | + +Actual cap values are implementation-defined. Clients must accept any cap +policy and must be prepared to receive `CACHE_RESET` after any query +terminator. 
+ +**Why never mid-stream:** resetting the dictionary or schema registry while a +`RESULT_BATCH` is in flight would invalidate IDs already referenced in that +batch's payload. The server postpones the reset until a natural query +boundary. Under a saturating workload, the server may temporarily exceed its +soft caps for the duration of a single query; the caps are self-healing and +bounded by any one query's distinct symbol/schema footprint. + +**Wire-level example:** + +```text +client -> QUERY_REQUEST(request_id=42, ...) +server -> CACHE_RESET(reset_mask=0x01) # dict bit only +server -> RESULT_BATCH(request_id=42, batch_seq=0, deltaStart=0, ...) +server -> RESULT_BATCH(request_id=42, batch_seq=1, ...) +server -> RESULT_END(request_id=42, ...) +``` + +If the schema cache is also over cap, the server emits a single +`CACHE_RESET(reset_mask=0x03)` and the client clears both caches in one hop. + +## SERVER_INFO (0x18) + +Server to client. Unsolicited frame delivered as the first WebSocket frame +after the HTTP upgrade, only when the negotiated version is 2 or above. A v1 +client never sees it. + +```text ++----------------------------------------------------------+ +| msg_kind: uint8 0x18 | +| role: uint8 See role table | +| epoch: uint64 Monotonic role epoch | +| capabilities: uint32 Bitfield | +| server_wall_ns: int64 Server wall-clock (ns since | +| Unix epoch) | +| cluster_id_len: uint16 UTF-8 byte length | +| cluster_id: bytes Cluster identifier | +| node_id_len: uint16 UTF-8 byte length | +| node_id: bytes Node identifier | +| (if capabilities & 0x01): | +| zone_id_len: uint16 UTF-8 byte length | +| zone_id: bytes Geographic/logical zone | ++----------------------------------------------------------+ +``` + +**Role values:** + +| Value | Role | Description | +|--------|------------------|----------------------------------------------------------| +| `0x00` | STANDALONE | No replication configured. Behaves like a primary. 
| +| `0x01` | PRIMARY | Authoritative write node; reads see latest commits. | +| `0x02` | REPLICA | Read-only replica; reads may lag the primary. | +| `0x03` | PRIMARY_CATCHUP | Promotion in flight; behaves like a primary. | + +**Capabilities:** + +| Bit | Name | Description | +|--------------|----------|----------------------------------------------------------| +| `0x00000001` | CAP_ZONE | `zone_id` fields are appended after `node_id`. | + +Clients encountering unknown capability bits must ignore them. Trailing fields +gated by unset bits are absent from the frame. + +**epoch:** monotonic across role transitions on the same node (e.g., replica +promoted to primary). Clients tracking a specific primary can use it to refuse +a stale reconnect that lands on a node which no longer holds the primary role +at the current cluster epoch. The field is 0 on releases where fencing has not +been wired up yet; clients may treat it as a hint. + +**Delivery timing:** `SERVER_INFO` is included in the same TCP send buffer as +the 101 upgrade response, so on a healthy connection the frame is already in +the client's kernel recv buffer by the time the client parses the upgrade. If +the server negotiates v1, it omits the frame entirely and clients fall back to +treating the server as `STANDALONE`. + +### Client routing + +Egress clients that support v2 can accept multiple endpoints plus role and +zone preferences on the connect string: + +```text +ws::addr=db-a:9000,db-b:9000,db-c:9000;target=any;zone=eu-west-1a;failover=on; +``` + +| Key | Values | Default | Description | +|------------|---------------------------|---------|-----------------------------------------------| +| `target` | `any`, `primary`, `replica` | `any` | Role filter applied per endpoint after reading `SERVER_INFO`. | +| `zone` | free-form string | | Compared case-insensitively against `zone_id` from `SERVER_INFO`. | +| `failover` | `on`, `off` | `on` | Master switch for per-query reconnect loop. 
`off` surfaces transport errors directly. | + +When `target=primary`, zone preference is still recorded but every host's zone +tier is treated as equivalent (the primary must be followed across zones). + +The `421 + X-QuestDB-Role` (and optional `X-QuestDB-Zone`) upgrade-reject +convention is shared with ingress: the server returns HTTP 421 when the +connecting client's role filter does not match, allowing the client to try the +next endpoint without completing the WebSocket handshake. + +## Null sentinel conventions + +Egress inherits QuestDB's internal null sentinel conventions. When the server +writes a null value into the dense values array, it uses the type's sentinel +and also sets the corresponding null bitmap bit. Clients consuming egress +results should treat these sentinels as indistinguishable from explicit NULL: + +| Type | Null sentinel | +|----------------------------------------------|---------------------| +| INT, IPv4 | `Integer.MIN_VALUE` (INT); `0` (IPv4) | +| LONG, DATE, TIMESTAMP, TIMESTAMP_NANOS, DECIMAL64 | `Long.MIN_VALUE` | +| FLOAT | `NaN` | +| DOUBLE | `NaN` | +| GEOHASH (all widths) | All-ones (`-1`) | +| UUID | Both halves `Long.MIN_VALUE` | +| LONG256 | All four longs `Long.MIN_VALUE` | +| BOOLEAN, BYTE, SHORT, CHAR | No null sentinel; these types cannot carry NULL in QuestDB | + +### Array element nulls + +Array columns (`DOUBLE_ARRAY`, `LONG_ARRAY`) have no per-element null bitmap. +Element-level NULL uses the element type's row-level sentinel: + +- `DOUBLE_ARRAY` element: `NaN` (a non-null `NaN` is indistinguishable from NULL) +- `LONG_ARRAY` element: `Long.MIN_VALUE` (cannot be represented as non-null) + +The row-level null bitmap bit signals "the array itself is NULL", distinct +from "an array of zero or more elements where some may be element-NULL." + +## Schema and symbol dictionary scope + +### Schema registry + +The server maintains a per-connection schema registry. 
The first +`RESULT_BATCH` for a query registers a new schema in full mode (0x00); +subsequent batches with the same column set use reference mode (0x01). + +Connections that accumulate many distinct column shapes may cross the server's +schema soft cap. When that happens, the server emits `CACHE_RESET` with +`RESET_MASK_SCHEMAS` at a query boundary and both sides clear the registry. +Schema IDs after the reset may collide with previously used values. + +On disconnect, both sides reset the registry. + +### Symbol dictionary + +Egress uses a connection-scoped delta dictionary (the same +`FLAG_DELTA_SYMBOL_DICT` mechanic as ingress). The server maintains a global +mapping of symbol strings to sequential integer IDs starting at 0, shared +across every query on the connection. Each `RESULT_BATCH` carries a delta +section listing newly added symbols. + +Per-connection scope benefits repeated queries (e.g. BI dashboards refreshing +the same SELECTs). The server enforces soft caps on entry count and heap bytes. +When either cap is crossed, the server emits `CACHE_RESET` with +`RESET_MASK_DICT` and both sides clear the dictionary; the next delta section +starts at `deltaStart = 0`. + +On disconnect, both sides reset the dictionary. 
+ +## Cursor lifecycle + +```text + QUERY_REQUEST + client ---------------------------------> server + | + (parse, plan, + open cursor) + | + client <---------- RESULT_BATCH(seq=0) ----- schema mode 0x00 + client <---------- RESULT_BATCH(seq=1) ----- schema mode 0x01 + client <---------- RESULT_BATCH(seq=N) ----- + | + client <----------- RESULT_END -------------- +``` + +**Error path:** + +```text + client <---------- RESULT_BATCH(seq=K) ----- + client <----------- QUERY_ERROR ------------- (terminal) +``` + +**Cancel path:** + +```text + client ----------- CANCEL ------------------> + client <--- (any in-flight RESULT_BATCH) ---- + client <----------- QUERY_ERROR ------------- status = CANCELLED + (or RESULT_END if it raced) +``` + +**Non-SELECT path:** + +```text + QUERY_REQUEST (DDL/INSERT/UPDATE) + client ---------------------------------> server + client <----------- EXEC_DONE --------------- +``` + +**Cache reset at query boundary:** + +```text + client <----------- RESULT_END -------------- (query N) + client <----------- CACHE_RESET ------------- (optional) + QUERY_REQUEST + client ---------------------------------> server (query N+1) + client <---------- RESULT_BATCH(seq=0) ----- deltaStart=0 after reset +``` + +A connection-level error (malformed header, authentication failure) closes the +WebSocket. The server's last frame before close should be a `QUERY_ERROR` with +`request_id = -1` if the failure is not attributable to a specific request. + +## Failover and high availability + +Egress clients can drive a per-query reconnect loop across multiple endpoints. +When a transport error occurs mid-stream, the client reconnects to the next +healthy endpoint, reads `SERVER_INFO` to verify the role filter, and replays +the query. `batch_seq` restarts at 0 on the new connection. 
+ +The connect-string keys that control egress failover +(`failover_max_attempts`, `failover_backoff_initial_ms`, +`failover_backoff_max_ms`, `failover_max_duration_ms`) are documented in the +[reconnect and failover](/docs/client-configuration/connect-string#reconnect-keys) +section of the connect string reference. The shared failover primitives +(host-health model, backoff, role filter, error classification) are covered in +[multi-host failover](/docs/client-configuration/connect-string#failover-keys). + +Key behaviors: + +- Authentication errors are terminal at any host; the reconnect loop does not + continue past them. +- A `CANCEL` acknowledged with `QUERY_ERROR(CANCELLED)` routes through the + normal error path, not the transport-error path, so it never triggers + failover. +- An upgrade-time version mismatch is per-endpoint, not terminal. A host + whose upgrade response advertises a QWP version outside the client's + supported range is recorded as a transport error and the walk continues. + +:::note Enterprise + +Multi-host failover with automatic reconnect requires QuestDB Enterprise. + +::: + +## Flow control + +:::note Byte credits + +Egress uses byte-credit flow control to prevent the server from overwhelming +the client with result data. The client tells the server how many bytes it is +willing to receive, and the server pauses when the budget is exhausted. + +::: + +### Initial credit + +The client sets `initial_credit` in `QUERY_REQUEST`. A value of `0` means +unbounded: the server streams without waiting for credit. A nonzero value is +the byte budget the server may emit before pausing. + +### Granting more credit + +The client sends `CREDIT` frames to extend the window. The server adds +`additional_bytes` to the remaining budget. There is no upper bound on a +single grant. + +### Accounting + +The server decrements the budget by the total wire length of each +`RESULT_BATCH` (header + payload). 
When the budget would go non-positive, the +server pauses production for that `request_id`. + +### Row floor + +To prevent deadlock on rows larger than the remaining window, the server may +send one additional `RESULT_BATCH` of at least one row even if doing so drives +the budget negative. The next batch will not be sent until credit returns to a +positive value. + +This guarantees forward progress for any well-formed query regardless of +credit size. Clients should size buffers to absorb up to one extra batch. + +### Independence per request + +Each `request_id` has its own credit accounting. Granting credit on one +request does not unblock another. + +## Status codes + +`QUERY_ERROR` reuses the ingress status code namespace and adds two +egress-specific codes: + +| Code | Hex | Name | Description | +|------|--------|-----------------|---------------------------------------------------| +| 3 | `0x03` | SCHEMA_MISMATCH | Bind parameter type incompatible with placeholder | +| 5 | `0x05` | PARSE_ERROR | Malformed message or SQL syntax error | +| 6 | `0x06` | INTERNAL_ERROR | Server-side execution failure | +| 8 | `0x08` | SECURITY_ERROR | Authorization failure | +| 10 | `0x0A` | CANCELLED | Query terminated in response to CANCEL | +| 11 | `0x0B` | LIMIT_EXCEEDED | A protocol limit was hit (see Protocol limits) | + +OK (0x00) is not used in egress; success terminates with `RESULT_END` or +`EXEC_DONE`. + +## Protocol limits + +| Limit | Default value | Notes | +|----------------------------------|---------------|----------------------------------------------------| +| Max in-flight queries | 1 | Per connection. Wire protocol allows more; Phase 1 enforces 1. | +| Max SQL text length | 1 MiB | UTF-8 bytes. | +| Max bind parameters | 1,024 | Per QUERY_REQUEST. | +| Max RESULT_BATCH wire size | 16 MiB | Same as ingress batch ceiling. | +| Symbol dict soft cap (entries) | 100,000 | Per connection. Exceeding triggers CACHE_RESET. 
| Symbol dict soft cap (heap)      | 8 MiB         | Per connection, UTF-8 bytes.                       |
| Schema registry soft cap         | 4,096         | Per connection. Exceeding triggers CACHE_RESET.    |

Soft caps are implementation-defined and may be tuned by the server operator.

## Examples

### Simple unbounded query

Client sends `SELECT id, value FROM sensors LIMIT 2` with no bind parameters
and unbounded credit.

```text
QUERY_REQUEST:
  Header:
    51 57 50 31              # Magic: "QWP1"
    01                       # Version: 1
    00                       # Flags
    00 00                    # table_count = 0
    XX XX XX XX              # payload_length

  Payload:
    10                       # msg_kind = QUERY_REQUEST
    01 00 00 00 00 00 00 00  # request_id = 1
    25                       # sql_length = 37
    53 45 4C 45 43 54 20 69  # "SELECT i"
    64 2C 20 76 61 6C 75 65  # "d, value"
    20 46 52 4F 4D 20 73 65  # " FROM se"
    6E 73 6F 72 73 20 4C 49  # "nsors LI"
    4D 49 54 20 32           # "MIT 2"
    00                       # initial_credit = 0 (unbounded)
    00                       # bind_count = 0
```

Server responds with one result batch and end-of-stream:

```text
RESULT_BATCH (seq=0):
  Header:
    51 57 50 31              # Magic: "QWP1"
    01                       # Version: 1
    00                       # Flags
    01 00                    # table_count = 1
    XX XX XX XX              # payload_length

  Payload:
    11                       # msg_kind = RESULT_BATCH
    01 00 00 00 00 00 00 00  # request_id = 1
    00                       # batch_seq = 0

  Table block:
    00                       # name_length = 0 (anonymous)
    02                       # row_count = 2
    02                       # column_count = 2
-
+    Schema (full mode):
    00                       # schema_mode = FULL
    00                       # schema_id = 0
    02 69 64 05              # "id" : LONG
    05 76 61 6C 75 65 07     # "value" : DOUBLE
-## Versioning {#versioning}

+    Column 0 (LONG):
    00                       # null_flag = 0
    01 00 00 00 00 00 00 00  # 1
    02 00 00 00 00 00 00 00  # 2
-
+    Column 1 (DOUBLE):
    00                       # null_flag = 0
    CD CC CC CC CC CC F4 3F  # 1.3
    9A 99 99 99 99 99 01 40  # 2.2
-## Connection lifecycle {#lifecycle}

+RESULT_END:
+  Header:
+    51 57 50 31 01 00 00 00 XX XX XX XX
-
+  Payload:
+    12                       # msg_kind = RESULT_END
+    01 00 00 00 00 00 00 00  # request_id = 1
+    00                       # final_seq = 0
+    02                       # total_rows = 2
+```

-## Query submission {#submission}

+### Bind parameter

-
+A LONG bind parameter with value `42`: -## Result framing {#results} +```text +05 # type_code = LONG +00 # null_flag = 0 (no nulls) +2A 00 00 00 00 00 00 00 # value = 42 +``` - +A NULL LONG bind parameter: -## Schema messages {#schema} +```text +05 # type_code = LONG +01 # null_flag = nonzero (bitmap follows) +01 # bitmap byte: bit 0 set = NULL + # (no value bytes) +``` - +### Credit-controlled streaming -## Flow control {#flow-control} +Client opens a query with a 64 KiB initial credit: - +```text +QUERY_REQUEST: initial_credit = 65536, request_id = 7 +``` -## Durable ACK {#durable-ack} +Server emits `RESULT_BATCH` frames totaling 60 KiB, then pauses. Client +grants more credit: - +```text +CREDIT: + 15 # msg_kind = CREDIT + 07 00 00 00 00 00 00 00 # request_id = 7 + 80 80 04 # additional_bytes = 65536 +``` -## Error codes {#errors} +Server resumes streaming. - +## Reference implementation -## Close codes {#close-codes} +The reference client implementation is +[`java-questdb-client`](https://github.com/questdb/java-questdb-client) +at commit +[`67bb5e4`](https://github.com/questdb/java-questdb-client/commit/67bb5e49feea7e63b813ea08189c23ea11486131). - +The server-side egress handler lives in the QuestDB server repository. -## Reference implementation {#reference} +## Version history - +| Version | Description | +|------------|------------------------------------------------------------| +| 1 (`0x01`) | Initial egress release. | +| 2 (`0x02`) | Adds unsolicited SERVER_INFO frame after upgrade (v2 only).| From 44c7225b177fe23b6779f7aa07829313634503e5 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 13:25:46 +0200 Subject: [PATCH 06/44] remove QWP ingress UDP placeholder and all references UDP documentation is deferred. Removes the stub page, sidebar entry, and links from ingress, egress, overview, and connect-string pages. 
--- .../client-configuration/connect-string.md | 2 +- documentation/protocols/overview.md | 1 - .../protocols/qwp-egress-websocket.md | 7 +-- documentation/protocols/qwp-ingress-udp.md | 55 ------------------- .../protocols/qwp-ingress-websocket.md | 11 ++-- documentation/sidebars.js | 5 -- 6 files changed, 7 insertions(+), 74 deletions(-) delete mode 100644 documentation/protocols/qwp-ingress-udp.md diff --git a/documentation/client-configuration/connect-string.md b/documentation/client-configuration/connect-string.md index 93715309d..d5cb49579 100644 --- a/documentation/client-configuration/connect-string.md +++ b/documentation/client-configuration/connect-string.md @@ -175,7 +175,7 @@ The schema prefix selects the QWP transport. | ------ | --------------- | ------------ | -------------------------------------------------------------------------------------------------------------------- | | `ws` | WebSocket | `9000` | QWP over plain WebSocket. Use for development or trusted networks. | | `wss` | WebSocket + TLS | `9000` | QWP over TLS-secured WebSocket. Recommended for production. | -| `udp` | UDP | `9007` | Fire-and-forget metrics ingest, single table per datagram. See [QWP Ingress (UDP)](/docs/protocols/qwp-ingress-udp/). | +| `udp` | UDP | `9007` | Fire-and-forget metrics ingest, single table per datagram. | The default port is applied when `addr` omits `:port`. Note that `wss` does **not** default to `443`: both `ws` and `wss` use `9000` unless overridden. diff --git a/documentation/protocols/overview.md b/documentation/protocols/overview.md index 5baa309f5..8a3edbe6c 100644 --- a/documentation/protocols/overview.md +++ b/documentation/protocols/overview.md @@ -30,7 +30,6 @@ a spec, the spec wins. 
| Protocol | Transport | Purpose | | --- | --- | --- | | [QWP Ingress (WebSocket)](/docs/protocols/qwp-ingress-websocket) | WebSocket | Columnar binary ingest with optional store-and-forward | -| [QWP Ingress (UDP)](/docs/protocols/qwp-ingress-udp) | UDP | Fire-and-forget metrics ingest, MTU-bounded | | [QWP Egress (WebSocket)](/docs/protocols/qwp-egress-websocket) | WebSocket | Streaming SQL query results | ## Versioning diff --git a/documentation/protocols/qwp-egress-websocket.md b/documentation/protocols/qwp-egress-websocket.md index 249ee81e3..0b30a82b0 100644 --- a/documentation/protocols/qwp-egress-websocket.md +++ b/documentation/protocols/qwp-egress-websocket.md @@ -24,11 +24,8 @@ handling, and per-column data encodings are identical. Egress adds a message kind byte at the start of each payload, eight new message kinds for the request/response lifecycle, and byte-credit flow control. -Related specifications: -[QWP ingress (WebSocket)](/docs/protocols/qwp-ingress-websocket/) for data -ingestion, and -[QWP ingress (UDP)](/docs/protocols/qwp-ingress-udp/) for fire-and-forget -datagram ingestion. +For data ingestion, see +[QWP ingress (WebSocket)](/docs/protocols/qwp-ingress-websocket/). ## Overview diff --git a/documentation/protocols/qwp-ingress-udp.md b/documentation/protocols/qwp-ingress-udp.md deleted file mode 100644 index 29044cd47..000000000 --- a/documentation/protocols/qwp-ingress-udp.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: QWP Ingress (UDP) -description: - Wire-protocol specification for QuestDB's UDP-based fire-and-forget - ingest variant. ---- - -:::note Page in draft - -This is the day-one skeleton. Content will be filled in from -`questdb-enterprise/questdb/docs/qwp/wire-udp.md`. - -::: - -:::info Audience - -This is a **wire-protocol specification** intended for client implementers -building a UDP-based ingest agent (typically a metrics collector). 
End users -see the [language client guides](/docs/ingestion/overview) and the -[connect string reference](/docs/client-configuration/connect-string). - -::: - -## Overview {#overview} - - - -## Versioning {#versioning} - - - -## Datagram layout {#layout} - - - -## MTU sizing {#mtu} - - - -## Single-table constraint {#single-table} - - - -## Type codes and encoding {#types} - - - -## Loss semantics {#loss} - - - -## Reference implementation {#reference} - - diff --git a/documentation/protocols/qwp-ingress-websocket.md b/documentation/protocols/qwp-ingress-websocket.md index 030c79c02..6679edfb2 100644 --- a/documentation/protocols/qwp-ingress-websocket.md +++ b/documentation/protocols/qwp-ingress-websocket.md @@ -22,11 +22,8 @@ table blocks, where every column's values are stored contiguously. Batched messages, schema references, and Gorilla-compressed timestamps reduce wire overhead for sustained streaming workloads. -This page covers WebSocket ingress only. Related specifications: -[QWP ingress (UDP)](/docs/protocols/qwp-ingress-udp/) for fire-and-forget -datagram ingestion, and -[QWP egress (WebSocket)](/docs/protocols/qwp-egress-websocket/) for streaming -query results back to clients. +This page covers WebSocket ingress only. For streaming query results back to +clients, see [QWP egress (WebSocket)](/docs/protocols/qwp-egress-websocket/). ## Overview @@ -526,8 +523,8 @@ on the dictionary mode. #### Per-table dictionary mode -Used by UDP because datagrams cannot rely on a connection-scoped dictionary -persisting across messages. +Used by the UDP transport, where datagrams cannot rely on a connection-scoped +dictionary persisting across messages. WebSocket clients do not use this mode. 
```text +----------------------------------------------+ diff --git a/documentation/sidebars.js b/documentation/sidebars.js index aefc6f5c6..5e9178fc3 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -238,11 +238,6 @@ module.exports = { type: "doc", label: "QWP Ingress (WebSocket)", }, - { - id: "protocols/qwp-ingress-udp", - type: "doc", - label: "QWP Ingress (UDP)", - }, { id: "protocols/qwp-egress-websocket", type: "doc", From 7d40968d574f219b040c4f865ea5f37dc5811e4a Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 14 May 2026 14:24:21 +0100 Subject: [PATCH 07/44] high availability docs --- .../client-failover/concepts.md | 13 ++----- .../client-failover/configuration.md | 7 ---- documentation/high-availability/overview.md | 36 +++++++++++++++---- documentation/high-availability/setup.md | 11 ++++++ .../store-and-forward/concepts.md | 17 +++------ .../store-and-forward/configuration.md | 11 ++---- .../store-and-forward/operating-and-tuning.md | 7 ---- .../store-and-forward/when-to-use.md | 13 ++----- documentation/high-availability/tuning.md | 3 +- documentation/sidebars.js | 20 ++++------- 10 files changed, 62 insertions(+), 76 deletions(-) diff --git a/documentation/high-availability/client-failover/concepts.md b/documentation/high-availability/client-failover/concepts.md index 8a64a104f..673060808 100644 --- a/documentation/high-availability/client-failover/concepts.md +++ b/documentation/high-availability/client-failover/concepts.md @@ -10,18 +10,11 @@ description: import { EnterpriseNote } from "@site/src/components/EnterpriseNote" - Client failover is most useful with QuestDB Enterprise primary-replica - replication. OSS users with a single instance gain limited benefit from - multi-host configuration. + Client failover is most useful with QuestDB Enterprise + [primary-replica replication](/docs/high-availability/overview/). OSS users + with a single instance gain limited benefit from multi-host configuration. 
-:::note Java-only today - -Client-side failover support is currently available in the Java client. -Additional language clients are on the roadmap. - -::: - When a QuestDB cluster fails over from one primary to another — whether through a planned promotion, a rolling upgrade, or an unplanned outage — clients with a single hard-coded address must be reconfigured and restarted. A failover-aware diff --git a/documentation/high-availability/client-failover/configuration.md b/documentation/high-availability/client-failover/configuration.md index 6da5a703f..c7e13ecbb 100644 --- a/documentation/high-availability/client-failover/configuration.md +++ b/documentation/high-availability/client-failover/configuration.md @@ -7,13 +7,6 @@ description: egress retry budgets. --- -:::note Java-only today - -Client-side failover support is currently available in the Java client. -Additional language clients are on the roadmap. - -::: - This page is the configuration reference for client failover. For the model behind these keys — host-health states, zone tiers, role filtering, and the two retry loops — read [Concepts](/docs/high-availability/client-failover/concepts/) diff --git a/documentation/high-availability/overview.md b/documentation/high-availability/overview.md index c36f38bde..4d782e6a7 100644 --- a/documentation/high-availability/overview.md +++ b/documentation/high-availability/overview.md @@ -1,19 +1,35 @@ --- -title: Replication overview +title: High availability overview sidebar_label: Overview description: - Learn how QuestDB Enterprise replication works, its benefits, and architecture. + How QuestDB delivers high availability — server-side primary-replica + replication, plus client-side failover and store-and-forward. --- import { EnterpriseNote } from "@site/src/components/EnterpriseNote" - Replication provides high availability and disaster recovery for your QuestDB cluster. + Primary-replica replication is a QuestDB Enterprise feature. 
Client failover + and store-and-forward are available to all native clients. -QuestDB Enterprise provides **primary-replica replication** for high availability -and disaster recovery. Your data is automatically synced to replica instances -via an object store, with no direct network connections required between nodes. +QuestDB approaches high availability in two layers, and a resilient deployment +usually needs both: + +- **Server-side replication** keeps a hot copy of your data on one or more + replica nodes, so the cluster survives the loss of a node. This is a QuestDB + Enterprise feature, and it is the main subject of this page. +- **Client-side resilience** keeps your applications connected across the + failover that replication makes possible. + [Client failover](/docs/high-availability/client-failover/concepts/) lets a + client walk a list of hosts when its connection breaks, and + [store-and-forward](/docs/high-availability/store-and-forward/concepts/) + buffers unacknowledged data locally so a producer never loses writes during + the gap. + +Replication moves the data; the client-side features make sure your +applications follow it. The rest of this page covers replication — see the +**Client Failover** and **Store-and-Forward** sections for the client side. ## Why use replication? @@ -113,4 +129,10 @@ by you. ## Next steps -Ready to set up replication? Continue to the [Setup Guide](/docs/high-availability/setup/). +- [Setup Guide](/docs/high-availability/setup/) — configure object storage, the + primary, and replica nodes. +- [Client failover](/docs/high-availability/client-failover/concepts/) — + configure your applications to follow a primary promotion automatically. +- [Store-and-forward](/docs/high-availability/store-and-forward/concepts/) — + buffer unacknowledged writes on the client so a producer survives an outage + without data loss. 
diff --git a/documentation/high-availability/setup.md b/documentation/high-availability/setup.md index 7ec89a12b..41d3d43ec 100644 --- a/documentation/high-availability/setup.md +++ b/documentation/high-availability/setup.md @@ -254,6 +254,13 @@ Data committed to the primary but not yet replicated will be lost. Use planned migration if the primary is still functional. ::: +:::tip Keep clients connected across the switch +Promoting a replica only helps if your applications can find the new primary. +Configure clients with a multi-host address list so they fail over +automatically — see +[Client failover](/docs/high-availability/client-failover/concepts/). +::: + ### Point-in-time recovery Restore the database to a specific historical timestamp. @@ -273,3 +280,7 @@ Restore the database to a specific historical timestamp. - [Tuning guide](/docs/high-availability/tuning/) - Optimize replication performance +- [Client failover](/docs/high-availability/client-failover/concepts/) - + Configure your applications with a multi-host address list so they follow a + primary promotion automatically. Replication moves the data; client failover + keeps your clients connected to it. diff --git a/documentation/high-availability/store-and-forward/concepts.md b/documentation/high-availability/store-and-forward/concepts.md index 7f070d33d..ff025b3cf 100644 --- a/documentation/high-availability/store-and-forward/concepts.md +++ b/documentation/high-availability/store-and-forward/concepts.md @@ -7,13 +7,6 @@ description: unacknowledged frames against a fresh connection. --- -:::note Java-only today - -Client-side store-and-forward support is currently available in the Java -client. Additional language clients are on the roadmap. - -::: - Store-and-forward (SF) is the client-side substrate that sits between your application code and the QWP wire transport. 
It absorbs publishes into a local ring of fixed-size segments, drains them over a WebSocket connection @@ -22,7 +15,7 @@ disconnect or restart. The goal is **producer-never-blocks-on-the-wire**. Your call to `flush()` returns as soon as data is published into the substrate. Acknowledgements -arrive asynchronously. A network outage, a server restart, even a JVM +arrive asynchronously. A network outage, a server restart, even a process crash leaves your producer code unaffected — the I/O thread quietly reconnects and replays what remains. @@ -35,8 +28,8 @@ SF runs in either of two modes selected by the connect string: | Trigger | `sf_dir` is **unset** | `sf_dir` is set | | Storage | malloc'd ring in process RAM | mmap'd files under `//` | | Default capacity | `128 MiB` | `10 GiB` | -| Survives JVM exit | No | Yes | -| Survives JVM crash | No | Yes — replay on next start | +| Survives process exit | No | Yes | +| Survives process crash | No | Yes — replay on next start | | Tolerates transient network blips | Yes | Yes | | Tolerates multi-minute server outages | Bounded by RAM cap | Bounded by disk cap | | Recovers another sender's stale slot | n/a | Opt-in via `drain_orphans=on` | @@ -240,7 +233,7 @@ on-disk and watermarked values, so already-durable-acked frames inside the lowest surviving segment are not re-replayed. The file is **optional** — a conformant client may choose not to maintain -it. The Java reference client does. +it. The reference client does. ## Orphan adoption @@ -254,7 +247,7 @@ opens a separate WebSocket connection, runs the same recovery + replay flow, and exits when the orphan is fully drained. This is the rescue path for a sender that died without draining cleanly -— a JVM crash, an OOM kill, a host reboot. The replacement process picks +— a process crash, an OOM kill, a host reboot. The replacement process picks the orphan's slot lock and clears its disk footprint. 
Without `drain_orphans=on` the dead sender's data persists on disk indefinitely until an operator intervenes. diff --git a/documentation/high-availability/store-and-forward/configuration.md b/documentation/high-availability/store-and-forward/configuration.md index db13ff71b..9edcbd383 100644 --- a/documentation/high-availability/store-and-forward/configuration.md +++ b/documentation/high-availability/store-and-forward/configuration.md @@ -6,13 +6,6 @@ description: substrate — storage, reconnect, durable-ack, and error-handling. --- -:::note Java-only today - -Client-side store-and-forward support is currently available in the Java -client. Additional language clients are on the roadmap. - -::: - This page is the configuration reference for the SF connect-string keys. For the model behind each knob, read [Concepts](/docs/high-availability/store-and-forward/concepts/); for @@ -78,7 +71,7 @@ Opt in to object-store-durable trim. See | Key | Type | Default | Description | |---|---|---|---| | `error_inbox_capacity` | int (≥16) | `256` | Bounded SPSC queue capacity for async error notifications. Overflow drops the oldest entry and increments `getDroppedErrorNotifications`. | -| `on_server_error`, `on_schema_error`, `on_parse_error`, `on_internal_error`, `on_security_error`, `on_write_error` | enum | per category | Override the default policy (`HALT` or `DROP_AND_CONTINUE`) for a category. Reserved in the spec but not yet recognised by the Java connect-string parser — use the fluent `LineSenderBuilder` API today. | +| `on_server_error`, `on_schema_error`, `on_parse_error`, `on_internal_error`, `on_security_error`, `on_write_error` | enum | per category | Override the default policy (`HALT` or `DROP_AND_CONTINUE`) for a category. Reserved in the spec but not yet recognised by the connect-string parser. | The per-category defaults are documented in [Concepts § Error frames](/docs/high-availability/store-and-forward/concepts/#error-frames). 
@@ -132,7 +125,7 @@ try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { ``` No `sf_dir`, so memory mode. The default `128 MiB` cap absorbs short -network blips. A JVM crash loses the unacked tail. +network blips. A process crash loses the unacked tail. ### Single-node durable producer diff --git a/documentation/high-availability/store-and-forward/operating-and-tuning.md b/documentation/high-availability/store-and-forward/operating-and-tuning.md index 8a0d3e481..3bd064656 100644 --- a/documentation/high-availability/store-and-forward/operating-and-tuning.md +++ b/documentation/high-availability/store-and-forward/operating-and-tuning.md @@ -7,13 +7,6 @@ description: observability, and orphan adoption. --- -:::note Java-only today - -Client-side store-and-forward support is currently available in the Java -client. Additional language clients are on the roadmap. - -::: - This page is the operator-facing guide for SF in production: how to provision the slot directory, what to watch, and how to tune the limits. For the underlying model see diff --git a/documentation/high-availability/store-and-forward/when-to-use.md b/documentation/high-availability/store-and-forward/when-to-use.md index 931831e42..7fce660c1 100644 --- a/documentation/high-availability/store-and-forward/when-to-use.md +++ b/documentation/high-availability/store-and-forward/when-to-use.md @@ -7,13 +7,6 @@ description: orphan adoption. --- -:::note Java-only today - -Client-side store-and-forward support is currently available in the Java -client. Additional language clients are on the roadmap. - -::: - The QWP WebSocket transport always uses a store-and-forward (SF) substrate. What changes between deployments is **where** that substrate keeps unacked data and **what durability bar** it acknowledges against. 
This page is the @@ -53,7 +46,7 @@ Unacked frames are written to mmap'd files under - The producer process is long-running and outage budgets are measured in minutes (the default `reconnect_max_duration_millis` is 5 minutes for a reason). -- You need in-flight data to survive process restarts — JVM crash, OOM +- You need in-flight data to survive process restarts — process crash, OOM kill, host reboot, planned redeploy. - You ingest at rates where minutes of buffering exceeds RAM you can spare. @@ -71,7 +64,7 @@ string. |---|---|---| | Where is buffered data? | Process RAM | Disk (`//`) | | Default capacity | `128 MiB` | `10 GiB` | -| Survives a JVM crash? | No | Yes | +| Survives a process crash? | No | Yes | | Survives `kill -9`? | No | Yes | | Survives a host reboot? | No | Yes (if the disk does) | | Cross-sender rescue (orphan adoption) | n/a | Yes (opt-in) | @@ -196,7 +189,7 @@ replaces and extends it. ```mermaid graph TD Q1{Will the producer outlive any single outage you care about?} - Q2{Does data need to survive a JVM crash or kill -9?} + Q2{Does data need to survive a process crash or kill -9?} Q3{Is object-store durability required before ack?} Q4{Multiple senders share sf_dir, with dynamic sender_id?} diff --git a/documentation/high-availability/tuning.md b/documentation/high-availability/tuning.md index 5a25a20a9..73b93a1a9 100644 --- a/documentation/high-availability/tuning.md +++ b/documentation/high-availability/tuning.md @@ -279,6 +279,7 @@ For example, a 2 MiB WAL segment becomes ~256 KiB in the transport layer. 
## Next steps -- [Replication overview](/docs/high-availability/overview/) - How replication works +- [High availability overview](/docs/high-availability/overview/) - How replication works - [Setup guide](/docs/high-availability/setup/) - Configure replication +- [Client failover](/docs/high-availability/client-failover/concepts/) - Configure clients to follow a primary promotion - [Configuration reference](/docs/configuration/overview/) - All server settings diff --git a/documentation/sidebars.js b/documentation/sidebars.js index 333ea7194..36b60750d 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -668,19 +668,13 @@ module.exports = { label: "Overview", }, { - id: "high-availability/setup", - type: "doc", - label: "Setup Guide", - }, - { - id: "high-availability/tuning", - type: "doc", - label: "Tuning", - }, - { - id: "high-availability/wal-cleanup", - type: "doc", - label: "WAL Cleanup", + type: "category", + label: "Replication", + items: [ + "high-availability/setup", + "high-availability/tuning", + "high-availability/wal-cleanup", + ], }, { type: "category", From d01cea6c4e07c8e4f3cd09dcc12c6e64b58ffbec Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 14 May 2026 14:33:02 +0100 Subject: [PATCH 08/44] reframing "survives a crash" as whether unacked data is lost or can be recovered --- .../store-and-forward/concepts.md | 12 ++++++----- .../store-and-forward/configuration.md | 2 +- .../store-and-forward/when-to-use.md | 21 +++++++++---------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/documentation/high-availability/store-and-forward/concepts.md b/documentation/high-availability/store-and-forward/concepts.md index ff025b3cf..bba8cc989 100644 --- a/documentation/high-availability/store-and-forward/concepts.md +++ b/documentation/high-availability/store-and-forward/concepts.md @@ -15,9 +15,11 @@ disconnect or restart. The goal is **producer-never-blocks-on-the-wire**. 
Your call to `flush()` returns as soon as data is published into the substrate. Acknowledgements -arrive asynchronously. A network outage, a server restart, even a process -crash leaves your producer code unaffected — the I/O thread quietly -reconnects and replays what remains. +arrive asynchronously. A network outage or a server restart leaves your +producer code unaffected — the I/O thread quietly reconnects and replays +what remains. In SF mode, even a crash of the sender process itself loses +no unacked data: the next sender on the slot recovers it from disk and +replays it. ## Two modes @@ -28,8 +30,8 @@ SF runs in either of two modes selected by the connect string: | Trigger | `sf_dir` is **unset** | `sf_dir` is set | | Storage | malloc'd ring in process RAM | mmap'd files under `//` | | Default capacity | `128 MiB` | `10 GiB` | -| Survives process exit | No | Yes | -| Survives process crash | No | Yes — replay on next start | +| Unacked data if the sender crashes | Lost | Recovered and replayed on restart | +| Unacked data if the sender's host reboots | Lost | Recovered, if the disk persists | | Tolerates transient network blips | Yes | Yes | | Tolerates multi-minute server outages | Bounded by RAM cap | Bounded by disk cap | | Recovers another sender's stale slot | n/a | Opt-in via `drain_orphans=on` | diff --git a/documentation/high-availability/store-and-forward/configuration.md b/documentation/high-availability/store-and-forward/configuration.md index 9edcbd383..318b6556a 100644 --- a/documentation/high-availability/store-and-forward/configuration.md +++ b/documentation/high-availability/store-and-forward/configuration.md @@ -125,7 +125,7 @@ try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { ``` No `sf_dir`, so memory mode. The default `128 MiB` cap absorbs short -network blips. A process crash loses the unacked tail. +network blips. A sender crash loses the unacked tail. 
### Single-node durable producer diff --git a/documentation/high-availability/store-and-forward/when-to-use.md b/documentation/high-availability/store-and-forward/when-to-use.md index 7fce660c1..38c69aad2 100644 --- a/documentation/high-availability/store-and-forward/when-to-use.md +++ b/documentation/high-availability/store-and-forward/when-to-use.md @@ -29,8 +29,8 @@ Unacked frames live in a malloc'd ring in process memory. Default cap is - The producer process is short-lived or ephemeral (a CLI job, a CI worker, a serverless function). -- A process restart is acceptable as a fresh start — you don't need - in-flight data to survive a crash. +- A sender restart is acceptable as a fresh start — losing any in-flight + data when the sender stops is acceptable. - You only need to tolerate **transient** network blips and short server outages (think: rolling upgrades, brief network partitions). - Your data volume comfortably fits in RAM during the longest outage you @@ -46,8 +46,8 @@ Unacked frames are written to mmap'd files under - The producer process is long-running and outage budgets are measured in minutes (the default `reconnect_max_duration_millis` is 5 minutes for a reason). -- You need in-flight data to survive process restarts — process crash, OOM - kill, host reboot, planned redeploy. +- In-flight data must not be lost when the sender stops or its host + reboots — crash, OOM kill, planned redeploy. - You ingest at rates where minutes of buffering exceeds RAM you can spare. - You operate unattended at the edge (sensors, ETL jobs) where the @@ -60,13 +60,12 @@ string. ## Comparison at a glance -| Question | Memory mode | SF mode | +| Aspect | Memory mode | SF mode | |---|---|---| -| Where is buffered data? | Process RAM | Disk (`//`) | +| Buffered data location | Process RAM | Disk (`//`) | | Default capacity | `128 MiB` | `10 GiB` | -| Survives a process crash? | No | Yes | -| Survives `kill -9`? | No | Yes | -| Survives a host reboot? 
| No | Yes (if the disk does) | +| Unacked data after a sender crash (`kill -9`, OOM) | Lost | Recovered and replayed on restart | +| Unacked data after the sender's host reboots | Lost | Recovered, if the disk persists | | Cross-sender rescue (orphan adoption) | n/a | Yes (opt-in) | | Setup cost | Zero | Provisioning a writable directory | | Operational cost | Zero | Sizing, monitoring, lock collisions | @@ -166,7 +165,7 @@ If you are currently using HTTP or TCP ILP ingest, the comparison is: | Capability | HTTP ILP | TCP ILP | QWP WebSocket + SF | |---|---|---|---| | Non-blocking producer | No (request waits) | No (TCP backpressure) | Yes (buffer absorbs publishes) | -| Survives process crash | No | No | Yes (SF mode) | +| No data loss on a sender crash | No | No | Yes (SF mode) | | Server outage tolerance | Best-effort retry | None | Reconnect loop with multi-minute budget | | Multi-host failover | Yes (HTTP only) | No | Yes | | Cross-region durability ack | No | No | Yes (`request_durable_ack=on`) | @@ -189,7 +188,7 @@ replaces and extends it. ```mermaid graph TD Q1{Will the producer outlive any single outage you care about?} - Q2{Does data need to survive a process crash or kill -9?} + Q2{A sender crash must not lose in-flight data?} Q3{Is object-store durability required before ack?} Q4{Multiple senders share sf_dir, with dynamic sender_id?} From 1e9dab63503e80c95e67a8bc23bbd7d6765733e1 Mon Sep 17 00:00:00 2001 From: Vlad Ilyushchenko Date: Thu, 14 May 2026 14:48:07 +0100 Subject: [PATCH 09/44] qwp ingress: precision fixes, motivation pitch, frame-cap clarification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add "Why implement a QWP client" pitch and "Client lifecycle" narrative so new implementers can orient before diving into encoding details. 
- Spell out sequence numbering (server-assigned by receive order, not in the wire header), Gorilla first-DoD anchor, decimal scale formula (value = unscaled / 10^scale), and VARCHAR offset endianness — closes silent-wrong-guess risks for one-shot client generation. - Collapse Symbol section to WebSocket-only (per-table dict is UDP) and drop the now-stranded per-table example. - Document the practical WebSocket frame cap: http.recv.buffer.size (default 2 MiB) is the real ceiling, not the 16 MB protocol limit; exceeding it returns close code 1009 MESSAGE_TOO_BIG. - Fill out durable-ack semantics: watermark trails OK, empty messages trivially durable, reconnects discard in-flight tracking. - Note X-QWP-Client-Id may influence version selection. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../protocols/qwp-ingress-websocket.md | 251 +++++++++++++----- 1 file changed, 182 insertions(+), 69 deletions(-) diff --git a/documentation/protocols/qwp-ingress-websocket.md b/documentation/protocols/qwp-ingress-websocket.md index 030c79c02..e80a9569c 100644 --- a/documentation/protocols/qwp-ingress-websocket.md +++ b/documentation/protocols/qwp-ingress-websocket.md @@ -28,6 +28,45 @@ datagram ingestion, and [QWP egress (WebSocket)](/docs/protocols/qwp-egress-websocket/) for streaming query results back to clients. +## Why implement a QWP client + +If your language already has a QuestDB client, use it — the +[language client guides](/docs/ingestion/overview) list what's available. The +rest of this section is for implementers writing a new one (e.g., to bring +QWP to JavaScript, Rust, Ruby, .NET, or an embedded runtime that the existing +clients don't cover). 
+Compared with the line-oriented ILP protocols (`http`, `https`, `tcp`),
+QWP trades the simplicity of a text protocol for a denser binary encoding,
+yielding higher throughput and lower CPU on both ends:
| The server selects the version as `min(clientMax, serverMax)`. The selected -version is never higher than either side's maximum. +version is never higher than either side's maximum. The server may also +consider the `X-QWP-Client-Id` when selecting the version. ### Connection-level contract @@ -109,6 +149,45 @@ Supported methods: A failed authentication results in a `401` or `403` HTTP response before the WebSocket connection is established. No QWP-level auth handshake exists. +## Client lifecycle + +The end-to-end shape of a QWP client session, before the encoding details: + +1. **Open WebSocket.** Issue an HTTP `GET` to `/write/v4` (or `/api/v4/write`) + with the standard `Upgrade: websocket` headers, plus: + - `X-QWP-Max-Version: 1` — highest version supported. + - `X-QWP-Client-Id: /` — recommended, helps server-side + diagnostics and version negotiation. + - Authentication header (`Authorization: Basic …` or `Authorization: Bearer …`). + - `X-QWP-Request-Durable-Ack: true` — optional, opt-in for cluster-durable + acks (Enterprise). +2. **Verify the upgrade.** On `101 Switching Protocols`, read the response + headers: + - `X-QWP-Version` — the version the connection runs on. Use it for the + `version` byte in every outgoing message header. Reject the connection + if it's outside the range your client supports. + - `X-QWP-Durable-Ack: enabled` — confirms durable-ack frames will follow, + iff you opted in. If you opted in and this header is absent, fail the + connection (don't silently wait for acks the server will never send). +3. **Send binary frames.** Each frame is one QWP message: + `12-byte header` + payload (`Delta Symbol Dictionary` if any, then one or + more `Table Block`s). The first frame for a given table carries a full + schema; subsequent frames for the same column set reference it by + schema ID. +4. **Drain server responses.** The server sends an OK (or error) binary frame + per request, in send order. 
Match responses to requests by their position + in your in-flight queue — the server-assigned `sequence` field in each + response is the authoritative confirmation. If you opted in to durable + ack, you'll also receive periodic `STATUS_DURABLE_ACK` frames carrying + cumulative per-table watermarks. +5. **Close.** Send a WebSocket `Close` frame after the last expected OK has + been drained. + +Every reconnect resets connection-scoped state on both sides: schema IDs, +symbol dictionary, and sequence counter. Clients that want sender-restart +durability layer a store-and-forward buffer on top — see the +[connect string reference](/docs/client-configuration/connect-string#sf-keys). + ## Encoding primitives ### Byte ordering @@ -502,69 +581,55 @@ Values [true, false, true, true, false, false, false, true]: VARCHAR, and BINARY share the same wire format: ```text -+------------------------------------------------+ -| [Null flag + bitmap (see Null handling)] | -+------------------------------------------------+ -| Offset array: (value_count + 1) x uint32 | -| offset[0] = 0 | -| offset[i+1] = end of value[i] | -+------------------------------------------------+ -| Data: concatenated bytes | -+------------------------------------------------+ ++--------------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++--------------------------------------------------+ +| Offset array: (value_count + 1) x uint32 LE | +| offset[0] = 0 | +| offset[i+1] = end of value[i] | ++--------------------------------------------------+ +| Data: concatenated bytes | ++--------------------------------------------------+ ``` - `value_count = row_count - null_count` -- Offsets are uint32, little-endian +- Offsets are uint32, little-endian (all multi-byte numeric values in QWP are + little-endian — restated here because the diagram is often skimmed). - Value `i` spans bytes `[offset[i], offset[i+1])` - For VARCHAR, the bytes are valid UTF-8. 
For BINARY, the bytes are opaque. - The uint32 offsets bound individual values to 2^31 - 1 bytes. ### Symbol -Dictionary-encoded strings for low-cardinality columns. The wire format depends -on the dictionary mode. - -#### Per-table dictionary mode +Dictionary-encoded strings for low-cardinality columns. -Used by UDP because datagrams cannot rely on a connection-scoped dictionary -persisting across messages. +:::info WebSocket uses global delta dictionaries only -```text -+----------------------------------------------+ -| [Null flag + bitmap (see Null handling)] | -+----------------------------------------------+ -| dictionary_size: varint | -+----------------------------------------------+ -| Dictionary entries: | -| For each entry: | -| entry_length: varint | -| entry_data: UTF-8 bytes | -+----------------------------------------------+ -| Value indices: | -| For each non-null row: | -| dict_index: varint | -+----------------------------------------------+ -``` +WebSocket clients set `FLAG_DELTA_SYMBOL_DICT` (`0x08`) on every message +and use the global delta dictionary mode **exclusively**. The per-table +dictionary mode is UDP-only — see +[QWP ingress (UDP)](/docs/protocols/qwp-ingress-udp/) for that format. -Dictionary indices are 0-based. When a null bitmap is present, only non-null -rows have indices written. - -#### Global delta dictionary mode (WebSocket) +::: -When `FLAG_DELTA_SYMBOL_DICT` (0x08) is set, symbol columns use global integer -IDs instead of per-table dictionaries. The dictionary entries are sent in the -message-level [delta symbol dictionary](#delta-symbol-dictionary) section. -Column data consists of varint-encoded global IDs only: +The dictionary entries themselves are sent in the message-level +[delta symbol dictionary](#delta-symbol-dictionary) section. 
Column data for a +SYMBOL column is then just a sequence of varint-encoded global IDs, one per +non-null row: ```text +--------------------------------------------+ +| [Null flag + bitmap (see Null handling)] | ++--------------------------------------------+ | For each non-null row: | | global_id: varint Global symbol ID | +--------------------------------------------+ ``` -WebSocket clients set `FLAG_DELTA_SYMBOL_DICT` on every message and use this -mode exclusively. +The client owns the global ID assignment. Each new string gets the next +sequential integer, starting from `0` on a fresh connection. Only the new +entries since the previous message are transmitted; the server accumulates the +dictionary for the lifetime of the connection. ### Timestamp encoding @@ -630,11 +695,18 @@ handling section: #### Gorilla delta-of-delta algorithm +The first two timestamps are written in full as int64 values. Starting from +the third timestamp (index `i = 2`), each subsequent value is encoded as a +delta-of-deltas: + ```python -delta_i = t[i] - t[i - 1] -dod_i = delta_i - delta_prev +delta_i = t[i] - t[i - 1] +dod_i = delta_i - delta_{i-1} # delta_{i-1} = t[i-1] - t[i-2] ``` +The very first encoded DoD applies at `i = 2`, where `delta_{i-1} = t[1] - t[0]`. +There is no implicit zero-delta anchor before that. + Encoding buckets (bits are written LSB-first): | Condition | Prefix | Value bits | Total bits | @@ -696,7 +768,16 @@ N-dimensional arrays, row-major order: ### Decimal types (DECIMAL64, DECIMAL128, DECIMAL256) Decimal values are stored as two's complement integers. A 1-byte scale prefix -is shared by all values in the column. +is shared by all values in the column. The scale is the number of decimal +digits to the right of the decimal point — i.e., the real value is reconstructed +as: + +```text +value = unscaled_int / 10^scale +``` + +For example, with `scale = 3` an unscaled int64 of `12345` decodes to `12.345`. +The scale is base-10, not base-2. 
```text +----------------------------------------------+ @@ -723,6 +804,27 @@ Every response starts with a 1-byte status code. OK and error responses include an 8-byte sequence number that correlates the response with the original request. +### Sequence numbering + +The QWP wire encoder does **not** put a sequence number into the request +header — the message header at offset 0 ends at offset 12 with `payload_length`, +and that is the entire client-side framing. The server assigns the sequence +number itself: it counts inbound binary frames on the connection (starting at +`0`) and echoes the assigned `wireSeq` in the `sequence` field of every OK and +error frame. + +Two consequences for client implementers: + +- **Frames must be sent in strict order.** The server assumes "the Nth frame + received is wireSeq = N", so any reordering by the client breaks the mapping + between requests and responses. +- **Match responses by send order.** The client tracks an ordered list of + outstanding messages; the next OK/error response always corresponds to the + oldest unacknowledged message, and the `sequence` field is the server's + authoritative confirmation of which one. + +On a fresh connection both sides start at `0`. On reconnect both sides reset. + ### OK response ```text @@ -802,6 +904,16 @@ The durable-ack has no sequence field. It carries cumulative per-table watermarks that advance as uploads complete. Only tables whose durable watermark advanced since the last durable-ack are included. +The durable-ack watermark always trails the regular OK watermark. Empty +messages (those that produced no WAL commit, for example messages that only +reference materialized views) are trivially durable; their sequence advances +the durable watermark as soon as all preceding messages are durable. + +Reconnects discard any in-flight durable-ack tracking. 
The new connection +re-OKs replayed batches and the server re-emits cumulative durable-ack +watermarks from scratch, so the client's trim watermark must restart against +the new connection's wire sequencing. + Servers without replication silently ignore the request header and never emit durable-ack frames. There is no durable-failure status; persistent upload failures surface only as absence of a durable-ack frame. @@ -828,6 +940,30 @@ The symbol dictionary limit applies per column in per-table dictionary mode and per connection in global delta dictionary mode. Exceeding it causes the server to reject the message with `PARSE_ERROR`. +### Practical WebSocket frame cap + +The 16 MB max batch is a **QWP protocol ceiling**, not an effective server-side +cap. The HTTP receive buffer used by the WebSocket plumbing is typically +smaller, and it is checked **before** the QWP parser ever sees the payload: + +| Server config key | Default | Effect | +|-------------------------|---------|---------------------------------------------------------------------| +| `http.recv.buffer.size` | 2 MiB | Maximum WebSocket frame the server will accept on `/write/v4`. | + +A WebSocket binary frame larger than this is rejected immediately with close +code `1009 MESSAGE_TOO_BIG` and the connection is dropped — the client will +observe an abrupt disconnect (`ECANCELED`, EPIPE, or similar depending on the +WebSocket library) partway through the send. + +The effective per-message size limit is therefore +`min(http.recv.buffer.size, 16 MiB) − WebSocket frame overhead (≤ 14 bytes)`. + +**Recommendation for client implementers:** keep individual QWP messages +comfortably under the server's `http.recv.buffer.size` — for the default +2 MiB recv buffer, a 1.9 MiB / ~25k-row ceiling per message is a safe target. +Operators who want larger batches must raise `http.recv.buffer.size` on the +server (e.g., `http.recv.buffer.size=17m` to use the full QWP 16 MB headroom). 
+ ## Client operation This section describes the high-level batching and I/O behavior a client @@ -977,29 +1113,6 @@ CD CC CC CC CC CC F4 3F # value = 1.3 62 61 7A # "baz" (row 3) ``` -### Symbol column with per-table dictionary - -3 rows with values: "us", "eu", "us": - -```text -# Null flag -00 # null_flag: 0x00 (no nulls) - -# Dictionary -02 # Dictionary size: 2 entries - -02 # Entry 0 length: 2 -75 73 # "us" - -02 # Entry 1 length: 2 -65 75 # "eu" - -# Value indices -00 # Row 0: index 0 ("us") -01 # Row 1: index 1 ("eu") -00 # Row 2: index 0 ("us") -``` - ### Gorilla timestamps with delta symbol dictionary Table `sensors`, 2 rows, 3 columns: `host` (SYMBOL), `temp` (DOUBLE), From a630bccfa706e56a8080ea33cddd9d8133c6af9a Mon Sep 17 00:00:00 2001 From: Vlad Ilyushchenko Date: Thu, 14 May 2026 15:14:02 +0100 Subject: [PATCH 10/44] qwp egress: precision fixes, motivation pitch, frame-cap clarification - Add "Why implement a QWP query client" pitch and "Client lifecycle" narrative paralleling the ingress doc; surfaces the java-questdb-client reference impl link upfront. - Document the practical WebSocket frame cap on /read/v1: client-to-server frames (QUERY_REQUEST in particular) are bounded by http.recv.buffer.size (default 2 MiB), not the 16 MiB protocol limit; oversized frames are rejected with close code 1009 MESSAGE_TOO_BIG. - Clarify X-QWP-Max-Batch-Rows only asks for smaller batches than the server default (clamps to server's hard limit). - Tighten NULL sentinel docs: FLOAT/DOUBLE sentinel is *any* NaN (incl. 0.0/0.0); IPv4 0.0.0.0 and all-ones GEOHASH cannot round-trip as non-null. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../protocols/qwp-egress-websocket.md | 131 +++++++++++++++++- 1 file changed, 128 insertions(+), 3 deletions(-) diff --git a/documentation/protocols/qwp-egress-websocket.md b/documentation/protocols/qwp-egress-websocket.md index 0b30a82b0..16a753599 100644 --- a/documentation/protocols/qwp-egress-websocket.md +++ b/documentation/protocols/qwp-egress-websocket.md @@ -27,6 +27,45 @@ request/response lifecycle, and byte-credit flow control. For data ingestion, see [QWP ingress (WebSocket)](/docs/protocols/qwp-ingress-websocket/). +## Why implement a QWP query client + +If your language already has a QuestDB client, use it — the +[language client guides](/docs/query/overview) list what's available. The +rest of this section is for implementers writing a new one (e.g., to bring +QWP query support to JavaScript, Rust, .NET, or runtimes that the existing +clients don't cover). + +Compared with the row-oriented HTTP `/exec` JSON endpoint, QWP egress trades +a denser binary encoding for higher throughput and lower CPU on both ends: + +- **Columnar result batches.** Each batch is a single QWP table block — the + same shape QuestDB uses on disk. No per-row type tags, no JSON parsing. +- **Server-driven schemas.** After the first batch carries the schema in + full mode, subsequent batches reference it by integer ID. No repeated + column metadata on the wire. +- **Per-connection symbol dictionary.** Repeated queries on the same + connection (BI dashboards refreshing identical SELECTs) reuse prior + symbol IDs without retransmitting strings. +- **Byte-credit flow control.** The client tells the server how many bytes + it's ready to receive; the server pauses production when the window is + exhausted. Bounded memory for arbitrarily large result sets. +- **zstd compression (optional).** Negotiated at the upgrade, + applied per-batch when it shrinks the payload. 
+- **Bind parameters.** Typed binds prevent SQL injection and let the + server reuse plans without re-parsing. +- **Multi-host failover (Enterprise).** Connect strings can list multiple + endpoints with role/zone preferences; clients reconnect and replay + on transport failure. + +A minimum-viable client that supports SELECTs with the common column types +(BOOLEAN, LONG, DOUBLE, TIMESTAMP, VARCHAR, SYMBOL) plus simple binds is +on the order of ~600 lines in a typed language, plus a WebSocket library +and (optionally) a zstd dependency. + +The authoritative reference implementation is +[`java-questdb-client`](https://github.com/questdb/java-questdb-client). It's +worth keeping open in a tab as you read this page. + ## Overview Key properties: @@ -71,7 +110,7 @@ Version and compression are negotiated at the HTTP upgrade: | `X-QWP-Max-Version` | No | Maximum QWP version the client supports. Defaults to 1 if absent. | | `X-QWP-Client-Id` | No | Free-form client identifier (e.g., `java-egress/1.0.0`). | | `X-QWP-Accept-Encoding` | No | Comma-separated list of acceptable result batch body encodings (see below). | -| `X-QWP-Max-Batch-Rows` | No | Client-preferred per-batch row cap. `0` or absent = server default. | +| `X-QWP-Max-Batch-Rows` | No | Client-preferred per-batch row cap; the server clamps to its own hard limit, so this only ever asks for *smaller* batches (lower latency to first row, more per-batch overhead). `0` or absent = server default. | **Server response headers:** @@ -113,6 +152,56 @@ Version 1 is the initial egress release. Version 2 adds an unsolicited `SERVER_INFO` frame (see [SERVER_INFO](#server_info-0x18)) delivered as the first WebSocket frame after the upgrade. A v1 client never sees it. +## Client lifecycle + +The end-to-end shape of a QWP query client session, before the encoding +details: + +1. 
**Open WebSocket to `/read/v1`.** Standard `Upgrade: websocket` headers, + plus: + - `X-QWP-Max-Version: 2` — request v2 to receive `SERVER_INFO`; the + server downgrades to v1 if it doesn't support v2. + - `X-QWP-Client-Id: /` — recommended. + - `X-QWP-Accept-Encoding: zstd, raw` — optional; opt into compression. + - `X-QWP-Max-Batch-Rows: ` — optional; request smaller batches than + the server default (for lower latency to first row). + - Authentication header (`Authorization: Basic …` or `Authorization: Bearer …`). +2. **Verify the upgrade.** On `101 Switching Protocols`: + - `X-QWP-Version` is the negotiated version. Use it as the `version` + byte in every outgoing message header. + - `X-QWP-Content-Encoding` is the server's chosen compression (absent + means `raw`). +3. **(v2 only) Read `SERVER_INFO`.** The first WebSocket binary frame + carries the server's role, cluster/node identity, and zone (if + advertised). Apply your `target=` / `zone=` filter before sending a + `QUERY_REQUEST`; if the role doesn't match, close and try the next + endpoint. +4. **Send `QUERY_REQUEST`.** Assign a fresh `request_id` (client-owned, + unique within the connection), include SQL text, bind parameters, and + `initial_credit` (`0` for unbounded streaming). +5. **Drain frames demuxed by `request_id`.** The server streams + `RESULT_BATCH(seq=0, schema mode 0x00)`, then + `RESULT_BATCH(seq=1+, schema mode 0x01)`, until a terminator: + - `RESULT_END` — cursor exhausted, success. + - `EXEC_DONE` — non-SELECT statement, no rows; carries `rows_affected`. + - `QUERY_ERROR` — failure at any point in the lifecycle; terminal. + The server may interpose a `CACHE_RESET` between a terminator and the + next query's first frame; clients must process it before assuming + schema-ID or symbol-dict continuity. +6. **Flow control.** If you set a non-zero `initial_credit`, send + `CREDIT(request_id, additional_bytes)` frames to keep the byte window + open. 
The server pauses production when the budget reaches zero (with + a one-batch row floor to guarantee progress). +7. **Cancel (optional).** Send `CANCEL(request_id)` to abort. Continue + draining in-flight `RESULT_BATCH` frames until the terminator + (`QUERY_ERROR(CANCELLED)` or, if it raced, `RESULT_END`). +8. **Close.** Send a WebSocket `Close` frame after the last expected + terminator has been drained. + +Reconnects reset connection-scoped state on both sides: schema registry, +symbol dictionary, and `batch_seq` (which restarts at `0` for any replayed +query on the new connection). + ## Message structure The egress header is byte-identical to the @@ -505,13 +594,23 @@ results should treat these sentinels as indistinguishable from explicit NULL: |----------------------------------------------|---------------------| | INT, IPv4 | `Integer.MIN_VALUE` (INT); `0` (IPv4) | | LONG, DATE, TIMESTAMP, TIMESTAMP_NANOS, DECIMAL64 | `Long.MIN_VALUE` | -| FLOAT | `NaN` | -| DOUBLE | `NaN` | +| FLOAT | any `NaN` (incl. `0.0f / 0.0f`) | +| DOUBLE | any `NaN` (incl. `0.0 / 0.0`) | | GEOHASH (all widths) | All-ones (`-1`) | | UUID | Both halves `Long.MIN_VALUE` | | LONG256 | All four longs `Long.MIN_VALUE` | | BOOLEAN, BYTE, SHORT, CHAR | No null sentinel; these types cannot carry NULL in QuestDB | +A consequence of reusing in-engine sentinels on the wire is that some bit +patterns cannot be expressed as non-null: + +- **IPv4 `0.0.0.0`** is the IPv4 null sentinel; a non-null `0.0.0.0` cannot be + round-tripped and decodes as NULL. +- **GEOHASH "all ones"** is the geohash null sentinel; a geohash whose bit + pattern is all-ones cannot be round-tripped and decodes as NULL. +- **FLOAT / DOUBLE `NaN`** of any bit pattern (including non-canonical NaNs + like `0.0 / 0.0`) decodes as NULL. There is no separate "QWP NaN". + ### Array element nulls Array columns (`DOUBLE_ARRAY`, `LONG_ARRAY`) have no per-element null bitmap. 
@@ -714,6 +813,32 @@ OK (0x00) is not used in egress; success terminates with `RESULT_END` or Soft caps are implementation-defined and may be tuned by the server operator. +### Practical WebSocket frame cap + +The 16 MiB `RESULT_BATCH` limit and 1 MiB SQL limit are **QWP protocol +ceilings**, not effective server-side caps. The HTTP receive buffer for the +`/read/v1` endpoint applies to **client → server** frames (`QUERY_REQUEST`, +`CANCEL`, `CREDIT`) and is checked before the QWP parser sees the payload: + +| Server config key | Default | Effect | +|-------------------------|---------|--------------------------------------------------------------------------------------------| +| `http.recv.buffer.size` | 2 MiB | Maximum WebSocket frame the server will accept on `/read/v1`. | + +A client-side frame larger than this is rejected with WebSocket close code +`1009 MESSAGE_TOO_BIG` and the connection is dropped — the client observes an +abrupt disconnect (`ECANCELED`, `EPIPE`, or similar) before any +`QUERY_ERROR` arrives. + +**For client implementers:** a `QUERY_REQUEST` carries SQL text plus all bind +parameter values. Keep the total under `http.recv.buffer.size` minus +WebSocket frame overhead (≤ 14 bytes). With the default 2 MiB recv buffer, +~1.9 MiB of SQL + binds is a safe ceiling. Long SQL or large array binds are +the realistic triggers. + +`RESULT_BATCH` frames (server → client) are bounded by the server's own +producer-side configuration; sizing the client's WebSocket library to handle +up to 16 MiB receive frames covers any well-configured server. 
+ ## Examples ### Simple unbounded query From 1b5e105335bae8c156865a50cc14c118b0ecba05 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 14 May 2026 15:22:11 +0100 Subject: [PATCH 11/44] fixing examples, smaller fixes --- .../client-failover/concepts.md | 15 +++++----- .../client-failover/configuration.md | 29 ++++++++++--------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/documentation/high-availability/client-failover/concepts.md b/documentation/high-availability/client-failover/concepts.md index 673060808..1e2360cfa 100644 --- a/documentation/high-availability/client-failover/concepts.md +++ b/documentation/high-availability/client-failover/concepts.md @@ -56,7 +56,7 @@ host. | `Unknown` | The host has not been tried in this round, or its classification was reset. | | `TransientReject` | The server returned `421` with `X-QuestDB-Role: PRIMARY_CATCHUP` — it is a primary that is still catching up after promotion. Expected to recover. | | `TransportError` | TCP/TLS handshake failed, an HTTP upgrade returned a transient error code, or an established connection broke mid-stream. | -| `TopologyReject` | The server returned `421` with a role that cannot satisfy the requested `target=` filter — for example, a `REPLICA` when you asked for `target=primary`. The host will not become writable without a topology change. | +| `TopologyReject` | The server returned `421` with any role other than `PRIMARY_CATCHUP` (`PRIMARY`, `REPLICA`, `STANDALONE`, or an unrecognised token), or — on egress — a successfully-upgraded host whose `SERVER_INFO` role does not satisfy the requested `target=` filter. The host will not become usable without a topology change. | A lower state in the table above is preferred when the client picks the next host to try. @@ -72,8 +72,9 @@ Each host is also classified relative to the client's configured `zone=`: | `Other` | Server advertised a different zone. 
| Zone information is advertised by the server on a successful upgrade and -(starting in QWP v2) on `421` rejects. The client remembers it for the lifetime -of the connection. +on `421` rejects. Once observed, the client remembers a host's zone tier for +the lifetime of that client — it persists across rounds and reconnects until +the host re-advertises a different zone. `target=primary` collapses every host's zone tier to `Same` — writers must follow the primary regardless of geography. Ingress is currently zone-blind in @@ -152,7 +153,7 @@ page for how the reconnect loop interacts with the disk-backed segment ring. ### Egress (queries) -The egress failover loop wraps each `Execute()` call on the read-side query +The egress failover loop wraps each `execute()` call on the read-side query client. It is interactive: a slow failover is worse than a clear error, so the budget is short: @@ -188,12 +189,12 @@ The host is demoted in the priority lattice; the client walks to the next host within the same round. No exponential backoff is consumed. - `421` + `X-QuestDB-Role: PRIMARY_CATCHUP` → `TransientReject` -- `421` + any other recognised role → `TopologyReject` -- `SERVER_INFO.Role` does not match the requested `target=` +- `421` + any other non-empty role, including unrecognised tokens → `TopologyReject` +- `SERVER_INFO.Role` does not match the requested `target=` (egress only) If every host in a round role-rejects, ingress pays one fixed backoff sleep (reset to `InitialBackoff`, no doubling) and starts a fresh round; egress -fails the current `Execute()` call. +fails the current `execute()` call. ### Transient — enter backoff diff --git a/documentation/high-availability/client-failover/configuration.md b/documentation/high-availability/client-failover/configuration.md index c7e13ecbb..a307385a3 100644 --- a/documentation/high-availability/client-failover/configuration.md +++ b/documentation/high-availability/client-failover/configuration.md @@ -14,16 +14,18 @@ first. 
## Common keys -These keys apply to every WS / WSS / HTTP / HTTPS client. They are documented -in full on the +`addr` and `auth_timeout_ms` apply to every WS / WSS / HTTP / HTTPS client. +`zone` is accepted everywhere but only takes effect on egress; `target` is an +egress-only key and is rejected as an unknown key on an ingress connect string. +They are documented in full on the [connect-string reference](/docs/client-configuration/connect-string#failover-keys); the table below summarises the failover-relevant subset. | Key | Type | Default | Notes | |---|---|---|---| | `addr` | `host:port[,host:port…]` | required | Comma-separated peer list. The two syntactic forms (`addr=h1,h2` and repeated `addr=h1;addr=h2`) accumulate. Empty entries are rejected. | -| `zone` | string | unset | Client's zone identifier (opaque, case-insensitive — `eu-west-1a`, `dc-amsterdam`, etc.). Egress prefers same-zone peers when `target` is `any` or `replica`. Silently ignored on ingress. | -| `target` | `any` \| `primary` \| `replica` | `any` | Which server role the client accepts. See [Role filter](/docs/high-availability/client-failover/concepts/#role-filter-target) for the role table. | +| `zone` | string | unset | Client's zone identifier (opaque, case-insensitive — `eu-west-1a`, `dc-amsterdam`, etc.). Egress prefers same-zone peers when `target` is `any` or `replica`. Silently accepted but ignored on ingress. | +| `target` | `any` \| `primary` \| `replica` | `any` | **Egress only.** Which server role the query client accepts. Rejected as an unknown key on an ingress connect string. See [Role filter](/docs/high-availability/client-failover/concepts/#role-filter-target) for the role table. | | `auth_timeout_ms` | int (ms) | `15000` | Upper bound on the HTTP-upgrade response read per host. Does **not** cover the TCP connect or TLS handshake — those use the OS default. Set lower if you have well-known network paths and want faster failover; set higher only if upgrade is genuinely slow. 
| `addr` syntax — both of these are equivalent and produce the same three-peer @@ -64,16 +66,16 @@ network), and retrying for five minutes only hides it. ## Egress (query) -The egress failover loop wraps each `Execute()` call on the read-side query +The egress failover loop wraps each `execute()` call on the read-side query client. The full key list lives on the [connect-string reference](/docs/client-configuration/connect-string#egress-flow); the user-visible knobs are: | Key | Type | Default | Notes | |---|---|---|---| -| `failover` | `on` \| `off` | `on` | Global on/off. With `failover=off`, a single failed `Execute()` call surfaces the underlying error without walking the address list. | -| `failover_max_attempts` | int | `8` | Hard cap on attempts within a single `Execute()` call. | -| `failover_max_duration_ms` | int (ms) | `30000` | Wall-clock budget for failover eligibility. Bounds **when failover stops**, not the wall-clock of `Execute()` itself — a final `WalkTracker` round can still cost up to `hostCount × auth_timeout_ms` after the budget expires. | +| `failover` | `on` \| `off` | `on` | Global on/off. With `failover=off`, a single failed `execute()` call surfaces the underlying error without walking the address list. | +| `failover_max_attempts` | int | `8` | Hard cap on attempts within a single `execute()` call. | +| `failover_max_duration_ms` | int (ms) | `30000` | Wall-clock budget for failover eligibility. Bounds **when failover stops**, not the wall-clock of `execute()` itself — a final `WalkTracker` round can still cost up to `hostCount × auth_timeout_ms` after the budget expires. | | `failover_backoff_initial_ms` | int (ms) | `50` | Starting backoff sleep. Doubles up to the cap. | | `failover_backoff_max_ms` | int (ms) | `1000` | Cap on the exponential backoff. With full-jitter, the actual sleep lands in `[0, max)`. 
| @@ -104,12 +106,12 @@ For read-only queries spread across same-zone replicas, with a primary as final fallback: ```java -try (QueryClient client = QueryClient.fromConfig( +try (QwpQueryClient client = QwpQueryClient.fromConfig( "ws::addr=replica-eu-1a:9000,replica-eu-1b:9000,primary:9000;" + "zone=eu-west-1a;target=any;")) { - try (ResultSet rs = client.execute("SELECT * FROM trades WHERE ts > now() - 1h")) { - // ... - } + client.connect(); + // handler is a QwpColumnBatchHandler that receives the result batches + client.execute("SELECT * FROM trades WHERE ts > now() - 1h", handler); } ``` @@ -137,9 +139,10 @@ initial-connect policy lets the producer thread proceed immediately. ### Tight egress failover for an interactive dashboard ```java -try (QueryClient client = QueryClient.fromConfig( +try (QwpQueryClient client = QwpQueryClient.fromConfig( "ws::addr=node-a:9000,node-b:9000;" + "failover_max_duration_ms=5000;failover_max_attempts=3;")) { + client.connect(); // Surfaces an error within a few seconds if the cluster is unreachable. } ``` From ac9193e5026bdd0e9c3ce7904770e97da9ea32b0 Mon Sep 17 00:00:00 2001 From: Vlad Ilyushchenko Date: Thu, 14 May 2026 15:56:22 +0100 Subject: [PATCH 12/44] ilp: reposition as compatibility protocol; move Java Embedded and Message Brokers up to Connect - Add a "Use QWP for new clients" tip callout to ilp/overview.md naming QWP's wins (binary, type-rich, faster, failover, store-and-forward) and framing ILP as the path for InfluxDB / Telegraf / Kafka / Flink users who already emit ILP. - Shorter callout on ilp/columnset-types.md reframing the page as "extensions on top of the InfluxDB type model" and noting QWP exposes the full QuestDB type system natively (no suffix encoding, no casts). - Operator-facing callout on ilp/advanced-settings.md flagging this page as the legacy ILP tuning surface and pointing new deployments at QWP. 
- Sidebar: lift Java Embedded and Message Brokers out from under ILP (they're protocol-agnostic delivery mechanisms, not ILP sub-pages). Final Connect order: Overview, Connect string, Date to Timestamp, Client Libraries, Message Brokers, Compatibility Protocols, Java Embedded, Wire Protocols. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ingestion/ilp/advanced-settings.md | 12 +++++++ .../ingestion/ilp/columnset-types.md | 13 ++++++++ documentation/ingestion/ilp/overview.md | 15 +++++++++ documentation/sidebars.js | 32 +++++++++---------- 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/documentation/ingestion/ilp/advanced-settings.md b/documentation/ingestion/ilp/advanced-settings.md index 0381fe575..0688b4d2d 100644 --- a/documentation/ingestion/ilp/advanced-settings.md +++ b/documentation/ingestion/ilp/advanced-settings.md @@ -9,6 +9,18 @@ description: This documentation provides aid for those venturing outside of the path laid down by their language clients. +:::tip Consider QWP first + +This page is for operators tuning the legacy ILP transport. For new +deployments, prefer the +[QuestDB Wire Protocol (QWP)](/docs/protocols/qwp-ingress-websocket/) — +the native binary protocol with multi-host failover, store-and-forward, +and the full QuestDB type system built in. See the +[ingestion overview](/docs/ingestion/overview/) for languages with +native QWP support. + +::: + For the introductory InfluxDB Line Protocol materials, including authentication, see the [ILP overview](/docs/ingestion/ilp/overview/). diff --git a/documentation/ingestion/ilp/columnset-types.md b/documentation/ingestion/ilp/columnset-types.md index 404800bb3..4dc39dfd3 100644 --- a/documentation/ingestion/ilp/columnset-types.md +++ b/documentation/ingestion/ilp/columnset-types.md @@ -8,6 +8,19 @@ description: This page lists the supported InfluxDB Line Protocol columnset value types and details about type casting. 
+:::tip QWP exposes the full type system natively + +This page documents InfluxDB line-protocol type extensions QuestDB layers +on top of the InfluxDB type model (the `i`, `t`, `n`, etc. suffixes and the +cast tables below). The native +[QuestDB Wire Protocol (QWP)](/docs/protocols/qwp-ingress-websocket/) +supports the entire QuestDB type system directly — no suffix encoding, no +implicit casts — and is the recommended choice for new clients. See the +[ingestion overview](/docs/ingestion/overview/) for languages with native +QWP support. + +::: + If a target column does not exist, QuestDB will create a column using the same type that the ILP client sends. diff --git a/documentation/ingestion/ilp/overview.md b/documentation/ingestion/ilp/overview.md index a0db5c1dd..ec6008941 100644 --- a/documentation/ingestion/ilp/overview.md +++ b/documentation/ingestion/ilp/overview.md @@ -18,6 +18,21 @@ import { Clients } from "../../../src/components/Clients" QuestDB implements the InfluxDB Line Protocol to ingest data. +:::tip Use QWP for new clients + +ILP is now a **compatibility protocol** in QuestDB. It exists for users +coming from InfluxDB, Telegraf, or Kafka / Flink pipelines that already +emit ILP. New deployments should prefer the +[QuestDB Wire Protocol (QWP)](/docs/protocols/qwp-ingress-websocket/) — +binary on the wire, type-rich (full QuestDB type system, no suffix +encoding), faster, and with multi-host failover and store-and-forward +built into the client. See the +[ingestion overview](/docs/ingestion/overview/) for a side-by-side +comparison and the list of languages with native QWP clients available +today. + +::: + The InfluxDB Line Protocol is for **data ingestion only**. 
For building queries, see the diff --git a/documentation/sidebars.js b/documentation/sidebars.js index 36b60750d..cc0be33b7 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -107,6 +107,17 @@ module.exports = { }, ], }, + { + type: "category", + label: "Message Brokers", + collapsed: true, + items: [ + "ingestion/message-brokers/kafka", + "ingestion/message-brokers/telegraf", + "ingestion/message-brokers/redpanda", + "ingestion/message-brokers/flink", + ], + }, { type: "category", label: "Compatibility Protocols", @@ -131,22 +142,6 @@ module.exports = { type: "doc", label: "Advanced Settings", }, - { - id: "ingestion/java-embedded", - type: "doc", - label: "Java Embedded", - }, - { - type: "category", - label: "Message Brokers", - collapsed: true, - items: [ - "ingestion/message-brokers/kafka", - "ingestion/message-brokers/telegraf", - "ingestion/message-brokers/redpanda", - "ingestion/message-brokers/flink", - ], - }, ], }, { @@ -223,6 +218,11 @@ module.exports = { }, ], }, + { + id: "ingestion/java-embedded", + type: "doc", + label: "Java Embedded", + }, { label: "Wire Protocols", type: "category", From 8d44c61d80b997db643f74114940385c37d3ee17 Mon Sep 17 00:00:00 2001 From: Vlad Ilyushchenko Date: Thu, 14 May 2026 15:56:32 +0100 Subject: [PATCH 13/44] qwp egress: document asymmetric framing (client frames omit 12-byte header) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The egress endpoint /read/v1 is asymmetric on the wire: server-to-client frames carry the 12-byte QWP header, but client-to-server frames start directly with msg_kind — no QWP header. Including the header makes the server read 0x51 (the ASCII 'Q' of "QWP1") as an unknown msg_kind and close the WebSocket with code 1006, partway through send. 
Verified against server (QwpEgressUpgradeProcessor.dispatchEgressMessage calls peekMsgKind at offset 0 of the WS frame body) and Java reference client (QwpEgressIoThread.sendQueryRequest writes msg_kind as the first byte, no header). The upstream wire-egress.md spec is wrong on this point and should be filed separately. - Rewrite Message structure section with two ASCII diagrams (server-to- client with header, client-to-server without) and a warning callout naming the symptom and the reason (server keeps the header for RESULT_BATCH's flags + payload_length; client control frames have no analogous need). - Fix Example 1: drop the bogus 12-byte header from the QUERY_REQUEST hex dump; RESULT_BATCH / RESULT_END below unchanged. - Client lifecycle step 4: inline note that the binary frame body starts directly with msg_kind for client-to-server frames. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../protocols/qwp-egress-websocket.md | 84 +++++++++++-------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/documentation/protocols/qwp-egress-websocket.md b/documentation/protocols/qwp-egress-websocket.md index 16a753599..de15c504d 100644 --- a/documentation/protocols/qwp-egress-websocket.md +++ b/documentation/protocols/qwp-egress-websocket.md @@ -178,7 +178,10 @@ details: endpoint. 4. **Send `QUERY_REQUEST`.** Assign a fresh `request_id` (client-owned, unique within the connection), include SQL text, bind parameters, and - `initial_credit` (`0` for unbounded streaming). + `initial_credit` (`0` for unbounded streaming). The WebSocket binary + frame body starts directly with `msg_kind = 0x10` — see + [Message structure](#message-structure); client-to-server frames carry + no 12-byte QWP header. 5. **Drain frames demuxed by `request_id`.** The server streams `RESULT_BATCH(seq=0, schema mode 0x00)`, then `RESULT_BATCH(seq=1+, schema mode 0x01)`, until a terminator: @@ -204,28 +207,49 @@ query on the new connection). 
## Message structure -The egress header is byte-identical to the -[ingress header](/docs/protocols/qwp-ingress-websocket/#message-structure) -(12 bytes, little-endian): +Egress framing is **asymmetric**: - +- **Server → client** frames carry the full 12-byte QWP header followed + by the payload. The header is byte-identical to the + [ingress header](/docs/protocols/qwp-ingress-websocket/#message-structure): -The **first byte of the payload** is the message kind. The remaining payload -depends on the kind. + + +- **Client → server** frames carry **only the payload**, starting directly + with `msg_kind`. There is no 12-byte QWP header on outbound client frames. ```text +------------------------------------------+ -| Header (12 bytes) | +| WebSocket frame body, server -> client: | +| Header (12 bytes) | +| Payload | +| msg_kind: uint8 | +| (kind-specific body) | ++------------------------------------------+ + +------------------------------------------+ -| Payload | -| msg_kind: uint8 | -| (kind-specific body) | +| WebSocket frame body, client -> server: | +| Payload | +| msg_kind: uint8 | +| (kind-specific body) | +------------------------------------------+ ``` -Placing `msg_kind` in the payload (rather than the header) keeps the header -codec shared with ingress. Endpoint disambiguation is sufficient because -connections are direction-pure. +:::warning Asymmetric framing — common stumbling block + +If you copy the ingress framing (which is symmetric — header on both +directions) into an egress client, the server reads the QWP magic's first +byte (`0x51`, the ASCII `Q`) as an unknown `msg_kind` and closes the +WebSocket with code 1006. Client frames must start directly with +`msg_kind`. + +The header is retained server-to-client because `RESULT_BATCH` uses the +header's `flags` byte (Gorilla, delta dict, zstd) and `payload_length`. 
+Client-to-server frames have no analogous needs: version is fixed from the +upgrade, `table_count` doesn't apply to control kinds, and the WebSocket +frame already carries the payload length. + +::: ### Flags byte @@ -847,25 +871,19 @@ Client sends `SELECT id, value FROM sensors LIMIT 2` with no bind parameters and unbounded credit. ```text -QUERY_REQUEST: - Header: - 51 57 50 31 # Magic: "QWP1" - 01 # Version: 1 - 00 # Flags - 00 00 # table_count = 0 - XX XX XX XX # payload_length - - Payload: - 10 # msg_kind = QUERY_REQUEST - 01 00 00 00 00 00 00 00 # request_id = 1 - 24 # sql_length = 36 - 53 45 4C 45 43 54 20 69 # "SELECT i" - 64 2C 20 76 61 6C 75 65 # "d, value" - 20 46 52 4F 4D 20 73 65 # " FROM se" - 6E 73 6F 72 73 20 4C 49 # "nsors LI" - 4D 49 54 20 32 # "MIT 2" - 00 # initial_credit = 0 (unbounded) - 00 # bind_count = 0 +QUERY_REQUEST (client -> server; WebSocket binary frame body + — no QWP header, see "Message structure" above): + + 10 # msg_kind = QUERY_REQUEST + 01 00 00 00 00 00 00 00 # request_id = 1 + 24 # sql_length = 36 + 53 45 4C 45 43 54 20 69 # "SELECT i" + 64 2C 20 76 61 6C 75 65 # "d, value" + 20 46 52 4F 4D 20 73 65 # " FROM se" + 6E 73 6F 72 73 20 4C 49 # "nsors LI" + 4D 49 54 20 32 # "MIT 2" + 00 # initial_credit = 0 (unbounded) + 00 # bind_count = 0 ``` Server responds with one result batch and end-of-stream: From a89c5a176faf88377b36b6cebce958c2ee688efb Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 17:40:14 +0200 Subject: [PATCH 14/44] qwp server config --- documentation/configuration/qwp.md | 142 +++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 documentation/configuration/qwp.md diff --git a/documentation/configuration/qwp.md b/documentation/configuration/qwp.md new file mode 100644 index 000000000..24c3d7ba7 --- /dev/null +++ b/documentation/configuration/qwp.md @@ -0,0 +1,142 @@ +--- +title: QuestDB Wire Protocol (QWP) +description: + Server-side configuration for QWP ingestion and 
query endpoints. +--- + +QWP is QuestDB's columnar binary protocol for high-throughput data ingestion +(`/write/v4`) and streaming query results (`/read/v1`) over WebSocket and UDP. +These properties control protocol limits and the UDP receiver. WebSocket +ingestion and egress share the HTTP server's network settings (port, TLS, +worker threads); see +[HTTP server configuration](/docs/configuration/http-server/) for those. + +## Protocol limits + +### qwp.max.rows.per.table + +- **Default**: `1000000` +- **Reloadable**: no + +Maximum number of rows per table block in a single QWP message. The server +rejects batches that exceed this limit with a parse error. + +### qwp.max.schemas.per.connection + +- **Default**: `65535` +- **Reloadable**: no + +Maximum number of distinct schemas the server registers per connection. Each +unique combination of column names and types consumes one schema slot. When +the limit is reached, the server rejects further full-schema messages. For +egress connections, a lower soft cap (4,096 by default) triggers a +`CACHE_RESET` frame that clears and restarts the registry before hitting +this hard limit. + +### qwp.max.tables.per.connection + +- **Default**: `10000` +- **Reloadable**: no + +Maximum number of distinct tables a single connection may write to. The +server rejects messages referencing additional tables once this limit is +reached. + +## UDP receiver + +:::note + +The QWP UDP receiver is a fire-and-forget ingestion path for metrics +workloads where occasional message loss is acceptable. It is disabled by +default. For reliable ingestion, use the WebSocket transport. + +::: + +### qwp.udp.bind.to + +- **Default**: `0.0.0.0:9007` +- **Reloadable**: no + +IP address and port the UDP receiver binds to. The default listens on all +network interfaces on port 9007. + +### qwp.udp.commit.interval + +- **Default**: `2000` (milliseconds) +- **Reloadable**: no + +Time interval between commits for data received over UDP. 
Lower values +reduce the window of uncommitted data at the cost of more frequent I/O. + +### qwp.udp.enabled + +- **Default**: `false` +- **Reloadable**: no + +Enable or disable the QWP UDP receiver. + +### qwp.udp.join + +- **Default**: `224.1.1.1` +- **Reloadable**: no + +Multicast group address the UDP receiver joins. Only relevant when +`qwp.udp.unicast` is `false`. + +### qwp.udp.max.uncommitted.datagrams + +- **Default**: `1048576` +- **Reloadable**: no + +Maximum number of uncommitted datagrams before the receiver forces a commit, +regardless of the time-based commit interval. + +### qwp.udp.msg.buffer.size + +- **Default**: `65536` (bytes) +- **Reloadable**: no + +Size of each message buffer allocated for the UDP receiver. + +### qwp.udp.msg.count + +- **Default**: `10000` +- **Reloadable**: no + +Number of message buffers to pre-allocate. Higher values absorb larger +bursts at the cost of more memory. + +### qwp.udp.own.thread + +- **Default**: `true` +- **Reloadable**: no + +When `true`, the UDP receiver runs in a dedicated thread with a busy-spin +loop for lowest latency. When `false`, the receiver uses the shared worker +pool. + +### qwp.udp.own.thread.affinity + +- **Default**: `-1` (no affinity) +- **Reloadable**: no + +CPU core affinity for the dedicated UDP receiver thread. A value of `-1` +lets the OS schedule the thread. Only applies when `qwp.udp.own.thread` is +`true`. + +### qwp.udp.receive.buffer.size + +- **Default**: `-1` (OS default) +- **Reloadable**: no + +OS-level socket receive buffer size in bytes. A value of `-1` uses the +operating system's default. Increase this if you observe datagram drops +under high throughput. + +### qwp.udp.unicast + +- **Default**: `true` +- **Reloadable**: no + +When `true`, the UDP receiver operates in unicast mode. When `false`, it +joins the multicast group specified by `qwp.udp.join`. 
From 9f90b06e2d5e090cada31a95e72f896c62e54a4d Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 17:40:33 +0200 Subject: [PATCH 15/44] qwp server config --- documentation/configuration/overview.md | 1 + documentation/sidebars.js | 1 + 2 files changed, 2 insertions(+) diff --git a/documentation/configuration/overview.md b/documentation/configuration/overview.md index 5d2aa018a..8e3a7036b 100644 --- a/documentation/configuration/overview.md +++ b/documentation/configuration/overview.md @@ -537,6 +537,7 @@ http.net.connection.sndbuf=2m | [OpenID Connect (OIDC)](/docs/configuration/oidc/) | OIDC integration | ✓ | | [Parallel SQL execution](/docs/configuration/parallel-sql-execution/) | Query parallelism settings | | | [Postgres wire protocol](/docs/configuration/postgres-wire-protocol/) | PostgreSQL wire protocol connections | | +| [QuestDB Wire Protocol (QWP)](/docs/configuration/qwp/) | QWP protocol limits and UDP receiver | | | [Replication](/docs/configuration/database-replication/) | High availability cluster replication | ✓ | | [Shared workers](/docs/configuration/shared-workers/) | Worker thread pools | | | [Storage policy](/docs/configuration/storage-policy/) | Partition lifecycle management | ✓ | diff --git a/documentation/sidebars.js b/documentation/sidebars.js index cc0be33b7..5de7f4eee 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -622,6 +622,7 @@ module.exports = { "configuration/oidc", "configuration/parallel-sql-execution", "configuration/postgres-wire-protocol", + "configuration/qwp", "configuration/database-replication", "configuration/shared-workers", "configuration/storage-policy", From 5aba7888cf423e5056bed4a0484dacb6bbbadea1 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 17:41:30 +0200 Subject: [PATCH 16/44] draft of java qwp client --- documentation/ingestion/clients/java.md | 980 ++++++++++++------ documentation/protocols/overview.md | 2 +- .../protocols/qwp-ingress-websocket.md | 2 +- 3 files 
changed, 658 insertions(+), 326 deletions(-) diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index bd33241e5..44a728978 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -1,6 +1,7 @@ --- -title: Java Client Documentation -description: "Reference for the questdb-client Maven artifact — the Java ILP ingestion client for QuestDB, covering setup, configuration, authentication, and error handling." +title: Java client for QuestDB +sidebar_label: Java +description: "QuestDB Java client for high-throughput data ingestion and streaming SQL queries over the QWP binary protocol." --- import Tabs from "@theme/Tabs" @@ -9,43 +10,39 @@ import TabItem from "@theme/TabItem" import CodeBlock from "@theme/CodeBlock" -import { RemoteRepoExample } from "@theme/RemoteRepoExample" - :::note -This is the reference for the QuestDB Java Client when QuestDB is used as a -server. - -For embedded QuestDB, please check our -[Java Embedded Guide](/docs/ingestion/java-embedded/). +This is the reference for the QuestDB Java client when QuestDB is used as a +server. For embedded QuestDB, see the +[Java embedded guide](/docs/ingestion/java-embedded/). ::: -The QuestDB Java client is distributed as a separate Maven artifact -(`org.questdb:questdb-client`). +The QuestDB Java client connects to QuestDB over the +[QWP binary protocol](/docs/protocols/qwp-ingress-websocket/) (WebSocket). It +supports high-throughput data ingestion and streaming SQL queries on the same +transport. -The client provides the following benefits: +Key capabilities: -- **Automatic table creation**: No need to define your schema upfront. 
-- **Concurrent schema changes**: Seamlessly handle multiple data streams with - on-the-fly schema modifications -- **Optimized batching**: Use strong defaults or curate the size of your batches -- **Health checks and feedback**: Ensure your system's integrity with built-in - health monitoring -- **Automatic write retries**: Reuse connections and retry after interruptions +- **Ingestion**: column-oriented batched writes with automatic table creation, + schema evolution, and optional store-and-forward durability. +- **Querying**: streaming SQL result sets, DDL/DML execution, bind parameters, + and byte-credit flow control. +- **Failover**: multi-endpoint connections with automatic reconnect across + rolling upgrades and primary migrations. -:::info +:::tip Legacy transports -This page focuses on our high-performance ingestion client, which is optimized -for **writing** data to QuestDB. For retrieving data, we recommend using a -[PostgreSQL-compatible Java library](/docs/query/pgwire/java/) or our -[HTTP query endpoint](/docs/query/overview/#rest-http-api). +The client also supports ILP ingestion over HTTP and TCP for backward +compatibility. This page documents the recommended WebSocket (QWP) path. For +ILP transport details, see the [ILP overview](/docs/ingestion/ilp/overview/). ::: ## Quick start -Add the QuestDB Java client as a dependency in your project's build configuration file. +Add the dependency: -The code below creates a client instance configured to use HTTP transport to -connect to a QuestDB server running on localhost, port 9000. It then sends two -rows, each containing one symbol and two floating-point values. The client asks -the server to assign a timestamp to each row based on the server's wall-clock -time. 
+### Ingest data - +```java +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { + sender.table("trades") + .symbol("symbol", "ETH-USD") + .symbol("side", "sell") + .doubleColumn("price", 2615.54) + .doubleColumn("amount", 0.00044) + .atNow(); + sender.table("trades") + .symbol("symbol", "BTC-USD") + .symbol("side", "sell") + .doubleColumn("price", 39269.98) + .doubleColumn("amount", 0.001) + .atNow(); + sender.flush(); +} +``` + +### Query data -The client is configured using a configuration string. See -[Ways to create the client](#ways-to-create-the-client) for all configuration -methods, and [Configuration options](#configuration-options) for available -settings. +```java +try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { + client.connect(); + client.execute( + "SELECT ts, sym, price, qty FROM trades WHERE sym = 'ETH-USD' LIMIT 10", + new QwpColumnBatchHandler() { + @Override + public void onBatch(QwpColumnBatch batch) { + batch.forEachRow(row -> System.out.printf( + "ts=%d sym=%s price=%.4f qty=%d%n", + row.getLongValue(0), + row.getSymbol(1), + row.getDoubleValue(2), + row.getLongValue(3) + )); + } + + @Override + public void onEnd(long totalRows) { + System.out.println("done: " + totalRows + " rows"); + } + + @Override + public void onError(byte status, String message) { + System.err.println("query failed: " + message); + } + } + ); +} +``` -## Authenticate and encrypt +## Authentication and TLS -This sample configures the client to use HTTP transport with TLS enabled for a -connection to a QuestDB server. It also instructs the client to authenticate -using HTTP Basic Authentication. +Authentication happens at the HTTP level during the WebSocket upgrade, before +any binary frames are exchanged. The same mechanisms work for both `Sender` +(ingestion) and `QwpQueryClient` (querying). -When using QuestDB Enterprise, you can authenticate using a REST bearer token as -well. 
Please check the [RBAC docs](/docs/security/rbac/#authentication) for -more info. +### HTTP basic auth - +```java +// Ingestion +try (Sender sender = Sender.fromConfig( + "wss::addr=db.example.com:9000;username=admin;password=quest;")) { + // ... +} -## Ways to create the client +// Querying +try (QwpQueryClient client = QwpQueryClient.fromConfig( + "wss::addr=db.example.com:9000;username=admin;password=quest;")) { + client.connect(); + // ... +} +``` -There are three ways to create a client instance: +### Token auth (Enterprise) -1. **From a configuration string.** This is the most common way to create a - client instance. It describes the entire client configuration in a single - string, and allows sharing the same configuration across clients in different - languages. The general format is: +```java +try (Sender sender = Sender.fromConfig( + "wss::addr=db.example.com:9000;token=your_bearer_token;")) { + // ... +} +``` - ```text - ::=;=;...; - ``` +### TLS with custom trust store - [Transport protocol](/docs/ingestion/ilp/overview/#transport-selection) - can be one of these: +```java +try (Sender sender = Sender.fromConfig( + "wss::addr=db.example.com:9000;tls_roots=/path/to/truststore.jks;tls_roots_password=changeit;")) { + // ... +} +``` - - `http` — ILP/HTTP - - `https` — ILP/HTTP with TLS encryption - - `tcp` — ILP/TCP - - `tcps` — ILP/TCP with TLS encryption +For OIDC authentication (Enterprise), see +[OpenID Connect](/docs/security/oidc/). - The key `addr` sets the hostname and port of the QuestDB server. Port - defaults to 9000 for HTTP(S) and 9009 for TCP(S). The minimum configuration - includes the transport and the address. +## Creating the client - ```java - try (Sender sender = Sender.fromConfig("http::addr=localhost:9000;auto_flush_rows=5000;retry_timeout=10000;")) { - // ... - } - ``` +### From a connect string - For all available options, see - [Configuration options](#configuration-options). +The connect string format is `::=;=;...;` -2. 
**From an environment variable.** The `QDB_CLIENT_CONF` environment variable - is used to set the configuration string. Moving configuration parameters to - an environment variable allows you to avoid hard-coding sensitive information - such as tokens and passwords in your code. +For ingestion, use `ws` (plain) or `wss` (TLS): - ```bash - export QDB_CLIENT_CONF="http::addr=localhost:9000;auto_flush_rows=5000;retry_timeout=10000;" - ``` +```java +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { + // ... +} +``` - ```java - try (Sender sender = Sender.fromEnv()) { - // ... - } - ``` +For querying: -3. **Using the Java builder API.** This provides type-safe configuration. +```java +try (QwpQueryClient client = QwpQueryClient.fromConfig("ws::addr=localhost:9000;")) { + client.connect(); + // ... +} +``` - ```java - try (Sender sender = Sender.builder(Sender.Transport.HTTP) - .address("localhost:9000") - .autoFlushRows(5000) - .retryTimeoutMillis(10000) - .build()) { - // ... - } - ``` - -## Configuring multiple URLs +For the full list of connect-string keys, see the +[connect string reference](/docs/client-configuration/connect-string/). -:::note +### From an environment variable -This feature requires QuestDB OSS 9.1.0+ or Enterprise 3.0.4+. +Set `QDB_CLIENT_CONF` to avoid hard-coding credentials: -::: +```bash +export QDB_CLIENT_CONF="wss::addr=db.example.com:9000;username=admin;password=quest;" +``` -The ILP client can be configured with multiple _possible_ endpoints to send your data to. Only one endpoint is used at -a time. +```java +try (Sender sender = Sender.fromEnv()) { + // ... +} +``` -To configure this feature, simply provide multiple `addr` entries. For example: +### Using the builder API +The builder provides type-safe configuration: ```java -try (Sender sender = Sender.fromConfig("http::addr=localhost:9000;addr=localhost:9999;")) { - // ... 
+try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("localhost:9000") + .autoFlushRows(500) + .autoFlushIntervalMillis(50) + .build()) { + // ... } ``` -On initialisation, if `protocol_version=auto`, the sender will identify the first instance that is writeable. Then it will _stick_ to this instance and write -any subsequent data to it. +For `QwpQueryClient`, use the factory methods or configure post-construction: -In the event that the instance becomes unavailable for writes, the client will retry the other possible endpoints, and when it finds -a new writeable instance, will _stick_ to it instead. This unavailability is characterised by failures to connect or locate the instance, -or the instance returning an error code due to it being read-only. +```java +try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { + client.withInitialCredit(256 * 1024); + client.connect(); + // ... +} +``` -By configuring multiple addresses, you can continue to capture data if your primary instance -fails, without having to reconfigure the clients. This backup instance can be hot or cold, and so long as it is assigned a known address, it will be written to as soon as it is started. +## Data ingestion + +### General usage pattern + +1. Create a `Sender` via `Sender.fromConfig()` or the builder. +2. Call `table(name)` to select a table. +3. 
Call column methods to add values:
+   - `symbol(name, value)`
+   - `stringColumn(name, value)`
+   - `boolColumn(name, value)`
+   - `byteColumn(name, byte)`, `shortColumn(name, short)`, `intColumn(name, int)`
+   - `longColumn(name, long)`, `floatColumn(name, float)`, `doubleColumn(name, double)`
+   - `charColumn(name, char)`
+   - `timestampColumn(name, Instant)` or `timestampColumn(name, long, ChronoUnit)`
+   - `uuidColumn(name, lo, hi)` (two longs)
+   - `long256Column(name, l0, l1, l2, l3)` (four longs, least significant first)
+   - `decimalColumn(name, Decimal256)` or `decimalColumn(name, CharSequence)`
+   - `doubleArray(name, ...)` and `longArray(name, ...)` (see [Ingest arrays](#ingest-arrays))
+4. Call `at(Instant)`, `at(long, ChronoUnit)`, or `atNow()` to finalize the row.
+5. Repeat from step 2, or call `flush()` to send buffered data.
+6. Call `close()` when done (or use try-with-resources).
 
-Enterprise users can leverage this feature to transparently handle replication failover, without the need to introduce a load-balancer or
-reconfigure clients.
+```java
+try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) {
+    sender.table("trades")
+        .symbol("symbol", "EURUSD")
+        .symbol("side", "buy")
+        .doubleColumn("price", 1.0842)
+        .longColumn("quantity", 100_000)
+        .at(Instant.now());
+}
+```
+
+Tables and columns are created automatically if they do not exist.
+
+### Ingest arrays
 
-:::tip
+For 1D and 2D arrays, pass a Java array directly:
 
-You may wish to increase the value of `retry_timeout` if you expect your backup instance to take a large amount of time to become writeable.
+```java
+double[] prices = {1.0842, 1.0843, 1.0841};
+sender.table("book").doubleArray("levels", prices).atNow();
+
+long[] counts = {100, 200, 300};
+sender.table("book").longArray("depths", counts).atNow();
+```
 
-For example, when performing a primary migration (Enterprise replication), with default settings, you might want to increase this
-to `30s` or higher. 
+For higher-dimensional arrays, use the `DoubleArray` or `LongArray` class to +avoid GC overhead. Create the instance once and reuse it: +```java +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;"); + DoubleArray ary = new DoubleArray(3, 3, 3)) { + for (int i = 0; i < ROW_COUNT; i++) { + for (int v = 0; v < 27; v++) { + ary.append(v); + } + sender.table("book") + .doubleArray("cube", ary) + .at(getTimestamp(), ChronoUnit.MICROS); + } +} +``` + +:::note +Arrays require QuestDB 9.0.0 or later. ::: +### Designated timestamp -## General usage pattern +The [designated timestamp](/docs/concepts/designated-timestamp/) column +controls time-based partitioning and ordering. There are two ways to set it: -1. Create a client instance via `Sender.fromConfig()`. -2. Use `table(CharSequence)` to select a table for inserting a new row. -3. Use `symbol(CharSequence, CharSequence)` to add all symbols. You must add - symbols before adding other column types. -4. Use the following options to add all the remaining columns: +**User-assigned** (recommended for deduplication and exactly-once delivery): - - `stringColumn(CharSequence, CharSequence)` - - `longColumn(CharSequence, long)` - - `doubleColumn(CharSequence, double)` - - `boolColumn(CharSequence, boolean)` - - `arrayColumn()` -- several variants, see below - - `timestampColumn(CharSequence, Instant)`, or - `timestampColumn(CharSequence, long, ChronoUnit)` - - `decimalColumn(CharSequence, Decimal256)` or - `decimalColumn(CharSequence, CharSequence)` (string literal) +```java +sender.table("trades") + .symbol("symbol", "EURUSD") + .doubleColumn("price", 1.0842) + .at(Instant.now()); + +// Or with explicit units for high-throughput paths: +sender.table("trades") + .symbol("symbol", "EURUSD") + .doubleColumn("price", 1.0842) + .at(System.currentTimeMillis() * 1000, ChronoUnit.MICROS); +``` -:::caution -Decimal values require QuestDB version 9.2.0 or later. 
+**Server-assigned** (server uses its wall-clock time): -Create decimal columns ahead of time with `DECIMAL(precision, scale)` so QuestDB can ingest the values -with the expected precision. See the -[decimal data type](/docs/query/datatypes/decimal/#creating-tables-with-decimals) page for a refresher on -precision and scale. +```java +sender.table("trades") + .symbol("symbol", "EURUSD") + .doubleColumn("price", 1.0842) + .atNow(); +``` + +:::note +QuestDB works best when data arrives in chronological order (sorted by +timestamp). ::: -5. Use `at(Instant)` or `at(long timestamp, ChronoUnit unit)` or `atNow()` to - set a designated timestamp. -6. Optionally: You can use `flush()` to send locally buffered data into a - server. -7. Repeat from step 2 to start a new row. -8. Use `close()` to dispose the Sender after you no longer need it. +### Decimal columns -## Ingest arrays +:::caution +Decimal values require QuestDB 9.2.0 or later. Create decimal columns ahead +of time with `DECIMAL(precision, scale)` so QuestDB ingests values with the +expected precision. See the +[decimal data type](/docs/query/datatypes/decimal/#creating-tables-with-decimals) +page for details. +::: + +### Flushing + +The client accumulates rows in an internal buffer and sends them in batches. + +**Auto-flush** (default): the client flushes when either threshold is reached: + +| Trigger | WebSocket default | HTTP default | +|------------|-------------------|--------------| +| Row count | 1,000 rows | 75,000 rows | +| Time | 100 ms | 1,000 ms | + +Customize via connect string: -To ingest a 1D or 2D array, simply construct a Java array of the appropriate -type (`double[]`, `double[][]`) and supply it to the `arrayColumn()` method. In -order to avoid GC overheads, create the array instance once, and then populate -it with the data of each row. +```text +ws::addr=localhost:9000;auto_flush_rows=500;auto_flush_interval=50; +``` -For arrays of higher dimensionality, use the `DoubleArray` class. 
Here's a basic -example for a 3D array: +**Explicit flush**: disable auto-flush and call `flush()` yourself: ```java -// or "tcp::addr=localhost:9009;protocol_version=2;" -try (Sender sender = Sender.fromConfig("http::addr=localhost:9000;"); - DoubleArray ary = new DoubleArray(3, 3, 3); -) { - for (int i = 0; i < ROW_COUNT; i++) { - for (int value = 0; value < 3 * 3 * 3; value++) { - ary.append(value); - } - sender.table("tango") - .doubleArray("array", ary) - .at(getTimestamp(), ChronoUnit.MICROS); +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;auto_flush=off;")) { + for (Trade trade : trades) { + sender.table("trades") + .symbol("symbol", trade.symbol()) + .doubleColumn("price", trade.price()) + .longColumn("quantity", trade.quantity()) + .at(trade.timestamp()); } + sender.flush(); } ``` -The `ary.append(value)` method allows you to populate the array in the row-major -order, without having to compute every coordinate individually. You can also use -`ary.set(value, coords...)` to set a value at specific coordinates. +The client also flushes when closed. However, if the flush fails at close +time, the client does not retry. Always flush explicitly before closing. + +### Store-and-forward + +With store-and-forward enabled, unacknowledged data is persisted to disk and +replayed after reconnection, surviving sender process restarts. + +```text +ws::addr=localhost:9000;sf_dir=/var/lib/questdb/sf;sender_id=ingest-1; +``` + +Without `sf_dir`, unacknowledged data lives in process memory and is lost if +the sender process dies. The reconnect loop still spans transient server +outages (rolling upgrades), but the RAM buffer caps how much data can +accumulate. + +### Durable acknowledgement + +:::note Enterprise + +Durable acknowledgement requires QuestDB Enterprise with primary replication +configured. -:::note -Arrays are supported from QuestDB version 9.0.0, and require updated -client libraries. 
::: -## Flush the buffer +By default, the server confirms a batch when it is committed to the local +[WAL](/docs/concepts/write-ahead-log/). To wait for the batch to be durably +uploaded to object storage: + +```text +ws::addr=localhost:9000;sf_dir=/var/lib/questdb/sf;request_durable_ack=on; +``` + +## Querying and SQL execution + +The `QwpQueryClient` sends SQL statements over the +[QWP egress](/docs/protocols/qwp-egress-websocket/) endpoint (`/read/v1`). +Results arrive as columnar batches via a callback handler. + +### Executing SELECT queries + +```java +try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { + client.connect(); + client.execute( + "SELECT ts, sym, price FROM trades WHERE sym = 'EURUSD' LIMIT 100", + new QwpColumnBatchHandler() { + @Override + public void onBatch(QwpColumnBatch batch) { + for (int row = 0; row < batch.getRowCount(); row++) { + long ts = batch.getLongValue(0, row); + String sym = batch.getSymbol(1, row); + double price = batch.getDoubleValue(2, row); + // process row... + } + } + + @Override + public void onEnd(long totalRows) { } + + @Override + public void onError(byte status, String message) { + System.err.printf("error: 0x%02X %s%n", status & 0xFF, message); + } + } + ); +} +``` + +The `QwpColumnBatch` object is valid only during the `onBatch` callback. Copy +values out if you need them after the callback returns. + +**Convenience accessors**: `batch.forEachRow(row -> ...)` provides a +`RowView` with single-argument accessors (`row.getLongValue(col)`, +`row.getSymbol(col)`, etc.) for compact read paths. + +**Null checking**: call `batch.isNull(col, row)` before reading a value. + +### Reading result batches -The client accumulates the data into an internal buffer and doesn't immediately -send it to the server. It can flush the buffer to the server either -automatically or on explicit request. 
+`QwpColumnBatch` provides typed accessors for all QuestDB column types: -### Flush explicitly +| Accessor | Column types | +|----------|-------------| +| `getLongValue(col, row)` | LONG, TIMESTAMP, TIMESTAMP_NANOS, DATE | +| `getIntValue(col, row)` | INT | +| `getDoubleValue(col, row)` | DOUBLE | +| `getFloatValue(col, row)` | FLOAT | +| `getBoolValue(col, row)` | BOOLEAN | +| `getByteValue(col, row)` | BYTE | +| `getShortValue(col, row)` | SHORT | +| `getCharValue(col, row)` | CHAR | +| `getSymbol(col, row)` | SYMBOL (returns cached `String`) | +| `getStrA(col, row)` / `getStrB(col, row)` | VARCHAR (reusable `CharSequence` views) | +| `getBinaryA(col, row)` / `getBinaryB(col, row)` | BINARY (reusable views) | +| `getString(col, row, CharSink)` | VARCHAR (copy into sink) | +| `getUuid(col, row, Uuid)` | UUID | +| `getLong256(col, row, Long256Sink)` | LONG256 | -You can configure the client to not use automatic flushing, and issue explicit -flush requests by calling `sender.flush()`: +Column metadata is available via `batch.getColumnInfo(col)` (name, type) and +`batch.getColumnCount()`. + +### DDL and DML statements + +Non-SELECT statements (CREATE TABLE, INSERT, UPDATE, ALTER, DROP, TRUNCATE) +are executed through the same `execute()` method. 
The server responds with +`EXEC_DONE` instead of result batches: ```java -try (Sender sender = Sender.fromConfig("http::addr=localhost:9000;auto_flush=off")) { - sender.table("trades") - .symbol("symbol", "ETH-USD") - .symbol("side", "sell") - .doubleColumn("price", 2615.54) - .doubleColumn("amount", 0.00044) - .atNow(); - sender.table("trades") - .symbol("symbol", "BTC-USD") - .symbol("side", "sell") - .doubleColumn("price", 39269.98) - .doubleColumn("amount", 0.001) - .atNow(); - sender.flush(); +client.execute( + "CREATE TABLE trades (" + + "ts TIMESTAMP, sym SYMBOL, price DOUBLE, qty LONG" + + ") TIMESTAMP(ts) PARTITION BY DAY WAL", + new QwpColumnBatchHandler() { + @Override + public void onBatch(QwpColumnBatch batch) { } + + @Override + public void onEnd(long totalRows) { } + + @Override + public void onError(byte status, String message) { + System.err.println("failed: " + message); + } + + @Override + public void onExecDone(short opType, long rowsAffected) { + System.out.printf("done: opType=%d rows=%d%n", opType, rowsAffected); + } + } +); +``` + +`rowsAffected` reports the count for INSERT/UPDATE/DELETE. Pure DDL (CREATE, +DROP, ALTER, TRUNCATE) reports 0. + +### Bind parameters + +Parameterized queries use typed bind values, avoiding SQL injection and +enabling server-side factory cache reuse across repeated calls: + +```java +String sql = "SELECT ts, sym, price, qty FROM trades " + + "WHERE sym = $1 AND price >= $2 LIMIT 1000"; + +for (String symbol : List.of("EURUSD", "GBPUSD", "USDJPY")) { + client.execute( + sql, + binds -> binds + .setVarchar(0, symbol) + .setDouble(1, 1.0), + handler + ); } ``` -:::note +Bind indices are 0-based (`$1` maps to index 0). Available setters include +`setBoolean`, `setByte`, `setShort`, `setInt`, `setLong`, `setFloat`, +`setDouble`, `setString`, `setVarchar`, `setTimestampMicros`, `setDate`, +`setUuid`, `setDecimal64/128/256`, `setSymbol`, `setNull`, and more. 
-Calling `sender.flush()` will flush the buffer even with auto-flushing enabled, -but this isn't a typical way to use the client. +To pass a NULL bind value: -::: +```java +binds -> binds.setNull(0) +``` + +:::note Server leniency -### Flush automatically +The current server accepts a SYMBOL wire type for bind parameters and treats +it as VARCHAR. Compliant clients should send VARCHAR. A future revision may +reject SYMBOL bind type codes. -By default, the client automatically flushes the buffer according to a simple -policy. With HTTP, it will automatically flush at the time you append a new -row, if either of these has become true: +::: -- reached 75,000 rows -- hasn't been flushed for 1 second +### Flow control -Both parameters can be customized in order to achieve a good tradeoff between -throughput (large batches) and latency (small batches). +For large result sets, byte-credit flow control prevents the server from +overwhelming the client: -This configuration string will cause the client to auto-flush every 10 rows or -every 10 seconds, whichever comes first: +```java +try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000) + .withInitialCredit(256 * 1024)) { + client.connect(); + // Server pauses after streaming ~256 KiB, auto-replenishes after each batch +} +``` -`http::addr=localhost:9000;auto_flush_rows=10;auto_flush_interval=10000;` +A credit of `0` (the default) means unbounded: the server streams as fast as +the network allows. -With TCP, the client flushes its internal buffer whenever it gets full. +### Compression -The client will also flush automatically when it is being closed and there's -still some data in the buffer. However, **if the network operation fails at this -time, the client won't retry it.** Always explicitly flush the buffer before -closing the client. 
+Negotiate zstd compression to reduce network bandwidth for large result sets: + +```java +try (QwpQueryClient client = QwpQueryClient.fromConfig( + "ws::addr=localhost:9000;compression=zstd;compression_level=3;")) { + client.connect(); + // Batches are automatically decompressed +} +``` ## Error handling -HTTP automatically retries failed, recoverable requests: network errors, some -server errors, and timeouts. Non-recoverable errors include invalid data, -authentication errors, and other client-side errors. +### Ingestion errors -:::note +WebSocket ingestion uses an asynchronous error model. Batch rejections are +delivered via the `SenderErrorHandler` callback, not thrown from `flush()`: -If you have configured multiple addresses, retries will be run against different instances. +```java +try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("localhost:9000") + .errorHandler(error -> { + System.err.printf("batch rejected: category=%s table=%s msg=%s%n", + error.getCategory(), error.getTableName(), error.getServerMessage()); + }) + .build()) { + // ... +} +``` -::: +Each `SenderError` carries: -Retrying is especially useful during transient network issues or when the server -goes offline for a short period. Configure the retrying behavior through the -`retry_timeout` configuration option or via the builder API with -`retryTimeoutMillis(long timeoutMillis)`. The client continues to retry after -recoverable errors until it either succeeds or the specified timeout expires. If -it hits the timeout without success, the client throws a `LineSenderException`. +- **Category**: `SCHEMA_MISMATCH`, `PARSE_ERROR`, `INTERNAL_ERROR`, + `SECURITY_ERROR`, `WRITE_ERROR`, `PROTOCOL_VIOLATION`, or `UNKNOWN`. +- **Policy**: `DROP_AND_CONTINUE` (batch dropped, sender continues) or `HALT` + (sender halted, next API call throws `LineSenderServerException`). +- **Server message**: human-readable error text. 
+- **Table name**: the rejected table (null for multi-table batches). -The client won't retry requests while it's being closed and attempting to flush -the data left over in the buffer. +The error handler runs on a dedicated dispatcher thread, never on the I/O or +producer thread. -The TCP transport has no mechanism to notify the client it encountered an -error; instead it just disconnects. When the client detects this, it throws a -`LineSenderException` and becomes unusable. +**Recovery after errors**: call `reset()` to clear buffers and continue with +fresh data. On WebSocket, `reset()` does not recover from terminal failures +(auth failure, reconnect budget exhaustion). In those cases, close the sender +and create a new one. -## Recover after a client-side error +### Query errors -With HTTP transport, the client always prepares a full row in RAM before trying -to send it. It also remains usable after an exception has occurred. This allows -you to cancel sending a row, for example due to a validation error, and go on -with the next row. +Query errors arrive via the `onError` callback: -With TCP transport, you don't have this option. If you get an exception, you -can't continue with the same client instance, and don't have insight into which -rows were accepted by the server. +```java +@Override +public void onError(byte status, String message) { + System.err.printf("query failed: 0x%02X %s%n", status & 0xFF, message); +} +``` -:::caution +Status codes: -Error handling behaviour changed with the release of QuestDB 9.1.0. 
+| Code | Name | Description | +|--------|-----------------|---------------------------------------------------| +| `0x03` | SCHEMA_MISMATCH | Bind parameter type incompatible with placeholder | +| `0x05` | PARSE_ERROR | SQL syntax error or malformed message | +| `0x06` | INTERNAL_ERROR | Server-side execution failure | +| `0x08` | SECURITY_ERROR | Authorization failure | +| `0x0A` | CANCELLED | Query terminated by CANCEL | +| `0x0B` | LIMIT_EXCEEDED | Protocol limit hit | -Previously, failing all retries would cause an exception and release the buffered data. +Errors can arrive before any data (parse failure) or mid-stream (storage +failure, server shutdown). When `onError` is called, no further frames arrive +for that query. -Now the buffer will not be released. If you wish to re-use the same sender with fresh data, you must call the -new `reset()` function. +### Connection-level errors -::: +- **Authentication failure**: `401`/`403` HTTP response before the WebSocket + upgrade completes. Terminal across all endpoints. +- **Malformed frames**: `QwpDecodeException` or WebSocket close with a + terminal code. +- **Role mismatch**: `QwpRoleMismatchException` when all endpoints report + roles that do not match the `target=` filter. -## Designated timestamp considerations - -The concept of [designated timestamp](/docs/concepts/designated-timestamp/) is -important when ingesting data into QuestDB. - -There are two ways to assign a designated timestamp to a row: - -1. User-assigned timestamp: the client assigns a specific timestamp to the row. - - ```java - java.time.Instant timestamp = Instant.now(); // or any other timestamp - sender.table("trades") - .symbol("symbol", "ETH-USD") - .symbol("side", "sell") - .doubleColumn("price", 2615.54) - .doubleColumn("amount", 0.00044) - .at(timestamp); - ``` - - The `Instant` class is part of the `java.time` package and is used to - represent a specific moment in time. 
The `sender.at()` method can accept a - long timestamp representing the elapsed time since the beginning of the - [Unix epoch](https://en.wikipedia.org/wiki/Unix_time), as well as a - `ChronoUnit` to specify the time unit. This approach is useful in - high-throughput scenarios where instantiating an `Instant` object for each - row is not feasible due to performance considerations. - -2. Server-assigned timestamp: the server automatically assigns a timestamp to - the row based on the server's wall-clock time at the time of ingesting the - row. Example: - - ```java - sender.table("trades") - .symbol("symbol", "ETH-USD") - .symbol("side", "sell") - .doubleColumn("price", 2615.54) - .doubleColumn("amount", 0.00044) - .atNow(); - ``` - -We recommend using the event's original timestamp when ingesting data into -QuestDB. Using ingestion-time timestamps precludes the ability to deduplicate -rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +## Failover and high availability -:::note +:::note Enterprise -QuestDB works best when you send data in chronological order (sorted by -timestamp). +Multi-host failover with automatic reconnect requires QuestDB Enterprise. ::: -## Protocol Version +### Multiple endpoints -To enhance data ingestion performance, QuestDB _version 9.0.0_ introduced an -upgraded version "2" to the text-based InfluxDB Line Protocol which encodes -arrays and f64 values in binary form. Arrays are supported only in this upgraded -protocol version. +Specify comma-separated addresses in the connect string: -You can select the protocol version with the `protocol_version` setting in the -configuration string. +```text +ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; +``` -HTTP transport automatically negotiates the protocol version by default. 
-In order to avoid the slight latency cost at connection time, you can explicitly -configure the protocol version by setting `protocol_version=2|1;`. +The client tries endpoints in order. On connection loss, it walks the list +to find the next healthy endpoint. -TCP transport does not negotiate the protocol version and uses version 1 by -default. You must explicitly set `protocol_version=2;` in order to ingest -arrays, as in this example: +### Ingestion failover -```text -tcp::addr=localhost:9009;protocol_version=2; +The ingestion sender uses a reconnect loop with exponential backoff. Key +connect-string options: + +| Key | Default | Description | +|----------------------------------|-----------|-------------------------------------------| +| `reconnect_max_duration_millis` | `300000` | Total outage budget before giving up. | +| `reconnect_initial_backoff_millis` | `100` | First post-failure sleep. | +| `reconnect_max_backoff_millis` | `5000` | Cap on per-attempt sleep. | +| `initial_connect_retry` | `off` | Retry on first connect (`on`, `sync`, `async`). | + +Ingress is zone-blind: it pins QWP v1 and does not read `SERVER_INFO`. The +`zone=` key is accepted but ignored, so a connect string shared with egress +clients works unchanged. + +With store-and-forward (`sf_dir` set), unacknowledged data survives sender +restarts. Without it, unacknowledged data lives in process memory and is lost +if the sender process dies. + +### Query failover + +The query client drives a per-query reconnect loop. When a transport error +occurs mid-stream, the client reconnects and replays the query. `batch_seq` +restarts at 0 on the new connection. + +Key connect-string options: + +| Key | Default | Description | +|-------------------------------|---------|-------------------------------------------| +| `failover` | `on` | Master switch for per-query reconnect. | +| `failover_max_attempts` | `8` | Max reconnect attempts per query. 
| +| `failover_backoff_initial_ms` | `50` | First post-failure sleep. | +| `failover_backoff_max_ms` | `1000` | Cap on per-attempt sleep. | +| `failover_max_duration_ms` | `30000` | Total wall-clock budget per query. | + +**Handling partial results**: when failover occurs mid-stream, the +`onFailoverReset` callback fires before replayed batches arrive. Use it to +clear any accumulated state: + +```java +@Override +public void onFailoverReset(QwpServerInfo newNode) { + // Clear partial results; the server will re-send from the beginning + results.clear(); +} +``` + +If you do not clear state, you will see overlapping data (the server replays +the full result set). + +### Connection events + +For ingestion, register a `SenderConnectionListener` to observe connection +state transitions: + +```java +Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("db-primary:9000") + .address("db-replica:9000") + .connectionListener(event -> { + System.out.printf("%s host=%s:%d%n", + event.getKind(), event.getHost(), event.getPort()); + }) + .build(); ``` -## Configuration options +Event kinds: `CONNECTED`, `DISCONNECTED`, `RECONNECTED`, `FAILED_OVER`, +`ENDPOINT_ATTEMPT_FAILED`, `ALL_ENDPOINTS_UNREACHABLE`, `AUTH_FAILED` +(terminal), `RECONNECT_BUDGET_EXHAUSTED` (terminal). + +### Error classification + +- **Authentication errors** (`401`/`403`): terminal at any host. The + reconnect loop stops immediately. +- **Role reject** (`421 + X-QuestDB-Role`): transient if the role is + `PRIMARY_CATCHUP`, topology-level otherwise. +- **Version mismatch** at upgrade: per-endpoint, not terminal. The client + tries the next endpoint. +- **All other errors** (TCP/TLS failures, `404`, `503`, mid-stream errors): + transient, fed into the reconnect loop. 
+ +For the full list of connect-string keys, see the +[reconnect and failover](/docs/client-configuration/connect-string#reconnect-keys) +and +[multi-host failover](/docs/client-configuration/connect-string#failover-keys) +sections of the connect string reference. + +## Parallel queries -Client can be configured either by using a configuration string as shown in the -examples above, or by using the builder API. +:::note Phase 1 limitation -The builder API is available via the `Sender.builder(Transport transport)` -method. +The current implementation supports a single in-flight query per connection. +The wire protocol allows multiple concurrent queries (demultiplexed by +request ID); multi-query support is planned for a future release. -For a breakdown of available options, see the -[Configuration string](/docs/ingestion/clients/configuration-string/) page. +::: + +To run queries in parallel, create separate `QwpQueryClient` instances. Each +instance manages its own WebSocket connection. + +Neither `Sender` nor `QwpQueryClient` is thread-safe. For multi-threaded +workloads, use one instance per thread or use an object pool. + +## Configuration reference + +For the full list of connect-string keys and their defaults, see the +[connect string reference](/docs/client-configuration/connect-string/). + +Common WebSocket-specific options: + +| Key | Default | Description | +|-----|---------|-------------| +| `auto_flush_rows` | `1000` | Rows before auto-flush. | +| `auto_flush_interval` | `100` | Milliseconds before auto-flush. | +| `auto_flush_bytes` | disabled | Bytes before auto-flush. | +| `sf_dir` | unset | Store-and-forward directory. | +| `sender_id` | `default` | Sender slot identity for SF. | +| `request_durable_ack` | `off` | Request durable upload ACK (Enterprise). | +| `reconnect_max_duration_millis` | `300000` | Ingress reconnect budget. | +| `failover` | `on` | Egress per-query reconnect switch. 
| +| `compression` | `raw` | Egress batch compression (`raw`, `zstd`). | ## Compatible JDKs The client relies on some JDK internal libraries, which certain specialised JDK offerings may not support. -Here is a list of known incompatible JDKs: - -- Azul Zing 17 - - A fix is in progress. You can use Azul Zulu 17 in the meantime. - -## Other considerations - -- Refer to the [ILP overview](/docs/ingestion/ilp/overview) for details - about transactions, error control, delivery guarantees, health check, or table - and column auto-creation. -- The method `flush()` can be called to force sending the internal buffer to a - server, even when the buffer is not full yet. -- The Sender is not thread-safe. For multiple threads to send data to QuestDB, - each thread should have its own Sender instance. An object pool can also be - used to re-use Sender instances. -- The Sender instance has to be closed after it is no longer in use. The Sender - implements the `java.lang.AutoCloseable` interface, and therefore the - [try-with-resource](https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html) - pattern can be used to ensure that the Sender is closed. +Known incompatible JDKs: + +- Azul Zing 17 (use Azul Zulu 17 instead) + +## Migration from ILP (HTTP/TCP) + +If you are migrating from the ILP-based client, the row-building API is +unchanged. 
The main differences: + +| Aspect | HTTP (ILP) | WebSocket (QWP) | +|--------|-----------|-----------------| +| Connect string schema | `http::` / `https::` | `ws::` / `wss::` | +| Auto-flush rows | 75,000 | 1,000 | +| Auto-flush interval | 1,000 ms | 100 ms | +| Error model | Synchronous (`flush()` throws) | Async (`SenderErrorHandler` callback) | +| Buffer capacity | Configurable | Not configurable (internal cursor) | +| Store-and-forward | Not available | Available (`sf_dir`) | +| Multi-endpoint failover | Limited | Full reconnect loop with backoff | +| Querying | Not available | `QwpQueryClient` | + +To migrate, change your connect string from `http::` to `ws::` (or `https::` +to `wss::`), register a `SenderErrorHandler` for async error handling, and +adjust auto-flush settings if needed. diff --git a/documentation/protocols/overview.md b/documentation/protocols/overview.md index 8a3edbe6c..d686815c3 100644 --- a/documentation/protocols/overview.md +++ b/documentation/protocols/overview.md @@ -21,7 +21,7 @@ from scratch. End users should see the ::: -## QWP — QuestWire Protocol +## QWP — QuestDB Wire Protocol QWP is QuestDB's native wire protocol for both ingest and query traffic. The specifications below are normative — if a client's behaviour conflicts with diff --git a/documentation/protocols/qwp-ingress-websocket.md b/documentation/protocols/qwp-ingress-websocket.md index 81fdeb7f1..124c9be6a 100644 --- a/documentation/protocols/qwp-ingress-websocket.md +++ b/documentation/protocols/qwp-ingress-websocket.md @@ -16,7 +16,7 @@ new QuestDB ingest client from scratch. End users should see the ::: -QuestWire Protocol (QWP) is QuestDB's columnar binary protocol for +QuestDB Wire Protocol (QWP) is QuestDB's columnar binary protocol for high-throughput data ingestion over WebSocket. Each message carries one or more table blocks, where every column's values are stored contiguously. 
Batched messages, schema references, and Gorilla-compressed timestamps reduce wire

From e31233f753aabf0d86a49f40eafef7d947499d0d Mon Sep 17 00:00:00 2001
From: javier
Date: Thu, 14 May 2026 17:49:55 +0200
Subject: [PATCH 17/44] documenting data type limitations

---
 documentation/ingestion/clients/java.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md
index 44a728978..68bbcf478 100644
--- a/documentation/ingestion/clients/java.md
+++ b/documentation/ingestion/clients/java.md
@@ -247,6 +247,12 @@ try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) {
    - `long256Column(name, l0, l1, l2, l3)` (four longs, least significant first)
    - `decimalColumn(name, Decimal256)` or `decimalColumn(name, CharSequence)`
    - `doubleArray(name, ...)` and `longArray(name, ...)` (see [Ingest arrays](#ingest-arrays))
+
+  The QWP wire format supports additional types (GEOHASH, IPv4, BINARY, DATE)
+  that the Java client does not yet expose for ingestion. To write these types,
+  create the columns via DDL and use a
+  [third-party client or the wire protocol directly](/docs/protocols/qwp-ingress-websocket/#column-types).
+
 4. Call `at(Instant)`, `at(long, ChronoUnit)`, or `atNow()` to finalize the row.
 5. Repeat from step 2, or call `flush()` to send buffered data.
 6. Call `close()` when done (or use try-with-resources).

From 5938166a52cad13e3b62398806e4c3ca61c5803d Mon Sep 17 00:00:00 2001
From: Vlad Ilyushchenko
Date: Thu, 14 May 2026 17:10:15 +0100
Subject: [PATCH 18/44] agents: add Connect > Agents page; reorder Client Libraries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New page (documentation/connect/agents.md) covering how AI agents operate
QuestDB across three angles: protocols, tooling, and operational practices.
Positioning: - QWP egress is the recommended path for SQL execution (DDL + streaming SELECT), with native client libraries when available and the protocol spec for clean-room implementations. - QWP ingress is the recommended path for all writes (bulk and sustained), including local-file uploads — the recipe explicitly calls out the failure mode where agents reach for read_parquet/read_csv/COPY, which require server-side filesystem access. - REST is positioned for schema discovery and small ad-hoc queries that fit in a single HTTP response. - PGWire and /imp are intentionally not recommended (superseded by QWP). - No MCP framing: an MCP server would just wrap REST + QWP without adding capability, so the page tells agents to use the underlying protocols directly. Includes a Recipes section seeded with the local-file upload recipe; links to the existing Getting Started > AI Coding Agents page for the tooling quickstart and the QuestDB / TSBS Claude skills. Sidebar: - Add Connect > Agents (between Client Libraries and Message Brokers). - Move Date to Timestamp inside Client Libraries (cross-cutting reference for all language clients). - Move Connect string inside Client Libraries as the first item (config schema shared by every QWP client). Co-Authored-By: Claude Opus 4.7 (1M context) --- documentation/connect/agents.md | 201 ++++++++++++++++++++++++++++++++ documentation/sidebars.js | 25 ++-- 2 files changed, 216 insertions(+), 10 deletions(-) create mode 100644 documentation/connect/agents.md diff --git a/documentation/connect/agents.md b/documentation/connect/agents.md new file mode 100644 index 000000000..20835e91d --- /dev/null +++ b/documentation/connect/agents.md @@ -0,0 +1,201 @@ +--- +title: Agents +description: + How AI agents operate QuestDB — which protocols they use, what tooling + exists, and how to give them safe access. +--- + +AI agents — Claude Code, Cursor, OpenAI Codex, autonomous research tools — +are first-class clients of QuestDB. 
They drive the database the same way a +developer would: discover the schema, write SQL, plot results, ingest new +data. What changes is the loop: an agent runs that cycle continuously, +often without a human in the inner loop. + +This page covers the three things to know: + +1. [Protocols](#protocols) — which endpoints agents use, and when. +2. [Tooling](#tooling) — concrete agents and skills that work with QuestDB. +3. [Practices](#practices) — how to give an agent safe, scoped access. + +For a hands-on walkthrough with named agents, see +[AI Coding Agents](/docs/getting-started/ai-coding-agents/) in Getting +Started. + +## Protocols + +Agents reach QuestDB through the same interfaces as any other client. The +right choice depends on what the agent is doing and which SDK or framework +it ships with. + +| Interface | Best for | Why | +|-----------------------------------------------------------|---------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| [**QWP egress**](/docs/protocols/qwp-egress-websocket/) | The primary path for executing SQL — DDL, exploratory SELECT, and large result streaming. | Binary, columnar, byte-credit flow control, multi-host failover. Use a native [client library](/docs/ingestion/overview/) when one exists for the agent's runtime; otherwise an agent can implement one directly against the protocol spec. | +| [**QWP ingress**](/docs/protocols/qwp-ingress-websocket/) | The primary path for ingesting data — agentic ETL, sensor feeds, bulk loads. | Native binary protocol with multi-host failover and store-and-forward built into the client. | +| [**REST API**](/docs/query/rest-api/) | Schema discovery and small ad-hoc queries (a few hundred rows or fewer). | HTTP + JSON. Every agent framework supports it; no SDK to install. 
`SHOW TABLES` / `SHOW COLUMNS` and other lookups map naturally to function-calling tools. | + +**QWP egress is the recommended path for any sustained SQL work** — +exploratory or production. Reach for REST when the agent is doing schema +discovery or pulling small result sets that fit comfortably in a single +HTTP response. + +## Tooling + +### General-purpose coding agents + +Claude Code, OpenAI Codex, Cursor, Aider, and similar code-execution agents +work with QuestDB out of the box. They read the public QuestDB documentation +and generate code that talks to a QWP client library or the REST API. No +setup, no MCP server required — point them at a QuestDB endpoint and ask. + +See [AI Coding Agents](/docs/getting-started/ai-coding-agents/) for the +quickstart, including the public demo at `https://demo.questdb.io/`. + +### QuestDB agent skills (Claude) + +The +[QuestDB agent skill](/docs/getting-started/ai-coding-agents/#questdb-agent-skill) +embeds QuestDB-specific context (SQL idioms, ingestion patterns, Grafana +dashboards) directly into the agent. Claude Code loads it on demand, so the +agent produces correct `SAMPLE BY`, `LATEST ON`, and time-series queries on +the first try instead of approximating PostgreSQL syntax. + +The +[TSBS Benchmark skill](/docs/getting-started/ai-coding-agents/#tsbs-benchmark-skill) +goes further: it automates end-to-end ingestion benchmarking, useful when an +agent is evaluating QuestDB against alternative time-series databases. + +## Practices + +### Schema discovery + +Agents need to know the shape of the data before they can query it. The +useful entry points all run over the standard SQL interfaces: + +```questdb-sql +-- List all tables +SHOW TABLES; + +-- Inspect a specific table's columns and types +SHOW COLUMNS FROM trades; + +-- Meta-query: full table metadata including designated timestamp +SELECT * FROM tables(); +``` + +Over REST, the same queries run as `GET /exec?query=SHOW%20TABLES`. 
+ +See the [`SHOW` reference](/docs/query/sql/show/) and +[`tables()`](/docs/query/functions/meta/) for the full surface. + +### Read-only access + +Production deployments should give agents read-only credentials whenever +possible: + +- **Open Source**: configure HTTP basic auth and provide read-only + credentials to the agent. The same credentials authenticate the QWP + endpoints via the WebSocket upgrade. +- **Enterprise**: use [RBAC](/docs/security/rbac/) to create a role with + query-only permissions and assign it to the agent's user. The same role + applies whether the agent connects over REST or QWP. + +Pick the transport by data volume: + +- **Small queries** — schema inspection, parameter lookup, a few hundred + rows — fit naturally on REST `/exec`. The JSON response is directly + consumable by the agent without an SDK. +- **Large result sets** — exporting data into another system, materializing + analytics output — should go through a + [QWP egress client](/docs/protocols/qwp-egress-websocket/). Byte-credit + flow control prevents the agent from being overwhelmed mid-export, and + the binary columnar format keeps wire size low. + +Containing the blast radius this way matters: if the agent's prompt is +compromised or it hallucinates a destructive statement, the credentials +themselves prevent damage. + +### Query budgets + +Agents will write expensive queries while exploring. Set realistic ceilings: + +- Always include `LIMIT` in exploratory queries; the agent rarely needs more + than a few hundred rows to reason about the shape of the data. +- Cap concurrent agent traffic at the reverse proxy (HTTP rate limits) or + via QWP connection limits on the server side. +- Watch the [query log and metrics](/docs/operations/logging-metrics/) for + runaway scans. 
+ +### Write access for ingest + +If the agent is generating ingestion code, not just querying, **QWP is the +recommended path for all writes**: + +- **Bulk upload and sustained ingestion** (agentic ETL, a streaming sensor + feed fronted by an LLM, batch loads from another system): use a + [QWP client library](/docs/ingestion/overview/). The agent generates + setup code; the runtime gets throughput, multi-host failover, and + store-and-forward for free. +- **No native client for the agent's runtime?** The agent can implement an + uploader directly against the + [QWP ingress wire spec](/docs/protocols/qwp-ingress-websocket/) — the + protocol is fully documented for clean-room implementations and a + minimum-viable client is on the order of a few hundred lines. +- **Quick one-off inserts** during exploration: `INSERT INTO ...` via REST + `/exec` is acceptable for ad-hoc testing, but production write paths + should always be on QWP. + +### Observability + +Treat agent traffic like any production workload: + +- Log all SQL the agent executes (most agent frameworks expose a hook for + pre-execution inspection). +- Surface query latency and result-size metrics — runaway scans show up + there first. +- Audit DDL statements separately if you allow them at all; an agent that + drops a table by accident is a different incident class from one that + writes a slow query. + +## Recipes + +### Uploading CSV or Parquet from the agent's local machine + +**Failure mode to avoid:** SQL functions like `read_parquet()`, +`read_csv()`, and the `COPY` statement all read files from the +**QuestDB server's filesystem** (via `cairo.sql.copy.root`). They do not +work when the agent has the file locally and the database is on another +host — a remote VM, a Docker container, a cloud deployment, or +`demo.questdb.io`. 
An agent reaching for these to upload a local file is +on a dead-end path; it will either fail with a permissions error or +"file not found", or beat around the bush trying to mount a directory it +can't reach. + +**Correct path:** parse the file in the agent's runtime, then push the +rows to QuestDB through a QWP ingress client. + +1. Check the [Ingestion overview](/docs/ingestion/overview/) for the + current list of QWP client libraries supported in the agent's runtime + language. +2. **Native client available** — the agent reads the file locally + (e.g., pyarrow / pandas / polars for Parquet; the language's built-in + CSV reader for CSV) and streams rows to QuestDB through the client. +3. **No native client for that runtime** — the agent can implement an + uploader directly against the + [QWP ingress wire spec](/docs/protocols/qwp-ingress-websocket/). The + protocol is fully documented for clean-room implementations and a + minimum-viable client (BOOLEAN, LONG, DOUBLE, TIMESTAMP, VARCHAR) is + on the order of a few hundred lines. + +This works regardless of where QuestDB runs — Docker, cloud, +`demo.questdb.io`, remote VM — and gives the agent throughput, +multi-host failover, and store-and-forward for free. 
+ +## Next steps + +- **Quickstart**: [AI Coding Agents](/docs/getting-started/ai-coding-agents/) +- **Query interfaces**: [QWP egress (WebSocket)](/docs/protocols/qwp-egress-websocket/), + [REST API](/docs/query/rest-api/) +- **Ingest interfaces**: [Ingestion overview](/docs/ingestion/overview/), + [QWP ingress (WebSocket)](/docs/protocols/qwp-ingress-websocket/) +- **Operating safely**: [Security overview](/docs/security/), + [RBAC](/docs/security/rbac/) (Enterprise) diff --git a/documentation/sidebars.js b/documentation/sidebars.js index cc0be33b7..37bfd06aa 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -56,20 +56,15 @@ module.exports = { type: "doc", label: "Overview", }, - { - id: "client-configuration/connect-string", - type: "doc", - label: "Connect string", - }, - { - id: "ingestion/clients/date-to-timestamp-conversion", - type: "doc", - label: "Date to Timestamp", - }, { type: "category", label: "Client Libraries", items: [ + { + id: "client-configuration/connect-string", + type: "doc", + label: "Connect string", + }, { id: "ingestion/clients/java", type: "doc", @@ -105,8 +100,18 @@ module.exports = { type: "doc", label: ".NET", }, + { + id: "ingestion/clients/date-to-timestamp-conversion", + type: "doc", + label: "Date to Timestamp", + }, ], }, + { + id: "connect/agents", + type: "doc", + label: "Agents", + }, { type: "category", label: "Message Brokers", From dff6869a45e103cfc3278dccd80e582647259283 Mon Sep 17 00:00:00 2001 From: glasstiger Date: Thu, 14 May 2026 17:22:41 +0100 Subject: [PATCH 19/44] fixing broken link --- documentation/connect/agents.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/documentation/connect/agents.md b/documentation/connect/agents.md index 20835e91d..90cfcfde9 100644 --- a/documentation/connect/agents.md +++ b/documentation/connect/agents.md @@ -197,5 +197,5 @@ multi-host failover, and store-and-forward for free. 
[REST API](/docs/query/rest-api/) - **Ingest interfaces**: [Ingestion overview](/docs/ingestion/overview/), [QWP ingress (WebSocket)](/docs/protocols/qwp-ingress-websocket/) -- **Operating safely**: [Security overview](/docs/security/), - [RBAC](/docs/security/rbac/) (Enterprise) +- **Operating safely**: [RBAC](/docs/security/rbac/) (Enterprise), + [TLS](/docs/security/tls/) From 6f5cbebf72d79d6b65e2ce3d8f3666bee3184bab Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 18:31:10 +0200 Subject: [PATCH 20/44] documenting data type limitations --- documentation/ingestion/clients/java.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index 68bbcf478..f1a15bbf6 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -248,10 +248,10 @@ try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { - `decimalColumn(name, Decimal256)` or `decimalColumn(name, CharSequence)` - `doubleArray(name, ...)` and `longArray(name, ...)` (see [Ingest arrays](#ingest-arrays)) - The QWP wire format supports additional types (GEOHASH, IPv4, BINARY, DATE) - that the Java client does not yet expose for ingestion. To write these types, - create the columns via DDL and use a - [third-party client or the wire protocol directly](/docs/protocols/qwp-ingress-websocket/#column-types). + The server also accepts GEOHASH and DATE on ingress, but the Java client + does not yet expose sender methods for them. IPv4 and BINARY are not + supported for ingestion on either the client or the server. All types are + readable on the [egress side](#reading-result-batches). 5. Call `at(Instant)`, `at(long, ChronoUnit)`, or `atNow()` to finalize the row. 6. Repeat from step 2, or call `flush()` to send buffered data. 
From f5f1908fed395962d6f5c68901485e41a2061cda Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 18:42:36 +0200 Subject: [PATCH 21/44] fixing timestamp_ns, imports, and auto_flush incorrect docs --- documentation/ingestion/clients/java.md | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index f1a15bbf6..333bbe35f 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -71,6 +71,8 @@ Add the dependency: ### Ingest data ```java +import io.questdb.client.Sender; + try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { sender.table("trades") .symbol("symbol", "ETH-USD") @@ -91,6 +93,10 @@ try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { ### Query data ```java +import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler; +import io.questdb.client.cutlass.qwp.client.QwpColumnBatch; + try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { client.connect(); client.execute( @@ -286,6 +292,8 @@ For higher-dimensional arrays, use the `DoubleArray` or `LongArray` class to avoid GC overhead. 
Create the instance once and reuse it: ```java +import io.questdb.client.cutlass.line.array.DoubleArray; + try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;"); DoubleArray ary = new DoubleArray(3, 3, 3)) { for (int i = 0; i < ROW_COUNT; i++) { @@ -364,10 +372,11 @@ Customize via connect string: ws::addr=localhost:9000;auto_flush_rows=500;auto_flush_interval=50; ``` -**Explicit flush**: disable auto-flush and call `flush()` yourself: +**Explicit flush**: you can call `flush()` at any time to send buffered data +immediately, even with auto-flush enabled: ```java -try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;auto_flush=off;")) { +try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { for (Trade trade : trades) { sender.table("trades") .symbol("symbol", trade.symbol()) @@ -375,10 +384,16 @@ try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;auto_flush=off;" .longColumn("quantity", trade.quantity()) .at(trade.timestamp()); } - sender.flush(); + sender.flush(); // send everything now, regardless of auto-flush thresholds } ``` +:::note +Disabling auto-flush entirely (`auto_flush=off`) is not supported on the +WebSocket transport. Use the auto-flush row count and interval settings to +control batch size instead. +::: + The client also flushes when closed. However, if the flush fails at close time, the client does not retry. Always flush explicitly before closing. @@ -464,7 +479,7 @@ values out if you need them after the callback returns. 
| Accessor | Column types | |----------|-------------| -| `getLongValue(col, row)` | LONG, TIMESTAMP, TIMESTAMP_NANOS, DATE | +| `getLongValue(col, row)` | LONG, TIMESTAMP, `timestamp_ns`, DATE | | `getIntValue(col, row)` | INT | | `getDoubleValue(col, row)` | DOUBLE | | `getFloatValue(col, row)` | FLOAT | From 504b01e5aa5bdf830d0c8a044d558d31488bbdd6 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 19:26:59 +0200 Subject: [PATCH 22/44] fix(java): address missing accessors, imports, and api gaps Adds imports to code examples, fixes auto_flush=off (unsupported on WS), completes the egress accessor table with all QwpColumnBatch methods, removes unreleased LONG_ARRAY references, documents timestamp_ns via ChronoUnit.NANOS, DoubleArray clear/reshape lifecycle, array column reading on egress, and execute() blocking semantics. --- documentation/ingestion/clients/java.md | 88 ++++++++++++++++++++----- 1 file changed, 72 insertions(+), 16 deletions(-) diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index 333bbe35f..9e272deb7 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -252,7 +252,7 @@ try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { - `uuidColumn(name, lo, hi)` (two longs) - `long256Column(name, l0, l1, l2, l3)` (four longs, least significant first) - `decimalColumn(name, Decimal256)` or `decimalColumn(name, CharSequence)` - - `doubleArray(name, ...)` and `longArray(name, ...)` (see [Ingest arrays](#ingest-arrays)) + - `doubleArray(name, ...)` (see [Ingest arrays](#ingest-arrays)) The server also accepts GEOHASH and DATE on ingress, but the Java client does not yet expose sender methods for them. 
IPv4 and BINARY are not @@ -283,13 +283,11 @@ For 1D and 2D arrays, pass a Java array directly: ```java double[] prices = {1.0842, 1.0843, 1.0841}; sender.table("book").doubleArray("levels", prices).atNow(); - -long[] counts = {100, 200, 300}; -sender.table("book").longArray("depths", counts).atNow(); ``` -For higher-dimensional arrays, use the `DoubleArray` or `LongArray` class to -avoid GC overhead. Create the instance once and reuse it: +For higher-dimensional arrays, use the `DoubleArray` class to avoid GC +overhead. Create the instance once and reuse it across rows by calling +`clear()` before populating each row: ```java import io.questdb.client.cutlass.line.array.DoubleArray; @@ -297,6 +295,7 @@ import io.questdb.client.cutlass.line.array.DoubleArray; try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;"); DoubleArray ary = new DoubleArray(3, 3, 3)) { for (int i = 0; i < ROW_COUNT; i++) { + ary.clear(); // reset write position, reuse native memory for (int v = 0; v < 27; v++) { ary.append(v); } @@ -307,6 +306,13 @@ try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;"); } ``` +The constructor `new DoubleArray(d1, d2, ...)` defines the shape. Values are +appended in row-major order: the last dimension varies fastest. For a 2D array +with shape `(3, 2)`, `append()` fills positions `[0,0], [0,1], [1,0], [1,1], +[2,0], [2,1]`. You can also use `set(value, i, j, ...)` to write at specific +coordinates. Call `reshape(d1, d2, ...)` to change the shape without +reallocating. + :::note Arrays require QuestDB 9.0.0 or later. 
::: @@ -324,13 +330,23 @@ sender.table("trades") .doubleColumn("price", 1.0842) .at(Instant.now()); -// Or with explicit units for high-throughput paths: +// Explicit microseconds for high-throughput paths: sender.table("trades") .symbol("symbol", "EURUSD") .doubleColumn("price", 1.0842) .at(System.currentTimeMillis() * 1000, ChronoUnit.MICROS); + +// Nanosecond precision (creates a timestamp_ns column): +sender.table("ticks") + .symbol("symbol", "EURUSD") + .doubleColumn("price", 1.0842) + .at(System.nanoTime(), ChronoUnit.NANOS); ``` +Using `ChronoUnit.NANOS` with `at()` or `timestampColumn()` creates a +`timestamp_ns` column. Using any other unit creates a standard `TIMESTAMP` +column (microsecond precision). + **Server-assigned** (server uses its wall-clock time): ```java @@ -434,6 +450,20 @@ The `QwpQueryClient` sends SQL statements over the [QWP egress](/docs/protocols/qwp-egress-websocket/) endpoint (`/read/v1`). Results arrive as columnar batches via a callback handler. +`execute()` is **blocking**: it sends the query, drives the WebSocket receive +loop on the calling thread, invokes the handler callbacks (`onBatch`, +`onEnd`, `onError`, or `onExecDone`), and returns only after the query +completes. This means you can safely sequence operations: + +```java +client.execute("CREATE TABLE t (...) ...", ddlHandler); +// Table exists by this point +client.execute("INSERT INTO t VALUES ...", dmlHandler); +// Data is committed by this point +client.execute("SELECT * FROM t", selectHandler); +// Results have been fully consumed by this point +``` + ### Executing SELECT queries ```java @@ -479,23 +509,49 @@ values out if you need them after the callback returns. 
| Accessor | Column types | |----------|-------------| -| `getLongValue(col, row)` | LONG, TIMESTAMP, `timestamp_ns`, DATE | -| `getIntValue(col, row)` | INT | -| `getDoubleValue(col, row)` | DOUBLE | -| `getFloatValue(col, row)` | FLOAT | | `getBoolValue(col, row)` | BOOLEAN | | `getByteValue(col, row)` | BYTE | | `getShortValue(col, row)` | SHORT | | `getCharValue(col, row)` | CHAR | +| `getIntValue(col, row)` | INT, IPv4 | +| `getLongValue(col, row)` | LONG, TIMESTAMP, `timestamp_ns`, DATE | +| `getFloatValue(col, row)` | FLOAT | +| `getDoubleValue(col, row)` | DOUBLE | | `getSymbol(col, row)` | SYMBOL (returns cached `String`) | | `getStrA(col, row)` / `getStrB(col, row)` | VARCHAR (reusable `CharSequence` views) | -| `getBinaryA(col, row)` / `getBinaryB(col, row)` | BINARY (reusable views) | +| `getString(col, row)` | VARCHAR (heap-allocating `String`) | | `getString(col, row, CharSink)` | VARCHAR (copy into sink) | -| `getUuid(col, row, Uuid)` | UUID | -| `getLong256(col, row, Long256Sink)` | LONG256 | +| `getBinaryA(col, row)` / `getBinaryB(col, row)` | BINARY (reusable native views) | +| `getBinary(col, row)` | BINARY (heap-allocating `byte[]`) | +| `getUuid(col, row, Uuid)` | UUID (zero-allocation, into sink) | +| `getUuidHi(col, row)` / `getUuidLo(col, row)` | UUID (individual 64-bit halves) | +| `getLong256(col, row, Long256Sink)` | LONG256 (into sink) | +| `getLong256Word(col, row, wordIndex)` | LONG256 (individual 64-bit word) | +| `getGeohashValue(col, row)` | GEOHASH (raw long value) | +| `getGeohashPrecisionBits(col)` | GEOHASH (precision metadata, per column) | +| `getDecimal128High(col, row)` / `getDecimal128Low(col, row)` | DECIMAL128 (two longs) | +| `getDecimalScale(col)` | DECIMAL (scale metadata, per column) | +| `getDoubleArrayElements(col, row)` | DOUBLE_ARRAY (flattened `double[]`, row-major) | +| `getArrayNDims(col, row)` | DOUBLE_ARRAY (dimension count) | +| `isNull(col, row)` | All types | + +Column metadata is available via 
`batch.getColumnName(col)`, +`batch.getColumnWireType(col)`, and `batch.getColumnCount()`. + +**Reading array columns:** + +`getDoubleArrayElements(col, row)` returns a flattened `double[]` in row-major +order. Use `getArrayNDims(col, row)` to discover the dimensionality. For +example, reading a 2D `DOUBLE[][]` column: + +```java +int nDims = batch.getArrayNDims(colIndex, row); // e.g. 2 +double[] flat = batch.getDoubleArrayElements(colIndex, row); +// flat contains all elements in row-major order +``` -Column metadata is available via `batch.getColumnInfo(col)` (name, type) and -`batch.getColumnCount()`. +Alternatively, you can extract individual elements in SQL (e.g., +`SELECT bids[1][1] FROM market_data`) and read them as scalar doubles. ### DDL and DML statements From 5271ad86e5ee999a2be80dc0133b0f3aa339f0d6 Mon Sep 17 00:00:00 2001 From: javier Date: Thu, 14 May 2026 19:30:13 +0200 Subject: [PATCH 23/44] removing not needed array version infobox --- documentation/ingestion/clients/java.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index 9e272deb7..1f2279b93 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -313,10 +313,6 @@ with shape `(3, 2)`, `append()` fills positions `[0,0], [0,1], [1,0], [1,1], coordinates. Call `reshape(d1, d2, ...)` to change the shape without reallocating. -:::note -Arrays require QuestDB 9.0.0 or later. -::: - ### Designated timestamp The [designated timestamp](/docs/concepts/designated-timestamp/) column From 451e359b3cbe8f4f53f5fd151aaa52f7d5b45a29 Mon Sep 17 00:00:00 2001 From: Vlad Ilyushchenko Date: Thu, 14 May 2026 19:53:36 +0100 Subject: [PATCH 24/44] connect: migrate URLs under /docs/connect/* via slug overrides Every page reachable from the Connect sidebar now declares an explicit slug that mirrors its sidebar nesting. 
File paths are unchanged (redirects to be added in a follow-up); internal links are updated to the new URLs so the production build stays green. URL mapping: - /docs/ingestion/overview -> /docs/connect/overview - /docs/client-configuration/connect-string -> /docs/connect/clients/connect-string - /docs/ingestion/clients/* -> /docs/connect/clients/* - /docs/ingestion/message-brokers/* -> /docs/connect/message-brokers/* - /docs/ingestion/ilp/* -> /docs/connect/compatibility/ilp/* - /docs/query/pgwire/* -> /docs/connect/compatibility/pgwire/* - /docs/query/rest-api -> /docs/connect/compatibility/rest-api - /docs/ingestion/import-csv -> /docs/connect/compatibility/import-csv - /docs/query/export-parquet -> /docs/connect/compatibility/export-parquet - /docs/ingestion/java-embedded -> /docs/connect/java-embedded - /docs/protocols/* -> /docs/connect/wire-protocols/* Changes: - 36 frontmatter slug additions on Connect pages. - ~306 internal markdown link rewrites across ~90 files in documentation/ (pages, partials, admonitions). - Hardcoded URLs updated in src/components/Resources/index.tsx, src/modules/integration/index.tsx, shared/clients.json, shared/ilp_clients.json. - Fixed pre-existing broken link: 5 client pages had [Configuration string](/docs/.../configuration-string/), now point at the real connect-string page. External inbound links still point at the old URLs and will 404 until redirects are configured (planned follow-up). The production build (CONTEXT=preview yarn build) now succeeds with zero broken-link errors. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- documentation/architecture/storage-engine.md | 2 +- documentation/changelog.mdx | 18 ++--- .../client-configuration/connect-string.md | 7 +- documentation/configuration/cairo-engine.md | 4 +- documentation/configuration/ingestion.md | 2 +- documentation/connect/agents.md | 67 +++++++++++++++---- .../integrations/grafana/read-only-user.md | 2 +- .../integrations/opcua-dense-format.md | 4 +- .../operations/check-transaction-applied.md | 2 +- .../operations/copy-data-between-instances.md | 8 +-- .../operations/csv-import-milliseconds.md | 2 +- .../operations/docker-compose-config.md | 2 +- .../operations/store-questdb-metrics.md | 2 +- .../cookbook/operations/tls-pgbouncer.md | 2 +- .../programmatic/cpp/missing-columns.md | 2 +- .../programmatic/php/inserting-ilp.md | 6 +- .../programmatic/ruby/inserting-ilp.md | 6 +- .../programmatic/tls-ca-configuration.md | 4 +- documentation/deployment/aws.md | 2 +- documentation/deployment/azure.md | 2 +- documentation/deployment/digital-ocean.md | 2 +- documentation/deployment/docker.md | 14 ++-- documentation/deployment/hetzner.md | 14 ++-- documentation/deployment/kubernetes.md | 6 +- .../getting-started/ai-coding-agents.mdx | 4 +- .../getting-started/capacity-planning.md | 2 +- .../getting-started/create-database.md | 8 +-- .../getting-started/enterprise-quick-start.md | 12 ++-- documentation/getting-started/quick-start.mdx | 8 +-- .../client-failover/configuration.md | 16 ++--- .../store-and-forward/configuration.md | 18 ++--- .../store-and-forward/when-to-use.md | 2 +- documentation/ingestion/clients/c-and-cpp.md | 11 +-- .../ingestion/clients/configuration-string.md | 2 +- .../clients/date-to-timestamp-conversion.md | 23 ++++--- documentation/ingestion/clients/dotnet.md | 15 +++-- documentation/ingestion/clients/go.md | 9 +-- documentation/ingestion/clients/java.md | 17 ++--- documentation/ingestion/clients/nodejs.md | 9 +-- documentation/ingestion/clients/python.md | 13 
++-- documentation/ingestion/clients/rust.md | 13 ++-- .../ingestion/ilp/advanced-settings.md | 25 +++---- .../ingestion/ilp/columnset-types.md | 5 +- documentation/ingestion/ilp/overview.md | 37 +++++----- documentation/ingestion/import-csv.md | 7 +- documentation/ingestion/java-embedded.md | 3 +- .../ingestion/message-brokers/flink.md | 1 + .../ingestion/message-brokers/kafka.md | 7 +- .../ingestion/message-brokers/redpanda.md | 11 +-- .../ingestion/message-brokers/telegraf.md | 1 + documentation/ingestion/overview.md | 21 +++--- .../integrations/data-processing/pandas.md | 2 +- .../integrations/data-processing/polars.md | 6 +- documentation/integrations/other/airbyte.md | 2 +- documentation/integrations/other/databento.md | 2 +- documentation/integrations/other/mindsdb.md | 2 +- documentation/integrations/overview.md | 8 +-- .../integrations/visualization/powerbi.md | 2 +- .../integrations/visualization/qstudio.md | 2 +- documentation/introduction.md | 2 +- .../operations/monitoring-alerting.md | 2 +- documentation/operations/task-automation.md | 2 +- .../partials/_curl.imp.insert.partial.mdx | 2 +- documentation/protocols/overview.md | 9 +-- .../protocols/qwp-egress-websocket.md | 17 ++--- .../protocols/qwp-ingress-websocket.md | 19 +++--- documentation/query/datatypes/geohashes.md | 4 +- documentation/query/datatypes/overview.md | 4 +- documentation/query/export-parquet.md | 9 +-- documentation/query/functions/date-time.md | 2 +- documentation/query/functions/meta.md | 2 +- documentation/query/functions/parquet.md | 2 +- documentation/query/overview.md | 12 ++-- documentation/query/pgwire/c-and-cpp.md | 3 +- documentation/query/pgwire/dotnet.md | 7 +- documentation/query/pgwire/go.md | 5 +- documentation/query/pgwire/java.md | 7 +- .../query/pgwire/large-result-sets.md | 3 +- documentation/query/pgwire/nodejs.md | 5 +- documentation/query/pgwire/overview.md | 5 +- documentation/query/pgwire/php.md | 3 +- documentation/query/pgwire/python.md | 13 ++-- 
documentation/query/pgwire/r.md | 3 +- documentation/query/pgwire/rust.md | 5 +- documentation/query/rest-api.md | 11 +-- .../alter-table-alter-column-set-parquet.md | 2 +- documentation/query/sql/copy.md | 2 +- documentation/query/sql/create-table.md | 4 +- documentation/schema-design-essentials.md | 6 +- documentation/security/tls.md | 6 +- documentation/troubleshooting/faq.md | 2 +- documentation/tutorials/influxdb-migration.md | 6 +- shared/clients.json | 30 ++++----- shared/ilp_clients.json | 2 +- src/components/Resources/index.tsx | 2 +- src/modules/integration/index.tsx | 8 +-- 96 files changed, 406 insertions(+), 330 deletions(-) diff --git a/documentation/architecture/storage-engine.md b/documentation/architecture/storage-engine.md index ada358aef..9f7fb2c57 100644 --- a/documentation/architecture/storage-engine.md +++ b/documentation/architecture/storage-engine.md @@ -50,7 +50,7 @@ to optimize writes in the event of out-of-order data or when updating sampling i ### Tier Three: Parquet, Locally or in an Object Store Older partitions (any partition other than the most recent one) can be converted to -[Parquet](/docs/query/export-parquet) for both interoperability and compression ratio. +[Parquet](/docs/connect/compatibility/export-parquet) for both interoperability and compression ratio. Partitions in Parquet format remain fully available for queries. Users don't need to know whether a partition is in QuestDB binary format or Parquet format. All the data types available in QuestDB can be converted to Parquet. diff --git a/documentation/changelog.mdx b/documentation/changelog.mdx index a77dccbbe..9d29bd092 100644 --- a/documentation/changelog.mdx +++ b/documentation/changelog.mdx @@ -47,7 +47,7 @@ This page tracks significant updates to the QuestDB documentation. 
- [UNNEST](/docs/query/sql/unnest/) - SQL reference for unnesting arrays into rows - [LATERAL JOIN](/docs/query/sql/lateral-join/) - SQL reference for lateral subqueries - [Sparkline and bar visualization functions](/docs/query/functions/visualization/) - Text-based chart functions for terminal and console output -- [StructArrayExplode transform](/docs/ingestion/message-brokers/kafka/) - Kafka SMT for exploding struct arrays +- [StructArrayExplode transform](/docs/connect/message-brokers/kafka/) - Kafka SMT for exploding struct arrays ### Reference @@ -61,13 +61,13 @@ This page tracks significant updates to the QuestDB documentation. - [SQL reference pages](/docs/query/sql/select/) - Replaced railroad diagrams with code-based syntax blocks across 65 SQL pages, with updated examples - [SAMPLE BY](/docs/query/sql/sample-by/) - Updated timezone bucket alignment behavior -- [Parquet export](/docs/query/export-parquet/) - Added partitioning options for exports +- [Parquet export](/docs/connect/compatibility/export-parquet/) - Added partitioning options for exports - [WINDOW JOIN](/docs/query/sql/window-join/) - Documented dynamic window boundaries - [HORIZON JOIN](/docs/query/sql/horizon-join/) - Documented multi-RHS table support - [JOIN](/docs/query/sql/join/) - Improved join documentation and reorganized page - [Web Console](/docs/getting-started/web-console/overview/) - Added table details, updated screenshots - [TTL](/docs/concepts/ttl/) - Fixed removal syntax and general improvements -- [REST API `/exp` endpoint](/docs/query/rest-api/) - Documented timeout parameter, removed outdated warning +- [REST API `/exp` endpoint](/docs/connect/compatibility/rest-api/) - Documented timeout parameter, removed outdated warning ## March 2026 @@ -93,7 +93,7 @@ This page tracks significant updates to the QuestDB documentation. 
- [Cookbook](/docs/cookbook/) - Refreshed recipes with lookback patterns, named windows, and updated schema references - [Per-column Parquet encoding and compression](/docs/query/sql/alter-table-alter-column-set-parquet/) - Comprehensive documentation for column-level settings - [Database replication](/docs/configuration/database-replication/) - GCP NFS transport and tuning updates -- [Ingestion benchmarks](/docs/ingestion/overview/) - Updated benchmark image to Q1 2026 +- [Ingestion benchmarks](/docs/connect/overview/) - Updated benchmark image to Q1 2026 ## February 2026 @@ -120,7 +120,7 @@ This page tracks significant updates to the QuestDB documentation. ### Updated - [Named windows](/docs/query/functions/window-functions/overview/) - Support for reusable `WINDOW` clause definitions -- [Parquet export](/docs/query/export-parquet/) - Fixed compression defaults, restructured page, updated `read_parquet` types +- [Parquet export](/docs/connect/compatibility/export-parquet/) - Fixed compression defaults, restructured page, updated `read_parquet` types - [Backup](/docs/operations/backup/) - Documented `backup.schedule.cron` format, improved scheduler visibility - [Date/time functions](/docs/query/functions/date-time/) - Page updates and corrections - [Window functions](/docs/query/functions/window-functions/overview/) - Added limitation documentation @@ -202,7 +202,7 @@ This page tracks significant updates to the QuestDB documentation. 
### Updated -- [Parquet export](/docs/query/export-parquet/) - Complete documentation for exporting data to Parquet format +- [Parquet export](/docs/connect/compatibility/export-parquet/) - Complete documentation for exporting data to Parquet format - [Shared pool configuration](/docs/configuration/shared-workers/) - Updated with network, query, and write shared pool options - [SQL hints](/docs/query/sql/asof-join/#choose-the-optimal-algorithm-with-an-sql-hint) - Rewritten section on temporal join hints @@ -214,7 +214,7 @@ This page tracks significant updates to the QuestDB documentation. ### New -- [PGWire for C/C++](/docs/query/pgwire/c-and-cpp/) - Guide for C/C++ applications using PostgreSQL wire protocol +- [PGWire for C/C++](/docs/connect/compatibility/pgwire/c-and-cpp/) - Guide for C/C++ applications using PostgreSQL wire protocol - [Table and column naming rules](/docs/query/sql/create-table/#table-name) - Guidelines for valid identifiers ### Reference @@ -232,7 +232,7 @@ This page tracks significant updates to the QuestDB documentation. ### Updated - [Partitioning](/docs/concepts/partitions/) - Improved formatting and explanations -- [Go client examples](/docs/ingestion/clients/go/) - Updated to v4 API +- [Go client examples](/docs/connect/clients/go/) - Updated to v4 API ## August 2025 @@ -266,7 +266,7 @@ This page tracks significant updates to the QuestDB documentation. 
### Updated -- [ILP clients](/docs/ingestion/overview/) - Array support added to Python, C++, Rust, Java, and .NET clients +- [ILP clients](/docs/connect/overview/) - Array support added to Python, C++, Rust, Java, and .NET clients - [WAL metrics](/docs/operations/monitoring-alerting/) - Added metrics for detecting WAL apply lag ## Earlier updates diff --git a/documentation/client-configuration/connect-string.md b/documentation/client-configuration/connect-string.md index d5cb49579..af1f16114 100644 --- a/documentation/client-configuration/connect-string.md +++ b/documentation/client-configuration/connect-string.md @@ -1,4 +1,5 @@ --- +slug: /connect/clients/connect-string title: Connect string reference description: Configuration knobs for QuestDB native clients (QWP over WebSocket). @@ -12,7 +13,7 @@ options under the same names, so configuration is portable across implementations. For legacy InfluxDB Line Protocol (ILP) transports (`http`, `https`, `tcp`, -`tcps`), see the [ILP overview](/docs/ingestion/ilp/overview/). +`tcps`), see the [ILP overview](/docs/connect/compatibility/ilp/overview/). **On this page:** @@ -97,7 +98,7 @@ The Java client accepts a connect string in three ways: ``` Other language clients expose equivalent entry points; see each -[client library page](/docs/ingestion/overview/#client-libraries) for the +[client library page](/docs/connect/overview/#client-libraries) for the per-language syntax. ## Common patterns {#common-patterns} @@ -532,7 +533,7 @@ transport-level OK ACK alone cannot close. because the server only flushes pending durable acks on inbound recv events. Default: `200` (ms). Set to `0` or a negative value to disable. -See the [QWP Egress (WebSocket)](/docs/protocols/qwp-egress-websocket/) +See the [QWP Egress (WebSocket)](/docs/connect/wire-protocols/qwp-egress-websocket/) wire protocol for the underlying mechanism. 
## Error handling {#error-handling} diff --git a/documentation/configuration/cairo-engine.md b/documentation/configuration/cairo-engine.md index b7f1ba9de..6e74f5986 100644 --- a/documentation/configuration/cairo-engine.md +++ b/documentation/configuration/cairo-engine.md @@ -54,8 +54,8 @@ When `false`, disables the `reload_config()` SQL function. A global timeout in seconds for long-running queries. Per-query overrides are available via the HTTP header -[`Statement-Timeout`](/docs/query/rest-api/#headers) or the Postgres -[`options`](/docs/query/pgwire/overview/) +[`Statement-Timeout`](/docs/connect/compatibility/rest-api/#headers) or the Postgres +[`options`](/docs/connect/compatibility/pgwire/overview/) connection property. ## Commit and write behavior diff --git a/documentation/configuration/ingestion.md b/documentation/configuration/ingestion.md index d0b3fabcb..010225adb 100644 --- a/documentation/configuration/ingestion.md +++ b/documentation/configuration/ingestion.md @@ -280,7 +280,7 @@ yields. :::note The UDP receiver is deprecated since QuestDB version 6.5.2. We recommend -[ILP over HTTP](/docs/ingestion/ilp/overview/) instead. +[ILP over HTTP](/docs/connect/compatibility/ilp/overview/) instead. ::: diff --git a/documentation/connect/agents.md b/documentation/connect/agents.md index 90cfcfde9..96dd2a583 100644 --- a/documentation/connect/agents.md +++ b/documentation/connect/agents.md @@ -1,4 +1,5 @@ --- +slug: /connect/agents title: Agents description: How AI agents operate QuestDB — which protocols they use, what tooling @@ -29,9 +30,9 @@ it ships with. 
| Interface | Best for | Why | |-----------------------------------------------------------|---------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| [**QWP egress**](/docs/protocols/qwp-egress-websocket/) | The primary path for executing SQL — DDL, exploratory SELECT, and large result streaming. | Binary, columnar, byte-credit flow control, multi-host failover. Use a native [client library](/docs/ingestion/overview/) when one exists for the agent's runtime; otherwise an agent can implement one directly against the protocol spec. | -| [**QWP ingress**](/docs/protocols/qwp-ingress-websocket/) | The primary path for ingesting data — agentic ETL, sensor feeds, bulk loads. | Native binary protocol with multi-host failover and store-and-forward built into the client. | -| [**REST API**](/docs/query/rest-api/) | Schema discovery and small ad-hoc queries (a few hundred rows or fewer). | HTTP + JSON. Every agent framework supports it; no SDK to install. `SHOW TABLES` / `SHOW COLUMNS` and other lookups map naturally to function-calling tools. | +| [**QWP egress**](/docs/connect/wire-protocols/qwp-egress-websocket/) | The primary path for executing SQL — DDL, exploratory SELECT, and large result streaming. | Binary, columnar, byte-credit flow control, multi-host failover. Use a native [client library](/docs/connect/overview/) when one exists for the agent's runtime; otherwise an agent can implement one directly against the protocol spec. | +| [**QWP ingress**](/docs/connect/wire-protocols/qwp-ingress-websocket/) | The primary path for ingesting data — agentic ETL, sensor feeds, bulk loads. | Native binary protocol with multi-host failover and store-and-forward built into the client. 
| +| [**REST API**](/docs/connect/compatibility/rest-api/) | Schema discovery and small ad-hoc queries (a few hundred rows or fewer). | HTTP + JSON. Every agent framework supports it; no SDK to install. `SHOW TABLES` / `SHOW COLUMNS` and other lookups map naturally to function-calling tools. | **QWP egress is the recommended path for any sustained SQL work** — exploratory or production. Reach for REST when the agent is doing schema @@ -106,7 +107,7 @@ Pick the transport by data volume: consumable by the agent without an SDK. - **Large result sets** — exporting data into another system, materializing analytics output — should go through a - [QWP egress client](/docs/protocols/qwp-egress-websocket/). Byte-credit + [QWP egress client](/docs/connect/wire-protocols/qwp-egress-websocket/). Byte-credit flow control prevents the agent from being overwhelmed mid-export, and the binary columnar format keeps wire size low. @@ -132,12 +133,12 @@ recommended path for all writes**: - **Bulk upload and sustained ingestion** (agentic ETL, a streaming sensor feed fronted by an LLM, batch loads from another system): use a - [QWP client library](/docs/ingestion/overview/). The agent generates + [QWP client library](/docs/connect/overview/). The agent generates setup code; the runtime gets throughput, multi-host failover, and store-and-forward for free. - **No native client for the agent's runtime?** The agent can implement an uploader directly against the - [QWP ingress wire spec](/docs/protocols/qwp-ingress-websocket/) — the + [QWP ingress wire spec](/docs/connect/wire-protocols/qwp-ingress-websocket/) — the protocol is fully documented for clean-room implementations and a minimum-viable client is on the order of a few hundred lines. - **Quick one-off inserts** during exploration: `INSERT INTO ...` via REST @@ -173,7 +174,7 @@ can't reach. **Correct path:** parse the file in the agent's runtime, then push the rows to QuestDB through a QWP ingress client. -1. 
Check the [Ingestion overview](/docs/ingestion/overview/) for the +1. Check the [Ingestion overview](/docs/connect/overview/) for the current list of QWP client libraries supported in the agent's runtime language. 2. **Native client available** — the agent reads the file locally @@ -181,21 +182,61 @@ rows to QuestDB through a QWP ingress client. CSV reader for CSV) and streams rows to QuestDB through the client. 3. **No native client for that runtime** — the agent can implement an uploader directly against the - [QWP ingress wire spec](/docs/protocols/qwp-ingress-websocket/). The + [QWP ingress wire spec](/docs/connect/wire-protocols/qwp-ingress-websocket/). The protocol is fully documented for clean-room implementations and a minimum-viable client (BOOLEAN, LONG, DOUBLE, TIMESTAMP, VARCHAR) is - on the order of a few hundred lines. + on the order of a few hundred lines. See the next recipe for the two + patterns that matter for throughput. This works regardless of where QuestDB runs — Docker, cloud, `demo.questdb.io`, remote VM — and gives the agent throughput, multi-host failover, and store-and-forward for free. +### Writing a fast QWP ingress uploader + +If the agent is implementing a QWP ingress client against the +[wire spec](/docs/connect/wire-protocols/qwp-ingress-websocket/) — because no native +client exists for its runtime, or as a bespoke one-off uploader — two +patterns make the difference between a slow client and a fast one. An LLM +left to its own devices tends to default to the slow shape because it +"looks correct" and the bottleneck only shows up under load. + +**Pipeline frames; don't wait for each ack.** QWP allows many frames in +flight per connection (up to the +[max in-flight batches](/docs/connect/wire-protocols/qwp-ingress-websocket/#protocol-limits) +limit, 128 by default). Acks arrive asynchronously on the same connection, +in send order, and the server-assigned `sequence` field correlates each +ack with its frame. 
A lock-step `send → await OK → send next` loop wastes +a round-trip time per batch and caps throughput at a small fraction of +what the link supports. Decouple the writer (which streams frames into +the WebSocket) from the reader (which drains OK frames and advances the +ack watermark), and let the writer keep pushing while the reader catches +up. The writer only needs to check **transport-level** backpressure — the +socket's send buffer fill, or a bounded queue between encoder and sender — +not application-level acks. + +**Encode column-major, not row-major.** QWP's wire format lays out all +values for column 0 first, then all values for column 1, and so on. Source +data from columnar formats (Parquet, Arrow, columnar DB exports) is +already in this shape; preserve it end-to-end. An encoder that +materializes an intermediate row-major buffer — pseudocode +`for row in rows: for col in cols: emit(row[col])` — pays for the +allocation, breaks CPU cache locality, and prevents the bulk memcpy / SIMD +path that fixed-width column buffers would otherwise allow. The right +shape is `for col in cols: bulkCopy(columnBuffers[col])` — one tight loop +per column, often a single bulk copy for fixed-width types. + +These two changes compound: a pipelined, column-major client is often +several-fold faster than a lock-step, row-major one — sometimes the +difference between "the client is the bottleneck" and "the link +saturates". 
+ ## Next steps - **Quickstart**: [AI Coding Agents](/docs/getting-started/ai-coding-agents/) -- **Query interfaces**: [QWP egress (WebSocket)](/docs/protocols/qwp-egress-websocket/), - [REST API](/docs/query/rest-api/) -- **Ingest interfaces**: [Ingestion overview](/docs/ingestion/overview/), - [QWP ingress (WebSocket)](/docs/protocols/qwp-ingress-websocket/) +- **Query interfaces**: [QWP egress (WebSocket)](/docs/connect/wire-protocols/qwp-egress-websocket/), + [REST API](/docs/connect/compatibility/rest-api/) +- **Ingest interfaces**: [Ingestion overview](/docs/connect/overview/), + [QWP ingress (WebSocket)](/docs/connect/wire-protocols/qwp-ingress-websocket/) - **Operating safely**: [RBAC](/docs/security/rbac/) (Enterprise), [TLS](/docs/security/tls/) diff --git a/documentation/cookbook/integrations/grafana/read-only-user.md b/documentation/cookbook/integrations/grafana/read-only-user.md index 064b64af0..154979df7 100644 --- a/documentation/cookbook/integrations/grafana/read-only-user.md +++ b/documentation/cookbook/integrations/grafana/read-only-user.md @@ -75,7 +75,7 @@ After enabling, you have two separate users: - Use for: Grafana dashboards, monitoring tools, analytics applications :::info Related Documentation -- [PostgreSQL wire protocol](/docs/query/pgwire/overview/) +- [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) - [QuestDB Enterprise RBAC](/docs/security/rbac/) - [Configuration reference](/docs/configuration/overview/) - [Grafana QuestDB data source](https://grafana.com/grafana/plugins/questdb-questdb-datasource/) diff --git a/documentation/cookbook/integrations/opcua-dense-format.md b/documentation/cookbook/integrations/opcua-dense-format.md index b5a0826bd..a91f97211 100644 --- a/documentation/cookbook/integrations/opcua-dense-format.md +++ b/documentation/cookbook/integrations/opcua-dense-format.md @@ -300,6 +300,6 @@ OPC-UA timestamps may have different precision than QuestDB expects. 
Ensure: :::info Related Documentation - [Telegraf OPC-UA plugin](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/opcua) - [Telegraf merge aggregator](https://github.com/influxdata/telegraf/tree/master/plugins/aggregators/merge) -- [QuestDB ILP reference](/docs/ingestion/ilp/overview/) -- [InfluxDB Line Protocol](/docs/ingestion/ilp/overview/) +- [QuestDB ILP reference](/docs/connect/compatibility/ilp/overview/) +- [InfluxDB Line Protocol](/docs/connect/compatibility/ilp/overview/) ::: diff --git a/documentation/cookbook/operations/check-transaction-applied.md b/documentation/cookbook/operations/check-transaction-applied.md index 75a6c29c1..51ecbe721 100644 --- a/documentation/cookbook/operations/check-transaction-applied.md +++ b/documentation/cookbook/operations/check-transaction-applied.md @@ -31,5 +31,5 @@ Another viable approach is to run `SELECT count(*) FROM my_table` and verify the :::info Related Documentation - [Write-Ahead Log concept](/docs/concepts/write-ahead-log/) - [Meta functions reference](/docs/query/functions/meta/) -- [InfluxDB Line Protocol overview](/docs/ingestion/ilp/overview/) +- [InfluxDB Line Protocol overview](/docs/connect/compatibility/ilp/overview/) ::: diff --git a/documentation/cookbook/operations/copy-data-between-instances.md b/documentation/cookbook/operations/copy-data-between-instances.md index f26f82be3..54244449c 100644 --- a/documentation/cookbook/operations/copy-data-between-instances.md +++ b/documentation/cookbook/operations/copy-data-between-instances.md @@ -30,10 +30,10 @@ This reads from the source instance using PostgreSQL wire protocol and writes to ## Alternative: Export endpoint -You can also use [the export endpoint](/docs/query/rest-api/#exp---export-data) to export data to CSV or other formats. +You can also use [the export endpoint](/docs/connect/compatibility/rest-api/#exp---export-data) to export data to CSV or other formats. 
:::info Related Documentation -- [ILP ingestion](/docs/ingestion/overview/) -- [PostgreSQL wire protocol](/docs/query/pgwire/overview/) -- [REST API export](/docs/query/rest-api/#exp---export-data) +- [ILP ingestion](/docs/connect/overview/) +- [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) +- [REST API export](/docs/connect/compatibility/rest-api/#exp---export-data) ::: diff --git a/documentation/cookbook/operations/csv-import-milliseconds.md b/documentation/cookbook/operations/csv-import-milliseconds.md index 330476439..96b5c1469 100644 --- a/documentation/cookbook/operations/csv-import-milliseconds.md +++ b/documentation/cookbook/operations/csv-import-milliseconds.md @@ -62,6 +62,6 @@ Read the CSV line-by-line and convert, then send via the ILP client. :::info Related Documentation - [CSV import](/docs/getting-started/web-console/import-csv/) -- [ILP ingestion](/docs/ingestion/overview/) +- [ILP ingestion](/docs/connect/overview/) - [read_parquet()](/docs/query/functions/parquet/) ::: diff --git a/documentation/cookbook/operations/docker-compose-config.md b/documentation/cookbook/operations/docker-compose-config.md index 96e77ee7b..434d2eb37 100644 --- a/documentation/cookbook/operations/docker-compose-config.md +++ b/documentation/cookbook/operations/docker-compose-config.md @@ -98,5 +98,5 @@ For a full list of available configuration parameters, see: :::info Related Documentation - [Server Configuration](/docs/configuration/overview/) - [Docker Deployment Guide](/docs/deployment/docker/) -- [PostgreSQL Wire Protocol](/docs/query/pgwire/overview/) +- [PostgreSQL Wire Protocol](/docs/connect/compatibility/pgwire/overview/) ::: diff --git a/documentation/cookbook/operations/store-questdb-metrics.md b/documentation/cookbook/operations/store-questdb-metrics.md index ffb596e5e..782df659e 100644 --- a/documentation/cookbook/operations/store-questdb-metrics.md +++ b/documentation/cookbook/operations/store-questdb-metrics.md @@ -52,6 +52,6 @@ 
A few things to note: :::info Related Documentation - [QuestDB metrics](/docs/operations/logging-metrics/) -- [ILP ingestion](/docs/ingestion/overview/) +- [ILP ingestion](/docs/connect/overview/) - [Telegraf documentation](https://docs.influxdata.com/telegraf/) ::: diff --git a/documentation/cookbook/operations/tls-pgbouncer.md b/documentation/cookbook/operations/tls-pgbouncer.md index 6db184d79..f36344d39 100644 --- a/documentation/cookbook/operations/tls-pgbouncer.md +++ b/documentation/cookbook/operations/tls-pgbouncer.md @@ -51,7 +51,7 @@ Traffic will be unencrypted between PgBouncer and QuestDB. This setup is only su :::info Related Documentation -- [PostgreSQL wire protocol](/docs/query/pgwire/overview/) +- [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) - [QuestDB security](/docs/security/tls/) - [PgBouncer documentation](https://www.pgbouncer.org/config.html) ::: diff --git a/documentation/cookbook/programmatic/cpp/missing-columns.md b/documentation/cookbook/programmatic/cpp/missing-columns.md index 2c31b5698..fe2991ac9 100644 --- a/documentation/cookbook/programmatic/cpp/missing-columns.md +++ b/documentation/cookbook/programmatic/cpp/missing-columns.md @@ -113,5 +113,5 @@ int main() :::info Related Documentation - [QuestDB C++ client documentation](https://github.com/questdb/c-questdb-client) -- [ILP reference](/docs/ingestion/ilp/overview/) +- [ILP reference](/docs/connect/compatibility/ilp/overview/) ::: diff --git a/documentation/cookbook/programmatic/php/inserting-ilp.md b/documentation/cookbook/programmatic/php/inserting-ilp.md index 2695b88cc..8acddc61f 100644 --- a/documentation/cookbook/programmatic/php/inserting-ilp.md +++ b/documentation/cookbook/programmatic/php/inserting-ilp.md @@ -47,7 +47,7 @@ The format consists of: - **Columns** (fields): Space-separated, then comma-separated key-value pairs for numerical or string data - **Timestamp** (optional): Nanosecond-precision timestamp; if omitted, QuestDB uses server 
time -For complete ILP specification, see the [ILP reference documentation](/docs/ingestion/ilp/overview/). +For complete ILP specification, see the [ILP reference documentation](/docs/connect/compatibility/ilp/overview/). ## ILP over HTTP @@ -351,7 +351,7 @@ TCP ILP does not provide acknowledgments for successful writes. If the connectio | **Recommended for** | Custom timestamps required | Ease of development, server timestamps acceptable | High-volume, loss-tolerant scenarios | :::info Related Documentation -- [ILP reference documentation](/docs/ingestion/ilp/overview/) -- [HTTP REST API](/docs/query/rest-api/) +- [ILP reference documentation](/docs/connect/compatibility/ilp/overview/) +- [HTTP REST API](/docs/connect/compatibility/rest-api/) - [Authentication and security](/docs/security/rbac/) ::: diff --git a/documentation/cookbook/programmatic/ruby/inserting-ilp.md b/documentation/cookbook/programmatic/ruby/inserting-ilp.md index 41ccd10c4..5fa3c473b 100644 --- a/documentation/cookbook/programmatic/ruby/inserting-ilp.md +++ b/documentation/cookbook/programmatic/ruby/inserting-ilp.md @@ -348,8 +348,8 @@ TCP ILP has no acknowledgments. 
If the connection drops, data may be lost silent ::: :::info Related Documentation -- [ILP reference](/docs/ingestion/ilp/overview/) -- [ILP over HTTP](/docs/ingestion/ilp/overview/#transport-selection) -- [ILP over TCP](/docs/ingestion/ilp/overview/#transport-selection) +- [ILP reference](/docs/connect/compatibility/ilp/overview/) +- [ILP over HTTP](/docs/connect/compatibility/ilp/overview/#transport-selection) +- [ILP over TCP](/docs/connect/compatibility/ilp/overview/#transport-selection) - [InfluxDB Ruby client](https://github.com/influxdata/influxdb-client-ruby) ::: diff --git a/documentation/cookbook/programmatic/tls-ca-configuration.md b/documentation/cookbook/programmatic/tls-ca-configuration.md index dc8847476..e836555a4 100644 --- a/documentation/cookbook/programmatic/tls-ca-configuration.md +++ b/documentation/cookbook/programmatic/tls-ca-configuration.md @@ -96,7 +96,7 @@ The examples are in Rust but the concepts are similar in other languages. Check :::info Related Documentation - [QuestDB Rust client](https://docs.rs/questdb/) -- [QuestDB Python client](/docs/ingestion/clients/python/) -- [QuestDB C++ client](/docs/ingestion/clients/c-and-cpp/) +- [QuestDB Python client](/docs/connect/clients/python/) +- [QuestDB C++ client](/docs/connect/clients/c-and-cpp/) - [QuestDB TLS configuration](/docs/security/tls/) ::: diff --git a/documentation/deployment/aws.md b/documentation/deployment/aws.md index c2e9ac0a8..b94f1e64a 100644 --- a/documentation/deployment/aws.md +++ b/documentation/deployment/aws.md @@ -190,7 +190,7 @@ pg.password=your_secure_password ``` **InfluxDB line protocol** - edit `conf/auth.json`. See -[ILP authentication](/docs/ingestion/ilp/overview/#authentication). +[ILP authentication](/docs/connect/compatibility/ilp/overview/#authentication). 
Restart after changes: diff --git a/documentation/deployment/azure.md b/documentation/deployment/azure.md index 6a2e05d31..8351978ea 100644 --- a/documentation/deployment/azure.md +++ b/documentation/deployment/azure.md @@ -262,7 +262,7 @@ pg.password=your_secure_password ``` **InfluxDB line protocol** - edit `conf/auth.json`. See -[ILP authentication](/docs/ingestion/ilp/overview/#authentication). +[ILP authentication](/docs/connect/compatibility/ilp/overview/#authentication). Restart after changes: diff --git a/documentation/deployment/digital-ocean.md b/documentation/deployment/digital-ocean.md index a5ff85d87..982179d75 100644 --- a/documentation/deployment/digital-ocean.md +++ b/documentation/deployment/digital-ocean.md @@ -99,7 +99,7 @@ pg.password=... ``` For details on authentication using InfluxDB line protocol, see the -[InfluxDB line protocol authentication guide](/docs/ingestion/ilp/overview/#authentication). +[InfluxDB line protocol authentication guide](/docs/connect/compatibility/ilp/overview/#authentication). ### Disabling authentication diff --git a/documentation/deployment/docker.md b/documentation/deployment/docker.md index 962f04526..57563e8af 100644 --- a/documentation/deployment/docker.md +++ b/documentation/deployment/docker.md @@ -53,16 +53,16 @@ Below each parameter is described in detail. This parameter will expose a port to the host. 
You can specify: -- `-p 9000:9000` - [REST API](/docs/query/rest-api/) and +- `-p 9000:9000` - [REST API](/docs/connect/compatibility/rest-api/) and [Web Console](/docs/getting-started/web-console/overview/) -- `-p 9009:9009` - [InfluxDB line protocol](/docs/ingestion/ilp/overview/) -- `-p 8812:8812` - [Postgres wire protocol](/docs/query/pgwire/overview/) +- `-p 9009:9009` - [InfluxDB line protocol](/docs/connect/compatibility/ilp/overview/) +- `-p 8812:8812` - [Postgres wire protocol](/docs/connect/compatibility/pgwire/overview/) - `-p 9003:9003` - [Min health server](/docs/operations/logging-metrics/#minimal-http-server) All ports are optional, you can pick only the ones you need. For example, it is enough to expose `8812` if you only plan to use -[Postgres wire protocol](/docs/query/pgwire/overview/). +[Postgres wire protocol](/docs/connect/compatibility/pgwire/overview/). ### `-v` parameter to mount storage @@ -161,11 +161,11 @@ the root cause. When QuestDB is running, you can start interacting with it: - Port `9000` is for REST. More info is available on the - [REST documentation page](/docs/query/rest-api/). + [REST documentation page](/docs/connect/compatibility/rest-api/). - Port `8812` is used for Postgres. Check our - [Postgres reference page](/docs/query/pgwire/overview/). + [Postgres reference page](/docs/connect/compatibility/pgwire/overview/). - Port `9009` is dedicated to InfluxDB Line Protocol. Consult our - [InfluxDB protocol page](/docs/ingestion/ilp/overview/). + [InfluxDB protocol page](/docs/connect/compatibility/ilp/overview/). ## Data persistence diff --git a/documentation/deployment/hetzner.md b/documentation/deployment/hetzner.md index a4b809cf9..6a5b4d35f 100644 --- a/documentation/deployment/hetzner.md +++ b/documentation/deployment/hetzner.md @@ -128,9 +128,9 @@ Replace `` with your actual public IP address. 
For production deploymen ::: **Default QuestDB Ports:** -- `9000`: [Web Console](/docs/getting-started/web-console/overview/) and [REST API](/docs/query/rest-api/) -- `8812`: [PostgreSQL wire protocol](/docs/query/pgwire/overview/) -- `9009`: [InfluxDB line protocol](/docs/ingestion/ilp/overview/) (TCP) +- `9000`: [Web Console](/docs/getting-started/web-console/overview/) and [REST API](/docs/connect/compatibility/rest-api/) +- `8812`: [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) +- `9009`: [InfluxDB line protocol](/docs/connect/compatibility/ilp/overview/) (TCP) - `9003`: [Health monitoring](/docs/operations/logging-metrics/#minimal-http-server) and Prometheus metrics Add firewall rules for additional ports as needed for your specific use case. @@ -239,9 +239,9 @@ renderText={(release) => ( /> **Port mappings explained:** -- `-p 9000:9000`: [Web Console](/docs/getting-started/web-console/overview/) and [REST API](/docs/query/rest-api/) -- `-p 9009:9009`: [InfluxDB line protocol](/docs/ingestion/ilp/overview/) (TCP) -- `-p 8812:8812`: [PostgreSQL wire protocol](/docs/query/pgwire/overview/) +- `-p 9000:9000`: [Web Console](/docs/getting-started/web-console/overview/) and [REST API](/docs/connect/compatibility/rest-api/) +- `-p 9009:9009`: [InfluxDB line protocol](/docs/connect/compatibility/ilp/overview/) (TCP) +- `-p 8812:8812`: [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) - `-p 9003:9003`: [Health monitoring](/docs/operations/logging-metrics/#minimal-http-server) and Prometheus metrics :::tip Port Selection @@ -570,7 +570,7 @@ questdb01$ psql -c "SELECT version();" Expected output should show QuestDB version information, confirming successful database connectivity. -For more details on QuestDB's PostgreSQL compatibility, see the [PostgreSQL wire protocol](/docs/query/pgwire/overview/) documentation. 
+For more details on QuestDB's PostgreSQL compatibility, see the [PostgreSQL wire protocol](/docs/connect/compatibility/pgwire/overview/) documentation. ### Manual Backup Test diff --git a/documentation/deployment/kubernetes.md b/documentation/deployment/kubernetes.md index 9a954bcf4..638294d64 100644 --- a/documentation/deployment/kubernetes.md +++ b/documentation/deployment/kubernetes.md @@ -70,10 +70,10 @@ kubectl port-forward my-questdb-0 9000 The following ports may also be used: -- 9000: [REST API](/docs/query/rest-api/) and +- 9000: [REST API](/docs/connect/compatibility/rest-api/) and [Web Console](/docs/getting-started/web-console/overview/) -- 8812: [Postgres](/docs/query/pgwire/overview/) -- 9009: [InfluxDB line protocol](/docs/ingestion/ilp/overview/) +- 8812: [Postgres](/docs/connect/compatibility/pgwire/overview/) +- 9009: [InfluxDB line protocol](/docs/connect/compatibility/ilp/overview/) ## Customizing the deployment diff --git a/documentation/getting-started/ai-coding-agents.mdx b/documentation/getting-started/ai-coding-agents.mdx index fbbb45447..c31744cd4 100644 --- a/documentation/getting-started/ai-coding-agents.mdx +++ b/documentation/getting-started/ai-coding-agents.mdx @@ -180,8 +180,8 @@ The agent will handle everything from installing prerequisites through reporting ## Next steps -- [REST API reference](/docs/query/rest-api/) - API documentation +- [REST API reference](/docs/connect/compatibility/rest-api/) - API documentation - [SQL overview](/docs/query/overview/) - QuestDB SQL syntax -- [Client libraries](/docs/ingestion/overview/) - Official client libraries +- [Client libraries](/docs/connect/overview/) - Official client libraries - [Sample datasets](https://github.com/questdb/sample-datasets) - Example data to try diff --git a/documentation/getting-started/capacity-planning.md b/documentation/getting-started/capacity-planning.md index ace91f78f..be4759d44 100644 --- a/documentation/getting-started/capacity-planning.md +++ 
b/documentation/getting-started/capacity-planning.md @@ -209,7 +209,7 @@ As of QuestDB 7.4.2, InfluxDB Line Protocol operates over HTTP instead of TCP. As such, ILP is optimal out-of-the box. -See your [ILP client](/docs/ingestion/overview/#client-libraries) for +See your [ILP client](/docs/connect/overview/#client-libraries) for language-specific configurations. ### Postgres Wire Protocol diff --git a/documentation/getting-started/create-database.md b/documentation/getting-started/create-database.md index d148fc271..8d620784f 100644 --- a/documentation/getting-started/create-database.md +++ b/documentation/getting-started/create-database.md @@ -12,8 +12,8 @@ specific types. For most applications, you will import your data using methods like the InfluxDB Line Protocol, CSV imports, or integration with third-party tools such as -Telegraf, [Kafka](/docs/ingestion/message-brokers/kafka), or Prometheus. If your interest lies in data ingestion rather -than generation, refer to our [ingestion overview](/docs/ingestion/overview/). +Telegraf, [Kafka](/docs/connect/message-brokers/kafka), or Prometheus. If your interest lies in data ingestion rather +than generation, refer to our [ingestion overview](/docs/connect/overview/). Alternatively, the [QuestDB demo instance](https://demo.questdb.io) offers a practical way to explore data creation and manipulation without setting up your dataset. @@ -31,8 +31,8 @@ All commands are run through the [Web Console](/docs/getting-started/web-console `http://localhost:9000`. You can also run the same SQL via the -[Postgres endpoint](/docs/query/pgwire/overview/) or the -[REST API](/docs/query/rest-api/). +[Postgres endpoint](/docs/connect/compatibility/pgwire/overview/) or the +[REST API](/docs/connect/compatibility/rest-api/). If QuestDB is not running locally, checkout the [quick start](/docs/getting-started/quick-start/). 
diff --git a/documentation/getting-started/enterprise-quick-start.md b/documentation/getting-started/enterprise-quick-start.md index 1e1f64483..0ac0fe152 100644 --- a/documentation/getting-started/enterprise-quick-start.md +++ b/documentation/getting-started/enterprise-quick-start.md @@ -266,11 +266,11 @@ the server's certificate. For local testing with self-signed certificates, you c Connecting a client to ILP is a common path. -However, you may use something like [Kafka](/docs/ingestion/message-brokers/kafka). +However, you may use something like [Kafka](/docs/connect/message-brokers/kafka). For more on ILP ingestion, see: -- [ILP Overview](/docs/ingestion/ilp/overview/) — Protocol details and configuration -- [Ingestion Overview](/docs/ingestion/overview/) — Client libraries and ingestion methods +- [ILP Overview](/docs/connect/compatibility/ilp/overview/) — Protocol details and configuration +- [Ingestion Overview](/docs/connect/overview/) — Client libraries and ingestion methods ## 5. Ingest data, Kafka Connect (optional) @@ -344,7 +344,7 @@ client.conf.string=https::addr=localhost:9000;token=qt1KAsf1U9YbUVAX1H2IahXEE3-4 Once you deploy this configuration, the connector will start sending data from your Kafka topic to QuestDB. If you encounter any issues, check the logs for both your Kafka Connect worker and your QuestDB server for more details. -See the [QuestDB Kafka Connector documentation](/docs/ingestion/message-brokers/kafka/#questdb-kafka-connect-connector) for more details +See the [QuestDB Kafka Connector documentation](/docs/connect/message-brokers/kafka/#questdb-kafka-connect-connector) for more details on the configuration options and how to set up the connector. ## 6. Query data, PostgreSQL query @@ -404,7 +404,7 @@ This covers the very basics of user creation and service accounts. We have an `ingest` service account and a `dashboard` service account. 
For more on querying, see: -- [PostgreSQL Wire Protocol](/docs/query/pgwire/overview/) — Connection details and compatibility +- [PostgreSQL Wire Protocol](/docs/connect/compatibility/pgwire/overview/) — Connection details and compatibility - [Query & SQL Overview](/docs/query/overview/) — SQL syntax and functions > For the full role-based access control docs, including group management, see @@ -574,7 +574,7 @@ Enterprise. If you're new to QuestDB, consider checking out: -- [Ingestion overview](/docs/ingestion/overview/): Learn the various ingestion +- [Ingestion overview](/docs/connect/overview/): Learn the various ingestion methods and their benefits and tradeoffs, and pick a language client. - [Query & SQL overview](/docs/query/overview/): Learn how to query QuestDB. diff --git a/documentation/getting-started/quick-start.mdx b/documentation/getting-started/quick-start.mdx index 424d1a006..d40dda310 100644 --- a/documentation/getting-started/quick-start.mdx +++ b/documentation/getting-started/quick-start.mdx @@ -258,9 +258,9 @@ It works? You're ready to bring your data. | Port | Service | |------|---------| -| `9000` | [REST API](/docs/query/rest-api/) and [Web Console](/docs/getting-started/web-console/overview/) | -| `9009` | [InfluxDB Line Protocol (ILP)](/docs/ingestion/ilp/overview/) - Legacy TCP, use HTTP instead | -| `8812` | [PostgreSQL Wire Protocol](/docs/query/pgwire/overview/) | +| `9000` | [REST API](/docs/connect/compatibility/rest-api/) and [Web Console](/docs/getting-started/web-console/overview/) | +| `9009` | [InfluxDB Line Protocol (ILP)](/docs/connect/compatibility/ilp/overview/) - Legacy TCP, use HTTP instead | +| `8812` | [PostgreSQL Wire Protocol](/docs/connect/compatibility/pgwire/overview/) | | `9003` | [Health endpoint](/docs/operations/logging-metrics/#minimal-http-server) | ## Bring your data @@ -273,7 +273,7 @@ Choose from one of our premium ingest-only language clients: -_Want more options? 
See the [ingestion overview](/docs/ingestion/overview/)._ +_Want more options? See the [ingestion overview](/docs/connect/overview/)._ ### Create new data diff --git a/documentation/high-availability/client-failover/configuration.md b/documentation/high-availability/client-failover/configuration.md index a307385a3..ff453c6dd 100644 --- a/documentation/high-availability/client-failover/configuration.md +++ b/documentation/high-availability/client-failover/configuration.md @@ -18,7 +18,7 @@ first. `zone` is accepted everywhere but only takes effect on egress; `target` is an egress-only key and is rejected as an unknown key on an ingress connect string. They are documented in full on the -[connect-string reference](/docs/client-configuration/connect-string#failover-keys); +[connect-string reference](/docs/connect/clients/connect-string#failover-keys); the table below summarises the failover-relevant subset. | Key | Type | Default | Notes | @@ -42,7 +42,7 @@ The ingress reconnect loop is driven by store-and-forward connect-string keys. See [Store-and-forward configuration](/docs/high-availability/store-and-forward/configuration/#reconnect-keys) and the -[connect-string reference](/docs/client-configuration/connect-string#sf-keys) +[connect-string reference](/docs/connect/clients/connect-string#sf-keys) for the full list. The failover-relevant keys are: | Key | Type | Default | Notes | @@ -68,7 +68,7 @@ network), and retrying for five minutes only hides it. The egress failover loop wraps each `execute()` call on the read-side query client. 
The full key list lives on the -[connect-string reference](/docs/client-configuration/connect-string#egress-flow); +[connect-string reference](/docs/connect/clients/connect-string#egress-flow); the user-visible knobs are: | Key | Type | Default | Notes | @@ -151,8 +151,8 @@ try (QwpQueryClient client = QwpQueryClient.fromConfig( | Key | Concept | Reference | |---|---|---| -| `addr`, `zone`, `target`, `auth_timeout_ms` | Host selection, role filter | [connect-string #failover-keys](/docs/client-configuration/connect-string#failover-keys) | -| `reconnect_*`, `initial_connect_retry` | Ingress retry budget | [connect-string #reconnect-keys](/docs/client-configuration/connect-string#reconnect-keys) | -| `failover`, `failover_*` | Egress retry budget | [connect-string #egress-flow](/docs/client-configuration/connect-string#egress-flow) | -| `username` / `password` / `token` | Authentication | [connect-string #auth](/docs/client-configuration/connect-string#auth) | -| `tls_*` | TLS configuration | [connect-string #tls](/docs/client-configuration/connect-string#tls) | +| `addr`, `zone`, `target`, `auth_timeout_ms` | Host selection, role filter | [connect-string #failover-keys](/docs/connect/clients/connect-string#failover-keys) | +| `reconnect_*`, `initial_connect_retry` | Ingress retry budget | [connect-string #reconnect-keys](/docs/connect/clients/connect-string#reconnect-keys) | +| `failover`, `failover_*` | Egress retry budget | [connect-string #egress-flow](/docs/connect/clients/connect-string#egress-flow) | +| `username` / `password` / `token` | Authentication | [connect-string #auth](/docs/connect/clients/connect-string#auth) | +| `tls_*` | TLS configuration | [connect-string #tls](/docs/connect/clients/connect-string#tls) | diff --git a/documentation/high-availability/store-and-forward/configuration.md b/documentation/high-availability/store-and-forward/configuration.md index 318b6556a..848ad448d 100644 --- 
a/documentation/high-availability/store-and-forward/configuration.md +++ b/documentation/high-availability/store-and-forward/configuration.md @@ -13,7 +13,7 @@ operational guidance read [Operating and tuning](/docs/high-availability/store-and-forward/operating-and-tuning/). Shared keys (authentication, TLS, address list) are documented on the -[connect-string reference](/docs/client-configuration/connect-string). +[connect-string reference](/docs/connect/clients/connect-string). The keys below are the SF-specific subset. ## Storage keys @@ -37,7 +37,7 @@ Size values accept integer bytes or unit suffixes (`K`, `M`, `G`, `T`) using binary multipliers. These keys are also documented on the central -[connect-string reference](/docs/client-configuration/connect-string#sf-keys). +[connect-string reference](/docs/connect/clients/connect-string#sf-keys). ## Reconnect keys @@ -54,7 +54,7 @@ and host-walk semantics are documented in | `close_flush_timeout_millis` | int (ms) | `5000` | `close()` blocks up to this long waiting for `ackedFsn ≥ publishedFsn`. `0` or `-1` skips the drain wait. The safety-net `checkError()` still runs. | Cross-reference: -[connect-string #reconnect-keys](/docs/client-configuration/connect-string#reconnect-keys). +[connect-string #reconnect-keys](/docs/connect/clients/connect-string#reconnect-keys). ## Durable-ack keys @@ -80,7 +80,7 @@ The per-category defaults are documented in ## Other relevant keys These keys are not SF-specific but affect SF behaviour. See the -[connect-string reference](/docs/client-configuration/connect-string) for the +[connect-string reference](/docs/connect/clients/connect-string) for the canonical entries. | Key | Type | Default | Description | @@ -191,8 +191,8 @@ down. Suitable for edge / IoT producers on unreliable links. 
| Group | Connect-string reference | |---|---| -| Storage (`sf_dir`, `sender_id`, …) | [#sf-keys](/docs/client-configuration/connect-string#sf-keys) | -| Reconnect (`reconnect_*`, `initial_connect_retry`, `close_flush_timeout_millis`) | [#reconnect-keys](/docs/client-configuration/connect-string#reconnect-keys) | -| Failover (`addr`, `zone`, `target`, `auth_timeout_ms`) | [#failover-keys](/docs/client-configuration/connect-string#failover-keys) | -| Auth (`username`, `password`, `token`) | [#auth](/docs/client-configuration/connect-string#auth) | -| TLS (`tls_*`) | [#tls](/docs/client-configuration/connect-string#tls) | +| Storage (`sf_dir`, `sender_id`, …) | [#sf-keys](/docs/connect/clients/connect-string#sf-keys) | +| Reconnect (`reconnect_*`, `initial_connect_retry`, `close_flush_timeout_millis`) | [#reconnect-keys](/docs/connect/clients/connect-string#reconnect-keys) | +| Failover (`addr`, `zone`, `target`, `auth_timeout_ms`) | [#failover-keys](/docs/connect/clients/connect-string#failover-keys) | +| Auth (`username`, `password`, `token`) | [#auth](/docs/connect/clients/connect-string#auth) | +| TLS (`tls_*`) | [#tls](/docs/connect/clients/connect-string#tls) | diff --git a/documentation/high-availability/store-and-forward/when-to-use.md b/documentation/high-availability/store-and-forward/when-to-use.md index 38c69aad2..5b4a953f7 100644 --- a/documentation/high-availability/store-and-forward/when-to-use.md +++ b/documentation/high-availability/store-and-forward/when-to-use.md @@ -178,7 +178,7 @@ set, retaining HTTP for backward compatibility while the QWP path becomes the primary. For specifically the multi-host HA path on HTTP ILP, see the existing -[ILP overview "Multiple URLs for High Availability"](/docs/ingestion/ilp/overview/#multiple-urls-for-high-availability) +[ILP overview "Multiple URLs for High Availability"](/docs/connect/compatibility/ilp/overview/#multiple-urls-for-high-availability) section. 
QWP failover (documented in [Client failover concepts](/docs/high-availability/client-failover/concepts/)) replaces and extends it. diff --git a/documentation/ingestion/clients/c-and-cpp.md b/documentation/ingestion/clients/c-and-cpp.md index 359c576f2..643dea2ed 100644 --- a/documentation/ingestion/clients/c-and-cpp.md +++ b/documentation/ingestion/clients/c-and-cpp.md @@ -1,4 +1,5 @@ --- +slug: /connect/clients/c-and-cpp title: C & C++ Client Documentation description: "Dive into QuestDB using the C & C++ ingestion client for high-performance, @@ -185,7 +186,7 @@ int main() Now, both events use the same timestamp. We recommend using the event's original timestamp when ingesting data into QuestDB. Using ingestion-time timestamps precludes the ability to deduplicate rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +[important for exactly-once processing](/docs/connect/compatibility/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). ### Array Insertion @@ -599,7 +600,7 @@ error: Now, both events use the same timestamp. We recommend using the event's original timestamp when ingesting data into QuestDB. Using ingestion-time timestamps precludes the ability to deduplicate rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +[important for exactly-once processing](/docs/connect/compatibility/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). ### Array Insertion @@ -781,7 +782,7 @@ share the same codebase. Please refer to the for the full details on configuration. Alternatively, for a breakdown of Configuration string options available across -all clients, see the [Configuration string](/docs/ingestion/clients/configuration-string/) page. +all clients, see the [Connect string](/docs/connect/clients/connect-string/) page. 
### Don't forget to flush @@ -802,7 +803,7 @@ QuestDB instances), call `sender.flush_and_keep(&buffer)` or ### Transactional flush As described in -[ILP overview](/docs/ingestion/ilp/overview#http-transaction-semantics), the +[ILP overview](/docs/connect/compatibility/ilp/overview#http-transaction-semantics), the HTTP transport has some support for transactions. To ensure in advance that a flush will not affect more than one table, call @@ -836,7 +837,7 @@ version 9.0.0. ## Next Steps -Please refer to the [ILP overview](/docs/ingestion/ilp/overview) for details +Please refer to the [ILP overview](/docs/connect/compatibility/ilp/overview) for details about transactions, error control, delivery guarantees, health check, or table and column auto-creation. diff --git a/documentation/ingestion/clients/configuration-string.md b/documentation/ingestion/clients/configuration-string.md index eedb19416..d5a3e5cff 100644 --- a/documentation/ingestion/clients/configuration-string.md +++ b/documentation/ingestion/clients/configuration-string.md @@ -90,7 +90,7 @@ The following options are available: ## Other considerations -- Please refer to the [ILP overview](/docs/ingestion/ilp/overview) for +- Please refer to the [ILP overview](/docs/connect/compatibility/ilp/overview) for details about transactions, error control, delivery guarantees, health check, or table and column auto-creation. 
- The method `flush()` can be called to force sending the internal buffer to a diff --git a/documentation/ingestion/clients/date-to-timestamp-conversion.md b/documentation/ingestion/clients/date-to-timestamp-conversion.md index 3d0e266b4..e41e58e52 100644 --- a/documentation/ingestion/clients/date-to-timestamp-conversion.md +++ b/documentation/ingestion/clients/date-to-timestamp-conversion.md @@ -1,4 +1,5 @@ --- +slug: /connect/clients/date-to-timestamp-conversion title: Date to Timestamp Conversion in Different Programming Languages sidebar_label: Date to Timestamp description: @@ -13,7 +14,7 @@ QuestDB offers clients for Python, Go, Java, C, C++, Rust, C#/.NET, and JavaScri can directly use a `Timestamp` type when using the client, while others need to convert the timestamp into a long representing the epoch time in microseconds. We add such required conversions into the snippets. -Please refer to the [ingestion overview](/docs/ingestion/overview/) to learn more about the details of the client library for your language. +Please refer to the [ingestion overview](/docs/connect/overview/) to learn more about the details of the client library for your language. 
## Date to Timestamp in Python @@ -47,7 +48,7 @@ pd_timestamp = pd.Timestamp(datetime_obj) print(f"Pandas Timestamp: {pd_timestamp}") ``` -Learn more about the [QuestDB Python Client](/docs/ingestion/clients/python/) +Learn more about the [QuestDB Python Client](/docs/connect/clients/python/) ## Date to Timestamp in Go @@ -91,7 +92,7 @@ func main() { } ``` -Learn more about the [QuestDB Go Client](/docs/ingestion/clients/go/) +Learn more about the [QuestDB Go Client](/docs/connect/clients/go/) ## Date to Timestamp in Java @@ -175,7 +176,7 @@ public class Main { } ``` -Learn more about the [QuestDB Java Client](/docs/ingestion/clients/java/) +Learn more about the [QuestDB Java Client](/docs/connect/clients/java/) ## Date to Timestamp in C @@ -209,7 +210,7 @@ int main() { } ``` -Learn more about the [QuestDB C Client](/docs/ingestion/clients/c-and-cpp/#c-1) +Learn more about the [QuestDB C Client](/docs/connect/clients/c-and-cpp/#c-1) ## Date to Timestamp in C++ @@ -246,7 +247,7 @@ int main() { return 0; } ``` -Learn more about the [QuestDB C++ Client](/docs/ingestion/clients/c-and-cpp/) +Learn more about the [QuestDB C++ Client](/docs/connect/clients/c-and-cpp/) ## Date to Timestamp in Rust @@ -278,7 +279,7 @@ fn main() { } ``` -Learn more about the [QuestDB Rust Client](/docs/ingestion/clients/rust/) +Learn more about the [QuestDB Rust Client](/docs/connect/clients/rust/) ## Date to Timestamp in C#/.NET @@ -316,7 +317,7 @@ class Program } ``` -Learn more about the [QuestDB .NET Client](/docs/ingestion/clients/dotnet/) +Learn more about the [QuestDB .NET Client](/docs/connect/clients/dotnet/) ## Date to Timestamp in JavasScript/Node.js @@ -339,7 +340,7 @@ console.log("Timestamp (microseconds):", timestamp.toString()); // .timestampColumn("NonDesignatedTimestampColumnName", timestamp) ``` -Learn more about the [QuestDB Node.js Client](/docs/ingestion/clients/nodejs/) +Learn more about the [QuestDB Node.js Client](/docs/connect/clients/nodejs/) ## Date to Timestamp in 
Ruby @@ -363,7 +364,7 @@ puts "Date: #{date_obj}" puts "Timestamp (microseconds): #{timestamp}" ``` -Learn more about the [ILP text format](/docs/ingestion/ilp/advanced-settings/). +Learn more about the [ILP text format](/docs/connect/compatibility/ilp/advanced-settings/). ## Date to Timestamp in PHP @@ -391,5 +392,5 @@ echo "Date: " . $time_precise->format('Y-m-d H:i:s.u') . PHP_EOL; echo "Timestamp (microseconds): " . $timestamp_precise . PHP_EOL; ``` -Learn more about the [ILP text format](/docs/ingestion/ilp/advanced-settings/). +Learn more about the [ILP text format](/docs/connect/compatibility/ilp/advanced-settings/). diff --git a/documentation/ingestion/clients/dotnet.md b/documentation/ingestion/clients/dotnet.md index 627f86b49..41067715f 100644 --- a/documentation/ingestion/clients/dotnet.md +++ b/documentation/ingestion/clients/dotnet.md @@ -1,4 +1,5 @@ --- +slug: /connect/clients/dotnet title: .NET Client Documentation description: "Get started with QuestDB using the .NET client for efficient, @@ -31,7 +32,7 @@ perform basic insert operations. This page focuses on our high-performance ingestion client, which is optimized for **writing** data to QuestDB. For retrieving data, we recommend using a -[PostgreSQL-compatible .NET library](/docs/query/pgwire/dotnet/) or our +[PostgreSQL-compatible .NET library](/docs/connect/compatibility/pgwire/dotnet/) or our [HTTP query endpoint](/docs/query/overview/#rest-http-api). ::: @@ -176,7 +177,7 @@ class Program Now, both events use the same timestamp. We recommend using the event's original timestamp when ingesting data into QuestDB. Using ingestion-time timestamps precludes the ability to deduplicate rows, which is -[important for exactly-once processing](/docs/ingestion/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +[important for exactly-once processing](/docs/connect/compatibility/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). 
+Related accessors: -## Configuration options +| Method | Returns | +|--------|---------| +| `flush_and_get_fsn(&mut buf)` | Highest FSN published by this call. `None` if the buffer was empty. | +| `flush_and_keep_and_get_fsn(&buf)` | Same, but keeps the buffer. | +| `published_fsn()` | Highest FSN published locally. | +| `acked_fsn()` | Highest FSN completed (server ACK or reject-and-continue). | +| `await_acked_fsn(fsn, timeout)` | Block until `acked_fsn()` reaches `fsn`, or the timeout elapses. | -The easiest way to configure the line sender is the configuration string. The -general structure is: +In durable ACK mode, `acked_fsn` advances after durable upload, not on the +ordinary OK frame. -```plain -::addr=host:port;param1=val1;param2=val2;... +## Store-and-forward + +With store-and-forward (SF) enabled, unacknowledged frames are persisted to +disk and replayed after reconnection, surviving sender process restarts: + +```text +ws::addr=localhost:9000;sf_dir=/var/lib/questdb/sf;sender_id=ingest-1; ``` -`transport` can be `http`, `https`, `tcp`, or `tcps`. Go to the client's -[crate documentation](https://docs.rs/questdb-rs/latest/questdb/ingress) for the -full details on configuration. +Without `sf_dir`, unacknowledged data lives in process memory and is lost if +the sender process exits. The reconnect loop still spans transient server +outages, but a RAM cap bounds how much data can accumulate. -Alternatively, for breakdown of available params, see the -[Connect string](/docs/connect/clients/connect-string/) page. +### SF tuning keys -## Don't forget to flush +| Key | Default | Description | +|-----|---------|-------------| +| `sf_dir` | unset | Enables disk-backed SF when set. | +| `sender_id` | `default` | Slot identity. Allowed chars: `A-Za-z0-9_-`. Use distinct ids per sender process. | +| `sf_max_bytes` | 4 MiB | Per-segment size cap. | +| `sf_max_total_bytes` | 128 MiB (memory) / 10 GiB (disk) | Cap on total queued bytes. 
| +| `sf_durability` | `memory` | `memory`, `flush`, or `append` (strongest). | +| `sf_append_deadline_millis` | 30000 | Per-append wait budget in `append` mode. | +| `drain_orphans` | `off` | If `on`, take over stale slots owned by a previous sender. | +| `max_background_drainers` | 4 | Concurrency cap when draining orphans. | -The sender and buffer objects are entirely decoupled. This means that the sender -won't get access to the data in the buffer until you explicitly call -`sender.flush(&mut buffer)` or a variant. This may lead to a pitfall where you -drop a buffer that still has some data in it, resulting in permanent data loss. +## Durable acknowledgement -A common technique is to flush periodically on a timer and/or once the buffer -exceeds a certain size. You can check the buffer's size by calling -`buffer.len()`. +:::note Enterprise -The default `flush()` method clears the buffer after sending its data. If you -want to preserve its contents (for example, to send the same data to multiple -QuestDB instances), call `sender.flush_and_keep(&mut buffer)` instead. +Durable acknowledgement requires QuestDB Enterprise with primary replication +configured. -## Transactional flush +::: -As described in -[ILP overview](/docs/connect/compatibility/ilp/overview#http-transaction-semantics), the -HTTP transport has some support for transactions. +By default, the server confirms a batch once it is committed to the local +[WAL](/docs/concepts/write-ahead-log/). To wait for the batch to be durably +uploaded to object storage: -In order to ensure in advance that a flush will not affect more than one table, -call `sender.flush_and_keep_with_flags(&mut buffer, true)`. This call will -refuse to flush a buffer if the flush wouldn't be data-transactional. 
+```text +ws::addr=localhost:9000;sf_dir=/var/lib/questdb/sf;request_durable_ack=on; +``` -## Error handling +`durable_ack_keepalive_interval_millis` (default 200) controls how often the +client probes the server for durable ACK progress when no other traffic is in +flight. -The two supported transport modes, HTTP and TCP, handle errors very differently. -In a nutshell, HTTP is much better at error handling. +## Asynchronous error handling -### HTTP +QWP/WebSocket ingestion is asynchronous: `flush()` returns as soon as the +frame is accepted locally. Server-side rejections and protocol violations are +reported separately. -HTTP distinguishes between recoverable and non-recoverable errors. For -recoverable ones, it enters a retry loop with exponential backoff, and reports -the error to the caller only after it has exhausted the retry time budget -(configuration parameter: `retry_timeout`). +There are two ways to observe them. -`sender.flush()` and variant methods communicate the error in the `Result` -return value. The category of the error is signalled through the `ErrorCode` -enum, and it's accompanied with an error message. +### Polling -After the sender has signalled an error, it remains usable. You can handle the -error as appropriate and continue using it. +```rust +while let Some(err) = sender.poll_qwp_ws_error()? { + eprintln!( + "category={:?} policy={:?} status={:?} fsn=[{}..={}] msg={:?}", + err.category, + err.applied_policy, + err.status, + err.from_fsn, + err.to_fsn, + err.message, + ); +} +``` -### TCP +### Handler callback -TCP doesn't report errors at all to the sender; instead, the server quietly -disconnects and you'll have to inspect the server logs to get more information -on the reason. When this has happened, the sender transitions into an error -state, and it is permanently unusable. You must drop it and create a new sender. -You can inspect the sender's error state by calling `sender.must_close()`. +Install a handler on the builder. 
It runs synchronously from sender API calls +such as `flush()`. The handler must not call back into the same sender. -For more details about the HTTP and TCP transports, please refer to the -[ILP overview](/docs/connect/compatibility/ilp/overview#transport-selection). +```rust +use questdb::ingress::{Protocol, SenderBuilder}; + +let mut sender = SenderBuilder::new(Protocol::QwpWs, "localhost", 9000) + .qwp_ws_error_handler(|err| { + eprintln!("QWP error: {err:?}"); + })? + .build()?; +``` + +### `QwpWsSenderError` fields + +| Field | Meaning | +|-------|---------| +| `category` | `SchemaMismatch`, `ParseError`, `InternalError`, `SecurityError`, `WriteError`, `ProtocolViolation`, `Unknown`. | +| `applied_policy` | `DropAndContinue` (batch dropped, sender continues) or `Halt` (sender latched terminal). | +| `status` | Raw QWP status byte. `None` for WebSocket protocol violations. | +| `message` | Human-readable error text from the server or the close reason. | +| `message_sequence` | Server's per-frame QWP message sequence. | +| `from_fsn` / `to_fsn` | Inclusive FSN span of the affected frame(s). | + +`Sender::qwp_ws_errors_dropped()` reports how many diagnostics were lost +because the bounded log overflowed (typically due to a lagging poll cursor). + +After a `Halt` policy fires, the sender is terminal. Drop it and create a new +one. `Sender::must_close()` reports whether the sender has entered a terminal +state. + +`DropAndContinue` errors do not halt the sender. The affected batch is +discarded; subsequent frames are unaffected and the I/O loop keeps running. + +## Progress modes + +The client drives the WebSocket loop in one of two modes: + +| Mode | Behaviour | +|------|-----------| +| `QwpWsProgress::Background` (default) | A sender-owned thread sends frames, receives ACKs, reconnects, and replays. Right choice for most callers. | +| `QwpWsProgress::Manual` | No background thread. 
The caller drives progress with `Sender::drive_once()` or `Sender::await_acked_fsn()`. | + +```rust +use questdb::ingress::{Protocol, SenderBuilder, QwpWsProgress}; + +let mut sender = SenderBuilder::new(Protocol::QwpWs, "localhost", 9000) + .qwp_ws_progress(QwpWsProgress::Manual)? + .build()?; + +loop { + // ... publish frames ... + sender.flush(&mut buffer)?; + // Drive until idle so the I/O loop catches up. + while sender.drive_once()? {} +} +``` + +`drive_once()` performs at most one unit of work per call (send one frame, +drain ready responses, do one storage-maintenance step). Call it in a loop +until it returns `false` before parking. -## Protocol Version +## Failover and high availability -To enhance data ingestion performance, QuestDB introduced an upgrade to the -text-based InfluxDB Line Protocol which encodes arrays and `f64` values in -binary form. Arrays are supported only in this upgraded protocol version. +:::note Enterprise -You can select the protocol version with the `protocol_version` setting in the -configuration string. +Multi-host failover with automatic reconnect is most useful with QuestDB +Enterprise primary-replica replication. -HTTP transport automatically negotiates the protocol version by default. In order -to avoid the slight latency cost at connection time, you can explicitly configure -the protocol version by setting `protocol_version=2|1;`. +::: + +### Multiple endpoints -TCP transport does not negotiate the protocol version and uses version 1 by -default. You must explicitly set `protocol_version=2;` in order to ingest -arrays, as in this example: +Specify a comma-separated address list (or repeat `addr=`): ```text -tcp::addr=localhost:9009;protocol_version=2; +ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; ``` -Protocol Version 2 along with its support for arrays is available from QuestDB -version 9.0.0. 
+The client picks an endpoint, connects, and walks the list to find the next +healthy peer when the current connection breaks. + +### Reconnect knobs + +| Key | Default | Description | +|-----|---------|-------------| +| `reconnect_max_duration_millis` | 300000 | Total outage budget before giving up. | +| `reconnect_initial_backoff_millis` | 100 | First post-failure sleep. | +| `reconnect_max_backoff_millis` | 5000 | Cap on per-attempt sleep. | +| `initial_connect_retry` | `off` | Retry on first connect. Values: `off`, `on` / `true` / `sync` (synchronous retry), `async` (background retry), `false` (alias for `off`). | + +By default the first connect fails fast; subsequent disconnects use the +reconnect policy. Set `initial_connect_retry=on` to apply the same policy to +the initial connect. + +The Rust client is zone-blind on ingress: the `zone=` key is accepted but +ignored, so connect strings shared with future zone-aware egress clients work +unchanged. + +The Rust client does not currently expose connection-state event callbacks +(the equivalent of Java's `SenderConnectionListener`). Connection lifecycle is +observable through `log` crate output and through error notifications +delivered to the polling API or the `qwp_ws_error_handler` callback. + +### Error classification + +- **Authentication errors** (`401`/`403`): terminal across all endpoints. The + reconnect loop stops immediately. +- **Role reject** (`421 + X-QuestDB-Role`): transient if the role is + `PRIMARY_CATCHUP`, topology-level otherwise. +- **Version mismatch at upgrade**: per-endpoint, not terminal. The client + tries the next endpoint. +- **All other errors** (TCP/TLS failures, `404`, `503`, mid-stream errors): + transient, fed into the reconnect loop. 
+ +## Closing the sender + +Call `Sender::close_drain()` before dropping the sender: + +```rust +sender.close_drain()?; +drop(sender); +``` + +`close_drain()` stops accepting new publications and waits up to +`close_flush_timeout_millis` (default 5000) for already-published frames to +ACK. Dropping the sender without `close_drain` may discard unacknowledged +in-memory frames; SF mode persists them to disk so a later sender can replay +them. + +## Configuration reference + +For the full list of connect-string keys and their defaults, see the +[connect string reference](/docs/connect/clients/connect-string/). + +Common WebSocket-specific options: + +| Key | Default | Description | +|-----|---------|-------------| +| `addr` | required | One or more `host:port` entries. | +| `username` / `password` | unset | HTTP basic auth. | +| `token` | unset | Bearer token auth (Enterprise). | +| `auth_timeout_ms` | 15000 | WebSocket upgrade timeout. | +| `tls_ca` / `tls_roots` / `tls_verify` | webpki | TLS configuration (`wss`/`qwpwss` only). | +| `auto_flush` | must be `off` if set | Auto-flush is not supported. `auto_flush_rows` and `auto_flush_bytes` are rejected. | +| `sf_dir` | unset | Enable disk-backed store-and-forward. | +| `sender_id` | `default` | SF slot identity. | +| `sf_durability` | `memory` | `memory`, `flush`, or `append`. | +| `request_durable_ack` | `off` | Wait for durable upload before ACK (Enterprise). | +| `reconnect_max_duration_millis` | 300000 | Per-outage reconnect budget. | +| `initial_connect_retry` | `off` | Apply reconnect policy to the first connect. | +| `close_flush_timeout_millis` | 5000 | Bound on `close_drain` wait. | +| `qwp_ws_progress` | `background` | `background` or `manual`. | +| `max_in_flight` | 128 | Max unacknowledged frames in flight on a connection. Acts as the backpressure window: publishers block locally once the window is full. 
| ## Crate features -The QuestDB client crate supports some optional features, mostly related to -additional library dependencies. +The QuestDB Rust client uses Cargo features to gate optional dependencies and +transports. ### Default-enabled features -- `tls-webpki-certs`: supports using the `webpki-roots` crate for TLS - certificate verification. +- `sync-sender`: enables all sync sender transports (TCP, HTTP, QWP/UDP, + QWP/WebSocket). +- `tls-webpki-certs`: TLS verification using `webpki-roots`. +- `ring-crypto`: TLS crypto via the `ring` crate. ### Optional features -These features are opt-in: +- `sync-sender-qwp-ws`: QWP/WebSocket transport only (subset of `sync-sender`). +- `chrono_timestamp`: build timestamps from `chrono::DateTime`. +- `ndarray`: ingest arrays from the [ndarray](https://docs.rs/ndarray) crate. +- `rust_decimal` / `bigdecimal`: ingest decimals from those crates. +- `tls-native-certs`: validate TLS against the OS certificate store. +- `insecure-skip-verify`: disable TLS verification (testing only). -- `ilp-over-http`: Enables ILP/HTTP support using the `ureq` crate. -- `chrono_timestamp`: Allows specifying timestamps as `chrono::Datetime` - objects. -- `tls-native-certs`: Supports validating TLS certificates against the OS's - certificates store. -- `insecure-skip-verify`: Allows skipping server certificate validation in TLS - (this compromises security). -- `ndarray`: Enables ingestion of arrays from the - [ndarray](https://docs.rs/ndarray) crate. +## Migration from ILP (HTTP/TCP) -## Next steps +The buffer API is unchanged. 
To switch a sender to QWP/WebSocket: + +| Aspect | HTTP (ILP) | WebSocket (QWP) | +|--------|-----------|-----------------| +| Connect string schema | `http::` / `https::` | `ws::` / `wss::` | +| Batch trigger | Row/time-based auto-flush (defaults: 75000 rows, 1000 ms) | Explicit `flush()` only | +| Error model | Synchronous on `flush()` | Async via `poll_qwp_ws_error` / handler | +| Completion tracking | Implicit per request | Explicit FSN watermarks | +| Store-and-forward | Not available | Available (`sf_dir`) | +| Multi-endpoint failover | Not available | Built in (comma-separated `addr`) | +| Shutdown | `drop` | `close_drain()` then `drop` | -Please refer to the [ILP overview](/docs/connect/compatibility/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table -and column auto-creation. +To migrate an existing sender, change the connect string from `http::` to +`ws::` (or `https::` to `wss::`), drop any `auto_flush_*` keys, install a +`qwp_ws_error_handler` or poll `poll_qwp_ws_error()`, and call `close_drain()` +before dropping the sender. + +## Next steps -Explore the full capabilities of the Rust client via the -[Crate API page](https://docs.rs/questdb-rs/latest/questdb/). +Explore the full API on the +[crate docs](https://docs.rs/questdb-rs/latest/questdb/ingress/). -With data flowing into QuestDB, now it's time for analysis. +For querying QuestDB from Rust, see the +[PGWire Rust client](/docs/connect/compatibility/pgwire/rust/) or the +[REST API](/docs/connect/compatibility/rest-api/). -To learn _The Way_ of QuestDB SQL, see the -[Query & SQL Overview](/docs/query/overview/). +With data flowing into QuestDB, the next step is querying. See the +[Query overview](/docs/query/overview/) to learn QuestDB SQL. -Alone? Stuck? Want help? Visit us in our +Need help? Visit the [Community Forum](https://community.questdb.com/). 
From 2e6f8bcd721bf203cda80f8a6f8f308b239d6e2b Mon Sep 17 00:00:00 2001 From: Marko Topolnik Date: Fri, 15 May 2026 14:24:16 +0200 Subject: [PATCH 26/44] Rewrite Go client docs for the QWP protocol The Go client page was still the legacy HTTP/ILP, insert-only reference. Rewrite it to document the QWP (WebSocket) path, mirroring the in-flight Java client page as the per-language template: client creation, QWP ingestion, the new QwpQueryClient query API, error handling, store-and-forward, and multi-host failover. Exhaustive connect-string keys, protocol details, and HA concepts are deep-linked to the already-landed connect-string, protocols, and high-availability pages rather than duplicated. The page is written so an autonomous coding agent can build a correct Go application from this page alone. The caveats that otherwise cause data loss, corruption, or panics are stated inline where code is copied: the asynchronous ingestion error model, query-batch buffer aliasing, single-goroutine concurrency, the comma-ok QwpSender assertion, store-and-forward blocking and HALT, and Exec not being retried across a reconnect. Simple single-host idioms are the default; the extra rules for multi-host failover are consolidated into a short checklist at the top of the failover section and signposted from the top of the page, so neither audience pays for the other. Also add a documentation changelog entry. The featured example blocks use RemoteRepoExample and render once the matching Go client examples land on go-questdb-client/main. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- documentation/changelog.mdx | 1 + documentation/ingestion/clients/go.md | 1044 ++++++++++++++++++++----- 2 files changed, 859 insertions(+), 186 deletions(-) diff --git a/documentation/changelog.mdx b/documentation/changelog.mdx index 86eecb52e..293d3cd13 100644 --- a/documentation/changelog.mdx +++ b/documentation/changelog.mdx @@ -26,6 +26,7 @@ This page tracks significant updates to the QuestDB documentation. ### Updated +- [Go client](/docs/connect/clients/go/) - Rewrote for the QWP binary protocol: ingestion, the QwpQueryClient query API, store-and-forward, and failover - [Aggregation functions](/docs/query/functions/aggregation/) - Added demo tags and updated examples with runnable queries - [LATEST ON](/docs/query/sql/latest-on/) - Added demo tags to examples - [JOIN](/docs/query/sql/join/) - Updated examples to use demo data diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index 0a8a75652..4451d250b 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -1,294 +1,966 @@ --- slug: /connect/clients/go -title: Go Client Documentation +title: Go client for QuestDB +sidebar_label: Go description: - "Dive into QuestDB using the Go ingestion client for high-performance, - insert-only operations. Unlock peak time series data ingestion." + "QuestDB Go client for high-throughput data ingestion and streaming SQL + queries over the QWP binary protocol." --- -import { ILPClientsTable } from "@theme/ILPClientsTable" +import { RemoteRepoExample } from "@theme/RemoteRepoExample" -QuestDB supports the Go ecosystem, offering a Go client designed for -high-performance data ingestion, tailored specifically for insert-only -operations. This combination of QuestDB and its Go client provides exceptional -time series data ingestion and analytical capabilities. 
+The QuestDB Go client connects to QuestDB over the +[QWP binary protocol](/docs/connect/wire-protocols/qwp-ingress-websocket/) +(WebSocket). It supports high-throughput data ingestion and streaming SQL +queries on the same transport. -The Go client introduces several advantages: +Key capabilities: -- **Automatic table creation**: No need to define your schema upfront. -- **Concurrent schema changes**: Seamlessly handle multiple data streams with - on-the-fly schema modifications -- **Optimized batching**: Use strong defaults or curate the size of your batches -- **Health checks and feedback**: Ensure your system's integrity with built-in - health monitoring -- **Automatic write retries**: Reuse connections and retry after interruptions +- **Ingestion**: column-oriented batched writes with automatic table creation, + schema evolution, and optional store-and-forward durability. +- **Querying**: streaming SQL result sets, DDL and DML execution, bind + parameters, and byte-credit flow control. +- **Failover**: multi-endpoint connections with automatic reconnect across + rolling upgrades and primary migrations. -This quick start guide will help you get up and running with the basic -functionalities of the Go client, covering connection setup, authentication, and -some common insert patterns. +:::tip Legacy transports - +The client also supports ILP ingestion over HTTP and TCP for backward +compatibility. This page documents the recommended WebSocket (QWP) path. For +ILP transport details, see the +[ILP overview](/docs/connect/compatibility/ilp/overview/). -:::info +::: -This page focuses on our high-performance ingestion client, which is optimized for **writing** data to QuestDB. -For retrieving data, we recommend using a [PostgreSQL-compatible Go library](/docs/connect/compatibility/pgwire/go/) or our -[HTTP query endpoint](/docs/query/overview/#rest-http-api). +## Quick start -::: +The client requires Go 1.23 or later. 
Add it to your module: -## Requirements +```bash +go get github.com/questdb/go-questdb-client/v4 +``` -- Requires Go 1.19 or later. -- Assumes QuestDB is running. If it's not, refer to - [the general quick start](/docs/getting-started/quick-start/). +### Ingest data -## Client Installation +```go +package main -To add the QuestDB client to your Go project: +import ( + "context" -```toml -go get github.com/questdb/go-questdb-client/ -``` + qdb "github.com/questdb/go-questdb-client/v4" +) -## Authentication +func main() { + ctx := context.TODO() -Passing in a configuration string with HTTP basic authentication: + sender, err := qdb.LineSenderFromConf(ctx, "ws::addr=localhost:9000;") + if err != nil { + panic(err) + } + defer sender.Close(ctx) -```Go + err = sender.Table("trades"). + Symbol("symbol", "ETH-USD"). + Symbol("side", "sell"). + Float64Column("price", 2615.54). + Float64Column("amount", 0.00044). + AtNow(ctx) + if err != nil { + panic(err) + } + + if err := sender.Flush(ctx); err != nil { + panic(err) + } +} +``` + +### Query data + +```go package main import ( "context" - "github.com/questdb/go-questdb-client/v4" + "fmt" + + qdb "github.com/questdb/go-questdb-client/v4" ) func main() { ctx := context.TODO() - client, err := questdb.LineSenderFromConf(ctx, "http::addr=localhost:9000;username=admin;password=quest;") + client, err := qdb.NewQwpQueryClient(ctx, + qdb.WithQwpQueryAddress("localhost:9000")) if err != nil { - panic("Failed to create client") + panic(err) + } + defer client.Close(ctx) + + q := client.Query(ctx, + "SELECT symbol, price FROM trades WHERE symbol = 'ETH-USD' LIMIT 10") + defer q.Close() + + for batch, err := range q.Batches() { + if err != nil { + panic(err) + } + for row := 0; row < batch.RowCount(); row++ { + fmt.Println(batch.String(0, row), batch.Float64(1, row)) + } } - - // Utilize the client for your operations... } ``` -Or, set the QDB_CLIENT_CONF environment variable and call -`questdb.LineSenderFromEnv()`. 
+:::caution Read before building on these snippets + +The two snippets above are deliberately minimal. Three behaviors will cause +data loss, corruption, or panics if you carry the minimal form into real code: + +- **Ingestion errors are asynchronous.** `Flush` returning `nil` does **not** + mean the server accepted the rows. Schema, parse, and write rejections are + delivered out of band. Register an error handler. See + [Ingestion errors](#ingestion-errors). +- **A sender or query client is not safe for concurrent use.** Use one per + goroutine. See [Concurrency](#concurrency). +- **A query batch is valid only inside its loop iteration.** Some accessors + alias the network buffer. Copy out anything you keep. See + [Reading result batches](#reading-result-batches). + +Building with multi-host failover? It adds exactly three rules on top of the +single-host code, listed up front in +[Failover and high availability](#failover-and-high-availability). Single-host +applications can ignore them. + +::: + +## Authentication and TLS -1. Export the configuration string as an environment variable: - ```bash - export QDB_CLIENT_CONF="http::addr=localhost:9000;username=admin;password=quest;" - ``` -2. Then in your Go code: - ```Go - client, err := questdb.LineSenderFromEnv(context.TODO()) - ``` +Authentication happens at the HTTP level during the WebSocket upgrade, before +any binary frames are exchanged. The same mechanisms work for both the +`LineSender` (ingestion) and the `QwpQueryClient` (querying). -Alternatively, you can use the built-in Go API to specify the connection -options. 
+### HTTP basic auth ```go -package main +// Ingestion +sender, err := qdb.LineSenderFromConf(ctx, + "wss::addr=db.example.com:9000;username=admin;password=quest;") -import ( - "context" - qdb "github.com/questdb/go-questdb-client/v4" -) +// Querying +client, err := qdb.QwpQueryClientFromConf(ctx, + "wss::addr=db.example.com:9000;username=admin;password=quest;") +``` +The options API exposes the same settings: -func main() { - ctx := context.TODO() +```go +sender, err := qdb.NewLineSender(ctx, + qdb.WithQwp(), + qdb.WithAddress("db.example.com:9000"), + qdb.WithTls(), + qdb.WithBasicAuth("admin", "quest")) +``` - client, err := qdb.NewLineSender(context.TODO(), qdb.WithHttp(), qdb.WithAddress("localhost:9000"), qdb.WithBasicAuth("admin", "quest")) +### Token auth (Enterprise) + +```go +sender, err := qdb.LineSenderFromConf(ctx, + "wss::addr=db.example.com:9000;token=your_bearer_token;") + +client, err := qdb.NewQwpQueryClient(ctx, + qdb.WithQwpQueryAddress("db.example.com:9000"), + qdb.WithQwpQueryTls(), + qdb.WithQwpQueryBearerToken("your_bearer_token")) ``` -When using QuestDB Enterprise, authentication can also be done via REST token. -Please check the [RBAC docs](/docs/security/rbac/#authentication) for more -info. +### TLS with a custom trust store -## Basic Insert +TLS is enabled by the `wss` schema (or `qdb.WithTls()`). Trust-store keys are +documented in the [TLS section](/docs/connect/clients/connect-string#tls) of +the connect string reference. For OIDC authentication (Enterprise), see +[OpenID Connect](/docs/security/oidc/). -Example: inserting executed trades for cryptocurrencies. +## Creating the client -Without authentication and using the current timestamp: +### From a connect string -```Go -package main +The connect string format is `::=;=;...;`. 
Use +`ws` for plain WebSocket or `wss` for TLS: -import ( - "context" - "github.com/questdb/go-questdb-client/v4" -) +```go +sender, err := qdb.LineSenderFromConf(ctx, "ws::addr=localhost:9000;") -func main() { - ctx := context.TODO() +client, err := qdb.QwpQueryClientFromConf(ctx, "ws::addr=localhost:9000;") +``` - client, err := questdb.LineSenderFromConf(ctx, "http::addr=localhost:9000;") - if err != nil { - panic("Failed to create client") - } +For the full list of connect-string keys, see the +[connect string reference](/docs/connect/clients/connect-string/). - err = client.Table("trades"). - Symbol("symbol", "ETH-USD"). - Symbol("side", "sell"). - Float64Column("price", 2615.54). - Float64Column("amount", 0.00044). - AtNow(ctx) +### From an environment variable - if err != nil { - panic("Failed to insert data") - } +Set `QDB_CLIENT_CONF` to avoid hard-coding credentials: - err = client.Flush(ctx) - if err != nil { - panic("Failed to flush data") - } -} +```bash +export QDB_CLIENT_CONF="wss::addr=db.example.com:9000;username=admin;password=quest;" ``` -In this case, the designated timestamp will be the one at execution time. Let's -see now an example with an explicit timestamp, custom auto-flushing, and basic -auth. +```go +sender, err := qdb.LineSenderFromEnv(ctx) +``` -```Go -package main +### Using the options API -import ( - "context" - "github.com/questdb/go-questdb-client/v4" - "time" -) +The options API provides type-safe configuration. `NewLineSender` requires +exactly one transport option (`qdb.WithQwp()` here); +`LineSenderFromConf` infers the transport from the `ws`/`wss` schema instead. 
+An error handler can only be set through the options API: -func main() { - ctx := context.TODO() +```go +sender, err := qdb.NewLineSender(ctx, + qdb.WithQwp(), + qdb.WithAddress("localhost:9000"), + qdb.WithAutoFlushRows(500), + qdb.WithAutoFlushInterval(50*time.Millisecond), + qdb.WithErrorHandler(func(e *qdb.SenderError) { /* see Error handling */ })) + +client, err := qdb.NewQwpQueryClient(ctx, + qdb.WithQwpQueryAddress("localhost:9000"), + qdb.WithQwpQueryInitialCredit(256*1024)) +``` - client, err := questdb.LineSenderFromConf(ctx, "http::addr=localhost:9000;username=admin;password=quest;auto_flush_rows=100;auto_flush_interval=1000;") - if err != nil { - panic("Failed to create client") - } +## Data ingestion + +### Concurrency + +A `LineSender` owns a single connection and is **not safe for concurrent +use**. Sharing one across goroutines corrupts the buffer and interleaves +rows. Create one sender per goroutine, or hand rows to a single dedicated +writer goroutine through a channel. + +Connection pooling (`LineSenderPool`) targets the stateless HTTP transport and +is not available for QWP, so it is not the answer to QWP concurrency. + +### General usage pattern + +1. Create a sender via `qdb.LineSenderFromConf()` or `qdb.NewLineSender()`. +2. Call `Table(name)` to select a table. +3. Call column methods to add values: + - `Symbol(name, value)` + - `StringColumn(name, value)`, `BoolColumn(name, value)` + - `Int64Column(name, value)`, `Float64Column(name, value)` + - `TimestampColumn(name, time.Time)` for non-designated timestamps + - `Long256Column(name, *big.Int)` + - `Float64Array1DColumn` / `2D` / `3D` / `NDColumn` (see + [Ingest arrays](#ingest-arrays)) + - `DecimalColumn`, `DecimalColumnFromString` (see + [Decimal columns](#decimal-columns)) +4. Call `At(ctx, time.Time)` or `AtNow(ctx)` to finalize the row. +5. Repeat from step 2, or call `Flush(ctx)` to send buffered data. +6. Call `Close(ctx)` when done. 
+ +The call order is fixed: `Table`, then `Symbol`s, then column setters, then +`At`/`AtNow`. The fluent methods do not return errors; the first error is +latched and surfaces from `At`, `AtNow`, or `Flush`, so always check that +return value. + +:::caution The error from `At`/`AtNow`/`Flush` is only the local error + +It reports a client-side problem: a bad value, wrong call order, or +store-and-forward backpressure. Server-side rejections (schema mismatch, +parse error, write error) are **asynchronous** and are delivered to the +error handler, never returned here. A `nil` return does not mean the server +accepted the data. See [Ingestion errors](#ingestion-errors). - timestamp := time.Now() - err = client.Table("trades"). - Symbol("symbol", "ETH-USD"). - Symbol("side", "sell"). - Float64Column("price", 2615.54). - Float64Column("amount", 0.00044). - At(ctx, timestamp) +::: - if err != nil { - panic("Failed to insert data") - } +Tables and columns are created automatically if they do not exist. The full +runnable example registers an error handler, the minimum correct shape for a +QWP producer: - err = client.Flush(ctx) - // You can flush manually at any point. - // If you don't flush manually, the client will flush automatically - // when a row is added and either: - // * The buffer contains 75000 rows (if HTTP) or 600 rows (if TCP) - // * The last flush was more than 1000ms ago. - // Auto-flushing can be customized via the `auto_flush_..` params. + - if err != nil { - panic("Failed to flush data") - } +The QWP transport exposes column types that are not part of ILP. Type-assert +the sender to `qdb.QwpSender` with the comma-ok form (only `ws`/`wss` senders +implement it; an HTTP or TCP sender does not): + +```go +sender, err := qdb.LineSenderFromConf(ctx, "ws::addr=localhost:9000;") +qs, ok := sender.(qdb.QwpSender) +if !ok { + panic("not a QWP sender") +} + +err = qs.Table("trades"). + Symbol("symbol", "ETH-USD"). + Int32Column("venue_id", 7). 
+ CharColumn("side", 'S'). + UuidColumn("order_id", hi, lo). + AtNano(ctx, time.Now()) +``` + +`QwpSender` adds `ByteColumn`, `ShortColumn`, `Int32Column`, `Float32Column`, +`CharColumn`, `DateColumn`, `TimestampNanosColumn`, `UuidColumn`, +`GeohashColumn`, `Int64Array1DColumn` / `2D` / `3D`, the decimal columns, and +`AtNano` for nanosecond designated timestamps. + +### Ingest arrays + +For 1D, 2D, and 3D `double` arrays, pass a Go slice directly: + +```go +prices := []float64{1.0842, 1.0843, 1.0841} +err = sender.Table("book").Float64Array1DColumn("levels", prices).AtNow(ctx) +``` + +For higher-dimensional arrays, build an `NdArray` once and reuse it: + +```go +arr, err := qdb.NewNDArray[float64](3, 3, 3) +if err != nil { + panic(err) } +arr.Fill(1.5) +err = sender.Table("book").Float64ArrayNDColumn("cube", arr).AtNow(ctx) ``` -We recommended to use User-assigned timestamps when ingesting data into QuestDB. -Using the current timestamp hinder the ability to deduplicate rows which is -[important for exactly-once processing](/docs/connect/compatibility/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +Values are stored in row-major order: the last dimension varies fastest. Use +`Set(value, positions...)` to write at specific coordinates, `Append(value)` +for sequential fills, and `Reshape(shape...)` to change the shape without +reallocating. + +### Designated timestamp + +The [designated timestamp](/docs/concepts/designated-timestamp/) column +controls time-based partitioning and ordering: + +```go +// User-assigned (recommended for deduplication and exactly-once delivery) +err = sender.Table("trades"). + Symbol("symbol", "EURUSD"). + Float64Column("price", 1.0842). + At(ctx, time.Now()) + +// Nanosecond precision (creates a timestamp_ns column); QwpSender only +err = qs.Table("ticks"). + Symbol("symbol", "EURUSD"). + Float64Column("price", 1.0842). 
+ AtNano(ctx, time.Now()) + +// Server-assigned (server uses its wall-clock time) +err = sender.Table("trades"). + Symbol("symbol", "EURUSD"). + Float64Column("price", 1.0842). + AtNow(ctx) +``` - +The server pauses after streaming the granted budget and replenishes after +each batch. A credit of `0` (the default) means unbounded: the server streams +as fast as the network allows, so set a credit when consuming a large result +set on a memory-constrained client. -## Configuration options +### Compression -The minimal configuration string needs to have the protocol, host, and port, as -in: +Negotiate zstd compression to reduce bandwidth for large result sets: +```go +client, err := qdb.QwpQueryClientFromConf(ctx, + "ws::addr=localhost:9000;compression=zstd;compression_level=3;") ``` -http::addr=localhost:9000; + +Batches are decompressed automatically. + +## Error handling + +### Ingestion errors + +WebSocket ingestion uses an asynchronous error model. Batch rejections are +**not** returned from `Flush`. They are delivered to a `SenderErrorHandler` +callback. If you do not register one, a built-in handler logs them, but your +application is not notified and cannot dead-letter or alert, so register one +in any non-trivial producer: + +```go +sender, err := qdb.NewLineSender(ctx, + qdb.WithQwp(), + qdb.WithAddress("localhost:9000"), + qdb.WithErrorHandler(func(e *qdb.SenderError) { + log.Printf("rejected: category=%s table=%s msg=%s fsn=[%d,%d]", + e.Category, e.TableName, e.ServerMessage, e.FromFsn, e.ToFsn) + })) ``` -In the Go client, you can set the configuration options via the standard config -string, which is the same across all clients, or using -[the built-in API](https://pkg.go.dev/github.com/questdb/go-questdb-client/#LineSenderOption). 
+Each `SenderError` carries the `Category` +(`CategorySchemaMismatch`, `CategoryParseError`, `CategoryInternalError`, +`CategorySecurityError`, `CategoryWriteError`, `CategoryProtocolViolation`, or +`CategoryUnknown`), the `AppliedPolicy` (`PolicyDropAndContinue` or +`PolicyHalt`), the server message, the rejected table, and the `[FromFsn, +ToFsn]` span that correlates the rejection with `FlushAndGetSequence`. -For all the extra options you can use, please check -[the client docs](https://pkg.go.dev/github.com/questdb/go-questdb-client/#LineSenderFromConf) +The per-category policy is configurable. Resolution precedence is the policy +resolver, then the per-category policy, then the connect-string `on_*_error` +keys, then the spec defaults. `CategoryProtocolViolation` and +`CategoryUnknown` are always `PolicyHalt`: -Alternatively, for a breakdown of Configuration string options available across -all clients, see the [Connect string](/docs/connect/clients/connect-string/) page. +```go +qdb.WithErrorPolicy(qdb.CategorySchemaMismatch, qdb.PolicyDropAndContinue) +qdb.WithErrorPolicyResolver(func(c qdb.Category) qdb.Policy { ... }) +qdb.WithErrorInboxCapacity(512) +``` -## Next Steps +After a `PolicyHalt` rejection, the sender stops draining and the next +producer call returns the same payload as a typed error. Unwrap it with +`errors.As`, then `Close` and rebuild the sender to continue: -Please refer to the [ILP overview](/docs/connect/compatibility/ilp/overview) for details -about transactions, error control, delivery guarantees, health check, or table -and column auto-creation. +```go +if err := sender.Flush(ctx); err != nil { + var se *qdb.SenderError + if errors.As(err, &se) { + // se.Category, se.ServerMessage, se.FromFsn, se.ToFsn + } +} +``` + +The handler runs on a dedicated dispatcher goroutine, never on the producer +goroutine. If the bounded inbox fills, surplus notifications are dropped and +counted by `QwpSender.DroppedErrorNotifications()`. 
+ +### Query errors + +Server-side query failures surface as a `*QwpQueryError` from the `Batches()` +iteration or the `Exec` return value: + +```go +for batch, err := range q.Batches() { + if err != nil { + var qe *qdb.QwpQueryError + if errors.As(err, &qe) { + log.Printf("query failed: 0x%02X %s", qe.Status, qe.Message) + } + break + } + // ... +} +``` + +| Code | Name | Description | +| ------ | --------------- | ------------------------------------------------- | +| `0x03` | SCHEMA_MISMATCH | Bind parameter type incompatible with placeholder | +| `0x05` | PARSE_ERROR | SQL syntax error or malformed message | +| `0x06` | INTERNAL_ERROR | Server-side execution failure | +| `0x08` | SECURITY_ERROR | Authorization failure | +| `0x0A` | CANCELLED | Query terminated by `Cancel` | +| `0x0B` | LIMIT_EXCEEDED | Protocol limit hit | + +Errors can arrive before any data or mid-stream. Once an error is yielded, no +further batches arrive for that query. + +### Connection-level errors + +- **Authentication failure**: a `401` or `403` response before the WebSocket + upgrade completes. Terminal across all endpoints. +- **Role mismatch**: `*QwpRoleMismatchError` from `NewQwpQueryClient` when no + configured endpoint satisfies the `target=` filter. It reports the endpoints + tried, the last observed server role, and the last transport error. -Explore the full capabilities of the Go client via -[Go.dev](https://pkg.go.dev/github.com/questdb/go-questdb-client/). +## Failover and high availability -With data flowing into QuestDB, now it's time to for analysis. +:::note Enterprise +Multi-host failover with automatic reconnect requires QuestDB Enterprise. +::: + +Single-host applications need nothing from this section. The simple loops +shown earlier are already correct: treating any iteration error as terminal is +always safe, including when a reconnect happens. 
+ +If you connect to multiple hosts for failover, a correct application must do +exactly three things beyond the single-host code. This is the whole list: + +1. **Ingestion: no loop changes.** Configure multiple endpoints and a + reconnect policy; reconnection is transparent to the producer. You still + need the universal asynchronous error handling from + [Ingestion errors](#ingestion-errors). Details: + [Ingestion failover](#ingestion-failover). +2. **Querying: handle `*QwpFailoverReset`, but only if you accumulate rows.** + If you build up rows across batches, discard them on a reset and continue + iterating. If you process each batch and keep nothing, the simple + terminal-on-error loop is already correct. Pattern: + [Query failover](#query-failover). +3. **DDL/DML: `Exec` is not retried by default.** A `*QwpFailoverReset` from + `Exec` means the statement was not confirmed, not that it succeeded. + Re-issue it only if it is idempotent, or opt into + `qdb.WithQwpQueryReplayExec(true)`. Details: + [the Exec caution](#ddl-and-dml-statements). + +Everything below is the detail behind these three points. + +### Multiple endpoints + +Specify comma-separated addresses in the connect string, or pass them to the +options API: + +```text +ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; +``` + +```go +client, err := qdb.NewQwpQueryClient(ctx, + qdb.WithQwpQueryEndpoints("db-primary:9000", "db-replica-1:9000")) +``` + +The client tries endpoints in order and walks the list to find the next +healthy one on connection loss. + +### Ingestion failover + +The ingestion sender uses a reconnect loop with exponential backoff. 
Configure +it via the connect string or `qdb.WithReconnectPolicy(maxDuration, +initialBackoff, maxBackoff)`: + +| Key | Default | Description | +| ---------------------------------- | -------- | ------------------------------------ | +| `reconnect_max_duration_millis` | `300000` | Total outage budget before giving up | +| `reconnect_initial_backoff_millis` | `100` | First post-failure sleep | +| `reconnect_max_backoff_millis` | `5000` | Cap on per-attempt sleep | +| `initial_connect_retry` | `off` | Retry on first connect | + +`qdb.WithInitialConnectMode` selects `InitialConnectOff` (default), +`InitialConnectSync` (block the constructor while retrying), or +`InitialConnectAsync` (return immediately and buffer rows until connected). +Ingress is zone-blind: it pins QWP v1 and ignores the `zone=` key, so a connect +string shared with query clients works unchanged. Reconnect is transparent to +the producer; you do not change the ingestion loop for it. + +### Query failover + +The query client drives a per-query reconnect loop. On a mid-stream transport +error it reconnects and replays the query. + +| Key | Default | Description | +| ----------------------------- | ------- | --------------------------------- | +| `failover` | `on` | Master switch for reconnect | +| `failover_max_attempts` | `8` | Max reconnect attempts per query | +| `failover_backoff_initial_ms` | `50` | First post-failure sleep | +| `failover_backoff_max_ms` | `1000` | Cap on per-attempt sleep | +| `target` | `any` | Role filter: `any`, `primary`, `replica` | + +The matching options are `qdb.WithQwpQueryFailover`, +`qdb.WithQwpQueryFailoverMaxAttempts`, `qdb.WithQwpQueryFailoverBackoff`, and +`qdb.WithQwpQueryTarget`. + +You only need the pattern below if you **accumulate rows across batches and +want the query to continue transparently across a reconnect**. When failover +occurs mid-stream, `Batches()` yields a non-fatal `*QwpFailoverReset` before +the replayed batches arrive. 
Detect it with `errors.As`, discard the rows you +accumulated from the prior connection (the server replays from the +beginning), and continue iterating: -To learn _The Way_ of QuestDB SQL, see the -[Query & SQL Overview](/docs/query/overview/). +```go +for batch, err := range q.Batches() { + if err != nil { + var reset *qdb.QwpFailoverReset + if errors.As(err, &reset) { + results = results[:0] // server replays from the beginning + continue + } + return err // any other error is terminal + } + // ... +} +``` + +If you do not need transparent continuation, the simple loop is correct: +returning on any error treats a reset as terminal, which the client supports +explicitly. When the failover budget is consumed, `Batches()` (and `Exec`) +return `*QwpFailoverExhaustedError`. + +### Observability + +`QwpSender` exposes counters for dashboards: `TotalReconnectAttempts`, +`TotalReconnectsSucceeded`, `TotalFramesReplayed`, `TotalBackpressureStalls`, +`TotalServerErrors`, and `LastTerminalError`. With `drain_orphans=on`, +`BackgroundDrainers()` snapshots the goroutines adopting unacked data from +crashed sibling senders. The query client exposes `ServerInfo()` and +`CurrentEndpoint()`; `QwpServerInfo.RoleName()` returns the bound node's role. + +For background and worked configurations, see +[client failover concepts](/docs/high-availability/client-failover/concepts/), +[client failover configuration](/docs/high-availability/client-failover/configuration/), +and the +[multi-host failover](/docs/connect/clients/connect-string#failover-keys) and +[reconnect](/docs/connect/clients/connect-string#reconnect-keys) keys of the +connect string reference. + +## Concurrency and parallel queries + +:::note Phase 1 limitation +The current implementation supports a single in-flight query per connection. +Multi-query support is planned for a future release. +::: -Alone? Stuck? Want help? Visit us in our -[Community Forum](https://community.questdb.com/). 
+Neither the `LineSender` nor the `QwpQueryClient` is safe for concurrent use. +For multi-threaded workloads, use one instance per goroutine. To run queries +in parallel, create separate `QwpQueryClient` instances, one per goroutine. +`Cancel` (on a `*QwpQuery`) and `Close` are safe to call from other +goroutines, which is how you cancel an in-flight query or shut down cleanly. + +## Configuration reference + +For the full list of connect-string keys and their defaults, see the +[connect string reference](/docs/connect/clients/connect-string/). + +Common WebSocket-specific keys: + +| Key | Default | Description | +| ------------------------------- | -------- | ------------------------------------ | +| `auto_flush_rows` | `1000` | Rows before auto-flush | +| `auto_flush_interval` | `100` | Milliseconds before auto-flush | +| `sf_dir` | unset | Store-and-forward directory | +| `sender_id` | `default`| Sender slot identity for SF | +| `request_durable_ack` | `off` | Request durable upload ACK (Enterprise) | +| `reconnect_max_duration_millis` | `300000` | Ingress reconnect budget | +| `failover` | `on` | Query per-query reconnect switch | +| `compression` | `raw` | Query batch compression (`raw`, `zstd`) | + +## Migration from ILP (HTTP/TCP) + +The row-building API is unchanged across transports. 
The main differences: + +| Aspect | HTTP (ILP) | WebSocket (QWP) | +| --------------------- | ----------------- | ----------------------- | +| Connect string schema | `http::` / `https::` | `ws::` / `wss::` | +| Options transport | `qdb.WithHttp()` | `qdb.WithQwp()` | +| Auto-flush rows | 75,000 | 1,000 | +| Auto-flush interval | 1,000 ms | 100 ms | +| Error model | Synchronous | Async `SenderErrorHandler` | +| Store-and-forward | Not available | Available (`sf_dir`) | +| Multi-endpoint failover | Limited | Full reconnect loop | +| Querying | Not available | `QwpQueryClient` | + +The biggest behavioral change is the error model: on HTTP, `Flush` returns the +rejection synchronously; on QWP it does not. To migrate, change the connect +string from `http::` to `ws::` (or `https::` to `wss::`), register a +`SenderErrorHandler`, and adjust auto-flush settings if needed. `QwpSender` is +a superset of `LineSender`, so existing ingestion code keeps working. From 4eadeb333d4c82ef0884187f41b535e2bccca134 Mon Sep 17 00:00:00 2001 From: Marko Topolnik Date: Fri, 15 May 2026 15:24:10 +0200 Subject: [PATCH 27/44] Don't fail the build on a missing remote example The RemoteRepoExample component read example.code without optional chaining, so when an example was absent from the remote-repo-example plugin data it threw "Cannot read properties of undefined (reading 'code')" and failed static-site generation for the entire locale. This contradicted the plugin's own design: plugins/remote-repo-example already skips a missing example file with a warning and continues, treating an absent example as a tolerable, transient state (docs can land before the upstream example does). Guard the lookup: when the example is missing, log a warning in the same style as the plugin and render nothing, so the block appears once the example lands upstream instead of breaking the build. 
This unblocks the Go QWP client page, which references qwp-ingest/go and qwp-query/go before they exist on go-questdb-client/main. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/theme/RemoteRepoExample/index.tsx | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/theme/RemoteRepoExample/index.tsx b/src/theme/RemoteRepoExample/index.tsx index 220ed28e2..edff50102 100644 --- a/src/theme/RemoteRepoExample/index.tsx +++ b/src/theme/RemoteRepoExample/index.tsx @@ -76,8 +76,23 @@ export const RemoteRepoExample = ({ } } - const example: Example = repoExample[id] - const headerMd = example?.header + const example: Example | undefined = repoExample[id] + + if (example === undefined) { + // The example is not in the remote-repo-example plugin data: its + // source file has not landed in the client repo yet, or the manifest + // entry is missing. The plugin (plugins/remote-repo-example) already + // skips absent files with a warning rather than failing the build; + // mirror that here instead of crashing the entire static-site + // generation. The block renders once the example lands upstream. + console.warn( + `[RemoteRepoExample] no example "${id}" in remote-repo-example ` + + `data; skipping. It will render once it lands in the source repo.`, + ) + return null + } + + const headerMd = example.header let code = example.code ?? "" const valueReplaceMap = [ From 719bc9e5b8019124d29e3a63209c9f4fdf8fffa5 Mon Sep 17 00:00:00 2001 From: bluestreak Date: Fri, 15 May 2026 14:43:22 +0100 Subject: [PATCH 28/44] concepts: add Delivery semantics page; fix #egress-flow anchor Establishes a transport-agnostic home for at-least-once / exactly-once semantics so QWP client docs no longer depend on legacy ILP pages scheduled for removal. 
- New documentation/concepts/delivery-semantics.md covering the three replay paths (client retry, multi-host failover, SF restart), the designated-timestamp + DEDUP recipe for exactly-once, and when at-least-once alone is acceptable. - Cross-link from connect-string.md DEDUP warning and SF replay note, client-failover/concepts.md ingress section, and store-and-forward/concepts.md .ack-watermark section. - rust.md: replace the legacy /docs/connect/compatibility/ilp/overview anchor with the new concepts page. - connect-string.md: rename the misleading {#egress-flow} anchor on the ingress Durable ACK section to {#durable-ack}; add a stable {#egress-failover} anchor on the actual egress-failover subsection. Repoint client-failover/configuration.md references that previously landed on durable-ack content while claiming to document egress failover. - sidebars.js: insert Delivery semantics in Core Concepts after Deduplication. - review-client skill: add checklist item 18 (no content dependencies on legacy ILP pages) and a style-guide exception clarifying that legacy links don't count as coverage. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/skills/review-client/SKILL.md | 308 ++++++++++++++++++ .../client-configuration/connect-string.md | 16 +- documentation/concepts/delivery-semantics.md | 169 ++++++++++ .../client-failover/concepts.md | 3 + .../client-failover/configuration.md | 4 +- .../store-and-forward/concepts.md | 4 +- documentation/ingestion/clients/rust.md | 3 +- documentation/sidebars.js | 1 + 8 files changed, 497 insertions(+), 11 deletions(-) create mode 100644 .claude/skills/review-client/SKILL.md create mode 100644 documentation/concepts/delivery-semantics.md diff --git a/.claude/skills/review-client/SKILL.md b/.claude/skills/review-client/SKILL.md new file mode 100644 index 000000000..dfe076b97 --- /dev/null +++ b/.claude/skills/review-client/SKILL.md @@ -0,0 +1,308 @@ +--- +name: review-client +description: Review a pull request that changes QuestDB **QWP** client documentation (the WebSocket transport, `ws::` / `wss::`) from the perspective of an agent or developer building an application against the client. Validates that the docs answer the concrete questions someone writing code would hit on day one — null handling, concurrency, DDL/DML/streaming SQL, acks (sync vs async, optional vs required), failover behavior and backpressure, connection notifications, mid-stream stream restarts, connect-string clarity, and the absence of content dependencies on legacy ILP pages scheduled for removal. Legacy ILP (`http::` / `tcp::`) client pages are explicitly out of scope. Requires a PR number as input. +argument-hint: +--- + +# Review client documentation PR + +**Usage:** `/review-client ` — for example, `/review-client 451`. + +The PR number is **required**. If the user invokes the skill with no +argument (e.g. just `/review-client`), do not proceed: ask them which PR to +review and stop. Do **not** infer the PR from the current branch, recent +commits, `gh pr list`, or any other source — the answer must come from the +user. 
Acceptable forms are a bare number (`451`), a `#`-prefixed form +(`#451`), or a full PR URL; reject anything else and ask again. + +Reviews a pull request that touches **QWP** client documentation against a +fixed checklist of questions an application developer (human or agent) would +need answered before they can ship code. The output is a structured review +the docs author can act on directly. + +## Scope: QWP only + +There are two parallel families of client documentation in this repo: + +- **QWP (in scope).** The new WebSocket transport. Pages render under + `/docs/connect/clients//` (slug `slug: /connect/clients/`) and + document the `ws::` / `wss::` connect-string schemas, the QWP ingress and + egress wire protocols, store-and-forward, durable ACK, multi-host + failover, etc. **These are the only pages this skill reviews.** +- **Legacy ILP (out of scope).** The older HTTP/TCP transport. Pages document + only `http::` / `https::` / `tcp::` / `tcps::` schemas and ILP-specific + buffering. These pages may still live on disk under + `documentation/ingestion/clients/` (and some have been given a + `/connect/clients/...` slug as part of a routing reshuffle without their + content being rewritten). **Skip them.** If the user wants those reviewed, + recommend `/review` instead. + +**How to tell them apart on a per-file basis:** open the file and look for +QWP signals — `ws::` / `wss::` in code blocks, references to +`QwpQueryClient` / `QwpWs*` / `qwp_ws_*` APIs, sections on store-and-forward, +durable ACK, or FSN watermarks, a link to +`/docs/connect/clients/connect-string/`. A page whose only schemas are +`http::` / `https::` / `tcp::` / `tcps::` and whose content centres on ILP +buffering is legacy — exclude it from the review even if it appears in the +PR's file list. + +## When to use + +Trigger when the user runs `/review-client ` or asks to review a QWP +client documentation PR. 
Examples: + +- `/review-client 451` +- "review the Java QWP client doc PR #451" +- "run review-client on PR 451" + +For generic doc review use `/review` instead. For legacy ILP client pages, +also use `/review`. + +## Inputs + +- **PR number** (required). Resolve via `gh pr view ` to get the + branch, head SHA, and changed files. +- If the user omits the PR number, ask for it. Do not guess from the current + branch. + +## Workflow + +### Step 1: Fetch PR metadata and changed files + +```bash +gh pr view --json number,title,headRefName,baseRefName,headRefOid,files +``` + +Then narrow the file list to **QWP client documentation only**. Candidate +paths to consider: + +- `documentation/ingestion/clients/*.md` — **but** include only files whose + content documents QWP (see the [Scope](#scope-qwp-only) section above for + the QWP-vs-legacy signals). Files that still document `http::` / `tcp::` + ILP only are out of scope, even when their slug now resolves to + `/docs/connect/clients/...`. +- `documentation/connect/**/*.md` — the new Connect section. In scope. +- `documentation/client-configuration/connect-string.md` (and any sibling + `connect-string*` file wherever it lives) — the shared QWP connect-string + reference. **Always in scope** when any QWP client page is changed; read + it even if the PR did not modify it. + +To classify a candidate file fast: read its first ~80 lines. If you see +`ws::` / `wss::` in code blocks, references to `QwpQueryClient` or +`qwp_ws_*` APIs, or a link to the connect-string reference under +`/docs/connect/clients/connect-string/`, treat it as QWP. If the only +schemas it shows are `http::` / `tcp::`, treat it as legacy and skip. + +Be explicit in your output about which files you considered and why each +was included or skipped — the docs author needs to know whether the absence +of a file from the review means "reviewed clean" or "skipped as legacy." 
+ +If the PR changes no QWP client docs, stop and tell the user — recommend +`/review` instead. + +### Step 2: Read each changed client doc in full + +Use the Read tool on each file at the PR's head SHA (check it out, or read +from the working tree if the branch is already checked out). Do not rely on +the diff alone — context outside the diff matters for "is this question +answered anywhere on the page" checks. + +If a page links to a sibling reference (e.g., `connect-string.md`), read +that too. The connect-string page is shared across all client docs; treat it +as in-scope whenever any client page is changed. + +### Step 3: Run the checklist + +For each changed client page, evaluate every item below. Each item gets one +of three verdicts: + +- **Covered** — a developer can answer the question from the page alone (or + from a clearly linked sibling page). Cite the section/line. +- **Partial** — touched on but unclear, buried, or missing an example. + Quote the relevant text and say what's missing. +- **Missing** — not addressed. Say so plainly. + +Be specific. "Section X doesn't mention Y" beats "could be clearer." +Reference exact line numbers and quote short snippets when calling out a gap. + +#### Ingestion checklist + +1. **Inserting NULL values during ingestion.** Can the reader figure out how + to write a null for a given column without trial and error? Is there an + example? Does it explain whether "omit the column" is equivalent to + "explicit null," and whether that interacts with schema inference? +2. **Multiple concurrent publishers.** Is it clear whether `Sender` (or its + per-language equivalent) is thread-safe? If not, what is the + recommended pattern — one sender per thread, pool, queue+single-writer? + Is there guidance on whether parallel senders writing to the same table + need distinct identities (`sender_id`, store-and-forward slots)? +3. 
**Easy to execute DDL.** Is there a concrete copy-paste example for + `CREATE TABLE`, `ALTER`, `DROP`, `TRUNCATE` via the query client? Does + the page distinguish DDL response (`onExecDone`, `EXEC_DONE`, + `rowsAffected = 0`) from SELECT response? +4. **Easy to execute DML and stream rows.** Is `SELECT` with a row-by-row + callback shown? Is bind-parameter usage shown with the syntax + (`$1`/`?`/named) the client actually accepts? Is the + "columnar batch vs row view" tradeoff explained, with an example? +5. **Sync vs async acks — are acks optional?** Does the page say plainly + whether the application **must** await acknowledgements before + considering data durable, or whether `flush()` / `close()` is enough? + For async clients (WebSocket), is the error-handler callback shown? + Does it explain what happens to in-flight data if the app exits without + awaiting the ack? +6. **Durable ack vs WAL ack.** Is the distinction between "committed to + local WAL" and "uploaded to object storage" (Enterprise) clear? When + would an app care about `request_durable_ack`? + +#### Failover and resilience checklist + +7. **Ingress failover is bounded.** Does the page say that ingress reconnect + has a budget (`reconnect_max_duration_millis`) and will eventually give + up? Is it clear what the application sees when the budget is exhausted + (terminal exception, callback, etc.)? +8. **Backpressure on the application side.** If the server is unreachable + for a long time, where does buffered data go? Is store-and-forward + explained as the durability story, and the RAM buffer cap explained for + the non-SF case? Does the page tell the app how to detect "I am being + backpressured" so it can stop producing? +9. **Connection-state notifications.** Can the app wire a callback that + fires on `CONNECTED`, `DISCONNECTED`, `RECONNECTED`, `FAILED_OVER`, + `AUTH_FAILED`, `RECONNECT_BUDGET_EXHAUSTED`? Is there a code example? 
+ Does the example show what an app would actually do (log, alert, + redirect traffic)? +10. **Mid-stream query failover — duplicate-data hazard.** Does the page + explain that if a query fails over mid-result, the server replays from + the start of the result set? Does it show the `onFailoverReset` + callback and **warn explicitly** that without wiring this callback the + application will see duplicate rows? This is the single most common + footgun — it must be impossible to miss. +11. **Per-query failover bounds.** Are the failover knobs + (`failover_max_attempts`, `failover_backoff_*`, `failover_max_duration_ms`) + listed with defaults? What does the app see if all attempts are + exhausted? + +#### Connect string and config checklist + +12. **Reference to connect-string docs.** Is there at least one link from + the client page to the connect-string reference? Is the link placed + where a reader needing it would actually look (near the first connect + string example, not just in a footer)? +13. **Connect string is easy to assemble.** Can a reader build a working + QWP connect string from scratch? Schema (`ws::` / `wss::`), address + syntax, where to put auth, where to put TLS, separator/terminator + rules. Are common pitfalls called out (trailing `;`, escaping `;` or + `=` in values, multi-address syntax)? Legacy `http::` / `tcp::` need + only a "for legacy ILP transports, see [link]" pointer — do not + require coverage on the QWP page itself. +14. **Environment variable path.** Is `QDB_CLIENT_CONF` (or per-language + equivalent) documented as the credentials-out-of-code path? + +#### Cross-cutting + +15. **Thread safety statement.** Stated once, in a place a reader looking + for "can I share this instance?" would find it — not buried under + "Parallel queries" or similar. +16. 
**Error-handling story is end-to-end.** For each error class (auth, + schema, parse, transport, mid-stream), the page should answer: how is + the error surfaced (throw vs callback), what state is the client in + afterward (usable vs must-reset vs must-close), and what should the + app do. +17. **Migration / "what changed from before" notes** if applicable. If this + PR introduces a new transport (e.g., QWP) alongside legacy (e.g., ILP), + is there a side-by-side that a maintainer of existing code can scan? +18. **No content dependencies on legacy ILP pages.** Legacy ILP client + documentation (`documentation/ingestion/clients/{go,c-and-cpp,dotnet,nodejs,python}.md`, + `documentation/connect/compatibility/ilp/**`, `documentation/ingestion/clients/date-to-timestamp-conversion.md`, + and similar ILP-era support material) is on a deprecation path and + will be removed. Outbound links from a QWP client page to legacy ILP + content are acceptable **only** when framed as a "for legacy ILP, + see X" escape hatch — typically inside a `:::tip Legacy transports` + admonition near the top of the page. Flag as **Missing** any link + that *depends* on a legacy page to explain a concept the QWP reader + needs (e.g., "see the ILP overview for exactly-once delivery + semantics"). The concept must live somewhere that survives ILP + deprecation: the QWP page itself, the connect-string reference, a + transport-agnostic concepts page, or a new QWP-native page. Look + especially for sneaky cases: anchor links into legacy pages + (`/docs/connect/compatibility/ilp/overview/#some-section`) and + references to timestamp-conversion / date-handling support pages + that were authored for ILP. Fix shape suggestion: "move this + explanation onto the QWP page, or root it in a shared concepts page + under `/docs/concepts/`." + +### Step 4: Produce the review + +Format the output as one section per changed file. 
Within each file group +findings by checklist section (Ingestion / Failover / Connect string / +Cross-cutting). Use this structure: + +```markdown +## documentation/ingestion/clients/.md + +### Ingestion +- ❌ **Missing — inserting NULL values.** The column-method list (lines + 245-256) shows typed setters but never says how to write null. No + example. Recommend adding either an explicit `setNull(name)` example or + a one-liner stating that omitted columns are stored as null. +- ⚠️ **Partial — multiple publishers.** Line 845 states `Sender` is not + thread-safe, but the statement is under "Parallel queries" where a + reader looking for ingestion guidance would not look. Move or duplicate + under "Data ingestion." +- ✅ **Covered — DDL.** Lines 559-582 show CREATE TABLE with + `onExecDone`. + +### Failover +- ❌ **Missing — duplicate-data hazard on mid-stream failover.** The + `onFailoverReset` callback is mentioned (lines 784-790) but the page + does not say *what happens if you don't wire it*. Add an explicit + warning: "Without an onFailoverReset handler that clears accumulated + results, the application will observe duplicate rows after a mid-stream + reconnect." +- ... + +### Connect string +- ... + +### Cross-cutting +- ... +``` + +End with a short summary: total counts (Covered / Partial / Missing), the +top three highest-impact gaps, and any items where the doc actively +misleads the reader (call these out separately — they are worse than gaps). + +### Step 5: Offer to file the gaps + +After printing the review, ask whether to: + +- Post the review as a PR comment (`gh pr comment --body-file `). +- Draft inline edits for the highest-impact gaps. +- Stop here. + +Do not post the review without confirmation. + +## Style guidance for the review itself + +- Quote short snippets and cite line numbers (`file.md:245-256`). Vague + reviews are unactionable. 
+- For each gap, suggest the **shape** of the fix (one example, one warning + block, one paragraph move) — not the full prose. The doc author will + write the prose. +- Use ✅ / ⚠️ / ❌ markers so the author can scan. (Skill output is the + only place in this repo where emojis are appropriate, since the user + asked for a review tool.) +- Do not flag items that are correctly out of scope for the page (e.g., + don't ask the Java page to document the Python client's null handling). +- If the page links to a sibling page that fully answers an item, mark it + Covered with the link as the citation. Do not require every page to be + self-contained. **Exception:** links into legacy ILP material do not + count as coverage — see checklist item 18. A QWP page that "covers" + null handling by linking to the ILP overview is not covered; it has a + content dependency on a page scheduled for removal. + +## What this skill is not + +- Not a generic doc reviewer (`/review` for that). +- Not a copy-editor (no typo / wording polish). +- Not a security review (`/security-review` for that). +- Not a build/link checker — assume `yarn build` is run separately. diff --git a/documentation/client-configuration/connect-string.md b/documentation/client-configuration/connect-string.md index af1f16114..65d94f30d 100644 --- a/documentation/client-configuration/connect-string.md +++ b/documentation/client-configuration/connect-string.md @@ -28,7 +28,7 @@ For legacy InfluxDB Line Protocol (ILP) transports (`http`, `https`, `tcp`, - [Multi-host failover](#failover-keys) - [Store-and-forward](#sf-keys) - [Reconnect and failover](#reconnect-keys) -- [Durable ACK](#egress-flow) +- [Durable ACK](#durable-ack) - [Error handling](#error-handling) - [Key index](#key-index) @@ -347,7 +347,8 @@ ACK — the client replays unacknowledged frames against the new primary. Without [DEDUP](/docs/concepts/deduplication/) on the target table, those replays can produce duplicate rows. 
Tables ingested through a multi-host failover connect string **must** declare `DEDUP UPSERT KEYS(...)` covering -row identity. +row identity. See [Delivery semantics](/docs/concepts/delivery-semantics/) +for the full at-least-once / exactly-once model. ::: @@ -425,7 +426,8 @@ below. accepted but did not durable-acknowledge before the previous sender died. To prevent duplicate rows in the target table, declare [DEDUP](/docs/concepts/deduplication/) `UPSERT KEYS(...)` covering row -identity. +identity. See [Delivery semantics](/docs/concepts/delivery-semantics/) for +the full model and recipe. ### Backpressure @@ -486,7 +488,7 @@ Auth failures during reconnect (authentication rejected, version mismatch, durable-ack mismatch, non-101 upgrade without a role hint) are immediately terminal — the loop does not retry them. -### Egress failover +### Egress failover {#egress-failover} These keys control the per-`Execute()` reconnect loop on the QWP query client. Each query has its own budget; the loop resets between queries. @@ -502,7 +504,7 @@ Requires QuestDB Enterprise (multi-host). - `failover_max_duration_ms` — total wall-clock budget per `Execute()`. Default: `30000` (30 s). Set to `0` for unbounded. -## Durable ACK {#egress-flow} +## Durable ACK {#durable-ack} *Applies to: ingress.* @@ -576,7 +578,7 @@ description and behaviour notes. 
| `auto_flush_rows` | int / `off` | `1000` | [Auto-flushing](#auto-flush) | | `close_flush_timeout_millis` | int (ms) | `5000` | [Ingress reconnect](#reconnect-keys) | | `drain_orphans` | enum (`on` / `off`) | `off` | [Store-and-forward](#sf-keys) | -| `durable_ack_keepalive_interval_millis` | int (ms) | `200` | [Durable ACK](#egress-flow) | +| `durable_ack_keepalive_interval_millis` | int (ms) | `200` | [Durable ACK](#durable-ack) | | `error_inbox_capacity` | int (≥ 16) | `256` | [Error handling](#error-handling) | | `failover` | enum (`on` / `off`) | `on` | [Egress failover](#reconnect-keys) | | `failover_backoff_initial_ms` | int (ms) | `50` | [Egress failover](#reconnect-keys) | @@ -600,7 +602,7 @@ description and behaviour notes. | `reconnect_initial_backoff_millis` | int (ms) | `100` | [Ingress reconnect](#reconnect-keys) | | `reconnect_max_backoff_millis` | int (ms) | `5000` | [Ingress reconnect](#reconnect-keys) | | `reconnect_max_duration_millis` | int (ms) | `300000` (5 min) | [Ingress reconnect](#reconnect-keys) | -| `request_durable_ack` | enum (`on` / `off`) | `off` | [Durable ACK](#egress-flow) | +| `request_durable_ack` | enum (`on` / `off`) | `off` | [Durable ACK](#durable-ack) | | `sender_id` | string | `default` | [Store-and-forward](#sf-keys) | | `sf_append_deadline_millis` | int (ms) | `30000` (30 s) | [Store-and-forward](#sf-keys) | | `sf_dir` | path | unset (memory mode) | [Store-and-forward](#sf-keys) | diff --git a/documentation/concepts/delivery-semantics.md b/documentation/concepts/delivery-semantics.md new file mode 100644 index 000000000..4ee695453 --- /dev/null +++ b/documentation/concepts/delivery-semantics.md @@ -0,0 +1,169 @@ +--- +title: Delivery semantics +sidebar_label: Delivery semantics +description: + How QuestDB clients deliver data (at-least-once), where duplicate rows can + arise, and how to combine designated timestamps with deduplication for + exactly-once outcomes. 
+--- + +QuestDB clients deliver data **at-least-once**: every row your application +publishes is guaranteed to reach the server, but under failure it may arrive +more than once. Storing each row exactly once is the application's +responsibility, and QuestDB provides the mechanisms to make it routine. + +This page explains where duplicates come from and how to suppress them. + +## At-least-once vs exactly-once + +| Property | Meaning | Where it comes from | +|----------|---------|---------------------| +| **At-most-once** | Each row reaches the server zero or one times. Rows can be lost. | A "fire and forget" client that does not retransmit on failure. | +| **At-least-once** | Each row reaches the server one or more times. No row is lost; duplicates are possible. | A client that retransmits unacknowledged data after a transport error. **This is the QuestDB client default.** | +| **Exactly-once** | Each row is stored exactly once. | At-least-once delivery plus server-side deduplication on a key covering row identity. | + +QuestDB's clients retransmit unacknowledged batches after transport errors, +host failovers, and process restarts. The trade-off is deliberate: losing +data silently is the worse failure mode. The cost is that the application +must tolerate or suppress duplicates. + +## Where duplicates come from + +Three replay paths can resend rows the server already accepted. + +### Client retry on transport error + +The client buffers unacknowledged rows. When the connection breaks before +the server confirms a batch, the client reconnects and re-sends. If the +server had already committed the batch but the acknowledgement was lost in +flight, the second send produces duplicates. + +This path applies to every QuestDB client deployment. + +### Multi-host failover replay + +In a [multi-host](/docs/high-availability/client-failover/concepts/) +Enterprise deployment, the client carries a list of peers. 
When the primary +fails over to a replica, the client redirects to the new primary and +replays any batches it had not yet seen acknowledged. If the dying primary +committed those batches before the failover took effect, the new primary +applies them again on replay. + +### Store-and-forward replay across sender restarts + +With [store-and-forward](/docs/high-availability/store-and-forward/concepts/) +enabled, the client persists outgoing frames to disk. After a sender +process crash or restart, the next sender instance reads the on-disk queue +and replays everything past the durable-ack watermark. The window between +"the server applied the frame" and "the client recorded the ack" is +exactly the window in which replay produces duplicates. + +This path applies only when `sf_dir` is set on the connect string. + +## Achieving exactly-once + +Three things must hold: + +1. **A user-assigned designated timestamp.** The application chooses the + timestamp for each row (event time), not the server. Server-assigned + timestamps — `atNow()`, `at_now()`, omitting `at()` — change between + the original send and the replay, so the two rows are not identical and + deduplication cannot match them. +2. **A [deduplication](/docs/concepts/deduplication/) key covering row + identity.** Declare `DEDUP UPSERT KEYS(...)` on the target table with + keys that uniquely identify a logical event. The designated timestamp + is always part of the key; add any other columns needed to distinguish + two events that share a timestamp. +3. **Stable values across retransmits.** Any column that participates in + row identity must be derived deterministically from the source event — + not from wall-clock time at the moment of sending, and not from a + per-attempt counter. + +When those three hold, the server treats a replayed batch as already-seen +and skips the write. 
+ +## Recipe + +Define the table with DEDUP on the columns that identify a unique event: + +```questdb-sql +CREATE TABLE trades ( + ts TIMESTAMP, + symbol SYMBOL, + side SYMBOL, + price DOUBLE, + qty DOUBLE +) TIMESTAMP(ts) PARTITION BY DAY WAL +DEDUP UPSERT KEYS(ts, symbol, side); +``` + +In the publishing client, set `ts` explicitly to the event time: + +```java +sender.table("trades") + .symbol("symbol", "ETH-USD") + .symbol("side", "buy") + .doubleColumn("price", 2615.54) + .doubleColumn("qty", 0.5) + .at(eventInstant); // not atNow() +``` + +If two distinct events can share `(ts, symbol, side)` and both should be +preserved, widen `UPSERT KEYS` to include a column that distinguishes them +— for example a `trade_id` or `seq` column. + +:::warning DEDUP is required on tables behind multi-host failover + +When the client fails over from one primary to another, unacknowledged +batches are replayed against the new primary. Without `DEDUP UPSERT KEYS` +covering row identity, those replays produce duplicate rows in the target +table. + +::: + +## When at-least-once is enough + +DEDUP has a cost: the server compares each incoming row against existing +rows with the same keys. For most workloads the cost is invisible; for +high-cardinality keys or heavily out-of-order data, it adds work to the +write path. + +If your application tolerates occasional duplicates — counting events with +a small tolerance, aggregating over a window where one extra row shifts +the average by a negligible amount, append-only logs where uniqueness is +not meaningful — you can skip DEDUP and rely on at-least-once delivery +directly. + +The decision is per-table, not per-deployment: enable DEDUP on the tables +that need exactly-once, leave it off on the tables that don't. + +## Related Enterprise features + +These features change *where* the replay window opens, but do not change +the guarantee — at-least-once still applies, and DEDUP is still the +mechanism that achieves exactly-once. 
+ +- **Durable ACK** + ([`request_durable_ack=on`](/docs/connect/clients/connect-string#durable-ack)) + — the server delays the per-batch acknowledgement until the WAL is + shipped to object storage. This narrows the replay window after primary + failover but does not eliminate it. +- **[Store-and-forward](/docs/high-availability/store-and-forward/concepts/)** + — provides at-least-once across sender process restarts. Replay + semantics from this page apply. +- **[Multi-host client failover](/docs/high-availability/client-failover/concepts/)** + — provides at-least-once across primary failovers. Replay semantics + from this page apply. + +## See also + +- [Deduplication](/docs/concepts/deduplication/) — the server-side + mechanism that makes exactly-once achievable. +- [Designated timestamp](/docs/concepts/designated-timestamp/) — required + for DEDUP and for explicit-timestamp publishing. +- [Write-ahead log](/docs/concepts/write-ahead-log/) — when the server + considers a batch durable. +- [Client failover concepts](/docs/high-availability/client-failover/concepts/) + — the multi-host replay path in detail. +- [Store-and-forward concepts](/docs/high-availability/store-and-forward/concepts/) + — the sender-restart replay path in detail. diff --git a/documentation/high-availability/client-failover/concepts.md b/documentation/high-availability/client-failover/concepts.md index 1e2360cfa..77e4d1319 100644 --- a/documentation/high-availability/client-failover/concepts.md +++ b/documentation/high-availability/client-failover/concepts.md @@ -150,6 +150,9 @@ throughput-oriented workloads that can tolerate minutes of server unavailability See the [store-and-forward concepts](/docs/high-availability/store-and-forward/concepts/) page for how the reconnect loop interacts with the disk-backed segment ring. +Replay across a primary failover delivers at-least-once — see +[Delivery semantics](/docs/concepts/delivery-semantics/) for the DEDUP +requirement on affected tables. 
### Egress (queries) diff --git a/documentation/high-availability/client-failover/configuration.md b/documentation/high-availability/client-failover/configuration.md index ff453c6dd..064d8c53e 100644 --- a/documentation/high-availability/client-failover/configuration.md +++ b/documentation/high-availability/client-failover/configuration.md @@ -68,7 +68,7 @@ network), and retrying for five minutes only hides it. The egress failover loop wraps each `execute()` call on the read-side query client. The full key list lives on the -[connect-string reference](/docs/connect/clients/connect-string#egress-flow); +[connect-string reference](/docs/connect/clients/connect-string#egress-failover); the user-visible knobs are: | Key | Type | Default | Notes | @@ -153,6 +153,6 @@ try (QwpQueryClient client = QwpQueryClient.fromConfig( |---|---|---| | `addr`, `zone`, `target`, `auth_timeout_ms` | Host selection, role filter | [connect-string #failover-keys](/docs/connect/clients/connect-string#failover-keys) | | `reconnect_*`, `initial_connect_retry` | Ingress retry budget | [connect-string #reconnect-keys](/docs/connect/clients/connect-string#reconnect-keys) | -| `failover`, `failover_*` | Egress retry budget | [connect-string #egress-flow](/docs/connect/clients/connect-string#egress-flow) | +| `failover`, `failover_*` | Egress retry budget | [connect-string #egress-failover](/docs/connect/clients/connect-string#egress-failover) | | `username` / `password` / `token` | Authentication | [connect-string #auth](/docs/connect/clients/connect-string#auth) | | `tls_*` | TLS configuration | [connect-string #tls](/docs/connect/clients/connect-string#tls) | diff --git a/documentation/high-availability/store-and-forward/concepts.md b/documentation/high-availability/store-and-forward/concepts.md index bba8cc989..1345b205a 100644 --- a/documentation/high-availability/store-and-forward/concepts.md +++ b/documentation/high-availability/store-and-forward/concepts.md @@ -228,7 +228,9 @@ guarantees 
no data loss, but cannot distinguish which frames inside that lowest segment the previous sender had already received durable acks for. Replay therefore re-sends every frame in that segment, producing row-level duplicates against a still-alive server unless deduplication is -enabled on the target table. +enabled on the target table. See +[Delivery semantics](/docs/concepts/delivery-semantics/) for the +at-least-once model and the DEDUP recipe. With `.ack-watermark`, recovery clamps the seed to the higher of the on-disk and watermarked values, so already-durable-acked frames inside diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index 478b4af08..6dcbd5099 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -268,7 +268,8 @@ buffer.table("trades")?.column_f64("price", 1.0842)?.at_now()?; `at_now()` removes the ability to deduplicate rows. Prefer explicit timestamps for production ingestion. See -[exactly-once delivery](/docs/connect/compatibility/ilp/overview/#exactly-once-delivery-vs-at-least-once-delivery). +[Delivery semantics](/docs/concepts/delivery-semantics/) for why +server-assigned timestamps defeat exactly-once outcomes. 
:::note diff --git a/documentation/sidebars.js b/documentation/sidebars.js index f162a74b9..6b4bb545b 100644 --- a/documentation/sidebars.js +++ b/documentation/sidebars.js @@ -566,6 +566,7 @@ module.exports = { label: "Materialized Views", }, "concepts/deduplication", + "concepts/delivery-semantics", "concepts/ttl", "concepts/storage-policy", "concepts/write-ahead-log", From 0c0b40e7468c716f375a4e9b5ae612698afddc83 Mon Sep 17 00:00:00 2001 From: Marko Topolnik Date: Fri, 15 May 2026 16:29:09 +0200 Subject: [PATCH 29/44] Address review gaps in Go QWP client docs A client-documentation review of the Go QWP page surfaced several correctness and completeness gaps, each verified against the go-questdb-client source and the QWP spec rather than inferred: - Fix a broken cross-reference: the "durable ACK keys" link pointed at connect-string#egress-flow, an anchor that does not exist. The section is #durable-ack; the reader was being dropped at the page top. - Document how to write NULL on ingest. The client has no null setter; a cell is null when its column setter is omitted before At/AtNow, and a column introduced on a later row is backfilled with null for earlier buffered rows. This was previously undocumented. - Warn that store-and-forward / failover replay is at-least-once. Without DEDUP UPSERT KEYS on the target table, replay produces duplicate rows. This hazard was prominent in the connect-string reference but absent from the client page. - State that there is no per-transition connection callback. Connect, disconnect, reconnect, and failover are not delivered as events; reconnect is observable only via the counters and terminal failures via the error handler. - Add the failover_max_duration_ms key (default 30000, 0 = unbounded) and the WithQwpQueryFailoverMaxDuration option to the query-failover table. This documents behaviour added in go-questdb-client PR #62; the doc is accurate once that PR merges. 
The temporary plugins/remote-repo-example/index.js preview change is intentionally left uncommitted. Co-Authored-By: Claude Opus 4.7 (1M context) --- documentation/ingestion/clients/go.md | 35 ++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index 4451d250b..980953f88 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -305,6 +305,17 @@ err = qs.Table("trades"). `GeohashColumn`, `Int64Array1DColumn` / `2D` / `3D`, the decimal columns, and `AtNano` for nanosecond designated timestamps. +### Null values + +The client has no null setter. To store a null for a column in a given row, +omit that column's setter before `At`/`AtNow`/`AtNano`. On row commit, every +column not set in the row is gap-filled with a null, so omitting a column and +writing an "explicit null" are the same operation. + +The buffered column set is the union across the batch: a column first used on +a later row is backfilled with null for every earlier row still in the send +buffer. + ### Ingest arrays For 1D, 2D, and 3D `double` arrays, pass a Go slice directly: @@ -447,6 +458,18 @@ Without `sf_dir`, unacknowledged data lives in process memory and is lost if the sender process dies. The reconnect loop still spans transient server outages, but the RAM buffer caps how much data can accumulate. +:::caution Replay is at-least-once — enable DEDUP + +After a reconnect or a sender restart, the client replays frames the server +may have accepted but not yet acknowledged. Without +[DEDUP](/docs/concepts/deduplication/) on the target table, replay produces +duplicate rows. Tables ingested over a reconnecting or multi-host connection +**must** declare `DEDUP UPSERT KEYS(...)` covering row identity. See +[Delivery semantics](/docs/concepts/delivery-semantics/) for the full +at-least-once / exactly-once model. 
+ +::: + :::caution Store-and-forward changes how `At` and errors behave - **`At`/`AtNow`/`Flush` can block.** When the on-disk buffer hits its cap, @@ -477,7 +500,7 @@ By default, the server confirms a batch when it is committed to the local [WAL](/docs/concepts/write-ahead-log/). To wait for the batch to be durably uploaded to object storage, add `request_durable_ack=on;` to the connect string. See the -[durable ACK keys](/docs/connect/clients/connect-string#egress-flow). +[durable ACK keys](/docs/connect/clients/connect-string#durable-ack). ## Querying and SQL execution @@ -864,11 +887,12 @@ error it reconnects and replays the query. | `failover_max_attempts` | `8` | Max reconnect attempts per query | | `failover_backoff_initial_ms` | `50` | First post-failure sleep | | `failover_backoff_max_ms` | `1000` | Cap on per-attempt sleep | +| `failover_max_duration_ms` | `30000` | Total wall-clock failover budget per query (`0` = unbounded) | | `target` | `any` | Role filter: `any`, `primary`, `replica` | The matching options are `qdb.WithQwpQueryFailover`, -`qdb.WithQwpQueryFailoverMaxAttempts`, `qdb.WithQwpQueryFailoverBackoff`, and -`qdb.WithQwpQueryTarget`. +`qdb.WithQwpQueryFailoverMaxAttempts`, `qdb.WithQwpQueryFailoverBackoff`, +`qdb.WithQwpQueryFailoverMaxDuration`, and `qdb.WithQwpQueryTarget`. You only need the pattern below if you **accumulate rows across batches and want the query to continue transparently across a reconnect**. When failover @@ -905,6 +929,11 @@ return `*QwpFailoverExhaustedError`. crashed sibling senders. The query client exposes `ServerInfo()` and `CurrentEndpoint()`; `QwpServerInfo.RoleName()` returns the bound node's role. +There is no per-transition connection callback: connect, disconnect, +reconnect, and failover are not delivered as events. Observe reconnect and +failover through these counters, and terminal failures through the +[ingestion error handler](#ingestion-errors). 
+ For background and worked configurations, see [client failover concepts](/docs/high-availability/client-failover/concepts/), [client failover configuration](/docs/high-availability/client-failover/configuration/), From cacc3fcb8f667f708a993820068aa4f2e28cb6a9 Mon Sep 17 00:00:00 2001 From: bluestreak Date: Fri, 15 May 2026 15:40:22 +0100 Subject: [PATCH 30/44] review-client: extend checklist, order findings by severity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds checklist items 19-22 (Enterprise/OIDC + mTLS coverage, exhaustive bind-parameter type enumeration, cross-page capital-markets schema consistency, field-level error diagnostic surface). Removes go.md from the legacy-ILP list now that it's a QWP page. Switches the Step 4 output format from grouped-by-section to ordered by severity (Missing → Partial → Covered) so action items surface first; checklist section becomes a parenthetical tag per finding instead of a sub-heading. Co-Authored-By: Claude Opus 4.7 (1M context) --- .claude/skills/review-client/SKILL.md | 169 ++++++++++++++++++++++---- 1 file changed, 142 insertions(+), 27 deletions(-) diff --git a/.claude/skills/review-client/SKILL.md b/.claude/skills/review-client/SKILL.md index dfe076b97..c4b219c4f 100644 --- a/.claude/skills/review-client/SKILL.md +++ b/.claude/skills/review-client/SKILL.md @@ -1,6 +1,6 @@ --- name: review-client -description: Review a pull request that changes QuestDB **QWP** client documentation (the WebSocket transport, `ws::` / `wss::`) from the perspective of an agent or developer building an application against the client. 
Validates that the docs answer the concrete questions someone writing code would hit on day one — null handling, concurrency, DDL/DML/streaming SQL, acks (sync vs async, optional vs required), failover behavior and backpressure, connection notifications, mid-stream stream restarts, connect-string clarity, and the absence of content dependencies on legacy ILP pages scheduled for removal. Legacy ILP (`http::` / `tcp::`) client pages are explicitly out of scope. Requires a PR number as input. +description: Review a pull request that changes QuestDB **QWP** client documentation (the WebSocket transport, `ws::` / `wss::`) from the perspective of an agent or developer building an application against the client. Validates that the docs answer the concrete questions someone writing code would hit on day one — null handling, concurrency, DDL/DML/streaming SQL, acks (sync vs async, optional vs required), failover behavior and backpressure, connection notifications, mid-stream stream restarts, connect-string clarity, Enterprise connection patterns (TLS + auth + multi-host worked examples, OIDC token acquisition and refresh, explicit "not supported" statements when applicable), exhaustive type coverage on bind-parameter and column-setter surfaces (no "and more" handwaves), the absence of content dependencies on legacy ILP pages scheduled for removal, a consistent capital-markets data model across every QWP client page (no `foo`/`bar` placeholders, no schema drift between languages), and field-level documentation of the diagnostic payload on every error object (status, message stability, affected scope, correlation ID, PII safety). Legacy ILP (`http::` / `tcp::`) client pages are explicitly out of scope. Requires a PR number as input. argument-hint: --- @@ -211,7 +211,7 @@ Reference exact line numbers and quote short snippets when calling out a gap. 
PR introduces a new transport (e.g., QWP) alongside legacy (e.g., ILP), is there a side-by-side that a maintainer of existing code can scan? 18. **No content dependencies on legacy ILP pages.** Legacy ILP client - documentation (`documentation/ingestion/clients/{go,c-and-cpp,dotnet,nodejs,python}.md`, + documentation (`documentation/ingestion/clients/{c-and-cpp,dotnet,nodejs,python}.md`, `documentation/connect/compatibility/ilp/**`, `documentation/ingestion/clients/date-to-timestamp-conversion.md`, and similar ILP-era support material) is on a deprecation path and will be removed. Outbound links from a QWP client page to legacy ILP @@ -229,42 +229,157 @@ Reference exact line numbers and quote short snippets when calling out a gap. that were authored for ILP. Fix shape suggestion: "move this explanation onto the QWP page, or root it in a shared concepts page under `/docs/concepts/`." +19. **Enterprise connection patterns and OIDC.** The page shows at least + one worked example combining TLS (`wss::`), credentials, and + multi-host `addr=...` — the realistic production shape — not just + three separate one-liners. For each Enterprise auth path the client + supports (HTTP basic, bearer token, OIDC, mTLS), there is either + (a) a concrete example showing how an application obtains and + passes the credential, or (b) an explicit one-line statement that + the path is not supported by this client, with a pointer to the + closest alternative. **Silence is not acceptable** — a reader must + not have to grep the page to discover that OIDC token refresh, mTLS + client certificates, or token rotation is unsupported. Special + attention to OIDC: the [OpenID Connect](/docs/security/oidc/) page + documents the server-side flow; the client page must answer "how + does the application acquire a token to pass to the client" and + "what happens when the token expires mid-session — does the client + refresh, does it fail, does it expect the app to register a + callback?" 
A bare "for OIDC, see the security page" is **not** + coverage — flag as Partial at best. +20. **Bind-parameter type coverage and limitations.** Where the page + documents bind parameters (or the per-language equivalent), it + enumerates **all** supported bind types — not a sample ending in + "and more" or "…". For every QuestDB column type a reader might + expect to bind (BOOLEAN, BYTE, SHORT, CHAR, INT, IPv4, LONG, FLOAT, + DOUBLE, TIMESTAMP, timestamp_ns, DATE, SYMBOL, VARCHAR, BINARY, + UUID, LONG256, DECIMAL64/128/256, GEOHASH, DOUBLE[]/ARRAY), the + page either (a) shows the setter / API and the type code, or + (b) lists the type explicitly under "unsupported as bind parameter" + with a one-line rationale (e.g., "ARRAY: bind ARGS frames don't + carry array shape; use SQL array literals instead"). Verdict + ladder: complete enumeration → Covered; sample-and-handwave ("and + more", "…", "see source") → ⚠️ Partial; no list at all → ❌ + Missing. The same principle — enumerate or call out as unsupported + — applies wherever the page documents a type-keyed surface + (ingestion column setters, result-batch accessors). The + bind-parameter table is the most common place coverage drifts + because the API is younger than the type system. +21. **Consistent capital-markets data model across clients.** Every code + example uses a capital-markets domain (trades, quotes, order books, + FX, market data). **Reject** generic placeholders — `foo`, `bar`, + `baz`, `my_table`, `t1`, `Example`, `Test`. The placeholder pattern + is a tell that the example was written in isolation and was never + cross-read against sibling client pages. Beyond the per-page check, + examples must be **consistent across the full set of QWP client + pages**: same table names, same column names, same column types, + same symbol values. 
When the PR ships one client page and the other + QWP client pages already exist, compare schemas — flag every + inconsistency the reader would hit when porting between languages: + + | Class of drift | Examples | + |---|---| + | Table name | `trades` vs `Trades` vs `market_trades` | + | Column name | `qty` vs `quantity` vs `amount`; `symbol` vs `sym` vs `instrument` | + | Column type | `LONG` vs `DOUBLE` for size; `SYMBOL` vs `VARCHAR` for ticker | + | Symbol value | `EURUSD` vs `EUR/USD` vs `EUR-USD`; `ETH-USD` vs `ETHUSD` | + | Timestamp precision | microseconds vs nanoseconds for the same notional event | + + Verdict ladder: domain-correct, placeholder-free, schema matches + every other QWP client page → ✅ Covered; domain-correct but + schema drifts from siblings → ⚠️ Partial (cite the specific + drift); generic placeholders or non-capital-markets domain + (sensors, IoT, logs) → ❌ Missing. Fix shape: pick the schema + used by the page with the most polished example and align the + others, or call out one canonical schema in this skill / a README + under `documentation/ingestion/clients/` so future client docs land + on it without negotiation. +22. **Diagnostic information on the error object/event.** Item 16 + enumerates the error categories and the surfacing / recovery + model. This item demands the next level of detail: **what + structured information is on the error and how user code reads + it**, so a real production handler can log, alert, debug, and + correlate with server-side state. + + For every error path the client exposes, the page documents: + - **Server message text** — which field or parameter carries it + (`SenderError.getServerMessage()`, the `message` parameter on + `onError`, `QwpWsSenderError.message`, etc.), whether it is + stable enough to pattern-match on, localized vs English, and + whether it is capped in length. + - **Status code** — both numeric (e.g. `0x05`) and named (e.g. + `PARSE_ERROR`), and how user code reads each. 
+ - **Affected scope** — table name on ingest errors, FSN range + (`from_fsn`/`to_fsn`) or batch identifier on async ingest + rejections, failing SQL / bind index on query parse errors, + query ID on mid-stream query failures. + - **Server correlation / request ID** for support tickets, if the + protocol carries one; otherwise an explicit statement that no + such ID is surfaced. + - **PII / secret safety** — whether the message text is safe to + forward to end-user UIs or third-party error trackers, or + whether the application must sanitise first. + + Verdict ladder: every bullet covered on every error path → ✅ + Covered; primary fields named but stability / PII / correlation + silent → ⚠️ Partial; only "the message is human-readable text" + with no field-by-field guidance → ❌ Missing. The fix shape is + almost always a small table next to the error-handling code + example listing the fields, their types, and one-line guidance per + field — much more readable than burying these properties in + prose. ### Step 4: Produce the review -Format the output as one section per changed file. Within each file group -findings by checklist section (Ingestion / Failover / Connect string / -Cross-cutting). Use this structure: +Format the output as one section per changed file. **Within each file, +order findings by severity, worst first** — ❌ Missing, then ⚠️ Partial, +then ✅ Covered at the bottom. This is the load-bearing rule of the output +format: human readers scan top-down looking for action items, and a doc +author should be able to stop reading as soon as the ❌/⚠️ blocks end. + +Do **not** group by checklist section (Ingestion / Failover / Connect +string / Cross-cutting). Instead, tag each finding with its section in +parentheses after the title — `(Ingestion)`, `(Failover)`, `(Connect +string)`, `(Cross-cutting)` — so the author still knows which category an +item belongs to without losing the severity ordering. 
+ +Within a severity bucket, order by impact (the gap a reader would hit +first or hardest comes first). When in doubt, follow the checklist's own +ordering as a tiebreaker. + +The ✅ Covered block at the bottom may be terser than the ❌/⚠️ blocks +above it — one-line confirmations with citation are fine. The point of +keeping Covered findings in the output at all is to let the author see +that the item *was* checked and reassure them no follow-up is needed; it +is not to re-justify the verdict. + +Use this structure: ```markdown ## documentation/ingestion/clients/.md -### Ingestion -- ❌ **Missing — inserting NULL values.** The column-method list (lines - 245-256) shows typed setters but never says how to write null. No - example. Recommend adding either an explicit `setNull(name)` example or - a one-liner stating that omitted columns are stored as null. -- ⚠️ **Partial — multiple publishers.** Line 845 states `Sender` is not - thread-safe, but the statement is under "Parallel queries" where a - reader looking for ingestion guidance would not look. Move or duplicate - under "Data ingestion." -- ✅ **Covered — DDL.** Lines 559-582 show CREATE TABLE with - `onExecDone`. - -### Failover -- ❌ **Missing — duplicate-data hazard on mid-stream failover.** The - `onFailoverReset` callback is mentioned (lines 784-790) but the page +- ❌ **Missing — inserting NULL values (Ingestion).** The column-method + list (lines 245-256) shows typed setters but never says how to write + null. No example. Recommend adding either an explicit `setNull(name)` + example or a one-liner stating that omitted columns are stored as null. +- ❌ **Missing — duplicate-data hazard on mid-stream failover (Failover).** + The `onFailoverReset` callback is mentioned (lines 784-790) but the page does not say *what happens if you don't wire it*. Add an explicit warning: "Without an onFailoverReset handler that clears accumulated results, the application will observe duplicate rows after a mid-stream reconnect." 
-- ... - -### Connect string -- ... - -### Cross-cutting -- ... +- ⚠️ **Partial — multiple publishers (Ingestion).** Line 845 states + `Sender` is not thread-safe, but the statement is under "Parallel + queries" where a reader looking for ingestion guidance would not look. + Move or duplicate under "Data ingestion." +- ⚠️ **Partial — OIDC (Cross-cutting).** Line 172 is a bare "see the + security page" pointer; the client page must answer how the app + acquires the token and what happens on expiry. +- ✅ **Covered — DDL (Ingestion).** Lines 559-582 show CREATE TABLE with + `onExecDone`. +- ✅ **Covered — thread safety statement (Cross-cutting).** Stated at + l.236-244 next to the ingestion code. +- ✅ **Covered — connect-string reference link (Connect string).** l.198. ``` End with a short summary: total counts (Covered / Partial / Missing), the From 7405797dfc7a0d8ae3c03b842aacc6cc295069c6 Mon Sep 17 00:00:00 2001 From: bluestreak Date: Fri, 15 May 2026 15:59:49 +0100 Subject: [PATCH 31/44] Rust QWP client docs: address review gaps Grounded against the implementation at c-questdb-client. - Replace bare OIDC pointer with an explicit "not implemented" note covering token acquisition, passing via `token=`, and rebuild-on-expiry. - Add explicit "mTLS not supported" callout under TLS. - Add a Concurrency subsection (single-owner Sender, decoupled Buffer, distinct sender_id per slot). - Replace the sample column-setter list + crate-docs handwave with a complete table covering all 20 typed setters, the NULL variants, and a worked `column_f64_opt` example. Surface the IPv4 / LONG_ARRAY client-encodes-but-server-rejects caveat with a footnote. - Correct the `sf_durability` rows: only `memory` ships; `flush` and `append` are reserved (setting them today fails sender construction). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- documentation/ingestion/clients/rust.md | 105 ++++++++++++++++++++---- 1 file changed, 91 insertions(+), 14 deletions(-) diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index 6dcbd5099..a6c1aa1d3 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -89,7 +89,23 @@ let mut sender = Sender::from_conf( )?; ``` -For OIDC authentication, see [OpenID Connect](/docs/security/oidc/). +:::note OIDC + +The Rust client does not implement any OIDC flow (client-credentials, +authorization-code, or refresh-token). For OIDC-secured deployments: + +1. Acquire a bearer token in your application using an OIDC library such + as [`openidconnect`](https://crates.io/crates/openidconnect) or + [`oauth2`](https://crates.io/crates/oauth2). +2. Pass it to the client via `token=...`. +3. The client does not refresh the token. When it expires mid-session + the client surfaces a terminal auth error; rebuild the `Sender` with + a fresh token. + +The server-side OIDC flow is documented at +[OpenID Connect](/docs/security/oidc/). + +::: ### TLS @@ -110,6 +126,15 @@ Supported values: | `tls_roots=/path/to/root-ca.pem` | Load roots from a PEM file. Useful for self-signed certs during testing. | | `tls_verify=unsafe_off` | Disable verification. Never use in production. | +:::note mTLS (client certificates) not supported + +The Rust client does not currently implement mTLS / client-certificate +auth. The TLS surface is one-way: the client verifies the server, not +the other way round. For credential auth, use HTTP basic (`username` + +`password`) or bearer token (`token`). + +::: + ### Authentication timeout `auth_timeout_ms` (default 15000) controls how long the client waits for the @@ -167,25 +192,77 @@ progress mode, reconnect timing, and `initial_connect_retry`. 
## Data ingestion
 
+### Concurrency
+
+`Sender` is single-owner: every publishing method takes `&mut self`, so
+only one caller can use it at a time. For concurrent producers, create
+one `Sender` per thread, or hand rows to a single owner over a channel.
+
+`Buffer` is decoupled from `Sender`. Build buffers on any thread, then
+call `sender.flush(&mut buffer)` once you have the sender in scope. This
+lets worker threads encode rows in parallel and serialises only the
+publish step.
+
+When several `Sender` instances share an `sf_dir`, give each a distinct
+`sender_id` — slots are exclusive (see
+[Store-and-forward](#store-and-forward)).
+
 ### General usage pattern
 
 1. Call `buffer.table(name)?` to select a table.
-2. Call column methods to add values:
-   - `symbol(name, value)`
-   - `column_bool(name, value)`
-   - `column_i64(name, value)`
-   - `column_f64(name, value)`
-   - `column_str(name, value)`
-   - `column_ts(name, timestamp)`
-   - `column_arr(name, ...)` for arrays
-   - `column_dec(name, ...)` for decimals
-   - `_opt` variants (e.g. `column_f64_opt`) for `Option` ergonomics
+2. Call typed column setters to add values (see
+   [Column setters](#column-setters) below).
3. Call `at(timestamp)?` or `at_now()?` to finalize the row.
4. Repeat from step 1, or call `sender.flush(&mut buffer)?` to send.

Tables and columns are created automatically if they do not exist.

-For the full column method reference, see the
+### Column setters
+
+Every typed setter except `symbol` has an `_opt` variant taking an
+`Option` that writes a null when the value is `None`:
+
+```rust
+buffer.column_f64_opt("price", None)?; // writes null
+buffer.column_f64_opt("price", Some(2615.54))?; // equivalent to column_f64
+```
+
+`SYMBOL` has no `_opt` variant — omit the `symbol()` call to leave the
+ +| QuestDB type | Setter | NULL variant | +| --- | --- | --- | +| `SYMBOL` | `symbol(name, &str)` | — (omit the call) | +| `BOOLEAN` | `column_bool(name, bool)` | `column_bool_opt(name, Option)` | +| `BYTE` | `column_i8(name, i8)` | `column_i8_opt(name, Option)` | +| `SHORT` | `column_i16(name, i16)` | `column_i16_opt(name, Option)` | +| `INT` | `column_i32(name, i32)` | `column_i32_opt(name, Option)` | +| `LONG` | `column_i64(name, i64)` | `column_i64_opt(name, Option)` | +| `FLOAT` | `column_f32(name, f32)` | `column_f32_opt(name, Option)` | +| `DOUBLE` | `column_f64(name, f64)` | `column_f64_opt(name, Option)` | +| `CHAR` | `column_char(name, u16)` (UTF-16 code unit) | `column_char_opt(name, Option)` | +| `VARCHAR` | `column_str(name, &str)` | `column_str_opt(name, Option<&str>)` | +| `BINARY` | `column_binary(name, &[u8])` | `column_binary_opt(name, Option<&[u8]>)` | +| `UUID` | `column_uuid(name, lo: u64, hi: u64)` | `column_uuid_opt(name, Option<(u64, u64)>)` | +| `LONG256` | `column_long256(name, &[u8; 32])` (4 LE limbs) | `column_long256_opt(name, Option<&[u8; 32]>)` | +| `DATE` | `column_date(name, millis: i64)` | `column_date_opt(name, Option)` | +| `TIMESTAMP` / `timestamp_ns` (non-designated) | `column_ts(name, TimestampMicros / TimestampNanos)` | `column_ts_opt(name, Option<…>)` | +| `GEOHASH` | `column_geohash(name, bits: u64, precision_bits: u8)` (1–60 bits) | `column_geohash_opt(name, Option<(u64, u8)>)` | +| `DECIMAL` (up to 256-bit) | `column_dec(name, &str / rust_decimal / bigdecimal)` | `column_dec_opt(name, …)` | +| `DECIMAL64` | `column_dec64(name, …)` | `column_dec64_opt(name, …)` | +| `DECIMAL128` | `column_dec128(name, …)` | `column_dec128_opt(name, …)` | +| `DOUBLE[]` (arrays) | `column_arr(name, &view)` — slices, vecs up to 3D, [`ndarray`](https://docs.rs/ndarray) views | `column_arr_opt(name, Option<&view>)` | +| `IPv4` † | `column_ipv4(name, std::net::Ipv4Addr)` | `column_ipv4_opt(name, Option)` | +| `LONG[]` (i64 arrays) † | 
`column_arr` with `i64` element type | `column_arr_opt` with `i64` element type | + +† **Spec-only — currently rejected by the server.** QWP v1 defines these +wire types and the client encodes them correctly, but server-side ingest +does not yet accept them. Batches using them will be rejected with a +descriptive error. Application code written against these setters today +will start working once the server adds support; no client change is +needed. + +For exact signatures and accepted parameter conversions, see the [crate docs](https://docs.rs/questdb-rs/latest/questdb/ingress/struct.Buffer.html). ### Ingest arrays @@ -353,7 +430,7 @@ outages, but a RAM cap bounds how much data can accumulate. | `sender_id` | `default` | Slot identity. Allowed chars: `A-Za-z0-9_-`. Use distinct ids per sender process. | | `sf_max_bytes` | 4 MiB | Per-segment size cap. | | `sf_max_total_bytes` | 128 MiB (memory) / 10 GiB (disk) | Cap on total queued bytes. | -| `sf_durability` | `memory` | `memory`, `flush`, or `append` (strongest). | +| `sf_durability` | `memory` | `memory` is the only shipping value. `flush` and `append` are reserved for future per-write fsync modes; setting them today fails sender construction. | | `sf_append_deadline_millis` | 30000 | Per-append wait budget in `append` mode. | | `drain_orphans` | `off` | If `on`, take over stale slots owned by a previous sender. | | `max_background_drainers` | 4 | Concurrency cap when draining orphans. | @@ -552,7 +629,7 @@ Common WebSocket-specific options: | `auto_flush` | required `off` if set | Auto-flush is not supported. `auto_flush_rows` and `auto_flush_bytes` are rejected. | | `sf_dir` | unset | Enable disk-backed store-and-forward. | | `sender_id` | `default` | SF slot identity. | -| `sf_durability` | `memory` | `memory`, `flush`, or `append`. | +| `sf_durability` | `memory` | Only `memory` is currently accepted (see [SF tuning keys](#sf-tuning-keys)). 
| | `request_durable_ack` | `off` | Wait for durable upload before ACK (Enterprise). | | `reconnect_max_duration_millis` | 300000 | Per-outage reconnect budget. | | `initial_connect_retry` | `off` | Apply reconnect policy to the first connect. | From 75fbd47d7dce6ecadf0f749047e202e48967c605 Mon Sep 17 00:00:00 2001 From: bluestreak Date: Fri, 15 May 2026 16:13:19 +0100 Subject: [PATCH 32/44] Rust QWP client docs: document flush() backpressure and oversized-payload rejection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add ### Backpressure on flush() under ## Flushing: the two stacked caps (max_in_flight wire window, sf_max_total_bytes queue cap), block semantics, sf_append_deadline_millis as the bounded deadline, SubmitTimedOut on timeout, never-drops invariant, and memory ≡ disk symmetry. Verified against c-questdb-client qwp_ws.rs:355-399 and qwp_ws_sfa_queue.rs:891-910. - Add a :::caution covering the PayloadExceedsByteCapacity error path: a single payload exceeding sf_max_bytes is rejected immediately, does not enter the backpressure wait. The Rust analog of Java's PAYLOAD_TOO_LARGE sentinel. - Update the existing Flushing intro to forward to the new section. - Fix the sf_append_deadline_millis row description in the SF tuning keys table: applies in both memory and disk modes (the SFA queue is shared), not only in `append` mode (which does not ship). Co-Authored-By: Claude Opus 4.7 (1M context) --- documentation/ingestion/clients/rust.md | 44 +++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index a6c1aa1d3..4759b2127 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -375,10 +375,48 @@ size (via `buffer.len()`) exceeds a threshold. contents (for example, to fan the same buffer out to multiple senders). 
On QWP/WebSocket, `flush()` returns once the buffer is accepted by the local -replay queue, before the server acknowledges it. Server errors observed later -are reported asynchronously (see +replay queue, before the server acknowledges it. The call can block if the +queue is full — see [Backpressure on `flush()`](#backpressure-on-flush). Server +errors observed later are reported asynchronously (see [Asynchronous error handling](#asynchronous-error-handling)). +### Backpressure on `flush()` + +`flush()` is not unconditionally non-blocking. The publisher feeds a bounded +queue with two stacked caps: + +1. **In-flight window** — `max_in_flight` (default `128`) unacknowledged + frames on the connection. Reached first under steady-state load when the + server keeps up but you have many small flushes in flight. +2. **Queue cap** — `sf_max_total_bytes` (default `128 MiB` in memory mode, + `10 GiB` in disk mode). Reached when the server is unreachable long + enough that the in-flight count stops being the active limit. + +When either cap is hit, `flush()` blocks the caller and retries as the I/O +loop releases capacity (ACK-driven trim). The wait is bounded by +`sf_append_deadline_millis` (default `30000`). If the deadline elapses, +`flush()` returns a `SubmitTimedOut` error — the application can retry, fail +closed, or shed load. **No data is ever dropped or overwritten** while the +publisher is parked. + +Memory-only and disk-backed modes have identical backpressure semantics: the +same SFA queue handles both; only the cap default and whether the buffered +data survives a sender restart differ. + +`buffer.column_*` setters and `buffer.table(...)` never block — they only +mutate the in-process `Buffer`. Backpressure surfaces only at `flush()`. 
+ +:::caution Oversized payloads are rejected, not parked + +A single flushed payload larger than `sf_max_bytes` (default `4 MiB`) returns +a `PayloadExceedsByteCapacity` error from `flush()` immediately — it does +*not* enter the backpressure wait. The error carries the payload length and +the segment capacity. Fixes: reduce the number of rows you accumulate per +buffer before flushing, or raise `sf_max_bytes` to fit your largest single +flushed payload. + +::: + ### FSN-based completion Every published frame is assigned a frame sequence number (FSN). To wait until @@ -431,7 +469,7 @@ outages, but a RAM cap bounds how much data can accumulate. | `sf_max_bytes` | 4 MiB | Per-segment size cap. | | `sf_max_total_bytes` | 128 MiB (memory) / 10 GiB (disk) | Cap on total queued bytes. | | `sf_durability` | `memory` | `memory` is the only shipping value. `flush` and `append` are reserved for future per-write fsync modes; setting them today fails sender construction. | -| `sf_append_deadline_millis` | 30000 | Per-append wait budget in `append` mode. | +| `sf_append_deadline_millis` | 30000 | Maximum time `flush()` blocks waiting for queue capacity to free up. Applies in both memory and disk modes (the SFA queue is shared). On timeout, `flush()` surfaces a `SubmitTimedOut` error; no data is dropped. See [Backpressure on `flush()`](#backpressure-on-flush). | | `drain_orphans` | `off` | If `on`, take over stale slots owned by a previous sender. | | `max_background_drainers` | 4 | Concurrency cap when draining orphans. | From 7d0efa242742989ab2e0da420c8b2920b20cd3fb Mon Sep 17 00:00:00 2001 From: bluestreak Date: Fri, 15 May 2026 16:25:59 +0100 Subject: [PATCH 33/44] Rust QWP client docs: document QwpWsSenderError stability, PII, correlation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the field-level documentation gap on QwpWsSenderError. 
Three new subsections under the field table: - **Message stability** — message is opaque. Server-supplied text comes from QuestDB's SQL error pipeline (reworded across versions) and is capped at 1024 bytes by the wire spec; WebSocket protocol violations produce a client-synthesized "ws-close[]: ". Application code must dispatch on category and status, never pattern-match on message. - **PII / secret safety** — message may quote application payload (an offending value from a schema/parse rejection) or a server-supplied WebSocket close reason. Treat as potentially PII-bearing; sanitize before forwarding to external error trackers. The other fields are structural metadata and safe to forward as-is. - **Correlating with server-side logs** — the WebSocket upgrade does not carry a server-issued request/connection ID. The closest correlation tuple is (message_sequence, from_fsn, to_fsn). Document the bug-report tuple (connection start time, X-QWP-Client-Id, triple). Explicit "there is no globally unique handle." Also annotates table rows: category note about programmatic dispatch; message_sequence note about per-connection scope and reconnect reset; from_fsn/to_fsn note that it's the client-side span. Co-Authored-By: Claude Opus 4.7 (1M context) --- documentation/ingestion/clients/rust.md | 56 +++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index 4759b2127..b7eaf7b92 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -537,16 +537,64 @@ let mut sender = SenderBuilder::new(Protocol::QwpWs, "localhost", 9000) | Field | Meaning | |-------|---------| -| `category` | `SchemaMismatch`, `ParseError`, `InternalError`, `SecurityError`, `WriteError`, `ProtocolViolation`, `Unknown`. | +| `category` | `SchemaMismatch`, `ParseError`, `InternalError`, `SecurityError`, `WriteError`, `ProtocolViolation`, `Unknown`. 
Use for programmatic dispatch. |
| `applied_policy` | `DropAndContinue` (batch dropped, sender continues) or `Halt` (sender latched terminal). |
| `status` | Raw QWP status byte. `None` for WebSocket protocol violations. |
-| `message` | Human-readable error text from the server or the close reason. |
-| `message_sequence` | Server's per-frame QWP message sequence. |
-| `from_fsn` / `to_fsn` | Inclusive FSN span of the affected frame(s). |
+| `message` | Human-readable error text from the server, or a client-synthesized close reason for WebSocket protocol violations. See [Message stability](#message-stability) and [PII safety](#message-pii) below. |
+| `message_sequence` | Server's per-frame QWP wire sequence for the error frame. Resets on reconnect — only meaningful within one connection. |
+| `from_fsn` / `to_fsn` | Inclusive FSN span of the affected frame(s), client-side. |

`Sender::qwp_ws_errors_dropped()` reports how many diagnostics were lost
because the bounded log overflowed (typically due to a lagging poll cursor).

+#### Message stability {#message-stability}
+
+`message` is a human-readable diagnostic — **not a stable contract.** Its
+text varies across server versions and across provenance:
+
+- **QWP error frames** carry a server-supplied UTF-8 string capped at
+  1024 bytes by the wire spec.
+- **WebSocket protocol violations** are client-synthesized as
+  `"ws-close[<code>]: <reason>"`.
+- The server-supplied text mirrors QuestDB's normal SQL error formatting,
+  which historically reworded across releases.
+- The field may be empty.
+
+Use `category` and `status` for programmatic dispatch. Never pattern-match
+on `message`.
+
+#### PII / secret safety {#message-pii}
+
+`message` may include fragments of the client's own payload — for
+example, an offending column value quoted back by a schema or parse
+rejection — or a server-supplied WebSocket close reason that the
+operator did not control. 
**Treat `message` as potentially containing +PII or secrets.** + +Log it at the same trust level as the data being sent, and sanitize +before forwarding to external error trackers (Sentry, Datadog, end-user +UIs). The other fields on `QwpWsSenderError` are safe to forward as-is — +they carry only structural metadata. + +#### Correlating with server-side logs + +The protocol does not currently surface a server-issued request or +connection identifier in the WebSocket upgrade response. The closest +correlation tuple is `(message_sequence, from_fsn, to_fsn)`: + +- `message_sequence` — per-connection QWP wire sequence the server + attached to the error frame. Resets on reconnect. +- `from_fsn` / `to_fsn` — client-side FSN span of the affected frames. + Not generally indexed by server-side logs. + +When opening a bug report, supply: + +1. The connection start time (from your application logs). +2. The client's `X-QWP-Client-Id` header value, if your application sets one. +3. The `(message_sequence, from_fsn, to_fsn)` triple. + +There is no globally unique handle. + After a `Halt` policy fires, the sender is terminal. Drop it and create a new one. `Sender::must_close()` reports whether the sender has entered a terminal state. From d302cf3c8586d02650b27698c6980fe20c7476f5 Mon Sep 17 00:00:00 2001 From: Marko Topolnik Date: Fri, 15 May 2026 17:27:24 +0200 Subject: [PATCH 34/44] Fix misleading Go QWP docs; document egress keys A review-client pass over the Go QWP client documentation found three actively-misleading statements and several coverage gaps. go.md corrections: - The result-batch caution claimed Float64Array, Int64Array, and the *Range accessors alias the receive buffer. They return caller-owned copies; only Str and Binary alias. Rewrote the caution accordingly. - request_durable_ack=on is rejected by the Go connect-string parser today; added a caution instead of instructing readers to set it. 
- The custom-trust-store section pointed at tls_roots keys the Go client rejects. Replaced it with accurate trust-store/mTLS behavior, a static-token OIDC acquisition/expiry note, and a worked TLS+auth+multi-host example. go.md gap fills: - Full SenderError field table (status byte, message sequence, message length/stability/PII guidance). - QwpQueryError.RequestId and the 0x09 WRITE_ERROR row. - Called out BINARY/ARRAY/IPv4 as non-bindable and documented the DECIMAL64/256 and GEOHASH result-read paths. - Aligned the in-page schema (CREATE TABLE column names, decimal example table) so examples no longer contradict each other. - Added a counter-polling example for connection observability. connect-string.md: - Documented the egress/query-client keys (compression, compression_level, initial_credit, max_batch_rows, buffer_pool_size) so the Go page's references resolve. - Noted that tls_roots support varies by client. Excludes the local-only plugins/remote-repo-example/index.js preview change, which is marked to be reverted before merge. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../client-configuration/connect-string.md | 38 ++++ documentation/ingestion/clients/go.md | 170 +++++++++++++----- 2 files changed, 166 insertions(+), 42 deletions(-) diff --git a/documentation/client-configuration/connect-string.md b/documentation/client-configuration/connect-string.md index 65d94f30d..e30afe83a 100644 --- a/documentation/client-configuration/connect-string.md +++ b/documentation/client-configuration/connect-string.md @@ -29,6 +29,7 @@ For legacy InfluxDB Line Protocol (ILP) transports (`http`, `https`, `tcp`, - [Store-and-forward](#sf-keys) - [Reconnect and failover](#reconnect-keys) - [Durable ACK](#durable-ack) +- [Query client keys](#egress-keys) - [Error handling](#error-handling) - [Key index](#key-index) @@ -216,6 +217,18 @@ TLS is enabled by selecting the `wss` schema. - `tls_roots_password` — password for the keystore file. Required when `tls_roots` is set. 
+:::note Client support varies + +`tls_roots` / `tls_roots_password` are a Java-keystore feature. Some clients +(for example, Go) verify against the operating-system trust store only and +**reject these keys at parse time**; to trust a private CA there, install it +in the host trust store. Mutual TLS (client certificates) is likewise not +supported by every client. Check the relevant +[client library page](/docs/connect/overview/#client-libraries) for +specifics. + +::: + See also the [server-side TLS configuration](/docs/security/tls/). ## Auto-flushing {#auto-flush} @@ -538,6 +551,31 @@ transport-level OK ACK alone cannot close. See the [QWP Egress (WebSocket)](/docs/connect/wire-protocols/qwp-egress-websocket/) wire protocol for the underlying mechanism. +## Query client keys {#egress-keys} + +*Applies to: egress (query client).* + +These keys are accepted by the QWP query client's connect string (the +egress / `QwpQueryClient` path). They are not sender keys. + +- `compression` — result-batch compression the client advertises. Options: + `raw` (default — no compression, the accept-encoding header is omitted so + pre-compression servers see an unchanged handshake), `zstd` (demand + zstd), `auto` (accept zstd if the server offers it). +- `compression_level` — zstd level hint. Range `1`–`22`. Ignored when + `compression=raw`. Default is the client's library default. +- `initial_credit` — byte-credit flow-control budget. `0` (default) means + unbounded: the server streams as fast as the network allows. Set a + non-zero budget to bound server push on a memory-constrained client. +- `max_batch_rows` — upper bound on rows per result batch. +- `buffer_pool_size` — size of the client-side decode buffer pool. + +Equivalent options exist on the query client's builder API (for example, +`WithQwpQueryCompression`, `WithQwpQueryCompressionLevel`, +`WithQwpQueryInitialCredit` in the Go client). 
See the +[client library page](/docs/connect/overview/#client-libraries) for the +per-language names. + ## Error handling {#error-handling} *Applies to: ingress and egress.* diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index 980953f88..14954d761 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -175,12 +175,42 @@ client, err := qdb.NewQwpQueryClient(ctx, qdb.WithQwpQueryBearerToken("your_bearer_token")) ``` -### TLS with a custom trust store +The client takes a **static** bearer token; it does not acquire or refresh +OIDC tokens. With [OpenID Connect](/docs/security/oidc/), the application +obtains the access token from the identity provider and is responsible for +rotating it before it expires. An expired or revoked token is not refreshed +in place: the next connect or reconnect fails with a `SECURITY_ERROR` (or a +`401`/`403` on the WebSocket upgrade — terminal across all endpoints). To +rotate, construct a new sender or client with the fresh token. -TLS is enabled by the `wss` schema (or `qdb.WithTls()`). Trust-store keys are -documented in the [TLS section](/docs/connect/clients/connect-string#tls) of -the connect string reference. For OIDC authentication (Enterprise), see -[OpenID Connect](/docs/security/oidc/). +### Production example (TLS + auth + multi-host) + +The realistic Enterprise shape combines `wss`, credentials, and a multi-host +`addr` list in a single connect string: + +```go +sender, err := qdb.LineSenderFromConf(ctx, + "wss::addr=db-1.example.com:9000,db-2.example.com:9000;"+ + "username=ingest;password=secret;") + +client, err := qdb.QwpQueryClientFromConf(ctx, + "wss::addr=db-1.example.com:9000,db-2.example.com:9000;"+ + "token=your_bearer_token;target=replica;") +``` + +### TLS trust store and mTLS + +TLS is enabled by the `wss` schema (or `qdb.WithTls()`). The Go client +verifies the server certificate against the **operating-system trust +store**. 
It does **not** support a custom trust store: the `tls_roots` / +`tls_roots_password` connect-string keys (a Java-keystore feature) are +rejected by the Go connect-string parser. To trust a private CA, install it +in the host trust store. Mutual TLS (client certificates) is **not +supported** by this client — authenticate with a bearer token or basic auth +over `wss` instead. For test-only certificate-verification bypass, see +`tls_verify` in the +[TLS section](/docs/connect/clients/connect-string#tls) of the connect +string reference. ## Creating the client @@ -398,9 +428,9 @@ if err != nil { panic(err) } -err = qs.Table("trades"). +err = qs.Table("trade_fees"). Symbol("symbol", "ETH-USD"). - Decimal128Column("price", price). + Decimal128Column("settled_price", price). Decimal128Column("commission", commission). AtNow(ctx) ``` @@ -497,11 +527,21 @@ configured. ::: By default, the server confirms a batch when it is committed to the local -[WAL](/docs/concepts/write-ahead-log/). To wait for the batch to be durably -uploaded to object storage, add `request_durable_ack=on;` to the connect -string. See the +[WAL](/docs/concepts/write-ahead-log/). Durable acknowledgement instead waits +until the batch has been durably uploaded to object storage. See the [durable ACK keys](/docs/connect/clients/connect-string#durable-ack). +:::caution Not yet implemented in the Go client + +Durable-ack mode is a deferred follow-up in this client. Passing +`request_durable_ack=on;` (or `=true`) in the connect string is **rejected at +construction** with an `InvalidConfigStr` error; the only accepted value +today is `request_durable_ack=off` (the default). Until the feature ships, +the sender confirms on the transport-level OK ACK and ignores +`STATUS_DURABLE_ACK` frames. 
+ +::: + ## Querying and SQL execution The `QwpQueryClient` sends SQL statements over the @@ -553,14 +593,20 @@ for batch, err := range q.Batches() { } ``` -:::caution Copy values out before the iteration ends +:::caution Copy aliasing values out before the iteration ends + +A `*QwpColumnBatch` is valid only during its iteration of the loop. Never +store the batch itself; use `batch.CopyAll()` for a retainable snapshot. +Which accessors alias the receive buffer and which return caller-owned data: -A `*QwpColumnBatch` is valid only during its iteration of the loop. -`String(col, row)` returns a freshly allocated Go string and is safe to -retain. `Str`, `Binary`, `Float64Array`, `Int64Array`, and the `QwpColumn` -`*Range` accessors return slices that **alias the receive buffer** and become -invalid on the next iteration. Copy those before the loop advances, and never -store the batch itself. +- **Alias the buffer** (copy with `bytes.Clone` before the loop advances if + you keep them): `Str(col, row)` and `Binary(col, row)`. +- **Safe to retain:** `String(col, row)` returns a freshly allocated Go + string. `Float64Array`, `Int64Array`, the `*Into` accessors, and the + `QwpColumn` `*Range` accessors return caller-owned slices (freshly + allocated, or appended into a buffer you supply). +- The fixed-width scalar accessors (`Int64`, `Float64`, …) return values, + not views. ::: @@ -617,8 +663,13 @@ Representations to be aware of: `time.Time`: microseconds, nanoseconds, and milliseconds since epoch respectively. Convert with `time.UnixMicro` / `time.Unix(0, ns)` as needed. - `UUID` is two `int64` halves (`UuidHi` / `UuidLo`); reassemble client-side. -- A decimal value is `Decimal128Hi`/`Decimal128Lo` plus the per-column - `DecimalScale(col)`; apply the scale yourself. 
+- Decimals come back as the unscaled integer plus the per-column + `DecimalScale(col)`: read `DECIMAL64` with `Int64`, `DECIMAL128` with + `Decimal128Hi`/`Decimal128Lo`, and `DECIMAL256` with `Long256Word` + (words 0–3); apply the scale yourself. +- `GEOHASH` result columns expose only metadata in this release + (`GeohashPrecisionBits(col)`); there is no public value accessor for a + GEOHASH cell. Cast it to a string or long in SQL if you need the value. - A typed accessor on a NULL cell returns the zero value (`0`, `false`, `""`, `nil`), which is indistinguishable from a real zero. Call `IsNull(col, row)` first whenever NULL is meaningful. @@ -632,8 +683,8 @@ Non-SELECT statements run through `Exec`, which returns an `ExecResult`: ```go res, err := client.Exec(ctx, - "CREATE TABLE trades (ts TIMESTAMP, sym SYMBOL, price DOUBLE) "+ - "TIMESTAMP(ts) PARTITION BY DAY WAL") + "CREATE TABLE trades (ts TIMESTAMP, symbol SYMBOL, side SYMBOL, "+ + "price DOUBLE, amount DOUBLE) TIMESTAMP(ts) PARTITION BY DAY WAL") if err != nil { return err } @@ -685,8 +736,11 @@ maps to `$1`. Setters include `BooleanBind`, `ByteBind`, `ShortBind`, `TimestampMicrosBind`, `TimestampNanosBind`, `VarcharBind`, `UuidBind`, `Long256Bind`, `GeohashBind`, `DecimalBind` (and `Decimal64/128/256Bind`), plus a `Null...Bind` variant for each type. There is no symbol bind: use -`VarcharBind` for symbol parameters. A gap, a duplicate index, or any -out-of-order call latches an error that surfaces from `Query` or `Exec`. +`VarcharBind` for symbol parameters. **Not bindable:** `BINARY` (no setter); +`ARRAY` / `DOUBLE[]` / `LONG[]` (bind frames carry no array shape — pass a +SQL array literal in the statement instead); `IPv4` (bind it as `INT` with +`IntBind`). A gap, a duplicate index, or any out-of-order call latches an +error that surfaces from `Query` or `Exec`. 
### Flow control @@ -735,12 +789,19 @@ sender, err := qdb.NewLineSender(ctx, })) ``` -Each `SenderError` carries the `Category` -(`CategorySchemaMismatch`, `CategoryParseError`, `CategoryInternalError`, -`CategorySecurityError`, `CategoryWriteError`, `CategoryProtocolViolation`, or -`CategoryUnknown`), the `AppliedPolicy` (`PolicyDropAndContinue` or -`PolicyHalt`), the server message, the rejected table, and the `[FromFsn, -ToFsn]` span that correlates the rejection with `FlushAndGetSequence`. +Full `SenderError` field set, for logging, alerting, and support +correlation: + +| Field | Type | Use | +| ------------------ | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Category` | `Category` | Stable named class (`CategorySchemaMismatch`, `CategoryParseError`, `CategoryInternalError`, `CategorySecurityError`, `CategoryWriteError`, `CategoryProtocolViolation`, `CategoryUnknown`). The recommended switch target. | +| `ServerStatusByte` | `int` | Numeric wire status (e.g. `0x03`). `NoStatusByte` (`-1`) for `CategoryProtocolViolation`. | +| `AppliedPolicy` | `Policy` | `PolicyHalt` or `PolicyDropAndContinue` — what the send loop did. | +| `ServerMessage` | `string` | Human-readable server text. **≤ 1024 UTF-8 bytes**, English, may be empty. Safe to log; not a stable pattern-match key (switch on `Category` / `ServerStatusByte`). May echo table / column names — sanitise before forwarding to third-party error trackers. | +| `TableName` | `string` | Rejected table; empty for unknown or multi-table batches. | +| `FromFsn`,`ToFsn` | `int64` | Inclusive FSN span; join to `FlushAndGetSequence` to identify the rejected rows. | +| `MessageSequence` | `int64` | Server per-frame sequence — the correlation key for support tickets and server-log matching. `NoMessageSequence` (`-1`) for protocol violations. 
| +| `DetectedAt` | `time.Time` | Client-side receipt time, for ops timelines (not for correlation). | The per-category policy is configurable. Resolution precedence is the policy resolver, then the per-category policy, then the connect-string `on_*_error` @@ -780,7 +841,8 @@ for batch, err := range q.Batches() { if err != nil { var qe *qdb.QwpQueryError if errors.As(err, &qe) { - log.Printf("query failed: 0x%02X %s", qe.Status, qe.Message) + log.Printf("query %d failed: 0x%02X %s", + qe.RequestId, qe.Status, qe.Message) } break } @@ -788,17 +850,22 @@ for batch, err := range q.Batches() { } ``` -| Code | Name | Description | -| ------ | --------------- | ------------------------------------------------- | -| `0x03` | SCHEMA_MISMATCH | Bind parameter type incompatible with placeholder | -| `0x05` | PARSE_ERROR | SQL syntax error or malformed message | -| `0x06` | INTERNAL_ERROR | Server-side execution failure | -| `0x08` | SECURITY_ERROR | Authorization failure | -| `0x0A` | CANCELLED | Query terminated by `Cancel` | -| `0x0B` | LIMIT_EXCEEDED | Protocol limit hit | - -Errors can arrive before any data or mid-stream. Once an error is yielded, no -further batches arrive for that query. +| Code | Name | Description | +| ------ | --------------- | ---------------------------------------------------- | +| `0x03` | SCHEMA_MISMATCH | Bind parameter type incompatible with placeholder | +| `0x05` | PARSE_ERROR | SQL syntax error or malformed message | +| `0x06` | INTERNAL_ERROR | Server-side execution failure | +| `0x08` | SECURITY_ERROR | Authorization failure | +| `0x09` | WRITE_ERROR | Write failure (e.g. 
table not accepting writes; DML) | +| `0x0A` | CANCELLED | Query terminated by `Cancel` | +| `0x0B` | LIMIT_EXCEEDED | Protocol limit hit | + +`QwpQueryError` also carries `RequestId` (the client-assigned query id — the +correlation key for support tickets and server-log matching) and `Message` +(server-supplied UTF-8, English, may be empty; safe to log, but switch on +`Status`, not on message text). Errors can arrive before any data or +mid-stream. Once an error is yielded, no further batches arrive for that +query. ### Connection-level errors @@ -932,7 +999,26 @@ crashed sibling senders. The query client exposes `ServerInfo()` and There is no per-transition connection callback: connect, disconnect, reconnect, and failover are not delivered as events. Observe reconnect and failover through these counters, and terminal failures through the -[ingestion error handler](#ingestion-errors). +[ingestion error handler](#ingestion-errors). Poll the counters from a +background goroutine: + +```go +go func() { + t := time.NewTicker(10 * time.Second) + defer t.Stop() + for range t.C { + log.Printf("qwp: reconnects=%d/%d replayed=%d stalls=%d", + qs.TotalReconnectsSucceeded(), qs.TotalReconnectAttempts(), + qs.TotalFramesReplayed(), qs.TotalBackpressureStalls()) + if e := qs.LastTerminalError(); e != nil { + // Page on-call: the sender has stopped draining. + log.Printf("qwp TERMINAL: %s", e) + } + } +}() +``` + +where `qs` is the `qdb.QwpSender` from the type assertion shown earlier. 
For background and worked configurations, see [client failover concepts](/docs/high-availability/client-failover/concepts/), From 4633f57e4bb1e2648a7b6ed3cbea15fe5db0993c Mon Sep 17 00:00:00 2001 From: Marko Topolnik Date: Fri, 15 May 2026 17:43:37 +0200 Subject: [PATCH 35/44] Align go.md message_sequence wording with Rust MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SenderError field table committed in d302cf3 described MessageSequence as "the correlation key for support tickets and server-log matching". The Rust QWP page (commit 7d0efa2) instead documents the same wire field as connection-scoped and reset on reconnect, with an explicit statement that the protocol surfaces no server-issued request or connection id. The two client pages told a reader porting between them contradictory stories; the cross-client follow-up posted on PR #444 flagged this. Verified against the Go client and the QWP spec that Rust's framing is the accurate one for Go too: SenderError.MessageSequence carries the raw per-connection wireSeq (qwp_sf_send_loop.go), fsnAtZero is rebound per connection, and only the client-set X-QWP-Client-Id header exists on the upgrade — there is no server-issued request id. Reworded the MessageSequence row to state it resets on reconnect and is meaningful only within one connection, and added a correlation note after the table (closest handle is the (MessageSequence, FromFsn, ToFsn) tuple plus connection start time) mirroring the Rust page so the two now tell the same story. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- documentation/ingestion/clients/go.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index 14954d761..7e63acdff 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -800,9 +800,18 @@ correlation: | `ServerMessage` | `string` | Human-readable server text. **≤ 1024 UTF-8 bytes**, English, may be empty. Safe to log; not a stable pattern-match key (switch on `Category` / `ServerStatusByte`). May echo table / column names — sanitise before forwarding to third-party error trackers. | | `TableName` | `string` | Rejected table; empty for unknown or multi-table batches. | | `FromFsn`,`ToFsn` | `int64` | Inclusive FSN span; join to `FlushAndGetSequence` to identify the rejected rows. | -| `MessageSequence` | `int64` | Server per-frame sequence — the correlation key for support tickets and server-log matching. `NoMessageSequence` (`-1`) for protocol violations. | +| `MessageSequence` | `int64` | Server's per-frame wire sequence for the rejection frame. **Resets on reconnect** — only meaningful within one connection; round-trips verbatim against that connection's server-side logs. Not a standalone correlation key (see below). `NoMessageSequence` (`-1`) for protocol violations. | | `DetectedAt` | `time.Time` | Client-side receipt time, for ops timelines (not for correlation). | +The protocol does not surface a server-issued request or connection +identifier. The closest correlation handle is the `(MessageSequence, +FromFsn, ToFsn)` tuple plus the connection start time from your +application logs — `MessageSequence` resets on reconnect, so it only +disambiguates frames within a single connection. The client sends an +`X-QWP-Client-Id` header (default `go/`) on the upgrade. 
When +filing a support ticket, include the connection start time and the +`(MessageSequence, FromFsn, ToFsn)` triple. + The per-category policy is configurable. Resolution precedence is the policy resolver, then the per-category policy, then the connect-string `on_*_error` keys, then the spec defaults. `CategoryProtocolViolation` and From 5ac06b044b8071cdca4072b0ccc2a5a1a8e54963 Mon Sep 17 00:00:00 2001 From: bluestreak Date: Fri, 15 May 2026 17:40:44 +0100 Subject: [PATCH 36/44] Connect-string reference: document OIDC pass-through, state mTLS unsupported Addresses two QWP-client review gaps on the connect-string reference: the Authentication section was silent on OIDC, and the existing mTLS pointer conflated server-cert trust with client identity. OIDC is now documented as a pass-through (static bearer in `token=`, app rotates), and mTLS is stated flatly as unsupported by the server in both the Authentication and TLS sections. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../client-configuration/connect-string.md | 35 ++++++++++++++++--- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/documentation/client-configuration/connect-string.md b/documentation/client-configuration/connect-string.md index e30afe83a..abd36b45d 100644 --- a/documentation/client-configuration/connect-string.md +++ b/documentation/client-configuration/connect-string.md @@ -195,12 +195,36 @@ WebSocket upgrade request. - `username` — username for HTTP basic authentication. - `password` — password for HTTP basic authentication. - `token` — bearer token sent as `Authorization: Bearer `. Mutually - exclusive with `username` / `password`. + exclusive with `username` / `password`. For OIDC-issued tokens, see + [OIDC](#oidc). - `auth_timeout_ms` — per-host upper bound on the upgrade response read. Does not cover TCP connect, TLS handshake, or post-upgrade frame reads — those use OS or hard-coded defaults. Default: `15000` (15 s). 
-For mutual TLS, configure trusted roots in the [TLS](#tls) section. +**Mutual TLS (mTLS).** Not supported. The client validates the server's +certificate against a trust store but cannot present a client certificate; +the TLS handshake is server-authenticated only. `tls_roots` / +`tls_roots_password` configure server-cert trust, not client identity. Use +`token=` (bearer / OIDC) or `username=` / `password=` for +client authentication. + +### OIDC {#oidc} + +The client does not perform OIDC flows itself — there is no issuer +discovery, no client registration, and no token refresh. To authenticate +against a QuestDB Enterprise server configured with an OIDC provider, +obtain an access token out-of-band and pass it as `token=`; +the server validates the token against its configured OIDC provider and +resolves the principal and groups from the token claims. + +``` +wss::addr=questdb.example.com:443;token=; +``` + +The token is static for the lifetime of the connection. The application +is responsible for refreshing the token and creating a new client (or +reconnecting with an updated connect string) before expiry. `oidc_*` +connect-string keys are not supported. ## TLS {#tls} @@ -222,11 +246,14 @@ TLS is enabled by selecting the `wss` schema. `tls_roots` / `tls_roots_password` are a Java-keystore feature. Some clients (for example, Go) verify against the operating-system trust store only and **reject these keys at parse time**; to trust a private CA there, install it -in the host trust store. Mutual TLS (client certificates) is likewise not -supported by every client. Check the relevant +in the host trust store. Check the relevant [client library page](/docs/connect/overview/#client-libraries) for specifics. +Mutual TLS (client certificates) is not supported by QuestDB — the server +does not negotiate client certificates regardless of client. See +[Authentication](#auth) for the supported credential paths. 
+ ::: See also the [server-side TLS configuration](/docs/security/tls/). From fb8ff3f8ac76593d4b36f825a6c1125d3a41cf0d Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 15 May 2026 18:33:53 +0200 Subject: [PATCH 37/44] Extracts OIDC and DEDUP warnings to shared partials. Completes Java bind-parameter table, adds null-by-omission docs, error diagnostic fields (FSN, message sequence), sender_id uniqueness, sf_append_deadline, and data model consistency. Adds builder-to-connect-string equivalence note on all three pages. Moves mTLS non-support from Rust to the connect-string TLS section. Fixes misleading "mutual TLS" reference. --- .../client-configuration/connect-string.md | 12 +- documentation/ingestion/clients/go.md | 66 +++++----- documentation/ingestion/clients/java.md | 115 +++++++++++++----- documentation/ingestion/clients/rust.md | 42 +++---- .../partials/_oidc-client-note.partial.mdx | 17 +++ .../partials/_sf-dedup-warning.partial.mdx | 11 ++ 6 files changed, 162 insertions(+), 101 deletions(-) create mode 100644 documentation/partials/_oidc-client-note.partial.mdx create mode 100644 documentation/partials/_sf-dedup-warning.partial.mdx diff --git a/documentation/client-configuration/connect-string.md b/documentation/client-configuration/connect-string.md index abd36b45d..c3621caa0 100644 --- a/documentation/client-configuration/connect-string.md +++ b/documentation/client-configuration/connect-string.md @@ -210,12 +210,12 @@ client authentication. ### OIDC {#oidc} -The client does not perform OIDC flows itself — there is no issuer -discovery, no client registration, and no token refresh. To authenticate -against a QuestDB Enterprise server configured with an OIDC provider, -obtain an access token out-of-band and pass it as `token=`; -the server validates the token against its configured OIDC provider and -resolves the principal and groups from the token claims. 
+The client does not perform OIDC flows itself: no issuer discovery, no +client registration, and no token refresh. To authenticate against a +QuestDB Enterprise server configured with an OIDC provider, obtain an +access token out-of-band and pass it as `token=`; the server +validates the token against its configured OIDC provider and resolves the +principal and groups from the token claims. ``` wss::addr=questdb.example.com:443;token=; diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index 7e63acdff..7d6b2e68e 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -9,6 +9,10 @@ description: import { RemoteRepoExample } from "@theme/RemoteRepoExample" +import OidcClientNote from "../../partials/_oidc-client-note.partial.mdx" + +import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" + The QuestDB Go client connects to QuestDB over the [QWP binary protocol](/docs/connect/wire-protocols/qwp-ingress-websocket/) (WebSocket). It supports high-throughput data ingestion and streaming SQL @@ -175,13 +179,11 @@ client, err := qdb.NewQwpQueryClient(ctx, qdb.WithQwpQueryBearerToken("your_bearer_token")) ``` -The client takes a **static** bearer token; it does not acquire or refresh -OIDC tokens. With [OpenID Connect](/docs/security/oidc/), the application -obtains the access token from the identity provider and is responsible for -rotating it before it expires. An expired or revoked token is not refreshed -in place: the next connect or reconnect fails with a `SECURITY_ERROR` (or a -`401`/`403` on the WebSocket upgrade — terminal across all endpoints). To -rotate, construct a new sender or client with the fresh token. + + +For Go, [`coreos/go-oidc`](https://github.com/coreos/go-oidc) or +[`golang.org/x/oauth2`](https://pkg.go.dev/golang.org/x/oauth2) can handle +the token acquisition. 
### Production example (TLS + auth + multi-host) @@ -198,16 +200,14 @@ client, err := qdb.QwpQueryClientFromConf(ctx, "token=your_bearer_token;target=replica;") ``` -### TLS trust store and mTLS +### TLS trust store TLS is enabled by the `wss` schema (or `qdb.WithTls()`). The Go client verifies the server certificate against the **operating-system trust store**. It does **not** support a custom trust store: the `tls_roots` / `tls_roots_password` connect-string keys (a Java-keystore feature) are rejected by the Go connect-string parser. To trust a private CA, install it -in the host trust store. Mutual TLS (client certificates) is **not -supported** by this client — authenticate with a bearer token or basic auth -over `wss` instead. For test-only certificate-verification bypass, see +in the host trust store. For test-only certificate-verification bypass, see `tls_verify` in the [TLS section](/docs/connect/clients/connect-string#tls) of the connect string reference. @@ -242,8 +242,12 @@ sender, err := qdb.LineSenderFromEnv(ctx) ### Using the options API -The options API provides type-safe configuration. `NewLineSender` requires -exactly one transport option (`qdb.WithQwp()` here); +The options API exposes the same options as the connect string, with type-safe +Go signatures (e.g., `sf_append_deadline_millis` becomes +`qdb.WithSfAppendDeadline(30*time.Second)`). For the full list of keys, see +the [connect string reference](/docs/client-configuration/connect-string/). + +`NewLineSender` requires exactly one transport option (`qdb.WithQwp()` here); `LineSenderFromConf` infers the transport from the `ws`/`wss` schema instead. An error handler can only be set through the options API: @@ -484,35 +488,25 @@ replayed after reconnection, surviving sender process restarts: ws::addr=localhost:9000;sf_dir=/var/lib/questdb/sf;sender_id=ingest-1; ``` +When multiple senders share the same `sf_dir`, each must have a distinct +`sender_id`. 
Slots are exclusive: two senders with the same ID will collide. +Allowed characters: `A-Za-z0-9_-`. + Without `sf_dir`, unacknowledged data lives in process memory and is lost if the sender process dies. The reconnect loop still spans transient server outages, but the RAM buffer caps how much data can accumulate. -:::caution Replay is at-least-once — enable DEDUP +<SfDedupWarning /> -After a reconnect or a sender restart, the client replays frames the server -may have accepted but not yet acknowledged. Without -[DEDUP](/docs/concepts/deduplication/) on the target table, replay produces -duplicate rows. Tables ingested over a reconnecting or multi-host connection -**must** declare `DEDUP UPSERT KEYS(...)` covering row identity. See -[Delivery semantics](/docs/concepts/delivery-semantics/) for the full -at-least-once / exactly-once model. +With store-and-forward enabled, `At`/`AtNow`/`Flush` can block when the +buffer hits its cap. The producer blocks until the wire path drains enough +capacity, then returns a deadline error (`sf_append_deadline_millis`) if it +does not drain in time. Treat a blocking call as a signal that the server is +unreachable or slow, not as a reason to retry in a tight loop. -::: - -:::caution Store-and-forward changes how `At` and errors behave - -- **`At`/`AtNow`/`Flush` can block.** When the on-disk buffer hits its cap, - the producer blocks until the wire path drains it, then returns a deadline - error (`sf_append_deadline_millis`) if it does not drain in time. Treat a - blocking `At` as a signal that the server is unreachable or slow, not as a - reason to retry in a tight loop. -- **Terminal rejections halt the sender.** A schema, parse, or security - rejection latches a terminal error. The next producer call returns it as a - typed `*SenderError`; the sender will not drain further. You must `Close` - and create a new sender to continue. - -::: +Terminal rejections (schema, parse, or security errors) latch a terminal +error.
The next producer call returns it as a typed `*SenderError`; the +sender will not drain further. Close it and create a new sender to continue. For concepts, sizing, and recovery, see [store-and-forward](/docs/high-availability/store-and-forward/concepts/) and the diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index 7d90c27fc..540e799ec 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -9,6 +9,10 @@ import Tabs from "@theme/Tabs" import TabItem from "@theme/TabItem" +import OidcClientNote from "../../partials/_oidc-client-note.partial.mdx" + +import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" + import CodeBlock from "@theme/CodeBlock" :::note @@ -101,12 +105,12 @@ import io.questdb.client.cutlass.qwp.client.QwpColumnBatch; try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { client.connect(); client.execute( - "SELECT ts, sym, price, qty FROM trades WHERE sym = 'ETH-USD' LIMIT 10", + "SELECT ts, sym, price, amount FROM trades WHERE sym = 'ETH-USD' LIMIT 10", new QwpColumnBatchHandler() { @Override public void onBatch(QwpColumnBatch batch) { batch.forEachRow(row -> System.out.printf( - "ts=%d sym=%s price=%.4f qty=%d%n", + "ts=%d sym=%s price=%.4f amount=%d%n", row.getLongValue(0), row.getSymbol(1), row.getDoubleValue(2), @@ -169,8 +173,12 @@ try (Sender sender = Sender.fromConfig( } ``` -For OIDC authentication (Enterprise), see -[OpenID Connect](/docs/security/oidc/). + + +For Java, libraries such as +[Nimbus OAuth 2.0 SDK](https://connect2id.com/products/nimbus-oauth-openid-connect-sdk) +or [Spring Security OAuth2](https://docs.spring.io/spring-security/reference/servlet/oauth2/index.html) +can handle the token acquisition. 
## Creating the client @@ -214,7 +222,10 @@ try (Sender sender = Sender.fromEnv()) { ### Using the builder API -The builder provides type-safe configuration: +The builder exposes the same options as the connect string. Method names +follow camelCase convention (e.g., `sf_append_deadline_millis` becomes +`sfAppendDeadlineMillis()`). For the full list of keys, see the +[connect string reference](/docs/client-configuration/connect-string/). ```java try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) @@ -240,6 +251,9 @@ try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { ### General usage pattern +`Sender` is not thread-safe. For multi-threaded workloads, create one instance +per thread or use an object pool. + 1. Create a `Sender` via `Sender.fromConfig()` or the builder. 2. Call `table(name)` to select a table. 3. Call column methods to add values: @@ -260,6 +274,11 @@ try (QwpQueryClient client = QwpQueryClient.newPlainText("localhost", 9000)) { supported for ingestion on either the client or the server. All types are readable on the [egress side](#reading-result-batches). + To store a null for a column, omit that column's setter before calling + `at()` or `atNow()`. The column set for the batch is the union of all + columns seen across rows; a column first used on a later row is backfilled + with null for earlier rows. + 5. Call `at(Instant)`, `at(long, ChronoUnit)`, or `atNow()` to finalize the row. 6. Repeat from step 2, or call `flush()` to send buffered data. 7. Call `close()` when done (or use try-with-resources). 
@@ -270,7 +289,7 @@ try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { .symbol("symbol", "EURUSD") .symbol("side", "buy") .doubleColumn("price", 1.0842) - .longColumn("quantity", 100_000) + .longColumn("amount", 100_000) .at(Instant.now()); } ``` @@ -394,7 +413,7 @@ try (Sender sender = Sender.fromConfig("ws::addr=localhost:9000;")) { sender.table("trades") .symbol("symbol", trade.symbol()) .doubleColumn("price", trade.price()) - .longColumn("quantity", trade.quantity()) + .longColumn("amount", trade.amount()) .at(trade.timestamp()); } sender.flush(); // send everything now, regardless of auto-flush thresholds @@ -407,8 +426,9 @@ WebSocket transport. Use the auto-flush row count and interval settings to control batch size instead. ::: -The client also flushes when closed. However, if the flush fails at close -time, the client does not retry. Always flush explicitly before closing. +The client also flushes when closed, waiting up to `close_flush_timeout_millis` +(default 5000) for acknowledgements. If the flush fails at close time, the +client does not retry. Always flush explicitly before closing. ### Store-and-forward @@ -419,11 +439,25 @@ replayed after reconnection, surviving sender process restarts. ws::addr=localhost:9000;sf_dir=/var/lib/questdb/sf;sender_id=ingest-1; ``` +When multiple senders share the same `sf_dir`, each must have a distinct +`sender_id`. Slots are exclusive: two senders with the same ID will collide. +Allowed characters: `A-Za-z0-9_-`. + Without `sf_dir`, unacknowledged data lives in process memory and is lost if the sender process dies. The reconnect loop still spans transient server outages (rolling upgrades), but the RAM buffer caps how much data can accumulate. + + +With store-and-forward enabled, `flush()` can block when the buffer hits its +cap. The producer blocks until the wire path drains enough capacity, up to +`sf_append_deadline_millis` (default 30 seconds). 
If the deadline elapses, the +call fails without dropping data. Terminal rejections (schema, parse, or +security errors) latch a terminal error on the sender. The next API call +throws `LineSenderServerException`; close the sender and create a new one to +continue. + ### Durable acknowledgement :::note Enterprise @@ -559,7 +593,7 @@ are executed through the same `execute()` method. The server responds with ```java client.execute( "CREATE TABLE trades (" - + "ts TIMESTAMP, sym SYMBOL, price DOUBLE, qty LONG" + + "ts TIMESTAMP, sym SYMBOL, price DOUBLE, amount LONG" + ") TIMESTAMP(ts) PARTITION BY DAY WAL", new QwpColumnBatchHandler() { @Override @@ -590,7 +624,7 @@ Parameterized queries use typed bind values, avoiding SQL injection and enabling server-side factory cache reuse across repeated calls: ```java -String sql = "SELECT ts, sym, price, qty FROM trades " +String sql = "SELECT ts, sym, price, amount FROM trades " + "WHERE sym = $1 AND price >= $2 LIMIT 1000"; for (String symbol : List.of("EURUSD", "GBPUSD", "USDJPY")) { @@ -604,25 +638,39 @@ for (String symbol : List.of("EURUSD", "GBPUSD", "USDJPY")) { } ``` -Bind indices are 0-based (`$1` maps to index 0). Available setters include -`setBoolean`, `setByte`, `setShort`, `setInt`, `setLong`, `setFloat`, -`setDouble`, `setString`, `setVarchar`, `setTimestampMicros`, `setDate`, -`setUuid`, `setDecimal64/128/256`, `setSymbol`, `setNull`, and more. - -To pass a NULL bind value: +Bind indices are 0-based (`$1` maps to index 0). 
Available setters: + +| Setter | Bind type | +|--------|-----------| +| `setBoolean(index, value)` | BOOLEAN | +| `setByte(index, value)` | BYTE | +| `setChar(index, value)` | CHAR | +| `setShort(index, value)` | SHORT | +| `setInt(index, value)` | INT | +| `setLong(index, value)` | LONG | +| `setFloat(index, value)` | FLOAT | +| `setDouble(index, value)` | DOUBLE | +| `setDate(index, millis)` | DATE | +| `setTimestampMicros(index, micros)` | TIMESTAMP | +| `setTimestampNanos(index, nanos)` | `timestamp_ns` | +| `setVarchar(index, value)` | VARCHAR, STRING, and SYMBOL columns | +| `setUuid(index, lo, hi)` or `setUuid(index, UUID)` | UUID | +| `setLong256(index, l0, l1, l2, l3)` | LONG256 | +| `setGeohash(index, precisionBits, value)` | GEOHASH | +| `setDecimal64(index, scale, unscaled)` | DECIMAL64 | +| `setDecimal128(index, scale, lo, hi)` | DECIMAL128 | +| `setDecimal256(index, scale, ll, lh, hl, hh)` | DECIMAL256 | + +To pass a NULL bind value, either pass `null` to `setVarchar` or use the +typed `setNull`: ```java -binds -> binds.setNull(0) +binds -> binds.setVarchar(0, null) // null VARCHAR/SYMBOL +binds -> binds.setNull(0, TYPE_LONG) // typed null (requires QWP type code) +binds -> binds.setNullGeohash(0, 20) // null GEOHASH with precision +binds -> binds.setNullDecimal64(0, 4) // null DECIMAL64 with scale ``` -:::note Server leniency - -The current server accepts a SYMBOL wire type for bind parameters and treats -it as VARCHAR. Compliant clients should send VARCHAR. A future revision may -reject SYMBOL bind type codes. - -::: - ### Flow control For large result sets, byte-credit flow control prevents the server from @@ -672,12 +720,15 @@ try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) Each `SenderError` carries: -- **Category**: `SCHEMA_MISMATCH`, `PARSE_ERROR`, `INTERNAL_ERROR`, - `SECURITY_ERROR`, `WRITE_ERROR`, `PROTOCOL_VIOLATION`, or `UNKNOWN`. 
-- **Policy**: `DROP_AND_CONTINUE` (batch dropped, sender continues) or `HALT` - (sender halted, next API call throws `LineSenderServerException`). -- **Server message**: human-readable error text. -- **Table name**: the rejected table (null for multi-table batches). +| Field | Accessor | Description | +|-------|----------|-------------| +| Category | `getCategory()` | `SCHEMA_MISMATCH`, `PARSE_ERROR`, `INTERNAL_ERROR`, `SECURITY_ERROR`, `WRITE_ERROR`, `PROTOCOL_VIOLATION`, or `UNKNOWN` | +| Policy | `getAppliedPolicy()` | `DROP_AND_CONTINUE` (batch dropped, sender continues) or `HALT` (next API call throws `LineSenderServerException`) | +| Server message | `getServerMessage()` | Human-readable error text from the server (may be null) | +| Table name | `getTableName()` | The rejected table (null for multi-table batches) | +| FSN range | `getFromFsn()` / `getToFsn()` | Frame sequence number span identifying the rejected batch | +| Message sequence | `getMessageSequence()` | Server's per-frame sequence number (`-1` if not available) | +| Status byte | `getServerStatusByte()` | Raw QWP status code (`-1` if not available) | The error handler runs on a dedicated dispatcher thread, never on the I/O or producer thread. diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index b7eaf7b92..fc17db399 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -5,6 +5,10 @@ sidebar_label: Rust description: "QuestDB Rust client for high-throughput data ingestion over the QWP binary protocol (WebSocket)." --- +import OidcClientNote from "../../partials/_oidc-client-note.partial.mdx" + +import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" + The QuestDB Rust client connects to QuestDB over the [QWP binary protocol](/docs/connect/wire-protocols/qwp-ingress-websocket/) (WebSocket). 
It supports high-throughput, column-oriented batched writes with automatic table @@ -89,23 +93,11 @@ let mut sender = Sender::from_conf( )?; ``` -:::note OIDC - -The Rust client does not implement any OIDC flow (client-credentials, -authorization-code, or refresh-token). For OIDC-secured deployments: + -1. Acquire a bearer token in your application using an OIDC library such - as [`openidconnect`](https://crates.io/crates/openidconnect) or - [`oauth2`](https://crates.io/crates/oauth2). -2. Pass it to the client via `token=...`. -3. The client does not refresh the token. When it expires mid-session - the client surfaces a terminal auth error; rebuild the `Sender` with - a fresh token. - -The server-side OIDC flow is documented at -[OpenID Connect](/docs/security/oidc/). - -::: +For Rust, [`openidconnect`](https://crates.io/crates/openidconnect) or +[`oauth2`](https://crates.io/crates/oauth2) can handle the token +acquisition. ### TLS @@ -126,15 +118,6 @@ Supported values: | `tls_roots=/path/to/root-ca.pem` | Load roots from a PEM file. Useful for self-signed certs during testing. | | `tls_verify=unsafe_off` | Disable verification. Never use in production. | -:::note mTLS (client certificates) not supported - -The Rust client does not currently implement mTLS / client-certificate -auth. The TLS surface is one-way: the client verifies the server, not -the other way round. For credential auth, use HTTP basic (`username` + -`password`) or bearer token (`token`). - -::: - ### Authentication timeout `auth_timeout_ms` (default 15000) controls how long the client waits for the @@ -171,7 +154,10 @@ let mut sender = Sender::from_env()?; ### Using the builder API -The builder lets you configure programmatically: +The builder exposes the same options as the connect string, with Rust-typed +signatures (e.g., `sf_append_deadline_millis` becomes +`sf_append_deadline(Duration::from_secs(30))`). 
For the full list of keys, see +the [connect string reference](/docs/client-configuration/connect-string/). ```rust use questdb::ingress::{Protocol, SenderBuilder, QwpWsProgress}; @@ -279,7 +265,7 @@ fn main() -> Result<()> { let mut buffer = sender.new_buffer(); buffer .table("fx_order_book")? - .symbol("symbol", "EUR/USD")? + .symbol("symbol", "EURUSD")? .column_arr("bids", &vec![ vec![1.0850, 600000.0], vec![1.0849, 300000.0], @@ -460,6 +446,8 @@ Without `sf_dir`, unacknowledged data lives in process memory and is lost if the sender process exits. The reconnect loop still spans transient server outages, but a RAM cap bounds how much data can accumulate. + + ### SF tuning keys | Key | Default | Description | diff --git a/documentation/partials/_oidc-client-note.partial.mdx b/documentation/partials/_oidc-client-note.partial.mdx new file mode 100644 index 000000000..97716f27a --- /dev/null +++ b/documentation/partials/_oidc-client-note.partial.mdx @@ -0,0 +1,17 @@ +:::note OIDC (Enterprise) + +QuestDB client libraries do not implement OIDC flows (client-credentials, +authorization-code, or refresh-token). For OIDC-secured deployments: + +1. Acquire a bearer token in your application using an OIDC library for + your language. +2. Pass it to the client via `token=...` in the connect string. +3. The client does not refresh the token. When it expires mid-session, + the next connect or reconnect fails with `SECURITY_ERROR` (or a + `401`/`403` on the WebSocket upgrade, terminal across all endpoints). + Close the client and create a new one with a fresh token. + +The server-side OIDC flow is documented at +[OpenID Connect](/docs/security/oidc/). 
+ +::: diff --git a/documentation/partials/_sf-dedup-warning.partial.mdx b/documentation/partials/_sf-dedup-warning.partial.mdx new file mode 100644 index 000000000..d01c79f1f --- /dev/null +++ b/documentation/partials/_sf-dedup-warning.partial.mdx @@ -0,0 +1,11 @@ +:::caution Replay is at-least-once — enable DEDUP + +After a reconnect or a sender restart, the client replays frames the server +may have accepted but not yet acknowledged. Without +[DEDUP](/docs/concepts/deduplication/) on the target table, replay produces +duplicate rows. Tables ingested over a reconnecting or multi-host connection +**must** declare `DEDUP UPSERT KEYS(...)` covering row identity. See +[Delivery semantics](/docs/concepts/delivery-semantics/) for the full +at-least-once / exactly-once model. + +::: From 9f65d91ae50e5900e4e4a6d4b51f82f871552d98 Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 15 May 2026 19:10:32 +0200 Subject: [PATCH 38/44] Remove OIDC from client pages; recommend token auth for Enterprise OIDC is a server-side concern, not a client API. Removes OIDC partial, inline OIDC guidance, and library hints from Java, Go, and Rust pages. Removes OIDC subsection from connect-string reference. Recommends token auth over basic auth for Enterprise on all pages. Fixes Go production example to use token instead of username/password and explains the target key. Fixes broken connect-string links (slug vs filesystem path). 
--- .../client-configuration/connect-string.md | 26 +++--------------- documentation/ingestion/clients/go.md | 27 +++++++++---------- documentation/ingestion/clients/java.md | 16 ++++------- documentation/ingestion/clients/rust.md | 15 ++++------- .../partials/_oidc-client-note.partial.mdx | 17 ------------ 5 files changed, 27 insertions(+), 74 deletions(-) delete mode 100644 documentation/partials/_oidc-client-note.partial.mdx diff --git a/documentation/client-configuration/connect-string.md b/documentation/client-configuration/connect-string.md index c3621caa0..4539b07a1 100644 --- a/documentation/client-configuration/connect-string.md +++ b/documentation/client-configuration/connect-string.md @@ -195,8 +195,9 @@ WebSocket upgrade request. - `username` — username for HTTP basic authentication. - `password` — password for HTTP basic authentication. - `token` — bearer token sent as `Authorization: Bearer `. Mutually - exclusive with `username` / `password`. For OIDC-issued tokens, see - [OIDC](#oidc). + exclusive with `username` / `password`. Token auth avoids the per-request + overhead of basic auth and is the recommended path for Enterprise + deployments. - `auth_timeout_ms` — per-host upper bound on the upgrade response read. Does not cover TCP connect, TLS handshake, or post-upgrade frame reads — those use OS or hard-coded defaults. Default: `15000` (15 s). @@ -205,26 +206,7 @@ WebSocket upgrade request. certificate against a trust store but cannot present a client certificate; the TLS handshake is server-authenticated only. `tls_roots` / `tls_roots_password` configure server-cert trust, not client identity. Use -`token=` (bearer / OIDC) or `username=` / `password=` for -client authentication. - -### OIDC {#oidc} - -The client does not perform OIDC flows itself: no issuer discovery, no -client registration, and no token refresh. 
To authenticate against a -QuestDB Enterprise server configured with an OIDC provider, obtain an -access token out-of-band and pass it as `token=`; the server -validates the token against its configured OIDC provider and resolves the -principal and groups from the token claims. - -``` -wss::addr=questdb.example.com:443;token=; -``` - -The token is static for the lifetime of the connection. The application -is responsible for refreshing the token and creating a new client (or -reconnecting with an updated connect string) before expiry. `oidc_*` -connect-string keys are not supported. +`token=` or `username=` / `password=` for client authentication. ## TLS {#tls} diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index 7d6b2e68e..7031356ff 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -9,8 +9,6 @@ description: import { RemoteRepoExample } from "@theme/RemoteRepoExample" -import OidcClientNote from "../../partials/_oidc-client-note.partial.mdx" - import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" The QuestDB Go client connects to QuestDB over the @@ -167,7 +165,10 @@ sender, err := qdb.NewLineSender(ctx, qdb.WithBasicAuth("admin", "quest")) ``` -### Token auth (Enterprise) +### Token auth (Enterprise, recommended) + +Token authentication avoids the per-request overhead of basic auth and is +the recommended path for Enterprise deployments. ```go sender, err := qdb.LineSenderFromConf(ctx, @@ -179,22 +180,20 @@ client, err := qdb.NewQwpQueryClient(ctx, qdb.WithQwpQueryBearerToken("your_bearer_token")) ``` - - -For Go, [`coreos/go-oidc`](https://github.com/coreos/go-oidc) or -[`golang.org/x/oauth2`](https://pkg.go.dev/golang.org/x/oauth2) can handle -the token acquisition. 
- -### Production example (TLS + auth + multi-host) +### Production example (TLS + token + multi-host) -The realistic Enterprise shape combines `wss`, credentials, and a multi-host -`addr` list in a single connect string: +A realistic Enterprise deployment combines `wss`, token auth, and a +multi-host `addr` list. The `target` key controls which server roles the +client will connect to: `primary` for the authoritative write node, +`replica` for read-only replicas, or `any` (default) for either. ```go +// Ingestion: connect to any writeable node sender, err := qdb.LineSenderFromConf(ctx, "wss::addr=db-1.example.com:9000,db-2.example.com:9000;"+ - "username=ingest;password=secret;") + "token=your_bearer_token;") +// Querying: prefer a replica to offload the primary client, err := qdb.QwpQueryClientFromConf(ctx, "wss::addr=db-1.example.com:9000,db-2.example.com:9000;"+ "token=your_bearer_token;target=replica;") @@ -245,7 +244,7 @@ sender, err := qdb.LineSenderFromEnv(ctx) The options API exposes the same options as the connect string, with type-safe Go signatures (e.g., `sf_append_deadline_millis` becomes `qdb.WithSfAppendDeadline(30*time.Second)`). For the full list of keys, see -the [connect string reference](/docs/client-configuration/connect-string/). +the [connect string reference](/docs/connect/clients/connect-string/). `NewLineSender` requires exactly one transport option (`qdb.WithQwp()` here); `LineSenderFromConf` infers the transport from the `ws`/`wss` schema instead. 
diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index 540e799ec..9cf938e15 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -9,8 +9,6 @@ import Tabs from "@theme/Tabs" import TabItem from "@theme/TabItem" -import OidcClientNote from "../../partials/_oidc-client-note.partial.mdx" - import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" import CodeBlock from "@theme/CodeBlock" @@ -155,7 +153,10 @@ try (QwpQueryClient client = QwpQueryClient.fromConfig( } ``` -### Token auth (Enterprise) +### Token auth (Enterprise, recommended) + +Token authentication avoids the per-request overhead of basic auth and is +the recommended path for Enterprise deployments. ```java try (Sender sender = Sender.fromConfig( @@ -173,13 +174,6 @@ try (Sender sender = Sender.fromConfig( } ``` - - -For Java, libraries such as -[Nimbus OAuth 2.0 SDK](https://connect2id.com/products/nimbus-oauth-openid-connect-sdk) -or [Spring Security OAuth2](https://docs.spring.io/spring-security/reference/servlet/oauth2/index.html) -can handle the token acquisition. - ## Creating the client ### From a connect string @@ -225,7 +219,7 @@ try (Sender sender = Sender.fromEnv()) { The builder exposes the same options as the connect string. Method names follow camelCase convention (e.g., `sf_append_deadline_millis` becomes `sfAppendDeadlineMillis()`). For the full list of keys, see the -[connect string reference](/docs/client-configuration/connect-string/). +[connect string reference](/docs/connect/clients/connect-string/). 
```java try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index fc17db399..444a58eec 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -5,8 +5,6 @@ sidebar_label: Rust description: "QuestDB Rust client for high-throughput data ingestion over the QWP binary protocol (WebSocket)." --- -import OidcClientNote from "../../partials/_oidc-client-note.partial.mdx" - import SfDedupWarning from "../../partials/_sf-dedup-warning.partial.mdx" The QuestDB Rust client connects to QuestDB over the @@ -85,7 +83,10 @@ let mut sender = Sender::from_conf( )?; ``` -### Token auth (Enterprise) +### Token auth (Enterprise, recommended) + +Token authentication avoids the per-request overhead of basic auth and is +the recommended path for Enterprise deployments. ```rust let mut sender = Sender::from_conf( @@ -93,12 +94,6 @@ let mut sender = Sender::from_conf( )?; ``` - - -For Rust, [`openidconnect`](https://crates.io/crates/openidconnect) or -[`oauth2`](https://crates.io/crates/oauth2) can handle the token -acquisition. - ### TLS Use the `wss` schema for TLS. Select where root certificates come from with @@ -157,7 +152,7 @@ let mut sender = Sender::from_env()?; The builder exposes the same options as the connect string, with Rust-typed signatures (e.g., `sf_append_deadline_millis` becomes `sf_append_deadline(Duration::from_secs(30))`). For the full list of keys, see -the [connect string reference](/docs/client-configuration/connect-string/). +the [connect string reference](/docs/connect/clients/connect-string/). 
```rust use questdb::ingress::{Protocol, SenderBuilder, QwpWsProgress}; diff --git a/documentation/partials/_oidc-client-note.partial.mdx b/documentation/partials/_oidc-client-note.partial.mdx deleted file mode 100644 index 97716f27a..000000000 --- a/documentation/partials/_oidc-client-note.partial.mdx +++ /dev/null @@ -1,17 +0,0 @@ -:::note OIDC (Enterprise) - -QuestDB client libraries do not implement OIDC flows (client-credentials, -authorization-code, or refresh-token). For OIDC-secured deployments: - -1. Acquire a bearer token in your application using an OIDC library for - your language. -2. Pass it to the client via `token=...` in the connect string. -3. The client does not refresh the token. When it expires mid-session, - the next connect or reconnect fails with `SECURITY_ERROR` (or a - `401`/`403` on the WebSocket upgrade, terminal across all endpoints). - Close the client and create a new one with a fresh token. - -The server-side OIDC flow is documented at -[OpenID Connect](/docs/security/oidc/). - -::: From 3c9f251ee49980187e095523828a76a28907d42c Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 15 May 2026 20:04:43 +0200 Subject: [PATCH 39/44] docs(clients): document HA edge cases from agent testing Single-endpoint failover does not retry (rotates, not retries). Query client enters terminal state after failover exhaustion (must recreate). Ingress/egress failover asymmetry documented. onFailoverReset is mid- stream only. target=replica needs N+1 replicas. Go production example uses three endpoints. All findings verified against client source code. 
--- documentation/ingestion/clients/go.md | 27 ++++++++++++++++-- documentation/ingestion/clients/java.md | 38 +++++++++++++++++++++++-- documentation/ingestion/clients/rust.md | 8 ++++++ 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index 7031356ff..a6092a8de 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -195,10 +195,17 @@ sender, err := qdb.LineSenderFromConf(ctx, // Querying: prefer a replica to offload the primary client, err := qdb.QwpQueryClientFromConf(ctx, - "wss::addr=db-1.example.com:9000,db-2.example.com:9000;"+ + "wss::addr=db-1.example.com:9000,db-2.example.com:9000,db-3.example.com:9000;"+ "token=your_bearer_token;target=replica;") ``` +With `target=replica`, the client skips any endpoint whose role is PRIMARY. +If a replica gets promoted to primary (e.g., after the old primary dies), +the client has one fewer eligible endpoint. In a two-node cluster with +`target=replica`, a single promotion leaves zero eligible endpoints and the +client cannot query. Use `target=any` for two-node setups, or provide at +least three endpoints when using `target=replica`. + ### TLS trust store TLS is enabled by the `wss` schema (or `qdb.WithTls()`). The Go client @@ -948,7 +955,16 @@ the producer; you do not change the ingestion loop for it. ### Query failover The query client drives a per-query reconnect loop. On a mid-stream transport -error it reconnects and replays the query. +error it reconnects to another endpoint and replays the query. + +:::warning Failover requires multiple endpoints + +Failover rotates across endpoints. With a single `addr`, there is no other +host to try, and the loop exhausts after one attempt regardless of +`failover_max_attempts`. For failover to be useful, provide at least two +addresses. 
+ +::: | Key | Default | Description | | ----------------------------- | ------- | --------------------------------- | @@ -989,6 +1005,13 @@ returning on any error treats a reset as terminal, which the client supports explicitly. When the failover budget is consumed, `Batches()` (and `Exec`) return `*QwpFailoverExhaustedError`. +After failover exhaustion, the query client enters a terminal state. +Subsequent `Query` or `Exec` calls fail until you `Close` the client and +create a new one. This differs from ingestion, where the `LineSender` has a +continuous reconnect loop (`reconnect_max_duration_millis`, default 5 +minutes) that spans full outages transparently. The query client reconnects +only within the scope of a single query. + ### Observability `QwpSender` exposes counters for dashboards: `TotalReconnectAttempts`, diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index 9cf938e15..a8d5375b1 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -809,8 +809,8 @@ if the sender process dies. ### Query failover The query client drives a per-query reconnect loop. When a transport error -occurs mid-stream, the client reconnects and replays the query. `batch_seq` -restarts at 0 on the new connection. +occurs mid-stream, the client reconnects to another endpoint and replays the +query. `batch_seq` restarts at 0 on the new connection. Key connect-string options: @@ -822,6 +822,15 @@ Key connect-string options: | `failover_backoff_max_ms` | `1000` | Cap on per-attempt sleep. | | `failover_max_duration_ms` | `30000` | Total wall-clock budget per query. | +:::warning Failover requires multiple endpoints + +Failover rotates across endpoints. With a single `addr`, there is no other +host to try, and the loop exhausts after one attempt regardless of +`failover_max_attempts`. For failover to be useful, provide at least two +addresses. 
+ +::: + **Handling partial results**: when failover occurs mid-stream, the `onFailoverReset` callback fires before replayed batches arrive. Use it to clear any accumulated state: @@ -837,6 +846,31 @@ public void onFailoverReset(QwpServerInfo newNode) { If you do not clear state, you will see overlapping data (the server replays the full result set). +`onFailoverReset` is a mid-stream event only. It does not fire during +`connect()` or between queries. If the connection drops between queries, +the next `execute()` call handles the reconnect internally. + +**Terminal failure**: when all endpoints are unreachable and the failover +budget is exhausted, the error is delivered via `onError` and the +`QwpQueryClient` enters a terminal state. Subsequent `execute()` calls +throw `IllegalStateException`. Close the client and create a new one: + +```java +try { + client.execute(sql, handler); +} catch (IllegalStateException e) { + client.close(); + client = QwpQueryClient.fromConfig("ws::addr=..."); + client.connect(); + client.execute(sql, handler); +} +``` + +This differs from ingestion, where the `Sender` has a continuous reconnect +loop (`reconnect_max_duration_millis`, default 5 minutes) that spans full +outages transparently. The query client does not have an equivalent; it +reconnects only within the scope of a single `execute()` call. + ### Connection events For ingestion, register a `SenderConnectionListener` to observe connection diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index 444a58eec..18bc1dd6e 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -633,6 +633,14 @@ ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; The client picks an endpoint, connects, and walks the list to find the next healthy peer when the current connection breaks. +:::warning Failover requires multiple endpoints + +Failover rotates across endpoints. 
With a single `addr`, there is no other +host to try, and the reconnect loop exhausts after one attempt. For failover +to be useful, provide at least two addresses. + +::: + ### Reconnect knobs | Key | Default | Description | From c05eaecf2a941a2fcf5ed1aa01917eaba2c78f29 Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 15 May 2026 20:34:54 +0200 Subject: [PATCH 40/44] docs(java): add full enterprise example, revert unverified Go/Rust HA claims Adds end-to-end example combining ingestion (builder + TLS + token + multi-host + connection events + DoubleArray) and querying (connect string + recreate-on-failure pattern + onFailoverReset with correct QwpServerInfo accessors). Enterprise features marked with inline comments. Adds builder enterprise example in Creating the Client. Reverts unverified single-endpoint and terminal-state additions from Go and Rust pages (not verified against those client codebases). --- documentation/ingestion/clients/go.md | 27 +--- documentation/ingestion/clients/java.md | 181 ++++++++++++++++++++++++ documentation/ingestion/clients/rust.md | 8 -- 3 files changed, 183 insertions(+), 33 deletions(-) diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index a6092a8de..7031356ff 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -195,17 +195,10 @@ sender, err := qdb.LineSenderFromConf(ctx, // Querying: prefer a replica to offload the primary client, err := qdb.QwpQueryClientFromConf(ctx, - "wss::addr=db-1.example.com:9000,db-2.example.com:9000,db-3.example.com:9000;"+ + "wss::addr=db-1.example.com:9000,db-2.example.com:9000;"+ "token=your_bearer_token;target=replica;") ``` -With `target=replica`, the client skips any endpoint whose role is PRIMARY. -If a replica gets promoted to primary (e.g., after the old primary dies), -the client has one fewer eligible endpoint. 
In a two-node cluster with -`target=replica`, a single promotion leaves zero eligible endpoints and the -client cannot query. Use `target=any` for two-node setups, or provide at -least three endpoints when using `target=replica`. - ### TLS trust store TLS is enabled by the `wss` schema (or `qdb.WithTls()`). The Go client @@ -955,16 +948,7 @@ the producer; you do not change the ingestion loop for it. ### Query failover The query client drives a per-query reconnect loop. On a mid-stream transport -error it reconnects to another endpoint and replays the query. - -:::warning Failover requires multiple endpoints - -Failover rotates across endpoints. With a single `addr`, there is no other -host to try, and the loop exhausts after one attempt regardless of -`failover_max_attempts`. For failover to be useful, provide at least two -addresses. - -::: +error it reconnects and replays the query. | Key | Default | Description | | ----------------------------- | ------- | --------------------------------- | @@ -1005,13 +989,6 @@ returning on any error treats a reset as terminal, which the client supports explicitly. When the failover budget is consumed, `Batches()` (and `Exec`) return `*QwpFailoverExhaustedError`. -After failover exhaustion, the query client enters a terminal state. -Subsequent `Query` or `Exec` calls fail until you `Close` the client and -create a new one. This differs from ingestion, where the `LineSender` has a -continuous reconnect loop (`reconnect_max_duration_millis`, default 5 -minutes) that spans full outages transparently. The query client reconnects -only within the scope of a single query. 
- ### Observability `QwpSender` exposes counters for dashboards: `TotalReconnectAttempts`, diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index a8d5375b1..5fdb82c38 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -231,6 +231,39 @@ try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) } ``` +**Enterprise builder with TLS, token auth, and listeners:** + +```java +try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("db-primary:9000") + .address("db-replica:9000") + .enableTls() + .advancedTls().disableCertificateValidation() // test only + .httpToken("your_bearer_token") // works for WebSocket too + .reconnectMaxDurationMillis(300_000) + .reconnectInitialBackoffMillis(100) + .reconnectMaxBackoffMillis(5_000) + .errorHandler(error -> { + System.err.printf("batch rejected: category=%s table=%s msg=%s%n", + error.getCategory(), error.getTableName(), + error.getServerMessage()); + }) + .connectionListener(event -> { + System.out.printf("connection: %s host=%s:%d%n", + event.getKind(), event.getHost(), event.getPort()); + }) + .build()) { + // ... +} +``` + +:::note +The token method is named `httpToken()` for historical reasons but works +on all transports including WebSocket. For production TLS, use +`advancedTls().customTrustStore(path, password)` instead of +`disableCertificateValidation()`. +::: + For `QwpQueryClient`, use the factory methods or configure post-construction: ```java @@ -971,3 +1004,151 @@ unchanged. The main differences: To migrate, change your connect string from `http::` to `ws::` (or `https::` to `wss::`), register a `SenderErrorHandler` for async error handling, and adjust auto-flush settings if needed. + +## Full example: ingestion and querying with failover + +This example combines ingestion with 2D arrays and connection events, then +queries the data back with the recreate-on-failure pattern for egress. 
It +uses the builder API with enterprise TLS and token auth. + +```java +import io.questdb.client.Sender; +import io.questdb.client.cutlass.line.array.DoubleArray; +import io.questdb.client.cutlass.qwp.client.QwpColumnBatch; +import io.questdb.client.cutlass.qwp.client.QwpColumnBatchHandler; +import io.questdb.client.cutlass.qwp.client.QwpQueryClient; +import io.questdb.client.cutlass.qwp.client.QwpServerInfo; + +import java.time.Instant; +import java.util.concurrent.ThreadLocalRandom; + +// ─── Ingestion (builder API with connection events) ───────────────── + +try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) + .address("db-primary:9000") // Enterprise: multi-host + .address("db-replica:9000") // Enterprise: multi-host + .enableTls() // Enterprise: wss (TLS) + .advancedTls().disableCertificateValidation() // test only! + .httpToken("your_bearer_token") // Enterprise: token auth (works for WS too) + .reconnectMaxDurationMillis(300_000) + .reconnectInitialBackoffMillis(100) + .reconnectMaxBackoffMillis(5_000) + .errorHandler(error -> { + System.err.printf("batch rejected: category=%s table=%s msg=%s%n", + error.getCategory(), error.getTableName(), + error.getServerMessage()); + }) + .connectionListener(event -> { + System.out.printf("connection: %s host=%s:%d%n", + event.getKind(), event.getHost(), event.getPort()); + }) + .build(); + DoubleArray bids = new DoubleArray(5, 2); + DoubleArray asks = new DoubleArray(5, 2)) { + + for (int i = 0; i < 100; i++) { + bids.clear(); + asks.clear(); + for (int lvl = 0; lvl < 5; lvl++) { + bids.append(1.0842 - 0.0001 * (lvl + 1)); // price + bids.append(100_000 + ThreadLocalRandom.current().nextInt(900_000)); // size + asks.append(1.0842 + 0.0001 * (lvl + 1)); + asks.append(100_000 + ThreadLocalRandom.current().nextInt(900_000)); + } + sender.table("book") + .symbol("ticker", "EURUSD") + .doubleArray("bids", bids) + .doubleArray("asks", asks) + .at(Instant.now()); + } + sender.flush(); +} + +// Connection events 
you will see: +// CONNECTED host=db-primary:9000 — initial connection +// DISCONNECTED host=db-primary:9000 — primary goes down +// ENDPOINT_ATTEMPT_FAILED host=... — retry attempts during outage +// ALL_ENDPOINTS_UNREACHABLE host=... — all hosts down (retries continue) +// FAILED_OVER host=db-replica:9000 — replica promoted, sender resumes + +// The Sender buffers rows in memory during outage and delivers them +// when a host becomes reachable, within the reconnect budget (default 5 min). + + +// ─── Querying (connect string, with reconnect-on-failure) ─────────── + +// The QwpQueryClient becomes permanently dead after a total outage +// exhausts the failover budget. The application must close the dead +// client and create a new one. This pattern handles that: + +String connString = + "wss::addr=db-primary:9000,db-replica:9000,db-replica2:9000;" // Enterprise: wss, multi-host + + "token=your_bearer_token;" // Enterprise: token auth + + "tls_verify=unsafe_off;" // test only! + + "failover=on;" // Enterprise: failover + + "failover_max_attempts=8;" + + "failover_max_duration_ms=30000;"; + +QwpQueryClient client = null; + +while (true) { + // Reconnect if the client is dead + if (client == null) { + try { + client = QwpQueryClient.fromConfig(connString); + client.connect(); + } catch (Exception e) { + System.err.println("connect failed: " + e.getMessage()); + client = null; + Thread.sleep(2000); + continue; + } + } + + try { + client.execute( + "SELECT ts, ticker, bids[1][1] AS best_bid, asks[1][1] AS best_ask " + + "FROM book ORDER BY ts DESC LIMIT 10", + new QwpColumnBatchHandler() { + @Override + public void onBatch(QwpColumnBatch batch) { + batch.forEachRow(row -> System.out.printf( + "ts=%s ticker=%s bid=%.5f ask=%.5f%n", + Instant.ofEpochMilli(row.getLongValue(0) / 1000), + row.getSymbol(1), + row.getDoubleValue(2), + row.getDoubleValue(3))); + } + + @Override + public void onEnd(long totalRows) { + System.out.println("(" + totalRows + " rows)"); + } + + 
@Override + public void onError(byte status, String message) { + System.err.printf("query error: 0x%02X %s%n", + status & 0xFF, message); + } + + @Override + public void onFailoverReset(QwpServerInfo newNode) { + // Fires only when failover happens mid-query. + // Clear any accumulated partial results here. + System.out.printf("failover to node=%s role=%s%n", + newNode.getNodeId(), + QwpServerInfo.roleName(newNode.getRole())); + } + } + ); + } catch (Exception e) { + // Failover budget exhausted or client dead — recreate + System.err.println("query failed: " + e.getMessage()); + try { client.close(); } catch (Exception ignored) { } + client = null; + System.out.println("(will reconnect on next query)"); + } + + Thread.sleep(2000); +} +``` diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index 18bc1dd6e..444a58eec 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -633,14 +633,6 @@ ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; The client picks an endpoint, connects, and walks the list to find the next healthy peer when the current connection breaks. -:::warning Failover requires multiple endpoints - -Failover rotates across endpoints. With a single `addr`, there is no other -host to try, and the reconnect loop exhausts after one attempt. For failover -to be useful, provide at least two addresses. - -::: - ### Reconnect knobs | Key | Default | Description | From 8a1f6e24022c8804223520b7aaab631f3e23c3a6 Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 15 May 2026 21:03:15 +0200 Subject: [PATCH 41/44] docs(rust): SF recommendation for multi-host, logging hint, full example Verified against c-questdb-client ia_qwp_ws branch. Adds warning that sf_dir is strongly recommended for multi-host (flush blocks without it), env_logger example for observing reconnect events, and full tested failover example with retry pattern around flush() and must_close(). 
--- documentation/ingestion/clients/rust.md | 88 ++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/documentation/ingestion/clients/rust.md b/documentation/ingestion/clients/rust.md index 444a58eec..6d78f48e4 100644 --- a/documentation/ingestion/clients/rust.md +++ b/documentation/ingestion/clients/rust.md @@ -633,6 +633,17 @@ ws::addr=db-primary:9000,db-replica-1:9000,db-replica-2:9000; The client picks an endpoint, connects, and walks the list to find the next healthy peer when the current connection breaks. +:::tip Strongly recommend sf_dir for multi-host deployments + +Without `sf_dir`, `flush()` blocks when the connection is down and the +in-memory queue fills up. After `sf_append_deadline_millis` (default 30s), +it returns `SubmitTimedOut`. With `sf_dir`, `flush()` writes to disk and +returns quickly while the reconnect loop replays to the new primary in the +background. For any deployment where failover may take more than a few +seconds, `sf_dir` is strongly recommended. + +::: + ### Reconnect knobs | Key | Default | Description | @@ -653,7 +664,15 @@ unchanged. The Rust client does not currently expose connection-state event callbacks (the equivalent of Java's `SenderConnectionListener`). Connection lifecycle is observable through `log` crate output and through error notifications -delivered to the polling API or the `qwp_ws_error_handler` callback. +delivered to the polling API or the `qwp_ws_error_handler` callback. To see +reconnect events, enable logging for the `questdb` target: + +```rust +// e.g., with the env_logger crate +env_logger::Builder::from_env( + env_logger::Env::default().default_filter_or("questdb=info") +).init(); +``` ### Error classification @@ -746,6 +765,73 @@ To migrate an existing sender, change the connect string from `http::` to `qwp_ws_error_handler` or poll `poll_qwp_ws_error()`, and call `close_drain()` before dropping the sender. 
+## Full example: multi-host ingestion with failover
+
+This example shows a production ingestion loop with store-and-forward,
+multi-host failover, and proper error handling including the retry pattern
+around `flush()`.
+
+```rust
+use questdb::ingress::{Sender, TimestampNanos};
+use std::{thread, time::Duration};
+
+fn main() -> questdb::Result<()> {
+    // Multi-host with store-and-forward for failover durability.
+    // Without sf_dir, flush() blocks during an outage and times out
+    // after sf_append_deadline_millis (default 30s). With sf_dir, it
+    // writes to disk and returns quickly while the reconnect loop
+    // replays in the background. tls_verify=unsafe_off is test only!
+    let mut sender = Sender::from_conf(
+        "wss::addr=db-primary:9000,db-replica:9000;\
+        token=your_bearer_token;\
+        tls_verify=unsafe_off;\
+        sf_dir=/var/lib/myapp/qdb-sf;\
+        sender_id=ingest-1;\
+        reconnect_max_duration_millis=300000;"
+    )?;
+
+    let mut buffer = sender.new_buffer();
+
+    loop {
+        buffer
+            .table("book")?
+            .symbol("ticker", "EURUSD")?
+            .column_f64("price", 1.0842)?
+            .column_f64("size", 100_000.0)?
+            .at(TimestampNanos::now())?;
+
+        // flush() can still return SubmitTimedOut if the SF queue
+        // fills to sf_max_total_bytes during a prolonged outage.
+        // The buffer is retained on error; retry on the next pass.
+        match sender.flush(&mut buffer) {
+            Ok(()) => {}
+            Err(e) => {
+                eprintln!("flush error: {e}");
+                // Check if the sender is terminal (auth failure,
+                // reconnect budget exhausted). If so, recreate it.
+                if sender.must_close() {
+                    eprintln!("sender is terminal, exiting");
+                    break;
+                }
+                // Otherwise the buffer still holds the rows;
+                // the next flush() retries them. 
+ } + } + + thread::sleep(Duration::from_millis(500)); + } + + sender.close_drain()?; + Ok(()) +} + +// Without store-and-forward (sf_dir not set), the same code works for +// short outages but flush() will return SubmitTimedOut if the in-memory +// queue fills before the reconnect loop succeeds. For any multi-host +// deployment where failover may take more than a few seconds, sf_dir +// is strongly recommended. +``` + ## Next steps Explore the full API on the From 2a71ea374466909a46852e37918e1fde045c8ba7 Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 15 May 2026 21:10:46 +0200 Subject: [PATCH 42/44] docs(java): add store-and-forward to full enterprise example Adds storeAndForwardDir and senderId to the ingestion builder example. Updates trailing comments to explain SF vs memory-only behavior during outages. --- documentation/ingestion/clients/java.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index 5fdb82c38..54773d003 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -1024,12 +1024,19 @@ import java.util.concurrent.ThreadLocalRandom; // ─── Ingestion (builder API with connection events) ───────────────── +// Multi-host with store-and-forward for failover durability. +// Without sf_dir, data buffered during an outage lives in process memory +// and is lost if the sender process dies. With sf_dir, unacknowledged +// frames are persisted to disk and replayed after reconnection. + try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) .address("db-primary:9000") // Enterprise: multi-host .address("db-replica:9000") // Enterprise: multi-host .enableTls() // Enterprise: wss (TLS) .advancedTls().disableCertificateValidation() // test only! 
.httpToken("your_bearer_token") // Enterprise: token auth (works for WS too) + .storeAndForwardDir("/var/lib/myapp/qdb-sf") // durability across outages + .senderId("ingest-1") // unique per sender process .reconnectMaxDurationMillis(300_000) .reconnectInitialBackoffMillis(100) .reconnectMaxBackoffMillis(5_000) @@ -1071,8 +1078,10 @@ try (Sender sender = Sender.builder(Sender.Transport.WEBSOCKET) // ALL_ENDPOINTS_UNREACHABLE host=... — all hosts down (retries continue) // FAILED_OVER host=db-replica:9000 — replica promoted, sender resumes -// The Sender buffers rows in memory during outage and delivers them -// when a host becomes reachable, within the reconnect budget (default 5 min). +// With sf_dir set, unacknowledged frames are persisted to disk during +// the outage and replayed when the new primary becomes reachable. +// Without sf_dir, the Sender buffers in memory only — the reconnect +// loop still works, but data is lost if the sender process dies. // ─── Querying (connect string, with reconnect-on-failure) ─────────── From c2a9b14219bcbc61c889fc4b31dd884db37a6759 Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 15 May 2026 21:12:53 +0200 Subject: [PATCH 43/44] docs(java,rust): full enterprise examples with SF, remove stale version caution Java: adds store-and-forward to the full enterprise example with builder API, connection events, and egress reconnect-on-failure pattern. Removes outdated "requires QuestDB 9.2.0" caution on decimals (QWP itself requires QuestDB 10). Rust: adds SF recommendation for multi-host, env_logger hint for observing reconnect events, and full tested failover example with flush() retry pattern and must_close(). 
--- documentation/ingestion/clients/java.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/documentation/ingestion/clients/java.md b/documentation/ingestion/clients/java.md index 54773d003..ee91aae71 100644 --- a/documentation/ingestion/clients/java.md +++ b/documentation/ingestion/clients/java.md @@ -406,13 +406,10 @@ timestamp). ### Decimal columns -:::caution -Decimal values require QuestDB 9.2.0 or later. Create decimal columns ahead -of time with `DECIMAL(precision, scale)` so QuestDB ingests values with the -expected precision. See the +Create decimal columns ahead of time with `DECIMAL(precision, scale)` so +QuestDB ingests values with the expected precision. See the [decimal data type](/docs/query/datatypes/decimal/#creating-tables-with-decimals) page for details. -::: ### Flushing From 60855d8993dbcb31c32f37c3279df15056b0bd7b Mon Sep 17 00:00:00 2001 From: javier Date: Fri, 15 May 2026 21:21:38 +0200 Subject: [PATCH 44/44] docs(go): terminal state, single-endpoint warning, full enterprise example Verified by agent testing against Go client. Documents egress terminal state after failover exhaustion with recreate-on-failure pattern. Adds single-endpoint failover warning. Notes ingress/egress reconnect asymmetry. Full tested example: ingestion with SF + options API + observability counters, and querying with QwpFailoverReset handling. Enterprise features marked with inline comments. --- documentation/ingestion/clients/go.md | 167 ++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) diff --git a/documentation/ingestion/clients/go.md b/documentation/ingestion/clients/go.md index 7031356ff..a7f453359 100644 --- a/documentation/ingestion/clients/go.md +++ b/documentation/ingestion/clients/go.md @@ -989,6 +989,22 @@ returning on any error treats a reset as terminal, which the client supports explicitly. When the failover budget is consumed, `Batches()` (and `Exec`) return `*QwpFailoverExhaustedError`. 
+After failover exhaustion or a total outage (all endpoints down), the query +client enters a terminal state and returns errors on every subsequent call. +Close it and create a new one. This differs from ingestion, where the +`LineSender` has a continuous reconnect loop (`reconnect_max_duration_millis`, +default 5 minutes) that spans full outages transparently. The query client +reconnects only within the scope of a single query. + +:::warning Failover requires multiple endpoints + +Failover rotates across endpoints. With a single `addr`, there is no other +host to try, and the loop exhausts after one attempt regardless of +`failover_max_attempts`. For failover to be useful, provide at least two +addresses. + +::: + ### Observability `QwpSender` exposes counters for dashboards: `TotalReconnectAttempts`, @@ -1081,3 +1097,154 @@ rejection synchronously; on QWP it does not. To migrate, change the connect string from `http::` to `ws::` (or `https::` to `wss::`), register a `SenderErrorHandler`, and adjust auto-flush settings if needed. `QwpSender` is a superset of `LineSender`, so existing ingestion code keeps working. + +## Full example: ingestion and querying with failover + +This example combines ingestion with store-and-forward and connection +observability, then queries the data back with the recreate-on-failure +pattern for egress. + +```go +package main + +import ( + "context" + "errors" + "fmt" + "math/rand" + "time" + + qdb "github.com/questdb/go-questdb-client/v4" +) + +// ─── Ingestion (options API with store-and-forward) ───────────────── + +// Multi-host with store-and-forward for failover durability. +// Without sf_dir, data buffered during an outage lives in process memory +// and is lost if the sender process dies. With sf_dir, unacknowledged +// frames are persisted to disk and replayed after reconnection. 
+ +func ingestExample() { + ctx := context.Background() + + sender, err := qdb.NewLineSender(ctx, + qdb.WithQwp(), + qdb.WithAddress("db-primary:9000"), // Enterprise: multi-host + qdb.WithAddress("db-replica:9000"), // Enterprise: multi-host + qdb.WithTls(), // Enterprise: wss (TLS) + qdb.WithBearerToken("your_bearer_token"), // Enterprise: token auth + qdb.WithSfDir("/var/lib/myapp/qdb-sf"), // durability across outages + qdb.WithSenderId("ingest-1"), // unique per sender process + qdb.WithReconnectPolicy( + 5*time.Minute, // max outage budget + 100*time.Millisecond, // initial backoff + 5*time.Second), // max backoff + qdb.WithErrorHandler(func(e *qdb.SenderError) { + fmt.Printf("batch rejected: category=%s table=%s msg=%s\n", + e.Category, e.TableName, e.ServerMessage) + })) + if err != nil { + panic(err) + } + defer sender.Close(ctx) + + for i := 0; i < 100; i++ { + price := 1.0842 + (rand.Float64()-0.5)*0.002 + err = sender.Table("book"). + Symbol("ticker", "EURUSD"). + Float64Column("price", price). + Float64Column("size", 100000+rand.Float64()*900000). + At(ctx, time.Now()) + if err != nil { + fmt.Printf("row error: %s\n", err) + } + } + if err := sender.Flush(ctx); err != nil { + fmt.Printf("flush error: %s\n", err) + } +} + +// With sf_dir set, unacknowledged frames are persisted to disk during +// the outage and replayed when the new primary becomes reachable. +// Without sf_dir, the reconnect loop still works but data is lost if +// the sender process dies. +// +// Observability (no per-event callback in Go): +// qs := sender.(qdb.QwpSender) +// qs.TotalReconnectAttempts() +// qs.TotalReconnectsSucceeded() +// qs.TotalFramesReplayed() +// qs.LastTerminalError() + + +// ─── Querying (connect string, with reconnect-on-failure) ─────────── + +// The QwpQueryClient becomes permanently dead after a total outage +// exhausts the failover budget. The application must close the dead +// client and create a new one. 
This pattern handles that:
+
+func queryExample() {
+    ctx := context.Background()
+
+    connString :=
+        "wss::addr=db-primary:9000,db-replica:9000,db-replica2:9000;" + // Enterprise: wss, multi-host
+        "token=your_bearer_token;" + // Enterprise: token auth
+        "tls_verify=unsafe_off;" + // test only!
+        "failover=on;" + // Enterprise: failover
+        "failover_max_attempts=8;" +
+        "failover_max_duration_ms=30000;"
+
+    var client *qdb.QwpQueryClient
+
+    for {
+        // Reconnect if the client is dead
+        if client == nil {
+            var err error
+            client, err = qdb.QwpQueryClientFromConf(ctx, connString)
+            if err != nil {
+                fmt.Printf("connect failed: %s\n", err)
+                time.Sleep(2 * time.Second)
+                continue
+            }
+        }
+
+        q := client.Query(ctx,
+            "SELECT ts, ticker, price FROM book ORDER BY ts DESC LIMIT 10")
+
+        rowCount := 0
+        for batch, err := range q.Batches() {
+            if err != nil {
+                var reset *qdb.QwpFailoverReset
+                if errors.As(err, &reset) {
+                    // Fires only when failover happens mid-query.
+                    // Clear any accumulated partial results here.
+                    fmt.Println("failover, clearing partial results")
+                    rowCount = 0
+                    continue
+                }
+                // Any other error is terminal for this client
+                fmt.Printf("query failed: %s\n", err)
+                q.Close()
+                client.Close(ctx)
+                client = nil
+                fmt.Println("(will reconnect on next query)")
+                break
+            }
+            for row := 0; row < batch.RowCount(); row++ {
+                ts := time.UnixMicro(batch.Int64(0, row))
+                ticker := batch.String(1, row)
+                price := batch.Float64(2, row)
+                fmt.Printf("%s %s price=%.5f\n",
+                    ts.UTC().Format("2006-01-02T15:04:05.000Z07:00"), ticker, price)
+                rowCount++
+            }
+        }
+        if client != nil {
+            q.Close()
+            fmt.Printf("(%d rows)\n", rowCount)
+        }
+
+        time.Sleep(2 * time.Second)
+    }
+}
+```