diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 162ff69..f02b5a9 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -8,6 +8,27 @@ All notable changes to verisimiser will be documented in this file. This format is based on https://keepachangelog.com/en/1.1.0/[Keep a Changelog], and this project adheres to https://semver.org/spec/v2.0.0.html[Semantic Versioning]. +== [0.2.0] - 2026-05-30 + +=== Added +* JSON-family sidecar storage backend (`[sidecar].storage = "json"`) with a + `[sidecar].format` key — `plain` | `ld` (JSON-LD) | `ndjson` — at full + runtime parity with the SQLite path: hash-chained provenance (incl. forks), + temporal versioning, drift, and gc (#146, V-L2-F3). +* `sidecar::StorageKind` — single source of truth resolving + `[sidecar].storage` (+ `format`) to a backend; `validate`/`generate`/ + `drift`/`gc` all defer to it. +* Cross-process write locking for the JSON sidecar (advisory `<sidecar>.lock` + with stale-steal) plus documented atomic-rename durability (#150, V-L2-F4). +* `provenance <entity>` and `history <entity> [--at <timestamp>]` CLI + subcommands now query the sidecar (sqlite + json) instead of stubbing + (#150, V-L2-F4). +* `examples/json-sidecar/` manifest demonstrating the NDJSON sidecar. + +=== Changed +* `verisimiser generate` emits a `sidecar_schema.{json,jsonld,ndjson}` + scaffold for the json family (SQL backends still emit DDL). 
+ == [0.1.0] - 2026-03-21 === Phase 1 — RSR Compliance Sweep diff --git a/Cargo.lock b/Cargo.lock index f7525b1..3a15746 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1020,7 +1020,7 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "verisimiser" -version = "0.1.0" +version = "0.2.0" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 427fc48..0ab9ffc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: PMPL-1.0-or-later [package] name = "verisimiser" -version = "0.1.0" +version = "0.2.0" edition = "2024" rust-version = "1.85" authors = ["Jonathan D.A. Jewell "] diff --git a/README.adoc b/README.adoc index a7f1bd4..e552ce1 100644 --- a/README.adoc +++ b/README.adoc @@ -266,6 +266,55 @@ The `verisimiser octad` subcommand prints the active concerns from your manifest; `verisimiser doctor` checks that sidecars, thresholds, and retention bounds are configured consistently. +=== Sidecar storage backends (Phase 1 schema) + +The Phase 1 manifest uses a single `[sidecar]` section. All octad data +(provenance log, temporal versions, lineage edges, access policies) lives +in one sidecar store — never in your target database: + +[source,toml] +---- +[sidecar] +# storage backend: "sqlite" (default), "postgres"/"postgresql", or "json" +storage = "json" +# json on-disk encoding (ignored for sql backends): "plain" | "ld" | "ndjson" +format = "ndjson" +path = ".verisim/sidecar.ndjson" +---- + +* **sqlite** (default) — the reference store; embedded and transactional. +* **postgres** — the same overlay schema, emitted as PostgreSQL DDL by `generate`. +* **json** — an append-only document store mirroring the `verisimdb_*` tables, + in one of three encodings: ++ +-- +** **plain** — one JSON object keyed by table name. +** **ld** — JSON-LD (`@context` + a `@graph` of typed, `@id`-addressed nodes), for linked-data tooling. 
+** **ndjson** — newline-delimited JSON, one record per line. +-- + +The JSON store has full parity with the SQLite runtime path: hash-chained +provenance (including first-class forks), temporal versioning, drift, and +gc. Writes are crash-safe (atomic temp-file + `rename`) and serialised +across processes by an advisory lock file (`<sidecar>.lock`). + +[cols="2,1,1,1",options="header"] +|=== +| Command | sqlite | postgres | json + +| `generate` | DDL | DDL | scaffold +| `provenance` / `history` | ✓ | — | ✓ +| `drift` / `gc` | ✓ | — | ✓ +|=== + +`generate` writes `sidecar_schema.sql` for SQL backends and a +`sidecar_schema.{json,jsonld,ndjson}` scaffold for the json family. +`verisimiser provenance <entity>` prints the entity's chain and its +verification status (plus any fork points); `verisimiser history <entity> +[--at <timestamp>]` lists temporal versions, or the point-in-time snapshot per +modality. PostgreSQL *runtime* reads (`provenance`/`history`/`drift`/`gc`) +are not yet implemented and refuse explicitly rather than silently no-op. + == Architecture [source] diff --git a/examples/README.adoc b/examples/README.adoc index b9cdb48..eb3c83b 100644 --- a/examples/README.adoc +++ b/examples/README.adoc @@ -1 +1,10 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later = examples Pillar + +Worked examples of `verisimiser.toml` manifests. + +* **blog-db/** — augment a typical blog database (SQLite target) with the + octad dimensions, using the default SQLite sidecar. +* **json-sidecar/** — the same octad augmentation, but stored in a + JSON-family (NDJSON) sidecar instead of SQLite. Shows `[sidecar].storage = + "json"` + `format`. 
diff --git a/examples/blog-db/verisimiser.toml b/examples/blog-db/verisimiser.toml index fec3131..6bd80e1 100644 --- a/examples/blog-db/verisimiser.toml +++ b/examples/blog-db/verisimiser.toml @@ -23,4 +23,7 @@ enable-simulation = false [sidecar] storage = "sqlite" +# To use the JSON document store instead (see examples/json-sidecar): +# storage = "json" +# format = "ndjson" # "plain" | "ld" | "ndjson" path = ".verisim/sidecar.db" diff --git a/examples/json-sidecar/README.adoc b/examples/json-sidecar/README.adoc new file mode 100644 index 0000000..6f2e837 --- /dev/null +++ b/examples/json-sidecar/README.adoc @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later += JSON sidecar example + +Demonstrates the JSON-family sidecar backend (V-L2-F3/F4): `[sidecar].storage += "json"` with `format = "ndjson"`. The octad data (provenance, temporal, +lineage, access policies) is stored as an append-only NDJSON document store +mirroring the `verisimdb_*` tables, with the same runtime parity as SQLite — +hash-chained provenance, temporal versioning, drift, and gc. + +[source,sh] +---- +# Validate the manifest (checks storage + format resolve): +verisimiser validate -m examples/json-sidecar/verisimiser.toml + +# Emit the JSON sidecar scaffold: +verisimiser generate -m examples/json-sidecar/verisimiser.toml -o .verisim + +# After data accrues, query it: +verisimiser provenance -m examples/json-sidecar/verisimiser.toml <entity> +verisimiser history -m examples/json-sidecar/verisimiser.toml <entity> --at 2026-01-01T00:00:00Z +verisimiser gc -m examples/json-sidecar/verisimiser.toml --dry-run +---- + +Set `format` to `"ld"` for JSON-LD (`@context` + `@graph`) or `"plain"` for a +single keyed JSON object. Writes are crash-safe (atomic rename) and serialised +across processes by a `<sidecar>.lock` advisory lock. 
diff --git a/examples/json-sidecar/verisimiser.toml b/examples/json-sidecar/verisimiser.toml new file mode 100644 index 0000000..7f75515 --- /dev/null +++ b/examples/json-sidecar/verisimiser.toml @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: PMPL-1.0-or-later +# VeriSimiser manifest: JSON-family (NDJSON) sidecar example. +# +# Same octad augmentation as the blog-db example, but the sidecar is an +# append-only NDJSON document store instead of SQLite — handy when you want +# the provenance/temporal data as plain, diff-/grep-able text. Switch +# `format` to "ld" for JSON-LD (linked-data tooling) or "plain" for a single +# keyed JSON object. + +[project] +name = "json-sidecar-example" +version = "0.1.0" +description = "Example: octad augmentation with a JSON-family (NDJSON) sidecar" + +[database] +backend = "sqlite" +connection-string-env = "APP_DATABASE_URL" +# schema-source = "schema.sql" # optional; omitted here for a self-contained example + +[octad] +enable-provenance = true +enable-lineage = true +enable-temporal = true +enable-access-control = true +enable-simulation = false + +[sidecar] +storage = "json" +format = "ndjson" # "plain" | "ld" | "ndjson" +path = ".verisim/sidecar.ndjson" diff --git a/src/gc.rs b/src/gc.rs index a07bd2f..9f7b1e1 100644 --- a/src/gc.rs +++ b/src/gc.rs @@ -65,15 +65,20 @@ fn run_gc_json( dry_run: bool, format: crate::sidecar::JsonFormat, ) -> Result { + use crate::sidecar::json::{self, JsonStore}; let sidecar_path = &manifest.sidecar.path; - let mut store = crate::sidecar::json::JsonStore::open(sidecar_path, format) - .with_context(|| format!("opening json sidecar at {}", sidecar_path))?; - let counts = store.gc_purge(&manifest.retention, dry_run); - if !dry_run { - store - .save() - .with_context(|| format!("saving json sidecar at {}", sidecar_path))?; - } + let retention = &manifest.retention; + let counts = if dry_run { + // Read-only: count purge candidates without locking or writing. 
+ let mut store = JsonStore::open(sidecar_path, format) + .with_context(|| format!("opening json sidecar at {}", sidecar_path))?; + store.gc_purge(retention, true) + } else { + // Mutating: hold the cross-process write lock across load→purge→save. + json::with_locked(sidecar_path, format, |store| { + Ok(store.gc_purge(retention, false)) + })? + }; Ok(GcReport { sidecar: sidecar_path.clone(), dry_run, diff --git a/src/main.rs b/src/main.rs index e494c1b..4789ed7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -329,9 +329,107 @@ fn main() -> Result<()> { } Commands::Provenance { manifest, entity } => { - let _m = manifest::load_manifest(&manifest)?; + let m = manifest::load_manifest(&manifest)?; + + // Neutral display row shared by both backends. + struct ProvRow { + hash: String, + previous_hash: String, + operation: String, + actor: String, + timestamp: String, + } + + let (rows, verified, forks): (Vec, bool, Vec) = + match sidecar::StorageKind::resolve(&m.sidecar.storage, &m.sidecar.format)? { + sidecar::StorageKind::Sqlite => { + let conn = rusqlite::Connection::open(&m.sidecar.path)?; + let mut stmt = conn.prepare( + "SELECT hash, previous_hash, operation, actor, timestamp \ + FROM verisimdb_provenance_log WHERE entity_id = ?1 \ + ORDER BY timestamp, hash", + )?; + let rows = stmt + .query_map([entity.as_str()], |r| { + Ok(ProvRow { + hash: r.get(0)?, + previous_hash: r.get(1)?, + operation: r.get(2)?, + actor: r.get(3)?, + timestamp: r.get(4)?, + }) + })? 
+ .collect::>>()?; + let verified = tier1::provenance::verify_chain(&conn, &entity)?; + let forks = tier1::provenance::fork_points(&conn, &entity)?; + (rows, verified, forks) + } + sidecar::StorageKind::Json(format) => { + let store = sidecar::json::JsonStore::open(&m.sidecar.path, format)?; + let mut rows: Vec = store + .data() + .provenance_log + .iter() + .filter(|r| r.entity_id == entity) + .map(|r| ProvRow { + hash: r.hash.clone(), + previous_hash: r.previous_hash.clone(), + operation: r.operation.clone(), + actor: r.actor.clone(), + timestamp: r.timestamp.clone(), + }) + .collect(); + rows.sort_by(|a, b| { + a.timestamp.cmp(&b.timestamp).then(a.hash.cmp(&b.hash)) + }); + let verified = store.verify_chain(&entity); + let forks = store.fork_points(&entity); + (rows, verified, forks) + } + sidecar::StorageKind::Postgres => anyhow::bail!( + "verisimiser provenance supports the sqlite and json sidecar \ + backends; [sidecar].storage = \"postgres\" is not yet implemented" + ), + }; + println!("Provenance chain for entity: {}", entity); - // TODO: query provenance sidecar + if rows.is_empty() { + println!(" (no entries)"); + } + for r in &rows { + let prev = if r.previous_hash.is_empty() { + "genesis".to_string() + } else { + short_hash(&r.previous_hash) + }; + println!( + " {} <- {} {:<9} by {:<12} at {}", + short_hash(&r.hash), + prev, + r.operation, + r.actor, + r.timestamp + ); + } + println!( + "Chain verified: {}", + if verified { + "yes" + } else { + "NO — tampering or broken link detected" + } + ); + if !forks.is_empty() { + println!("Fork points (divergent history — ADR-0010):"); + for f in &forks { + let at = if f.predecessor.is_empty() { + "genesis".to_string() + } else { + short_hash(&f.predecessor) + }; + println!(" {} → {} branches", at, f.children); + } + } Ok(()) } @@ -340,12 +438,144 @@ fn main() -> Result<()> { entity, at, } => { - let _m = manifest::load_manifest(&manifest)?; + let m = manifest::load_manifest(&manifest)?; + let storage = 
sidecar::StorageKind::resolve(&m.sidecar.storage, &m.sidecar.format)?; + match at { - Some(t) => println!("Entity {} at {}", entity, t), - None => println!("Full history for entity {}", entity), + // Point-in-time: the snapshot current at `t`, per modality. + Some(t_str) => { + let t = chrono::DateTime::parse_from_rfc3339(&t_str) + .map_err(|e| { + anyhow::anyhow!( + "--at must be an RFC 3339 timestamp \ + (e.g. 2026-01-02T03:04:05Z): {e}" + ) + })? + .with_timezone(&chrono::Utc); + + let snaps: Vec<(String, Option)> = match storage { + sidecar::StorageKind::Sqlite => { + let conn = rusqlite::Connection::open(&m.sidecar.path)?; + let mut stmt = conn.prepare( + "SELECT DISTINCT table_name FROM verisimdb_temporal_versions \ + WHERE entity_id = ?1 ORDER BY table_name", + )?; + let tables = stmt + .query_map([entity.as_str()], |r| r.get::<_, String>(0))? + .collect::>>()?; + let mut out = Vec::with_capacity(tables.len()); + for table in tables { + let snap = tier1::temporal::read_at(&conn, &entity, &table, &t)?; + out.push((table, snap)); + } + out + } + sidecar::StorageKind::Json(format) => { + let store = sidecar::json::JsonStore::open(&m.sidecar.path, format)?; + let mut tables: Vec = store + .data() + .temporal_versions + .iter() + .filter(|r| r.entity_id == entity) + .map(|r| r.table_name.clone()) + .collect(); + tables.sort(); + tables.dedup(); + tables + .into_iter() + .map(|table| { + let snap = store.read_at(&entity, &table, &t); + (table, snap) + }) + .collect() + } + sidecar::StorageKind::Postgres => anyhow::bail!( + "verisimiser history supports the sqlite and json sidecar \ + backends; [sidecar].storage = \"postgres\" is not yet implemented" + ), + }; + + println!("Entity {} at {}", entity, t_str); + if snaps.is_empty() { + println!(" (no versions)"); + } + for (table, snap) in &snaps { + match snap { + Some(s) => println!(" [{}] {}", table, s), + None => println!(" [{}] (did not exist at this time)", table), + } + } + } + + // Full history: every version, 
per modality. + None => { + struct VRow { + table_name: String, + version: u64, + valid_from: String, + valid_to: Option, + operation: String, + } + let rows: Vec = match storage { + sidecar::StorageKind::Sqlite => { + let conn = rusqlite::Connection::open(&m.sidecar.path)?; + let mut stmt = conn.prepare( + "SELECT table_name, version, valid_from, valid_to, operation \ + FROM verisimdb_temporal_versions WHERE entity_id = ?1 \ + ORDER BY table_name, version", + )?; + stmt.query_map([entity.as_str()], |r| { + Ok(VRow { + table_name: r.get(0)?, + version: r.get::<_, i64>(1)? as u64, + valid_from: r.get(2)?, + valid_to: r.get(3)?, + operation: r.get(4)?, + }) + })? + .collect::>>()? + } + sidecar::StorageKind::Json(format) => { + let store = sidecar::json::JsonStore::open(&m.sidecar.path, format)?; + let mut rows: Vec = store + .data() + .temporal_versions + .iter() + .filter(|r| r.entity_id == entity) + .map(|r| VRow { + table_name: r.table_name.clone(), + version: r.version, + valid_from: r.valid_from.clone(), + valid_to: r.valid_to.clone(), + operation: r.operation.clone(), + }) + .collect(); + rows.sort_by(|a, b| { + a.table_name + .cmp(&b.table_name) + .then(a.version.cmp(&b.version)) + }); + rows + } + sidecar::StorageKind::Postgres => anyhow::bail!( + "verisimiser history supports the sqlite and json sidecar \ + backends; [sidecar].storage = \"postgres\" is not yet implemented" + ), + }; + + println!("Full history for entity {}", entity); + if rows.is_empty() { + println!(" (no versions)"); + } + for r in &rows { + let to = r.valid_to.as_deref().unwrap_or("current"); + println!( + " [{}] v{} {} .. {} ({})", + r.table_name, r.version, r.valid_from, to, r.operation + ); + } + } } - // TODO: query temporal sidecar Ok(()) } @@ -452,6 +682,16 @@ fn emit_report(report: &manifest::ValidationReport, json: bool, kind: &str) -> R } } +/// Truncate a long hash for human-readable display (provenance hashes are +/// 64 hex chars). Short strings are returned unchanged. 
+fn short_hash(hash: &str) -> String {
+    // Cut on a char boundary; `&hash[..12]` panics on multi-byte UTF-8 input.
+    match hash.char_indices().nth(12) {
+        Some((cut, _)) => format!("{}…", &hash[..cut]),
+        None => hash.to_string(),
+    }
+}
+
 /// Print the 8 octad dimensions with descriptions.
 fn print_octad() {
     println!("=== VeriSimDB Octad: Eight Dimensions ===");
diff --git a/src/sidecar/json.rs b/src/sidecar/json.rs
index 41c4350..4421dba 100644
--- a/src/sidecar/json.rs
+++ b/src/sidecar/json.rs
@@ -381,6 +381,12 @@ impl JsonStore {
 
     /// Persist the store atomically: write a sibling temp file, then rename
     /// over the target so a crash mid-write can't truncate the sidecar.
+    /// `rename(2)` within a directory is atomic, so a concurrent reader sees
+    /// either the old or new complete file, never a partial one.
+    ///
+    /// For *write* flows, call this via [`with_locked`] so the load→mutate→
+    /// save cycle is serialised against other writers; calling it bare is
+    /// fine for a freshly-built store no other process can see yet.
     pub fn save(&self) -> Result<()> {
         if let Some(parent) = self.path.parent() {
             if !parent.as_os_str().is_empty() {
@@ -891,6 +897,38 @@ pub fn scaffold(octad: &OctadConfig, format: JsonFormat) -> Result {
     }
 }
 
+/// Run a mutating transaction against the json sidecar at `path` while
+/// holding the cross-process write lock for the whole load→mutate→save
+/// cycle, then persist atomically.
+///
+/// This is the safe entry point for any operation that *writes* the store
+/// (gc, provenance/temporal appends): the lock serialises concurrent
+/// writers (the json analogue of SQLite's write lock) and the fresh load
+/// inside the lock guarantees the closure sees the latest state. Read-only
+/// callers can use [`JsonStore::open`] directly — atomic rename means a
+/// reader always sees a complete file.
+pub fn with_locked<T>(
+    path: impl AsRef<Path>,
+    format: JsonFormat,
+    f: impl FnOnce(&mut JsonStore) -> Result<T>,
+) -> Result<T> {
+    let path = path.as_ref();
+    // The lock file is a sibling of the sidecar, so its parent must exist
+    // before we can create it.
+    if let Some(parent) = path.parent() {
+        if !parent.as_os_str().is_empty() {
+            std::fs::create_dir_all(parent)
+                .with_context(|| format!("creating sidecar dir {}", parent.display()))?;
+        }
+    }
+    let _lock = super::lock::FileLock::acquire(path)?;
+    let mut store = JsonStore::open(path, format)?;
+    let out = f(&mut store)?;
+    store.save()?;
+    Ok(out)
+    // `_lock` is dropped here, releasing the write lock.
+}
+
 /// Parse an RFC 3339 timestamp to UTC, discarding the offset.
 fn parse_ts(s: &str) -> Option<DateTime<Utc>> {
     DateTime::parse_from_rfc3339(s)
diff --git a/src/sidecar/lock.rs b/src/sidecar/lock.rs
new file mode 100644
index 0000000..4e2bee3
--- /dev/null
+++ b/src/sidecar/lock.rs
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: PMPL-1.0-or-later
+// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath)
+//
+// Cross-process advisory write lock for the JSON sidecar (V-L2-F4, #150).
+//
+// The SQLite sidecar serialises concurrent writers through the database
+// write lock; the JSON store has no such engine, so this provides the
+// equivalent: an exclusive sibling lock file (`<sidecar>.lock`) created with
+// `O_EXCL` semantics (`create_new`). It is dependency-free (no new crate,
+// no `unsafe`) and host-local.
+//
+// Crash resilience: because an `O_EXCL` lock file is not released by the OS
+// when a process dies, a lock whose mtime is older than `stale_after` is
+// treated as abandoned and stolen. The window is a deliberate trade-off —
+// long enough that a slow-but-live writer is not robbed, short enough that
+// a crash doesn't wedge the sidecar for long. Acquisition retries with a
+// fixed interval until a timeout.
+//
+// Stealing renames the stale file aside instead of deleting it in place, so
+// only one waiter can consume a given lock file, and a waiter that moved a
+// fresh lock by mistake (it lost a race) notices and links it back. Each
+// holder records a unique token in its lock file and, on release, deletes
+// the file only if the token is still its own — a writer robbed after
+// overrunning `stale_after` must not delete its successor's lock.
+//
+// Caveat: like all lock-file schemes this is host-local and not safe over
+// network filesystems without working `O_EXCL` (e.g. some NFS configs).
+
+use std::fs::OpenOptions;
+use std::io::{ErrorKind, Write};
+use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::thread::sleep;
+use std::time::{Duration, Instant, SystemTime};
+
+use anyhow::{Context, Result};
+
+/// Default wait before giving up acquiring a contended lock.
+const DEFAULT_ACQUIRE_TIMEOUT: Duration = Duration::from_secs(10);
+/// Default age past which a lock file is presumed abandoned and stolen.
+const DEFAULT_STALE_AFTER: Duration = Duration::from_secs(30);
+/// Poll interval while a lock is contended.
+const RETRY_INTERVAL: Duration = Duration::from_millis(25);
+
+/// Per-process counter keeping holder tokens and set-aside file names unique
+/// even when two acquisitions share a clock reading (uniqueness only, so
+/// relaxed ordering suffices).
+static NEXT_SEQ: AtomicU64 = AtomicU64::new(0);
+
+/// An acquired advisory lock; the lock file is removed on drop.
+#[derive(Debug)]
+pub struct FileLock {
+    path: PathBuf,
+    /// Holder record written into the lock file, or `None` if recording it
+    /// failed (release then falls back to unconditional removal).
+    token: Option<String>,
+}
+
+impl FileLock {
+    /// The lock-file path for a target sidecar (`<target>.lock`).
+    fn lock_path(target: &Path) -> PathBuf {
+        let mut name = target.as_os_str().to_owned();
+        name.push(".lock");
+        PathBuf::from(name)
+    }
+
+    /// Acquire the lock for `target`, blocking (with retry) up to the
+    /// default timeout and stealing a stale lock if necessary.
+    ///
+    /// # Errors
+    /// Fails if the lock is still held when the timeout expires, or if the
+    /// lock file cannot be created for a reason other than contention.
+    pub fn acquire(target: &Path) -> Result<Self> {
+        Self::acquire_with(target, DEFAULT_ACQUIRE_TIMEOUT, DEFAULT_STALE_AFTER)
+    }
+
+    /// Like [`acquire`](FileLock::acquire) but with explicit `timeout` and
+    /// `stale_after` (used by tests to exercise contention/staleness fast).
+    ///
+    /// # Errors
+    /// Same conditions as [`acquire`](FileLock::acquire).
+    pub fn acquire_with(target: &Path, timeout: Duration, stale_after: Duration) -> Result<Self> {
+        let path = Self::lock_path(target);
+        let deadline = Instant::now() + timeout;
+        loop {
+            match OpenOptions::new().write(true).create_new(true).open(&path) {
+                Ok(mut file) => {
+                    // Record holder (pid, epoch secs, sequence). The file's
+                    // mere existence is the lock, so a failed write is not
+                    // fatal; it only disables the ownership check on drop.
+                    let token = holder_token();
+                    let recorded = writeln!(file, "{token}").is_ok();
+                    return Ok(FileLock { path, token: recorded.then_some(token) });
+                }
+                Err(e) if e.kind() == ErrorKind::AlreadyExists => {
+                    // Retry at once only if the slot was really vacated; a
+                    // stale lock that cannot be moved must still run into
+                    // the deadline below rather than spin forever.
+                    if retire_if_stale(&path, stale_after) {
+                        continue;
+                    }
+                    if Instant::now() >= deadline {
+                        anyhow::bail!(
+                            "timed out after {:?} acquiring sidecar lock {} \
+                             (another writer holds it)",
+                            timeout,
+                            path.display()
+                        );
+                    }
+                    sleep(RETRY_INTERVAL);
+                }
+                Err(e) => {
+                    return Err(e)
+                        .with_context(|| format!("creating sidecar lock file {}", path.display()));
+                }
+            }
+        }
+    }
+}
+
+impl Drop for FileLock {
+    fn drop(&mut self) {
+        // Best-effort release; a leftover lock will be reclaimed as stale.
+        // Delete the file only while it still carries our token: if we were
+        // robbed as stale, the path now names someone else's lock.
+        let ours = match &self.token {
+            Some(token) => std::fs::read_to_string(&self.path)
+                .map(|held| held.trim_end() == token.as_str())
+                .unwrap_or(false),
+            None => true,
+        };
+        if ours {
+            let _ = std::fs::remove_file(&self.path);
+        }
+    }
+}
+
+/// Build a holder record: pid, seconds since the Unix epoch (0 if the
+/// clock is before it) and a per-process sequence number.
+fn holder_token() -> String {
+    let secs = SystemTime::now()
+        .duration_since(SystemTime::UNIX_EPOCH)
+        .map(|d| d.as_secs())
+        .unwrap_or(0);
+    let seq = NEXT_SEQ.fetch_add(1, Ordering::Relaxed);
+    format!("{} {} {}", std::process::id(), secs, seq)
+}
+
+/// `true` if `path` exists and its mtime is older than `stale_after`.
+fn is_stale(path: &Path, stale_after: Duration) -> bool {
+    let Ok(modified) = std::fs::metadata(path).and_then(|m| m.modified()) else {
+        return false;
+    };
+    SystemTime::now()
+        .duration_since(modified)
+        .map(|age| age > stale_after)
+        .unwrap_or(false)
+}
+
+/// Move a stale lock file out of the way.
+///
+/// Returns `true` when the caller should retry creating the lock at once
+/// (the stale file was retired, or had already vanished) and `false` when
+/// the lock is live or could not be moved.
+fn retire_if_stale(path: &Path, stale_after: Duration) -> bool {
+    if !is_stale(path, stale_after) {
+        return false;
+    }
+    // A sibling name no other process or thread will pick.
+    let mut aside = path.as_os_str().to_owned();
+    aside.push(format!(
+        ".stale.{}.{}",
+        std::process::id(),
+        NEXT_SEQ.fetch_add(1, Ordering::Relaxed)
+    ));
+    let aside = PathBuf::from(aside);
+    match std::fs::rename(path, &aside) {
+        Ok(()) => {}
+        // Another waiter retired it between our check and our rename.
+        Err(e) if e.kind() == ErrorKind::NotFound => return true,
+        // E.g. permission denied: treat the lock as held and keep waiting.
+        Err(_) => return false,
+    }
+    // `rename` keeps the mtime, so re-checking the moved file tells whether
+    // we took the stale lock or a fresh one created after our first check.
+    let retired = is_stale(&aside, stale_after);
+    if !retired {
+        // Took a live lock by mistake: put it back. `hard_link` refuses to
+        // overwrite, so a lock created in the meantime is left alone.
+        let _ = std::fs::hard_link(&aside, path);
+    }
+    let _ = std::fs::remove_file(&aside);
+    retired
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn acquire_and_release_round_trip() {
+        let dir = tempfile::tempdir().unwrap();
+        let target = dir.path().join("sidecar.json");
+        let lock_file = FileLock::lock_path(&target);
+        {
+            let _guard = FileLock::acquire(&target).unwrap();
+            assert!(lock_file.exists(), "lock file exists while held");
+        }
+        assert!(!lock_file.exists(), "lock file removed on drop");
+        // Re-acquire after release succeeds.
+        let _again = FileLock::acquire(&target).unwrap();
+    }
+
+    #[test]
+    fn contended_lock_times_out_quickly() {
+        let dir = tempfile::tempdir().unwrap();
+        let target = dir.path().join("sidecar.json");
+        let _held = FileLock::acquire(&target).unwrap();
+        // A second acquisition with a tiny timeout and a long stale window
+        // must fail rather than steal a live lock.
+        let err = FileLock::acquire_with(
+            &target,
+            Duration::from_millis(80),
+            Duration::from_secs(3600),
+        )
+        .unwrap_err();
+        assert!(err.to_string().contains("timed out"), "got: {err}");
+    }
+
+    #[test]
+    fn stale_lock_is_stolen() {
+        let dir = tempfile::tempdir().unwrap();
+        let target = dir.path().join("sidecar.json");
+        // Leave a lock behind (simulating a crashed writer).
+        std::mem::forget(FileLock::acquire(&target).unwrap());
+        assert!(FileLock::lock_path(&target).exists());
+        // With a tiny stale window, the next acquire steals it.
+        std::thread::sleep(Duration::from_millis(40));
+        let _stolen =
+            FileLock::acquire_with(&target, Duration::from_secs(2), Duration::from_millis(20))
+                .expect("stale lock should be stolen");
+    }
+
+    #[test]
+    fn robbed_holder_does_not_release_successor() {
+        let dir = tempfile::tempdir().unwrap();
+        let target = dir.path().join("sidecar.json");
+        let lock_file = FileLock::lock_path(&target);
+        // A live-but-slow writer outlasts the (tiny) stale window...
+        let slow = FileLock::acquire(&target).unwrap();
+        std::thread::sleep(Duration::from_millis(40));
+        let successor =
+            FileLock::acquire_with(&target, Duration::from_secs(2), Duration::from_millis(20))
+                .expect("stale lock should be stolen");
+        // ...and its late release must leave the successor's lock in place.
+        drop(slow);
+        assert!(lock_file.exists(), "successor's lock survives");
+        drop(successor);
+        assert!(!lock_file.exists(), "lock file removed on drop");
+    }
+}
diff --git a/src/sidecar/mod.rs b/src/sidecar/mod.rs
index 6f91fe6..f7bbadb 100644
--- a/src/sidecar/mod.rs
+++ b/src/sidecar/mod.rs
@@ -15,6 +15,7 @@
 // SQLite path implements today. The JSON store itself lives in [`json`].
 
 pub mod json;
+pub mod lock;
 
 use crate::codegen::overlay::SqlDialect;
 
diff --git a/tests/sidecar_cli_test.rs b/tests/sidecar_cli_test.rs
new file mode 100644
index 0000000..1f592d0
--- /dev/null
+++ b/tests/sidecar_cli_test.rs
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: PMPL-1.0-or-later
+// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath)
+//
+// End-to-end coverage for the `provenance` and `history` CLI subcommands
+// over the JSON sidecar backend (V-L2-F4, #150). The store is seeded through
+// the library's *locked* write path (real hashes), then the built binary is
+// driven against it so the full manifest → resolve → read → print plumbing
+// is exercised.
+
+use std::path::Path;
+use std::process::Command;
+
+use verisimiser::sidecar::JsonFormat;
+use verisimiser::sidecar::json;
+
+fn bin() -> Command {
+    Command::new(env!("CARGO_BIN_EXE_verisimiser")) // binary built by Cargo for this test
+}
+
+fn write_manifest(dir: &Path, sidecar: &Path, format: &str) -> std::path::PathBuf {
+    let toml = format!(
+        "[database]\nbackend = \"sqlite\"\n\
+         [sidecar]\nstorage = \"json\"\nformat = \"{}\"\npath = \"{}\"\n",
+        format,
+        sidecar.display().to_string().replace('\\', "/") // no backslashes inside the quoted TOML path
+    );
+    let path = dir.join("verisimiser.toml");
+    std::fs::write(&path, toml).unwrap();
+    path
+}
+
+#[test]
+fn provenance_and_history_over_json_ndjson() {
+    let dir = tempfile::tempdir().unwrap();
+    let sidecar = dir.path().join("sidecar.ndjson");
+
+    // Seed two provenance entries and two temporal versions for entity `e1` via the locked write path.
+    json::with_locked(&sidecar, JsonFormat::Ndjson, |store| {
+        store.append_provenance("e1", "users", "insert", "alice", None, None)?;
+        store.append_provenance("e1", "users", "update", "alice", Some("{\"n\":1}"), None)?;
+        store.append_temporal_version("e1", "users", "{\"n\":0}", "insert");
+        store.append_temporal_version("e1", "users", "{\"n\":1}", "update");
+        Ok(())
+    })
+    .unwrap();
+
+    let manifest = write_manifest(dir.path(), &sidecar, "ndjson");
+    let manifest = manifest.to_str().unwrap();
+
+    // provenance: prints the header, both operations, and a verified chain.
+    let out = bin()
+        .args(["provenance", "-m", manifest, "e1"])
+        .output()
+        .unwrap();
+    assert!(out.status.success(), "provenance failed: {out:?}");
+    let s = String::from_utf8(out.stdout).unwrap();
+    assert!(s.contains("Provenance chain for entity: e1"), "{s}");
+    assert!(s.contains("Chain verified: yes"), "chain must verify: {s}");
+    assert!(s.contains("insert") && s.contains("update"), "{s}");
+
+    // history (no --at): lists v1 and v2 for `users`; an open-ended version prints as "current".
+    let out = bin()
+        .args(["history", "-m", manifest, "e1"])
+        .output()
+        .unwrap();
+    assert!(out.status.success(), "history failed: {out:?}");
+    let s = String::from_utf8(out.stdout).unwrap();
+    assert!(s.contains("[users] v1"), "{s}");
+    assert!(s.contains("[users] v2"), "{s}");
+    assert!(s.contains("current"), "latest version marked current: {s}");
+}
+
+#[test]
+fn provenance_empty_entity_reports_no_entries_and_verifies_vacuously() {
+    let dir = tempfile::tempdir().unwrap();
+    let sidecar = dir.path().join("s.json");
+    // Materialise an empty store: a no-op transaction still goes through `with_locked`.
+    json::with_locked(&sidecar, JsonFormat::Plain, |_store| Ok(())).unwrap();
+
+    let manifest = write_manifest(dir.path(), &sidecar, "plain");
+    let out = bin()
+        .args(["provenance", "-m", manifest.to_str().unwrap(), "ghost"])
+        .output()
+        .unwrap();
+    assert!(out.status.success());
+    let s = String::from_utf8(out.stdout).unwrap();
+    assert!(s.contains("(no entries)"), "{s}");
+    assert!(
+        s.contains("Chain verified: yes"),
+        "empty chain verifies: {s}"
+    );
+}