diff --git a/src/abi/mod.rs b/src/abi/mod.rs index 6b94921..b7f2e84 100644 --- a/src/abi/mod.rs +++ b/src/abi/mod.rs @@ -460,8 +460,12 @@ impl AccessPolicy { /// with validation and path resolution. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SidecarConfig { - /// Storage backend: "sqlite" (default) or "postgres"/"postgresql". + /// Storage backend: "sqlite" (default), "postgres"/"postgresql", or + /// "json" (see `format`). pub storage: String, + /// On-disk encoding for the `json` store: "plain" (default), "ld" + /// (JSON-LD), or "ndjson". Ignored for sql backends. V-L2-F3 (#146). + pub format: String, /// File path for the sidecar database. pub path: String, } @@ -471,6 +475,7 @@ impl SidecarConfig { pub fn default_sqlite() -> Self { Self { storage: "sqlite".to_string(), + format: "plain".to_string(), path: ".verisim/sidecar.db".to_string(), } } diff --git a/src/codegen/overlay.rs b/src/codegen/overlay.rs index d7650d7..f7c7097 100644 --- a/src/codegen/overlay.rs +++ b/src/codegen/overlay.rs @@ -67,47 +67,24 @@ fn must_validate_identifier(name: &str) -> &str { // SQL dialect (V-L2-F1, #45) // --------------------------------------------------------------------------- -/// The SQL dialect the sidecar DDL is emitted for. Selected from the -/// manifest's `[sidecar].storage`. The table bodies are written in the -/// portable subset both engines accept (`CREATE TABLE IF NOT EXISTS`, -/// `CHECK`, partial unique indexes, `CURRENT_TIMESTAMP`); the only -/// genuinely dialect-divergent fragment is the metadata upsert -/// (`INSERT OR IGNORE` vs `INSERT … ON CONFLICT DO NOTHING`), which lives -/// in the [`sqlite`] / [`postgres`] modules. +/// The SQL dialect the sidecar DDL is emitted for. Selected (via +/// [`crate::sidecar::StorageKind`]) from the manifest's `[sidecar].storage`. 
+/// The table bodies are written in the portable subset both engines accept +/// (`CREATE TABLE IF NOT EXISTS`, `CHECK`, partial unique indexes, +/// `CURRENT_TIMESTAMP`); the only genuinely dialect-divergent fragment is +/// the metadata upsert (`INSERT OR IGNORE` vs `INSERT … ON CONFLICT DO +/// NOTHING`), which lives in the [`sqlite`] / [`postgres`] modules. /// -/// [`from_storage`](SqlDialect::from_storage) is the single source of -/// truth for which `[sidecar].storage` values are accepted; `generate`, -/// `validate`, and `doctor` all defer to it. +/// Storage-string resolution lives in [`crate::sidecar::StorageKind::resolve`] +/// (the single source of truth) — it maps `sqlite`/`postgres` to a dialect +/// via [`StorageKind::sql_dialect`](crate::sidecar::StorageKind::sql_dialect) +/// and `json` to the non-SQL [`crate::sidecar::json`] store. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum SqlDialect { Sqlite, Postgres, } -impl SqlDialect { - /// Map a `[sidecar].storage` value to a dialect (case-insensitive): - /// `sqlite` → [`SqlDialect::Sqlite`]; `postgres`/`postgresql` → - /// [`SqlDialect::Postgres`]. Every other value is rejected rather than - /// silently emitting SQLite DDL regardless of the backend (V-L2-F1). - /// - /// The octad data layer is intrinsically relational (hash-chains under - /// `BEGIN IMMEDIATE`, partial-unique temporal indexes, `CHECK` - /// constraints, recursive-CTE lineage acyclicity), so the - /// never-implemented `json` document store was dropped rather than - /// built (V-L2-F2, #112). It is now an unsupported value like any - /// other — no special-casing, no "coming soon" pointer. 
- pub fn from_storage(storage: &str) -> anyhow::Result { - match storage.to_lowercase().as_str() { - "sqlite" => Ok(SqlDialect::Sqlite), - "postgres" | "postgresql" => Ok(SqlDialect::Postgres), - other => anyhow::bail!( - "unsupported [sidecar].storage {other:?}; supported values are \ - \"sqlite\" (default) and \"postgres\"/\"postgresql\"." - ), - } - } -} - // --------------------------------------------------------------------------- // Overlay generation // --------------------------------------------------------------------------- @@ -870,33 +847,7 @@ mod tests { assert!(!p.contains("Seed metadata from parsed schema")); } - #[test] - fn test_storage_to_dialect_mapping() { - assert_eq!( - SqlDialect::from_storage("sqlite").unwrap(), - SqlDialect::Sqlite - ); - assert_eq!( - SqlDialect::from_storage("postgres").unwrap(), - SqlDialect::Postgres - ); - assert_eq!( - SqlDialect::from_storage("PostgreSQL").unwrap(), - SqlDialect::Postgres - ); - // V-L2-F2 (#112): the json store was dropped, never implemented. It - // is now rejected like any other unsupported value, and the error - // advertises only the supported stores — it must NOT imply json is - // planned (no "#112" / "not implemented" pointer). - let json_err = SqlDialect::from_storage("json").unwrap_err().to_string(); - assert!( - json_err.contains("unsupported") && json_err.contains("sqlite"), - "json must be rejected as an unsupported store, got: {json_err}" - ); - assert!( - !json_err.contains("#112") && !json_err.to_lowercase().contains("not implemented"), - "the dropped json store must not be advertised as planned, got: {json_err}" - ); - assert!(SqlDialect::from_storage("mariadb").is_err()); - } + // Storage-string resolution (incl. the json family) is owned by + // `crate::sidecar::StorageKind` and tested there; `SqlDialect` is now a + // plain dialect tag with no string parsing of its own. 
} diff --git a/src/gc.rs b/src/gc.rs index d623acf..a07bd2f 100644 --- a/src/gc.rs +++ b/src/gc.rs @@ -39,19 +39,52 @@ impl GcReport { } /// Purge sidecar rows older than the retention bound. `dry_run = true` -/// reports what would be deleted without changing the DB. +/// reports what would be deleted without changing the store. /// -/// Returns `Err` if the sidecar storage is not SQLite (unsupported in -/// this cut) or if the file is unreachable. +/// Dispatches on the resolved [`StorageKind`](crate::sidecar::StorageKind): +/// the `sqlite` and `json` (plain/ld/ndjson) backends are implemented; +/// `postgres` gc is not yet. Returns `Err` for an unsupported storage or an +/// unreachable store. pub fn run_gc(manifest: &Manifest, dry_run: bool) -> Result { - if manifest.sidecar.storage != "sqlite" { - bail!( - "verisimiser gc currently only supports the SQLite sidecar backend; \ - [sidecar].storage is {:?}", - manifest.sidecar.storage - ); + use crate::sidecar::StorageKind; + match StorageKind::resolve(&manifest.sidecar.storage, &manifest.sidecar.format)? { + StorageKind::Sqlite => run_gc_sqlite(manifest, dry_run), + StorageKind::Json(format) => run_gc_json(manifest, dry_run, format), + StorageKind::Postgres => bail!( + "verisimiser gc supports the sqlite and json sidecar backends; \ + gc for [sidecar].storage = \"postgres\" is not yet implemented" + ), } +} +/// JSON-family gc: load the store, purge in memory, persist iff applying. +/// The per-dimension semantics (incl. keeping the current temporal version) +/// live in [`crate::sidecar::json::JsonStore::gc_purge`]. 
+fn run_gc_json( + manifest: &Manifest, + dry_run: bool, + format: crate::sidecar::JsonFormat, +) -> Result { + let sidecar_path = &manifest.sidecar.path; + let mut store = crate::sidecar::json::JsonStore::open(sidecar_path, format) + .with_context(|| format!("opening json sidecar at {}", sidecar_path))?; + let counts = store.gc_purge(&manifest.retention, dry_run); + if !dry_run { + store + .save() + .with_context(|| format!("saving json sidecar at {}", sidecar_path))?; + } + Ok(GcReport { + sidecar: sidecar_path.clone(), + dry_run, + provenance_deleted: counts.provenance, + temporal_deleted: counts.temporal, + lineage_deleted: counts.lineage, + }) +} + +/// SQLite gc (the reference path). +fn run_gc_sqlite(manifest: &Manifest, dry_run: bool) -> Result { let sidecar_path = &manifest.sidecar.path; let conn = Connection::open(sidecar_path) .with_context(|| format!("opening sidecar at {}", sidecar_path))?; @@ -148,6 +181,7 @@ mod tests { .unwrap(); m.sidecar = SidecarConfig { storage: storage.to_string(), + format: "plain".to_string(), path: sidecar_path.to_string(), }; m.retention = retention; @@ -308,15 +342,67 @@ mod tests { } #[test] - fn gc_rejects_non_sqlite_backend() { - // `postgres` is a valid generate-time dialect, but gc is SQLite-only - // and must refuse rather than silently no-op. (The `json` value was - // dropped as a storage option entirely in V-L2-F2 / #112.) + fn gc_rejects_postgres_backend() { + // `postgres` is a valid generate-time dialect, but gc is not yet + // implemented for it and must refuse rather than silently no-op. + // (The json family *is* now supported — see the json gc test below.) 
let m = fixture("/dev/null", RetentionConfig::default(), "postgres"); let err = run_gc(&m, true).unwrap_err(); assert!( - err.to_string().contains("only supports the SQLite sidecar"), - "expected explicit unsupported-backend error; got: {err}" + err.to_string().contains("not yet implemented"), + "expected explicit postgres-unsupported error; got: {err}" + ); + } + + #[test] + fn gc_json_backend_purges_old_rows_and_persists() { + use crate::sidecar::JsonFormat; + use crate::sidecar::json::{JsonStore, ProvenanceRow, SidecarData, encode}; + + let dir = tempfile::tempdir().unwrap(); + let sidecar = dir.path().join("sidecar.json"); + let sidecar_str = sidecar.to_str().unwrap(); + + // Seed one aged + one fresh provenance row directly (deterministic + // timestamps; the append API always stamps "now"). + let aged = ProvenanceRow { + hash: "old".into(), + previous_hash: String::new(), + entity_id: "e".into(), + table_name: "t".into(), + operation: "insert".into(), + actor: "a".into(), + timestamp: "2020-01-01T00:00:00+00:00".into(), + before_snapshot: None, + transformation: None, + }; + let fresh = ProvenanceRow { + hash: "new".into(), + timestamp: "9999-01-01T00:00:00+00:00".into(), + ..aged.clone() + }; + let data = SidecarData { + provenance_log: vec![aged, fresh], + ..Default::default() + }; + std::fs::write(&sidecar, encode(&data, JsonFormat::Plain).unwrap()).unwrap(); + + let m = fixture( + sidecar_str, + RetentionConfig { + provenance_days: 30, + temporal_days: 30, + lineage_days: 30, + }, + "json", ); + let report = run_gc(&m, false).unwrap(); + assert_eq!(report.provenance_deleted, 1, "old provenance row purged"); + assert_eq!(report.total(), 1); + + // The purge was persisted: reopening shows only the fresh row. 
+ let reopened = JsonStore::open(&sidecar, JsonFormat::Plain).unwrap(); + assert_eq!(reopened.data().provenance_log.len(), 1); + assert_eq!(reopened.data().provenance_log[0].hash, "new"); } } diff --git a/src/lib.rs b/src/lib.rs index e24fb37..8e1e713 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,7 @@ pub mod doctor; pub mod gc; pub mod intercept; pub mod manifest; +pub mod sidecar; pub mod tier1; pub mod tier2; diff --git a/src/main.rs b/src/main.rs index 0e7de2e..e494c1b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,7 +18,7 @@ use anyhow::Result; use clap::{Parser, Subcommand, ValueEnum}; use tracing_subscriber::EnvFilter; -use verisimiser::{abi, codegen, doctor, gc, manifest, tier1}; +use verisimiser::{abi, codegen, doctor, gc, manifest, sidecar, tier1}; /// Diagnostic-log rendering. Data output (reports, version, the octad /// table) is always written verbatim to stdout regardless of this; this @@ -209,19 +209,36 @@ fn main() -> Result<()> { // Create output directory. std::fs::create_dir_all(&output)?; - // The sidecar DDL dialect follows [sidecar].storage. Any value - // other than sqlite/postgres is rejected here (V-L2-F1) instead - // of silently emitting SQLite DDL for a non-SQLite store. - let dialect = codegen::overlay::SqlDialect::from_storage(&m.sidecar.storage)?; - - // Generate sidecar overlay schema. Errors here surface invalid - // table/column identifiers in the parsed schema before they - // reach disk. - let overlay_ddl = - codegen::overlay::generate_sidecar_schema(&schema, &m.octad, dialect)?; - let overlay_path = format!("{}/sidecar_schema.sql", output); - std::fs::write(&overlay_path, &overlay_ddl)?; - tracing::info!(path = %overlay_path, "generated sidecar schema"); + // The sidecar artifact follows [sidecar].storage (+ format for + // json). Unknown values are rejected here (V-L2-F1/F3) instead + // of silently emitting SQLite DDL for a non-SQLite store. 
SQL + // backends emit DDL; the json family emits a format-appropriate + // scaffold. + let storage = sidecar::StorageKind::resolve(&m.sidecar.storage, &m.sidecar.format)?; + match storage { + sidecar::StorageKind::Sqlite | sidecar::StorageKind::Postgres => { + let dialect = storage + .sql_dialect() + .expect("sqlite/postgres resolve to a SQL dialect"); + // Errors here surface invalid table/column identifiers + // in the parsed schema before they reach disk. + let overlay_ddl = + codegen::overlay::generate_sidecar_schema(&schema, &m.octad, dialect)?; + let overlay_path = format!("{}/sidecar_schema.sql", output); + std::fs::write(&overlay_path, &overlay_ddl)?; + tracing::info!(path = %overlay_path, "generated sidecar schema"); + } + sidecar::StorageKind::Json(format) => { + let scaffold = sidecar::json::scaffold(&m.octad, format)?; + let overlay_path = format!("{}/sidecar_schema.{}", output, format.extension()); + std::fs::write(&overlay_path, &scaffold)?; + tracing::info!( + path = %overlay_path, + format = format.as_str(), + "generated json sidecar scaffold" + ); + } + } // Generate query interceptors. let interceptors = codegen::query::generate_interceptors(&schema, &m.octad, backend); @@ -258,27 +275,45 @@ fn main() -> Result<()> { threshold, } => { let m = manifest::load_manifest(&manifest)?; - if m.sidecar.storage != "sqlite" { - anyhow::bail!( - "verisimiser drift currently only supports the SQLite \ - sidecar backend; [sidecar].storage is {:?}", - m.sidecar.storage - ); - } - let conn = rusqlite::Connection::open(&m.sidecar.path)?; - // Distinct entity_ids that have at least one row in temporal_versions. - let mut stmt = - conn.prepare("SELECT DISTINCT entity_id FROM verisimdb_temporal_versions")?; - let entities: Vec = stmt - .query_map([], |r| r.get::<_, String>(0))? - .collect::>()?; + // Both implemented backends produce the same shape: the count of + // entities scanned plus the per-entity temporal-drift reports. 
+ let (scanned, reports) = + match sidecar::StorageKind::resolve(&m.sidecar.storage, &m.sidecar.format)? { + sidecar::StorageKind::Sqlite => { + let conn = rusqlite::Connection::open(&m.sidecar.path)?; + // Distinct entity_ids with at least one temporal row. + let mut stmt = conn.prepare( + "SELECT DISTINCT entity_id FROM verisimdb_temporal_versions", + )?; + let entities: Vec = stmt + .query_map([], |r| r.get::<_, String>(0))? + .collect::>()?; + let mut reports = Vec::new(); + for entity in &entities { + if let Some(r) = tier1::drift::detect_temporal_drift(&conn, entity)? { + reports.push(r); + } + } + (entities.len(), reports) + } + sidecar::StorageKind::Json(format) => { + let store = sidecar::json::JsonStore::open(&m.sidecar.path, format)?; + let entities = store.distinct_temporal_entities(); + let reports = entities + .iter() + .filter_map(|e| store.detect_temporal_drift(e)) + .collect::>(); + (entities.len(), reports) + } + sidecar::StorageKind::Postgres => anyhow::bail!( + "verisimiser drift supports the sqlite and json sidecar backends; \ + [sidecar].storage = \"postgres\" drift is not yet implemented" + ), + }; tracing::info!(threshold, "checking temporal drift"); let mut reported = 0usize; - for entity in &entities { - let Some(report) = tier1::drift::detect_temporal_drift(&conn, entity)? else { - continue; - }; + for report in &reports { if report.overall_score >= threshold { println!(" {} drift={:.3}", report.entity_id, report.overall_score); reported += 1; @@ -286,8 +321,8 @@ fn main() -> Result<()> { } println!( "Scanned {} entit{}; {} above threshold.", - entities.len(), - if entities.len() == 1 { "y" } else { "ies" }, + scanned, + if scanned == 1 { "y" } else { "ies" }, reported ); Ok(()) diff --git a/src/manifest/mod.rs b/src/manifest/mod.rs index 3fe0bb2..22576f4 100644 --- a/src/manifest/mod.rs +++ b/src/manifest/mod.rs @@ -281,12 +281,20 @@ mod octad_tests { pub struct SidecarConfig { /// Storage backend for the sidecar. 
`"sqlite"` (default) is the /// reference store; `"postgres"`/`"postgresql"` selects the PostgreSQL - /// DDL dialect. Any other value is rejected at `validate` and - /// `generate` time by `codegen::overlay::SqlDialect::from_storage`, + /// DDL dialect; `"json"` selects the JSON-family document store (see + /// [`format`](SidecarConfig::format)). Resolved — and any other value + /// rejected — at `validate`/`generate` time by + /// [`sidecar::StorageKind::resolve`](crate::sidecar::StorageKind::resolve), /// the single source of truth for supported stores. #[serde(default = "default_sidecar_storage")] pub storage: String, + /// On-disk encoding for the `json` store: `"plain"` (default), + /// `"ld"` (JSON-LD), or `"ndjson"`. Ignored for `sqlite`/`postgres`. + /// V-L2-F3 (#146). + #[serde(default = "default_sidecar_format")] + pub format: String, + /// File path for the sidecar database. #[serde(default = "default_sidecar_path")] pub path: String, @@ -330,6 +338,7 @@ impl Default for SidecarConfig { fn default() -> Self { Self { storage: default_sidecar_storage(), + format: default_sidecar_format(), path: default_sidecar_path(), } } @@ -394,14 +403,13 @@ mod validate_manifest_tests { assert_eq!(failed, vec!["schema-source-exists"]); } - /// An unsupported `[sidecar].storage` (here the dropped `json` store, - /// V-L2-F2 / #112) must fail `sidecar-storage-supported`, and the - /// failure detail must not advertise json as a planned option. + /// `storage = "json"` (with a valid format) now *passes* validation — + /// the JSON family is supported again (V-L2-F3 / #146). 
#[test] - fn unsupported_storage_fails() { + fn json_storage_with_valid_format_passes() { let dir = tempfile::tempdir().expect("tempdir"); let path = dir.path().join("verisimiser.toml"); - let sidecar_path = dir.path().join("sidecar.db"); + let sidecar_path = dir.path().join("sidecar.ndjson"); let body = format!( "[project]\n\ name = \"test\"\n\ @@ -409,28 +417,22 @@ mod validate_manifest_tests { backend = \"sqlite\"\n\ [sidecar]\n\ storage = \"json\"\n\ + format = \"ndjson\"\n\ path = \"{}\"\n", sidecar_path.display().to_string().replace('\\', "/") ); std::fs::write(&path, body).expect("write"); let report = validate_manifest(path.to_str().unwrap()); - assert!(!report.passed); - let storage = report - .checks - .iter() - .find(|c| c.name == "sidecar-storage-supported") - .expect("storage check must run"); - assert!(!storage.passed, "json storage must fail the check"); - let detail = storage.detail.as_deref().unwrap_or_default(); assert!( - detail.contains("unsupported") && !detail.contains("#112"), - "detail must reject json plainly without a 'coming soon' pointer; got: {detail}" + report.passed, + "json+ndjson must validate; checks: {:?}", + report.checks ); } - /// Complements `unsupported_storage_fails`: the PostgreSQL dialect is a - /// supported `[sidecar].storage` value (it selects the postgres DDL for + /// Complements the failure cases: the PostgreSQL dialect is a supported + /// `[sidecar].storage` value (it selects the postgres DDL for /// `generate`), so a postgres sidecar must *pass* the /// `sidecar-storage-supported` check and validate cleanly. #[test] @@ -465,6 +467,40 @@ mod validate_manifest_tests { ); } + /// A bad `[sidecar].format` for the json store, and an unknown storage + /// backend, must each fail `sidecar-storage-supported`. 
+ #[test] + fn bad_format_and_unknown_storage_fail() { + let dir = tempfile::tempdir().expect("tempdir"); + let storage_check = |toml: &str| { + let path = dir.path().join("verisimiser.toml"); + std::fs::write(&path, toml).expect("write"); + let report = validate_manifest(path.to_str().unwrap()); + report + .checks + .into_iter() + .find(|c| c.name == "sidecar-storage-supported") + .expect("storage check must run") + }; + + let bad_format = storage_check( + "[database]\nbackend = \"sqlite\"\n\ + [sidecar]\nstorage = \"json\"\nformat = \"yaml\"\n", + ); + assert!(!bad_format.passed); + assert!( + bad_format + .detail + .as_deref() + .unwrap_or_default() + .contains("format") + ); + + let unknown = + storage_check("[database]\nbackend = \"sqlite\"\n[sidecar]\nstorage = \"mariadb\"\n"); + assert!(!unknown.passed); + } + /// A malformed manifest must fail `manifest-loads` and stop further /// checks (because the rest depend on having a parsed manifest). #[test] @@ -604,6 +640,9 @@ fn default_connection_env() -> String { fn default_sidecar_storage() -> String { "sqlite".to_string() } +fn default_sidecar_format() -> String { + "plain".to_string() +} fn default_sidecar_path() -> String { ".verisim/sidecar.db".to_string() } @@ -692,8 +731,10 @@ enable-constraints = {enable_constraints} enable-simulation = {enable_simulation} [sidecar] -# storage backend: "sqlite" (default) or "postgres"/"postgresql" +# storage backend: "sqlite" (default), "postgres"/"postgresql", or "json" storage = "{sidecar_storage}" +# json on-disk encoding (ignored for sql backends): "plain" | "ld" | "ndjson" +format = "{sidecar_format}" path = "{sidecar_path}" [retention] @@ -711,6 +752,7 @@ lineage-days = {lineage_days} enable_constraints = octad.enable_constraints, enable_simulation = octad.enable_simulation, sidecar_storage = sidecar.storage, + sidecar_format = sidecar.format, sidecar_path = sidecar.path, provenance_days = retention.provenance_days, temporal_days = retention.temporal_days, @@ 
-806,9 +848,10 @@ impl ValidationReport { /// set, the file at that path is readable. /// 3. **`sidecar-path-writable`** — the parent directory of /// `[sidecar].path` is writable (or createable). -/// 4. **`sidecar-storage-supported`** — `[sidecar].storage` names a -/// backend `codegen` can emit (`sqlite`/`postgres`). Catches typos and -/// the dropped `json` store (V-L2-F2 / #112). +/// 4. **`sidecar-storage-supported`** — `[sidecar].storage` (+ `format` +/// for the json store) names a backend the tool supports +/// (`sqlite`/`postgres`/`json` with `format` ∈ plain|ld|ndjson). +/// Catches typos before codegen. (V-L2-F3 / #146.) /// /// Out of scope here: V-L2-E1 backend/target_db conflict (own issue), /// target-DB reachability (needs live connection). @@ -896,19 +939,19 @@ pub fn validate_manifest(path: &str) -> ValidationReport { }); } - // 4. Sidecar storage backend is supported. Delegates to the one - // validator (`SqlDialect::from_storage`) so `validate`/`doctor` - // and `generate` agree on the accepted set. This is where a typo'd - // or dropped backend (e.g. the never-implemented `json` store, - // V-L2-F2 / #112) is surfaced before it reaches codegen. + // 4. Sidecar storage backend (+ json format) is supported. + // Delegates to the one resolver (`sidecar::StorageKind::resolve`) so + // `validate`/`doctor` and `generate` agree on the accepted set. This + // is where a typo'd backend, or a bad `[sidecar].format` for the + // json store (V-L2-F3 / #146), is surfaced before it reaches codegen. 
let storage_check = ValidationCheck { name: "sidecar-storage-supported".to_string(), - description: "[sidecar].storage names a supported backend".to_string(), + description: "[sidecar].storage (+ format) names a supported backend".to_string(), passed: true, detail: None, }; checks.push( - match crate::codegen::overlay::SqlDialect::from_storage(&m.sidecar.storage) { + match crate::sidecar::StorageKind::resolve(&m.sidecar.storage, &m.sidecar.format) { Ok(_) => storage_check, Err(e) => ValidationCheck { passed: false, diff --git a/src/sidecar/json.rs b/src/sidecar/json.rs new file mode 100644 index 0000000..41c4350 --- /dev/null +++ b/src/sidecar/json.rs @@ -0,0 +1,1291 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +// +// JSON-family sidecar store (V-L2-F3, #146). +// +// An append-only document store that mirrors the `verisimdb_*` overlay +// tables, with the same runtime octad operations the SQLite path +// implements today: provenance hash-chains (incl. first-class forks), +// temporal versioning (monotonic, exactly-one-current), temporal drift, +// and age-based gc. +// +// One internal [`SidecarData`] model holds every collection; the on-disk +// [`JsonFormat`] (plain JSON / JSON-LD / NDJSON) is *purely a codec* over +// it, so the operations are written once and are format-independent. +// +// Concurrency model: load → mutate → atomic rewrite (temp file + rename). +// History is append-only at the *logical* level (rows are never mutated +// except `gc`); the physical file is rewritten atomically. Unlike the +// SQLite path — which serialises concurrent writers through the database +// write lock — this store assumes a single writer at a time. Cross-process +// write serialisation is a hardening follow-up. 
+ +use std::collections::{HashMap, HashSet}; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result}; +use chrono::{DateTime, Duration, Utc}; +use serde::{Deserialize, Serialize}; +use serde_json::{Map, Value}; + +use super::JsonFormat; +use crate::abi::ProvenanceEntry; +use crate::manifest::{OctadConfig, RetentionConfig}; +use crate::tier1::drift::{DriftCategory, DriftReport, temporal_drift_score}; +use crate::tier1::provenance::ForkPoint; + +/// JSON-LD vocabulary IRI; bare `@type`/field terms expand against it. +const LD_VOCAB: &str = "https://verisimdb.org/ns#"; +/// Reserved pseudo-table for scaffold metadata; ignored on read. +const META_TABLE: &str = "_meta"; + +// --------------------------------------------------------------------------- +// Row types — one per verisimdb_* table the runtime path touches +// --------------------------------------------------------------------------- + +/// A row of `verisimdb_provenance_log`. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ProvenanceRow { + pub hash: String, + pub previous_hash: String, + pub entity_id: String, + pub table_name: String, + pub operation: String, + pub actor: String, + /// ISO 8601 / RFC 3339. + pub timestamp: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub before_snapshot: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub transformation: Option, +} + +/// A branch tip in `verisimdb_provenance_chain_heads`. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ChainHead { + pub entity_id: String, + pub head_hash: String, +} + +/// A row of `verisimdb_temporal_versions`. 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct TemporalRow { + pub entity_id: String, + pub table_name: String, + pub version: u64, + pub valid_from: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub valid_to: Option, + pub snapshot: String, + pub operation: String, +} + +/// A row of `verisimdb_lineage_graph`. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct LineageRow { + pub edge_id: String, + pub source_entity: String, + pub source_table: String, + pub target_entity: String, + pub target_table: String, + pub derivation_type: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub description: Option, + pub created_at: String, +} + +/// A row of `verisimdb_access_policies`. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct AccessPolicyRow { + pub policy_id: String, + pub target_table: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub target_column: Option, + pub principal: String, + pub access_level: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub condition: Option, + pub created_at: String, + pub active: bool, +} + +/// The full in-memory sidecar model. The plain-JSON encoding is exactly +/// this struct (field renames are the table names); the other formats are +/// alternate codecs over the same data. 
+#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct SidecarData { + #[serde(default, rename = "verisimdb_provenance_log")] + pub provenance_log: Vec, + #[serde(default, rename = "verisimdb_provenance_chain_heads")] + pub provenance_chain_heads: Vec, + #[serde(default, rename = "verisimdb_temporal_versions")] + pub temporal_versions: Vec, + #[serde(default, rename = "verisimdb_lineage_graph")] + pub lineage_graph: Vec, + #[serde(default, rename = "verisimdb_access_policies")] + pub access_policies: Vec, +} + +/// Rows purged per dimension by [`JsonStore::gc_purge`]. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct GcCounts { + pub provenance: usize, + pub temporal: usize, + pub lineage: usize, +} + +// --------------------------------------------------------------------------- +// Codec: SidecarData <-> {plain, ld, ndjson} +// --------------------------------------------------------------------------- + +/// Map a `verisimdb_*` table name to its JSON-LD `@type` term. +fn ld_type_for(table: &str) -> &'static str { + match table { + "verisimdb_provenance_log" => "ProvenanceEntry", + "verisimdb_provenance_chain_heads" => "ProvenanceChainHead", + "verisimdb_temporal_versions" => "TemporalVersion", + "verisimdb_lineage_graph" => "LineageEdge", + "verisimdb_access_policies" => "AccessPolicy", + _ => "Thing", + } +} + +/// Inverse of [`ld_type_for`], tolerant of a `verisimdb:`/vocab IRI prefix. +/// Returns `None` for the reserved `Meta` type (skipped on read). 
+fn table_for_ld_type(ld_type: &str) -> Result> { + let term = ld_type.rsplit(['#', ':']).next().unwrap_or(ld_type); + match term { + "ProvenanceEntry" => Ok(Some("verisimdb_provenance_log")), + "ProvenanceChainHead" => Ok(Some("verisimdb_provenance_chain_heads")), + "TemporalVersion" => Ok(Some("verisimdb_temporal_versions")), + "LineageEdge" => Ok(Some("verisimdb_lineage_graph")), + "AccessPolicy" => Ok(Some("verisimdb_access_policies")), + "Meta" => Ok(None), + other => anyhow::bail!("unknown JSON-LD @type {other:?} in sidecar @graph"), + } +} + +/// Encode `data` to a string in the requested `format`. +pub fn encode(data: &SidecarData, format: JsonFormat) -> Result { + match format { + JsonFormat::Plain => Ok(serde_json::to_string_pretty(data)?), + JsonFormat::Ndjson => encode_ndjson(data), + JsonFormat::Ld => encode_ld(data), + } +} + +/// Decode a string in `format` back to [`SidecarData`]. An empty input is +/// an empty store. The reserved `_meta` record/key/`@type` is ignored. +pub fn decode(text: &str, format: JsonFormat) -> Result { + match format { + JsonFormat::Plain => { + if text.trim().is_empty() { + Ok(SidecarData::default()) + } else { + serde_json::from_str(text).context("parsing plain-JSON sidecar") + } + } + JsonFormat::Ndjson => decode_ndjson(text), + JsonFormat::Ld => decode_ld(text), + } +} + +/// Apply `f` to every (table, serialised-row) pair in deterministic order. +/// Centralises the per-collection walk shared by the ndjson/ld encoders. 
+fn for_each_row(data: &SidecarData, mut f: impl FnMut(&str, Value) -> Result<()>) -> Result<()> { + for r in &data.provenance_log { + f("verisimdb_provenance_log", serde_json::to_value(r)?)?; + } + for r in &data.provenance_chain_heads { + f("verisimdb_provenance_chain_heads", serde_json::to_value(r)?)?; + } + for r in &data.temporal_versions { + f("verisimdb_temporal_versions", serde_json::to_value(r)?)?; + } + for r in &data.lineage_graph { + f("verisimdb_lineage_graph", serde_json::to_value(r)?)?; + } + for r in &data.access_policies { + f("verisimdb_access_policies", serde_json::to_value(r)?)?; + } + Ok(()) +} + +/// Extract the object map from a serialised row, or fail loudly (every row +/// type serialises to a JSON object). +fn into_object(value: Value) -> Result> { + match value { + Value::Object(map) => Ok(map), + _ => anyhow::bail!("internal: sidecar row did not serialise to a JSON object"), + } +} + +fn encode_ndjson(data: &SidecarData) -> Result { + let mut out = String::new(); + for_each_row(data, |table, value| { + let mut map = into_object(value)?; + map.insert("_table".to_string(), Value::String(table.to_string())); + out.push_str(&serde_json::to_string(&Value::Object(map))?); + out.push('\n'); + Ok(()) + })?; + Ok(out) +} + +fn decode_ndjson(text: &str) -> Result { + let mut data = SidecarData::default(); + for (i, line) in text.lines().enumerate() { + if line.trim().is_empty() { + continue; + } + let mut value: Value = + serde_json::from_str(line).with_context(|| format!("ndjson line {}", i + 1))?; + let obj = value + .as_object_mut() + .ok_or_else(|| anyhow::anyhow!("ndjson line {} is not a JSON object", i + 1))?; + let table = obj + .remove("_table") + .and_then(|t| t.as_str().map(String::from)) + .ok_or_else(|| anyhow::anyhow!("ndjson line {} missing \"_table\"", i + 1))?; + if table == META_TABLE { + continue; + } + push_row(&mut data, &table, value).with_context(|| format!("ndjson line {}", i + 1))?; + } + Ok(data) +} + +fn encode_ld(data: 
&SidecarData) -> Result { + let mut graph: Vec = Vec::new(); + for_each_row(data, |table, value| { + let mut map = into_object(value)?; + map.insert( + "@type".to_string(), + Value::String(ld_type_for(table).to_string()), + ); + map.insert("@id".to_string(), Value::String(ld_id(table, &map))); + graph.push(Value::Object(map)); + Ok(()) + })?; + + let doc = serde_json::json!({ + "@context": { "@vocab": LD_VOCAB, "verisimdb": LD_VOCAB }, + "@graph": graph, + }); + Ok(serde_json::to_string_pretty(&doc)?) +} + +fn decode_ld(text: &str) -> Result { + if text.trim().is_empty() { + return Ok(SidecarData::default()); + } + let doc: Value = serde_json::from_str(text).context("parsing JSON-LD sidecar")?; + let graph = doc + .get("@graph") + .and_then(|g| g.as_array()) + .ok_or_else(|| anyhow::anyhow!("JSON-LD sidecar has no \"@graph\" array"))?; + + let mut data = SidecarData::default(); + for node in graph { + let mut map = node + .as_object() + .cloned() + .ok_or_else(|| anyhow::anyhow!("JSON-LD @graph node is not an object"))?; + let ld_type = map + .get("@type") + .and_then(|t| t.as_str()) + .ok_or_else(|| anyhow::anyhow!("JSON-LD @graph node missing \"@type\""))? + .to_string(); + let Some(table) = table_for_ld_type(&ld_type)? else { + continue; // Meta node + }; + map.remove("@type"); + map.remove("@id"); + push_row(&mut data, table, Value::Object(map))?; + } + Ok(data) +} + +/// Compute a stable `@id` IRI for a row, from its already-serialised map. 
+fn ld_id(table: &str, map: &Map) -> String { + let get = |k: &str| map.get(k).and_then(|v| v.as_str()).unwrap_or(""); + match table { + "verisimdb_provenance_log" => format!("urn:verisimdb:provenance:{}", get("hash")), + "verisimdb_provenance_chain_heads" => format!( + "urn:verisimdb:chain-head:{}:{}", + get("entity_id"), + get("head_hash") + ), + "verisimdb_temporal_versions" => format!( + "urn:verisimdb:temporal:{}:{}:{}", + get("entity_id"), + get("table_name"), + map.get("version") + .map(|v| v.to_string()) + .unwrap_or_default() + ), + "verisimdb_lineage_graph" => format!("urn:verisimdb:lineage:{}", get("edge_id")), + "verisimdb_access_policies" => format!("urn:verisimdb:access:{}", get("policy_id")), + _ => format!("urn:verisimdb:row:{table}"), + } +} + +/// Deserialise `value` into the row type named by `table` and append it. +fn push_row(data: &mut SidecarData, table: &str, value: Value) -> Result<()> { + match table { + "verisimdb_provenance_log" => data.provenance_log.push(serde_json::from_value(value)?), + "verisimdb_provenance_chain_heads" => data + .provenance_chain_heads + .push(serde_json::from_value(value)?), + "verisimdb_temporal_versions" => { + data.temporal_versions.push(serde_json::from_value(value)?) + } + "verisimdb_lineage_graph" => data.lineage_graph.push(serde_json::from_value(value)?), + "verisimdb_access_policies" => data.access_policies.push(serde_json::from_value(value)?), + other => anyhow::bail!("unknown sidecar table {other:?}"), + } + Ok(()) +} + +// --------------------------------------------------------------------------- +// Store: load / save + octad operations +// --------------------------------------------------------------------------- + +/// A JSON-family sidecar store bound to a file path and on-disk format. +pub struct JsonStore { + path: PathBuf, + format: JsonFormat, + data: SidecarData, +} + +impl JsonStore { + /// Open the store at `path`, or start an empty one if it doesn't exist. 
+ pub fn open(path: impl AsRef, format: JsonFormat) -> Result { + let path = path.as_ref().to_path_buf(); + let data = if path.exists() { + let text = std::fs::read_to_string(&path) + .with_context(|| format!("reading sidecar {}", path.display()))?; + decode(&text, format).with_context(|| { + format!("decoding {} sidecar {}", format.as_str(), path.display()) + })? + } else { + SidecarData::default() + }; + Ok(Self { path, format, data }) + } + + /// Borrow the underlying data (read-only). + pub fn data(&self) -> &SidecarData { + &self.data + } + + /// Persist the store atomically: write a sibling temp file, then rename + /// over the target so a crash mid-write can't truncate the sidecar. + pub fn save(&self) -> Result<()> { + if let Some(parent) = self.path.parent() { + if !parent.as_os_str().is_empty() { + std::fs::create_dir_all(parent) + .with_context(|| format!("creating sidecar dir {}", parent.display()))?; + } + } + let text = encode(&self.data, self.format)?; + let tmp = self + .path + .with_extension(format!("{}.tmp", self.format.extension())); + std::fs::write(&tmp, text.as_bytes()) + .with_context(|| format!("writing sidecar temp {}", tmp.display()))?; + std::fs::rename(&tmp, &self.path) + .with_context(|| format!("renaming {} -> {}", tmp.display(), self.path.display()))?; + Ok(()) + } + + // --- Provenance (mirrors tier1::provenance) --------------------------- + + /// Current branch-tip hashes for `entity_id`. 
+ fn head_set(&self, entity_id: &str) -> Vec { + self.data + .provenance_chain_heads + .iter() + .filter(|h| h.entity_id == entity_id) + .map(|h| h.head_hash.clone()) + .collect() + } + + fn add_head(&mut self, entity_id: &str, hash: &str) { + if !self + .data + .provenance_chain_heads + .iter() + .any(|h| h.entity_id == entity_id && h.head_hash == hash) + { + self.data.provenance_chain_heads.push(ChainHead { + entity_id: entity_id.to_string(), + head_hash: hash.to_string(), + }); + } + } + + fn remove_head(&mut self, entity_id: &str, hash: &str) { + self.data + .provenance_chain_heads + .retain(|h| !(h.entity_id == entity_id && h.head_hash == hash)); + } + + /// Append a provenance entry on the entity's single current tip + /// (genesis if none). A forked entity (≥2 heads) is ambiguous — use + /// [`JsonStore::append_provenance_fork`]. Returns the new entry hash. + #[allow(clippy::too_many_arguments)] + pub fn append_provenance( + &mut self, + entity_id: &str, + table_name: &str, + operation: &str, + actor: &str, + before_snapshot: Option<&str>, + transformation: Option<&str>, + ) -> Result { + let heads = self.head_set(entity_id); + let prev_hash = match heads.len() { + 0 => String::new(), + 1 => heads[0].clone(), + n => anyhow::bail!( + "entity {entity_id:?} has {n} chain heads (forked); linear append \ + is ambiguous — use append_provenance_fork(from_hash) (ADR-0010)" + ), + }; + let hash = self.insert_provenance( + &prev_hash, + entity_id, + table_name, + operation, + actor, + before_snapshot, + transformation, + )?; + if !prev_hash.is_empty() { + self.remove_head(entity_id, &prev_hash); + } + self.add_head(entity_id, &hash); + Ok(hash) + } + + /// Deliberately fork: extend `entity_id` from a *specific ancestor* + /// `from_hash` rather than the current tip (ADR-0010 §2). Adds a head + /// without removing one, so the entity gains a branch. 
+ #[allow(clippy::too_many_arguments)] + pub fn append_provenance_fork( + &mut self, + entity_id: &str, + table_name: &str, + operation: &str, + actor: &str, + before_snapshot: Option<&str>, + transformation: Option<&str>, + from_hash: &str, + ) -> Result { + let ancestor_exists = self + .data + .provenance_log + .iter() + .any(|r| r.entity_id == entity_id && r.hash == from_hash); + if !ancestor_exists { + anyhow::bail!( + "from_hash {from_hash:?} is not an entry in entity {entity_id:?}'s chain; \ + cannot fork from a non-existent ancestor" + ); + } + let hash = self.insert_provenance( + from_hash, + entity_id, + table_name, + operation, + actor, + before_snapshot, + transformation, + )?; + self.add_head(entity_id, &hash); + Ok(hash) + } + + /// Compute the hash, reject an exact-duplicate (the `hash` primary-key + /// guard in the SQLite path), and push the log row. + #[allow(clippy::too_many_arguments)] + fn insert_provenance( + &mut self, + prev_hash: &str, + entity_id: &str, + table_name: &str, + operation: &str, + actor: &str, + before_snapshot: Option<&str>, + transformation: Option<&str>, + ) -> Result { + let timestamp = Utc::now(); + let hash = ProvenanceEntry::compute_hash( + prev_hash, + entity_id, + operation, + actor, + ×tamp, + before_snapshot, + transformation, + ); + if self.data.provenance_log.iter().any(|r| r.hash == hash) { + anyhow::bail!( + "duplicate provenance entry: an entry with hash {hash} already exists \ + (identical preimage)" + ); + } + self.data.provenance_log.push(ProvenanceRow { + hash: hash.clone(), + previous_hash: prev_hash.to_string(), + entity_id: entity_id.to_string(), + table_name: table_name.to_string(), + operation: operation.to_string(), + actor: actor.to_string(), + timestamp: timestamp.to_rfc3339(), + before_snapshot: before_snapshot.map(str::to_string), + transformation: transformation.map(str::to_string), + }); + Ok(hash) + } + + /// Verify every branch of `entity_id`'s chain is hash-consistent + /// (ADR-0010 §3). 
A forked entity is not a tampered one: each branch + /// tip is walked back to a genesis and every node must recompute to its + /// stored hash and chain to a present predecessor. + pub fn verify_chain(&self, entity_id: &str) -> bool { + let nodes: HashMap<&str, &ProvenanceRow> = self + .data + .provenance_log + .iter() + .filter(|r| r.entity_id == entity_id) + .map(|r| (r.hash.as_str(), r)) + .collect(); + if nodes.is_empty() { + return true; // vacuous + } + + let mut has_child: HashSet<&str> = HashSet::new(); + for r in nodes.values() { + if !r.previous_hash.is_empty() { + has_child.insert(r.previous_hash.as_str()); + } + } + + let mut tips: HashSet = self.head_set(entity_id).into_iter().collect(); + for hash in nodes.keys() { + if !has_child.contains(hash) { + tips.insert((*hash).to_string()); + } + } + + for tip in tips { + let mut cursor = tip; + loop { + let Some(node) = nodes.get(cursor.as_str()) else { + return false; // dangling tip or broken link + }; + let Ok(ts) = DateTime::parse_from_rfc3339(&node.timestamp) else { + return false; + }; + let recomputed = ProvenanceEntry::compute_hash( + &node.previous_hash, + entity_id, + &node.operation, + &node.actor, + &ts.with_timezone(&Utc), + node.before_snapshot.as_deref(), + node.transformation.as_deref(), + ); + if recomputed != cursor { + return false; + } + if node.previous_hash.is_empty() { + break; + } + cursor = node.previous_hash.clone(); + } + } + true + } + + /// Every fork point in `entity_id`'s history (predecessors with >1 + /// child). Empty ⇒ the chain is linear. 
+ pub fn fork_points(&self, entity_id: &str) -> Vec { + let mut counts: HashMap<&str, u64> = HashMap::new(); + for r in self + .data + .provenance_log + .iter() + .filter(|r| r.entity_id == entity_id) + { + *counts.entry(r.previous_hash.as_str()).or_insert(0) += 1; + } + let mut points: Vec = counts + .into_iter() + .filter(|&(_, c)| c > 1) + .map(|(predecessor, children)| ForkPoint { + predecessor: predecessor.to_string(), + children, + }) + .collect(); + points.sort_by(|a, b| a.predecessor.cmp(&b.predecessor)); + points + } + + // --- Temporal (mirrors tier1::temporal) ------------------------------- + + /// Append a new version of `(entity_id, table_name)`. Closes out the + /// previous current row (sets its `valid_to`) before inserting the new + /// one, preserving "exactly one current version" by construction. + /// Returns the assigned (monotonic) version number. + pub fn append_temporal_version( + &mut self, + entity_id: &str, + table_name: &str, + snapshot: &str, + operation: &str, + ) -> u64 { + let prev_version = self + .data + .temporal_versions + .iter() + .filter(|r| r.entity_id == entity_id && r.table_name == table_name) + .map(|r| r.version) + .max() + .unwrap_or(0); + let next_version = prev_version + 1; + let now = Utc::now().to_rfc3339(); + + for row in self.data.temporal_versions.iter_mut().filter(|r| { + r.entity_id == entity_id && r.table_name == table_name && r.valid_to.is_none() + }) { + row.valid_to = Some(now.clone()); + } + + self.data.temporal_versions.push(TemporalRow { + entity_id: entity_id.to_string(), + table_name: table_name.to_string(), + version: next_version, + valid_from: now, + valid_to: None, + snapshot: snapshot.to_string(), + operation: operation.to_string(), + }); + next_version + } + + /// Current snapshot of `(entity_id, table_name)`, if any. 
+ pub fn read_current(&self, entity_id: &str, table_name: &str) -> Option { + self.data + .temporal_versions + .iter() + .find(|r| { + r.entity_id == entity_id && r.table_name == table_name && r.valid_to.is_none() + }) + .map(|r| r.snapshot.clone()) + } + + /// Snapshot of `(entity_id, table_name)` as it existed at time `t`: + /// `valid_from <= t` and (`valid_to` is NULL or `> t`), highest version + /// wins. `None` if the entity didn't exist then. + pub fn read_at(&self, entity_id: &str, table_name: &str, t: &DateTime) -> Option { + self.data + .temporal_versions + .iter() + .filter(|r| r.entity_id == entity_id && r.table_name == table_name) + .filter(|r| { + let from_ok = parse_ts(&r.valid_from).map(|f| f <= *t).unwrap_or(false); + let to_ok = match &r.valid_to { + None => true, + Some(s) => parse_ts(s).map(|to| to > *t).unwrap_or(false), + }; + from_ok && to_ok + }) + .max_by_key(|r| r.version) + .map(|r| r.snapshot.clone()) + } + + /// Roll `(entity_id, table_name)` back to `target_version` by appending + /// that snapshot as a new `rollback` version (audit-preserving). Errors + /// if the target version doesn't exist. + pub fn rollback_to( + &mut self, + entity_id: &str, + table_name: &str, + target_version: u64, + ) -> Result { + let snapshot = self + .data + .temporal_versions + .iter() + .find(|r| { + r.entity_id == entity_id + && r.table_name == table_name + && r.version == target_version + }) + .map(|r| r.snapshot.clone()) + .ok_or_else(|| { + anyhow::anyhow!("no version {target_version} for ({entity_id:?}, {table_name:?})") + })?; + Ok(self.append_temporal_version(entity_id, table_name, &snapshot, "rollback")) + } + + // --- Drift (reuses the storage-agnostic kernel) ----------------------- + + /// Entities that have at least one temporal version, de-duplicated and + /// sorted (drive for `verisimiser drift`). 
+ pub fn distinct_temporal_entities(&self) -> Vec { + let mut seen: Vec = self + .data + .temporal_versions + .iter() + .map(|r| r.entity_id.clone()) + .collect(); + seen.sort(); + seen.dedup(); + seen + } + + /// Temporal drift for one entity (ADR-0003 §3.1): max pairwise drift of + /// the latest version per `table_name`. `None` if the entity is + /// recorded under fewer than two modalities. + pub fn detect_temporal_drift(&self, entity_id: &str) -> Option { + let mut latest: HashMap<&str, i64> = HashMap::new(); + for r in self + .data + .temporal_versions + .iter() + .filter(|r| r.entity_id == entity_id) + { + let e = latest.entry(r.table_name.as_str()).or_insert(0); + *e = (*e).max(r.version as i64); + } + if latest.len() < 2 { + return None; + } + let versions: Vec = latest.into_values().collect(); + let score = temporal_drift_score(&versions); + Some(DriftReport { + entity_id: entity_id.to_string(), + overall_score: score, + categories: vec![(DriftCategory::Temporal, score)], + measured_at: Utc::now(), + }) + } + + // --- GC (mirrors gc::run_gc semantics) -------------------------------- + + /// Purge rows older than the retention bounds. A field of `0` days + /// means "keep forever". Only *superseded* temporal versions + /// (`valid_to` set) are eligible — the current version is always kept. + /// `dry_run` counts without mutating; otherwise rows are removed in + /// place (the caller persists via [`JsonStore::save`]). 
+ pub fn gc_purge(&mut self, retention: &RetentionConfig, dry_run: bool) -> GcCounts { + let now = Utc::now(); + let mut counts = GcCounts::default(); + + if retention.provenance_days > 0 { + let cutoff = now - Duration::days(retention.provenance_days as i64); + counts.provenance = purge_vec(&mut self.data.provenance_log, dry_run, |r| { + older_than(&r.timestamp, &cutoff) + }); + } + if retention.temporal_days > 0 { + let cutoff = now - Duration::days(retention.temporal_days as i64); + counts.temporal = purge_vec(&mut self.data.temporal_versions, dry_run, |r| { + r.valid_to.is_some() && older_than(&r.valid_from, &cutoff) + }); + } + if retention.lineage_days > 0 { + let cutoff = now - Duration::days(retention.lineage_days as i64); + counts.lineage = purge_vec(&mut self.data.lineage_graph, dry_run, |r| { + older_than(&r.created_at, &cutoff) + }); + } + counts + } +} + +/// Build the `generate` scaffold for the enabled octad dimensions in the +/// given format. Emits an empty store annotated with a `_meta` record so a +/// freshly-generated file is self-describing; the runtime ignores `_meta`. 
+pub fn scaffold(octad: &OctadConfig, format: JsonFormat) -> Result { + let mut dims: Vec<&str> = vec!["data", "metadata"]; + if octad.enable_provenance { + dims.push("provenance"); + } + if octad.enable_lineage { + dims.push("lineage"); + } + if octad.enable_temporal { + dims.push("temporal"); + } + if octad.enable_access_control { + dims.push("access_control"); + } + if octad.enable_constraints { + dims.push("constraints"); + } + if octad.enable_simulation { + dims.push("simulation"); + } + + let meta = serde_json::json!({ + "generator": "verisimiser generate (V-L2-F3)", + "storage": "json", + "format": format.as_str(), + "dimensions": dims, + "note": "append-only sidecar; mirrors the verisimdb_* overlay tables", + }); + + let enabled_tables = |octad: &OctadConfig| -> Vec<&'static str> { + let mut t = Vec::new(); + if octad.enable_provenance { + t.push("verisimdb_provenance_log"); + t.push("verisimdb_provenance_chain_heads"); + } + if octad.enable_temporal { + t.push("verisimdb_temporal_versions"); + } + if octad.enable_lineage { + t.push("verisimdb_lineage_graph"); + } + if octad.enable_access_control { + t.push("verisimdb_access_policies"); + } + t + }; + + match format { + JsonFormat::Plain => { + let mut obj = Map::new(); + obj.insert(META_TABLE.to_string(), meta); + for t in enabled_tables(octad) { + obj.insert(t.to_string(), Value::Array(Vec::new())); + } + Ok(serde_json::to_string_pretty(&Value::Object(obj))?) + } + JsonFormat::Ndjson => { + let mut meta_obj = meta.as_object().cloned().unwrap_or_default(); + meta_obj.insert("_table".to_string(), Value::String(META_TABLE.to_string())); + Ok(format!( + "{}\n", + serde_json::to_string(&Value::Object(meta_obj))? 
+ )) + } + JsonFormat::Ld => { + let mut meta_node = meta.as_object().cloned().unwrap_or_default(); + meta_node.insert("@type".to_string(), Value::String("Meta".to_string())); + meta_node.insert( + "@id".to_string(), + Value::String("urn:verisimdb:meta".to_string()), + ); + let doc = serde_json::json!({ + "@context": { "@vocab": LD_VOCAB, "verisimdb": LD_VOCAB }, + "@graph": [Value::Object(meta_node)], + }); + Ok(serde_json::to_string_pretty(&doc)?) + } + } +} + +/// Parse an RFC 3339 timestamp to UTC, discarding the offset. +fn parse_ts(s: &str) -> Option> { + DateTime::parse_from_rfc3339(s) + .ok() + .map(|dt| dt.with_timezone(&Utc)) +} + +/// `true` if `ts` parses and is strictly before `cutoff`. Unparseable +/// timestamps are treated as "not older" (never purged) — fail safe. +fn older_than(ts: &str, cutoff: &DateTime) -> bool { + parse_ts(ts).map(|t| t < *cutoff).unwrap_or(false) +} + +/// Count (dry-run) or remove rows matching `purge`. Returns the match count. +fn purge_vec(rows: &mut Vec, dry_run: bool, purge: impl Fn(&T) -> bool) -> usize { + if dry_run { + rows.iter().filter(|r| purge(r)).count() + } else { + let before = rows.len(); + rows.retain(|r| !purge(r)); + before - rows.len() + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + const FORMATS: [JsonFormat; 3] = [JsonFormat::Plain, JsonFormat::Ld, JsonFormat::Ndjson]; + + fn store(format: JsonFormat) -> (tempfile::TempDir, JsonStore) { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join(format!("sidecar.{}", format.extension())); + let s = JsonStore::open(&path, format).unwrap(); + (dir, s) + } + + // --- Provenance parity ------------------------------------------------ + + #[test] + fn provenance_genesis_and_sequential_chain_verifies() { + for fmt in FORMATS { + let (_d, mut s) = store(fmt); + let h1 = s 
+ .append_provenance("e1", "users", "insert", "alice", None, None) + .unwrap(); + let h2 = s + .append_provenance("e1", "users", "update", "alice", Some("{\"n\":1}"), None) + .unwrap(); + let h3 = s + .append_provenance("e1", "users", "delete", "bob", None, None) + .unwrap(); + assert_ne!(h1, h2); + assert_ne!(h2, h3); + // Genesis chains from empty. + assert_eq!(s.data().provenance_log[0].previous_hash, ""); + // A linear chain advances its single head. + assert_eq!(s.head_set("e1"), vec![h3]); + assert!(s.verify_chain("e1"), "fresh chain must verify ({fmt:?})"); + } + } + + #[test] + fn provenance_tamper_is_detected() { + let (_d, mut s) = store(JsonFormat::Plain); + s.append_provenance("e1", "users", "insert", "alice", None, None) + .unwrap(); + s.append_provenance("e1", "users", "update", "alice", None, None) + .unwrap(); + // Tamper with a stored field after the fact. + s.data.provenance_log[1].operation = "transform".to_string(); + assert!( + !s.verify_chain("e1"), + "tampered entry must fail verification" + ); + } + + #[test] + fn provenance_fork_keeps_both_branches_and_verifies() { + let (_d, mut s) = store(JsonFormat::Ndjson); + let genesis = s + .append_provenance("e1", "users", "insert", "alice", None, None) + .unwrap(); + let _linear = s + .append_provenance("e1", "users", "update", "alice", None, None) + .unwrap(); + // Fork from genesis: a second divergent branch. + let fork = s + .append_provenance_fork("e1", "users", "update", "carol", None, None, &genesis) + .unwrap(); + // Two heads now. + let heads = s.head_set("e1"); + assert_eq!(heads.len(), 2); + assert!(heads.contains(&fork)); + // Fork point detected at genesis (two children). 
+ let points = s.fork_points("e1"); + assert_eq!(points.len(), 1); + assert_eq!(points[0].predecessor, genesis); + assert_eq!(points[0].children, 2); + assert!(s.verify_chain("e1"), "both branches must verify"); + } + + #[test] + fn provenance_linear_append_on_forked_entity_is_ambiguous() { + let (_d, mut s) = store(JsonFormat::Plain); + let genesis = s + .append_provenance("e1", "users", "insert", "alice", None, None) + .unwrap(); + s.append_provenance_fork("e1", "users", "update", "bob", None, None, &genesis) + .unwrap(); + // Two heads now; a plain linear append can't pick a branch. + let err = s + .append_provenance("e1", "users", "update", "carol", None, None) + .unwrap_err() + .to_string(); + assert!( + err.contains("forked") && err.contains("append_provenance_fork"), + "linear append on a forked entity must point at the fork API; got: {err}" + ); + } + + #[test] + fn provenance_round_trips_through_disk_in_every_format() { + for fmt in FORMATS { + let dir = tempfile::tempdir().unwrap(); + let path = dir.path().join(format!("s.{}", fmt.extension())); + let head = { + let mut s = JsonStore::open(&path, fmt).unwrap(); + s.append_provenance("e1", "users", "insert", "alice", None, None) + .unwrap(); + let h = s + .append_provenance("e1", "users", "update", "bob", Some("{}"), Some("x")) + .unwrap(); + s.save().unwrap(); + h + }; + // Reopen and confirm state survived the codec. 
+ let s2 = JsonStore::open(&path, fmt).unwrap(); + assert_eq!(s2.head_set("e1"), vec![head], "head survives {fmt:?}"); + assert_eq!(s2.data().provenance_log.len(), 2, "rows survive {fmt:?}"); + assert!( + s2.verify_chain("e1"), + "chain re-verifies after reload {fmt:?}" + ); + } + } + + // --- Temporal parity -------------------------------------------------- + + #[test] + fn temporal_versions_are_monotonic_with_one_current() { + for fmt in FORMATS { + let (_d, mut s) = store(fmt); + for i in 1..=50u64 { + let v = + s.append_temporal_version("e1", "users", &format!("{{\"v\":{i}}}"), "update"); + assert_eq!(v, i, "monotonic version ({fmt:?})"); + } + let current = s + .data() + .temporal_versions + .iter() + .filter(|r| r.entity_id == "e1" && r.valid_to.is_none()) + .count(); + assert_eq!(current, 1, "exactly one current version ({fmt:?})"); + assert_eq!(s.read_current("e1", "users").as_deref(), Some("{\"v\":50}")); + } + } + + #[test] + fn temporal_read_at_returns_point_in_time_snapshot() { + let (_d, mut s) = store(JsonFormat::Plain); + s.append_temporal_version("e1", "users", "{\"v\":1}", "insert"); + std::thread::sleep(std::time::Duration::from_millis(15)); + let t1 = Utc::now(); + std::thread::sleep(std::time::Duration::from_millis(15)); + s.append_temporal_version("e1", "users", "{\"v\":2}", "update"); + std::thread::sleep(std::time::Duration::from_millis(15)); + let t2 = Utc::now(); + + assert_eq!(s.read_at("e1", "users", &t1).as_deref(), Some("{\"v\":1}")); + assert_eq!(s.read_at("e1", "users", &t2).as_deref(), Some("{\"v\":2}")); + } + + #[test] + fn temporal_rollback_appends_old_snapshot_as_new_version() { + let (_d, mut s) = store(JsonFormat::Ld); + s.append_temporal_version("e1", "users", "{\"v\":1}", "insert"); + s.append_temporal_version("e1", "users", "{\"v\":2}", "update"); + s.append_temporal_version("e1", "users", "{\"v\":3}", "update"); + let new_v = s.rollback_to("e1", "users", 1).unwrap(); + assert_eq!(new_v, 4, "rollback creates a new 
version"); + assert_eq!(s.read_current("e1", "users").as_deref(), Some("{\"v\":1}")); + assert!(s.rollback_to("e1", "users", 99).is_err()); + } + + // --- Drift parity ----------------------------------------------------- + + #[test] + fn drift_matches_sqlite_worked_example() { + let (_d, mut s) = store(JsonFormat::Plain); + // Two modalities at versions 5 and 4 -> score 0.2 (ADR-0003). + for _ in 0..5 { + s.append_temporal_version("e1", "posts", "{}", "update"); + } + for _ in 0..4 { + s.append_temporal_version("e1", "posts_graph", "{}", "update"); + } + let report = s.detect_temporal_drift("e1").unwrap(); + assert_eq!(report.entity_id, "e1"); + assert!((report.overall_score - 0.2).abs() < 1e-12); + assert_eq!(report.categories[0].0, DriftCategory::Temporal); + + // Single-modality entity -> None. + s.append_temporal_version("e2", "posts", "{}", "insert"); + assert!(s.detect_temporal_drift("e2").is_none()); + + assert_eq!(s.distinct_temporal_entities(), vec!["e1", "e2"]); + } + + // --- GC parity -------------------------------------------------------- + + fn seed_aged(s: &mut JsonStore) { + // provenance: 1 old, 1 fresh + s.data.provenance_log.push(ProvenanceRow { + hash: "old".into(), + previous_hash: "".into(), + entity_id: "e".into(), + table_name: "t".into(), + operation: "insert".into(), + actor: "a".into(), + timestamp: "2020-01-01T00:00:00+00:00".into(), + before_snapshot: None, + transformation: None, + }); + s.data.provenance_log.push(ProvenanceRow { + timestamp: "9999-01-01T00:00:00+00:00".into(), + hash: "new".into(), + ..s.data.provenance_log[0].clone() + }); + // temporal: old superseded, old current, fresh superseded + let mk = |v: u64, from: &str, to: Option<&str>| TemporalRow { + entity_id: "e".into(), + table_name: "t".into(), + version: v, + valid_from: from.into(), + valid_to: to.map(str::to_string), + snapshot: "{}".into(), + operation: "update".into(), + }; + s.data.temporal_versions.push(mk( + 1, + "2020-01-01T00:00:00+00:00", + 
Some("2020-06-01T00:00:00+00:00"), + )); + s.data + .temporal_versions + .push(mk(2, "2020-01-01T00:00:00+00:00", None)); + s.data.temporal_versions.push(mk( + 3, + "9999-01-01T00:00:00+00:00", + Some("9999-06-01T00:00:00+00:00"), + )); + // lineage: 1 old, 1 fresh + s.data.lineage_graph.push(LineageRow { + edge_id: "old".into(), + source_entity: "a".into(), + source_table: "t".into(), + target_entity: "b".into(), + target_table: "t".into(), + derivation_type: "copy".into(), + description: None, + created_at: "2020-01-01T00:00:00+00:00".into(), + }); + s.data.lineage_graph.push(LineageRow { + edge_id: "new".into(), + created_at: "9999-01-01T00:00:00+00:00".into(), + ..s.data.lineage_graph[0].clone() + }); + } + + #[test] + fn gc_dry_run_counts_but_keeps_current_temporal() { + let (_d, mut s) = store(JsonFormat::Plain); + seed_aged(&mut s); + let r = RetentionConfig { + provenance_days: 30, + temporal_days: 30, + lineage_days: 30, + }; + let counts = s.gc_purge(&r, true); + assert_eq!(counts.provenance, 1); + assert_eq!(counts.temporal, 1, "only old + superseded; current kept"); + assert_eq!(counts.lineage, 1); + // dry-run mutates nothing. + assert_eq!(s.data().provenance_log.len(), 2); + assert_eq!(s.data().temporal_versions.len(), 3); + } + + #[test] + fn gc_apply_removes_old_rows_but_keeps_current_version() { + let (_d, mut s) = store(JsonFormat::Plain); + seed_aged(&mut s); + let r = RetentionConfig { + provenance_days: 30, + temporal_days: 30, + lineage_days: 30, + }; + let counts = s.gc_purge(&r, false); + assert_eq!(counts.provenance + counts.temporal + counts.lineage, 3); + assert_eq!(s.data().provenance_log.len(), 1); + // The old *current* version (v2) survives; only old superseded v1 is gone. 
+ assert_eq!(s.data().temporal_versions.len(), 2); + assert!(s.read_current("e", "t").is_some()); + assert_eq!(s.data().lineage_graph.len(), 1); + } + + #[test] + fn gc_retention_zero_is_forever() { + let (_d, mut s) = store(JsonFormat::Plain); + seed_aged(&mut s); + let counts = s.gc_purge(&RetentionConfig::default(), false); + assert_eq!(counts.provenance + counts.temporal + counts.lineage, 0); + } + + // --- Codec specifics -------------------------------------------------- + + #[test] + fn ld_output_is_genuine_linked_data() { + let mut data = SidecarData::default(); + data.provenance_log.push(ProvenanceRow { + hash: "abc".into(), + previous_hash: "".into(), + entity_id: "e1".into(), + table_name: "users".into(), + operation: "insert".into(), + actor: "alice".into(), + timestamp: "2026-01-01T00:00:00+00:00".into(), + before_snapshot: None, + transformation: None, + }); + let text = encode(&data, JsonFormat::Ld).unwrap(); + let v: Value = serde_json::from_str(&text).unwrap(); + assert!(v.get("@context").is_some(), "JSON-LD needs @context"); + let graph = v.get("@graph").unwrap().as_array().unwrap(); + assert_eq!(graph[0]["@type"], "ProvenanceEntry"); + assert_eq!(graph[0]["@id"], "urn:verisimdb:provenance:abc"); + // Round-trips back to the same data. 
+ assert_eq!(decode(&text, JsonFormat::Ld).unwrap(), data); + } + + #[test] + fn ndjson_is_one_tagged_record_per_line() { + let mut data = SidecarData::default(); + data.temporal_versions.push(TemporalRow { + entity_id: "e1".into(), + table_name: "users".into(), + version: 1, + valid_from: "2026-01-01T00:00:00+00:00".into(), + valid_to: None, + snapshot: "{}".into(), + operation: "insert".into(), + }); + let text = encode(&data, JsonFormat::Ndjson).unwrap(); + let lines: Vec<&str> = text.lines().collect(); + assert_eq!(lines.len(), 1); + let v: Value = serde_json::from_str(lines[0]).unwrap(); + assert_eq!(v["_table"], "verisimdb_temporal_versions"); + assert_eq!(decode(&text, JsonFormat::Ndjson).unwrap(), data); + } + + #[test] + fn empty_inputs_decode_to_empty_store() { + for fmt in FORMATS { + assert_eq!(decode("", fmt).unwrap(), SidecarData::default()); + } + } + + #[test] + fn scaffold_round_trips_to_empty_store_ignoring_meta() { + let octad = OctadConfig::default(); + for fmt in FORMATS { + let text = scaffold(&octad, fmt).unwrap(); + assert!(text.contains("provenance") || fmt == JsonFormat::Ndjson); + // The scaffold's _meta must not deserialise into real rows. + let decoded = decode(&text, fmt).unwrap(); + assert_eq!( + decoded, + SidecarData::default(), + "scaffold is an empty store ({fmt:?})" + ); + } + } +} diff --git a/src/sidecar/mod.rs b/src/sidecar/mod.rs new file mode 100644 index 0000000..6f91fe6 --- /dev/null +++ b/src/sidecar/mod.rs @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +// Copyright (c) 2026 Jonathan D.A. Jewell (hyperpolymath) +// +// Sidecar storage backend selection. +// +// `[sidecar].storage` (+ `[sidecar].format` for the json family) resolves +// to a [`StorageKind`]: `Sqlite`, `Postgres`, or `Json(JsonFormat)`. This +// module is the single source of truth for which storage values are +// accepted; `validate`/`doctor`, `generate`, `drift`, and `gc` all +// dispatch on it. 
+//
+// V-L2-F3 (#146): re-opens the JSON sidecar capability that was dropped in
+// V-L2-F2 (#112/#144), now as a deliberately-scoped *family* — plain JSON,
+// JSON-LD, and NDJSON — with full parity to the runtime operations the
+// SQLite path implements today. The JSON store itself lives in [`json`].
+
+pub mod json;
+
+use crate::codegen::overlay::SqlDialect;
+
+/// On-disk encoding for the `json` sidecar store. The format is purely a
+/// codec over the shared [`json::SidecarData`] model — every octad
+/// operation is written once and is format-independent.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum JsonFormat {
+    /// One JSON object keyed by table name, each holding an array of rows.
+    Plain,
+    /// JSON-LD: `@context` + `@graph` of typed (`@type`/`@id`) nodes.
+    Ld,
+    /// Newline-delimited JSON: one `{"_table": …, …}` record per line.
+    Ndjson,
+}
+
+impl JsonFormat {
+    /// Parse a `[sidecar].format` value (case-insensitive). An empty
+    /// string is treated as the default (`plain`) so `storage = "json"`
+    /// with no explicit `format` still resolves.
+    pub fn parse(format: &str) -> anyhow::Result<Self> {
+        match format.to_lowercase().as_str() {
+            "" | "plain" | "json" => Ok(JsonFormat::Plain),
+            "ld" | "json-ld" | "jsonld" => Ok(JsonFormat::Ld),
+            "ndjson" | "nd-json" | "jsonl" | "jsonlines" => Ok(JsonFormat::Ndjson),
+            other => anyhow::bail!(
+                "unsupported [sidecar].format {other:?}; supported values are \
+                 \"plain\" (default), \"ld\" (JSON-LD), and \"ndjson\"."
+            ),
+        }
+    }
+
+    /// Canonical lower-case token for this format.
+    pub fn as_str(self) -> &'static str {
+        match self {
+            JsonFormat::Plain => "plain",
+            JsonFormat::Ld => "ld",
+            JsonFormat::Ndjson => "ndjson",
+        }
+    }
+
+    /// File extension for an emitted scaffold (`generate`).
+    pub fn extension(self) -> &'static str {
+        match self {
+            JsonFormat::Plain => "json",
+            JsonFormat::Ld => "jsonld",
+            JsonFormat::Ndjson => "ndjson",
+        }
+    }
+}
+
+/// The resolved sidecar storage backend.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum StorageKind {
+    /// SQLite sidecar (the reference store).
+    Sqlite,
+    /// PostgreSQL sidecar (SQL dialect; same overlay schema).
+    Postgres,
+    /// JSON-family document store in the given on-disk [`JsonFormat`].
+    Json(JsonFormat),
+}
+
+impl StorageKind {
+    /// Resolve `[sidecar].storage` (+ `[sidecar].format` for `json`) to a
+    /// backend. Case-insensitive; `format` is only consulted for `json`.
+    ///
+    /// This is the canonical validator for `[sidecar]` storage selection —
+    /// `validate`, `generate`, `drift`, and `gc` all defer to it so they
+    /// agree on the accepted set.
+    pub fn resolve(storage: &str, format: &str) -> anyhow::Result<Self> {
+        match storage.to_lowercase().as_str() {
+            "sqlite" => Ok(StorageKind::Sqlite),
+            "postgres" | "postgresql" => Ok(StorageKind::Postgres),
+            "json" => Ok(StorageKind::Json(JsonFormat::parse(format)?)),
+            other => anyhow::bail!(
+                "unsupported [sidecar].storage {other:?}; supported values are \
+                 \"sqlite\" (default), \"postgres\"/\"postgresql\", and \"json\" \
+                 (with [sidecar].format = plain|ld|ndjson)."
+            ),
+        }
+    }
+
+    /// The SQL dialect for SQL-backed kinds; `None` for [`StorageKind::Json`].
+    /// Lets `generate` reuse the existing `codegen::overlay` DDL path for
+    /// SQL stores and branch to the JSON codec otherwise.
+    pub fn sql_dialect(self) -> Option<SqlDialect> {
+        match self {
+            StorageKind::Sqlite => Some(SqlDialect::Sqlite),
+            StorageKind::Postgres => Some(SqlDialect::Postgres),
+            StorageKind::Json(_) => None,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn resolves_sql_backends_case_insensitively() {
+        assert_eq!(
+            StorageKind::resolve("sqlite", "").unwrap(),
+            StorageKind::Sqlite
+        );
+        assert_eq!(
+            StorageKind::resolve("Postgres", "").unwrap(),
+            StorageKind::Postgres
+        );
+        assert_eq!(
+            StorageKind::resolve("POSTGRESQL", "plain").unwrap(),
+            StorageKind::Postgres
+        );
+        // format is ignored for SQL backends.
+        assert_eq!(
+            StorageKind::resolve("sqlite", "ndjson").unwrap(),
+            StorageKind::Sqlite
+        );
+    }
+
+    #[test]
+    fn resolves_json_family_with_format() {
+        assert_eq!(
+            StorageKind::resolve("json", "").unwrap(),
+            StorageKind::Json(JsonFormat::Plain),
+            "json with no format defaults to plain"
+        );
+        assert_eq!(
+            StorageKind::resolve("json", "plain").unwrap(),
+            StorageKind::Json(JsonFormat::Plain)
+        );
+        assert_eq!(
+            StorageKind::resolve("JSON", "JSON-LD").unwrap(),
+            StorageKind::Json(JsonFormat::Ld)
+        );
+        assert_eq!(
+            StorageKind::resolve("json", "ndjson").unwrap(),
+            StorageKind::Json(JsonFormat::Ndjson)
+        );
+    }
+
+    #[test]
+    fn rejects_unknown_storage_and_format() {
+        let storage_err = StorageKind::resolve("mariadb", "plain")
+            .unwrap_err()
+            .to_string();
+        assert!(storage_err.contains("unsupported") && storage_err.contains("json"));
+
+        let format_err = StorageKind::resolve("json", "yaml")
+            .unwrap_err()
+            .to_string();
+        assert!(
+            format_err.contains("unsupported") && format_err.contains("ndjson"),
+            "bad format must list supported formats, got: {format_err}"
+        );
+    }
+
+    #[test]
+    fn sql_dialect_is_none_for_json() {
+        assert!(
+            StorageKind::resolve("json", "ld")
+                .unwrap()
+                .sql_dialect()
+                .is_none()
+        );
+        assert_eq!(
+            StorageKind::resolve("sqlite", "").unwrap().sql_dialect(),
+            Some(SqlDialect::Sqlite)
+        );
+        assert_eq!(
+            StorageKind::resolve("postgres", "").unwrap().sql_dialect(),
+            Some(SqlDialect::Postgres)
+        );
+    }
+
+    #[test]
+    fn format_tokens_and_extensions() {
+        assert_eq!(JsonFormat::Plain.as_str(), "plain");
+        assert_eq!(JsonFormat::Ld.as_str(), "ld");
+        assert_eq!(JsonFormat::Ndjson.as_str(), "ndjson");
+        assert_eq!(JsonFormat::Plain.extension(), "json");
+        assert_eq!(JsonFormat::Ld.extension(), "jsonld");
+        assert_eq!(JsonFormat::Ndjson.extension(), "ndjson");
+    }
+}