From 6321d10272e699668840660ebacb7b2d679fca01 Mon Sep 17 00:00:00 2001 From: mikkeldamsgaard Date: Thu, 12 Mar 2026 12:35:44 +0100 Subject: [PATCH] feat: add ignore_columns to exclude columns from reconciliation Columns listed in ignore_columns are included in the initial INSERT but excluded from change detection, UPDATE statements, and content hash computation. Useful for timestamps, tokens, or values managed by database triggers. Co-Authored-By: Claude Opus 4.6 --- CHANGELOG.md | 1 + docs/seeding.md | 22 ++++ src/seed/executor.rs | 235 +++++++++++++++++++++++++++++++++++++++++-- src/seed/hash.rs | 51 ++++++++++ src/seed/schema.rs | 82 +++++++++++++++ 5 files changed, 382 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9cde8f..e0267e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Auto-tag workflow: CI automatically creates a git tag when `Cargo.toml` version changes on main, triggering the release workflow. - `/release` skill for Claude Code: guided release preparation with version determination, confirmation, and PR creation. +- `ignore_columns` option for reconcile mode tables: columns listed in `ignore_columns` are included in the initial INSERT but excluded from change detection, UPDATE statements, and content hash computation. Useful for timestamps, tokens, or values managed by database triggers. ### Fixed - Replaced Dockerfile `--mount=type=cache` with dependency layer caching ("empty main" trick) for reliable Docker build caching in GitHub Actions, where `--mount=type=cache` does not persist across runners. diff --git a/docs/seeding.md b/docs/seeding.md index d8f0e1f..2ea868e 100644 --- a/docs/seeding.md +++ b/docs/seeding.md @@ -250,6 +250,28 @@ initium seed --spec /seeds/seed.yaml --reconcile-all - **Changed rows** (different values for same unique key) are updated. 
- **Removed rows** (in DB but not in spec) are deleted. +**Ignoring columns:** Some columns should be set on initial insert but never overwritten during reconciliation (e.g., timestamps, random tokens, or values managed by database triggers). Use `ignore_columns` to exclude them: + +```yaml +tables: + - table: users + unique_key: [email] + ignore_columns: [created_at, api_token] + rows: + - email: alice@example.com + name: Alice + created_at: "2026-01-01" + api_token: "$env:ALICE_TOKEN" +``` + +Ignored columns are: +- **Included** in the initial INSERT (the row is written with all columns). +- **Excluded** from change detection (changing an ignored column's value in the spec does not trigger an update). +- **Excluded** from UPDATE statements (manual or trigger-managed changes in the database are preserved). +- **Excluded** from the content hash (so they don't affect the fast-path skip). + +A column cannot be listed in both `ignore_columns` and `unique_key`; a spec that does so is rejected with a validation error when it is loaded. + **Requirements:** - Every table in a reconciled seed set must have a `unique_key`. Without it, there is no way to identify which rows correspond to which spec entries. - Environment variable changes trigger reconciliation (resolved values are compared, not raw templates). 
diff --git a/src/seed/executor.rs b/src/seed/executor.rs index 3481039..ab18fd3 100644 --- a/src/seed/executor.rs +++ b/src/seed/executor.rs @@ -481,8 +481,8 @@ impl<'a> SeedExecutor<'a> { // Build canonical row_key JSON (sorted by unique key column name) let row_key = build_row_key(&ts.unique_key, &unique_columns, &unique_values); - // Build row_values JSON (all columns, sorted) - let row_values_json = build_row_values(&columns, &values); + // Build row_values JSON (excluding ignored columns for comparison) + let row_values_json = build_row_values_excluding(&columns, &values, &ts.ignore_columns); seen_keys.insert(row_key.clone()); @@ -500,16 +500,16 @@ impl<'a> SeedExecutor<'a> { continue; } - // Values differ — UPDATE + // Values differ — UPDATE (exclude key columns and ignored columns) let non_key_columns: Vec = columns .iter() - .filter(|c| !ts.unique_key.contains(c)) + .filter(|c| !ts.unique_key.contains(c) && !ts.ignore_columns.contains(c)) .cloned() .collect(); let non_key_values: Vec = columns .iter() .zip(values.iter()) - .filter(|(c, _)| !ts.unique_key.contains(c)) + .filter(|(c, _)| !ts.unique_key.contains(c) && !ts.ignore_columns.contains(c)) .map(|(_, v)| v.clone()) .collect(); @@ -700,7 +700,8 @@ impl<'a> SeedExecutor<'a> { } let row_key = build_row_key(&ts.unique_key, &unique_columns, &unique_values); - let row_values_json = build_row_values(&columns, &values); + let row_values_json = + build_row_values_excluding(&columns, &values, &ts.ignore_columns); seen_keys.insert(row_key.clone()); match tracked_values.get(&row_key) { @@ -740,11 +741,14 @@ fn build_row_key(unique_key_spec: &[String], columns: &[String], values: &[Strin serde_json::to_string(&map).unwrap_or_default() } -/// Build a canonical JSON representation of all row values (sorted by column name). -fn build_row_values(columns: &[String], values: &[String]) -> String { +/// Build a canonical JSON representation of row values, excluding specified columns. 
+/// Ignored columns are excluded from tracking so changes to them don't trigger reconciliation. +fn build_row_values_excluding(columns: &[String], values: &[String], exclude: &[String]) -> String { let mut map = BTreeMap::new(); for (i, col) in columns.iter().enumerate() { - map.insert(col.clone(), values[i].clone()); + if !exclude.contains(col) { + map.insert(col.clone(), values[i].clone()); + } } serde_json::to_string(&map).unwrap_or_default() } @@ -2425,4 +2429,217 @@ phases: assert!(result.is_err()); assert!(result.unwrap_err().contains("no unique_key")); } + + #[test] + fn test_reconcile_ignore_columns_not_compared() { + let dir = tempfile::TempDir::new().unwrap(); + let db_path = dir.path().join("test.db"); + let db_path_str = db_path.to_str().unwrap(); + + let sqlite = SqliteDb::connect(db_path_str).unwrap(); + sqlite + .conn + .execute_batch( + "CREATE TABLE config (key TEXT PRIMARY KEY, value TEXT, updated_at TEXT);", + ) + .unwrap(); + + // Initial apply with updated_at as ignored column + let yaml1 = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: phase1 + seed_sets: + - name: config + mode: reconcile + tables: + - table: config + unique_key: [key] + ignore_columns: [updated_at] + rows: + - key: app_name + value: MyApp + updated_at: "2026-01-01" +"#; + let plan1 = SeedPlan::from_yaml(yaml1).unwrap(); + let log = test_logger(); + + let db1 = SqliteDb::connect(db_path_str).unwrap(); + let mut exec1 = SeedExecutor::new(&log, Box::new(db1), "initium_seed".into(), false); + exec1.execute(&plan1).unwrap(); + + // Verify initial values + let db_check = SqliteDb::connect(db_path_str).unwrap(); + let val: String = db_check + .conn + .query_row( + "SELECT updated_at FROM config WHERE key = 'app_name'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(val, "2026-01-01"); + + // Change the ignored column value — should NOT trigger an update + let yaml2 = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: phase1 + seed_sets: 
+ - name: config + mode: reconcile + tables: + - table: config + unique_key: [key] + ignore_columns: [updated_at] + rows: + - key: app_name + value: MyApp + updated_at: "2026-12-31" +"#; + let plan2 = SeedPlan::from_yaml(yaml2).unwrap(); + let db2 = SqliteDb::connect(db_path_str).unwrap(); + let mut exec2 = SeedExecutor::new(&log, Box::new(db2), "initium_seed".into(), false); + exec2.execute(&plan2).unwrap(); + + // updated_at should remain unchanged (ignored column not updated) + let db_final = SqliteDb::connect(db_path_str).unwrap(); + let val: String = db_final + .conn + .query_row( + "SELECT updated_at FROM config WHERE key = 'app_name'", + [], + |r| r.get(0), + ) + .unwrap(); + assert_eq!(val, "2026-01-01"); + } + + #[test] + fn test_reconcile_ignore_columns_still_inserted() { + let dir = tempfile::TempDir::new().unwrap(); + let db_path = dir.path().join("test.db"); + let db_path_str = db_path.to_str().unwrap(); + + let sqlite = SqliteDb::connect(db_path_str).unwrap(); + sqlite + .conn + .execute_batch("CREATE TABLE items (name TEXT PRIMARY KEY, note TEXT);") + .unwrap(); + + let yaml = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: phase1 + seed_sets: + - name: items + mode: reconcile + tables: + - table: items + unique_key: [name] + ignore_columns: [note] + rows: + - name: item1 + note: "initial note" +"#; + let plan = SeedPlan::from_yaml(yaml).unwrap(); + let log = test_logger(); + + let db1 = SqliteDb::connect(db_path_str).unwrap(); + let mut exec = SeedExecutor::new(&log, Box::new(db1), "initium_seed".into(), false); + exec.execute(&plan).unwrap(); + + // Ignored column should still be present on initial insert + let db_check = SqliteDb::connect(db_path_str).unwrap(); + let note: String = db_check + .conn + .query_row("SELECT note FROM items WHERE name = 'item1'", [], |r| { + r.get(0) + }) + .unwrap(); + assert_eq!(note, "initial note"); + } + + #[test] + fn test_reconcile_ignore_columns_non_ignored_still_updated() { + let dir = 
tempfile::TempDir::new().unwrap(); + let db_path = dir.path().join("test.db"); + let db_path_str = db_path.to_str().unwrap(); + + let sqlite = SqliteDb::connect(db_path_str).unwrap(); + sqlite + .conn + .execute_batch( + "CREATE TABLE config (key TEXT PRIMARY KEY, value TEXT, updated_at TEXT);", + ) + .unwrap(); + + // Initial + let yaml1 = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: phase1 + seed_sets: + - name: config + mode: reconcile + tables: + - table: config + unique_key: [key] + ignore_columns: [updated_at] + rows: + - key: setting1 + value: old_value + updated_at: "2026-01-01" +"#; + let plan1 = SeedPlan::from_yaml(yaml1).unwrap(); + let log = test_logger(); + + let db1 = SqliteDb::connect(db_path_str).unwrap(); + let mut exec1 = SeedExecutor::new(&log, Box::new(db1), "initium_seed".into(), false); + exec1.execute(&plan1).unwrap(); + + // Change value (non-ignored) — should trigger update, but NOT touch updated_at + let yaml2 = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: phase1 + seed_sets: + - name: config + mode: reconcile + tables: + - table: config + unique_key: [key] + ignore_columns: [updated_at] + rows: + - key: setting1 + value: new_value + updated_at: "2026-12-31" +"#; + let plan2 = SeedPlan::from_yaml(yaml2).unwrap(); + let db2 = SqliteDb::connect(db_path_str).unwrap(); + let mut exec2 = SeedExecutor::new(&log, Box::new(db2), "initium_seed".into(), false); + exec2.execute(&plan2).unwrap(); + + let db_final = SqliteDb::connect(db_path_str).unwrap(); + let (value, updated_at): (String, String) = db_final + .conn + .query_row( + "SELECT value, updated_at FROM config WHERE key = 'setting1'", + [], + |r| Ok((r.get(0)?, r.get(1)?)), + ) + .unwrap(); + assert_eq!(value, "new_value"); // Non-ignored column updated + assert_eq!(updated_at, "2026-01-01"); // Ignored column preserved + } } diff --git a/src/seed/hash.rs b/src/seed/hash.rs index eedd7d9..2b06d10 100644 --- a/src/seed/hash.rs +++ 
b/src/seed/hash.rs @@ -43,6 +43,11 @@ pub fn compute_seed_set_hash( if key.as_str() == "_ref" { continue; } + // Ignored columns don't affect the hash — changes to them + // won't trigger reconciliation. + if ts.ignore_columns.contains(key) { + continue; + } hasher.update(key.as_bytes()); hasher.update(b"="); @@ -252,4 +257,50 @@ phases: let h2 = compute_seed_set_hash(&plan2.phases[0].seed_sets[0], &identity_resolver).unwrap(); assert_ne!(h1, h2); } + + #[test] + fn test_hash_ignores_ignored_columns() { + let yaml1 = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: p + seed_sets: + - name: s + mode: reconcile + tables: + - table: t + unique_key: [k] + ignore_columns: [note] + rows: + - k: a + note: "version 1" +"#; + let yaml2 = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: p + seed_sets: + - name: s + mode: reconcile + tables: + - table: t + unique_key: [k] + ignore_columns: [note] + rows: + - k: a + note: "version 2" +"#; + let plan1 = SeedPlan::from_yaml(yaml1).unwrap(); + let plan2 = SeedPlan::from_yaml(yaml2).unwrap(); + let h1 = compute_seed_set_hash(&plan1.phases[0].seed_sets[0], &identity_resolver).unwrap(); + let h2 = compute_seed_set_hash(&plan2.phases[0].seed_sets[0], &identity_resolver).unwrap(); + assert_eq!( + h1, h2, + "hash should be identical when only ignored columns change" + ); + } } diff --git a/src/seed/schema.rs b/src/seed/schema.rs index 3b64f0a..d56031a 100644 --- a/src/seed/schema.rs +++ b/src/seed/schema.rs @@ -121,6 +121,8 @@ pub struct TableSeed { #[serde(default)] pub unique_key: Vec, #[serde(default)] + pub ignore_columns: Vec, + #[serde(default)] pub auto_id: Option, pub rows: Vec>, } @@ -256,6 +258,20 @@ impl SeedPlan { ts.table, ss.name, reserved )); } + if ts.ignore_columns.iter().any(|c| c.trim().is_empty()) { + return Err(format!( + "table '{}' in seed_set '{}' has empty or whitespace-only entries in ignore_columns", + ts.table, ss.name + )); + } + for ic in &ts.ignore_columns { + if 
ts.unique_key.contains(ic) { + return Err(format!( + "table '{}' in seed_set '{}': column '{}' cannot be in both unique_key and ignore_columns", + ts.table, ss.name, ic + )); + } + } for (row_idx, row) in ts.rows.iter().enumerate() { for uk in &ts.unique_key { if !row.contains_key(uk) { @@ -738,4 +754,70 @@ phases: let err = SeedPlan::from_yaml(yaml).unwrap_err(); assert!(err.contains("missing unique_key column 'email'")); } + + #[test] + fn test_reconcile_rejects_ignore_columns_overlapping_unique_key() { + let yaml = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: p + seed_sets: + - name: s + mode: reconcile + tables: + - table: t + unique_key: [email] + ignore_columns: [email] + rows: + - email: alice@co.com +"#; + let err = SeedPlan::from_yaml(yaml).unwrap_err(); + assert!(err.contains("cannot be in both unique_key and ignore_columns")); + } + + #[test] + fn test_reconcile_rejects_empty_ignore_columns_entry() { + let yaml = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: p + seed_sets: + - name: s + mode: reconcile + tables: + - table: t + unique_key: [email] + ignore_columns: [""] + rows: + - email: alice@co.com +"#; + let err = SeedPlan::from_yaml(yaml).unwrap_err(); + assert!(err.contains("empty or whitespace-only entries in ignore_columns")); + } + + #[test] + fn test_reconcile_accepts_valid_ignore_columns() { + let yaml = r#" +database: + driver: sqlite + url: ":memory:" +phases: + - name: p + seed_sets: + - name: s + mode: reconcile + tables: + - table: t + unique_key: [email] + ignore_columns: [updated_at] + rows: + - email: alice@co.com + updated_at: "2026-01-01" +"#; + assert!(SeedPlan::from_yaml(yaml).is_ok()); + } }