From 98bc8f24f583ef1e5448f732dc6a488cb31b2e98 Mon Sep 17 00:00:00 2001 From: Dennis Kobert Date: Sat, 13 Jun 2026 22:09:21 +0200 Subject: [PATCH] Add document-format crate --- Cargo.lock | 19 + Cargo.toml | 3 + document/document-format/Cargo.toml | 26 + document/document-format/src/codec.rs | 324 ++++++ document/document-format/src/export.rs | 49 + document/document-format/src/io.rs | 60 + document/document-format/src/layout.rs | 51 + document/document-format/src/lib.rs | 1018 +++++++++++++++++ document/document-format/src/manifest.rs | 68 ++ document/document-format/src/session_state.rs | 32 + document/document-format/tests/open_create.rs | 643 +++++++++++ node-graph/rfcs/document-format.md | 390 +++++++ 12 files changed, 2683 insertions(+) create mode 100644 document/document-format/Cargo.toml create mode 100644 document/document-format/src/codec.rs create mode 100644 document/document-format/src/export.rs create mode 100644 document/document-format/src/io.rs create mode 100644 document/document-format/src/layout.rs create mode 100644 document/document-format/src/lib.rs create mode 100644 document/document-format/src/manifest.rs create mode 100644 document/document-format/src/session_state.rs create mode 100644 document/document-format/tests/open_create.rs create mode 100644 node-graph/rfcs/document-format.md diff --git a/Cargo.lock b/Cargo.lock index 88e882908e..d4fe3b5a29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1269,6 +1269,25 @@ dependencies = [ "litrs", ] +[[package]] +name = "document-format" +version = "0.0.0" +dependencies = [ + "chrono", + "core-types", + "document-container", + "futures", + "graph-craft", + "graph-storage", + "graphene-resource", + "log", + "rmp-serde", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.18", +] + [[package]] name = "downcast-rs" version = "1.2.1" diff --git a/Cargo.toml b/Cargo.toml index 444fdffa9f..b502b2269c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "desktop/platform/win", 
"document/container", "document/graph-storage", + "document/document-format", "editor", "frontend/wrapper", "libraries/dyn-any", @@ -89,6 +90,8 @@ math-nodes = { path = "node-graph/nodes/math" } path-bool-nodes = { path = "node-graph/nodes/path-bool" } graph-craft = { path = "node-graph/graph-craft" } graph-storage = { path = "document/graph-storage" } +document-format = { path = "document/document-format" } +document-container = { path = "document/container" } raster-nodes = { path = "node-graph/nodes/raster" } graphene-std = { path = "node-graph/nodes/gstd" } interpreted-executor = { path = "node-graph/interpreted-executor" } diff --git a/document/document-format/Cargo.toml b/document/document-format/Cargo.toml new file mode 100644 index 0000000000..3cf3552fde --- /dev/null +++ b/document/document-format/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "document-format" +description = "Typed handle for the .gdd document format, sitting over graph-storage and document-container" +edition.workspace = true +version.workspace = true +license.workspace = true +authors.workspace = true + +[dependencies] +document-container = { workspace = true, features = ["zip", "xz"] } +graph-storage = { workspace = true } +graph-craft = { workspace = true } +graphene-resource = { workspace = true } +core-types = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +rmp-serde = { workspace = true } +futures = { workspace = true } +chrono = { workspace = true } +thiserror = "2.0" +log = { workspace = true } + +[dev-dependencies] +futures = { workspace = true } +graphene-resource = { workspace = true } +tempfile = "3" diff --git a/document/document-format/src/codec.rs b/document/document-format/src/codec.rs new file mode 100644 index 0000000000..bae0aeea9d --- /dev/null +++ b/document/document-format/src/codec.rs @@ -0,0 +1,324 @@ +//! Codec for a stream of values. Single-value writes are just streams of length one. 
+
+use serde::{Deserialize, Serialize, de::DeserializeOwned};
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
+pub enum Codec {
+	/// A single JSON document. `append` to a non-empty buffer errors.
+	Json,
+	/// Newline-delimited compact JSON, one value per line.
+	JsonLines,
+	/// A single MessagePack blob. `append` to a non-empty buffer errors.
+	MessagePack,
+	/// Length-prefixed MessagePack frames: `[u32 big-endian length][MessagePack bytes]` per value.
+	MessagePackFrames,
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum CodecError {
+	#[error("MessagePack encode error: {0}")]
+	MessagePackEncode(#[from] rmp_serde::encode::Error),
+	#[error("MessagePack decode error: {0}")]
+	MessagePackDecode(#[from] rmp_serde::decode::Error),
+	#[error("JSON error: {0}")]
+	Json(#[from] serde_json::Error),
+	#[error("frame length {0} exceeds u32")]
+	FrameTooLarge(usize),
+	#[error("frame length prefix truncated: need 4 bytes, have {0}")]
+	TruncatedLengthPrefix(usize),
+	#[error("declared frame length {declared} exceeds remaining buffer ({remaining} bytes)")]
+	TruncatedFrame { declared: usize, remaining: usize },
+	#[error("single-value codec cannot append to a non-empty buffer")]
+	SingleValueAlreadyWritten,
+	#[error("expected at least one value, got none")]
+	Empty,
+	#[error("expected exactly one value, got more")]
+	ExpectedSingle,
+}
+
+impl Codec {
+	pub fn extension(self) -> &'static str {
+		match self {
+			Codec::Json => "json",
+			Codec::JsonLines => "jsonl",
+			Codec::MessagePack => "bin",
+			Codec::MessagePackFrames => "frames",
+		}
+	}
+
+	/// Append one value to `output` in this codec's framing.
+	/// Single-value codecs error if `output` is non-empty.
+	pub fn append<T: Serialize>(self, output: &mut Vec<u8>, value: &T) -> Result<(), CodecError> {
+		match self {
+			Codec::Json => {
+				if !output.is_empty() {
+					return Err(CodecError::SingleValueAlreadyWritten);
+				}
+				serde_json::to_writer_pretty(output, value)?;
+				Ok(())
+			}
+			Codec::JsonLines => {
+				serde_json::to_writer(&mut *output, value)?;
+				output.push(b'\n');
+				Ok(())
+			}
+			Codec::MessagePack => {
+				if !output.is_empty() {
+					return Err(CodecError::SingleValueAlreadyWritten);
+				}
+				rmp_serde::encode::write(output, value)?;
+				Ok(())
+			}
+			Codec::MessagePackFrames => {
+				let payload = rmp_serde::to_vec(value)?;
+				let length = u32::try_from(payload.len()).map_err(|_| CodecError::FrameTooLarge(payload.len()))?;
+				output.extend_from_slice(&length.to_be_bytes());
+				output.extend_from_slice(&payload);
+				Ok(())
+			}
+		}
+	}
+
+	/// Iterate values from `bytes`. Single-value codecs yield exactly one item;
+	/// stream codecs yield however many were written.
+	pub fn iter<'a, T: DeserializeOwned + 'a>(self, bytes: &'a [u8]) -> Box<dyn Iterator<Item = Result<T, CodecError>> + 'a> {
+		match self {
+			Codec::Json => {
+				let single = serde_json::from_slice::<T>(bytes).map_err(CodecError::from);
+				Box::new(std::iter::once(single))
+			}
+			Codec::JsonLines => Box::new(JsonLineIter {
+				remaining: bytes,
+				_marker: std::marker::PhantomData,
+			}),
+			Codec::MessagePack => {
+				let single = rmp_serde::from_slice::<T>(bytes).map_err(CodecError::from);
+				Box::new(std::iter::once(single))
+			}
+			Codec::MessagePackFrames => Box::new(MessagePackFrameIter {
+				remaining: bytes,
+				_marker: std::marker::PhantomData,
+			}),
+		}
+	}
+
+	/// Serialize a single value into a fresh buffer.
+	pub fn write_single<T: Serialize>(self, value: &T) -> Result<Vec<u8>, CodecError> {
+		let mut output = Vec::new();
+		self.append(&mut output, value)?;
+		Ok(output)
+	}
+
+	/// Deserialize the single value in `bytes`. Errors if zero or more than one value is present.
+	pub fn read_single<T: DeserializeOwned>(self, bytes: &[u8]) -> Result<T, CodecError> {
+		let mut iter = self.iter::<T>(bytes);
+		let first = iter.next().ok_or(CodecError::Empty)??;
+		if iter.next().is_some() {
+			return Err(CodecError::ExpectedSingle);
+		}
+		Ok(first)
+	}
+}
+
+struct JsonLineIter<'a, T> {
+	remaining: &'a [u8],
+	_marker: std::marker::PhantomData<fn() -> T>,
+}
+
+impl<T: DeserializeOwned> Iterator for JsonLineIter<'_, T> {
+	type Item = Result<T, CodecError>;
+
+	fn next(&mut self) -> Option<Self::Item> {
+		loop {
+			if self.remaining.is_empty() {
+				return None;
+			}
+
+			let (line, tail) = match self.remaining.iter().position(|&byte| byte == b'\n') {
+				Some(index) => (&self.remaining[..index], &self.remaining[index + 1..]),
+				None => (self.remaining, &[][..]),
+			};
+			self.remaining = tail;
+
+			let trimmed = trim_ascii(line);
+			if trimmed.is_empty() {
+				continue;
+			}
+
+			return Some(serde_json::from_slice(trimmed).map_err(CodecError::from));
+		}
+	}
+}
+
+struct MessagePackFrameIter<'a, T> {
+	remaining: &'a [u8],
+	_marker: std::marker::PhantomData<fn() -> T>,
+}
+
+impl<T: DeserializeOwned> Iterator for MessagePackFrameIter<'_, T> {
+	type Item = Result<T, CodecError>;
+
+	fn next(&mut self) -> Option<Self::Item> {
+		if self.remaining.is_empty() {
+			return None;
+		}
+
+		let buffer = std::mem::take(&mut self.remaining);
+
+		let Some((length_bytes, tail)) = buffer.split_first_chunk::<4>() else {
+			return Some(Err(CodecError::TruncatedLengthPrefix(buffer.len())));
+		};
+		let length = u32::from_be_bytes(*length_bytes) as usize;
+
+		if tail.len() < length {
+			return Some(Err(CodecError::TruncatedFrame {
+				declared: length,
+				remaining: tail.len(),
+			}));
+		}
+
+		let (frame, after) = tail.split_at(length);
+		self.remaining = after;
+
+		Some(rmp_serde::from_slice(frame).map_err(CodecError::from))
+	}
+}
+
+fn trim_ascii(bytes: &[u8]) -> &[u8] {
+	let start = bytes.iter().position(|byte| !byte.is_ascii_whitespace()).unwrap_or(bytes.len());
+	let end = bytes.iter().rposition(|byte| !byte.is_ascii_whitespace()).map(|index| index + 1).unwrap_or(start);
+	&bytes[start..end]
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+	use serde::{Deserialize, Serialize};
+
+	#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
+	struct Frame {
+		id: u32,
+		label: String,
+	}
+
+	fn frames() -> [Frame; 3] {
+		[Frame { id: 1, label: "alpha".into() }, Frame { id: 2, label: "beta".into() }, Frame { id: 3, label: "gamma".into() }]
+	}
+
+	#[test]
+	fn json_round_trip_single() {
+		let frame = Frame { id: 7, label: "solo".into() };
+		let bytes = Codec::Json.write_single(&frame).unwrap();
+		let decoded: Frame = Codec::Json.read_single(&bytes).unwrap();
+		assert_eq!(decoded, frame);
+	}
+
+	#[test]
+	fn json_append_to_non_empty_errors() {
+		let mut buffer = b"already here".to_vec();
+		let result = Codec::Json.append(&mut buffer, &Frame { id: 1, label: "x".into() });
+		assert!(matches!(result, Err(CodecError::SingleValueAlreadyWritten)), "got {result:?}");
+	}
+
+	#[test]
+	fn message_pack_round_trip_single() {
+		let frame = Frame { id: 99, label: "blob".into() };
+		let bytes = Codec::MessagePack.write_single(&frame).unwrap();
+		let decoded: Frame = Codec::MessagePack.read_single(&bytes).unwrap();
+		assert_eq!(decoded, frame);
+	}
+
+	#[test]
+	fn message_pack_append_to_non_empty_errors() {
+		let mut buffer = vec![0xAB];
+		let result = Codec::MessagePack.append(&mut buffer, &Frame { id: 1, label: "x".into() });
+		assert!(matches!(result, Err(CodecError::SingleValueAlreadyWritten)), "got {result:?}");
+	}
+
+	/// A type-erased `serde_json::Value` round-trips through the binary codec: the property postcard
+	/// could not satisfy (it raises `WontImplement` on self-describing values), which is why the
+	/// resource/attribute deltas that carry `serde_json::Value` bodies need a self-describing codec.
+	#[test]
+	fn message_pack_round_trips_serde_json_value() {
+		let value = serde_json::json!({ "kind": "embedded", "priority": 1.5, "tags": ["a", "b"] });
+		let bytes = Codec::MessagePack.write_single(&value).unwrap();
+		let decoded: serde_json::Value = Codec::MessagePack.read_single(&bytes).unwrap();
+		assert_eq!(decoded, value);
+	}
+
+	#[test]
+	fn json_lines_round_trip_and_skip_blanks() {
+		let frames = [Frame { id: 1, label: "alpha".into() }, Frame { id: 2, label: "beta".into() }];
+
+		let mut buffer = Vec::new();
+		Codec::JsonLines.append(&mut buffer, &frames[0]).unwrap();
+		buffer.extend_from_slice(b" \n\n");
+		Codec::JsonLines.append(&mut buffer, &frames[1]).unwrap();
+
+		let decoded: Vec<Frame> = Codec::JsonLines.iter(&buffer).collect::<Result<_, _>>().unwrap();
+		assert_eq!(decoded, frames);
+	}
+
+	#[test]
+	fn message_pack_frames_round_trip() {
+		let frames = frames();
+		let mut buffer = Vec::new();
+		for frame in &frames {
+			Codec::MessagePackFrames.append(&mut buffer, frame).unwrap();
+		}
+		let decoded: Vec<Frame> = Codec::MessagePackFrames.iter(&buffer).collect::<Result<_, _>>().unwrap();
+		assert_eq!(decoded, frames);
+	}
+
+	/// A crash mid-append leaves a torn final frame. The length prefix lets us detect that
+	/// deterministically (declared length exceeds the bytes that actually made it to disk) rather
+	/// than decoding a partial value into a plausible-but-wrong one.
+	#[test]
+	fn message_pack_frames_detect_truncation() {
+		let mut buffer = Vec::new();
+		Codec::MessagePackFrames.append(&mut buffer, &Frame { id: 7, label: "ok".into() }).unwrap();
+		buffer.truncate(buffer.len() - 1);
+		let last = Codec::MessagePackFrames.iter::<Frame>(&buffer).last().unwrap();
+		assert!(matches!(last, Err(CodecError::TruncatedFrame { .. })), "got {last:?}");
+	}
+
+	/// A buffer whose first record's length prefix itself is incomplete (fewer than 4 bytes) is
+	/// reported as a truncated prefix rather than mis-read as a zero-length frame.
+	#[test]
+	fn message_pack_frames_detect_truncated_length_prefix() {
+		let buffer = vec![0x00, 0x00];
+		let last = Codec::MessagePackFrames.iter::<Frame>(&buffer).last().unwrap();
+		assert!(matches!(last, Err(CodecError::TruncatedLengthPrefix(2))), "got {last:?}");
+	}
+
+	#[test]
+	fn write_single_then_read_with_iter_yields_one() {
+		let frame = Frame { id: 5, label: "one".into() };
+		for codec in [Codec::Json, Codec::JsonLines, Codec::MessagePack, Codec::MessagePackFrames] {
+			let bytes = codec.write_single(&frame).unwrap();
+			let collected: Vec<Frame> = codec.iter(&bytes).collect::<Result<_, _>>().unwrap();
+			assert_eq!(collected, vec![Frame { id: 5, label: "one".into() }], "codec {codec:?}");
+		}
+	}
+
+	#[test]
+	fn read_single_rejects_multi_value_stream() {
+		let mut buffer = Vec::new();
+		Codec::JsonLines.append(&mut buffer, &Frame { id: 1, label: "a".into() }).unwrap();
+		Codec::JsonLines.append(&mut buffer, &Frame { id: 2, label: "b".into() }).unwrap();
+		let result: Result<Frame, _> = Codec::JsonLines.read_single(&buffer);
+		assert!(matches!(result, Err(CodecError::ExpectedSingle)), "got {result:?}");
+	}
+
+	#[test]
+	fn extensions_are_distinct() {
+		let exts = [
+			Codec::Json.extension(),
+			Codec::JsonLines.extension(),
+			Codec::MessagePack.extension(),
+			Codec::MessagePackFrames.extension(),
+		];
+		let unique: std::collections::HashSet<_> = exts.iter().collect();
+		assert_eq!(unique.len(), exts.len(), "extensions collide: {exts:?}");
+	}
+}
diff --git a/document/document-format/src/export.rs b/document/document-format/src/export.rs
new file mode 100644
index 0000000000..58227e4f6b
--- /dev/null
+++ b/document/document-format/src/export.rs
@@ -0,0 +1,49 @@
+//! Export options. Walking the working copy through an archive codec / re-encoding payloads.
+//! Implementation lives on [`crate::Gdd::export`].
+
+/// Export wrapping. Payloads keep the working copy's recorded per-payload codecs (see
+/// [`crate::manifest::PayloadCodecs`]); export does not re-encode.
+#[derive(Copy, Clone, Debug)]
+pub enum ExportFormat {
+	/// Copy the working copy to a destination folder.
+	Folder,
+	/// Wrap as a `.gdd.zip` archive (deflate, pure-Rust `zip` crate).
+	Zip,
+	/// Wrap as a `.gdd.xz` archive (whole-archive xz via `lzma-rust2`).
+	Xz,
+}
+
+#[derive(Copy, Clone, Debug)]
+pub struct ExportOptions {
+	/// Whether to include the registry snapshot. `false` produces a history-only export, useful
+	/// for VCS workflows where the diffable `history.jsonl` is the interesting payload and the
+	/// registry would rewrite whole-file on every retirement. Consumers replay history from an
+	/// empty registry.
+	pub include_registry: bool,
+	/// Whether to include `history.jsonl`. `false` produces a flat snapshot (registry only),
+	/// useful for sharing without revealing edit history and for cutting file size.
+	pub include_history: bool,
+	/// Materialize every `DataSource::FilePath` resource into `resources/` for portability.
+	/// Does not mutate the in-memory `Gdd`.
+	pub embed_all_resources: bool,
+}
+
+impl ExportOptions {
+	/// Returns an error description if the combination is incoherent.
+	pub fn validate(&self) -> Result<(), &'static str> {
+		if !self.include_registry && !self.include_history {
+			return Err("export must include at least one of: registry, history");
+		}
+		Ok(())
+	}
+}
+
+impl Default for ExportOptions {
+	fn default() -> Self {
+		Self {
+			include_registry: true,
+			include_history: true,
+			embed_all_resources: false,
+		}
+	}
+}
diff --git a/document/document-format/src/io.rs b/document/document-format/src/io.rs
new file mode 100644
index 0000000000..987e3f37da
--- /dev/null
+++ b/document/document-format/src/io.rs
@@ -0,0 +1,60 @@
+//! Bridge between [`crate::Codec`] and [`document_container::AnyContainer`]. Each payload's codec
+//! is known up front (the manifest is always JSON; every other payload's codec is recorded in the
+//! manifest), so reads and writes address a fixed `{basename}.{ext}` path without probing.
+
+use document_container::{AnyContainer, AsyncContainer};
+use serde::Serialize;
+use serde::de::DeserializeOwned;
+
+use crate::{Codec, CodecError};
+
+/// Compose a container path from `basename` and `codec.extension()`.
+pub fn path_for(basename: &str, codec: Codec) -> String {
+	format!("{basename}.{}", codec.extension())
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum ReadError {
+	#[error("file not found for basename {basename:?} with codec {codec:?}")]
+	NotFound { basename: String, codec: Codec },
+	#[error("container error: {0}")]
+	Container(#[from] document_container::ContainerError),
+	#[error("codec error: {0}")]
+	Codec(#[from] CodecError),
+}
+
+/// Read `{basename}.{ext}` and decode the single value it contains.
+pub async fn read_single<T: DeserializeOwned>(container: &AnyContainer, basename: &str, codec: Codec) -> Result<T, ReadError> {
+	let bytes = read_bytes(container, basename, codec).await?;
+	Ok(codec.read_single::<T>(bytes.as_slice())?)
+}
+
+/// Same as [`read_single`] but yields every value when `codec` is a stream codec.
+pub async fn iter<T: DeserializeOwned>(container: &AnyContainer, basename: &str, codec: Codec) -> Result<Vec<T>, ReadError> {
+	let bytes = read_bytes(container, basename, codec).await?;
+	Ok(codec.iter::<T>(bytes.as_slice()).collect::<Result<Vec<T>, _>>()?)
+}
+
+/// Whether `{basename}.{ext}` exists for the given codec.
+pub async fn exists(container: &AnyContainer, basename: &str, codec: Codec) -> bool {
+	container.exists(&path_for(basename, codec)).await
+}
+
+async fn read_bytes(container: &AnyContainer, basename: &str, codec: Codec) -> Result<document_container::ByteHolder, ReadError> { // NOTE(review): return type reconstructed from `.as_slice()` usage — confirm
+	let path = path_for(basename, codec);
+	if !container.exists(&path).await {
+		return Err(ReadError::NotFound {
+			basename: basename.to_string(),
+			codec,
+		});
+	}
+	Ok(container.read(&path).await?)
+}
+
+/// Encode `value` with `codec` and write to `{basename}.{ext}`. Synchronous: the write goes through
+/// the container's sync write surface (durable on folder/memory, enqueued on OPFS).
+pub fn write_single<T: Serialize>(container: &AnyContainer, basename: &str, codec: Codec, value: &T) -> Result<(), ReadError> {
+	let bytes = codec.write_single(value)?;
+	container.write_non_blocking(&path_for(basename, codec), &bytes)?;
+	Ok(())
+}
diff --git a/document/document-format/src/layout.rs b/document/document-format/src/layout.rs
new file mode 100644
index 0000000000..846409cffc
--- /dev/null
+++ b/document/document-format/src/layout.rs
@@ -0,0 +1,51 @@
+//! Path layout for a `.gdd` working copy.
+//!
+//! Layout owns basenames only — the codec choice for each payload is a runtime parameter at the
+//! read/write call site. Working-copy creation, exports, and migrations may all hit the same
+//! basename with different codecs.
+
+use graphene_resource::ResourceHash;
+
+pub trait Layout {
+	fn manifest_basename(&self) -> &str;
+	fn session_basename(&self) -> &str;
+	fn registry_basename(&self) -> &str;
+	fn history_basename(&self) -> &str;
+	fn hot_log_basename(&self) -> &str;
+	fn resources_dir(&self) -> &str;
+	fn resource_path(&self, hash: &ResourceHash) -> String;
+	/// The embedded legacy `.graphite` document, stored verbatim during the dual-write soak so the
+	/// new format can be validated against (and recovered from) the old one. Dropped once `.gdd`
+	/// becomes the sole source of truth.
+	fn legacy_basename(&self) -> &str;
+}
+
+#[derive(Copy, Clone, Debug, Default)]
+pub struct GddV1;
+
+impl Layout for GddV1 {
+	fn manifest_basename(&self) -> &str {
+		"manifest"
+	}
+	fn session_basename(&self) -> &str {
+		"session"
+	}
+	fn registry_basename(&self) -> &str {
+		"registry"
+	}
+	fn history_basename(&self) -> &str {
+		"history"
+	}
+	fn hot_log_basename(&self) -> &str {
+		"hot-log"
+	}
+	fn resources_dir(&self) -> &str {
+		"resources"
+	}
+	fn resource_path(&self, hash: &ResourceHash) -> String {
+		format!("{}/{hash}", self.resources_dir())
+	}
+	fn legacy_basename(&self) -> &str {
+		"legacy.graphite"
+	}
+}
diff --git a/document/document-format/src/lib.rs b/document/document-format/src/lib.rs
new file mode 100644
index 0000000000..d2ffa33fc0
--- /dev/null
+++ b/document/document-format/src/lib.rs
@@ -0,0 +1,1018 @@
+//! Typed handle for `.gdd` documents.
+//!
+//! [`Gdd`] owns a [`graph_storage::Session`] plus a working-copy [`document_container::AnyContainer`].
+//! Mutations flow through `Gdd` to keep the session and the on-disk working copy mirrored.
+//! Export is a separate, explicit operation — see [`export::ExportFormat`].
+//!
+//! See `notes/disk-container-format.md` for the design rationale.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+// `Path`, `Archive`, and `FolderBackend` are only used by the native-only path-based open/create
+// and filesystem export, so they're gated off wasm to avoid unused-import warnings.
+#[cfg(not(target_family = "wasm"))] +use std::path::Path; + +#[cfg(not(target_family = "wasm"))] +use document_container::archive::Archive; +#[cfg(not(target_family = "wasm"))] +use document_container::backends::folder::FolderBackend; +use document_container::{AnyContainer, AsyncContainer, ByteHolder, ContainerError}; +use graph_storage::{CommitError, CrdtError, Delta, HotOp, NodeMetadataSource, PeerId, Registry, Rev, Session, TimeStamp}; +use graphene_resource::ResourceFuture; +use graphene_resource::{LoadResource, Resource, ResourceHash, ResourceStorage}; + +pub mod codec; +pub mod export; +pub mod io; +pub mod layout; +pub mod manifest; +pub mod session_state; + +pub use codec::{Codec, CodecError}; +pub use export::{ExportFormat, ExportOptions}; +pub use io::ReadError; +pub use layout::{GddV1, Layout}; +pub use manifest::{Manifest, PayloadCodecs}; +pub use session_state::SessionState; + +/// The manifest is always JSON: it is the bootstrap file, read before any other payload's codec is +/// known, so its own codec cannot itself be configurable. +pub const MANIFEST_CODEC: Codec = Codec::Json; + +/// Working-copy codecs. The working copy lives in appdata, not under VCS — these defaults +/// optimize for size and write cost. MessagePack is self-describing, so it round-trips the +/// type-erased `serde_json::Value` bodies that resource and attribute deltas carry (a non-self- +/// describing format like postcard cannot). JSON/JSONL is opt-in via `ExportFormat::Folder` for +/// users who want a diffable on-disk representation. Recorded in the manifest at create time and +/// read back on open (see [`manifest::PayloadCodecs`]), so the persist path never probes the filesystem. +pub const DEFAULT_SESSION_CODEC: Codec = Codec::Json; +pub const DEFAULT_REGISTRY_CODEC: Codec = Codec::MessagePack; +pub const DEFAULT_HISTORY_CODEC: Codec = Codec::MessagePackFrames; +pub const DEFAULT_HOT_LOG_CODEC: Codec = Codec::MessagePackFrames; + +/// Editor-facing handle. 
Owns the `Session` and the working-copy container; mutations are mirrored +/// to disk continuously (every retirement appends to the history file and re-snapshots the registry). +/// +/// The per-edit persist path (`commit_from_runtime`, `apply_hot_op`, `retire`) is synchronous and +/// read-free: the manifest is cached in memory (so payload codecs and `last_retired_at` need no +/// disk read), and writes go through the container's sync write surface. Only `open` / `create` / +/// `export` are async, since they read. +/// `Clone` shares the working-copy container (`Arc`) so a cloned handle reads and writes +/// the *same* on-disk/OPFS working copy — including any writes still queued on the OPFS backend. The +/// `Session` is cloned (a snapshot copy); the container is shared. +#[derive(Clone)] +pub struct Gdd { + session: Session, + working: Arc, + layout: L, + /// In-memory copy of the manifest, kept authoritative since `Gdd` is its sole writer. Holds the + /// per-payload codecs (so the persist path never probes the filesystem) and `last_retired_at` + /// (so retirement writes the manifest without first reading it). Lets the persist path stay + /// fully read-free and synchronous. + manifest: Manifest, + /// Per-peer view settings (PTZ, rulers, etc.), persisted in `session.json` not the registry, so + /// they stay out of the CRDT/history. Opaque to the storage layer; the editor owns the keys/values. + view_settings: std::collections::HashMap, + /// Per-network view settings (node-graph nav + previewing), keyed by stable [`NetworkId`]. Same per-peer + /// `session.json` treatment as [`view_settings`](Self::view_settings), but scoped per network. + network_view_settings: std::collections::HashMap>, +} + +/// Native folder-backed convenience constructors. On wasm the editor builds an OPFS-backed +/// `AnyContainer` itself and uses [`Gdd::open_in`] / [`Gdd::create_in`] directly. 
+#[cfg(not(target_family = "wasm"))] +impl Gdd { + /// Open an existing working copy at `path`. Validates the manifest, materializes the session + /// from `registry.bin` (fast path) or by replaying `history.jsonl` (slow path), then applies + /// the persisted hot log on top. + pub async fn open(path: &Path) -> Result { + let working = AnyContainer::Folder(FolderBackend::open(path)?); + let layout = L::default(); + Self::open_in(working, layout).await + } + + /// Create a fresh, empty working copy at `path` bound to `peer`. Writes a default manifest + /// and session state; the caller fills in editor metadata via [`Gdd::update_manifest`]. + pub async fn create(path: &Path, peer: PeerId, document_uuid: u64, editor_version: String, stdlib_version: String) -> Result { + let working = AnyContainer::Folder(FolderBackend::create(path)?); + let layout = L::default(); + Self::create_in(working, layout, peer, document_uuid, editor_version, stdlib_version).await + } +} + +impl Gdd { + /// Open a `.gdd` from archive bytes (xz or zip, auto-detected) by materializing it into `working`, + /// then opening it as a working copy. The archive is deserialized into an in-memory staging backend + /// (the archive reader is synchronous), then each entry is written into `working` via the sync + /// `write_non_blocking` surface — durable on folder/memory, eagerly enqueued on OPFS. `working` is + /// expected to be a fresh per-document container; entries with colliding paths are overwritten. 
+ pub async fn open_from_archive(bytes: &[u8], working: AnyContainer, layout: L) -> Result { + use document_container::AsyncContainer; + use document_container::Container; + use document_container::backends::memory::MemoryBackend; + + let mut staging = MemoryBackend::new(); + document_container::archive::open_auto(bytes, &mut staging)?; + + // Copy every entry, recursing into subdirectories: `list` is single-level, so a flat top-level + // copy would skip the `resources/` subtree (the document's resource + declaration bytes). + let mut directories = vec![String::new()]; + while let Some(dir) = directories.pop() { + for path in Container::list(&staging, &dir)? { + let holder = Container::read(&staging, &path)?; + working.write_non_blocking(&path, holder.as_slice())?; + } + directories.extend(Container::list_dirs(&staging, &dir)?); + } + + Self::open_in(working, layout).await + } + + /// Backend-agnostic open. Splits out so tests can supply a [`document_container::backends::memory::MemoryBackend`]. 
+    pub async fn open_in(working: AnyContainer, layout: L) -> Result {
+        // The manifest is read with the fixed `MANIFEST_CODEC` and validated first; every other
+        // payload is then read with the codec the manifest records for it.
+        let manifest: Manifest = io::read_single(&working, layout.manifest_basename(), MANIFEST_CODEC).await?;
+        validate_manifest(&manifest)?;
+        let codecs = manifest.codecs;
+
+        // Session state is optional: when the file is absent, the cursor and view settings start
+        // from `SessionState::default()`.
+        let session_state: SessionState = match io::exists(&working, layout.session_basename(), codecs.session).await {
+            true => io::read_single(&working, layout.session_basename(), codecs.session).await?,
+            false => SessionState::default(),
+        };
+
+        let has_registry = io::exists(&working, layout.registry_basename(), codecs.registry).await;
+        let has_history = io::exists(&working, layout.history_basename(), codecs.history).await;
+
+        // Choose how to rebuild the session from whichever payloads are present.
+        let mut session = match (has_registry, has_history) {
+            (true, true) => {
+                // Snapshot and history both present: load the stored registry and index the
+                // history deltas by their id, then restore the persisted cursor.
+                let registry: Registry = io::read_single(&working, layout.registry_basename(), codecs.registry).await?;
+                let history_map: HashMap = load_history(&working, &layout, codecs.history).await?.into_iter().map(|delta| (delta.id, delta)).collect();
+                Session::load(
+                    manifest.peer_id,
+                    registry,
+                    history_map,
+                    session_state.head_rev,
+                    session_state.redo_stack,
+                    session_state.next_node_counter,
+                )
+            }
+            (true, false) => {
+                // Registry-only export: synthesize a history that reproduces this state.
+                let registry: Registry = io::read_single(&working, layout.registry_basename(), codecs.registry).await?;
+                Session::bootstrap_from_registry(manifest.peer_id, registry)?
+            }
+            // No registry snapshot: replay whatever `load_history` returns, which is an empty list
+            // when the history file is missing as well.
+            (false, _) => Session::replay_from_history(manifest.peer_id, load_history(&working, &layout, codecs.history).await?, session_state.next_node_counter)?,
+        };
+
+        // Re-apply any hot ops found in the hot log on top of the rebuilt session.
+        replay_hot_log(&working, &layout, codecs.hot_log, &mut session).await?;
+
+        Ok(Self {
+            session,
+            working: Arc::new(working),
+            layout,
+            manifest,
+            view_settings: session_state.view_settings,
+            network_view_settings: session_state.network_view_settings,
+        })
+    }
+
+    /// Backend-agnostic create. Records the working-copy default codecs (see `DEFAULT_*_CODEC`) in
+    /// the manifest and writes each payload with its recorded codec.
+    ///
+    /// Writes, in order: the manifest, a default `SessionState`, and the registry of a fresh
+    /// `Session::with_peer(peer)`. No history or hot-log file is written here, and both view-settings
+    /// maps start empty.
+    pub async fn create_in(working: AnyContainer, layout: L, peer: PeerId, document_uuid: u64, editor_version: String, stdlib_version: String) -> Result {
+        let manifest = Manifest::new(document_uuid, peer, editor_version, stdlib_version);
+        let codecs = manifest.codecs;
+        io::write_single(&working, layout.manifest_basename(), MANIFEST_CODEC, &manifest)?;
+        io::write_single(&working, layout.session_basename(), codecs.session, &SessionState::default())?;
+
+        let session = Session::with_peer(peer);
+        io::write_single(&working, layout.registry_basename(), codecs.registry, session.registry())?;
+
+        Ok(Self {
+            session,
+            working: Arc::new(working),
+            layout,
+            manifest,
+            view_settings: std::collections::HashMap::new(),
+            network_view_settings: std::collections::HashMap::new(),
+        })
+    }
+}
+
+/// Reject manifests this build cannot open.
+///
+/// Returns `OpenError::WrongFormat` when `format` differs from `manifest::FORMAT_MAGIC`, and
+/// `OpenError::UnsupportedVersion` when `format_version` is greater than
+/// `manifest::SUPPORTED_FORMAT_VERSION`. Versions at or below the supported one are accepted.
+fn validate_manifest(manifest: &Manifest) -> Result<(), OpenError> {
+    if manifest.format != manifest::FORMAT_MAGIC {
+        return Err(OpenError::WrongFormat {
+            found: manifest.format.clone(),
+            expected: manifest::FORMAT_MAGIC,
+        });
+    }
+    if manifest.format_version > manifest::SUPPORTED_FORMAT_VERSION {
+        return Err(OpenError::UnsupportedVersion {
+            found: manifest.format_version,
+            max_supported: manifest::SUPPORTED_FORMAT_VERSION,
+        });
+    }
+    Ok(())
+}
+
+/// Read every history entry from the working copy using `codec`. A missing history file is treated
+/// as an empty history rather than an error.
+async fn load_history(working: &AnyContainer, layout: &L, codec: Codec) -> Result, OpenError> {
+    if !io::exists(working, layout.history_basename(), codec).await {
+        return Ok(Vec::new());
+    }
+    Ok(io::iter::(working, layout.history_basename(), codec).await?)
+}
+
+/// Replay each op stored in the hot log into `session`, in the order `io::iter` yields them.
+/// A missing hot log is a no-op; the first op that fails to replay aborts with that error.
+async fn replay_hot_log(working: &AnyContainer, layout: &L, codec: Codec, session: &mut Session) -> Result<(), OpenError> {
+    if !io::exists(working, layout.hot_log_basename(), codec).await {
+        return Ok(());
+    }
+    for hot_op in io::iter::(working, layout.hot_log_basename(), codec).await? {
+        session.replay_hot_op(hot_op)?;
+    }
+    Ok(())
+}
+
+impl Gdd {
+    /// Borrow the underlying [`Session`].
+    pub fn session(&self) -> &Session {
+        &self.session
+    }
+
+    /// Whether an undo is currently possible; delegates to `Session::can_undo`.
+    pub fn can_undo(&self) -> bool {
+        self.session.can_undo()
+    }
+
+    /// Whether a redo is currently possible; delegates to `Session::can_redo`.
+    pub fn can_redo(&self) -> bool {
+        self.session.can_redo()
+    }
+
+    /// Move the undo cursor back one commit (silent-zone reflog undo) and persist the new cursor. Returns
+    /// the undone `Rev`. The working registry is rewound in place by the reverse delta, so re-snapshot it
+    /// (alongside `head`) or a reopen would read a `registry.bin` inconsistent with the persisted cursor.
+    pub fn undo(&mut self) -> Result {
+        let rev = self.session.undo()?;
+        // Registry snapshot first, then the cursor, both after the in-memory undo succeeded.
+        self.persist_registry_snapshot()?;
+        self.persist_session_state()?;
+        Ok(rev)
+    }
+
+    /// Re-apply the most-recently-undone commit and persist the new cursor and re-snapshotted registry.
+    pub fn redo(&mut self) -> Result {
+        let rev = self.session.redo()?;
+        self.persist_registry_snapshot()?;
+        self.persist_session_state()?;
+        Ok(rev)
+    }
+
+    /// The session's current working registry.
+    pub fn registry(&self) -> &Registry {
+        self.session.registry()
+    }
+
+    /// Resolve each runtime `network_path` to its stable [`NetworkId`](graph_storage::NetworkId), so the
+    /// editor can key per-network, per-peer view state by a stable id. See [`Session::network_ids`].
+    pub fn network_ids(
+        &self,
+        network: &graph_craft::document::NodeNetwork,
+        metadata: &M,
+    ) -> Result, graph_storage::NetworkId>, CommitError> {
+        self.session.network_ids(network, metadata)
+    }
+
+    /// Every resource hash referenced by the current registry or anywhere in history, so resource GC keeps
+    /// redoable/re-undoable gestures' resources (notably proto-node declaration bytes) alive even when an
+    /// undo has dropped them from the current registry.
+    pub fn all_referenced_resource_hashes(&self) -> std::collections::HashSet {
+        self.session.all_referenced_resource_hashes()
+    }
+
+    /// The layout that names every path inside the working copy.
+    pub fn layout(&self) -> &L {
+        &self.layout
+    }
+
+    /// Drop the session and return the working-copy container + layout.
+    /// Intended for test code that needs to reopen against the same container; panics if the container
+    /// is still shared by a `Gdd` clone (tests don't clone before calling this).
+    ///
+    /// # Panics
+    /// Panics when `Arc::try_unwrap` fails, i.e. while any other owner of the shared container is alive.
+    /// NOTE(review): that includes a `ResourceProxy` returned by `resource_proxy`, which clones the same
+    /// `Arc`, not only a `Gdd` clone as the panic message says.
+    pub fn into_storage(self) -> (AnyContainer, L) {
+        let working = Arc::try_unwrap(self.working).unwrap_or_else(|_| panic!("into_storage called while the working-copy container is still shared by a Gdd clone"));
+        (working, self.layout)
+    }
+
+    /// The in-memory manifest. `Gdd` is its sole writer, so this is authoritative without re-reading
+    /// disk.
+    pub fn manifest(&self) -> &Manifest {
+        &self.manifest
+    }
+
+    /// Edit the cached manifest and persist it. Always JSON, synchronous.
+    ///
+    /// The edit is applied to the in-memory manifest before the write, so if the write fails the cached
+    /// manifest keeps the edit while the stored one does not.
+    pub fn update_manifest(&mut self, edit: impl FnOnce(&mut Manifest)) -> Result<(), OpenError> {
+        edit(&mut self.manifest);
+        io::write_single(&self.working, self.layout.manifest_basename(), MANIFEST_CODEC, &self.manifest)?;
+        Ok(())
+    }
+
+    /// Stage a runtime snapshot as hot ops without retiring: diff the runtime against the working
+    /// registry, append the hot frames (so a crash recovers the work), and persist proto-node
+    /// declaration bytes. The working registry reflects the edit immediately, but nothing enters durable
+    /// retired history until [`retire_pending_gesture`](Self::retire_pending_gesture). Staging on every
+    /// edit while retiring only at gesture boundaries lets several edits coalesce into one retired gesture.
+    ///
+    /// If appending a hot frame fails, the error is returned after the session has already staged the
+    /// ops in memory; the remaining frames and the declaration bytes are not written.
+    pub fn stage_runtime_snapshot(
+        &mut self,
+        network: &graph_craft::document::NodeNetwork,
+        metadata: &M,
+        resources: &graphene_resource::ResourceRegistry,
+        byte_store: &dyn ResourceStorage,
+    ) -> Result<(), CommitFromRuntimeError> {
+        let (hot_ops, declaration_bytes) = self.session.stage_from_runtime(network, metadata, resources)?;
+
+        for hot_op in &hot_ops {
+            self.append_hot_frame(hot_op)?;
+        }
+
+        // Persist proto-node declaration content to the byte store (the global cache in the editor,
+        // the working-copy container for standalone export). Content-addressed, so re-storing
+        // identical bytes on every commit is an idempotent no-op.
+        // The hash returned by `store` is deliberately ignored here.
+        for bytes in declaration_bytes.values() {
+            byte_store.store(bytes);
+        }
+        Ok(())
+    }
+
+    /// Retire every pending hot op into durable history as a single gesture (marking the batch's last
+    /// delta as the gesture boundary), then re-snapshot the registry. One gesture is one undo unit, so
+    /// the caller invokes this at each undo-step boundary and before any undo/redo. A no-op when there
+    /// are no pending hot ops.
+    pub fn retire_pending_gesture(&mut self) -> Result, RetireError> {
+        // Retire up to the largest timestamp in the hot log, which covers every pending op.
+        let Some(up_to) = self.session.hot_log().iter().map(|hot_op| hot_op.timestamp).max() else {
+            return Ok(Vec::new());
+        };
+        self.retire_inner(up_to, true)
+    }
+
+    /// Commit a runtime snapshot as one complete gesture: stage it, then immediately retire it into
+    /// durable history. Convenience for callers that produce a whole gesture atomically (tests, and any
+    /// one-shot commit). Equivalent to [`stage_runtime_snapshot`](Self::stage_runtime_snapshot) followed
+    /// by [`retire_pending_gesture`](Self::retire_pending_gesture).
+    pub fn commit_from_runtime(
+        &mut self,
+        network: &graph_craft::document::NodeNetwork,
+        metadata: &M,
+        resources: &graphene_resource::ResourceRegistry,
+        byte_store: &dyn ResourceStorage,
+    ) -> Result, CommitFromRuntimeError> {
+        self.stage_runtime_snapshot(network, metadata, resources, byte_store)?;
+        Ok(self.retire_pending_gesture()?)
+    }
+
+    /// Resolve the proto-node declarations referenced by the registry into a [`graph_storage::Declarations`]
+    /// map, loading each `ProtoNode`'s bytes from `byte_store` (the global cache in the editor, the
+    /// working-copy container for standalone). Only resources referenced by `Implementation::ProtoNode`
+    /// are visited, so image/font resources are skipped. Cold-path (open / `to_runtime`); async
+    /// because resource loads are.
+    pub async fn declarations(&self, byte_store: &dyn LoadResource) -> graph_storage::Declarations {
+        use graph_storage::Implementation;
+
+        let registry = self.session.registry();
+        let mut declarations = graph_storage::Declarations::new();
+
+        // Best-effort: every failure below is logged and that declaration is left out of the map,
+        // so the result may be incomplete rather than an error.
+        for node in registry.node_instances.values() {
+            let Implementation::ProtoNode(id) = node.implementation() else { continue };
+            // Several nodes can share one declaration; load each id at most once on success.
+            if declarations.contains_key(id) {
+                continue;
+            }
+            let Some(hash) = registry.resources.get(id).and_then(|entry| entry.hash) else {
+                log::error!("Declaration resource {id} has no resolved hash; cannot load ProtoNode");
+                continue;
+            };
+            let Some(resource) = byte_store.load(hash).await else {
+                log::error!("Declaration bytes for {id} (hash {hash}) missing from byte store");
+                continue;
+            };
+            match graph_storage::decode_declaration(resource.as_ref()) {
+                Ok(proto) => {
+                    declarations.insert(*id, proto);
+                }
+                Err(error) => log::error!("Failed to deserialize ProtoNode for {id}: {error}"),
+            }
+        }
+
+        declarations
+    }
+
+    /// Apply a hot op from the broadcast stream, appending one frame to the hot log.
+    ///
+    /// Only a failure to apply the op to the session is returned. A failure to append the frame is
+    /// logged and swallowed, so `Ok(())` does not guarantee the op reached the hot log.
+    /// NOTE(review): `stage_runtime_snapshot` propagates the same append failure — confirm the
+    /// difference is intentional.
+    pub fn apply_hot_op(&mut self, op: HotOp) -> Result<(), CrdtError> {
+        self.session.apply_hot_op(op.clone())?;
+        if let Err(error) = self.append_hot_frame(&op) {
+            log::error!("Failed to append hot op frame: {error}");
+        }
+        Ok(())
+    }
+
+    /// Persist freshly-staged hot ops and immediately retire them into durable history. Appends each
+    /// hot frame (so a crash before retirement still recovers the work), then retires up to the last
+    /// staged timestamp, which drains exactly these ops and re-snapshots the registry. Returns the
+    /// retired `Rev`s. A no-op when nothing was staged.
+    fn append_and_retire(&mut self, hot_ops: &[HotOp], gesture: bool) -> Result, RetireError> {
+        // NOTE(review): the cut-off is the timestamp of the LAST element, so this assumes `hot_ops`
+        // ends with its largest timestamp. `retire_inner` retires every pending op at or below that
+        // timestamp, which may include ops staged earlier and not passed in here — confirm.
+        let Some(last) = hot_ops.last() else { return Ok(Vec::new()) };
+
+        for hot_op in hot_ops {
+            self.append_hot_frame(hot_op)?;
+        }
+
+        self.retire_inner(last.timestamp, gesture)
+    }
+
+    /// Encode the history deltas identified by `revs` and append them to the history file.
+    /// Single pass over the history (O(history length)), filtering by `revs` membership.
+    ///
+    /// Frames are written in the order `self.session.history()` yields them, not in the order of
+    /// `revs`. All frames are encoded into one buffer and appended with a single call.
+    fn append_history_deltas(&mut self, revs: &[Rev]) -> Result<(), OpenError> {
+        let wanted: std::collections::HashSet = revs.iter().copied().collect();
+        let mut buffer = Vec::new();
+        for delta in self.session.history().filter(|delta| wanted.contains(&delta.id)) {
+            self.manifest.codecs.history.append(&mut buffer, delta)?;
+        }
+        self.working.append_non_blocking(&io::path_for(self.layout.history_basename(), self.manifest.codecs.history), &buffer)?;
+        Ok(())
+    }
+
+    /// Set a local annotation (e.g. a commit message) on an existing retired delta and re-persist it.
+    /// Unlike the per-gesture marker written inline at retire, this targets an already-written delta, so
+    /// the whole history file is rewritten in topological order. O(history) — fine for occasional user
+    /// labeling, not for per-gesture marking (which uses the inline path). No-op if `rev` is unknown.
+    pub fn annotate_delta(&mut self, rev: Rev, key: &str, value: serde_json::Value) -> Result<(), OpenError> {
+        // The history file is rewritten only when the session reports the annotation was applied.
+        if self.session.annotate_delta(rev, key, value) {
+            self.rewrite_history()?;
+        }
+        Ok(())
+    }
+
+    /// Rewrite the entire history file from the in-memory session, in deterministic topological order.
+    fn rewrite_history(&mut self) -> Result<(), OpenError> {
+        // Encode every delta into one buffer, then replace the file in a single write.
+        let mut buffer = Vec::new();
+        for delta in self.session.history_topological() {
+            self.manifest.codecs.history.append(&mut buffer, delta)?;
+        }
+        self.working.write_non_blocking(&io::path_for(self.layout.history_basename(), self.manifest.codecs.history), &buffer)?;
+        Ok(())
+    }
+
+    /// Write the current cursor (`head_rev`, redo stack, next node counter) together with both
+    /// view-settings maps to the session file, using the session codec recorded in the manifest.
+    fn persist_session_state(&mut self) -> Result<(), OpenError> {
+        let state = SessionState {
+            head_rev: self.session.head_rev(),
+            redo_stack: self.session.redo_stack().to_vec(),
+            next_node_counter: self.session.next_node_counter(),
+            view_settings: self.view_settings.clone(),
+            network_view_settings: self.network_view_settings.clone(),
+        };
+        io::write_single(&self.working, self.layout.session_basename(), self.manifest.codecs.session, &state)?;
+        Ok(())
+    }
+
+    /// Re-snapshot the materialized working registry to `registry.bin`. `Session::load` trusts the stored
+    /// registry to match the persisted `head`, so any cursor move (undo/redo) that rewinds the working
+    /// registry without retiring must re-persist it or a reopen would read a registry inconsistent with
+    /// `head`. Synchronous and hot-path-safe (`write_non_blocking`).
+    // NOTE(review): the call below is `io::write_single`; presumably it uses `write_non_blocking`
+    // internally — confirm against `io.rs`.
+    fn persist_registry_snapshot(&mut self) -> Result<(), OpenError> {
+        io::write_single(&self.working, self.layout.registry_basename(), self.manifest.codecs.registry, self.session.registry())?;
+        Ok(())
+    }
+
+    /// The per-peer view settings read from `session.json` (PTZ, rulers, overlays, snapping, collapse).
+    /// Opaque `ui::doc::*` blobs; the editor decodes them. Empty for a fresh document.
+    pub fn view_settings(&self) -> &std::collections::HashMap {
+        &self.view_settings
+    }
+
+    /// Replace the per-peer view settings and persist them to `session.json`. Called by the editor when
+    /// the viewport or a document-level toggle changes; never enters the registry, history, or CRDT.
+    pub fn set_view_settings(&mut self, view_settings: std::collections::HashMap) -> Result<(), OpenError> {
+        // The in-memory value is replaced first; a failed persist leaves it ahead of the session file.
+        self.view_settings = view_settings;
+        self.persist_session_state()
+    }
+
+    /// The per-network view settings read from `session.json` (node-graph nav + previewing), keyed by
+    /// [`NetworkId`](graph_storage::NetworkId). Opaque `ui::nav::*` / `ui::previewing` blobs the editor decodes.
+    pub fn network_view_settings(&self) -> &std::collections::HashMap> {
+        &self.network_view_settings
+    }
+
+    /// Replace the per-network view settings and persist them to `session.json`. Per-peer, per-network; never
+    /// enters the registry, history, or CRDT.
+    pub fn set_network_view_settings(
+        &mut self,
+        network_view_settings: std::collections::HashMap>,
+    ) -> Result<(), OpenError> {
+        self.network_view_settings = network_view_settings;
+        self.persist_session_state()
+    }
+
+    /// Encode `op` with the hot-log codec recorded in the manifest and append it as one frame to the
+    /// hot-log file.
+    fn append_hot_frame(&mut self, op: &HotOp) -> Result<(), OpenError> {
+        let mut buffer = Vec::new();
+        self.manifest.codecs.hot_log.append(&mut buffer, op)?;
+        self.working.append_non_blocking(&io::path_for(self.layout.hot_log_basename(), self.manifest.codecs.hot_log), &buffer)?;
+        Ok(())
+    }
+
+    /// Working-copy checkpoint: promote hot ops with timestamp `≤ up_to` into retired deltas,
+    /// append them to the history file, rewrite the hot log with remaining (unretired) ops,
+    /// re-snapshot the registry, and bump `last_retired_at` on the manifest. Synchronous.
+    pub fn retire(&mut self, up_to: TimeStamp) -> Result, RetireError> {
+        self.retire_inner(up_to, false)
+    }
+
+    /// `gesture`: mark the batch's last delta as a gesture boundary (one undo unit) before its history
+    /// frame is written, so the marker persists on reopen without a later frame rewrite.
+    ///
+    /// The hot log, registry snapshot, session state and manifest timestamp are rewritten on every
+    /// call, including one that retires nothing. The steps run in sequence without rollback, so an
+    /// error part-way leaves the earlier writes in place.
+    fn retire_inner(&mut self, up_to: TimeStamp, gesture: bool) -> Result, RetireError> {
+        let new_revs = self.session.retire(up_to)?;
+
+        // Mark before `append_history_deltas` so the on-disk frame carries the boundary.
+        if gesture && let Some(&last) = new_revs.last() {
+            self.session.mark_interaction_end(last);
+        }
+
+        if !new_revs.is_empty() {
+            self.append_history_deltas(&new_revs)?;
+        }
+
+        // Rewrite hot log with whatever survived retirement.
+        let mut hot_buffer = Vec::new();
+        for hot_op in self.session.hot_log() {
+            self.manifest.codecs.hot_log.append(&mut hot_buffer, hot_op)?;
+        }
+        self.working
+            .write_non_blocking(&io::path_for(self.layout.hot_log_basename(), self.manifest.codecs.hot_log), &hot_buffer)?;
+
+        // Re-snapshot registry.
+        io::write_single(&self.working, self.layout.registry_basename(), self.manifest.codecs.registry, self.session.registry())?;
+
+        self.persist_session_state()?;
+
+        // Bump cached manifest timestamp and persist it.
+        self.update_manifest(|m| m.last_retired_at = Some(chrono::Utc::now().to_rfc3339()))?;
+
+        Ok(new_revs)
+    }
+
+    /// Read the bytes stored for `hash` from the working copy's resource path; the container's read
+    /// error is returned unchanged.
+    pub async fn read_resource(&self, hash: &ResourceHash) -> Result {
+        self.working.read(&self.layout.resource_path(hash)).await
+    }
+
+    /// Store the legacy `.graphite` document bytes verbatim inside the working copy (dual-write soak).
+    /// Synchronous (hot-path safe via `write_non_blocking`): called at the autosave boundary alongside
+    /// the registry snapshot. The bytes are opaque to `Gdd` — it never deserializes them.
+    pub fn store_legacy_document(&self, bytes: &[u8]) -> Result<(), ContainerError> {
+        self.working.write_non_blocking(self.layout.legacy_basename(), bytes)
+    }
+
+    /// Read back the embedded legacy `.graphite` document, if present. The compare-on-open oracle and
+    /// the recovery fallback both go through here. `None` when no legacy blob was ever written.
+    /// Any other read failure is also reported as `None`, because the error is discarded with `.ok()`.
+    pub async fn read_legacy_document(&self) -> Option {
+        self.working.read(self.layout.legacy_basename()).await.ok()
+    }
+
+    /// Register a resource under `id` and store its bytes. Commits an `AddResource` delta (a single
+    /// `DataSource::Embedded` source resolved to the content hash) through the session so the registry
+    /// records the resource and the entry replicates, then writes the bytes into the working copy's
+    /// content-addressed store. The caller owns `id` allocation.
+    ///
+    /// NOTE(review): the registration is staged and retired BEFORE the bytes are written. If the final
+    /// write fails, the error is returned but the retired history already references a hash whose
+    /// bytes are absent. Writing the bytes first looks safer — confirm.
+    pub fn add_resource(&mut self, id: graph_storage::ResourceId, bytes: &[u8]) -> Result<(), AddResourceError> {
+        let hash = ResourceHash::from(bytes);
+
+        let hot_ops = self.session.stage_embedded_resource(id, hash)?;
+        self.append_and_retire(&hot_ops, false)?;
+
+        self.working.write_non_blocking(&self.layout.resource_path(&hash), bytes)?;
+        Ok(())
+    }
+
+    /// Like [`add_resource`](Self::add_resource) but copies the bytes from a filesystem `src` rather
+    /// than buffering them. Folder backends use `fs::copy` (CoW on supported filesystems); other
+    /// backends fall back to read-then-write. Native-only: there is no filesystem source path on wasm.
+    ///
+    /// `hash` is taken from the caller and is not checked against the contents of `src`. As in
+    /// `add_resource`, the registration is retired before the bytes are copied.
+    #[cfg(not(target_family = "wasm"))]
+    pub fn add_resource_from_path(&mut self, id: graph_storage::ResourceId, hash: ResourceHash, src: &Path) -> Result<(), AddResourceError> {
+        let hot_ops = self.session.stage_embedded_resource(id, hash)?;
+        self.append_and_retire(&hot_ops, false)?;
+
+        let dest_path = self.layout.resource_path(&hash);
+        // Folder backend: copy file-to-file under the folder root, creating parent directories.
+        if let AnyContainer::Folder(folder) = self.working.as_ref() {
+            let full = folder.root().join(&dest_path);
+            if let Some(parent) = full.parent() {
+                std::fs::create_dir_all(parent).map_err(ContainerError::Io)?;
+            }
+            std::fs::copy(src, &full).map_err(ContainerError::Io)?;
+            return Ok(());
+        }
+
+        // Any other backend: read the whole file into memory and write it through the container.
+        let bytes = std::fs::read(src).map_err(ContainerError::Io)?;
+        self.working.write_non_blocking(&dest_path, &bytes)?;
+        Ok(())
+    }
+
+    /// Whether the working copy holds bytes at the resource path for `hash`.
+    pub async fn has_resource(&self, hash: &ResourceHash) -> bool {
+        self.working.exists(&self.layout.resource_path(hash)).await
+    }
+
+    /// Remove the bytes stored for `hash` from the working copy. Only the stored bytes are touched;
+    /// the registry and history are not modified.
+    pub fn remove_resource(&self, hash: &ResourceHash) -> Result<(), ContainerError> {
+        self.working.remove_non_blocking(&self.layout.resource_path(hash))
+    }
+
+    /// A `ResourceProxy` over the same working-copy container (it clones the `Arc`, not the
+    /// container) and a clone of the layout, usable as a `LoadResource` independently of this `Gdd`.
+    pub fn resource_proxy(&self) -> ResourceProxy
+    where
+        L: Clone,
+    {
+        ResourceProxy(self.working.clone(), self.layout.clone())
+    }
+
+    /// Enumerate every resource currently in the working copy. Paths that don't parse as a
+    /// `ResourceHash` (foreign files dropped into the resources directory) are silently skipped.
+    /// Returns an empty list when the resources directory does not exist.
+    pub async fn resource_hashes(&self) -> Result, ContainerError> {
+        let dir = self.layout.resources_dir();
+        if !self.working.list_dirs("").await?.iter().any(|d| d == dir) {
+            return Ok(Vec::new());
+        }
+        let entries = self.working.list(dir).await?;
+        let prefix = format!("{dir}/");
+        let mut hashes = Vec::with_capacity(entries.len());
+        for entry in entries {
+            let Some(name) = entry.strip_prefix(&prefix) else { continue };
+            if let Ok(hash) = name.parse::() {
+                hashes.push(hash);
+            }
+        }
+        Ok(hashes)
+    }
+
+    /// Build a self-contained export of the working copy as a folder, zip, or xz archive at `dest`.
+    /// The entries come from `stream_entries`: payloads keep the codecs recorded in the manifest,
+    /// the per-peer session state is included, and the hot log is not written. Resource files already
+    /// in the working copy are copied through one at a time. Does not mutate `self`. Native-only:
+    /// export writes to a filesystem path.
+    ///
+    /// `byte_store` supplies resource bytes the working copy does not hold: in the editor the working
+    /// copy holds no resource bytes (they live in the app-global cache). It is consulted for resources
+    /// that already carry an embedded source and, when `embed_all_resources` is set, for link-only
+    /// resources too; those bytes are loaded into memory before being written.
+    #[cfg(not(target_family = "wasm"))]
+    pub async fn export(&self, dest: &Path, format: ExportFormat, options: ExportOptions, byte_store: &dyn LoadResource) -> Result<(), ExportError> {
+        // Options are validated before anything is created at `dest`.
+        options.validate().map_err(ExportError::InvalidOptions)?;
+
+        // Each arm builds its sink, streams the same entry sequence into it, and (for archives)
+        // finalizes the writer. An error while streaming returns early, leaving whatever was already
+        // created at `dest` in place.
+        match format {
+            ExportFormat::Folder => {
+                let mut folder = document_container::backends::folder::FolderBackend::create(dest)?;
+                let mut sink = FolderSink { folder: &mut folder };
+                self.stream_entries(options, byte_store, &mut sink).await?;
+            }
+            ExportFormat::Zip => {
+                let file = std::fs::File::create(dest).map_err(document_container::ContainerError::Io)?;
+                let mut writer = document_container::archive::Zip::writer(file)?;
+                self.stream_entries(options, byte_store, &mut writer).await?;
+                use document_container::archive::ArchiveWriter;
+                writer.finish()?;
+            }
+            ExportFormat::Xz => {
+                let file = std::fs::File::create(dest).map_err(document_container::ContainerError::Io)?;
+                let mut writer = document_container::archive::Xz::writer(file)?;
+                self.stream_entries(options, byte_store, &mut writer).await?;
+                use document_container::archive::ArchiveWriter;
+                writer.finish()?;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Build a self-contained archive of the working copy in memory and return its bytes, instead of
+    /// writing to a filesystem path. Available on every target (no `std::fs`), so the editor can hand
+    /// the bytes to the frontend to download / save. Buffers the whole archive in memory; fine for
+    /// document-sized saves, not for huge exports (the streaming `export` covers that, native-only).
+    ///
+    /// `legacy_document`, when present, is embedded verbatim at `Layout::legacy_basename()`, so the
+    /// produced `.gdd` carries the legacy `.graphite` fallback the dual-write soak relies on.
+    /// `ExportFormat::Folder` has no single-file byte form and is rejected.
+    pub async fn export_to_bytes(&self, format: ExportFormat, options: ExportOptions, byte_store: &dyn LoadResource, legacy_document: Option<&[u8]>) -> Result, ExportError> {
+        use document_container::archive::Archive;
+
+        options.validate().map_err(ExportError::InvalidOptions)?;
+
+        // The archive is written into an in-memory cursor; `finish_into` hands the cursor back.
+        let cursor = std::io::Cursor::new(Vec::new());
+        let buffer = match format {
+            ExportFormat::Folder => return Err(ExportError::InvalidOptions("folder export has no single-file byte form")),
+            ExportFormat::Zip => {
+                let mut writer = document_container::archive::Zip::writer(cursor)?;
+                self.stream_entries(options, byte_store, &mut writer).await?;
+                // The legacy blob, when given, is written after all streamed entries.
+                if let Some(legacy) = legacy_document {
+                    ExportSink::write_entry(&mut writer, self.layout.legacy_basename(), legacy)?;
+                }
+                writer.finish_into()?
+            }
+            ExportFormat::Xz => {
+                let mut writer = document_container::archive::Xz::writer(cursor)?;
+                self.stream_entries(options, byte_store, &mut writer).await?;
+                if let Some(legacy) = legacy_document {
+                    ExportSink::write_entry(&mut writer, self.layout.legacy_basename(), legacy)?;
+                }
+                writer.finish_into()?
+            }
+        };
+
+        Ok(buffer.into_inner())
+    }
+
+    /// Drive a sink through manifest → session state → registry → history → resources. Payloads keep
+    /// the working copy's recorded per-payload codecs (no re-encode), so registry stays single-value and
+    /// history stays multi-value without the caller having to keep them coherent. Each entry is written
+    /// one at a time so the sink only ever sees one payload's bytes; the manifest itself is always JSON.
+    async fn stream_entries(&self, options: ExportOptions, byte_store: &dyn LoadResource, sink: &mut dyn ExportSink) -> Result<(), ExportError> {
+        use document_container::AsyncContainer;
+
+        let codecs = self.manifest.codecs;
+        sink.write_entry(&io::path_for(self.layout.manifest_basename(), MANIFEST_CODEC), &MANIFEST_CODEC.write_single(&self.manifest)?)?;
+
+        // Include the per-peer session state (cursor + `view_settings` like PTZ/rulers) so a `.gdd` opened on
+        // another machine restores the saved viewport and undo position. It's working-copy-only otherwise, so
+        // without this the archive's `view_settings` would be empty on open and the viewport would reset.
+        let session_state = SessionState {
+            head_rev: self.session.head_rev(),
+            redo_stack: self.session.redo_stack().to_vec(),
+            next_node_counter: self.session.next_node_counter(),
+            view_settings: self.view_settings.clone(),
+            network_view_settings: self.network_view_settings.clone(),
+        };
+        sink.write_entry(&io::path_for(self.layout.session_basename(), codecs.session), &codecs.session.write_single(&session_state)?)?;
+
+        // Hashes the working copy already holds on disk; their bytes are copied through verbatim below
+        // and don't need the byte store.
+        let working_copy_hashes: std::collections::HashSet = self.resource_hashes().await?.into_iter().collect();
+
+        // Decide which resources travel as bytes. A resource already marked `Embedded` always has its
+        // bytes materialized (in the editor they live in the byte store, not the working copy, so a
+        // plain export must still pull them). `embed_all_resources` additionally promotes link-only
+        // resources (`Url`/`FilePath`/`Font`) by prepending an `Embedded` source for a self-contained
+        // export. Bytes already in the working copy are skipped here; the copy-through pass writes them.
+        // Entries without a resolved hash are skipped entirely.
+        // NOTE(review): two resource ids with the same hash push that hash twice, so it is loaded
+        // twice from the byte store below; only the final write is de-duplicated via `emitted`.
+        let mut export_session = self.session.clone();
+        let mut hashes_from_store: Vec = Vec::new();
+        let mut links_to_promote: Vec = Vec::new();
+        for (id, entry) in &export_session.registry().resources {
+            let Some(hash) = entry.hash else { continue };
+            let embed = entry.has_embedded_source() || options.embed_all_resources;
+            if !embed {
+                continue;
+            }
+            if !entry.has_embedded_source() {
+                links_to_promote.push(*id);
+            }
+            if !working_copy_hashes.contains(&hash) {
+                hashes_from_store.push(hash);
+            }
+        }
+
+        // Load the gap from the byte store (fail fast if an embedded resource is missing), then commit
+        // the link promotions as real `AddSource` deltas on the clone so the exported registry and
+        // history stay consistent. The live `Gdd` is untouched.
+        let mut embedded_bytes: Vec<(ResourceHash, Resource)> = Vec::new();
+        for hash in hashes_from_store {
+            let Some(resource) = byte_store.load(hash).await else {
+                return Err(ExportError::MissingResource(hash));
+            };
+            embedded_bytes.push((hash, resource));
+        }
+        export_session.embed_resource_sources(links_to_promote)?;
+
+        if options.include_registry {
+            sink.write_entry(
+                &io::path_for(self.layout.registry_basename(), codecs.registry),
+                &codecs.registry.write_single(export_session.registry())?,
+            )?;
+        }
+
+        // The history entry is omitted when there are no deltas to encode.
+        if options.include_history {
+            let mut buffer = Vec::new();
+            for delta in export_session.history_topological() {
+                codecs.history.append(&mut buffer, delta)?;
+            }
+            if !buffer.is_empty() {
+                sink.write_entry(&io::path_for(self.layout.history_basename(), codecs.history), &buffer)?;
+            }
+        }
+
+        // Copy whatever resource bytes the working copy already holds, tracking which hashes are
+        // covered so the embed pass doesn't re-emit them.
+        // Every listed file is copied, including names that do not parse as a hash; the parse only
+        // decides whether the file is recorded in `emitted`.
+        let mut emitted = std::collections::HashSet::new();
+        let resources_dir = self.layout.resources_dir();
+        if self.working.list_dirs("").await?.iter().any(|d| d == resources_dir) {
+            let prefix = format!("{resources_dir}/");
+            for path in self.working.list(resources_dir).await? {
+                if let Some(hash) = path.strip_prefix(&prefix).and_then(|name| name.parse::().ok()) {
+                    emitted.insert(hash);
+                }
+                let holder = self.working.read(&path).await?;
+
+                // On native, an `External` (mmap'd) holder exposes a source path the sink can copy
+                // directly (CoW / kernel-side); other holders, and every holder on wasm (OPFS has no
+                // filesystem path), fall back to writing the in-memory bytes.
+                #[cfg(not(target_family = "wasm"))]
+                match holder.source_path() {
+                    Some(src_path) => sink.write_entry_from_path(&path, src_path)?,
+                    None => sink.write_entry(&path, holder.as_slice())?,
+                }
+                #[cfg(target_family = "wasm")]
+                sink.write_entry(&path, holder.as_slice())?;
+            }
+        }
+
+        // Write the embedded bytes the working copy didn't already hold.
+        for (hash, resource) in &embedded_bytes {
+            if emitted.insert(*hash) {
+                sink.write_entry(&self.layout.resource_path(hash), resource.as_ref())?;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+/// Loads resource bytes straight from the working copy. Any read error is reported as `None`.
+impl LoadResource for Gdd {
+    fn load(&self, hash: ResourceHash) -> ResourceFuture<'_> {
+        Box::pin(async move {
+            let bytes = self.working.read(&self.layout.resource_path(&hash)).await.ok()?;
+            Some(Resource::new(bytes))
+        })
+    }
+}
+/// Resource loader that shares the working-copy container (field 0) with a `Gdd` and carries its own
+/// layout (field 1). Built by `Gdd::resource_proxy`.
+pub struct ResourceProxy(Arc, T);
+
+/// Same lookup as the `Gdd` implementation: read the resource path for `hash`, `None` on any error.
+impl LoadResource for ResourceProxy {
+    fn load(&self, hash: ResourceHash) -> ResourceFuture<'_> {
+        Box::pin(async move {
+            let bytes = self.0.read(&self.1.resource_path(&hash)).await.ok()?;
+            Some(Resource::new(bytes))
+        })
+    }
+}
+
+/// Uses the working copy itself as the content-addressed byte store.
+impl ResourceStorage for Gdd {
+    /// Hash `data` and write it at that hash's resource path. A write failure is only logged; the
+    /// hash is returned either way, so a returned hash does not prove the bytes were stored.
+    fn store(&self, data: &[u8]) -> ResourceHash {
+        let hash = ResourceHash::from(data);
+        if let Err(error) = self.working.write_non_blocking(&self.layout.resource_path(&hash), data) {
+            log::error!("ResourceStorage::store failed for {hash}: {error}");
+        }
+        hash
+    }
+
+    fn contains(&self, hash: &ResourceHash) -> bool {
+        self.working.exists_non_blocking(&self.layout.resource_path(hash))
+    }
+
+    /// Remove every stored resource whose hash is not in `used`. Listing and removal failures are
+    /// logged, never returned.
+    // NOTE(review): the listing is driven with `futures::executor::block_on`, which blocks the
+    // calling thread until it completes — confirm this is never called from inside an async task.
+    fn garbage_collect(&self, used: &[ResourceHash]) {
+        let kept: std::collections::HashSet<&ResourceHash> = used.iter().collect();
+        let hashes = match futures::executor::block_on(self.resource_hashes()) {
+            Ok(hashes) => hashes,
+            Err(error) => {
+                log::error!("Failed to list resources during garbage_collect: {error}");
+                return;
+            }
+        };
+        for hash in hashes {
+            if kept.contains(&hash) {
+                continue;
+            }
+            if let Err(error) = self.working.remove_non_blocking(&self.layout.resource_path(&hash)) {
+                log::error!("ResourceStorage::garbage_collect failed to remove {hash}: {error}");
+            }
+        }
+    }
+}
+
+/// Abstraction over the sink an export streams entries into. Lets a single async loop drive
+/// folder writes, zip writes, and xz writes without duplicating the entry sequence. The archive
+/// sinks (zip/xz) work on every target since the codecs are pure-Rust; only the folder sink and
+/// the filesystem copy-through (`write_entry_from_path`) are native-only.
+///
+/// `Send` because `stream_entries` holds `&mut dyn ExportSink` across `.await`s, so the enclosing
+/// future (e.g. the editor's save future) must be `Send` on native. The concrete sinks are all `Send`.
+trait ExportSink: Send {
+    /// Write one complete entry at `path` from an in-memory buffer.
+    fn write_entry(&mut self, path: &str, bytes: &[u8]) -> Result<(), ExportError>;
+
+    /// Copy a file from disk into the sink. Default impl reads the source into memory and
+    /// forwards to `write_entry`; sinks like the folder writer override to use `fs::copy`
+    /// (CoW on supported filesystems, kernel-side copy otherwise). Native-only: only reachable
+    /// for an `External` (mmap'd) holder, which doesn't exist on wasm.
+    #[cfg(not(target_family = "wasm"))]
+    fn write_entry_from_path(&mut self, path: &str, src: &std::path::Path) -> Result<(), ExportError> {
+        let bytes = std::fs::read(src).map_err(document_container::ContainerError::Io)?;
+        self.write_entry(path, &bytes)
+    }
+}
+
+/// Export sink that writes entries into a folder backend on the local filesystem.
+#[cfg(not(target_family = "wasm"))]
+struct FolderSink<'a> {
+    folder: &'a mut document_container::backends::folder::FolderBackend,
+}
+
+#[cfg(not(target_family = "wasm"))]
+impl ExportSink for FolderSink<'_> {
+    fn write_entry(&mut self, path: &str, bytes: &[u8]) -> Result<(), ExportError> {
+        document_container::Container::write(self.folder, path, bytes)?;
+        Ok(())
+    }
+
+    /// File-to-file copy into the folder. This path bypasses `Container::write`, so the entry path is
+    /// validated explicitly before it is joined onto the folder root, and the parent directories are
+    /// created here.
+    fn write_entry_from_path(&mut self, path: &str, src: &std::path::Path) -> Result<(), ExportError> {
+        document_container::validate_path(path)?;
+        let dest = self.folder.root().join(path);
+        if let Some(parent) = dest.parent() {
+            std::fs::create_dir_all(parent).map_err(document_container::ContainerError::Io)?;
+        }
+        std::fs::copy(src, &dest).map_err(document_container::ContainerError::Io)?;
+        Ok(())
+    }
+}
+
+/// Zip sink: forwards to `ArchiveWriter::write_entry` and keeps the default read-then-write
+/// `write_entry_from_path`.
+impl ExportSink for document_container::archive::ZipWriter {
+    fn write_entry(&mut self, path: &str, bytes: &[u8]) -> Result<(), ExportError> {
+        use document_container::archive::ArchiveWriter;
+        // Fully qualified so the call resolves to the archive trait, not back to `ExportSink`.
+        ArchiveWriter::write_entry(self, path, bytes)?;
+        Ok(())
+    }
+}
+
+/// Xz sink: same forwarding as the zip sink.
+impl ExportSink for document_container::archive::XzWriter {
+    fn write_entry(&mut self, path: &str, bytes: &[u8]) -> Result<(), ExportError> {
+        use document_container::archive::ArchiveWriter;
+        ArchiveWriter::write_entry(self, path, bytes)?;
+        Ok(())
+    }
+}
+
+/// Errors from [`Gdd::open`] / [`Gdd::create`]. Per design, any unexpected condition is a hard error.
+#[derive(Debug, thiserror::Error)]
+pub enum OpenError {
+    #[error("container error: {0}")]
+    Container(#[from] ContainerError),
+    #[error("read error: {0}")]
+    Read(#[from] ReadError),
+    // Produced by `validate_manifest` when the manifest's `format` is not the expected magic.
+    #[error("not a .gdd document (manifest format = {found:?}, expected {expected:?})")]
+    WrongFormat { found: String, expected: &'static str },
+    // Produced by `validate_manifest` when `format_version` exceeds what this build supports.
+    #[error("unsupported format version: found {found}, max supported {max_supported}")]
+    UnsupportedVersion { found: u32, max_supported: u32 },
+    #[error("codec error: {0}")]
+    Codec(#[from] CodecError),
+    #[error("CRDT error: {0}")]
+    Crdt(#[from] CrdtError),
+}
+
+/// Errors from retiring hot ops into history (`Gdd::retire`, `Gdd::retire_pending_gesture`).
+#[derive(Debug, thiserror::Error)]
+pub enum RetireError {
+    #[error("container error: {0}")]
+    Container(#[from] ContainerError),
+    #[error("read error: {0}")]
+    Read(#[from] ReadError),
+    #[error("codec error: {0}")]
+    Codec(#[from] CodecError),
+    #[error("CRDT error: {0}")]
+    Crdt(#[from] CrdtError),
+    // Despite the name and message, every `OpenError` raised while retiring converts into this
+    // variant via `?` (history append, registry snapshot, session state), not only the manifest update.
+    #[error("manifest update failed: {0}")]
+    Manifest(#[from] OpenError),
+}
+
+/// Errors from moving the undo cursor. Presumably the error type of `Gdd::undo` / `Gdd::redo` — the
+/// return types are not visible here; confirm.
+#[derive(Debug, thiserror::Error)]
+pub enum UndoError {
+    #[error("CRDT error: {0}")]
+    Crdt(#[from] CrdtError),
+    #[error("failed to persist cursor: {0}")]
+    Persist(#[from] OpenError),
+}
+
+/// Errors from `Gdd::add_resource` / `Gdd::add_resource_from_path`.
+#[derive(Debug, thiserror::Error)]
+pub enum AddResourceError {
+    #[error("container error: {0}")]
+    Container(#[from] ContainerError),
+    #[error("CRDT error: {0}")]
+    Crdt(#[from] CrdtError),
+    #[error("failed to retire registration: {0}")]
+    Retire(#[from] RetireError),
+}
+
+/// A commit staged into the in-memory session but its on-disk persistence failed. The session has
+/// advanced past what the working copy reflects; callers should treat the document as needing
+/// re-persist (or surface the failure) rather than assuming the snapshot is durable.
+#[derive(Debug, thiserror::Error)] +pub enum CommitFromRuntimeError { + #[error("failed to stage runtime snapshot: {0}")] + Stage(#[from] CommitError), + #[error("failed to persist staged hot frames: {0}")] + Persist(#[from] OpenError), + #[error("failed to retire staged hot ops: {0}")] + Retire(#[from] RetireError), +} + +#[derive(Debug, thiserror::Error)] +pub enum ExportError { + #[error("container error: {0}")] + Container(#[from] ContainerError), + #[error("read error: {0}")] + Read(#[from] ReadError), + #[error("open error: {0}")] + Open(#[from] OpenError), + #[error("codec error: {0}")] + Codec(#[from] CodecError), + #[error("invalid export options: {0}")] + InvalidOptions(&'static str), + #[error("embedded resource {0} missing from the byte store")] + MissingResource(ResourceHash), + #[error("CRDT error: {0}")] + Crdt(#[from] CrdtError), +} diff --git a/document/document-format/src/manifest.rs b/document/document-format/src/manifest.rs new file mode 100644 index 0000000000..a9f6eb9bd0 --- /dev/null +++ b/document/document-format/src/manifest.rs @@ -0,0 +1,68 @@ +//! Bootstrap file for a `.gdd` document. Always JSON regardless of payload codec choice. + +use graph_storage::PeerId; +use serde::{Deserialize, Serialize}; + +use crate::Codec; +use crate::{DEFAULT_HISTORY_CODEC, DEFAULT_HOT_LOG_CODEC, DEFAULT_REGISTRY_CODEC, DEFAULT_SESSION_CODEC}; + +/// Magic string carried in [`Manifest::format`] to identify a `.gdd` document. +pub const FORMAT_MAGIC: &str = "gdd"; + +/// Maximum manifest version this build can open. Bumped when manifest layout changes +/// in a way that older builds can't safely read. +pub const SUPPORTED_FORMAT_VERSION: u32 = 1; + +/// The on-disk codec for each working-copy payload, recorded so reads/writes never have to probe +/// the filesystem to discover it. The manifest itself is excluded: it is always JSON, since it must +/// be parsed before any other codec is known. 
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct PayloadCodecs { + pub registry: Codec, + pub history: Codec, + pub hot_log: Codec, + pub session: Codec, +} + +impl Default for PayloadCodecs { + fn default() -> Self { + Self { + registry: DEFAULT_REGISTRY_CODEC, + history: DEFAULT_HISTORY_CODEC, + hot_log: DEFAULT_HOT_LOG_CODEC, + session: DEFAULT_SESSION_CODEC, + } + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Manifest { + pub format: String, + pub format_version: u32, + pub document_uuid: u64, + pub peer_id: PeerId, + pub editor_version: String, + pub stdlib_version: String, + /// Codec used for each non-manifest payload on disk. Authoritative — never inferred from which + /// file extension is present. + #[serde(default)] + pub codecs: PayloadCodecs, + /// RFC 3339 timestamp of the most recent retirement, set by [`crate::Gdd::retire`]. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub last_retired_at: Option<String>, +} + +impl Manifest { + pub fn new(document_uuid: u64, peer_id: PeerId, editor_version: String, stdlib_version: String) -> Self { + Self { + format: FORMAT_MAGIC.to_string(), + format_version: SUPPORTED_FORMAT_VERSION, + document_uuid, + peer_id, + editor_version, + stdlib_version, + codecs: PayloadCodecs::default(), + last_retired_at: None, + } + } +} diff --git a/document/document-format/src/session_state.rs b/document/document-format/src/session_state.rs new file mode 100644 index 0000000000..a10c85b3f0 --- /dev/null +++ b/document/document-format/src/session_state.rs @@ -0,0 +1,32 @@ +//! Persistent cursor state for the local peer. Separate from [`crate::Manifest`] because the +//! manifest describes document identity (what this document *is*), while [`SessionState`] +//! describes where the local peer's cursor sits inside it. +//! +//! Lives in `session.json`. Rewritten on retirement. 
+ +use graph_storage::{NetworkId, Rev}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +pub struct SessionState { + /// Local-chain cursor. Points at the most recently applied retired delta. + #[serde(default)] + pub head_rev: Rev, + /// Revs the user has undone past, so redo survives a reopen. (The legacy `VecDeque` redo history + /// is not persisted, so within the shadow phase this is strictly more capable than the live editor.) + #[serde(default)] + pub redo_stack: Vec<Rev>, + /// Shared-monotonic counter feeding `Document::next_node_id`. Persisted so reopens don't + /// collide on minted IDs. + #[serde(default)] + pub next_node_counter: u64, + /// Per-peer view settings (PTZ, rulers, overlays, snapping, panel collapse). Local to the viewer, + /// so kept out of the CRDT/history. Editor owns the keys/values (opaque `ui::doc::*` blobs). + #[serde(default)] + pub view_settings: HashMap, + /// Per-network view settings (node-graph nav + previewing), keyed by the stable storage [`NetworkId`]. + /// Per-peer like [`view_settings`](Self::view_settings); opaque `ui::nav::*` / `ui::previewing` blobs. + #[serde(default)] + pub network_view_settings: HashMap>, +} diff --git a/document/document-format/tests/open_create.rs b/document/document-format/tests/open_create.rs new file mode 100644 index 0000000000..a1ba749726 --- /dev/null +++ b/document/document-format/tests/open_create.rs @@ -0,0 +1,643 @@ +use document_container::AnyContainer; +use document_container::backends::memory::MemoryBackend; +use document_format::{Codec, Gdd, GddV1, Layout, Manifest, OpenError, io, manifest}; +use graph_storage::{HotOp, Network, NetworkId, PeerId, ROOT_NETWORK, RegistryDelta, TimeStamp}; + +fn empty_container() -> AnyContainer { + AnyContainer::Memory(MemoryBackend::new()) +} + +/// A resource byte store for export calls. 
Empty unless a test pre-populates it; only consulted when +/// `embed_all_resources` is set. +fn empty_byte_store() -> graph_craft::application_io::resource::HashMapResourceStorage { + graph_craft::application_io::resource::HashMapResourceStorage::new() +} + +/// A one-node network referencing `id` via a `TaggedValue::Resource` input. Conversion only snapshots +/// resources the network references, so a resource needs a referencing node to survive into storage. +fn network_referencing_resource(id: graphene_resource::ResourceId) -> graph_craft::document::NodeNetwork { + use graph_craft::ProtoNodeIdentifier; + use graph_craft::document::value::TaggedValue; + use graph_craft::document::{DocumentNode, DocumentNodeImplementation, NodeId, NodeInput, NodeNetwork}; + + NodeNetwork { + nodes: [( + NodeId(0), + DocumentNode { + inputs: vec![NodeInput::value(TaggedValue::Resource(id), false)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("graphene_core::ops::identity::IdentityNode")), + ..Default::default() + }, + )] + .into_iter() + .collect(), + ..Default::default() + } +} + +#[test] +fn create_in_round_trips_empty_document() { + futures::executor::block_on(async { + let container = empty_container(); + + let created = match Gdd::::create_in(container, GddV1, PeerId(7), 0xFEED, "editor-x".into(), "stdlib-x".into()).await { + Ok(gdd) => gdd, + Err(error) => panic!("create_in failed: {error:?}"), + }; + + let (working, layout) = created.into_storage(); + let reopened = match Gdd::::open_in(working, layout).await { + Ok(gdd) => gdd, + Err(error) => panic!("open_in failed: {error:?}"), + }; + + assert_eq!(reopened.session().peer(), PeerId(7)); + assert!(reopened.registry().node_instances.is_empty()); + assert!(reopened.registry().networks.is_empty()); + }); +} + +#[test] +fn open_in_rejects_wrong_format_magic() { + futures::executor::block_on(async { + let container = empty_container(); + let layout = GddV1; + + let mut bogus = 
Manifest::new(0xC0DE, PeerId(1), "ed".into(), "std".into()); + bogus.format = "not-gdd".into(); + io::write_single(&container, layout.manifest_basename(), Codec::Json, &bogus).unwrap(); + + match Gdd::::open_in(container, layout).await { + Err(OpenError::WrongFormat { .. }) => {} + Ok(_) => panic!("expected WrongFormat, got Ok"), + Err(other) => panic!("expected WrongFormat, got {other:?}"), + } + }); +} + +#[test] +fn manifest_returns_what_create_in_wrote() { + futures::executor::block_on(async { + let gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(13), 0xC0FFEE, "ed-1.2".into(), "std-0.7".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let manifest = gdd.manifest(); + assert_eq!(manifest.peer_id, PeerId(13)); + assert_eq!(manifest.document_uuid, 0xC0FFEE); + assert_eq!(manifest.editor_version, "ed-1.2"); + assert_eq!(manifest.stdlib_version, "std-0.7"); + assert_eq!(manifest.format, manifest::FORMAT_MAGIC); + }); +} + +#[test] +fn update_manifest_changes_visible_after_reopen() { + futures::executor::block_on(async { + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(1), 0xAB, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + gdd.update_manifest(|m| m.editor_version = "ed-NEW".into()) + .unwrap_or_else(|error| panic!("update_manifest failed: {error:?}")); + + let (working, layout) = gdd.into_storage(); + let reopened = Gdd::::open_in(working, layout).await.unwrap_or_else(|error| panic!("open_in failed: {error:?}")); + let manifest = reopened.manifest(); + assert_eq!(manifest.editor_version, "ed-NEW"); + }); +} + +#[test] +fn apply_hot_op_persists_to_hot_log_and_survives_reopen() { + futures::executor::block_on(async { + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(5), 0xDEAD, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + // AddNetwork on the root network. 
Idempotent at apply, so two hot ops applied in sequence + // produces one network in the registry. + let hot_op = HotOp { + op: RegistryDelta::AddNetwork { + id: ROOT_NETWORK, + network: Network::default(), + }, + timestamp: TimeStamp { counter: 1, peer: PeerId(5) }, + }; + gdd.apply_hot_op(hot_op).unwrap_or_else(|error| panic!("apply_hot_op failed: {error:?}")); + + assert!(gdd.registry().networks.contains_key(&ROOT_NETWORK), "hot op should have created the root network in memory"); + + let (working, layout) = gdd.into_storage(); + let reopened = Gdd::::open_in(working, layout).await.unwrap_or_else(|error| panic!("open_in failed: {error:?}")); + + assert!(reopened.registry().networks.contains_key(&ROOT_NETWORK), "hot op should have been replayed from the hot log on reopen"); + }); +} + +#[test] +fn retire_moves_eligible_hot_ops_to_history_and_keeps_rest() { + futures::executor::block_on(async { + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(5), 0xDEAD, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + // Two hot ops: one with low timestamp (will retire), one with high (will stay). + let early = HotOp { + op: RegistryDelta::AddNetwork { + id: ROOT_NETWORK, + network: Network::default(), + }, + timestamp: TimeStamp { counter: 1, peer: PeerId(5) }, + }; + let late = HotOp { + op: RegistryDelta::AddNetwork { + id: NetworkId(42), + network: Network::default(), + }, + timestamp: TimeStamp { counter: 10, peer: PeerId(5) }, + }; + gdd.apply_hot_op(early).unwrap(); + gdd.apply_hot_op(late).unwrap(); + assert_eq!(gdd.session().hot_log().len(), 2); + + // Retire only up to timestamp 5 → drains the early op, leaves the late one. 
+ let cutoff = TimeStamp { counter: 5, peer: PeerId(5) }; + gdd.retire(cutoff).unwrap_or_else(|error| panic!("retire failed: {error:?}")); + + assert_eq!(gdd.session().hot_log().len(), 1, "late hot op should still be in hot log"); + assert_eq!(gdd.session().history().count(), 1, "early hot op should be in retired history"); + + // Reopen and confirm survival: hot log has the late op (replayed), history has the early op. + let (working, layout) = gdd.into_storage(); + let reopened = Gdd::::open_in(working, layout).await.unwrap_or_else(|error| panic!("open_in failed: {error:?}")); + + assert!(reopened.registry().networks.contains_key(&ROOT_NETWORK), "retired op's effect should be in registry"); + assert!(reopened.registry().networks.contains_key(&NetworkId(42)), "hot op's effect should be replayed"); + assert_eq!(reopened.session().history().count(), 1); + assert_eq!(reopened.session().hot_log().len(), 1); + + // Manifest bumped. + assert!(reopened.manifest().last_retired_at.is_some(), "retire should bump last_retired_at"); + }); +} + +#[test] +fn export_folder_round_trips_through_open() { + use document_format::{ExportFormat, ExportOptions}; + + futures::executor::block_on(async { + let gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(3), 0xAB, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let dir = tempfile::tempdir().unwrap(); + let dest = dir.path().join("export"); + + gdd.export(&dest, ExportFormat::Folder, ExportOptions::default(), &empty_byte_store()) + .await + .unwrap_or_else(|error| panic!("export failed: {error:?}")); + + // Payloads keep the working-copy codecs: registry is MessagePack (`.bin`), manifest is JSON. + assert!(dest.join("registry.bin").exists()); + assert!(dest.join("manifest.json").exists()); + assert!(dest.join("session.json").exists()); + assert!(!dest.join("hot-log.bin").exists()); + assert!(!dest.join("hot-log.frames").exists()); + + // And the export is itself openable. 
+ let reopened = Gdd::::open(&dest).await.unwrap_or_else(|error| panic!("open failed: {error:?}")); + assert_eq!(reopened.session().peer(), PeerId(3)); + }); +} + +#[test] +fn export_zip_round_trips_via_deserialize() { + use document_container::archive::{Archive, Zip}; + use document_format::{ExportFormat, ExportOptions}; + + futures::executor::block_on(async { + let gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(4), 0xCD, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let dir = tempfile::tempdir().unwrap(); + let dest = dir.path().join("doc.gdd.zip"); + + gdd.export(&dest, ExportFormat::Zip, ExportOptions::default(), &empty_byte_store()) + .await + .unwrap_or_else(|error| panic!("export failed: {error:?}")); + + let bytes = std::fs::read(&dest).unwrap(); + let mut restored = document_container::backends::memory::MemoryBackend::new(); + Zip::open(std::io::Cursor::new(&bytes), &mut restored).unwrap(); + use document_container::Container; + assert!(restored.exists("manifest.json")); + assert!(restored.exists("registry.bin")); + assert!(restored.exists("session.json")); + assert!(!restored.exists("hot-log.frames")); + }); +} + +#[test] +fn export_rejects_invalid_options() { + use document_format::{ExportError, ExportFormat, ExportOptions}; + + futures::executor::block_on(async { + let gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(1), 0xEF, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let dir = tempfile::tempdir().unwrap(); + let dest = dir.path().join("nope"); + let options = ExportOptions { + include_registry: false, + include_history: false, + embed_all_resources: false, + }; + + match gdd.export(&dest, ExportFormat::Folder, options, &empty_byte_store()).await { + Err(ExportError::InvalidOptions(_)) => {} + Ok(_) => panic!("expected InvalidOptions, got Ok"), + Err(other) => panic!("expected InvalidOptions, got {other:?}"), + } + 
}); +} + +#[test] +fn resource_round_trip_add_read_remove() { + use graphene_resource::{ResourceHash, ResourceId}; + + futures::executor::block_on(async { + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(99), 0xCAFE, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let payload = b"deadbeef cafe babe"; + let hash = ResourceHash::from(&payload[..]); + let id = ResourceId::new(); + + assert!(!gdd.has_resource(&hash).await); + gdd.add_resource(id, payload).unwrap_or_else(|error| panic!("add_resource failed: {error:?}")); + assert!(gdd.has_resource(&hash).await); + + let read_back = gdd.read_resource(&hash).await.unwrap(); + assert_eq!(read_back.as_slice(), payload); + + // The registry records the resource (entry keyed by id, resolved to the content hash). + let entry = gdd.registry().resources.get(&id).expect("registry records the added resource"); + assert_eq!(entry.hash, Some(hash)); + + let hashes = gdd.resource_hashes().await.unwrap(); + assert_eq!(hashes, vec![hash]); + + gdd.remove_resource(&hash).unwrap(); + assert!(!gdd.has_resource(&hash).await); + }); +} + +#[test] +fn resource_survives_reopen() { + use graphene_resource::{ResourceHash, ResourceId}; + + futures::executor::block_on(async { + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(7), 0xC0DE, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let payload = b"persistent bytes"; + let hash = ResourceHash::from(&payload[..]); + let id = ResourceId::new(); + gdd.add_resource(id, payload).unwrap(); + + let (working, layout) = gdd.into_storage(); + let reopened = Gdd::::open_in(working, layout).await.unwrap_or_else(|error| panic!("open_in failed: {error:?}")); + + assert!(reopened.has_resource(&hash).await); + assert_eq!(reopened.read_resource(&hash).await.unwrap().as_slice(), payload); + + // The registry entry replicated through the history file and survives 
reopen. + let entry = reopened.registry().resources.get(&id).expect("reopened registry records the resource"); + assert_eq!(entry.hash, Some(hash)); + }); +} + +#[test] +fn resource_from_path_uses_fs_copy_on_folder_backend() { + use document_container::AnyContainer; + use document_container::backends::folder::FolderBackend; + use graphene_resource::{ResourceHash, ResourceId}; + + futures::executor::block_on(async { + // Need a folder-backed working copy to exercise the fs::copy path. + let working_dir = tempfile::tempdir().unwrap(); + let working = AnyContainer::Folder(FolderBackend::create(working_dir.path()).unwrap()); + let mut gdd = Gdd::::create_in(working, GddV1, PeerId(1), 0xAB, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + // Source file outside the working copy. + let payload = b"external resource bytes"; + let src_dir = tempfile::tempdir().unwrap(); + let src_path = src_dir.path().join("blob"); + std::fs::write(&src_path, payload).unwrap(); + + let hash = ResourceHash::from(&payload[..]); + let id = ResourceId::new(); + gdd.add_resource_from_path(id, hash, &src_path) + .unwrap_or_else(|error| panic!("add_resource_from_path failed: {error:?}")); + + assert!(gdd.has_resource(&hash).await); + assert_eq!(gdd.read_resource(&hash).await.unwrap().as_slice(), payload); + }); +} + +#[test] +fn export_carries_resources() { + use document_format::{ExportFormat, ExportOptions}; + use graphene_resource::{ResourceHash, ResourceId}; + + futures::executor::block_on(async { + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(2), 0xBC, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let payload = b"exported resource"; + let hash = ResourceHash::from(&payload[..]); + let id = ResourceId::new(); + gdd.add_resource(id, payload).unwrap(); + + let dir = tempfile::tempdir().unwrap(); + let dest = dir.path().join("export"); + gdd.export(&dest, 
ExportFormat::Folder, ExportOptions::default(), &empty_byte_store()).await.unwrap(); + + let resource_file = dest.join("resources").join(format!("{hash}")); + assert!(resource_file.exists(), "exported resource file should exist at {resource_file:?}"); + assert_eq!(std::fs::read(&resource_file).unwrap(), payload); + }); +} + +/// `embed_all_resources` makes a link-only resource self-contained: the bytes (which live only in +/// the byte store, not the working copy) are written into the export, the exported registry's chain +/// gains a leading `Embedded` source ahead of the original `Url`, and the export reopens with both. +#[test] +fn embed_all_resources_materializes_link_only_resource() { + use document_format::{ExportFormat, ExportOptions}; + use graph_craft::application_io::resource::ResourceStorage; + use graph_storage::NoMetadata; + use graphene_resource::{DataSource, ResourceHash, ResourceId, ResourceRegistry}; + + futures::executor::block_on(async { + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(8), 0xF00D, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + // A resource whose only source is a URL, resolved to a hash. The bytes live solely in the + // byte store; the working copy never holds them. + let payload = b"bytes behind a url"; + let hash = ResourceHash::from(&payload[..]); + let byte_store = empty_byte_store(); + byte_store.store(payload); + + let mut resources = ResourceRegistry::new(); + let id = ResourceId::new(); + resources.push_source_back(&id, DataSource::Url("https://example.com/r.bin".parse().unwrap())); + resources.resolve(&id, hash); + + gdd.commit_from_runtime(&network_referencing_resource(id), &NoMetadata, &resources, &byte_store) + .unwrap_or_else(|error| panic!("commit_from_runtime failed: {error:?}")); + + // The working copy holds no resource bytes (URL source, nothing embedded yet). 
+ assert!(!gdd.has_resource(&hash).await); + + let dir = tempfile::tempdir().unwrap(); + let dest = dir.path().join("embedded"); + gdd.export( + &dest, + ExportFormat::Folder, + ExportOptions { + embed_all_resources: true, + ..Default::default() + }, + &byte_store, + ) + .await + .unwrap_or_else(|error| panic!("export failed: {error:?}")); + + // Bytes materialized into the export. + let resource_file = dest.join("resources").join(format!("{hash}")); + assert!(resource_file.exists(), "embedded resource bytes should be written to {resource_file:?}"); + assert_eq!(std::fs::read(&resource_file).unwrap(), payload); + + // Reopen the export: the registry chain now leads with Embedded, keeping the URL as fallback, + // and the bytes are resolvable from the export itself with no byte store. + let reopened = Gdd::::open(&dest).await.unwrap_or_else(|error| panic!("open export failed: {error:?}")); + assert!(reopened.has_resource(&hash).await, "embedded bytes should be resolvable from the export"); + + let entry = reopened.registry().resources.get(&id).expect("resource entry survived export"); + assert_eq!(entry.hash, Some(hash)); + let embedded = serde_json::to_value(DataSource::Embedded).unwrap(); + let url = serde_json::to_value(DataSource::Url("https://example.com/r.bin".parse().unwrap())).unwrap(); + let chain: Vec<_> = entry.sources.iter().map(|(_, value)| value.source.clone()).collect(); + assert_eq!(chain, vec![embedded, url], "Embedded leads the chain, URL kept as fallback"); + }); +} + +/// A plain export (no `embed_all_resources`) still materializes the bytes of an already-`Embedded` +/// resource, pulling from the byte store when the working copy doesn't hold them (the editor case +/// where bytes live in the app-global cache, not the per-document working copy). 
+#[test] +fn export_materializes_embedded_resource_from_byte_store() { + use document_format::{ExportFormat, ExportOptions}; + use graph_craft::application_io::resource::ResourceStorage; + use graph_storage::NoMetadata; + use graphene_resource::{DataSource, ResourceHash, ResourceId, ResourceRegistry}; + + futures::executor::block_on(async { + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(9), 0xBEEF, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + // An Embedded resource whose bytes live only in the byte store, not the working copy. + let payload = b"embedded bytes in the cache"; + let hash = ResourceHash::from(&payload[..]); + let byte_store = empty_byte_store(); + byte_store.store(payload); + + let mut resources = ResourceRegistry::new(); + let id = ResourceId::new(); + resources.push_source_back(&id, DataSource::Embedded); + resources.resolve(&id, hash); + gdd.commit_from_runtime(&network_referencing_resource(id), &NoMetadata, &resources, &byte_store) + .unwrap_or_else(|error| panic!("commit_from_runtime failed: {error:?}")); + + assert!(!gdd.has_resource(&hash).await, "bytes should not be in the working copy"); + + let dir = tempfile::tempdir().unwrap(); + let dest = dir.path().join("plain"); + // Default options: embed_all_resources is false. 
+ gdd.export(&dest, ExportFormat::Folder, ExportOptions::default(), &byte_store) + .await + .unwrap_or_else(|error| panic!("export failed: {error:?}")); + + let resource_file = dest.join("resources").join(format!("{hash}")); + assert!(resource_file.exists(), "embedded resource bytes should be pulled from the store into {resource_file:?}"); + assert_eq!(std::fs::read(&resource_file).unwrap(), payload); + }); +} + +#[test] +fn open_in_rejects_future_format_version() { + futures::executor::block_on(async { + let container = empty_container(); + let layout = GddV1; + + let mut future_version = Manifest::new(0xC0DE, PeerId(1), "ed".into(), "std".into()); + future_version.format_version = manifest::SUPPORTED_FORMAT_VERSION + 1; + io::write_single(&container, layout.manifest_basename(), Codec::Json, &future_version).unwrap(); + + match Gdd::::open_in(container, layout).await { + Err(OpenError::UnsupportedVersion { .. }) => {} + Ok(_) => panic!("expected UnsupportedVersion, got Ok"), + Err(other) => panic!("expected UnsupportedVersion, got {other:?}"), + } + }); +} + +#[test] +fn create_in_records_default_codecs_in_manifest() { + futures::executor::block_on(async { + let gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(1), 0xAB, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let codecs = gdd.manifest().codecs; + assert_eq!(codecs.registry, Codec::MessagePack); + assert_eq!(codecs.history, Codec::MessagePackFrames); + assert_eq!(codecs.hot_log, Codec::MessagePackFrames); + assert_eq!(codecs.session, Codec::Json); + }); +} + +/// The `RegisterPeer` op auto-emitted on the first commit rides the hot-op pipeline through +/// persistence and retirement, so the `peer_users` mapping survives a reopen. 
+#[test] +fn first_commit_registers_peer_and_survives_reopen() { + use graph_craft::application_io::resource::HashMapResourceStorage; + use graph_craft::document::{DocumentNode, DocumentNodeImplementation, NodeInput, NodeNetwork}; + use graph_craft::{ProtoNodeIdentifier, concrete}; + use graph_storage::{NoMetadata, UserId}; + use graphene_resource::ResourceRegistry; + + futures::executor::block_on(async { + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(21), 0xAB, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let network = NodeNetwork { + exports: vec![NodeInput::node(core_types::uuid::NodeId(0), 0)], + nodes: [( + core_types::uuid::NodeId(0), + DocumentNode { + inputs: vec![NodeInput::import(concrete!(u32), 0)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new("graphene_core::ops::identity::IdentityNode")), + ..Default::default() + }, + )] + .into_iter() + .collect(), + ..Default::default() + }; + + gdd.commit_from_runtime(&network, &NoMetadata, &ResourceRegistry::new(), &HashMapResourceStorage::new()) + .unwrap_or_else(|error| panic!("commit_from_runtime failed: {error:?}")); + assert_eq!(gdd.registry().peer_users.get(&PeerId(21)), Some(&UserId(21)), "first commit registers the peer"); + + let (working, layout) = gdd.into_storage(); + let reopened = Gdd::::open_in(working, layout).await.unwrap_or_else(|error| panic!("open_in failed: {error:?}")); + assert_eq!(reopened.registry().peer_users.get(&PeerId(21)), Some(&UserId(21)), "registration survives reopen"); + }); +} + +#[test] +fn persist_path_writes_at_manifest_declared_codec_paths() { + // The manifest declares the on-disk codec for each payload; the persist path must write at the + // extension that codec implies, and reopen (which reads the codec from the manifest) must find them. 
+ futures::executor::block_on(async { + use document_container::AsyncContainer; + + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(5), 0xDEAD, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + let hot_op = HotOp { + op: RegistryDelta::AddNetwork { + id: ROOT_NETWORK, + network: Network::default(), + }, + timestamp: TimeStamp { counter: 1, peer: PeerId(5) }, + }; + gdd.apply_hot_op(hot_op).unwrap_or_else(|error| panic!("apply_hot_op failed: {error:?}")); + + let (working, layout) = gdd.into_storage(); + // Defaults: hot log is MessagePackFrames (.frames), manifest is always JSON. + assert!(working.exists(&io::path_for(layout.hot_log_basename(), Codec::MessagePackFrames)).await); + assert!(working.exists(&io::path_for(layout.manifest_basename(), Codec::Json)).await); + + let reopened = Gdd::::open_in(working, layout).await.unwrap_or_else(|error| panic!("open_in failed: {error:?}")); + assert!(reopened.registry().networks.contains_key(&ROOT_NETWORK)); + }); +} + +/// Complete declaration round-trip through the byte store: committing a runtime network with a +/// proto-node persists its `ProtoNode` content into a `ResourceStorage`, and resolving declarations +/// back through that store reconstructs the proto-node identifier in `to_runtime`. This is the +/// editor-shaped path (declaration bytes live in the resource store, not the Gdd container). 
+#[test] +fn declarations_round_trip_through_byte_store() { + use graph_craft::application_io::resource::HashMapResourceStorage; + use graph_craft::document::{DocumentNode, DocumentNodeImplementation, NodeInput, NodeNetwork}; + use graph_craft::{ProtoNodeIdentifier, concrete}; + use graph_storage::NoMetadata; + use graphene_resource::ResourceRegistry; + + const PROTO: &str = "graphene_core::ops::identity::IdentityNode"; + + futures::executor::block_on(async { + let network = NodeNetwork { + exports: vec![NodeInput::node(core_types::uuid::NodeId(0), 0)], + nodes: [( + core_types::uuid::NodeId(0), + DocumentNode { + inputs: vec![NodeInput::import(concrete!(u32), 0)], + implementation: DocumentNodeImplementation::ProtoNode(ProtoNodeIdentifier::new(PROTO)), + ..Default::default() + }, + )] + .into_iter() + .collect(), + ..Default::default() + }; + + let mut gdd = Gdd::::create_in(empty_container(), GddV1, PeerId(1), 0xAB, "ed".into(), "std".into()) + .await + .unwrap_or_else(|error| panic!("create_in failed: {error:?}")); + + // Commit: declaration bytes flow into the byte store, not the Gdd container. + let byte_store = HashMapResourceStorage::new(); + gdd.commit_from_runtime(&network, &NoMetadata, &ResourceRegistry::new(), &byte_store) + .unwrap_or_else(|error| panic!("commit_from_runtime failed: {error:?}")); + + // Resolve declarations back through the store and convert to a runtime network. 
+ let declarations = gdd.declarations(&byte_store).await; + assert_eq!(declarations.len(), 1, "expected one proto-node declaration resolved from the byte store"); + + let (converted, _entries) = gdd.registry().to_runtime_with_metadata(&declarations).unwrap_or_else(|error| panic!("to_runtime failed: {error:?}")); + + let node = converted.nodes.values().next().expect("converted network has the node"); + match &node.implementation { + DocumentNodeImplementation::ProtoNode(identifier) => assert_eq!(identifier.as_str(), PROTO, "proto-node identifier survived the byte-store round-trip"), + other => panic!("expected a ProtoNode implementation, got {other:?}"), + } + }); +} diff --git a/node-graph/rfcs/document-format.md b/node-graph/rfcs/document-format.md new file mode 100644 index 0000000000..f328ae7636 --- /dev/null +++ b/node-graph/rfcs/document-format.md @@ -0,0 +1,390 @@ +# Summary + +A document format (`.gdd`) for Graphite that decouples on-disk layout from the editor's in-memory runtime types. The format is a flat node registry plus a tree of operation-based CRDT deltas. The same delta type drives history, undo/redo, concurrent multi-user editing, migrations, and incremental compilation. + +# Motivation + +A delta-based, runtime-independent storage format addresses four problems with the legacy `.graphite` format (bincode/JSON of the editor's runtime structs): + +- **Scattered migrations.** Three coexisting legacy mechanisms — global string replacement on serialized JSON (`document_migration_string_preprocessing`), per-field `#[serde(alias = ...)]` / `deserialize_with` on runtime structs, and post-deserialize fixups (`migrate_path_modify_node`, `migrate_node`) — each requires keeping old runtime shapes alive in the codebase. +- **Snapshot undo/redo.** `document_undo_history: VecDeque` clones the whole interface on every gesture. +- **No concurrent editing path.** Online multi-user editing and offline merge are blocked by the snapshot model. 
+- **Recompiled-from-scratch graphs.** No diff signal to drive incremental compilation. + +A single delta representation unifies the data needed to fix all four: history step, CRDT op, migration unit, and compilation invalidation signal. + +# Guide-level explanation + +A document is a `Registry` plus a tree of operations applied to it. + +## Registry + +The `Registry` is a **flat** node graph. All nodes from all nested networks live in a single map; each node carries a back-pointer to its network. Networks themselves store only their list of exports and their per-network attributes. Proto-node declarations are not a separate table — they are content-addressed resources like any other (see [Resources](#resources)), referenced by `ResourceId`. + +```rs +pub struct Registry { + pub node_instances: HashMap<NodeId, Node>, // all nodes, flat + pub networks: HashMap<NetworkId, Network>, // exports + per-network attrs + pub exported_nodes: Vec<NodeId>, // library API surface + pub peer_users: HashMap<PeerId, UserId>, // per-device → per-human identity + pub resources: ResourceStore, // content-addressable resources (images, fonts, declarations) + pub attributes: Attributes, // document-level metadata +} + +pub struct Node { + pub implementation: Implementation, // ProtoNode(ResourceId) or Network(net) + pub inputs: Vec<InputSlot>, + pub inputs_attributes: Vec<Attributes>, + pub attributes: Attributes, + pub network: NetworkId, +} + +pub struct InputSlot { + pub input: NodeInput, + pub timestamp: TimeStamp, +} + +pub struct Network { + pub exports: Vec<ExportSlot>, + pub attributes: Attributes, // per-network ui::* (navigation, previewing) +} + +pub struct ExportSlot { + pub target: Option<NodeInput>, // None = removed/empty + pub timestamp: TimeStamp, +} + +pub const ROOT_NETWORK: NetworkId = 0; +``` + +`peer_users` records the append-only `PeerId → UserId` mapping written by each device's first contribution (see [Concurrency model](#concurrency-model--cmrdt)). + +The renderable graph lives in `networks[&ROOT_NETWORK]`.
By convention the renderer consumes slot 0 of its exports; the editor can pick a different slot via type-based heuristics or user choice. + +## Two exports concepts + +- **`Network.exports`** — the outputs of a callable network. Used by parent networks and (on `ROOT_NETWORK`) by the renderer. High-frequency edits. +- **`Registry.exported_nodes`** — the document's library API: nodes an importing document can reference. A node exposed here may itself be backed by a network via `Implementation::Network`. Library metadata (display name, category, ...) lives as `library::*` attributes on the referenced node. Low-frequency edits. + +Library import (how `.gdd` files reference each other and surface library nodes) is the subject of a follow-up RFC. + +## Attributes — the type-erased metadata bucket + +All metadata that isn't structural — node positions, display names, `call_argument` overrides, visibility, `context_features`, locked/pinned flags, input type hints, reflection metadata — lives in a single `Attributes` bucket per node, per input, per network, and at the document level: + +```rs +pub struct Value { + pub value: serde_json::Value, + pub timestamp: TimeStamp, +} + +pub type Attributes = HashMap<String, Value>; +``` + +Keys are namespaced (`ui::position`, `compute::call_argument`, `library::display_name`, ...). Values are JSON; the per-value `TimeStamp` drives last-writer-wins (LWW) resolution on concurrent edits. + +Type-erasure exists for migrations: storage data can be transformed without keeping old Rust struct shapes alive just to deserialize them.
+ +## Deltas + +A `RegistryDelta` is one atomic change to the registry, simultaneously a history step, a CRDT op to broadcast to peers, and a recompilation signal: + +```rs +pub enum RegistryDelta { + AddNode { node_id: NodeId, node: Node }, + RemoveNode { node_id: NodeId, snapshot: Node }, + ChangeNodeInput { node_id: NodeId, input_idx: usize, new_input: NodeInput }, + ChangeNodeAttribute { node_id: NodeId, delta: AttributeDelta }, + ChangeNodeInputAttribute { node_id: NodeId, input_idx: usize, delta: AttributeDelta }, + SetExport { network: NetworkId, slot: u32, target: Option<NodeInput> }, + ChangeNetworkAttribute { network: NetworkId, delta: AttributeDelta }, // per-network ui::nav::*, ... + AddNetwork { network: NetworkId, contents: Network }, + RemoveNetwork { network: NetworkId, snapshot: Network }, + SetExportedNodes { nodes: Vec<NodeId> }, + ChangeDocumentAttribute { delta: AttributeDelta }, + RegisterPeer { peer: PeerId, user: UserId }, + // Resources (incl. proto-node declarations): + SetResourceHash { id: ResourceId, hash: Option<ResourceHash> }, // LWW on the resolved hash + AddSource { id: ResourceId, key: SourceKey, source: Value }, // add-wins entry in the source chain + RemoveSource { id: ResourceId, key: SourceKey }, + AddResource { id: ResourceId, entry: ResourceEntry }, // whole-entry; reverse of RemoveResource + RemoveResource { id: ResourceId, snapshot: ResourceEntry }, // snapshot for O(1) reverse +} + +/// `value: None` is the removal case. Timestamp lives on the wrapping `Delta`. +pub struct AttributeDelta { + pub key: String, + pub value: Option<serde_json::Value>, +} +``` + +Each delta is wrapped with metadata for history, identity, and causality. `Rev` is content-addressed: the `blake3` hash of `(parents, author, timestamp, delta_type)` truncated to 128 bits, so identical content always produces the same `Rev` and concurrent retirements that converge collapse by construction.
+ +```rs +pub type Rev = u128; + +pub struct Delta { + pub id: Rev, + pub parents: Vec<Rev>, // multi-parent for Jujutsu (jj)-style merges + pub author: PeerId, + pub timestamp: TimeStamp, + pub delta_type: RegistryDelta, + pub reverse: RegistryDelta, // precomputed for undo; excluded from id + pub attributes: Attributes, // mutable local annotations; excluded from id +} +``` + +One timestamp per `Delta` applies to every LWW-eligible write inside its `delta_type` — slot writes, attribute writes, and whole-list writes all read the same `Delta.timestamp`. + +`Delta.attributes` is a type-erased annotation bucket (same shape as the registry's attribute buckets) for mutable, local-only labels — the `compute::gesture_end` marker that bounds undo units, and later commit messages. It is **excluded from `id`** so annotating a delta never changes its content-addressed identity; an inline write sets it before the delta's history frame is persisted, while a later relabel rewrites that frame. + +## History as a tree + +History is a multi-parent DAG. Branching is implicit: every concurrent or out-of-sync edit creates a branch by virtue of sharing a parent with another delta. A user's first commit after observing remote work adds the remote tip as an additional parent, so merges ride on the user's own edit rather than introducing phantom merge commits. + +``` + D1 ── D2 ── D3 (one user's session) + / + ── root ── + \ + D4 ── D5 (another peer, branched at root) +``` + +Linear undo is the common case; branching falls out naturally when two peers (or two windows on one machine) edit from the same parent. A history UI lets users navigate this tree to recover from convoluted undo/redo sessions or revisit past exploration. History compression collapses similar consecutive deltas (e.g., three sequential "move shape" ops) into a single coarser delta.
+ +## Two-tier history: hot ops and retired commits + +History has two tiers: + +- **Hot ops** — speculative, broadcast per-keystroke for live collaboration. Carry only a Lamport timestamp; no parents, no content-addressed `Rev`. Live in `Document.hot_log`, GC'd at retirement, persisted as a sidecar for crash recovery. May pass through non-compiling intermediate states. +- **Retired commits** — coarser `Delta`s produced by retirement. Every retired commit compiles in the leader's local view. Content-addressed, multi-parent, durable, browseable, replayable. + +A leader-elected peer periodically retires a window of hot ops into one or more semantically-equivalent retired commits (one per logical `(node, field)` group, not one giant commit per window). Retired commits use a single retirement timestamp for every field they write; the original hot-op timestamps are discarded. Leader election is gossip-based — lowest `PeerId` among peers whose `retirement_tip` matches the session max — and best-effort: there is no quorum, since content-addressed `Rev`s make concurrent retirements that converge dedupe by construction. + +Do/undo pair collapse only happens when both land in the same retirement window, subject to dependency closure (collapse must not orphan a reference to `X`). The undo/redo mechanism itself is described below. + +Solo retirement is the same mechanism with a session of one — history compaction during solo editing falls out for free. + +See [the collaboration model note](../../notes/document-format-collaboration.md) for the full retirement, leader-election, persistence, and reconnect semantics. + +## Undo/redo + +Undo/redo operate on the delta history rather than full-interface snapshots. 
A commit's undo behavior depends on whether it has been broadcast to other peers, tracked by `last_broadcast_rev: Option<Rev>` on `Document` (the latest commit shared with at least one peer; it is `None` during solo editing, which puts the entire history in the silent zone): + +- **Silent zone** — commits after `last_broadcast_rev`. No other peer has seen them, so they can be rewound in place. +- **Published zone** — commits at or before `last_broadcast_rev`. Shared history is never rewound; undoing one is a *new* forward commit applying the inverse with a fresh timestamp, so concurrent peers converge by LWW. + +The silent zone is the implemented path (solo editing has no transport yet); the published-zone forward-undo lands with collaboration. + +**Silent-zone cursor.** `head: Rev` is a movable pointer into the append-only DAG. Undo/redo move it; they never delete deltas (that would make redo impossible and discard branch history). The extra state is a redo stack `Vec<Rev>` — the checkpoints the user has undone past — because the DAG alone can't say which child a `head` was undone *from*. The redo stack persists in `session.json` alongside `head`, so redo survives reopen. A new edit while the redo stack is non-empty clears it (the undone-forward branch stays physically in the DAG but is no longer reachable via redo). + +**Gestures, not deltas.** One user action retires into several deltas (one per `(node, field)` group), so undo steps per *gesture*: the last delta of each gesture is tagged with the `compute::gesture_end` attribute, and undo reverts deltas walking the first-parent chain until the parent is a `gesture_end` boundary or the root. The starting `head` (the checkpoint) is pushed to the redo stack; redo re-applies forward to it. + +**Force-apply.** Rewinding re-applies each delta's precomputed `reverse` (for redo, the forward `delta_type`).
These carry the *original* timestamp, which would tie — and so lose — the LWW arms' strict `>` comparison, since the forward op already stamped each field at that timestamp. In the single-writer silent zone the rewind value is authoritative, so silent undo/redo apply in a **force** mode where LWW arms assign unconditionally and structural ops are idempotent. Undo and redo are symmetric (force-reverse, force-forward), so no clock advances and identities are unchanged. + +**Two registries.** Computing a correct `reverse` for an LWW field means reading the field's *pre-op* value. But staged edits apply to the live registry immediately (for responsiveness), so by retirement time it already holds the *post*-op value. `Document` therefore keeps two registries: a **working** registry (committed state plus live un-retired ops, what reads and the cursor see) and a **retired snapshot** (committed deltas only). Retirement computes reverses against and forward-applies to the snapshot, so the reverse captures the true prior value; the working registry already reflects the ops and is left as-is. When there are no un-retired ops the two are equal *by value* (their LWW field timestamps can differ, since retirement re-stamps the snapshot at a fresh time); undo/redo restore that equality by resyncing the snapshot to the rewound working registry. + +## Concurrency model — CmRDT + +The format uses an operation-based CRDT. The transport layer delivers ops in causal order exactly once (TCP plus the multi-parent chain in each `Delta`); the storage layer assumes this and requires only that concurrent op pairs commute. It does not need idempotency, state-merge, or out-of-order replay. + +Graph-shape invariants (the graph remaining a DAG, the result compiling) are best-effort: conflicts that produce a non-compiling graph surface as wiring or type errors rather than being masked by the CRDT. 
+ +Identity is two-tier: `PeerId` is per-device (stable per `(device, document)`, used for CRDT tiebreaking and `NodeId` scoping); `UserId` is per-human (stable across devices, used for identity display and undo-chain walking). Each device's first contribution emits `RegisterPeer { peer, user }`, which writes an append-only entry to `Registry.peer_users`. Causal delivery guarantees the registration arrives before any of that peer's other ops. + +## Editor pipeline + +The editor operates on its existing runtime types. Storage is a serialization layer for persistence, sync, and history: + +``` + ┌─────────────────────────────────────────┐ + │ Editor (runtime) │ + │ NodeNetworkInterface │ + │ ├── NodeNetwork (compute graph) │ + │ └── NodeNetworkMetadata (editor UI) │ + └─────────────────────────────────────────┘ + ▲ │ + │ to_runtime │ from_runtime + │ ▼ + ┌─────────────────────────────────────────┐ + │ Storage layer (graph-storage crate) │ + │ Registry, RegistryDelta, Document │ + └─────────────────────────────────────────┘ + │ + ▼ + ┌─────────────────────────────────────────┐ + │ On-disk (.gdd container) │ + │ named payloads: manifest, document, │ + │ history, resources/ │ + │ served by a Container backend │ + │ (folder, in-memory, OPFS), optionally │ + │ encoded through an Archive codec │ + │ (zip, xz) │ + └─────────────────────────────────────────┘ +``` + +The runtime is the source of truth during editing. Conversion runs on save, on load, and across the sync boundary when broadcasting or receiving ops. The editor-facing handle is `Session` (`graph_storage::Session`); `Document` is internal. `Session::stage_from_runtime(&NodeNetwork, &dyn NodeMetadataSource)` is the entry point: it diffs the stored registry against a fresh conversion, ticks the clock once per emitted op, and applies each as a hot op on the hot log. The `Gdd` handle then persists the hot frames and retires them into durable history. 
+ +Staging and retirement are split so one undo gesture maps to one retired gesture. The editor's undo unit is one legacy transaction boundary, but a single user action (e.g. a tool drag) re-commits the runtime many times within one such boundary. So the editor *stages* on every commit (keeping the working registry and autosave current) and *retires the pending hot ops as one gesture* only at the undo-step boundary and before any undo/redo. (`commit_from_runtime` — stage and retire atomically — remains for one-shot callers.) Solo editing thus flows through the same hot-op-then-retire path collaboration uses, exercising it before any transport lands. + +## On-disk container + +A `.gdd` document is a collection of named byte payloads. A `Container` backend (loose folder, in-memory, OPFS in the browser) provides the path-keyed read/write surface; an `Archive` codec (zip, xz-compressed tarball) optionally encodes a container into a single byte stream for compact distribution. The same logical document can be saved as a loose folder for VCS-friendly checkouts or as an archive for shipping, without any change above the container layer. + +The two concerns live in downstream crates: `document-container` defines the `Container` and `AsyncContainer` traits, the backends, byte ownership (mmap regions, owned buffers, external file mmaps via `mmap-io`), and the `Archive` trait. `document-format` defines the typed `Gdd` handle, the layout (logical-payload-name → in-container path), the data codec (JSON or binary), the manifest, and the save/load orchestration. `graph-storage` itself stays disk-unaware. 
+ +``` + ┌─────────────────────────────────┐ + │ editor │ + └─────────────────────────────────┘ + │ │ + ▼ ▼ + ┌───────────────┐ ┌──────────────────────────────┐ + │ graph-storage │ │ document-format │ + │ (disk-unaware)│◀─│ Gdd handle, Layout, codec, │ + └───────────────┘ │ ExportOptions │ + └──────────────────────────────┘ + │ + ▼ + ┌──────────────────────────────┐ + │ document-container │ + │ Container backends + Archive│ + │ codecs (folder, memory, │ + │ OPFS / zip, xz) │ + └──────────────────────────────┘ +``` + +Arrows are "depends on": the editor uses `Session` from `graph-storage` at runtime and `Gdd` from `document-format` on save/load; `document-format` serializes `graph-storage`'s types and delegates byte I/O to `document-container`; `graph-storage` and `document-container` are independent leaves. + +A document contains: + +- `manifest.json` — always JSON, the bootstrap file. Carries the magic identifier `"gdd"`, a single `u32` `format_version`, a stable `document_uuid`, the saving session's `PeerId`, editor and stdlib versions, an optional save timestamp, and a record of which payloads this save included (registry / history / embedded resources). +- `document.{json,bin}` — the serialized `Registry`. The codec is fixed per payload and recorded in the manifest (JSON for inspectable, MessagePack for compact; binary must be self-describing — see the codec rationale). Export reuses the working copy's recorded codecs rather than re-encoding. +- `history.{jsonl,frames}` — the serialized delta DAG, appended a record at a time. JSON history is line-oriented (one delta per line); binary history is length-prefixed MessagePack frames, the prefix guarding against a torn final frame from a crash. +- `resources/` — embedded resource bytes, keyed by `ResourceHash`. + +The folder backend stores these as plain files on disk; an archive codec packs the same named entries into a single file. 
+ +``` + my-doc.gdd/ + ├── manifest.json + ├── document.json + ├── history.jsonl + └── resources/ + ├── 7f3a... + └── 2c91... +``` + +The `Gdd` handle owns the loaded bytes and exposes them as zero-copy slices. On the folder backend, reads are direct mmap references; loading from an archive decompresses once on open into an in-memory backend. The working copy is mutated continuously (autosave); `export(dest, format, options, byte_store)` produces a separate artifact through an `ExportFormat` (`Folder`/`Zip`/`Xz`) without mutating the handle. + +`ExportOptions` controls scope: `include_registry` (skip = rebuild from history on load), `include_history` (skip = state-only snapshot), and `embed_all_resources`. These compose freely except that `include_registry: false && include_history: false` is rejected. The `byte_store` resolves resource bytes the working copy doesn't physically hold (in the editor they live in the app-global cache). `Embedded`-sourced resources are always materialized into the export's `resources/`; `embed_all_resources` additionally promotes link-only resources (`Url`/`FilePath`/`Font`) by prepending an `Embedded` source. That promotion is committed as real `AddSource` deltas on a throwaway session clone so the exported registry and history stay consistent; history is serialized in deterministic topological order, so identical delta sets export byte-identically. + +## Resources + +Everything content-addressable — raster images, fonts, embedded WASM, **and proto-node declarations** — is a resource. The storage `Registry` holds `resources: ResourceStore` (references only); the bytes live in a content-addressed byte store keyed by `ResourceHash`, owned by the caller (the app-global cache in the editor, the `Gdd` container for standalone/export), not by `graph-storage`. 
+ +```rs +pub type ResourceStore = HashMap<ResourceId, ResourceEntry>; + +pub struct ResourceEntry { + pub sources: Vec<(SourceKey, SourceValue)>, // fallback chain, sorted by key, add-wins OR-set + pub hash: Option<ResourceHash>, // resolved content hash (LWW) + pub hash_timestamp: TimeStamp, +} + +pub struct SourceKey { pub priority: Priority, pub peer: PeerId } // fractional priority + peer tiebreak +pub struct SourceValue { pub source: serde_json::Value, pub timestamp: TimeStamp } +``` + +A node references a resource by `ResourceId`; the entry maps it to a chain of `DataSource`s tried in order (`Embedded` bytes by hash, `FilePath`, `Url`, `Font`) plus the resolved `ResourceHash`. The chain is an **add-wins ordered OR-set**: each entry's `SourceKey` carries a fractional `Priority` so a peer can insert between two sources without renumbering, and concurrent insertions at the same priority converge via the `PeerId` tiebreak. The `hash` is **LWW** (content-derived, so concurrent resolves agree by construction). + +Each `DataSource` is stored as `serde_json::Value` rather than a typed enum, with the same motivation as the `Attributes` bucket: type-erasure lets migrations restructure variants without keeping old enum shapes alive. `DataSource` stays typed at the runtime layer; conversion happens at the serialization boundary. Unknown variants are a hard error on load. + +**Declarations as resources.** `Implementation::ProtoNode(ResourceId)` references a declaration resource. `from_runtime` serializes each `ProtoNode` through a self-describing `serde_json::Value` (MessagePack-encoded, via `encode_declaration`), hashes the bytes, derives the `ResourceId` from that hash (deterministic bootstrap; a future stable well-known-ID table would let the ID denote the function), and registers a `DataSource::Embedded` entry; the bytes go to the caller's byte store. `to_runtime` resolves declarations back via a `Declarations` (`ResourceId → ProtoNode`) map the caller builds from its byte store.
The self-describing form keeps `ProtoNode`'s serde aliases working so the on-disk shape stays migratable. + +A `NodeInput::Value` stores its `TaggedValue` as a self-describing `serde_json::Value` (the same type-erasure as `Attributes`/`DataSource`), so the `TaggedValue` serde aliases keep working and the on-disk shape stays migratable. Legacy documents with inline image `TaggedValue`s have those values extracted into resources at load time; new saves never embed inline image blobs in `NodeInput::Value`. + +## Migrations + +Migrations run on the type-erased `Registry`, after deserialization and before `to_runtime`. The pipeline reads the format version from the manifest, deserializes the registry with attributes as raw `serde_json::Value`, applies registered migrations scoped to the version range, and hands the result to `to_runtime`. + +Migrations live in a dedicated crate so they are usable both from the editor and from a CLI for batch upgrades. A single global format version is used initially; per-library versioning is a future extension. + +# Reference-level explanation + +## Conversion: runtime ↔ storage + +`from_runtime` flattens the recursive `NodeNetwork` into the flat `Registry`: + +- Each node's path through the runtime nesting is hashed (blake3 truncated to 64 bits, with the document's `PeerId` mixed in) to produce a stable global `NodeId`. The original local ID is stashed in an attribute (`compute::original_node_id`) so the round-trip can rebuild the runtime's per-network local IDs. Subsequent live edits mint fresh peer-scoped IDs via `Document::next_node_id` (`blake3(peer, counter)`) instead of going through the path-hash bootstrap. +- Each nested `NodeNetwork`'s `NetworkId` is derived from the owning node's path (blake3 of `(peer, path)` with a `"network"` domain tag), not assigned by a traversal counter. 
This makes it stable across a `to_runtime` → `from_runtime` round trip — load-bearing because node paths (and thus node-ID hashes) include `NetworkId`s, so an unstable network ID would cascade into unstable node IDs and break re-commit after open. Aliasing (multiple nodes referencing the same network) is structurally supported by the storage model — `Implementation::Network(NetworkId)` is a reference — but the converter does not exploit it yet. Aliasing will be addressed at the runtime layer first; the converter will then preserve sharing without an explicit dedup pass. +- Non-structural `DocumentNode` fields (`call_argument`, `context_features`, `visible`, `skip_deduplication`, ...) become entries in the node's `attributes`. UI metadata from `DocumentNodeMetadata` (positions, display names, locked, pinned, ...) flows through the same bucket under `ui::*` keys. + +`to_runtime` is the inverse: rebuild local IDs from the stashed attribute, restore typed fields from attribute values, follow `Implementation::Network` references to recursively materialize nested networks, and resolve `Implementation::ProtoNode(ResourceId)` against a `Declarations` map (`ResourceId → ProtoNode`) the caller supplies from its byte store. Since `graph-storage` is byte-unaware, `to_runtime` takes the resolved declarations as a parameter rather than reaching for bytes itself. + +## Slots — inputs and exports + +`Vec<InputSlot>` and `Vec<ExportSlot>` are positionally indexed at the storage layer. Each slot carries its own `TimeStamp`, giving LWW per slot on concurrent edits. + +`ExportSlot` is sparse: `target == None` means the slot has been removed. `InputSlot` is dense. The runtime conversion compacts exports into a dense `Vec<NodeInput>` (preserving the runtime's "remove an export shifts later positions" semantics) and strips input timestamps. + +Because inputs are stamped, `NodeInput::Node` references are set directly via `ChangeNodeInput` — there is no add/remove rewire workaround.
+ +## CmRDT semantics + +For the full design and per-op derivation, see the [CmRDT design doc](../../notes/document-format-cmrdt.md). Summary: + +- **Timestamps.** `TimeStamp = (u64, PeerId)` — a Lamport counter with a peer-ID tiebreak. Comparison is lexicographic. Wall-clock time is not used. +- **NodeId identity.** Every new `AddNode` issues a peer-scoped ID, so concurrent creates cannot collide. +- **Causal delivery.** `apply_delta` requires that every entry in `delta.parents` already be in local history. The storage layer does not buffer; out-of-order delivery is a transport concern. New peers initialize via snapshot transfer (`Registry` + history) before streaming deltas. +- **Removal.** Physical, no tombstones. If a later op targets an absent node or network, the receiver replays the most recent `AddNode` / network creation from history before applying. `RemoveNode` and `RemoveNetwork` each carry a `snapshot` of the removed entity so their reverse can rebuild in O(1) without re-walking history — required because retirement recomputes an op's reverse *after* the hot op already applied the removal, when the live entity is gone. Removal is therefore non-durable under concurrent edits: any concurrent reference to a removed node revives it. +- **LWW primitives.** Per-input (`InputSlot.timestamp`), per-export-slot (`ExportSlot.timestamp`), per-attribute-value (the `TimeStamp` in `Attributes`), and whole-list for `SetExportedNodes` via a sidecar timestamp in `Registry.attributes` under `library::exported_nodes_ts`. The timestamp driving every LWW arm comes from the wrapping `Delta`; `AttributeDelta` carries `value: Option<_>` so a single shape covers both `Set` (`Some`) and `Remove` (`None`) and `Set` vs. `Remove` has a defined winner. +- **Resources.** A resource's `hash` is LWW (content-derived, so concurrent resolves agree).
Its source chain is an add-wins ordered OR-set keyed by `SourceKey` (fractional priority + peer tiebreak): concurrent `AddSource`s at distinct keys all survive; a re-add at the same key is LWW. Whole-resource `AddResource`/`RemoveResource` mirror the node/network add-remove pairs (`RemoveResource` snapshots the entry for O(1) reverse). + +The CRDT does not mask graph-shape conflicts. Concurrent same-slot `SetExport`s with different targets resolve by LWW, but the resulting wiring may be wrong; downstream consumers see it as a compile or wiring error. + +## History storage + +History is stored as a `HashMap<Rev, Delta>` plus a `head: Rev` (the local cursor; advances only on local commits) and a `hot_log: Vec` (in-flight unretired ops). Walking history follows `delta.parents`; the default walk follows the first parent to reconstruct a single peer's local chain. Branches are siblings under a shared parent; merges aren't modeled as nodes — they're implicit in a delta listing multiple parents. + +## Editor metadata + +`DocumentNodePersistentMetadata` and `NodeNetworkPersistentMetadata` from the runtime — display names, locked/pinned, navigation/PTZ state, selection undo/redo stacks, layer/node type metadata — flow through the storage `Attributes` bucket under `ui::*` keys. Transient runtime caches (`DocumentNodeTransientMetadata`, click targets, resolved types, `OriginalLocation`) stay runtime-only and are not stored. + +Document-scoped editor settings (viewport view, render mode, overlay/ruler visibility, snapping, collapsed layers) ride the document-level `Registry.attributes` under `ui::doc::*` keys, supplied through `NodeMetadataSource::document_attributes`. This is what makes the `.gdd` a lossless replacement for the legacy format's document-handler fields.
+ +# Drawbacks + +- **Diffing two full `Registry`s on every autosave is O(N) in document size.** The interim cost of treating storage as a serialization layer derived from the runtime; currently triggered at autosave boundaries (`commit_storage_snapshot`) rather than per gesture, and addressed long-term by computing deltas directly on runtime mutations. +- **Attributes as `serde_json::Value` carry per-value overhead.** Mitigable with a typed fast path for hot keys without changing the design. They also force a self-describing codec, ruling out the most compact binary formats. +- **Single global format version is a sharp edge** when libraries diverge: a breaking change in one library bumps the version for documents that don't use it. +- **`RemoveNode` is non-durable under concurrency.** Any concurrent reference to a removed node revives it from history. + +# Rationale and alternatives + +**Delta-based vs. cleaner snapshot format.** A delta is the right unit for history, CRDT sync, and incremental compilation. Picking one representation for all three eliminates conversion seams between subsystems that need to interoperate. + +**CmRDT vs. state-based CRDT or OT.** State-based CRDTs require a merge function and large state vectors. OT requires a central server to mediate transforms. CmRDT only requires per-op commutativity plus a causal-order delivery layer, which the transport provides. + +**Ad-hoc resurrection vs. tombstones.** Tombstones add a permanent footprint to the data model and a GC policy question. Resurrection reuses the history log already needed for undo as the recovery mechanism, keeping the live `Registry` lean. The cost is that `RemoveNode` is not durable under concurrent edits. + +**Type-erased attributes vs. typed metadata fields.** Migrations operate on attribute values without keeping old Rust struct shapes alive. The cost is per-value overhead, mitigable without changing the model. + +**Flat node storage vs. 
nested networks.** All CRDT ops target nodes via a single uniform `NodeId` address space regardless of nesting depth. A nested representation would require ops to carry a path, complicating commutativity. + +**`.gdd` vs. reusing `.graphite`.** A distinct extension makes migration unambiguous and prevents older Graphite versions from trying to open a new-format file. + +**One self-describing binary codec (MessagePack).** Every persisted body — deltas, the registry, node-input values, and `ProtoNode` declarations — is a type-erased `serde_json::Value`, so it needs a self-describing codec to deserialize and to keep the serde-alias migration path alive; MessagePack provides that at a few percent size cost. Hash preimages (`NodeId`, `Rev`, node-path hashes) use the same codec: a fixed serializer emits one deterministic byte form per value, which is all `blake3` needs, so the codec doubles as the canonical hash encoding without a second format. + +**Source chain as a sorted `Vec` vs. `BTreeMap`.** `SourceKey` is a struct, so a `BTreeMap`-keyed chain can't serialize to JSON (string keys only). A sorted `Vec` of pairs keeps the same ordering and add-wins semantics losslessly across every codec. + +# Future possibilities + +- **Per-library format versioning** so a breaking change in one library doesn't bump the version for documents that don't use it. +- **History linearization** — prune unused branches from a convoluted tree to produce a clean undo/redo history. +- **Runtime-native deltas.** Move delta computation out of the storage layer into the runtime, eliminating per-edit `Registry` re-conversion. +- **Incremental compilation driven by deltas.** The compiler consumes runtime deltas and recompiles only changed regions. +- **Runtime-level aliasing for shared node-network definitions.** Storage already supports `Implementation::Network` as a reference; once the runtime supports sharing natively, the converter preserves it. 
+- **Online migration service** — active editors drop migrations older than some threshold; old documents go through a remote upgrade pipeline first. +- **Distributed / signed history.** Content-addressed `Rev` plus signing enables multi-author provenance and verifiable history. +- **Libraries as files** — a follow-up RFC will specify how `.gdd` files act as importable libraries via `Registry.exported_nodes`.