From 7eb2bb0561639953bd430958f686db0477c3978e Mon Sep 17 00:00:00 2001 From: Hubert Bugaj Date: Wed, 15 Apr 2026 16:33:20 +0200 Subject: [PATCH 1/3] feat: state migration benchmark --- src/dev/subcommands/migrate_cmd.rs | 94 ++++++++++++++++++++++++++++++ src/dev/subcommands/mod.rs | 5 ++ src/networks/mod.rs | 16 ++++- 3 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 src/dev/subcommands/migrate_cmd.rs diff --git a/src/dev/subcommands/migrate_cmd.rs b/src/dev/subcommands/migrate_cmd.rs new file mode 100644 index 000000000000..60f9f0a905ea --- /dev/null +++ b/src/dev/subcommands/migrate_cmd.rs @@ -0,0 +1,94 @@ +// Copyright 2019-2026 ChainSafe Systems +// SPDX-License-Identifier: Apache-2.0, MIT + +use crate::daemon::bundle::load_actor_bundles; +use crate::db::{ + car::ManyCar, + db_engine::{Db, DbConfig}, +}; +use crate::networks::{ChainConfig, Height, NetworkChain}; +use crate::state_migration::run_state_migrations; +use anyhow::Context as _; +use clap::Args; +use std::{path::PathBuf, sync::Arc}; + +/// Runs a single state migration against the head of a snapshot, using a +/// throwaway on-disk ParityDb as the writable backing store so that timings +/// reflect the real production I/O path. +/// +/// The snapshot CAR file is attached as a read-only layer; any state tree +/// blocks produced by the migration are written to the temporary ParityDb, +/// which is removed when the command exits. +#[derive(Debug, Args)] +pub struct MigrateCommand { + /// Path to the snapshot CAR file (plain `.car` or zstd-compressed `.car.zst`). + #[arg(long, required = true)] + snapshot: PathBuf, + /// Migration height to run (e.g. `GoldenWeek`, `Xxx`). The migration will + /// be invoked as if the chain had reached that height's configured epoch + /// for the network detected from the snapshot's genesis. + #[arg(long, required = true)] + height: Height, +} + +impl MigrateCommand { + pub async fn run(self) -> anyhow::Result<()> { + let Self { snapshot, height } = self; + + // On-disk ParityDb so the benchmark reflects production I/O rather than + // the in-memory fast path. + let temp_dir = tempfile::Builder::new() + .prefix("forest-migrate-") + .tempdir()?; + let paritydb_path = temp_dir.path().join("paritydb"); + let paritydb = Db::open(&paritydb_path, &DbConfig::default())?; + tracing::info!("Using temporary ParityDb at {}", paritydb_path.display()); + + let store = Arc::new(ManyCar::new(paritydb)); + store + .read_only_file(&snapshot) + .with_context(|| format!("failed to attach snapshot {}", snapshot.display()))?; + + let head = store.heaviest_tipset()?; + let genesis = head.genesis(&store)?; + let network = NetworkChain::from_genesis(genesis.cid()).context( + "snapshot genesis does not match any known mainnet/calibnet/butterflynet genesis; custom devnets are not supported", + )?; + let chain_config = ChainConfig::from_chain(&network); + + // The migration reads the target-height actor bundle from the + // blockstore; load it into the writable layer so it's visible through + // the ManyCar. + load_actor_bundles(store.writer(), &network).await?; + + let epoch = chain_config.epoch(height); + anyhow::ensure!( + epoch > 0, + "no epoch configured for height {height} on {network}" + ); + + let parent_state = *head.parent_state(); + tracing::info!( + "Running {height} migration on {network} (epoch {epoch}); head epoch {head_epoch}, parent state {parent_state}", + head_epoch = head.epoch(), + ); + + let start = std::time::Instant::now(); + let new_state = run_state_migrations(epoch, &chain_config, &store, &parent_state)?; + let elapsed = start.elapsed(); + + match new_state { + Some(new_state) => { + tracing::info!( + "Migration completed: {parent_state} -> {new_state} in {elapsed}", + elapsed = humantime::format_duration(elapsed), + ); + } + None => anyhow::bail!( + "No migration ran. Check that the mapping for height {height} is registered for {network} in `get_migrations` and that the snapshot's head is compatible." + ), + } + + Ok(()) + } +} diff --git a/src/dev/subcommands/mod.rs b/src/dev/subcommands/mod.rs index caf49d19477b..4d8d7aa55e39 100644 --- a/src/dev/subcommands/mod.rs +++ b/src/dev/subcommands/mod.rs @@ -4,6 +4,7 @@ mod archive_missing_cmd; mod export_state_tree_cmd; mod export_tipset_lookup_cmd; +mod migrate_cmd; mod state_cmd; mod update_checkpoints_cmd; @@ -53,6 +54,9 @@ pub enum Subcommand { ArchiveMissing(archive_missing_cmd::ArchiveMissingCommand), ExportTipsetLookup(export_tipset_lookup_cmd::ExportTipsetLookupCommand), ExportStateTree(export_state_tree_cmd::ExportStateTreeCommand), + /// Run a single state migration on the head of a snapshot, backed by a + /// throwaway on-disk ParityDb. Primarily intended for benchmarking. + Migrate(migrate_cmd::MigrateCommand), } impl Subcommand { @@ -64,6 +68,7 @@ impl Subcommand { Self::ArchiveMissing(cmd) => cmd.run().await, Self::ExportTipsetLookup(cmd) => cmd.run().await, Self::ExportStateTree(cmd) => cmd.run().await, + Self::Migrate(cmd) => cmd.run().await, } } } diff --git a/src/networks/mod.rs b/src/networks/mod.rs index b564f0d28655..21cabeff2238 100644 --- a/src/networks/mod.rs +++ b/src/networks/mod.rs @@ -11,7 +11,7 @@ use itertools::Itertools; use libp2p::Multiaddr; use num_traits::Zero; use serde::{Deserialize, Serialize}; -use strum::{Display, EnumIter, IntoEnumIterator}; +use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; use tracing::warn; use crate::beacon::{BeaconPoint, BeaconSchedule, DrandBeacon, DrandConfig}; @@ -136,8 +136,20 @@ impl NetworkChain { /// Defines the meaningful heights of the protocol. #[derive( - Debug, Default, Display, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash, EnumIter, + Debug, + Default, + Display, + Clone, + Copy, + Serialize, + Deserialize, + PartialEq, + Eq, + Hash, + EnumIter, + EnumString, )] +#[strum(ascii_case_insensitive)] #[cfg_attr(test, derive(derive_quickcheck_arbitrary::Arbitrary))] pub enum Height { #[default] From 6fbc0bd1ad717d1be808de727dda5146c9c28a1b Mon Sep 17 00:00:00 2001 From: Hubert Bugaj Date: Wed, 22 Apr 2026 13:57:41 +0200 Subject: [PATCH 2/3] db backend --- src/dev/subcommands/migrate_cmd.rs | 161 ++++++++++++++++++++--------- 1 file changed, 113 insertions(+), 48 deletions(-) diff --git a/src/dev/subcommands/migrate_cmd.rs b/src/dev/subcommands/migrate_cmd.rs index 60f9f0a905ea..b27a3dc3cbfd 100644 --- a/src/dev/subcommands/migrate_cmd.rs +++ b/src/dev/subcommands/migrate_cmd.rs @@ -1,24 +1,36 @@ // Copyright 2019-2026 ChainSafe Systems // SPDX-License-Identifier: Apache-2.0, MIT +use crate::blocks::Tipset; use crate::daemon::bundle::load_actor_bundles; use crate::db::{ - car::ManyCar, + car::{AnyCar, ManyCar}, db_engine::{Db, DbConfig}, }; use crate::networks::{ChainConfig, Height, NetworkChain}; use crate::state_migration::run_state_migrations; +use crate::utils::db::car_util::load_car; use anyhow::Context as _; -use clap::Args; +use clap::{Args, ValueEnum}; +use fvm_ipld_blockstore::Blockstore; use std::{path::PathBuf, sync::Arc}; +/// Read-side layout for the snapshot during the benchmark. +#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] +enum Backend { + /// Attach the snapshot CAR as a read-only overlay on top of the temporary + /// ParityDb. Migration reads hit the CAR layer. + Car, + /// Ingest the snapshot into the temporary ParityDb before timing the + /// migration, so migration reads go through the writable db the same way + /// a long-running daemon would. + Db, +} + /// Runs a single state migration against the head of a snapshot, using a /// throwaway on-disk ParityDb as the writable backing store so that timings -/// reflect the real production I/O path. -/// -/// The snapshot CAR file is attached as a read-only layer; any state tree -/// blocks produced by the migration are written to the temporary ParityDb, -/// which is removed when the command exits. +/// reflect the real production I/O path. The temporary ParityDb is removed +/// when the command exits. #[derive(Debug, Args)] pub struct MigrateCommand { /// Path to the snapshot CAR file (plain `.car` or zstd-compressed `.car.zst`). @@ -29,11 +41,18 @@ pub struct MigrateCommand { /// for the network detected from the snapshot's genesis. #[arg(long, required = true)] height: Height, + /// Storage layout to benchmark against. + #[arg(long, value_enum, default_value_t = Backend::Car)] + backend: Backend, } impl MigrateCommand { pub async fn run(self) -> anyhow::Result<()> { - let Self { snapshot, height } = self; + let Self { + snapshot, + height, + backend, + } = self; // On-disk ParityDb so the benchmark reflects production I/O rather than // the in-memory fast path. @@ -44,51 +63,97 @@ impl MigrateCommand { let paritydb = Db::open(&paritydb_path, &DbConfig::default())?; tracing::info!("Using temporary ParityDb at {}", paritydb_path.display()); - let store = Arc::new(ManyCar::new(paritydb)); - store - .read_only_file(&snapshot) - .with_context(|| format!("failed to attach snapshot {}", snapshot.display()))?; - - let head = store.heaviest_tipset()?; - let genesis = head.genesis(&store)?; - let network = NetworkChain::from_genesis(genesis.cid()).context( - "snapshot genesis does not match any known mainnet/calibnet/butterflynet genesis; custom devnets are not supported", - )?; - let chain_config = ChainConfig::from_chain(&network); - - // The migration reads the target-height actor bundle from the - // blockstore; load it into the writable layer so it's visible through - // the ManyCar. - load_actor_bundles(store.writer(), &network).await?; - - let epoch = chain_config.epoch(height); - anyhow::ensure!( - epoch > 0, - "no epoch configured for height {height} on {network}" - ); + match backend { + Backend::Db => { + // The snapshot is about to be consumed into the writable db, so + // identify the network first and hold on to the head tipset. + let (head, network) = { + let car = AnyCar::try_from(snapshot.as_path()).with_context(|| { + format!("failed to open snapshot {}", snapshot.display()) + })?; + let head = car.heaviest_tipset()?; + let network = detect_network(&head, &car)?; + (head, network) + }; + let chain_config = ChainConfig::from_chain(&network); + ensure_epoch(&chain_config, height, &network)?; - let parent_state = *head.parent_state(); - tracing::info!( - "Running {height} migration on {network} (epoch {epoch}); head epoch {head_epoch}, parent state {parent_state}", - head_epoch = head.epoch(), - ); - - let start = std::time::Instant::now(); - let new_state = run_state_migrations(epoch, &chain_config, &store, &parent_state)?; - let elapsed = start.elapsed(); - - match new_state { - Some(new_state) => { + tracing::info!("Importing snapshot into temporary ParityDb…"); + let import_start = std::time::Instant::now(); + let file = tokio::fs::File::open(&snapshot) + .await + .with_context(|| format!("failed to open {}", snapshot.display()))?; + load_car(&paritydb, tokio::io::BufReader::new(file)).await?; tracing::info!( - "Migration completed: {parent_state} -> {new_state} in {elapsed}", - elapsed = humantime::format_duration(elapsed), + "Snapshot imported in {}", + humantime::format_duration(import_start.elapsed()) ); + let store = Arc::new(paritydb); + load_actor_bundles(&*store, &network).await?; + bench(&store, &chain_config, &network, head, height) + } + Backend::Car => { + let store = Arc::new(ManyCar::new(paritydb)); + store + .read_only_file(&snapshot) + .with_context(|| format!("failed to attach snapshot {}", snapshot.display()))?; + let head = store.heaviest_tipset()?; + let network = detect_network(&head, &store)?; + let chain_config = ChainConfig::from_chain(&network); + ensure_epoch(&chain_config, height, &network)?; + load_actor_bundles(store.writer(), &network).await?; + bench(&store, &chain_config, &network, head, height) } - None => anyhow::bail!( - "No migration ran. Check that the mapping for height {height} is registered for {network} in `get_migrations` and that the snapshot's head is compatible." - ), } + } +} + +fn detect_network(head: &Tipset, store: &impl Blockstore) -> anyhow::Result { + let genesis = head.genesis(store)?; + NetworkChain::from_genesis(genesis.cid()).context( + "snapshot genesis does not match any known mainnet/calibnet/butterflynet genesis; custom devnets are not supported", + ) +} + +fn ensure_epoch( + chain_config: &ChainConfig, + height: Height, + network: &NetworkChain, +) -> anyhow::Result<()> { + let epoch = chain_config.epoch(height); + anyhow::ensure!( + epoch > 0, + "no epoch configured for height {height} on {network}" + ); + Ok(()) +} + +fn bench( + store: &Arc, + chain_config: &ChainConfig, + network: &NetworkChain, + head: Tipset, + height: Height, +) -> anyhow::Result<()> { + let epoch = chain_config.epoch(height); + let parent_state = *head.parent_state(); + tracing::info!( + "Running {height} migration on {network} (epoch {epoch}); head epoch {head_epoch}, parent state {parent_state}", + head_epoch = head.epoch(), + ); + + let start = std::time::Instant::now(); + let new_state = run_state_migrations(epoch, chain_config, store, &parent_state)?; + let elapsed = start.elapsed(); - Ok(()) + match new_state { + Some(new_state) => tracing::info!( + "Migration completed: {parent_state} -> {new_state} in {elapsed}", + elapsed = humantime::format_duration(elapsed), + ), + None => anyhow::bail!( + "No migration ran. Check that the mapping for height {height} is registered for {network} in `get_migrations` and that the snapshot's head is compatible." + ), } + Ok(()) } From 519d8fd5325c0111d6fd36b5da525639535baac5 Mon Sep 17 00:00:00 2001 From: Hubert Bugaj Date: Wed, 22 Apr 2026 16:16:09 +0200 Subject: [PATCH 3/3] batch write --- src/utils/db/car_util.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/db/car_util.rs b/src/utils/db/car_util.rs index 014a933c9bc3..17e03b81eaff 100644 --- a/src/utils/db/car_util.rs +++ b/src/utils/db/car_util.rs @@ -14,6 +14,7 @@ pub async fn load_car(db: &impl Blockstore, reader: R) -> anyhow::Result