From 37d67d53c71d0a48a5fcd13adece78198f9ef9ff Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 11:25:21 -0400 Subject: [PATCH 01/11] vmm: bump tdx crate to 0.1.1 Upgrade tdx from 0.1.0 to 0.1.1 to enable access to the tdvf module needed for TD-Shim support. Also update related KVM dependencies (kvm-bindings to 0.14, kvm-ioctls to 0.24, vmm-sys-util to 0.15) across all crates to maintain compatibility. Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- Cargo.lock | 52 ++++++++++++------------------------- src/arch/Cargo.toml | 8 +++--- src/cpuid/Cargo.toml | 6 ++--- src/devices/Cargo.toml | 4 +-- src/libkrun/Cargo.toml | 4 +-- src/rutabaga_gfx/Cargo.toml | 2 +- src/utils/Cargo.toml | 4 +-- src/vmm/Cargo.toml | 8 +++--- 8 files changed, 34 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ecb90d195..b4e2c4cb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -625,7 +625,7 @@ dependencies = [ "libc", "tdx", "vm-memory", - "vmm-sys-util 0.14.0", + "vmm-sys-util", ] [[package]] @@ -652,7 +652,7 @@ version = "0.1.0-1.18.0" dependencies = [ "kvm-bindings", "kvm-ioctls", - "vmm-sys-util 0.14.0", + "vmm-sys-util", ] [[package]] @@ -683,7 +683,7 @@ dependencies = [ "virtio-bindings", "vm-fdt", "vm-memory", - "vmm-sys-util 0.15.0", + "vmm-sys-util", "zerocopy", ] @@ -748,7 +748,7 @@ dependencies = [ "pkg-config", "remain", "thiserror 1.0.69", - "vmm-sys-util 0.14.0", + "vmm-sys-util", "winapi", "zerocopy", ] @@ -770,7 +770,7 @@ dependencies = [ "libc", "log", "nix 0.30.1", - "vmm-sys-util 0.14.0", + "vmm-sys-util", "windows-sys", ] @@ -805,7 +805,7 @@ dependencies = [ "serde_json", "tdx", "vm-memory", - "vmm-sys-util 0.14.0", + "vmm-sys-util", "zstd", ] @@ -819,23 +819,23 @@ dependencies = [ [[package]] name = "kvm-bindings" -version = "0.12.1" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a537873e15e8daabb416667e606d9b0abc2a8fb9a45bd5853b888ae0ead82f9" 
+checksum = "4b3c06ff73c7ce03e780887ec2389d62d2a2a9ddf471ab05c2ff69207cd3f3b4" dependencies = [ - "vmm-sys-util 0.14.0", + "vmm-sys-util", ] [[package]] name = "kvm-ioctls" -version = "0.22.1" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c8f7370330b4f57981e300fa39b02088f2f2a5c2d0f1f994e8090589619c56d" +checksum = "333f77a20344a448f3f70664918135fddeb804e938f28a99d685bd92926e0b19" dependencies = [ "bitflags 2.11.0", "kvm-bindings", "libc", - "vmm-sys-util 0.14.0", + "vmm-sys-util", ] [[package]] @@ -1432,9 +1432,9 @@ dependencies = [ [[package]] name = "tdx" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad59e5bf374211a1fdd8e7439a07d5a5e617fe97f5cf21d03bcd1bf8c82b73af" +checksum = "83943e37cf46979f711ad11489c641fa058fd0fae92c122d1fc26a664e82acab" dependencies = [ "bitflags 2.11.0", "iocuddle", @@ -1442,7 +1442,7 @@ dependencies = [ "kvm-ioctls", "libc", "uuid", - "vmm-sys-util 0.12.1", + "vmm-sys-util", ] [[package]] @@ -1583,7 +1583,7 @@ dependencies = [ "libc", "uuid", "vm-memory", - "vmm-sys-util 0.15.0", + "vmm-sys-util", ] [[package]] @@ -1615,26 +1615,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "vmm-sys-util" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" -dependencies = [ - "bitflags 1.3.2", - "libc", -] - -[[package]] -name = "vmm-sys-util" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d21f366bf22bfba3e868349978766a965cbe628c323d58e026be80b8357ab789" -dependencies = [ - "bitflags 1.3.2", - "libc", -] - [[package]] name = "vmm-sys-util" version = "0.15.0" diff --git a/src/arch/Cargo.toml b/src/arch/Cargo.toml index a60963abb..7d13fd1b7 100644 --- a/src/arch/Cargo.toml +++ b/src/arch/Cargo.toml @@ -15,16 +15,16 @@ tdx = [ "tee", "dep:tdx" ] [dependencies] libc = 
">=0.2.39" vm-memory = { version = "0.17", features = ["backend-mmap"] } -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" arch_gen = { package = "krun-arch-gen", version = "=0.1.0-1.18.0", path = "../arch_gen" } smbios = { package = "krun-smbios", version = "=0.1.0-1.18.0", path = "../smbios" } utils = { package = "krun-utils", version = "=0.1.0-1.18.0", path = "../utils" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" -tdx = { version = "0.1.0", optional = true } +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" +tdx = { version = "0.1.1", optional = true } [dev-dependencies] utils = { package = "krun-utils", version = "=0.1.0-1.18.0", path = "../utils" } diff --git a/src/cpuid/Cargo.toml b/src/cpuid/Cargo.toml index 8066b3c49..19e5e8dac 100644 --- a/src/cpuid/Cargo.toml +++ b/src/cpuid/Cargo.toml @@ -11,8 +11,8 @@ repository = "https://github.com/containers/libkrun" tdx = [] [dependencies] -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index eacb6cc97..9f014c820 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -51,8 +51,8 @@ lru = ">=0.9" [target.'cfg(target_os = "linux")'.dependencies] rutabaga_gfx = { package = "krun-rutabaga-gfx", version = "=0.1.0-1.18.0", path = "../rutabaga_gfx", features = ["x"], optional = true } caps = "0.5.5" -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" [target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] vm-fdt = ">= 0.2.0" diff --git a/src/libkrun/Cargo.toml 
b/src/libkrun/Cargo.toml index 4e54bf99c..e847469f3 100644 --- a/src/libkrun/Cargo.toml +++ b/src/libkrun/Cargo.toml @@ -40,8 +40,8 @@ rand = "0.9.2" hvf = { package = "krun-hvf", version = "=0.1.0-1.18.0", path = "../hvf" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" aws-nitro = { package = "krun-aws-nitro", version = "=0.1.0-1.18.0", path = "../aws_nitro", optional = true } nitro-enclaves = { version = "0.5.0", optional = true } vm-memory = { version = "0.17", features = ["backend-mmap"] } diff --git a/src/rutabaga_gfx/Cargo.toml b/src/rutabaga_gfx/Cargo.toml index f8e8efa2c..c2d98eb37 100644 --- a/src/rutabaga_gfx/Cargo.toml +++ b/src/rutabaga_gfx/Cargo.toml @@ -27,7 +27,7 @@ remain = "0.2" thiserror = "1.0.23" zerocopy = { version = "0.8.26", features = ["derive"] } log = "0.4" -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" [target.'cfg(unix)'.dependencies] nix = { version = "0.30.1", features = ["event", "feature", "fs", "mman", "socket", "uio", "ioctl"] } diff --git a/src/utils/Cargo.toml b/src/utils/Cargo.toml index f24921bc4..5ae7ab27b 100644 --- a/src/utils/Cargo.toml +++ b/src/utils/Cargo.toml @@ -14,11 +14,11 @@ log = "0.4.0" [target.'cfg(unix)'.dependencies] libc = ">=0.2.85" nix = "0.30.1" -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" crossbeam-channel = ">=0.5.15" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } [target.'cfg(target_os = "macos")'.dependencies] nix = { version = "0.30.1", features = ["fs"] } diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 377f9c731..57b0fa69a 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -27,7 +27,7 @@ linux-loader = { version = "0.13.2", features = ["bzimage", "elf", "pe"] } log = "0.4.0" 
nix = { version = "0.30.1", features = ["fs", "term"] } vm-memory = { version = "0.17.0", features = ["backend-mmap"] } -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" krun_display = { package = "krun-display", version = "0.1.0", path = "../display", optional = true, features = ["bindgen_clang_runtime"] } krun_input = { package = "krun-input", version = "0.1.0", path = "../input", optional = true, features = ["bindgen_clang_runtime"] } @@ -52,9 +52,9 @@ cpuid = { package = "krun-cpuid", version = "=0.1.0-1.18.0", path = "../cpuid" } zstd = "0.13" [target.'cfg(target_os = "linux")'.dependencies] -tdx = { version = "0.1.0", optional = true } -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +tdx = { version = "0.1.1", optional = true } +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" [target.'cfg(target_os = "macos")'.dependencies] hvf = { package = "krun-hvf", version = "=0.1.0-1.18.0", path = "../hvf" } From 01ea016540bfbc5d4d22c21571d5e0c5c6237dca Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 11:35:12 -0400 Subject: [PATCH 02/11] vmm: add VmResources storage for TD-Shim Add `tee_firmware_config` field to `VmResources` with `set_tee_firmware_config()` accessor method. Add test to verify the field and method work correctly. Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- src/vmm/src/resources.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index ffa9e6eac..29a49b7fc 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -160,6 +160,9 @@ pub struct VmResources { /// The parameters for the initrd bundle to be loaded in this microVM. #[cfg(feature = "tee")] pub initrd_bundle: Option, + /// Path to a user-provided TD-Shim binary for TDX guests. + #[cfg(feature = "tdx")] + pub tee_firmware_config: Option, /// The fs device. 
#[cfg(not(feature = "tee"))] pub fs: Vec, @@ -335,6 +338,11 @@ impl VmResources { Ok(()) } + #[cfg(feature = "tdx")] + pub fn set_tee_firmware_config(&mut self, cfg: FirmwareConfig) { + self.tee_firmware_config = Some(cfg); + } + #[cfg(not(feature = "tee"))] pub fn add_fs_device(&mut self, config: FsDeviceConfig) { self.fs.push(config) @@ -526,3 +534,24 @@ mod tests { ); } } + +#[cfg(all(test, feature = "tdx"))] +mod tee_firmware_tests { + use super::*; + use crate::vmm_config::firmware::FirmwareConfig; + use std::path::PathBuf; + + #[test] + fn test_set_and_get_tee_firmware_config() { + let mut r = VmResources::default(); + assert!(r.tee_firmware_config.is_none()); + let cfg = FirmwareConfig { + path: PathBuf::from("/tmp/td-shim.bin"), + }; + r.set_tee_firmware_config(cfg); + assert_eq!( + r.tee_firmware_config.unwrap().path, + PathBuf::from("/tmp/td-shim.bin") + ); + } +} From 672e05113438d024bac2c26ed6415bcb96007999 Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 11:45:21 -0400 Subject: [PATCH 03/11] libkrun: add krun_set_tee_firmware() API for TD-Shim Expose krun_set_tee_firmware() in the C API so callers can specify a TD-Shim binary path for TDX guests. When set, the VMM uses the given firmware instead of the bundled qboot from libkrunfw-tdx. The path is stored in ContextConfig and forwarded to VmResources at start time. Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- include/libkrun.h | 13 +++++++++++++ src/libkrun/src/lib.rs | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/include/libkrun.h b/include/libkrun.h index 3004110f6..66099fdcf 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -974,6 +974,19 @@ int32_t krun_set_env(uint32_t ctx_id, const char *const envp[]); */ int32_t krun_set_tee_config_file(uint32_t ctx_id, const char *filepath); +/** + * Sets the path to a TD-Shim binary for TDX guests. 
If not called, the TDX guest uses + * the bundled qboot firmware from libkrunfw-tdx. Only available in libkrun-tdx. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "fw_path" - a null-terminated string representing the path to the TD-Shim binary. + * + * Returns: + * Zero on success or a negative error number on failure. + */ +int32_t krun_set_tee_firmware(uint32_t ctx_id, const char *fw_path); + /** * Adds a port-path pairing for guest IPC with a process in the host. * diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index a7b7eee6a..fc1b3d898 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -45,7 +45,6 @@ use vmm::resources::{ use vmm::vmm_config::block::{BlockDeviceConfig, BlockRootConfig}; #[cfg(not(feature = "tee"))] use vmm::vmm_config::external_kernel::{ExternalKernel, KernelFormat}; -#[cfg(not(feature = "tee"))] use vmm::vmm_config::firmware::FirmwareConfig; #[cfg(not(feature = "tee"))] use vmm::vmm_config::fs::FsDeviceConfig; @@ -159,6 +158,8 @@ struct ContextConfig { block_root: Option, #[cfg(feature = "tee")] tee_config_file: Option, + #[cfg(feature = "tdx")] + tee_firmware_path: Option, unix_ipc_port_map: Option>, shutdown_efd: Option, gpu_virgl_flags: Option, @@ -308,6 +309,16 @@ impl ContextConfig { self.tee_config_file.clone() } + #[cfg(feature = "tdx")] + fn set_tee_firmware_path(&mut self, path: PathBuf) { + self.tee_firmware_path = Some(path); + } + + #[cfg(feature = "tdx")] + fn get_tee_firmware_path(&self) -> Option { + self.tee_firmware_path.clone() + } + fn add_vsock_port(&mut self, port: u32, filepath: PathBuf, listen: bool) { if let Some(ref mut map) = &mut self.unix_ipc_port_map { map.insert(port, (filepath, listen)); @@ -1428,6 +1439,26 @@ pub unsafe extern "C" fn krun_set_tee_config_file(ctx_id: u32, c_filepath: *cons KRUN_SUCCESS } +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +#[cfg(feature = "tdx")] +pub unsafe extern "C" fn krun_set_tee_firmware(ctx_id: u32, c_fw_path: *const 
c_char) -> i32 { + let path = match CStr::from_ptr(c_fw_path).to_str() { + Ok(p) => PathBuf::from(p), + Err(e) => { + error!("Error parsing fw_path: {e:?}"); + return -libc::EINVAL; + } + }; + + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => ctx_cfg.get_mut().set_tee_firmware_path(path), + Entry::Vacant(_) => return -libc::ENOENT, + } + + KRUN_SUCCESS +} + #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_add_vsock_port( @@ -2695,6 +2726,13 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { return -libc::EINVAL; } + #[cfg(feature = "tdx")] + if let Some(fw_path) = ctx_cfg.get_tee_firmware_path() { + ctx_cfg + .vmr + .set_tee_firmware_config(FirmwareConfig { path: fw_path }); + } + let kernel_cmdline = KernelCmdlineConfig { prolog: Some(format!("{DEFAULT_KERNEL_CMDLINE} init={INIT_PATH}")), krun_env: Some(format!( From bfc9f814825078382970e2c1d894158ae92914cd Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 11:56:21 -0400 Subject: [PATCH 04/11] vmm: add attributes field to MeasuredRegion for per-section TDX measurement Add a u32 attributes field to MeasuredRegion so each section can carry its own TDX measurement flags (e.g. MR_EXTEND). The TDX launcher now reads attributes from MeasuredRegion instead of inferring them from the guest address. Existing qboot and SNP regions use attributes=0; the qboot firmware region uses attributes=1 (measure and extend). 
Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- src/vmm/src/builder.rs | 6 ++++++ src/vmm/src/linux/tee/inteltdx.rs | 2 +- src/vmm/src/linux/vstate.rs | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index b92b931d4..52eafadaa 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -677,6 +677,7 @@ pub fn build_microvm( .get_host_address(GuestAddress(arch::FIRMWARE_START)) .unwrap() as u64, size: qboot_size, + attributes: 0, }, MeasuredRegion { guest_addr: kernel_guest_addr, @@ -684,11 +685,13 @@ pub fn build_microvm( .get_host_address(GuestAddress(kernel_guest_addr)) .unwrap() as u64, size: kernel_size, + attributes: 0, }, MeasuredRegion { guest_addr: initrd_addr.0, host_addr: guest_memory.get_host_address(initrd_addr).unwrap() as u64, size: initrd_size, + attributes: 0, }, MeasuredRegion { guest_addr: arch::x86_64::layout::ZERO_PAGE_START, @@ -696,6 +699,7 @@ pub fn build_microvm( .get_host_address(GuestAddress(arch::x86_64::layout::ZERO_PAGE_START)) .unwrap() as u64, size: 4096, + attributes: 0, }, ] }; @@ -713,6 +717,7 @@ pub fn build_microvm( guest_addr: 0, host_addr: guest_memory.get_host_address(GuestAddress(0)).unwrap() as u64, size: 0x8000_0000, + attributes: 0, }, MeasuredRegion { guest_addr: arch::FIRMWARE_START, @@ -720,6 +725,7 @@ pub fn build_microvm( .get_host_address(GuestAddress(arch::FIRMWARE_START)) .unwrap() as u64, size: qboot_size, + attributes: 1, }, ]; diff --git a/src/vmm/src/linux/tee/inteltdx.rs b/src/vmm/src/linux/tee/inteltdx.rs index dc12f8cc3..68152dc28 100644 --- a/src/vmm/src/linux/tee/inteltdx.rs +++ b/src/vmm/src/linux/tee/inteltdx.rs @@ -38,7 +38,7 @@ impl IntelTdx { let mem_region = tdx::launch::MemRegion::new( region.guest_addr, (region.size / 4096) as u64, - (arch::FIRMWARE_START == region.guest_addr).into(), + region.attributes, region.host_addr, ); launcher diff --git a/src/vmm/src/linux/vstate.rs 
b/src/vmm/src/linux/vstate.rs index 05e58fbd7..9024a6e5e 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -432,6 +432,7 @@ pub struct MeasuredRegion { pub guest_addr: u64, pub host_addr: u64, pub size: usize, + pub attributes: u32, } /// Describes a KVM context that gets attached to the microVM. From f701395976b323825569ba4b5cfa7a8283c5e463 Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 12:02:41 -0400 Subject: [PATCH 05/11] vmm/tee: add tdshim module with TDVF parsing and section loading Add tdshim.rs under linux/tee/ with TdShim::parse() to open a TD-Shim binary, extract TDVF section metadata, validate that BFV and TdHob sections are present, and retain the raw firmware bytes. Provides load_sections() to copy section data into guest memory with bounds checking, and high_firmware_range() to compute the address span of sections above the MMIO gap for memory layout purposes. Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- src/vmm/src/linux/tee/mod.rs | 3 + src/vmm/src/linux/tee/tdshim.rs | 299 ++++++++++++++++++++++++++++++++ 2 files changed, 302 insertions(+) create mode 100644 src/vmm/src/linux/tee/tdshim.rs diff --git a/src/vmm/src/linux/tee/mod.rs b/src/vmm/src/linux/tee/mod.rs index 572856408..339e00236 100644 --- a/src/vmm/src/linux/tee/mod.rs +++ b/src/vmm/src/linux/tee/mod.rs @@ -3,3 +3,6 @@ pub mod amdsnp; #[cfg(feature = "tdx")] pub mod inteltdx; + +#[cfg(feature = "tdx")] +pub mod tdshim; diff --git a/src/vmm/src/linux/tee/tdshim.rs b/src/vmm/src/linux/tee/tdshim.rs new file mode 100644 index 000000000..04f101ae5 --- /dev/null +++ b/src/vmm/src/linux/tee/tdshim.rs @@ -0,0 +1,299 @@ +use std::fs::File; +use std::io::{self, Read, Seek}; +use std::path::Path; +use tdx::tdvf::{self, TdvfSection, TdvfSectionType}; +use vm_memory::{Bytes, GuestAddress, GuestMemory, GuestMemoryMmap}; + +#[derive(Debug)] +pub enum Error { + OpenFirmware(io::Error), + ReadFirmware(io::Error), + 
ParseSections(tdx::tdvf::Error), + InvalidSectionOffset, + MissingBfv, + MissingTdHob, + GuestMemory(vm_memory::GuestMemoryError), +} + +type Result = std::result::Result; + +pub struct TdShim { + pub sections: Vec, + pub hob_address: u64, + pub firmware_data: Vec, +} + +fn is_bfv(s: &TdvfSection) -> bool { + matches!(s.section_type, TdvfSectionType::Bfv) +} + +fn is_td_hob(s: &TdvfSection) -> bool { + matches!(s.section_type, TdvfSectionType::TdHob) +} + +fn validate_sections(sections: &[TdvfSection]) -> Result<()> { + if !sections.iter().any(is_bfv) { + return Err(Error::MissingBfv); + } + if !sections.iter().any(is_td_hob) { + return Err(Error::MissingTdHob); + } + Ok(()) +} + +impl TdShim { + pub fn parse(path: &Path) -> Result { + let mut file = File::open(path).map_err(Error::OpenFirmware)?; + let sections = tdvf::parse_sections(&mut file).map_err(Error::ParseSections)?; + validate_sections(§ions)?; + let hob_address = sections + .iter() + .find(|s| is_td_hob(s)) + .unwrap() + .memory_address; + file.rewind().map_err(Error::ReadFirmware)?; + let mut firmware_data = Vec::new(); + file.read_to_end(&mut firmware_data) + .map_err(Error::ReadFirmware)?; + Ok(Self { + sections, + hob_address, + firmware_data, + }) + } + + #[cfg(test)] + fn firmware_range(&self) -> (u64, u64) { + let min = self + .sections + .iter() + .map(|s| s.memory_address) + .min() + .unwrap(); + let max = self + .sections + .iter() + .map(|s| s.memory_address + s.memory_data_size) + .max() + .unwrap(); + (min, max) + } + + /// Returns [min_addr, max_addr) covering only sections above the 32-bit MMIO gap. + /// These sections need their own GuestMemoryMmap region; sections below the gap + /// fall within the normal RAM mapping and need no separate hole. 
+ pub fn high_firmware_range(&self) -> Option<(u64, u64)> { + let mmio_start = arch::x86_64::layout::MMIO_MEM_START; + let min = self + .sections + .iter() + .filter(|s| s.memory_address >= mmio_start) + .map(|s| s.memory_address) + .min()?; + let max = self + .sections + .iter() + .filter(|s| s.memory_address >= mmio_start) + .map(|s| s.memory_address + s.memory_data_size) + .max()?; + Some((min, max)) + } + + /// Copies sections with raw data into guest memory. Zero-fill sections are + /// already handled by the mmap backing. + pub fn load_sections(&self, firmware_data: &[u8], guest_mem: &GuestMemoryMmap) -> Result<()> { + for section in &self.sections { + if section.raw_data_size <= 0 { + continue; + } + let start = section.data_offset as usize; + let end = start + section.raw_data_size as usize; + if end > firmware_data.len() { + return Err(Error::InvalidSectionOffset); + } + guest_mem + .write( + &firmware_data[start..end], + GuestAddress(section.memory_address), + ) + .map_err(Error::GuestMemory)?; + } + Ok(()) + } + + pub fn generate_hobs( + &self, + guest_mem: &GuestMemoryMmap, + kernel_entry_addr: u64, + ram_regions: &[(u64, u64)], + ) -> Result<()> { + let hob_section = self.sections.iter().find(|s| is_td_hob(s)).unwrap(); + let hob_size = hob_section.memory_data_size as usize; + let mut buf = vec![0u8; hob_size]; + + write_hob_chain( + &mut buf, + hob_section.memory_address, + hob_size, + ram_regions, + kernel_entry_addr, + )?; + + guest_mem + .write(&buf, GuestAddress(hob_section.memory_address)) + .map_err(Error::GuestMemory)?; + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn bfv_section() -> TdvfSection { + TdvfSection { + data_offset: 0, + raw_data_size: 0x1000, + memory_address: 0xffff_0000, + memory_data_size: 0x1000, + section_type: TdvfSectionType::Bfv, + attributes: 1, + } + } + + fn hob_section() -> TdvfSection { + TdvfSection { + data_offset: 0, + raw_data_size: 0, + memory_address: 0x5000_0000, + memory_data_size: 0x1000, 
+ section_type: TdvfSectionType::TdHob, + attributes: 0, + } + } + + #[test] + fn test_validate_missing_bfv() { + let sections = vec![hob_section()]; + assert!(matches!( + validate_sections(§ions), + Err(Error::MissingBfv) + )); + } + + #[test] + fn test_validate_missing_hob() { + let sections = vec![bfv_section()]; + assert!(matches!( + validate_sections(§ions), + Err(Error::MissingTdHob) + )); + } + + #[test] + fn test_validate_both_present() { + let sections = vec![bfv_section(), hob_section()]; + assert!(validate_sections(§ions).is_ok()); + } + + #[test] + fn test_hob_address_extracted() { + let hob_addr = 0x5000_0000u64; + let sections = vec![bfv_section(), hob_section()]; + validate_sections(§ions).unwrap(); + let found = sections.iter().find(|s| is_td_hob(s)).unwrap(); + assert_eq!(found.memory_address, hob_addr); + } + + #[test] + fn test_firmware_range() { + let sections = vec![bfv_section(), hob_section()]; + // hob at 0x5000_0000 with size 0x1000 and bfv at 0xffff_0000 with size 0x1000 + let td = TdShim { + hob_address: 0x5000_0000, + sections, + firmware_data: vec![], + }; + let (min, max) = td.firmware_range(); + assert_eq!(min, 0x5000_0000); + assert_eq!(max, 0xffff_0000 + 0x1000); + } + + #[test] + fn test_high_firmware_range_excludes_low_sections() { + let sections = vec![bfv_section(), hob_section()]; + let td = TdShim { + hob_address: 0x5000_0000, + sections, + firmware_data: vec![], + }; + let (start, end) = td.high_firmware_range().expect("BFV is above MMIO gap"); + assert_eq!(start, 0xffff_0000); + assert_eq!(end, 0xffff_0000 + 0x1000); + } + + #[test] + fn test_high_firmware_range_none_when_all_sections_low() { + let sections = vec![ + TdvfSection { + data_offset: 0, + raw_data_size: 0, + memory_address: 0x0080_0000, + memory_data_size: 0x1000, + section_type: TdvfSectionType::TempMem, + attributes: 0, + }, + hob_section(), + ]; + let td = TdShim { + hob_address: 0x5000_0000, + sections, + firmware_data: vec![], + }; + 
assert!(td.high_firmware_range().is_none()); + } + + #[test] + fn test_hob_chain_starts_with_phit() { + let mut buf = vec![0u8; 4096]; + write_hob_chain( + &mut buf, + 0x5000_0000, + 4096, + &[(0, 0x4000_0000)], + 0x0100_0000, + ) + .unwrap(); + let hob_type = u16::from_le_bytes([buf[0], buf[1]]); + assert_eq!(hob_type, 0x0001, "First HOB must be PHIT"); + } + + #[test] + fn test_hob_chain_ends_with_end_hob() { + let mut buf = vec![0u8; 4096]; + write_hob_chain(&mut buf, 0x5000_0000, 4096, &[(0, 0x4000_0000)], 0x100_0000).unwrap(); + let mut offset = 0usize; + let mut found_end = false; + while offset + 4 <= buf.len() { + let hob_type = u16::from_le_bytes([buf[offset], buf[offset + 1]]); + let hob_len = u16::from_le_bytes([buf[offset + 2], buf[offset + 3]]) as usize; + if hob_type == 0xFFFF { + found_end = true; + break; + } + if hob_len == 0 { + break; + } + offset += hob_len; + } + assert!(found_end, "HOB chain must terminate with 0xFFFF"); + } + + #[test] + fn test_hob_chain_too_small_returns_error() { + let mut buf = vec![0u8; 8]; + let result = write_hob_chain(&mut buf, 0x5000_0000, 8, &[(0, 0x1000)], 0x100_0000); + assert!(matches!(result, Err(Error::HobRegionTooSmall))); + } +} From bb4fffcfa12b817ec908cc44824281683f252bb4 Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 13:49:47 -0400 Subject: [PATCH 06/11] vmm/tee: add HOB chain generation to tdshim module Implement write_hob_chain() to build a UEFI Hand-Off Block list that TD-Shim expects: PHIT header, resource descriptors for each RAM region, a payload info HOB with the kernel entry point (RawVmLinux image type), and an end-of-list marker. Add generate_hobs() on TdShim to write the chain into the TdHob section of guest memory. Includes unit tests for HOB chain structure, PHIT validation, and buffer overflow handling. 
Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- src/vmm/src/linux/tee/tdshim.rs | 223 ++++++++++++++++++++++++++++++-- 1 file changed, 211 insertions(+), 12 deletions(-) diff --git a/src/vmm/src/linux/tee/tdshim.rs b/src/vmm/src/linux/tee/tdshim.rs index 04f101ae5..4091f3f76 100644 --- a/src/vmm/src/linux/tee/tdshim.rs +++ b/src/vmm/src/linux/tee/tdshim.rs @@ -2,7 +2,7 @@ use std::fs::File; use std::io::{self, Read, Seek}; use std::path::Path; use tdx::tdvf::{self, TdvfSection, TdvfSectionType}; -use vm_memory::{Bytes, GuestAddress, GuestMemory, GuestMemoryMmap}; +use vm_memory::{ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap}; #[derive(Debug)] pub enum Error { @@ -12,6 +12,7 @@ pub enum Error { InvalidSectionOffset, MissingBfv, MissingTdHob, + HobRegionTooSmall, GuestMemory(vm_memory::GuestMemoryError), } @@ -23,6 +24,171 @@ pub struct TdShim { pub firmware_data: Vec, } +const HOB_TYPE_PHIT: u16 = 0x0001; +const HOB_TYPE_RESOURCE: u16 = 0x0003; +const HOB_TYPE_GUID_EXT: u16 = 0x0004; +const HOB_TYPE_END: u16 = 0xFFFF; +const EFI_RESOURCE_SYSTEM_MEMORY: u32 = 0x00000000; + +const PAYLOAD_INFO_GUID: [u8; 16] = [ + 0x12, 0xa4, 0x6f, 0xb9, 0x1f, 0x46, 0xe3, 0x4b, 0x8c, 0x0d, 0xad, 0x80, 0x5a, 0x49, 0x7a, 0xc0, +]; + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct HobHeader { + hob_type: u16, + hob_length: u16, + reserved: u32, +} +unsafe impl ByteValued for HobHeader {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct PhitHob { + header: HobHeader, + version: u32, + boot_mode: u32, + efi_memory_top: u64, + efi_memory_bottom: u64, + efi_free_memory_top: u64, + efi_free_memory_bottom: u64, + efi_end_of_hob_list: u64, +} +unsafe impl ByteValued for PhitHob {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct ResourceHob { + header: HobHeader, + owner: [u8; 16], + resource_type: u32, + resource_attributes: u32, + physical_start: u64, + resource_length: u64, +} +unsafe impl ByteValued for 
ResourceHob {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct GuidExtHobHeader { + header: HobHeader, + name: [u8; 16], +} +unsafe impl ByteValued for GuidExtHobHeader {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct PayloadInfoHob { + guid_header: GuidExtHobHeader, + image_type: u32, + reserved: u32, + entry_point: u64, +} +unsafe impl ByteValued for PayloadInfoHob {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct EndHob { + header: HobHeader, +} +unsafe impl ByteValued for EndHob {} + +fn append(buf: &mut Vec, val: &T) { + buf.extend_from_slice(val.as_slice()); +} + +pub fn write_hob_chain( + out: &mut [u8], + hob_region_addr: u64, + memory_regions: &[(u64, u64)], + kernel_entry_addr: u64, +) -> Result<()> { + let mut chain: Vec = Vec::new(); + + let phit_offset = chain.len(); + append( + &mut chain, + &PhitHob { + header: HobHeader { + hob_type: HOB_TYPE_PHIT, + hob_length: 56, + reserved: 0, + }, + version: 0x0009, + ..Default::default() + }, + ); + + for &(start, length) in memory_regions { + append( + &mut chain, + &ResourceHob { + header: HobHeader { + hob_type: HOB_TYPE_RESOURCE, + hob_length: std::mem::size_of::() as u16, + reserved: 0, + }, + owner: [0u8; 16], + resource_type: EFI_RESOURCE_SYSTEM_MEMORY, + resource_attributes: 0x307, + physical_start: start, + resource_length: length, + }, + ); + } + + append( + &mut chain, + &PayloadInfoHob { + guid_header: GuidExtHobHeader { + header: HobHeader { + hob_type: HOB_TYPE_GUID_EXT, + hob_length: std::mem::size_of::() as u16, + reserved: 0, + }, + name: PAYLOAD_INFO_GUID, + }, + // libkrunfw packages the kernel as a raw ELF vmlinux (not bzImage). + // Use RawVmLinux (2) so td-shim jumps directly to entry_point rather + // than treating the image as a bzImage and miscalculating startup_64. 
+ image_type: 2, + reserved: 0, + entry_point: kernel_entry_addr, + }, + ); + + append( + &mut chain, + &EndHob { + header: HobHeader { + hob_type: HOB_TYPE_END, + hob_length: 8, + reserved: 0, + }, + }, + ); + + // Patch PHIT: efi_end_of_hob_list points past the End HOB (exclusive end of the chain). + // td-shim validates: hob_length == efi_end_of_hob_list - hob_ptr, where + // hob_length = end_hob_offset + size_of::
() = chain.len(). + let end_of_list_addr = hob_region_addr + chain.len() as u64; + unsafe { + let phit_ptr = chain[phit_offset..].as_mut_ptr() as *mut PhitHob; + std::ptr::write_unaligned( + std::ptr::addr_of_mut!((*phit_ptr).efi_end_of_hob_list), + end_of_list_addr, + ); + } + + if chain.len() > out.len() { + return Err(Error::HobRegionTooSmall); + } + + out[..chain.len()].copy_from_slice(&chain); + Ok(()) +} + fn is_bfv(s: &TdvfSection) -> bool { matches!(s.section_type, TdvfSectionType::Bfv) } @@ -134,7 +300,6 @@ impl TdShim { write_hob_chain( &mut buf, hob_section.memory_address, - hob_size, ram_regions, kernel_entry_addr, )?; @@ -257,14 +422,7 @@ mod tests { #[test] fn test_hob_chain_starts_with_phit() { let mut buf = vec![0u8; 4096]; - write_hob_chain( - &mut buf, - 0x5000_0000, - 4096, - &[(0, 0x4000_0000)], - 0x0100_0000, - ) - .unwrap(); + write_hob_chain(&mut buf, 0x5000_0000, &[(0, 0x4000_0000)], 0x0100_0000).unwrap(); let hob_type = u16::from_le_bytes([buf[0], buf[1]]); assert_eq!(hob_type, 0x0001, "First HOB must be PHIT"); } @@ -272,7 +430,7 @@ mod tests { #[test] fn test_hob_chain_ends_with_end_hob() { let mut buf = vec![0u8; 4096]; - write_hob_chain(&mut buf, 0x5000_0000, 4096, &[(0, 0x4000_0000)], 0x100_0000).unwrap(); + write_hob_chain(&mut buf, 0x5000_0000, &[(0, 0x4000_0000)], 0x100_0000).unwrap(); let mut offset = 0usize; let mut found_end = false; while offset + 4 <= buf.len() { @@ -293,7 +451,48 @@ mod tests { #[test] fn test_hob_chain_too_small_returns_error() { let mut buf = vec![0u8; 8]; - let result = write_hob_chain(&mut buf, 0x5000_0000, 8, &[(0, 0x1000)], 0x100_0000); + let result = write_hob_chain(&mut buf, 0x5000_0000, &[(0, 0x1000)], 0x100_0000); assert!(matches!(result, Err(Error::HobRegionTooSmall))); } + + #[test] + fn test_phit_efi_end_of_hob_list_points_past_end_hob() { + // td-shim validates: hob_length == efi_end_of_hob_list - hob_region_addr + // where hob_length = end_hob_offset + 8 = chain.len() + let hob_region_addr = 
0x5000_0000u64; + let mut buf = vec![0u8; 4096]; + write_hob_chain( + &mut buf, + hob_region_addr, + 4096, + &[(0, 0x4000_0000)], + 0x100_0000, + ) + .unwrap(); + + // Read efi_end_of_hob_list from PHIT at offset 48 (after header+version+boot_mode+3×u64) + let efi_end = u64::from_le_bytes(buf[48..56].try_into().unwrap()); + + // Find the End HOB and compute hob_length = end_hob_offset + 8 + let mut offset = 0usize; + let mut end_hob_offset = None; + while offset + 4 <= buf.len() { + let hob_type = u16::from_le_bytes([buf[offset], buf[offset + 1]]); + let hob_len = u16::from_le_bytes([buf[offset + 2], buf[offset + 3]]) as usize; + if hob_type == 0xFFFF { + end_hob_offset = Some(offset); + break; + } + if hob_len == 0 { + break; + } + offset += hob_len; + } + let hob_length = end_hob_offset.unwrap() + 8; + assert_eq!( + efi_end, + hob_region_addr + hob_length as u64, + "efi_end_of_hob_list must equal hob_region_addr + chain.len()" + ); + } } From 839f8095ef4bf93f11904f22a49523982a867d51 Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 14:03:44 -0400 Subject: [PATCH 07/11] arch: accept dynamic firmware hole in TEE arch_memory_regions Replace the unused firmware_size parameter with a firmware_range tuple (start_addr, size) so the TEE memory layout can accommodate firmware at addresses other than the fixed qboot FIRMWARE_START. TD-Shim places sections at varying addresses; this lets the caller pass the actual range from high_firmware_range() instead of assuming the qboot layout. Falls back to the qboot defaults when no range is provided. 
Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- src/arch/src/x86_64/mod.rs | 26 ++++++++++++++++++++------ src/vmm/src/builder.rs | 13 ++++++------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/arch/src/x86_64/mod.rs b/src/arch/src/x86_64/mod.rs index 7c4b6c83d..d9ad7673f 100644 --- a/src/arch/src/x86_64/mod.rs +++ b/src/arch/src/x86_64/mod.rs @@ -177,7 +177,7 @@ pub fn arch_memory_regions( kernel_load_addr: Option, kernel_size: usize, _initrd_size: u64, - _firmware_size: Option, + firmware_range: Option<(u64, usize)>, ) -> (ArchMemoryInfo, Vec<(GuestAddress, usize)>) { let page_size: usize = unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() }; @@ -196,21 +196,22 @@ pub fn arch_memory_regions( None | Some(0) => { let ram_last_addr = size as u64; let shm_start_addr = 0u64; + let (fw_addr, fw_sz) = + firmware_range.unwrap_or((FIRMWARE_START, FIRMWARE_SIZE as usize)); ( size as u64, 0, ram_last_addr, shm_start_addr, - vec![ - (GuestAddress(0), size), - (GuestAddress(FIRMWARE_START), FIRMWARE_SIZE as usize), - ], + vec![(GuestAddress(0), size), (GuestAddress(fw_addr), fw_sz)], ) } // case2: guest memory extends beyond the gap Some(remaining) => { let ram_last_addr = FIRST_ADDR_PAST_32BITS + remaining as u64; let shm_start_addr = 0u64; + let (fw_addr, fw_sz) = + firmware_range.unwrap_or((FIRMWARE_START, FIRMWARE_SIZE as usize)); ( MMIO_MEM_START, remaining as u64, @@ -218,7 +219,7 @@ pub fn arch_memory_regions( shm_start_addr, vec![ (GuestAddress(0), MMIO_MEM_START as usize), - (GuestAddress(FIRMWARE_START), FIRMWARE_SIZE as usize), + (GuestAddress(fw_addr), fw_sz), (GuestAddress(FIRST_ADDR_PAST_32BITS), remaining), ], ) @@ -431,6 +432,19 @@ mod tests { configure_system(&gm, &arch_mem_info, GuestAddress(0), 0, &None, no_vcpus).unwrap(); } + #[cfg(feature = "tee")] + #[test] + fn test_arch_memory_regions_tee_dynamic_firmware_hole() { + let fw_start = 0xfffe_0000u64; + let fw_size = 0x2_0000usize; + let 
(_info, regions) = + arch_memory_regions(1usize << 29, None, 0, 0, Some((fw_start, fw_size))); + let has_fw_region = regions + .iter() + .any(|&(addr, size)| addr.0 == fw_start && size == fw_size); + assert!(has_fw_region, "firmware region not found in memory regions"); + } + #[test] fn test_add_e820_entry() { let e820_map = [(e820entry { diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 52eafadaa..e01ff0fd2 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1546,13 +1546,9 @@ pub fn create_guest_memory( }; arch::arch_memory_regions(mem_size, Some(kernel_guest_addr), kernel_size, 0, None) } - Payload::ExternalKernel(external_kernel) => arch::arch_memory_regions( - mem_size, - None, - 0, - external_kernel.initramfs_size, - firmware_size, - ), + Payload::ExternalKernel(external_kernel) => { + arch::arch_memory_regions(mem_size, None, 0, external_kernel.initramfs_size, None) + } #[cfg(feature = "tee")] Payload::Tee => { let (kernel_guest_addr, kernel_size) = @@ -1565,7 +1561,10 @@ pub fn create_guest_memory( } #[cfg(test)] Payload::Empty => arch::arch_memory_regions(mem_size, None, 0, 0, None), + #[cfg(not(feature = "tee"))] Payload::Firmware => arch::arch_memory_regions(mem_size, None, 0, 0, firmware_size), + #[cfg(feature = "tee")] + Payload::Firmware => arch::arch_memory_regions(mem_size, None, 0, 0, None), }; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] let (arch_mem_info, mut arch_mem_regions) = match payload { From db84ecd7395288fad375496450dd8cc2835ae81e Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 14:20:10 -0400 Subject: [PATCH 08/11] vmm: wire TD-Shim into TDX build path with HOB address support Parse TD-Shim firmware early to extract firmware_range, pass it to create_guest_memory so arch_memory_regions can carve the correct hole. 
In the TDX measured_regions block, branch on td_shim_parsed: load sections, build RAM HOBs (excluding the firmware hole), write the HOB chain, collect per-section MeasuredRegions, and return the HOB address. Pass that address to tdx_secure_virt_init_vcpus instead of 0. Also fix five MeasuredRegion initializers in amdsnp/mod.rs that were missing the attributes field added in an earlier task. Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- src/vmm/src/builder.rs | 157 +++++++++++++++++++++++----- src/vmm/src/linux/tee/amdsnp/mod.rs | 5 + src/vmm/src/linux/vstate.rs | 8 +- 3 files changed, 143 insertions(+), 27 deletions(-) diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index e01ff0fd2..3a369a964 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -51,6 +51,8 @@ use devices::virtio::{port_io, MmioTransport, PortDescription, VirtioDevice, Vso use kbs_types::Tee; use crate::device_manager; +#[cfg(feature = "tdx")] +use crate::linux::tee::tdshim::TdShim; #[cfg(all(feature = "vhost-user", target_os = "linux"))] use crate::resources::VhostUserDeviceConfig; #[cfg(target_os = "linux")] @@ -99,6 +101,8 @@ use vm_memory::Bytes; use vm_memory::FileOffset; #[cfg(not(feature = "aws-nitro"))] use vm_memory::GuestMemory; +#[cfg(feature = "tdx")] +use vm_memory::GuestMemoryRegion; #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] use vm_memory::GuestRegionMmap; use vm_memory::{GuestAddress, GuestMemoryMmap}; @@ -568,6 +572,23 @@ pub fn build_microvm( ) -> std::result::Result>, StartMicrovmError> { let payload = choose_payload(vm_resources)?; + #[cfg(feature = "tdx")] + let td_shim_parsed = if let Some(tee_fw_cfg) = &vm_resources.tee_firmware_config { + Some(TdShim::parse(&tee_fw_cfg.path).map_err(|e| { + StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) + })?) 
+ } else { + None + }; + + #[cfg(feature = "tdx")] + let fw_range_for_mem = td_shim_parsed.as_ref().and_then(|ts| { + ts.high_firmware_range() + .map(|(start, end)| (start, (end - start) as usize)) + }); + #[cfg(all(feature = "tee", not(feature = "tdx")))] + let fw_range_for_mem: Option<(u64, usize)> = None; + let (guest_memory, arch_memory_info, mut _shm_manager, payload_config) = create_guest_memory( vm_resources .vm_config() @@ -575,6 +596,8 @@ pub fn build_microvm( .ok_or(StartMicrovmError::MissingMemSizeConfig)?, vm_resources, &payload, + #[cfg(feature = "tee")] + fw_range_for_mem, )?; let vcpu_config = vm_resources.vcpu_config(); @@ -705,31 +728,107 @@ pub fn build_microvm( }; #[cfg(feature = "tdx")] - let measured_regions = { + let (measured_regions, tdx_hob_address) = { println!("Injecting and measuring memory regions. This may take a while."); - let qboot_size = if let Some(qboot_bundle) = &vm_resources.qboot_bundle { - qboot_bundle.size - } else { - return Err(StartMicrovmError::MissingKernelConfig); - }; - let m = vec![ - MeasuredRegion { - guest_addr: 0, - host_addr: guest_memory.get_host_address(GuestAddress(0)).unwrap() as u64, - size: 0x8000_0000, - attributes: 0, - }, - MeasuredRegion { - guest_addr: arch::FIRMWARE_START, - host_addr: guest_memory - .get_host_address(GuestAddress(arch::FIRMWARE_START)) - .unwrap() as u64, - size: qboot_size, - attributes: 1, - }, - ]; - m + if let Some(td_shim) = td_shim_parsed { + // TD-Shim path + let firmware_data = + std::fs::read(&vm_resources.tee_firmware_config.as_ref().unwrap().path) + .map_err(StartMicrovmError::FirmwareRead)?; + td_shim + .load_sections(&firmware_data, &guest_memory) + .map_err(|e| { + StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) + })?; + + let high_fw = td_shim.high_firmware_range(); + let ram_regions: Vec<(u64, u64)> = guest_memory + .iter() + .filter_map(|region| { + let start = region.start_addr().0; + let len = region.len(); + // Exclude only the 
high-memory firmware region from RAM HOBs. + if let Some((fw_start, fw_end)) = high_fw { + if start >= fw_start && start < fw_end { + return None; + } + } + Some((start, len)) + }) + .collect(); + + let kernel_bundle = vm_resources + .kernel_bundle + .as_ref() + .ok_or(StartMicrovmError::MissingKernelConfig)?; + + // libkrunfw packages a raw ELF vmlinux, not a bzImage. The entry_addr + // is the ELF e_entry (startup_64 physical address), which td-shim + // uses directly with PayloadImageTypeRawVmLinux. + td_shim + .generate_hobs(&guest_memory, kernel_bundle.entry_addr, &ram_regions) + .map_err(|e| { + StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) + })?; + + // Build measured regions the same way as qboot: all RAM as one block + // (attributes=0, add but don't measure), plus the high firmware sections + // (BFV etc.) with their per-section attributes. Low-address TDVF sections + // (TempMem, TD_HOB) fall inside the RAM range and must not be added + // separately — TDX rejects duplicate TDH.MEM.PAGE.ADD calls. 
+ let mut regions: Vec<MeasuredRegion> = guest_memory + .iter() + .filter(|r| r.start_addr().0 < arch::x86_64::layout::MMIO_MEM_START) + .map(|r| MeasuredRegion { + guest_addr: r.start_addr().0, + host_addr: guest_memory.get_host_address(r.start_addr()).unwrap() as u64, + size: r.len() as usize, + attributes: 0, + }) + .collect(); + + for section in &td_shim.sections { + if section.memory_address >= arch::x86_64::layout::MMIO_MEM_START { + regions.push(MeasuredRegion { + guest_addr: section.memory_address, + host_addr: guest_memory + .get_host_address(GuestAddress(section.memory_address)) + .unwrap() as u64, + size: section.memory_data_size as usize, + attributes: section.attributes, + }); + } + } + + let hob_addr = td_shim.hob_address; + + (regions, hob_addr) + } else { + // qboot path (unchanged) + let qboot_size = if let Some(qboot_bundle) = &vm_resources.qboot_bundle { + qboot_bundle.size + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + let regions = vec![ + MeasuredRegion { + guest_addr: 0, + host_addr: guest_memory.get_host_address(GuestAddress(0)).unwrap() as u64, + size: 0x8000_0000, + attributes: 0, + }, + MeasuredRegion { + guest_addr: arch::FIRMWARE_START, + host_addr: guest_memory + .get_host_address(GuestAddress(arch::FIRMWARE_START)) + .unwrap() as u64, + size: qboot_size, + attributes: 1, + }, + ]; + (regions, 0u64) + } }; let mut serial_devices = Vec::new(); @@ -851,7 +950,8 @@ pub fn build_microvm( for vcpu in &vcpus { vcpu.tdx_secure_virt_prepare(&mut tdx_launcher); } - vm.tdx_secure_virt_init_vcpus(&mut tdx_launcher).unwrap(); + vm.tdx_secure_virt_init_vcpus(&mut tdx_launcher, tdx_hob_address) + .unwrap(); } // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) and configured before @@ -1520,6 +1620,7 @@ pub fn create_guest_memory( mem_size: usize, vm_resources: &VmResources, payload: &Payload, + #[cfg(feature = "tee")] firmware_range: Option<(u64, usize)>, ) -> std::result::Result< (GuestMemoryMmap, ArchMemoryInfo, 
ShmManager, PayloadConfig), StartMicrovmError, @@ -1557,7 +1658,13 @@ pub fn create_guest_memory( } else { return Err(StartMicrovmError::MissingKernelConfig); }; - arch::arch_memory_regions(mem_size, Some(kernel_guest_addr), kernel_size, 0, None) + arch::arch_memory_regions( + mem_size, + Some(kernel_guest_addr), + kernel_size, + 0, + firmware_range, + ) } #[cfg(test)] Payload::Empty => arch::arch_memory_regions(mem_size, None, 0, 0, None), diff --git a/src/vmm/src/linux/tee/amdsnp/mod.rs b/src/vmm/src/linux/tee/amdsnp/mod.rs index 1474a0be8..abeb3e0d7 100644 --- a/src/vmm/src/linux/tee/amdsnp/mod.rs +++ b/src/vmm/src/linux/tee/amdsnp/mod.rs @@ -323,6 +323,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_LIDT_START)) .unwrap() as u64, size: 0x1000, + attributes: 0, }, &mut launcher, PageType::Zero, @@ -337,6 +338,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_SECRETS_START)) .unwrap() as u64, size: 0x1000, + attributes: 0, }, &mut launcher, PageType::Secrets, @@ -352,6 +354,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_CPUID_START)) .unwrap() as u64, size: 0x1000, + attributes: 0, }, &mut launcher, PageType::Cpuid, @@ -368,6 +371,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_CPUID_START)) .unwrap() as u64, size: 0x1000, + attributes: 0, }, &mut launcher, PageType::Cpuid, @@ -385,6 +389,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_FWDATA_START)) .unwrap() as u64, size: SNP_FWDATA_SIZE, + attributes: 0, }, &mut launcher, PageType::Zero, diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index 9024a6e5e..5b24743d6 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -763,10 +763,14 @@ impl Vm { } #[cfg(feature = "tdx")] - pub fn tdx_secure_virt_init_vcpus(&self, launcher: &mut tdx::launch::Launcher) -> Result<()> { + pub fn tdx_secure_virt_init_vcpus( + &self, + launcher: &mut tdx::launch::Launcher, + hob_address: u64, + ) -> Result<()> { match &self.tdx { Some(_) => { - 
launcher.init_vcpus(0).unwrap(); + launcher.init_vcpus(hob_address).unwrap(); Ok(()) } None => Err(Error::InvalidTee), From 9b275ac87985b2ac34852662e6d96b01d27f94ad Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 14:44:14 -0400 Subject: [PATCH 09/11] examples/launch-tee: add TD-Shim support and console configuration Add --td-shim flag to specify a TD-Shim firmware binary. Configure a serial console on ttyS0 for kernel output, a virtio console for the shell, and disable the implicit console to prevent I/O redirection to /dev/null. Use RAW disk format and set up split irqchip for TDX. Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- examples/launch-tee.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/examples/launch-tee.c b/examples/launch-tee.c index 063cdd5f3..712bb32c0 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -30,6 +31,11 @@ int main(int argc, char *const argv[]) "6=4096:8192", 0 }; + static const struct option long_opts[] = { + { "td-shim", required_argument, 0, 's' }, + { 0, 0, 0, 0 } + }; + const char *td_shim_path = NULL; char current_path[MAX_PATH]; char volume_tail[] = ":/work\0"; char *volume; @@ -37,10 +43,22 @@ int main(int argc, char *const argv[]) int ctx_id; int err; int i; + int opt; + + while ((opt = getopt_long(argc, argv, "", long_opts, NULL)) != -1) { + switch (opt) { + case 's': + td_shim_path = optarg; + break; + default: + printf("Usage: %s [--td-shim PATH] ROOT_DISK_IMAGE TEE_CONFIG_FILE DATA_DISK_IMAGE\n", argv[0]); + return -1; + } + } - if (argc != 4) { + if (argc - optind != 3) { printf("Invalid arguments\n"); - printf("Usage: %s ROOT_DISK_IMAGE TEE_CONFIG_FILE DATA_DISK_IMAGE\n", argv[0]); + printf("Usage: %s [--td-shim PATH] ROOT_DISK_IMAGE TEE_CONFIG_FILE DATA_DISK_IMAGE\n", argv[0]); return -1; } @@ -67,8 +85,8 @@ int main(int argc, char 
*const argv[]) return -1; } - // Use the first command line argument as the disk image containing the root fs. - if (err = krun_set_root_disk(ctx_id, argv[1])) { + // Use the first positional argument as the disk image containing the root fs. + if (err = krun_add_disk2(ctx_id, "root", argv[optind], KRUN_DISK_FORMAT_RAW, false)) { errno = -err; perror("Error configuring root disk image"); return -1; @@ -108,13 +126,21 @@ int main(int argc, char *const argv[]) return -1; } - if (err = krun_set_tee_config_file(ctx_id, argv[2])) { + if (err = krun_set_tee_config_file(ctx_id, argv[optind + 1])) { errno = -err; perror("Error setting the TEE config file"); return -1; } - if (err = krun_set_data_disk(ctx_id, argv[3])) { + if (td_shim_path != NULL) { + if (err = krun_set_tee_firmware(ctx_id, td_shim_path)) { + errno = -err; + perror("Error setting TD-Shim firmware path"); + return -1; + } + } + + if (err = krun_add_disk2(ctx_id, "data", argv[optind + 2], KRUN_DISK_FORMAT_RAW, false)) { errno = -err; perror("Error configuring the TEE config data disk"); return -1; From 21c6cb5c5fa5e9d40a7b1482df4af8dc04a5eb58 Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Tue, 19 May 2026 16:10:20 -0400 Subject: [PATCH 10/11] vmm/tee: write cmdline to guest memory for TD-Shim; add serial console to example For the TD-Shim boot path, load_cmdline is now called to write the kernel command line string to CMDLINE_START (0x20000). configure_system already sets cmd_line_ptr in boot_params to point there, so TD-Shim can reference the cmdline when setting up the Linux boot environment. The launch-tee example now adds a legacy serial device (ttyS0) wired to stdout so early kernel boot messages are visible. The kernel cmdline must include console=ttyS0 for output to appear. 
Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- examples/launch-tee.c | 17 ++++++++++++++++- src/vmm/src/builder.rs | 9 ++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/examples/launch-tee.c b/examples/launch-tee.c index 712bb32c0..21f15905a 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c @@ -86,7 +86,7 @@ int main(int argc, char *const argv[]) } // Use the first positional argument as the disk image containing the root fs. - if (err = krun_add_disk2(ctx_id, "root", argv[optind], KRUN_DISK_FORMAT_RAW, false)) { + if (err = krun_add_disk2(ctx_id, "root", argv[optind], KRUN_DISK_FORMAT_QCOW2, false)) { errno = -err; perror("Error configuring root disk image"); return -1; @@ -146,6 +146,21 @@ int main(int argc, char *const argv[]) return -1; } + // Add a legacy serial device (ttyS0) wired to stdout so early kernel boot + // messages reach the terminal, and override the default console= parameter + // (which is hvc0) to redirect kernel output to this serial port. + if (err = krun_add_serial_console_default(ctx_id, -1, STDOUT_FILENO)) { + errno = -err; + perror("Error adding serial console"); + return -1; + } + + if (err = krun_set_kernel_console(ctx_id, "ttyS0")) { + errno = -err; + perror("Error setting kernel console"); + return -1; + } + if (err = krun_split_irqchip(ctx_id, true)) { errno = -err; perror("Error setting split IRQCHIP property"); diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 3a369a964..ff9b95d00 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1196,8 +1196,15 @@ pub fn build_microvm( // Write the kernel command line to guest memory. This is x86_64 specific, since on // aarch64 the command line will be specified through the FDT. + // For the TD-Shim path, the cmdline is written so TD-Shim can reference it when + // populating boot_params for the Linux kernel (cmd_line_ptr already points here + // via configure_system). 
#[cfg(all(target_arch = "x86_64", not(feature = "tee")))] load_cmdline(&vmm)?; + #[cfg(all(target_arch = "x86_64", feature = "tdx"))] + if vm_resources.tee_firmware_config.is_some() { + load_cmdline(&vmm)?; + } vmm.configure_system( vcpus.as_slice(), @@ -1786,7 +1793,7 @@ pub fn create_guest_memory( Ok((guest_mem, arch_mem_info, shm_manager, payload_config)) } -#[cfg(all(target_arch = "x86_64", not(feature = "tee")))] +#[cfg(all(target_arch = "x86_64", any(not(feature = "tee"), feature = "tdx")))] fn load_cmdline(vmm: &Vmm) -> std::result::Result<(), StartMicrovmError> { kernel::loader::load_cmdline( vmm.guest_memory(), From fea9b415c0693e2b7a5d526fda996aeb479ec7dc Mon Sep 17 00:00:00 2001 From: Jake Correnti Date: Wed, 20 May 2026 13:14:13 -0400 Subject: [PATCH 11/11] vmm/tee: write initrd trampoline before setup_vm for TD-Shim boot td-shim (RawVmLinux) creates boot_params without ramdisk_image/ramdisk_size. Write a 38-byte x86_64 trampoline into the kernel image, immediately before startup_64, and the firmware sections into guest memory BEFORE setup_vm()/memory_init(), while pages are still plain anonymous mmap. After KVM registers the memory slots, TDH.MEM.PAGE.ADD copies the shared content to the TD's private memory, so writing before that point guarantees the TD sees the trampoline. The trampoline patches td-shim's boot_params (RSI) with the libkrunfw initrd location (INITRD_SEV_START = 0xa00000), the initrd size, and cmd_line_ptr (CMDLINE_START) before jumping to startup_64, letting the kernel find and unpack the initrd normally. 
Assisted-by: Claude Code: claude-sonnet-4-6 Signed-off-by: Jake Correnti --- examples/launch-tee.c | 28 +++++++--- src/arch/src/x86_64/mod.rs | 12 ++++- src/libkrun/src/lib.rs | 35 +++++++++---- src/vmm/src/builder.rs | 105 +++++++++++++++++++++++++++++++------ 4 files changed, 145 insertions(+), 35 deletions(-) diff --git a/examples/launch-tee.c b/examples/launch-tee.c index 21f15905a..d6daf3a15 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c @@ -86,7 +86,7 @@ int main(int argc, char *const argv[]) } // Use the first positional argument as the disk image containing the root fs. - if (err = krun_add_disk2(ctx_id, "root", argv[optind], KRUN_DISK_FORMAT_QCOW2, false)) { + if (err = krun_add_disk2(ctx_id, "root", argv[optind], KRUN_DISK_FORMAT_RAW, false)) { errno = -err; perror("Error configuring root disk image"); return -1; @@ -146,18 +146,32 @@ int main(int argc, char *const argv[]) return -1; } - // Add a legacy serial device (ttyS0) wired to stdout so early kernel boot - // messages reach the terminal, and override the default console= parameter - // (which is hvc0) to redirect kernel output to this serial port. - if (err = krun_add_serial_console_default(ctx_id, -1, STDOUT_FILENO)) { + // Serial console (ttyS0) for early boot messages (earlyprintk=ttyS0) and + // the main kernel console (console=ttyS0). Shell stdin/stdout inherit ttyS0. + if (err = krun_add_serial_console_default(ctx_id, STDIN_FILENO, STDOUT_FILENO)) { errno = -err; perror("Error adding serial console"); return -1; } - if (err = krun_set_kernel_console(ctx_id, "ttyS0")) { + // Disable the implicit virtio console: without explicit TTY fds, the implicit + // console creates krun-stdin/krun-stdout ports connected to /dev/null. + // setup_redirects() in init.krun finds those ports and silently redirects the + // shell's stdin/stdout to /dev/null, making all I/O disappear. 
+ if (err = krun_disable_implicit_console(ctx_id)) { errno = -err; - perror("Error setting kernel console"); + perror("Error disabling implicit console"); + return -1; + } + + // Add an explicit virtio console (hvc0) with real TTY fds. With TTY fds, + // libkrun creates only a single console port — no krun-stdin/krun-stdout + // ports — so setup_redirects() finds /sys/class/virtio-ports (satisfying + // the existence check), iterates it, finds nothing to redirect, and returns 0. + // The shell then runs with the inherited ttyS0 fds from the serial console. + if (err = krun_add_virtio_console_default(ctx_id, STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO)) { + errno = -err; + perror("Error adding virtio console"); return -1; } diff --git a/src/arch/src/x86_64/mod.rs b/src/arch/src/x86_64/mod.rs index d9ad7673f..d2a18f3a7 100644 --- a/src/arch/src/x86_64/mod.rs +++ b/src/arch/src/x86_64/mod.rs @@ -10,7 +10,7 @@ mod gdt; pub mod interrupts; /// Layout for the x86_64 system. pub mod layout; -#[cfg(not(feature = "tee"))] +#[cfg(any(not(feature = "tee"), feature = "tdx"))] mod mptable; /// Logic for configuring x86_64 model specific registers (MSRs). pub mod msr; @@ -43,7 +43,7 @@ pub enum Error { /// Invalid e820 setup params. E820Configuration, /// Error writing MP table to memory. - #[cfg(not(feature = "tee"))] + #[cfg(any(not(feature = "tee"), feature = "tdx"))] MpTableSetup(mptable::Error), /// Error writing the zero page of guest memory. ZeroPageSetup, @@ -242,6 +242,14 @@ pub fn arch_memory_regions( /// # Arguments /// /// * `guest_mem` - The memory to be used by the guest. +/// Writes an MP table to guest memory. Only needed for the TD-Shim path: TD-Shim's +/// ACPI MADT has no IOAPIC entry, so without an MP table the kernel never programs +/// the IOAPIC and virtio-mmio IRQs stop working after the PIC→APIC transition. 
+#[cfg(feature = "tdx")] +pub fn setup_mptable_for_tdshim(guest_mem: &GuestMemoryMmap, num_cpus: u8) -> super::Result<()> { + mptable::setup_mptable(guest_mem, num_cpus).map_err(Error::MpTableSetup) +} + /// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded. /// * `cmdline_size` - Size of the kernel command line in bytes including the null terminator. /// * `initrd` - Information about where the ramdisk image was loaded in the `guest_mem`. diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index fc1b3d898..3bc1f04dc 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -2733,17 +2733,34 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { .set_tee_firmware_config(FirmwareConfig { path: fw_path }); } + // In the TD-Shim path, init.krun reads its exec config from the TEE config on + // the data disk (vdb), not from the kernel cmdline. Including krun_env and the + // " -- " epilog here would cause init.krun to take the non-TEE vsock code path + // and block waiting for a host-side exec command that never arrives. 
+ #[cfg(feature = "tdx")] + let is_tdshim = ctx_cfg.vmr.tee_firmware_config.is_some(); + #[cfg(not(feature = "tdx"))] + let is_tdshim = false; + let kernel_cmdline = KernelCmdlineConfig { prolog: Some(format!("{DEFAULT_KERNEL_CMDLINE} init={INIT_PATH}")), - krun_env: Some(format!( - " {} {} {} {} {}", - ctx_cfg.get_exec_path(), - ctx_cfg.get_workdir(), - ctx_cfg.get_block_root(), - ctx_cfg.get_rlimits(), - ctx_cfg.get_env(), - )), - epilog: Some(format!(" -- {}", ctx_cfg.get_args())), + krun_env: if is_tdshim { + None + } else { + Some(format!( + " {} {} {} {} {}", + ctx_cfg.get_exec_path(), + ctx_cfg.get_workdir(), + ctx_cfg.get_block_root(), + ctx_cfg.get_rlimits(), + ctx_cfg.get_env(), + )) + }, + epilog: if is_tdshim { + None + } else { + Some(format!(" -- {}", ctx_cfg.get_args())) + }, }; if ctx_cfg.vmr.set_kernel_cmdline(kernel_cmdline).is_err() { diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index ff9b95d00..19d3a89de 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -632,6 +632,84 @@ pub fn build_microvm( kernel_cmdline.insert_str(cmdline).unwrap(); } + // Write the TD-Shim initrd trampoline and firmware sections into guest memory + // BEFORE setup_vm()/memory_init(). At this point the GuestMemoryMmap is backed + // by plain anonymous mmap pages. Once KVM registers the memory slots (memory_init), + // pages become KVM_MEM_PRIVATE and TDH.MEM.PAGE.ADD copies the shared content to + // the TD's private memory — so any writes here are guaranteed to reach the TD. 
+ #[cfg(feature = "tdx")] + if let Some(ref td_shim) = td_shim_parsed { + td_shim + .load_sections(&td_shim.firmware_data, &guest_memory) + .map_err(|e| { + StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) + })?; + + arch::x86_64::setup_mptable_for_tdshim( + &guest_memory, + vm_resources.vm_config().vcpu_count.unwrap_or(1), + ) + .map_err(|e| StartMicrovmError::Internal(Error::ConfigureSystem(e)))?; + + if let (Some(kernel_bundle), Some(initrd_bundle)) = + (&vm_resources.kernel_bundle, &vm_resources.initrd_bundle) + { + let startup_64_addr = kernel_bundle.entry_addr; + let initrd_guest_addr: u32 = arch::x86_64::layout::INITRD_SEV_START as u32; + let initrd_size: u32 = initrd_bundle.size as u32; + + // Place the trampoline INSIDE the kernel image, just before startup_64. + // The HOB entry_point must be in the kernel's address range — any address + // outside (like 0x801000 in the TD_HOB section) causes td-shim to treat + // the jump target as HOB data and behave incorrectly before the kernel runs. + // + // Trampoline (38 bytes): writes ramdisk_image/ramdisk_size/cmd_line_ptr into td-shim's + // boot_params (RSI) then falls through to startup_64. + // + // The bytes before startup_64 are PE/COFF header data that td-shim's + // RawVmLinux boot path never reads or executes, so overwriting them is safe. + // Trampoline patches td-shim's boot_params (RSI) with: + // ramdisk_image [rsi+0x218] = INITRD_SEV_START + // ramdisk_size [rsi+0x21c] = initrd_bundle.size + // cmd_line_ptr [rsi+0x228] = CMDLINE_START (0x20000) + // + // td-shim creates its own boot_params with cmd_line_ptr=0, so the + // kernel falls back to CONFIG_CMDLINE and we have no way to inject a + // custom cmdline. Patching cmd_line_ptr here lets load_cmdline's + // content (written to CMDLINE_START) reach the kernel. 
+ const TRAMPOLINE_SIZE: u64 = 38; + let trampoline_addr = startup_64_addr - TRAMPOLINE_SIZE; + + let mut trampoline: Vec<u8> = Vec::with_capacity(TRAMPOLINE_SIZE as usize); + // mov eax, initrd_guest_addr — ramdisk_image value + trampoline.push(0xB8); + trampoline.extend_from_slice(&initrd_guest_addr.to_le_bytes()); + // mov [rsi+0x218], eax — write ramdisk_image to boot_params + trampoline.extend_from_slice(&[0x89, 0x86, 0x18, 0x02, 0x00, 0x00]); + // mov eax, initrd_size — ramdisk_size value + trampoline.push(0xB8); + trampoline.extend_from_slice(&initrd_size.to_le_bytes()); + // mov [rsi+0x21c], eax — write ramdisk_size to boot_params + trampoline.extend_from_slice(&[0x89, 0x86, 0x1C, 0x02, 0x00, 0x00]); + // mov eax, CMDLINE_START (0x20000) — cmd_line_ptr value + trampoline.push(0xB8); + trampoline + .extend_from_slice(&(arch::x86_64::layout::CMDLINE_START as u32).to_le_bytes()); + // mov [rsi+0x228], eax — write cmd_line_ptr to boot_params + trampoline.extend_from_slice(&[0x89, 0x86, 0x28, 0x02, 0x00, 0x00]); + // jmp startup_64 (rel32=0 since startup_64 immediately follows) + trampoline.extend_from_slice(&[0xE9, 0x00, 0x00, 0x00, 0x00]); + + debug_assert_eq!(trampoline.len() as u64, TRAMPOLINE_SIZE); + + guest_memory + .write(&trampoline, GuestAddress(trampoline_addr)) + .map_err(|e| { + StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) + })?; + } + } + #[cfg(not(feature = "tee"))] #[allow(unused_mut)] let mut vm = setup_vm(&guest_memory, vm_resources.nested_enabled)?; @@ -732,16 +810,6 @@ pub fn build_microvm( println!("Injecting and measuring memory regions. 
This may take a while."); if let Some(td_shim) = td_shim_parsed { - // TD-Shim path - let firmware_data = - std::fs::read(&vm_resources.tee_firmware_config.as_ref().unwrap().path) - .map_err(StartMicrovmError::FirmwareRead)?; - td_shim - .load_sections(&firmware_data, &guest_memory) - .map_err(|e| { - StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) - })?; - let high_fw = td_shim.high_firmware_range(); let ram_regions: Vec<(u64, u64)> = guest_memory .iter() @@ -758,16 +826,19 @@ pub fn build_microvm( }) .collect(); - let kernel_bundle = vm_resources + const TRAMPOLINE_SIZE: u64 = 38; + let startup_64 = vm_resources .kernel_bundle .as_ref() - .ok_or(StartMicrovmError::MissingKernelConfig)?; - - // libkrunfw packages a raw ELF vmlinux, not a bzImage. The entry_addr - // is the ELF e_entry (startup_64 physical address), which td-shim - // uses directly with PayloadImageTypeRawVmLinux. + .ok_or(StartMicrovmError::MissingKernelConfig)? + .entry_addr; + let hob_entry_point = if vm_resources.initrd_bundle.is_some() { + startup_64 - TRAMPOLINE_SIZE + } else { + startup_64 + }; td_shim - .generate_hobs(&guest_memory, kernel_bundle.entry_addr, &ram_regions) + .generate_hobs(&guest_memory, hob_entry_point, &ram_regions) .map_err(|e| { StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) })?;