diff --git a/Cargo.lock b/Cargo.lock index ecb90d195..b4e2c4cb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -625,7 +625,7 @@ dependencies = [ "libc", "tdx", "vm-memory", - "vmm-sys-util 0.14.0", + "vmm-sys-util", ] [[package]] @@ -652,7 +652,7 @@ version = "0.1.0-1.18.0" dependencies = [ "kvm-bindings", "kvm-ioctls", - "vmm-sys-util 0.14.0", + "vmm-sys-util", ] [[package]] @@ -683,7 +683,7 @@ dependencies = [ "virtio-bindings", "vm-fdt", "vm-memory", - "vmm-sys-util 0.15.0", + "vmm-sys-util", "zerocopy", ] @@ -748,7 +748,7 @@ dependencies = [ "pkg-config", "remain", "thiserror 1.0.69", - "vmm-sys-util 0.14.0", + "vmm-sys-util", "winapi", "zerocopy", ] @@ -770,7 +770,7 @@ dependencies = [ "libc", "log", "nix 0.30.1", - "vmm-sys-util 0.14.0", + "vmm-sys-util", "windows-sys", ] @@ -805,7 +805,7 @@ dependencies = [ "serde_json", "tdx", "vm-memory", - "vmm-sys-util 0.14.0", + "vmm-sys-util", "zstd", ] @@ -819,23 +819,23 @@ dependencies = [ [[package]] name = "kvm-bindings" -version = "0.12.1" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a537873e15e8daabb416667e606d9b0abc2a8fb9a45bd5853b888ae0ead82f9" +checksum = "4b3c06ff73c7ce03e780887ec2389d62d2a2a9ddf471ab05c2ff69207cd3f3b4" dependencies = [ - "vmm-sys-util 0.14.0", + "vmm-sys-util", ] [[package]] name = "kvm-ioctls" -version = "0.22.1" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c8f7370330b4f57981e300fa39b02088f2f2a5c2d0f1f994e8090589619c56d" +checksum = "333f77a20344a448f3f70664918135fddeb804e938f28a99d685bd92926e0b19" dependencies = [ "bitflags 2.11.0", "kvm-bindings", "libc", - "vmm-sys-util 0.14.0", + "vmm-sys-util", ] [[package]] @@ -1432,9 +1432,9 @@ dependencies = [ [[package]] name = "tdx" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad59e5bf374211a1fdd8e7439a07d5a5e617fe97f5cf21d03bcd1bf8c82b73af" +checksum = 
"83943e37cf46979f711ad11489c641fa058fd0fae92c122d1fc26a664e82acab" dependencies = [ "bitflags 2.11.0", "iocuddle", @@ -1442,7 +1442,7 @@ dependencies = [ "kvm-ioctls", "libc", "uuid", - "vmm-sys-util 0.12.1", + "vmm-sys-util", ] [[package]] @@ -1583,7 +1583,7 @@ dependencies = [ "libc", "uuid", "vm-memory", - "vmm-sys-util 0.15.0", + "vmm-sys-util", ] [[package]] @@ -1615,26 +1615,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "vmm-sys-util" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" -dependencies = [ - "bitflags 1.3.2", - "libc", -] - -[[package]] -name = "vmm-sys-util" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d21f366bf22bfba3e868349978766a965cbe628c323d58e026be80b8357ab789" -dependencies = [ - "bitflags 1.3.2", - "libc", -] - [[package]] name = "vmm-sys-util" version = "0.15.0" diff --git a/examples/launch-tee.c b/examples/launch-tee.c index 063cdd5f3..d6daf3a15 100644 --- a/examples/launch-tee.c +++ b/examples/launch-tee.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include @@ -30,6 +31,11 @@ int main(int argc, char *const argv[]) "6=4096:8192", 0 }; + static const struct option long_opts[] = { + { "td-shim", required_argument, 0, 's' }, + { 0, 0, 0, 0 } + }; + const char *td_shim_path = NULL; char current_path[MAX_PATH]; char volume_tail[] = ":/work\0"; char *volume; @@ -37,10 +43,22 @@ int main(int argc, char *const argv[]) int ctx_id; int err; int i; + int opt; + + while ((opt = getopt_long(argc, argv, "", long_opts, NULL)) != -1) { + switch (opt) { + case 's': + td_shim_path = optarg; + break; + default: + printf("Usage: %s [--td-shim PATH] ROOT_DISK_IMAGE TEE_CONFIG_FILE DATA_DISK_IMAGE\n", argv[0]); + return -1; + } + } - if (argc != 4) { + if (argc - optind != 3) { printf("Invalid arguments\n"); - printf("Usage: %s ROOT_DISK_IMAGE TEE_CONFIG_FILE 
DATA_DISK_IMAGE\n", argv[0]); + printf("Usage: %s [--td-shim PATH] ROOT_DISK_IMAGE TEE_CONFIG_FILE DATA_DISK_IMAGE\n", argv[0]); return -1; } @@ -67,8 +85,8 @@ int main(int argc, char *const argv[]) return -1; } - // Use the first command line argument as the disk image containing the root fs. - if (err = krun_set_root_disk(ctx_id, argv[1])) { + // Use the first positional argument as the disk image containing the root fs. + if (err = krun_add_disk2(ctx_id, "root", argv[optind], KRUN_DISK_FORMAT_RAW, false)) { errno = -err; perror("Error configuring root disk image"); return -1; @@ -108,18 +126,55 @@ int main(int argc, char *const argv[]) return -1; } - if (err = krun_set_tee_config_file(ctx_id, argv[2])) { + if (err = krun_set_tee_config_file(ctx_id, argv[optind + 1])) { errno = -err; perror("Error setting the TEE config file"); return -1; } - if (err = krun_set_data_disk(ctx_id, argv[3])) { + if (td_shim_path != NULL) { + if (err = krun_set_tee_firmware(ctx_id, td_shim_path)) { + errno = -err; + perror("Error setting TD-Shim firmware path"); + return -1; + } + } + + if (err = krun_add_disk2(ctx_id, "data", argv[optind + 2], KRUN_DISK_FORMAT_RAW, false)) { errno = -err; perror("Error configuring the TEE config data disk"); return -1; } + // Serial console (ttyS0) for early boot messages (earlyprintk=ttyS0) and + // the main kernel console (console=ttyS0). Shell stdin/stdout inherit ttyS0. + if (err = krun_add_serial_console_default(ctx_id, STDIN_FILENO, STDOUT_FILENO)) { + errno = -err; + perror("Error adding serial console"); + return -1; + } + + // Disable the implicit virtio console: without explicit TTY fds, the implicit + // console creates krun-stdin/krun-stdout ports connected to /dev/null. + // setup_redirects() in init.krun finds those ports and silently redirects the + // shell's stdin/stdout to /dev/null, making all I/O disappear. 
+ if (err = krun_disable_implicit_console(ctx_id)) { + errno = -err; + perror("Error disabling implicit console"); + return -1; + } + + // Add an explicit virtio console (hvc0) with real TTY fds. With TTY fds, + // libkrun creates only a single console port — no krun-stdin/krun-stdout + // ports — so setup_redirects() finds /sys/class/virtio-ports (satisfying + // the existence check), iterates it, finds nothing to redirect, and returns 0. + // The shell then runs with the inherited ttyS0 fds from the serial console. + if (err = krun_add_virtio_console_default(ctx_id, STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO)) { + errno = -err; + perror("Error adding virtio console"); + return -1; + } + if (err = krun_split_irqchip(ctx_id, true)) { errno = -err; perror("Error setting split IRQCHIP property"); diff --git a/include/libkrun.h b/include/libkrun.h index 3004110f6..66099fdcf 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -974,6 +974,19 @@ int32_t krun_set_env(uint32_t ctx_id, const char *const envp[]); */ int32_t krun_set_tee_config_file(uint32_t ctx_id, const char *filepath); +/** + * Sets the path to a TD-Shim binary for TDX guests. If not called, the TDX guest uses + * the bundled qboot firmware from libkrunfw-tdx. Only available in libkrun-tdx. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "fw_path" - a null-terminated string representing the path to the TD-Shim binary. + * + * Returns: + * Zero on success or a negative error number on failure. + */ +int32_t krun_set_tee_firmware(uint32_t ctx_id, const char *fw_path); + /** * Adds a port-path pairing for guest IPC with a process in the host. 
* diff --git a/src/arch/Cargo.toml b/src/arch/Cargo.toml index a60963abb..7d13fd1b7 100644 --- a/src/arch/Cargo.toml +++ b/src/arch/Cargo.toml @@ -15,16 +15,16 @@ tdx = [ "tee", "dep:tdx" ] [dependencies] libc = ">=0.2.39" vm-memory = { version = "0.17", features = ["backend-mmap"] } -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" arch_gen = { package = "krun-arch-gen", version = "=0.1.0-1.18.0", path = "../arch_gen" } smbios = { package = "krun-smbios", version = "=0.1.0-1.18.0", path = "../smbios" } utils = { package = "krun-utils", version = "=0.1.0-1.18.0", path = "../utils" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" -tdx = { version = "0.1.0", optional = true } +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" +tdx = { version = "0.1.1", optional = true } [dev-dependencies] utils = { package = "krun-utils", version = "=0.1.0-1.18.0", path = "../utils" } diff --git a/src/arch/src/x86_64/mod.rs b/src/arch/src/x86_64/mod.rs index 7c4b6c83d..d2a18f3a7 100644 --- a/src/arch/src/x86_64/mod.rs +++ b/src/arch/src/x86_64/mod.rs @@ -10,7 +10,7 @@ mod gdt; pub mod interrupts; /// Layout for the x86_64 system. pub mod layout; -#[cfg(not(feature = "tee"))] +#[cfg(any(not(feature = "tee"), feature = "tdx"))] mod mptable; /// Logic for configuring x86_64 model specific registers (MSRs). pub mod msr; @@ -43,7 +43,7 @@ pub enum Error { /// Invalid e820 setup params. E820Configuration, /// Error writing MP table to memory. - #[cfg(not(feature = "tee"))] + #[cfg(any(not(feature = "tee"), feature = "tdx"))] MpTableSetup(mptable::Error), /// Error writing the zero page of guest memory. 
ZeroPageSetup, @@ -177,7 +177,7 @@ pub fn arch_memory_regions( kernel_load_addr: Option, kernel_size: usize, _initrd_size: u64, - _firmware_size: Option, + firmware_range: Option<(u64, usize)>, ) -> (ArchMemoryInfo, Vec<(GuestAddress, usize)>) { let page_size: usize = unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() }; @@ -196,21 +196,22 @@ pub fn arch_memory_regions( None | Some(0) => { let ram_last_addr = size as u64; let shm_start_addr = 0u64; + let (fw_addr, fw_sz) = + firmware_range.unwrap_or((FIRMWARE_START, FIRMWARE_SIZE as usize)); ( size as u64, 0, ram_last_addr, shm_start_addr, - vec![ - (GuestAddress(0), size), - (GuestAddress(FIRMWARE_START), FIRMWARE_SIZE as usize), - ], + vec![(GuestAddress(0), size), (GuestAddress(fw_addr), fw_sz)], ) } // case2: guest memory extends beyond the gap Some(remaining) => { let ram_last_addr = FIRST_ADDR_PAST_32BITS + remaining as u64; let shm_start_addr = 0u64; + let (fw_addr, fw_sz) = + firmware_range.unwrap_or((FIRMWARE_START, FIRMWARE_SIZE as usize)); ( MMIO_MEM_START, remaining as u64, @@ -218,7 +219,7 @@ pub fn arch_memory_regions( shm_start_addr, vec![ (GuestAddress(0), MMIO_MEM_START as usize), - (GuestAddress(FIRMWARE_START), FIRMWARE_SIZE as usize), + (GuestAddress(fw_addr), fw_sz), (GuestAddress(FIRST_ADDR_PAST_32BITS), remaining), ], ) @@ -241,6 +242,14 @@ pub fn arch_memory_regions( /// # Arguments /// /// * `guest_mem` - The memory to be used by the guest. +/// Writes an MP table to guest memory. Only needed for the TD-Shim path: TD-Shim's +/// ACPI MADT has no IOAPIC entry, so without an MP table the kernel never programs +/// the IOAPIC and virtio-mmio IRQs stop working after the PIC→APIC transition. +#[cfg(feature = "tdx")] +pub fn setup_mptable_for_tdshim(guest_mem: &GuestMemoryMmap, num_cpus: u8) -> super::Result<()> { + mptable::setup_mptable(guest_mem, num_cpus).map_err(Error::MpTableSetup) +} + /// * `cmdline_addr` - Address in `guest_mem` where the kernel command line was loaded. 
/// * `cmdline_size` - Size of the kernel command line in bytes including the null terminator. /// * `initrd` - Information about where the ramdisk image was loaded in the `guest_mem`. @@ -431,6 +440,19 @@ mod tests { configure_system(&gm, &arch_mem_info, GuestAddress(0), 0, &None, no_vcpus).unwrap(); } + #[cfg(feature = "tee")] + #[test] + fn test_arch_memory_regions_tee_dynamic_firmware_hole() { + let fw_start = 0xfffe_0000u64; + let fw_size = 0x2_0000usize; + let (_info, regions) = + arch_memory_regions(1usize << 29, None, 0, 0, Some((fw_start, fw_size))); + let has_fw_region = regions + .iter() + .any(|&(addr, size)| addr.0 == fw_start && size == fw_size); + assert!(has_fw_region, "firmware region not found in memory regions"); + } + #[test] fn test_add_e820_entry() { let e820_map = [(e820entry { diff --git a/src/cpuid/Cargo.toml b/src/cpuid/Cargo.toml index 8066b3c49..19e5e8dac 100644 --- a/src/cpuid/Cargo.toml +++ b/src/cpuid/Cargo.toml @@ -11,8 +11,8 @@ repository = "https://github.com/containers/libkrun" tdx = [] [dependencies] -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index eacb6cc97..9f014c820 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -51,8 +51,8 @@ lru = ">=0.9" [target.'cfg(target_os = "linux")'.dependencies] rutabaga_gfx = { package = "krun-rutabaga-gfx", version = "=0.1.0-1.18.0", path = "../rutabaga_gfx", features = ["x"], optional = true } caps = "0.5.5" -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" [target.'cfg(any(target_arch = "aarch64", target_arch = "riscv64"))'.dependencies] vm-fdt = ">= 0.2.0" diff --git 
a/src/libkrun/Cargo.toml b/src/libkrun/Cargo.toml index 4e54bf99c..e847469f3 100644 --- a/src/libkrun/Cargo.toml +++ b/src/libkrun/Cargo.toml @@ -40,8 +40,8 @@ rand = "0.9.2" hvf = { package = "krun-hvf", version = "=0.1.0-1.18.0", path = "../hvf" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" aws-nitro = { package = "krun-aws-nitro", version = "=0.1.0-1.18.0", path = "../aws_nitro", optional = true } nitro-enclaves = { version = "0.5.0", optional = true } vm-memory = { version = "0.17", features = ["backend-mmap"] } diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index a7b7eee6a..3bc1f04dc 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -45,7 +45,6 @@ use vmm::resources::{ use vmm::vmm_config::block::{BlockDeviceConfig, BlockRootConfig}; #[cfg(not(feature = "tee"))] use vmm::vmm_config::external_kernel::{ExternalKernel, KernelFormat}; -#[cfg(not(feature = "tee"))] use vmm::vmm_config::firmware::FirmwareConfig; #[cfg(not(feature = "tee"))] use vmm::vmm_config::fs::FsDeviceConfig; @@ -159,6 +158,8 @@ struct ContextConfig { block_root: Option, #[cfg(feature = "tee")] tee_config_file: Option, + #[cfg(feature = "tdx")] + tee_firmware_path: Option, unix_ipc_port_map: Option>, shutdown_efd: Option, gpu_virgl_flags: Option, @@ -308,6 +309,16 @@ impl ContextConfig { self.tee_config_file.clone() } + #[cfg(feature = "tdx")] + fn set_tee_firmware_path(&mut self, path: PathBuf) { + self.tee_firmware_path = Some(path); + } + + #[cfg(feature = "tdx")] + fn get_tee_firmware_path(&self) -> Option { + self.tee_firmware_path.clone() + } + fn add_vsock_port(&mut self, port: u32, filepath: PathBuf, listen: bool) { if let Some(ref mut map) = &mut self.unix_ipc_port_map { map.insert(port, (filepath, listen)); @@ -1428,6 +1439,26 @@ pub unsafe extern "C" fn 
krun_set_tee_config_file(ctx_id: u32, c_filepath: *cons KRUN_SUCCESS } +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +#[cfg(feature = "tdx")] +pub unsafe extern "C" fn krun_set_tee_firmware(ctx_id: u32, c_fw_path: *const c_char) -> i32 { + let path = match CStr::from_ptr(c_fw_path).to_str() { + Ok(p) => PathBuf::from(p), + Err(e) => { + error!("Error parsing fw_path: {e:?}"); + return -libc::EINVAL; + } + }; + + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => ctx_cfg.get_mut().set_tee_firmware_path(path), + Entry::Vacant(_) => return -libc::ENOENT, + } + + KRUN_SUCCESS +} + #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_add_vsock_port( @@ -2695,17 +2726,41 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { return -libc::EINVAL; } + #[cfg(feature = "tdx")] + if let Some(fw_path) = ctx_cfg.get_tee_firmware_path() { + ctx_cfg + .vmr + .set_tee_firmware_config(FirmwareConfig { path: fw_path }); + } + + // In the TD-Shim path, init.krun reads its exec config from the TEE config on + // the data disk (vdb), not from the kernel cmdline. Including krun_env and the + // " -- " epilog here would cause init.krun to take the non-TEE vsock code path + // and block waiting for a host-side exec command that never arrives. 
+ #[cfg(feature = "tdx")] + let is_tdshim = ctx_cfg.vmr.tee_firmware_config.is_some(); + #[cfg(not(feature = "tdx"))] + let is_tdshim = false; + let kernel_cmdline = KernelCmdlineConfig { prolog: Some(format!("{DEFAULT_KERNEL_CMDLINE} init={INIT_PATH}")), - krun_env: Some(format!( - " {} {} {} {} {}", - ctx_cfg.get_exec_path(), - ctx_cfg.get_workdir(), - ctx_cfg.get_block_root(), - ctx_cfg.get_rlimits(), - ctx_cfg.get_env(), - )), - epilog: Some(format!(" -- {}", ctx_cfg.get_args())), + krun_env: if is_tdshim { + None + } else { + Some(format!( + " {} {} {} {} {}", + ctx_cfg.get_exec_path(), + ctx_cfg.get_workdir(), + ctx_cfg.get_block_root(), + ctx_cfg.get_rlimits(), + ctx_cfg.get_env(), + )) + }, + epilog: if is_tdshim { + None + } else { + Some(format!(" -- {}", ctx_cfg.get_args())) + }, }; if ctx_cfg.vmr.set_kernel_cmdline(kernel_cmdline).is_err() { diff --git a/src/rutabaga_gfx/Cargo.toml b/src/rutabaga_gfx/Cargo.toml index f8e8efa2c..c2d98eb37 100644 --- a/src/rutabaga_gfx/Cargo.toml +++ b/src/rutabaga_gfx/Cargo.toml @@ -27,7 +27,7 @@ remain = "0.2" thiserror = "1.0.23" zerocopy = { version = "0.8.26", features = ["derive"] } log = "0.4" -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" [target.'cfg(unix)'.dependencies] nix = { version = "0.30.1", features = ["event", "feature", "fs", "mman", "socket", "uio", "ioctl"] } diff --git a/src/utils/Cargo.toml b/src/utils/Cargo.toml index f24921bc4..5ae7ab27b 100644 --- a/src/utils/Cargo.toml +++ b/src/utils/Cargo.toml @@ -14,11 +14,11 @@ log = "0.4.0" [target.'cfg(unix)'.dependencies] libc = ">=0.2.85" nix = "0.30.1" -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" crossbeam-channel = ">=0.5.15" [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } [target.'cfg(target_os = "macos")'.dependencies] nix = { version = "0.30.1", features = ["fs"] } diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml 
index 377f9c731..57b0fa69a 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -27,7 +27,7 @@ linux-loader = { version = "0.13.2", features = ["bzimage", "elf", "pe"] } log = "0.4.0" nix = { version = "0.30.1", features = ["fs", "term"] } vm-memory = { version = "0.17.0", features = ["backend-mmap"] } -vmm-sys-util = "0.14" +vmm-sys-util = "0.15" krun_display = { package = "krun-display", version = "0.1.0", path = "../display", optional = true, features = ["bindgen_clang_runtime"] } krun_input = { package = "krun-input", version = "0.1.0", path = "../input", optional = true, features = ["bindgen_clang_runtime"] } @@ -52,9 +52,9 @@ cpuid = { package = "krun-cpuid", version = "=0.1.0-1.18.0", path = "../cpuid" } zstd = "0.13" [target.'cfg(target_os = "linux")'.dependencies] -tdx = { version = "0.1.0", optional = true } -kvm-bindings = { version = "0.12", features = ["fam-wrappers"] } -kvm-ioctls = "0.22" +tdx = { version = "0.1.1", optional = true } +kvm-bindings = { version = "0.14", features = ["fam-wrappers"] } +kvm-ioctls = "0.24" [target.'cfg(target_os = "macos")'.dependencies] hvf = { package = "krun-hvf", version = "=0.1.0-1.18.0", path = "../hvf" } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index b92b931d4..19d3a89de 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -51,6 +51,8 @@ use devices::virtio::{port_io, MmioTransport, PortDescription, VirtioDevice, Vso use kbs_types::Tee; use crate::device_manager; +#[cfg(feature = "tdx")] +use crate::linux::tee::tdshim::TdShim; #[cfg(all(feature = "vhost-user", target_os = "linux"))] use crate::resources::VhostUserDeviceConfig; #[cfg(target_os = "linux")] @@ -99,6 +101,8 @@ use vm_memory::Bytes; use vm_memory::FileOffset; #[cfg(not(feature = "aws-nitro"))] use vm_memory::GuestMemory; +#[cfg(feature = "tdx")] +use vm_memory::GuestMemoryRegion; #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] use vm_memory::GuestRegionMmap; use vm_memory::{GuestAddress, 
GuestMemoryMmap}; @@ -568,6 +572,23 @@ pub fn build_microvm( ) -> std::result::Result>, StartMicrovmError> { let payload = choose_payload(vm_resources)?; + #[cfg(feature = "tdx")] + let td_shim_parsed = if let Some(tee_fw_cfg) = &vm_resources.tee_firmware_config { + Some(TdShim::parse(&tee_fw_cfg.path).map_err(|e| { + StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) + })?) + } else { + None + }; + + #[cfg(feature = "tdx")] + let fw_range_for_mem = td_shim_parsed.as_ref().and_then(|ts| { + ts.high_firmware_range() + .map(|(start, end)| (start, (end - start) as usize)) + }); + #[cfg(all(feature = "tee", not(feature = "tdx")))] + let fw_range_for_mem: Option<(u64, usize)> = None; + let (guest_memory, arch_memory_info, mut _shm_manager, payload_config) = create_guest_memory( vm_resources .vm_config() @@ -575,6 +596,8 @@ pub fn build_microvm( .ok_or(StartMicrovmError::MissingMemSizeConfig)?, vm_resources, &payload, + #[cfg(feature = "tee")] + fw_range_for_mem, )?; let vcpu_config = vm_resources.vcpu_config(); @@ -609,6 +632,84 @@ pub fn build_microvm( kernel_cmdline.insert_str(cmdline).unwrap(); } + // Write the TD-Shim initrd trampoline and firmware sections into guest memory + // BEFORE setup_vm()/memory_init(). At this point the GuestMemoryMmap is backed + // by plain anonymous mmap pages. Once KVM registers the memory slots (memory_init), + // pages become KVM_MEM_PRIVATE and TDH.MEM.PAGE.ADD copies the shared content to + // the TD's private memory — so any writes here are guaranteed to reach the TD. 
+ #[cfg(feature = "tdx")] + if let Some(ref td_shim) = td_shim_parsed { + td_shim + .load_sections(&td_shim.firmware_data, &guest_memory) + .map_err(|e| { + StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) + })?; + + arch::x86_64::setup_mptable_for_tdshim( + &guest_memory, + vm_resources.vm_config().vcpu_count.unwrap_or(1), + ) + .map_err(|e| StartMicrovmError::Internal(Error::ConfigureSystem(e)))?; + + if let (Some(kernel_bundle), Some(initrd_bundle)) = + (&vm_resources.kernel_bundle, &vm_resources.initrd_bundle) + { + let startup_64_addr = kernel_bundle.entry_addr; + let initrd_guest_addr: u32 = arch::x86_64::layout::INITRD_SEV_START as u32; + let initrd_size: u32 = initrd_bundle.size as u32; + + // Place the trampoline INSIDE the kernel image, just before startup_64. + // The HOB entry_point must be in the kernel's address range — any address + // outside (like 0x801000 in the TD_HOB section) causes td-shim to treat + // the jump target as HOB data and behave incorrectly before the kernel runs. + // + // Trampoline (38 bytes): writes ramdisk_image/ramdisk_size/cmd_line_ptr into td-shim's + // boot_params (RSI) then falls through to startup_64. + // + // The bytes before startup_64 are PE/COFF header data that td-shim's + // RawVmLinux boot path never reads or executes, so overwriting them is safe. + // Trampoline patches td-shim's boot_params (RSI) with: + // ramdisk_image [rsi+0x218] = INITRD_SEV_START + // ramdisk_size [rsi+0x21c] = initrd_bundle.size + // cmd_line_ptr [rsi+0x228] = CMDLINE_START (0x20000) + // + // td-shim creates its own boot_params with cmd_line_ptr=0, so the + // kernel falls back to CONFIG_CMDLINE and we have no way to inject a + // custom cmdline. Patching cmd_line_ptr here lets load_cmdline's + // content (written to CMDLINE_START) reach the kernel. 
+ const TRAMPOLINE_SIZE: u64 = 38; + let trampoline_addr = startup_64_addr - TRAMPOLINE_SIZE; + + let mut trampoline: Vec = Vec::with_capacity(TRAMPOLINE_SIZE as usize); + // mov eax, initrd_guest_addr — ramdisk_image value + trampoline.push(0xB8); + trampoline.extend_from_slice(&initrd_guest_addr.to_le_bytes()); + // mov [rsi+0x218], eax — write ramdisk_image to boot_params + trampoline.extend_from_slice(&[0x89, 0x86, 0x18, 0x02, 0x00, 0x00]); + // mov eax, initrd_size — ramdisk_size value + trampoline.push(0xB8); + trampoline.extend_from_slice(&initrd_size.to_le_bytes()); + // mov [rsi+0x21c], eax — write ramdisk_size to boot_params + trampoline.extend_from_slice(&[0x89, 0x86, 0x1C, 0x02, 0x00, 0x00]); + // mov eax, CMDLINE_START (0x20000) — cmd_line_ptr value + trampoline.push(0xB8); + trampoline + .extend_from_slice(&(arch::x86_64::layout::CMDLINE_START as u32).to_le_bytes()); + // mov [rsi+0x228], eax — write cmd_line_ptr to boot_params + trampoline.extend_from_slice(&[0x89, 0x86, 0x28, 0x02, 0x00, 0x00]); + // jmp startup_64 (rel32=0 since startup_64 immediately follows) + trampoline.extend_from_slice(&[0xE9, 0x00, 0x00, 0x00, 0x00]); + + debug_assert_eq!(trampoline.len() as u64, TRAMPOLINE_SIZE); + + guest_memory + .write(&trampoline, GuestAddress(trampoline_addr)) + .map_err(|e| { + StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) + })?; + } + } + #[cfg(not(feature = "tee"))] #[allow(unused_mut)] let mut vm = setup_vm(&guest_memory, vm_resources.nested_enabled)?; @@ -677,6 +778,7 @@ pub fn build_microvm( .get_host_address(GuestAddress(arch::FIRMWARE_START)) .unwrap() as u64, size: qboot_size, + attributes: 0, }, MeasuredRegion { guest_addr: kernel_guest_addr, @@ -684,11 +786,13 @@ pub fn build_microvm( .get_host_address(GuestAddress(kernel_guest_addr)) .unwrap() as u64, size: kernel_size, + attributes: 0, }, MeasuredRegion { guest_addr: initrd_addr.0, host_addr: guest_memory.get_host_address(initrd_addr).unwrap() as u64, size: 
initrd_size, + attributes: 0, }, MeasuredRegion { guest_addr: arch::x86_64::layout::ZERO_PAGE_START, @@ -696,34 +800,106 @@ pub fn build_microvm( .get_host_address(GuestAddress(arch::x86_64::layout::ZERO_PAGE_START)) .unwrap() as u64, size: 4096, + attributes: 0, }, ] }; #[cfg(feature = "tdx")] - let measured_regions = { + let (measured_regions, tdx_hob_address) = { println!("Injecting and measuring memory regions. This may take a while."); - let qboot_size = if let Some(qboot_bundle) = &vm_resources.qboot_bundle { - qboot_bundle.size - } else { - return Err(StartMicrovmError::MissingKernelConfig); - }; - let m = vec![ - MeasuredRegion { - guest_addr: 0, - host_addr: guest_memory.get_host_address(GuestAddress(0)).unwrap() as u64, - size: 0x8000_0000, - }, - MeasuredRegion { - guest_addr: arch::FIRMWARE_START, - host_addr: guest_memory - .get_host_address(GuestAddress(arch::FIRMWARE_START)) - .unwrap() as u64, - size: qboot_size, - }, - ]; - m + if let Some(td_shim) = td_shim_parsed { + let high_fw = td_shim.high_firmware_range(); + let ram_regions: Vec<(u64, u64)> = guest_memory + .iter() + .filter_map(|region| { + let start = region.start_addr().0; + let len = region.len(); + // Exclude only the high-memory firmware region from RAM HOBs. + if let Some((fw_start, fw_end)) = high_fw { + if start >= fw_start && start < fw_end { + return None; + } + } + Some((start, len)) + }) + .collect(); + + const TRAMPOLINE_SIZE: u64 = 38; + let startup_64 = vm_resources + .kernel_bundle + .as_ref() + .ok_or(StartMicrovmError::MissingKernelConfig)? 
+ .entry_addr; + let hob_entry_point = if vm_resources.initrd_bundle.is_some() { + startup_64 - TRAMPOLINE_SIZE + } else { + startup_64 + }; + td_shim + .generate_hobs(&guest_memory, hob_entry_point, &ram_regions) + .map_err(|e| { + StartMicrovmError::FirmwareRead(std::io::Error::other(format!("{e:?}"))) + })?; + + // Build measured regions the same way as qboot: all RAM as one block + // (attributes=0, add but don't measure), plus the high firmware sections + // (BFV etc.) with their per-section attributes. Low-address TDVF sections + // (TempMem, TD_HOB) fall inside the RAM range and must not be added + // separately — TDX rejects duplicate TDH.MEM.PAGE.ADD calls. + let mut regions: Vec = guest_memory + .iter() + .filter(|r| r.start_addr().0 < arch::x86_64::layout::MMIO_MEM_START) + .map(|r| MeasuredRegion { + guest_addr: r.start_addr().0, + host_addr: guest_memory.get_host_address(r.start_addr()).unwrap() as u64, + size: r.len() as usize, + attributes: 0, + }) + .collect(); + + for section in &td_shim.sections { + if section.memory_address >= arch::x86_64::layout::MMIO_MEM_START { + regions.push(MeasuredRegion { + guest_addr: section.memory_address, + host_addr: guest_memory + .get_host_address(GuestAddress(section.memory_address)) + .unwrap() as u64, + size: section.memory_data_size as usize, + attributes: section.attributes, + }); + } + } + + let hob_addr = td_shim.hob_address; + + (regions, hob_addr) + } else { + // qboot path (unchanged) + let qboot_size = if let Some(qboot_bundle) = &vm_resources.qboot_bundle { + qboot_bundle.size + } else { + return Err(StartMicrovmError::MissingKernelConfig); + }; + let regions = vec![ + MeasuredRegion { + guest_addr: 0, + host_addr: guest_memory.get_host_address(GuestAddress(0)).unwrap() as u64, + size: 0x8000_0000, + attributes: 0, + }, + MeasuredRegion { + guest_addr: arch::FIRMWARE_START, + host_addr: guest_memory + .get_host_address(GuestAddress(arch::FIRMWARE_START)) + .unwrap() as u64, + size: qboot_size, + 
attributes: 1, + }, + ]; + (regions, 0u64) + } }; let mut serial_devices = Vec::new(); @@ -845,7 +1021,8 @@ pub fn build_microvm( for vcpu in &vcpus { vcpu.tdx_secure_virt_prepare(&mut tdx_launcher); } - vm.tdx_secure_virt_init_vcpus(&mut tdx_launcher).unwrap(); + vm.tdx_secure_virt_init_vcpus(&mut tdx_launcher, tdx_hob_address) + .unwrap(); } // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) and configured before @@ -1090,8 +1267,15 @@ pub fn build_microvm( // Write the kernel command line to guest memory. This is x86_64 specific, since on // aarch64 the command line will be specified through the FDT. + // For the TD-Shim path, the cmdline is written so TD-Shim can reference it when + // populating boot_params for the Linux kernel (cmd_line_ptr already points here + // via configure_system). #[cfg(all(target_arch = "x86_64", not(feature = "tee")))] load_cmdline(&vmm)?; + #[cfg(all(target_arch = "x86_64", feature = "tdx"))] + if vm_resources.tee_firmware_config.is_some() { + load_cmdline(&vmm)?; + } vmm.configure_system( vcpus.as_slice(), @@ -1514,6 +1698,7 @@ pub fn create_guest_memory( mem_size: usize, vm_resources: &VmResources, payload: &Payload, + #[cfg(feature = "tee")] firmware_range: Option<(u64, usize)>, ) -> std::result::Result< (GuestMemoryMmap, ArchMemoryInfo, ShmManager, PayloadConfig), StartMicrovmError, @@ -1540,13 +1725,9 @@ pub fn create_guest_memory( }; arch::arch_memory_regions(mem_size, Some(kernel_guest_addr), kernel_size, 0, None) } - Payload::ExternalKernel(external_kernel) => arch::arch_memory_regions( - mem_size, - None, - 0, - external_kernel.initramfs_size, - firmware_size, - ), + Payload::ExternalKernel(external_kernel) => { + arch::arch_memory_regions(mem_size, None, 0, external_kernel.initramfs_size, None) + } #[cfg(feature = "tee")] Payload::Tee => { let (kernel_guest_addr, kernel_size) = @@ -1555,11 +1736,20 @@ pub fn create_guest_memory( } else { return Err(StartMicrovmError::MissingKernelConfig); }; - 
arch::arch_memory_regions(mem_size, Some(kernel_guest_addr), kernel_size, 0, None) + arch::arch_memory_regions( + mem_size, + Some(kernel_guest_addr), + kernel_size, + 0, + firmware_range, + ) } #[cfg(test)] Payload::Empty => arch::arch_memory_regions(mem_size, None, 0, 0, None), + #[cfg(not(feature = "tee"))] Payload::Firmware => arch::arch_memory_regions(mem_size, None, 0, 0, firmware_size), + #[cfg(feature = "tee")] + Payload::Firmware => arch::arch_memory_regions(mem_size, None, 0, 0, None), }; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] let (arch_mem_info, mut arch_mem_regions) = match payload { @@ -1674,7 +1864,7 @@ pub fn create_guest_memory( Ok((guest_mem, arch_mem_info, shm_manager, payload_config)) } -#[cfg(all(target_arch = "x86_64", not(feature = "tee")))] +#[cfg(all(target_arch = "x86_64", any(not(feature = "tee"), feature = "tdx")))] fn load_cmdline(vmm: &Vmm) -> std::result::Result<(), StartMicrovmError> { kernel::loader::load_cmdline( vmm.guest_memory(), diff --git a/src/vmm/src/linux/tee/amdsnp/mod.rs b/src/vmm/src/linux/tee/amdsnp/mod.rs index 1474a0be8..abeb3e0d7 100644 --- a/src/vmm/src/linux/tee/amdsnp/mod.rs +++ b/src/vmm/src/linux/tee/amdsnp/mod.rs @@ -323,6 +323,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_LIDT_START)) .unwrap() as u64, size: 0x1000, + attributes: 0, }, &mut launcher, PageType::Zero, @@ -337,6 +338,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_SECRETS_START)) .unwrap() as u64, size: 0x1000, + attributes: 0, }, &mut launcher, PageType::Secrets, @@ -352,6 +354,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_CPUID_START)) .unwrap() as u64, size: 0x1000, + attributes: 0, }, &mut launcher, PageType::Cpuid, @@ -368,6 +371,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_CPUID_START)) .unwrap() as u64, size: 0x1000, + attributes: 0, }, &mut launcher, PageType::Cpuid, @@ -385,6 +389,7 @@ impl AmdSnp { .get_host_address(GuestAddress(SNP_FWDATA_START)) .unwrap() as u64, size: 
SNP_FWDATA_SIZE, + attributes: 0, }, &mut launcher, PageType::Zero, diff --git a/src/vmm/src/linux/tee/inteltdx.rs b/src/vmm/src/linux/tee/inteltdx.rs index dc12f8cc3..68152dc28 100644 --- a/src/vmm/src/linux/tee/inteltdx.rs +++ b/src/vmm/src/linux/tee/inteltdx.rs @@ -38,7 +38,7 @@ impl IntelTdx { let mem_region = tdx::launch::MemRegion::new( region.guest_addr, (region.size / 4096) as u64, - (arch::FIRMWARE_START == region.guest_addr).into(), + region.attributes, region.host_addr, ); launcher diff --git a/src/vmm/src/linux/tee/mod.rs b/src/vmm/src/linux/tee/mod.rs index 572856408..339e00236 100644 --- a/src/vmm/src/linux/tee/mod.rs +++ b/src/vmm/src/linux/tee/mod.rs @@ -3,3 +3,6 @@ pub mod amdsnp; #[cfg(feature = "tdx")] pub mod inteltdx; + +#[cfg(feature = "tdx")] +pub mod tdshim; diff --git a/src/vmm/src/linux/tee/tdshim.rs b/src/vmm/src/linux/tee/tdshim.rs new file mode 100644 index 000000000..4091f3f76 --- /dev/null +++ b/src/vmm/src/linux/tee/tdshim.rs @@ -0,0 +1,498 @@ +use std::fs::File; +use std::io::{self, Read, Seek}; +use std::path::Path; +use tdx::tdvf::{self, TdvfSection, TdvfSectionType}; +use vm_memory::{ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap}; + +#[derive(Debug)] +pub enum Error { + OpenFirmware(io::Error), + ReadFirmware(io::Error), + ParseSections(tdx::tdvf::Error), + InvalidSectionOffset, + MissingBfv, + MissingTdHob, + HobRegionTooSmall, + GuestMemory(vm_memory::GuestMemoryError), +} + +type Result = std::result::Result; + +pub struct TdShim { + pub sections: Vec, + pub hob_address: u64, + pub firmware_data: Vec, +} + +const HOB_TYPE_PHIT: u16 = 0x0001; +const HOB_TYPE_RESOURCE: u16 = 0x0003; +const HOB_TYPE_GUID_EXT: u16 = 0x0004; +const HOB_TYPE_END: u16 = 0xFFFF; +const EFI_RESOURCE_SYSTEM_MEMORY: u32 = 0x00000000; + +const PAYLOAD_INFO_GUID: [u8; 16] = [ + 0x12, 0xa4, 0x6f, 0xb9, 0x1f, 0x46, 0xe3, 0x4b, 0x8c, 0x0d, 0xad, 0x80, 0x5a, 0x49, 0x7a, 0xc0, +]; + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct 
HobHeader { + hob_type: u16, + hob_length: u16, + reserved: u32, +} +unsafe impl ByteValued for HobHeader {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct PhitHob { + header: HobHeader, + version: u32, + boot_mode: u32, + efi_memory_top: u64, + efi_memory_bottom: u64, + efi_free_memory_top: u64, + efi_free_memory_bottom: u64, + efi_end_of_hob_list: u64, +} +unsafe impl ByteValued for PhitHob {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct ResourceHob { + header: HobHeader, + owner: [u8; 16], + resource_type: u32, + resource_attributes: u32, + physical_start: u64, + resource_length: u64, +} +unsafe impl ByteValued for ResourceHob {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct GuidExtHobHeader { + header: HobHeader, + name: [u8; 16], +} +unsafe impl ByteValued for GuidExtHobHeader {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct PayloadInfoHob { + guid_header: GuidExtHobHeader, + image_type: u32, + reserved: u32, + entry_point: u64, +} +unsafe impl ByteValued for PayloadInfoHob {} + +#[derive(Copy, Clone, Default)] +#[repr(C, packed)] +struct EndHob { + header: HobHeader, +} +unsafe impl ByteValued for EndHob {} + +fn append(buf: &mut Vec, val: &T) { + buf.extend_from_slice(val.as_slice()); +} + +pub fn write_hob_chain( + out: &mut [u8], + hob_region_addr: u64, + memory_regions: &[(u64, u64)], + kernel_entry_addr: u64, +) -> Result<()> { + let mut chain: Vec = Vec::new(); + + let phit_offset = chain.len(); + append( + &mut chain, + &PhitHob { + header: HobHeader { + hob_type: HOB_TYPE_PHIT, + hob_length: 56, + reserved: 0, + }, + version: 0x0009, + ..Default::default() + }, + ); + + for &(start, length) in memory_regions { + append( + &mut chain, + &ResourceHob { + header: HobHeader { + hob_type: HOB_TYPE_RESOURCE, + hob_length: std::mem::size_of::() as u16, + reserved: 0, + }, + owner: [0u8; 16], + resource_type: EFI_RESOURCE_SYSTEM_MEMORY, + resource_attributes: 0x307, + physical_start: start, + 
resource_length: length, + }, + ); + } + + append( + &mut chain, + &PayloadInfoHob { + guid_header: GuidExtHobHeader { + header: HobHeader { + hob_type: HOB_TYPE_GUID_EXT, + hob_length: std::mem::size_of::() as u16, + reserved: 0, + }, + name: PAYLOAD_INFO_GUID, + }, + // libkrunfw packages the kernel as a raw ELF vmlinux (not bzImage). + // Use RawVmLinux (2) so td-shim jumps directly to entry_point rather + // than treating the image as a bzImage and miscalculating startup_64. + image_type: 2, + reserved: 0, + entry_point: kernel_entry_addr, + }, + ); + + append( + &mut chain, + &EndHob { + header: HobHeader { + hob_type: HOB_TYPE_END, + hob_length: 8, + reserved: 0, + }, + }, + ); + + // Patch PHIT: efi_end_of_hob_list points past the End HOB (exclusive end of the chain). + // td-shim validates: hob_length == efi_end_of_hob_list - hob_ptr, where + // hob_length = end_hob_offset + size_of::
() = chain.len(). + let end_of_list_addr = hob_region_addr + chain.len() as u64; + unsafe { + let phit_ptr = chain[phit_offset..].as_mut_ptr() as *mut PhitHob; + std::ptr::write_unaligned( + std::ptr::addr_of_mut!((*phit_ptr).efi_end_of_hob_list), + end_of_list_addr, + ); + } + + if chain.len() > out.len() { + return Err(Error::HobRegionTooSmall); + } + + out[..chain.len()].copy_from_slice(&chain); + Ok(()) +} + +fn is_bfv(s: &TdvfSection) -> bool { + matches!(s.section_type, TdvfSectionType::Bfv) +} + +fn is_td_hob(s: &TdvfSection) -> bool { + matches!(s.section_type, TdvfSectionType::TdHob) +} + +fn validate_sections(sections: &[TdvfSection]) -> Result<()> { + if !sections.iter().any(is_bfv) { + return Err(Error::MissingBfv); + } + if !sections.iter().any(is_td_hob) { + return Err(Error::MissingTdHob); + } + Ok(()) +} + +impl TdShim { + pub fn parse(path: &Path) -> Result { + let mut file = File::open(path).map_err(Error::OpenFirmware)?; + let sections = tdvf::parse_sections(&mut file).map_err(Error::ParseSections)?; + validate_sections(§ions)?; + let hob_address = sections + .iter() + .find(|s| is_td_hob(s)) + .unwrap() + .memory_address; + file.rewind().map_err(Error::ReadFirmware)?; + let mut firmware_data = Vec::new(); + file.read_to_end(&mut firmware_data) + .map_err(Error::ReadFirmware)?; + Ok(Self { + sections, + hob_address, + firmware_data, + }) + } + + #[cfg(test)] + fn firmware_range(&self) -> (u64, u64) { + let min = self + .sections + .iter() + .map(|s| s.memory_address) + .min() + .unwrap(); + let max = self + .sections + .iter() + .map(|s| s.memory_address + s.memory_data_size) + .max() + .unwrap(); + (min, max) + } + + /// Returns [min_addr, max_addr) covering only sections above the 32-bit MMIO gap. + /// These sections need their own GuestMemoryMmap region; sections below the gap + /// fall within the normal RAM mapping and need no separate hole. 
+ pub fn high_firmware_range(&self) -> Option<(u64, u64)> { + let mmio_start = arch::x86_64::layout::MMIO_MEM_START; + let min = self + .sections + .iter() + .filter(|s| s.memory_address >= mmio_start) + .map(|s| s.memory_address) + .min()?; + let max = self + .sections + .iter() + .filter(|s| s.memory_address >= mmio_start) + .map(|s| s.memory_address + s.memory_data_size) + .max()?; + Some((min, max)) + } + + /// Copies sections with raw data into guest memory. Zero-fill sections are + /// already handled by the mmap backing. + pub fn load_sections(&self, firmware_data: &[u8], guest_mem: &GuestMemoryMmap) -> Result<()> { + for section in &self.sections { + if section.raw_data_size <= 0 { + continue; + } + let start = section.data_offset as usize; + let end = start + section.raw_data_size as usize; + if end > firmware_data.len() { + return Err(Error::InvalidSectionOffset); + } + guest_mem + .write( + &firmware_data[start..end], + GuestAddress(section.memory_address), + ) + .map_err(Error::GuestMemory)?; + } + Ok(()) + } + + pub fn generate_hobs( + &self, + guest_mem: &GuestMemoryMmap, + kernel_entry_addr: u64, + ram_regions: &[(u64, u64)], + ) -> Result<()> { + let hob_section = self.sections.iter().find(|s| is_td_hob(s)).unwrap(); + let hob_size = hob_section.memory_data_size as usize; + let mut buf = vec![0u8; hob_size]; + + write_hob_chain( + &mut buf, + hob_section.memory_address, + ram_regions, + kernel_entry_addr, + )?; + + guest_mem + .write(&buf, GuestAddress(hob_section.memory_address)) + .map_err(Error::GuestMemory)?; + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn bfv_section() -> TdvfSection { + TdvfSection { + data_offset: 0, + raw_data_size: 0x1000, + memory_address: 0xffff_0000, + memory_data_size: 0x1000, + section_type: TdvfSectionType::Bfv, + attributes: 1, + } + } + + fn hob_section() -> TdvfSection { + TdvfSection { + data_offset: 0, + raw_data_size: 0, + memory_address: 0x5000_0000, + memory_data_size: 0x1000, + 
section_type: TdvfSectionType::TdHob, + attributes: 0, + } + } + + #[test] + fn test_validate_missing_bfv() { + let sections = vec![hob_section()]; + assert!(matches!( + validate_sections(§ions), + Err(Error::MissingBfv) + )); + } + + #[test] + fn test_validate_missing_hob() { + let sections = vec![bfv_section()]; + assert!(matches!( + validate_sections(§ions), + Err(Error::MissingTdHob) + )); + } + + #[test] + fn test_validate_both_present() { + let sections = vec![bfv_section(), hob_section()]; + assert!(validate_sections(§ions).is_ok()); + } + + #[test] + fn test_hob_address_extracted() { + let hob_addr = 0x5000_0000u64; + let sections = vec![bfv_section(), hob_section()]; + validate_sections(§ions).unwrap(); + let found = sections.iter().find(|s| is_td_hob(s)).unwrap(); + assert_eq!(found.memory_address, hob_addr); + } + + #[test] + fn test_firmware_range() { + let sections = vec![bfv_section(), hob_section()]; + // hob at 0x5000_0000 with size 0x1000 and bfv at 0xffff_0000 with size 0x1000 + let td = TdShim { + hob_address: 0x5000_0000, + sections, + firmware_data: vec![], + }; + let (min, max) = td.firmware_range(); + assert_eq!(min, 0x5000_0000); + assert_eq!(max, 0xffff_0000 + 0x1000); + } + + #[test] + fn test_high_firmware_range_excludes_low_sections() { + let sections = vec![bfv_section(), hob_section()]; + let td = TdShim { + hob_address: 0x5000_0000, + sections, + firmware_data: vec![], + }; + let (start, end) = td.high_firmware_range().expect("BFV is above MMIO gap"); + assert_eq!(start, 0xffff_0000); + assert_eq!(end, 0xffff_0000 + 0x1000); + } + + #[test] + fn test_high_firmware_range_none_when_all_sections_low() { + let sections = vec![ + TdvfSection { + data_offset: 0, + raw_data_size: 0, + memory_address: 0x0080_0000, + memory_data_size: 0x1000, + section_type: TdvfSectionType::TempMem, + attributes: 0, + }, + hob_section(), + ]; + let td = TdShim { + hob_address: 0x5000_0000, + sections, + firmware_data: vec![], + }; + 
assert!(td.high_firmware_range().is_none()); + } + + #[test] + fn test_hob_chain_starts_with_phit() { + let mut buf = vec![0u8; 4096]; + write_hob_chain(&mut buf, 0x5000_0000, &[(0, 0x4000_0000)], 0x0100_0000).unwrap(); + let hob_type = u16::from_le_bytes([buf[0], buf[1]]); + assert_eq!(hob_type, 0x0001, "First HOB must be PHIT"); + } + + #[test] + fn test_hob_chain_ends_with_end_hob() { + let mut buf = vec![0u8; 4096]; + write_hob_chain(&mut buf, 0x5000_0000, &[(0, 0x4000_0000)], 0x100_0000).unwrap(); + let mut offset = 0usize; + let mut found_end = false; + while offset + 4 <= buf.len() { + let hob_type = u16::from_le_bytes([buf[offset], buf[offset + 1]]); + let hob_len = u16::from_le_bytes([buf[offset + 2], buf[offset + 3]]) as usize; + if hob_type == 0xFFFF { + found_end = true; + break; + } + if hob_len == 0 { + break; + } + offset += hob_len; + } + assert!(found_end, "HOB chain must terminate with 0xFFFF"); + } + + #[test] + fn test_hob_chain_too_small_returns_error() { + let mut buf = vec![0u8; 8]; + let result = write_hob_chain(&mut buf, 0x5000_0000, &[(0, 0x1000)], 0x100_0000); + assert!(matches!(result, Err(Error::HobRegionTooSmall))); + } + + #[test] + fn test_phit_efi_end_of_hob_list_points_past_end_hob() { + // td-shim validates: hob_length == efi_end_of_hob_list - hob_region_addr + // where hob_length = end_hob_offset + 8 = chain.len() + let hob_region_addr = 0x5000_0000u64; + let mut buf = vec![0u8; 4096]; + write_hob_chain( + &mut buf, + hob_region_addr, + 4096, + &[(0, 0x4000_0000)], + 0x100_0000, + ) + .unwrap(); + + // Read efi_end_of_hob_list from PHIT at offset 48 (after header+version+boot_mode+3×u64) + let efi_end = u64::from_le_bytes(buf[48..56].try_into().unwrap()); + + // Find the End HOB and compute hob_length = end_hob_offset + 8 + let mut offset = 0usize; + let mut end_hob_offset = None; + while offset + 4 <= buf.len() { + let hob_type = u16::from_le_bytes([buf[offset], buf[offset + 1]]); + let hob_len = u16::from_le_bytes([buf[offset + 
2], buf[offset + 3]]) as usize; + if hob_type == 0xFFFF { + end_hob_offset = Some(offset); + break; + } + if hob_len == 0 { + break; + } + offset += hob_len; + } + let hob_length = end_hob_offset.unwrap() + 8; + assert_eq!( + efi_end, + hob_region_addr + hob_length as u64, + "efi_end_of_hob_list must equal hob_region_addr + chain.len()" + ); + } +} diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index 05e58fbd7..5b24743d6 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -432,6 +432,7 @@ pub struct MeasuredRegion { pub guest_addr: u64, pub host_addr: u64, pub size: usize, + pub attributes: u32, } /// Describes a KVM context that gets attached to the microVM. @@ -762,10 +763,14 @@ impl Vm { } #[cfg(feature = "tdx")] - pub fn tdx_secure_virt_init_vcpus(&self, launcher: &mut tdx::launch::Launcher) -> Result<()> { + pub fn tdx_secure_virt_init_vcpus( + &self, + launcher: &mut tdx::launch::Launcher, + hob_address: u64, + ) -> Result<()> { match &self.tdx { Some(_) => { - launcher.init_vcpus(0).unwrap(); + launcher.init_vcpus(hob_address).unwrap(); Ok(()) } None => Err(Error::InvalidTee), diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs index ffa9e6eac..29a49b7fc 100644 --- a/src/vmm/src/resources.rs +++ b/src/vmm/src/resources.rs @@ -160,6 +160,9 @@ pub struct VmResources { /// The parameters for the initrd bundle to be loaded in this microVM. #[cfg(feature = "tee")] pub initrd_bundle: Option, + /// Path to a user-provided TD-Shim binary for TDX guests. + #[cfg(feature = "tdx")] + pub tee_firmware_config: Option, /// The fs device. 
#[cfg(not(feature = "tee"))] pub fs: Vec, @@ -335,6 +338,11 @@ impl VmResources { Ok(()) } + #[cfg(feature = "tdx")] + pub fn set_tee_firmware_config(&mut self, cfg: FirmwareConfig) { + self.tee_firmware_config = Some(cfg); + } + #[cfg(not(feature = "tee"))] pub fn add_fs_device(&mut self, config: FsDeviceConfig) { self.fs.push(config) @@ -526,3 +534,24 @@ mod tests { ); } } + +#[cfg(all(test, feature = "tdx"))] +mod tee_firmware_tests { + use super::*; + use crate::vmm_config::firmware::FirmwareConfig; + use std::path::PathBuf; + + #[test] + fn test_set_and_get_tee_firmware_config() { + let mut r = VmResources::default(); + assert!(r.tee_firmware_config.is_none()); + let cfg = FirmwareConfig { + path: PathBuf::from("/tmp/td-shim.bin"), + }; + r.set_tee_firmware_config(cfg); + assert_eq!( + r.tee_firmware_config.unwrap().path, + PathBuf::from("/tmp/td-shim.bin") + ); + } +}