From a11ddbf5fda4c96da3c9dd0c4b8422aac330d0e5 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Fri, 20 Mar 2026 03:48:44 +0000 Subject: [PATCH 1/2] fix: unify RA-TLS cert attestation format and fix onboard os_image_hash 1. ra-tls: use unified PHALA_RATLS_ATTESTATION OID for all cert types (including TDX) instead of the legacy separate TDX_QUOTE + EVENT_LOG OIDs. The new format preserves vm_config (including os_image_hash). The reader already prefers the new OID and falls back to old OIDs for backward compat with existing certs. 2. kms: when the remote source KMS uses the old cert format (missing vm_config), the receiver-side ensure_kms_allowed fills os_image_hash from the local KMS value. This is safe because mrAggregated already validates OS image integrity through the RTMR measurement chain. TODO: remove once all source KMS instances use the new cert format. --- kms/src/main_service/upgrade_authority.rs | 13 ++++++++++++- ra-tls/src/cert.rs | 22 ++++------------------ 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/kms/src/main_service/upgrade_authority.rs b/kms/src/main_service/upgrade_authority.rs index 6b7ace06..f3082ab5 100644 --- a/kms/src/main_service/upgrade_authority.rs +++ b/kms/src/main_service/upgrade_authority.rs @@ -224,8 +224,19 @@ pub(crate) async fn ensure_kms_allowed( cfg: &KmsConfig, attestation: &VerifiedAttestation, ) -> Result<()> { - let boot_info = build_boot_info(attestation, false, "") + let mut boot_info = build_boot_info(attestation, false, "") .context("failed to build KMS boot info from attestation")?; + // Workaround: old source KMS instances use the legacy cert format (separate TDX_QUOTE + + // EVENT_LOG OIDs) which lacks vm_config, resulting in an empty os_image_hash. + // Fill it from the local KMS's own value. This is safe because mrAggregated already + // validates OS image integrity transitively through the RTMR measurement chain. + // TODO: remove once all source KMS instances use the unified PHALA_RATLS_ATTESTATION format. + if boot_info.os_image_hash.is_empty() { + let local_info = local_kms_boot_info(cfg.pccs_url.as_deref()) + .await + .context("failed to get local KMS boot info for os_image_hash fallback")?; + boot_info.os_image_hash = local_info.os_image_hash; + } let response = cfg .auth_api .is_app_allowed(&boot_info, true) diff --git a/ra-tls/src/cert.rs b/ra-tls/src/cert.rs index 5dff761b..4527573c 100644 --- a/ra-tls/src/cert.rs +++ b/ra-tls/src/cert.rs @@ -25,12 +25,11 @@ use x509_parser::x509::SubjectPublicKeyInfo; use crate::oids::{ PHALA_RATLS_APP_ID, PHALA_RATLS_APP_INFO, PHALA_RATLS_ATTESTATION, PHALA_RATLS_CERT_USAGE, - PHALA_RATLS_EVENT_LOG, PHALA_RATLS_TDX_QUOTE, }; use crate::traits::CertExt; #[cfg(feature = "quote")] use dstack_attest::attestation::QuoteContentType; -use dstack_attest::attestation::{AppInfo, Attestation, AttestationQuote, VersionedAttestation}; +use dstack_attest::attestation::{AppInfo, Attestation, VersionedAttestation}; /// A CA certificate and private key. pub struct CaCert { @@ -389,21 +388,8 @@ impl CertRequest<'_, Key> { add_ext(&mut params, PHALA_RATLS_CERT_USAGE, usage); } if let Some(ver_att) = self.attestation { - let VersionedAttestation::V0 { attestation } = &ver_att; - match &attestation.quote { - AttestationQuote::DstackTdx(tdx_quote) => { - // For backward compatibility, we serialize the quote to the classic oids. - let event_log = serde_json::to_vec(&tdx_quote.event_log) - .context("Failed to serialize event log")?; - add_ext(&mut params, PHALA_RATLS_TDX_QUOTE, &tdx_quote.quote); - add_ext(&mut params, PHALA_RATLS_EVENT_LOG, &event_log); - } - _ => { - // The event logs are too large on GCP TDX to put in the certificate, so we strip them - let attestation_bytes = ver_att.clone().into_stripped().to_scale(); - add_ext(&mut params, PHALA_RATLS_ATTESTATION, &attestation_bytes); - } - } + let attestation_bytes = ver_att.clone().into_stripped().to_scale(); + add_ext(&mut params, PHALA_RATLS_ATTESTATION, &attestation_bytes); } if let Some(ca_level) = self.ca_level { params.is_ca = IsCa::Ca(BasicConstraints::Constrained(ca_level)); @@ -576,7 +562,7 @@ pub fn generate_ra_cert_with_app_id( #[cfg(test)] mod tests { use super::*; - use dstack_attest::attestation::TdxQuote; + use dstack_attest::attestation::{AttestationQuote, TdxQuote}; use rcgen::PKCS_ECDSA_P256_SHA256; use scale::Encode; From abfebc0dafce2e02989b6c9da35298a5782954c7 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Fri, 20 Mar 2026 03:48:47 +0000 Subject: [PATCH 2/2] docs: update KMS test guides for cert format fix --- tests/docs/kms-bootstrap-onboard.md | 4 ++-- tests/docs/kms-self-authorization.md | 16 +++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/tests/docs/kms-bootstrap-onboard.md b/tests/docs/kms-bootstrap-onboard.md index 3c242186..f9f4d323 100644 --- a/tests/docs/kms-bootstrap-onboard.md +++ b/tests/docs/kms-bootstrap-onboard.md @@ -78,7 +78,7 @@ Operational notes: 4. On teepod with gateway, onboard mode usually uses the `-8000` URL, while runtime TLS KMS RPC usually uses the `-8000s` URL. **Port forwarding** (`--port tcp:0.0.0.0::8000`) is simpler than gateway for testing, because gateway requires the auth API to return a `gatewayAppId` at boot time. 5. If you use a very small custom webhook instead of the real auth service, `KMS.GetMeta` may fail because `auth_api.get_info()` expects extra chain / contract metadata fields. In that case, use `GetTempCaCert` as the runtime readiness probe. 6. dstack CVMs use QEMU user-mode networking — the host is reachable at **`10.0.2.2`** from inside the CVM. The `source_url` in `Onboard.Onboard` must use a CVM-reachable address (e.g., `https://10.0.2.2:/prpc`), not `127.0.0.1`. -7. **Remote KMS attestation has an empty `osImageHash`.** When the receiver verifies the source KMS during onboard, the `osImageHash` is empty because `vm_config` is unavailable for remote attestation. Auth configs for receiver-side checks must include `"0x"` in the `osImages` array. +7. **~~Remote KMS attestation has an empty `osImageHash`.~~** Fixed: RA-TLS certs now use the unified `PHALA_RATLS_ATTESTATION` format which preserves `vm_config`. For old source KMS instances, the receiver-side check fills `osImageHash` from the local KMS's own value automatically. No special `"0x"` entry in `osImages` is needed anymore. --- @@ -109,7 +109,7 @@ At minimum, both policies must allow the KMS instance they serve. During onboard For `auth-simple`, `kms.mrAggregated = []` is a deny-all policy for KMS. Add the current KMS MR values explicitly when switching a test from deny to allow. -Include `"0x"` in the `osImages` array for configs used in receiver-side onboard checks (see operational note 7 above). +You no longer need `"0x"` in the `osImages` array — the receiver-side check now resolves `osImageHash` automatically. ### 4.3 Deploy `kms-src` and `kms-dst` diff --git a/tests/docs/kms-self-authorization.md b/tests/docs/kms-self-authorization.md index 91c40a6f..7df4ad18 100644 --- a/tests/docs/kms-self-authorization.md +++ b/tests/docs/kms-self-authorization.md @@ -25,7 +25,7 @@ This guide is written as a deployment-and-test runbook so an AI agent can follow > 7. KMS now always requires quote/attestation. For local development without TDX hardware, use `sdk/simulator` instead of trying to run a no-attestation KMS flow. > 8. For `auth-simple`, `kms.mrAggregated = []` is a deny-all policy for KMS. Use that as the baseline deny configuration, then add the measured KMS MR values for allow cases. > 9. **Port forwarding is simpler than gateway for testing.** Using `--gateway` requires the auth API to return a valid `gatewayAppId`, which adds unnecessary complexity. Use `--port tcp:0.0.0.0::8000` instead. -> 10. **Remote KMS attestation has an empty `osImageHash`.** When the receiver verifies the source KMS during onboard, the `osImageHash` field in the attestation is empty (because `vm_config` is not available for the remote attestation). Auth configs for receiver-side checks must include `"0x"` in the `osImages` array to match this empty hash. +> 10. **~~Remote KMS attestation has an empty `osImageHash`.~~** Fixed: RA-TLS certs now use the unified `PHALA_RATLS_ATTESTATION` format which preserves `vm_config`. For old source KMS instances that still use the legacy cert format, the receiver-side `ensure_kms_allowed` automatically fills `osImageHash` from the local KMS's own value. No special `"0x"` entry in `osImages` is needed anymore. > 11. The `source_url` in the `Onboard.Onboard` request must use an address **reachable from inside the CVM** (e.g., `https://10.0.2.2:/prpc`), not `127.0.0.1` which is the CVM's own loopback. --- @@ -49,13 +49,13 @@ This guide is written as a deployment-and-test runbook so an AI agent can follow ## 1. Why this document exists -PR #538 already proposes a richer `kms/e2e/` framework, but as of **2026-03-19** it is still open/draft and touches overlapping KMS files. To avoid waiting for that PR, this guide uses: +This guide provides a standalone test procedure that does not depend on a dedicated e2e framework. It uses: - existing KMS deploy flows - `auth-simple` as a controllable auth API - manual RPC calls via `curl` -This keeps the test independent from PR #538 while still exercising real deployment paths. +This exercises real deployment paths with minimal dependencies. --- @@ -98,7 +98,7 @@ Policy responsibilities: Before starting, make sure the following are available: -1. A branch or image containing the PR #573 KMS changes +1. A KMS image built from current `master` (includes PR #573 auth checks, #579 mandatory attestation, #581 dedup refactor) 2. A working `dstack-vmm` or teepod deployment target 3. Two routable KMS onboard URLs 4. `bun` installed on the host, because `kms/auth-simple` runs on Bun @@ -315,12 +315,10 @@ All three values above are expected to be hex strings **without** the `0x` prefi Use a wrong `mrAggregated` value while allowing the observed OS image. -> **Important:** include `"0x"` in `osImages` to handle remote KMS attestation during onboard receiver-side checks, where `osImageHash` is empty because `vm_config` is unavailable for the remote attestation. - ```bash cat > /tmp/kms-self-auth/deny-by-mr.json <<'EOF' { - "osImages": ["0xREPLACE_OS", "0x"], + "osImages": ["0xREPLACE_OS"], "gatewayAppId": "any", "kms": { "mrAggregated": ["0x0000000000000000000000000000000000000000000000000000000000000000"], @@ -337,7 +335,7 @@ EOF ```bash cat > /tmp/kms-self-auth/allow-single.json <<'EOF' { - "osImages": ["0xREPLACE_OS", "0x"], + "osImages": ["0xREPLACE_OS"], "gatewayAppId": "any", "kms": { "mrAggregated": ["0xREPLACE_MR"], @@ -354,7 +352,7 @@ EOF ```bash cat > /tmp/kms-self-auth/allow-src-and-dst.json <<'EOF' { - "osImages": ["0xREPLACE_SRC_OS", "0xREPLACE_DST_OS", "0x"], + "osImages": ["0xREPLACE_SRC_OS", "0xREPLACE_DST_OS"], "gatewayAppId": "any", "kms": { "mrAggregated": ["0xREPLACE_SRC_MR", "0xREPLACE_DST_MR"],