diff --git a/crates/trusted-server-adapter-fastly/src/main.rs b/crates/trusted-server-adapter-fastly/src/main.rs index 741ae8ff..a2030443 100644 --- a/crates/trusted-server-adapter-fastly/src/main.rs +++ b/crates/trusted-server-adapter-fastly/src/main.rs @@ -24,7 +24,10 @@ use trusted_server_core::ec::registry::PartnerRegistry; use trusted_server_core::ec::EcContext; use trusted_server_core::error::TrustedServerError; use trusted_server_core::geo::GeoInfo; -use trusted_server_core::integrations::{IntegrationRegistry, ProxyDispatchInput}; +use trusted_server_core::integrations::{ + IntegrationRegistry, ProxyDispatchInput, RequestFilterRegistryInput, + RequestFilterRegistryOutcome, +}; use trusted_server_core::platform::RuntimeServices; use trusted_server_core::proxy::{ handle_asset_proxy_request, handle_first_party_click, handle_first_party_proxy, @@ -322,6 +325,44 @@ async fn route_request( } } + let request_filter_effects = match integration_registry + .filter_request(RequestFilterRegistryInput { + settings, + services: runtime_services, + req: &mut req, + }) + .await + { + Ok(RequestFilterRegistryOutcome::Continue(effects)) => effects, + Ok(RequestFilterRegistryOutcome::Respond { response, effects }) => { + let mut response = *response; + ec_finalize_response( + settings, + &ec_context, + finalize_kv_graph.as_ref(), + partner_registry, + eids_cookie.as_deref(), + sharedid_cookie.as_deref(), + &mut response, + ); + finalize_response(settings, geo_info.as_ref(), &mut response); + effects.apply_to_response(&mut response); + return Ok(RouteOutcome { + response: Some(response), + pull_sync_context: None, + }); + } + Err(e) => { + log::error!("Failed to run integration request filters: {:?}", e); + let mut response = to_error_response(&e); + finalize_response(settings, geo_info.as_ref(), &mut response); + return Ok(RouteOutcome { + response: Some(response), + pull_sync_context: None, + }); + } + }; + // Get path and method for routing let path = 
req.get_path().to_string(); let method = req.get_method().clone(); @@ -446,6 +487,7 @@ async fn route_request( asset_response.into_response_and_body(); if let Some(body) = stream_body { finalize_response(settings, geo_info.as_ref(), &mut response); + request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let mut streaming_body = response.stream_to_client(); @@ -502,6 +544,7 @@ async fn route_request( &mut response, ); finalize_response(settings, geo_info.as_ref(), &mut response); + request_filter_effects.apply_to_response(&mut response); let mut streaming_body = response.stream_to_client(); let mut stream_succeeded = false; @@ -568,6 +611,7 @@ async fn route_request( } finalize_response(settings, geo_info.as_ref(), &mut response); + request_filter_effects.apply_to_response(&mut response); asset_cache_policy.apply_after_route_finalization(&mut response); let pull_sync_context = if is_real_browser && organic_route && route_succeeded { diff --git a/crates/trusted-server-adapter-fastly/src/platform.rs b/crates/trusted-server-adapter-fastly/src/platform.rs index ad6d130e..7a963d14 100644 --- a/crates/trusted-server-adapter-fastly/src/platform.rs +++ b/crates/trusted-server-adapter-fastly/src/platform.rs @@ -521,6 +521,10 @@ pub fn build_runtime_services( client_ip: req.get_client_ip_addr(), tls_protocol: req.get_tls_protocol().map(str::to_string), tls_cipher: req.get_tls_cipher_openssl_name().map(str::to_string), + tls_ja4: req.get_tls_ja4().map(str::to_string), + h2_fingerprint: req.get_client_h2_fingerprint().map(str::to_string), + server_hostname: std::env::var("FASTLY_HOSTNAME").ok(), + server_region: std::env::var("FASTLY_REGION").ok(), }) .build() } diff --git a/crates/trusted-server-adapter-fastly/src/route_tests.rs b/crates/trusted-server-adapter-fastly/src/route_tests.rs index 15aa2573..8931b192 100644 --- a/crates/trusted-server-adapter-fastly/src/route_tests.rs +++ 
b/crates/trusted-server-adapter-fastly/src/route_tests.rs @@ -139,6 +139,7 @@ struct RecordingHttpClient { calls: Mutex>, response_status: StatusCode, response_headers: Vec<(String, String)>, + response_body: Vec, } struct StreamingRecordingHttpClient { @@ -151,6 +152,7 @@ impl RecordingHttpClient { calls: Mutex::new(Vec::new()), response_status, response_headers: Vec::new(), + response_body: Vec::new(), } } @@ -164,6 +166,11 @@ impl RecordingHttpClient { .collect(); self } + + fn with_response_body(mut self, body: impl Into>) -> Self { + self.response_body = body.into(); + self + } } impl StreamingRecordingHttpClient { @@ -214,7 +221,7 @@ impl PlatformHttpClient for RecordingHttpClient { builder = builder.header(name, value); } let edge_response = builder - .body(EdgeBody::from(Vec::new())) + .body(EdgeBody::from(self.response_body.clone())) .map_err(|_| Report::new(PlatformError::HttpClient))?; Ok(PlatformResponse::new(edge_response)) @@ -442,6 +449,41 @@ fn create_auction_test_settings(providers: &str) -> Settings { Settings::from_toml(&config).expect("should parse adapter auction route test settings") } +fn datadome_protection_toml() -> &'static str { + r#" + [integrations.datadome] + enabled = true + enable_protection = true + server_side_key_secret_store = "datadome" + server_side_key_secret_name = "server_side_key" + "# +} + +fn create_datadome_auction_test_settings(providers: &str) -> Settings { + let base = base_route_settings_toml(); + let datadome = datadome_protection_toml(); + let config = format!( + r#"{base} + +{datadome} + + [auction] + enabled = true + providers = {providers} + timeout_ms = 2000 + "#, + ); + + Settings::from_toml(&config).expect("should parse DataDome route test settings") +} + +fn datadome_secret_store() -> Arc { + Arc::new(HashMapSecretStore::new(HashMap::from([( + "server_side_key".to_string(), + b"datadome-server-side-key".to_vec(), + )]))) +} + fn build_route_stack(settings: &Settings) -> (AuctionOrchestrator, 
IntegrationRegistry) { let orchestrator = build_orchestrator(settings).expect("should build auction orchestrator"); let integration_registry = @@ -504,6 +546,10 @@ fn test_runtime_services_with_secret_http_client_and_geo( client_ip: req.get_client_ip_addr(), tls_protocol: req.get_tls_protocol().map(str::to_string), tls_cipher: req.get_tls_cipher_openssl_name().map(str::to_string), + tls_ja4: req.get_tls_ja4().map(str::to_string), + h2_fingerprint: req.get_client_h2_fingerprint().map(str::to_string), + server_hostname: None, + server_region: None, }) .build() } @@ -584,6 +630,229 @@ fn valid_banner_ad_unit_body() -> Vec { .expect("should serialize valid auction route test body") } +#[test] +fn datadome_challenge_short_circuits_before_publisher_origin() { + let settings = create_datadome_auction_test_settings("[]"); + let (orchestrator, integration_registry) = build_route_stack(&settings); + let req = Request::get("https://test.com/protected-page"); + let http_client = Arc::new( + RecordingHttpClient::new(StatusCode::FORBIDDEN) + .with_response_headers(vec![ + ("x-datadomeresponse", "403"), + ("x-datadome-headers", "Set-Cookie X-DD-B"), + ("set-cookie", "datadome=challenge; Path=/; HttpOnly"), + ("x-dd-b", "1"), + ]) + .with_response_body(b"blocked by datadome".to_vec()), + ); + let services = test_runtime_services_with_secret_and_http_client( + &req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::clone(&http_client) as Arc, + ); + + let mut response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + "should route DataDome challenge response", + ); + + assert_eq!( + response.get_status(), + StatusCode::FORBIDDEN, + "should return the DataDome challenge status instead of contacting publisher origin" + ); + assert_eq!( + response.get_header_str("x-dd-b"), + Some("1"), + "should apply DataDome downstream challenge headers" + ); + assert_eq!( + response.get_header_str(header::SET_COOKIE), + 
Some("datadome=challenge; Path=/; HttpOnly"), + "should append the DataDome challenge cookie" + ); + assert_eq!( + response.take_body_str(), + "blocked by datadome", + "should return the DataDome challenge body" + ); + + let calls = http_client + .calls + .lock() + .expect("should lock recorded calls"); + assert_eq!(calls.len(), 1, "should call only the Protection API"); + assert_eq!(calls[0].method, Method::POST, "should POST to DataDome"); + assert_eq!( + calls[0].uri, "https://api-fastly.datadome.co/validate-request", + "should call the default DataDome Protection API endpoint" + ); +} + +#[test] +fn datadome_allow_applies_downstream_headers_and_protects_auction() { + let settings = create_datadome_auction_test_settings("[]"); + let (orchestrator, integration_registry) = build_route_stack(&settings); + let req = Request::post("https://test.com/auction") + .with_header(header::CONTENT_TYPE, "application/json") + .with_body(valid_banner_ad_unit_body()); + let http_client = Arc::new( + RecordingHttpClient::new(StatusCode::OK).with_response_headers(vec![ + ("x-datadomeresponse", "200"), + ("x-datadome-headers", "Set-Cookie X-DD-B"), + ("set-cookie", "datadome=allow; Path=/; HttpOnly"), + ("x-dd-b", "allowed"), + ]), + ); + let services = test_runtime_services_with_secret_and_http_client( + &req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::clone(&http_client) as Arc, + ); + + let response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + "should route DataDome-allowed auction request", + ); + + assert_eq!( + response.get_status(), + StatusCode::BAD_GATEWAY, + "empty-provider auction should still run after DataDome allows the request" + ); + assert_eq!( + response.get_header_str("x-dd-b"), + Some("allowed"), + "should apply DataDome downstream headers after route finalization" + ); + assert_eq!( + response.get_header_str(header::SET_COOKIE), + Some("datadome=allow; Path=/; HttpOnly"), + "should 
preserve DataDome downstream Set-Cookie on allowed requests" + ); + + let calls = http_client + .calls + .lock() + .expect("should lock recorded calls"); + assert_eq!( + calls.len(), + 1, + "should protect /auction through DataDome by default" + ); + assert_eq!(calls[0].method, Method::POST, "should POST to DataDome"); +} + +#[test] +fn datadome_api_error_fails_open_before_routing() { + let settings = create_datadome_auction_test_settings("[]"); + let (orchestrator, integration_registry) = build_route_stack(&settings); + let req = Request::post("https://test.com/auction") + .with_header(header::CONTENT_TYPE, "application/json") + .with_body(b"{not-json".to_vec()); + let services = test_runtime_services_with_secret_and_http_client( + &req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::new(NoopHttpClient) as Arc, + ); + + let response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &services, + req, + "should fail open when DataDome API call fails", + ); + + assert_eq!( + response.get_status(), + StatusCode::BAD_REQUEST, + "malformed auction JSON should be handled by the route after DataDome fails open" + ); + assert_eq!( + response.get_header_str("x-dd-b"), + None, + "should not apply DataDome headers when the Protection API call fails" + ); +} + +#[test] +fn datadome_skips_internal_and_static_asset_routes_by_default() { + let mut settings = create_datadome_auction_test_settings("[]"); + settings.publisher.origin_url = "https://".to_string(); + let (orchestrator, integration_registry) = build_route_stack(&settings); + let http_client = Arc::new( + RecordingHttpClient::new(StatusCode::OK).with_response_headers(vec![ + ("x-datadomeresponse", "200"), + ("x-datadome-headers", "X-DD-B"), + ("x-dd-b", "should-not-apply"), + ]), + ); + + let discovery_req = Request::get("https://test.com/.well-known/trusted-server.json"); + let discovery_services = test_runtime_services_with_secret_and_http_client( + &discovery_req, + 
Arc::new(FixedBackend), + datadome_secret_store(), + Arc::clone(&http_client) as Arc, + ); + let discovery_response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &discovery_services, + discovery_req, + "should route internal discovery request without DataDome", + ); + assert_eq!( + discovery_response.get_status(), + StatusCode::OK, + "discovery endpoint should stay internal" + ); + + let image_req = Request::get("https://test.com/logo.png"); + let image_services = test_runtime_services_with_secret_and_http_client( + &image_req, + Arc::new(FixedBackend), + datadome_secret_store(), + Arc::clone(&http_client) as Arc, + ); + let image_response = route_buffered_response( + &settings, + &orchestrator, + &integration_registry, + &image_services, + image_req, + "should route static asset request without DataDome", + ); + assert_eq!( + image_response.get_status(), + StatusCode::BAD_GATEWAY, + "static asset should skip DataDome then fail at the intentionally invalid publisher origin" + ); + + let calls = http_client + .calls + .lock() + .expect("should lock recorded calls"); + assert!( + calls.is_empty(), + "should not call DataDome for internal routes or default-excluded static assets" + ); +} + #[test] fn routes_use_request_local_consent() { let settings = create_test_settings(); diff --git a/crates/trusted-server-core/Cargo.toml b/crates/trusted-server-core/Cargo.toml index c80b0e6a..95ef3a03 100644 --- a/crates/trusted-server-core/Cargo.toml +++ b/crates/trusted-server-core/Cargo.toml @@ -45,6 +45,7 @@ toml = { workspace = true } trusted-server-js = { path = "../js" } trusted-server-openrtb = { path = "../openrtb" } url = { workspace = true } +urlencoding = { workspace = true } uuid = { workspace = true } validator = { workspace = true } ed25519-dalek = { workspace = true } @@ -70,7 +71,6 @@ default = [] criterion = { workspace = true } edgezero-core = { workspace = true, features = ["test-utils"] } temp-env = { workspace = true } 
-urlencoding = { workspace = true } [[bench]] name = "consent_decode" diff --git a/crates/trusted-server-core/src/http_util.rs b/crates/trusted-server-core/src/http_util.rs index 50314f4c..85665b12 100644 --- a/crates/trusted-server-core/src/http_util.rs +++ b/crates/trusted-server-core/src/http_util.rs @@ -466,11 +466,7 @@ mod tests { } fn default_client_info() -> ClientInfo { - ClientInfo { - client_ip: None, - tls_protocol: None, - tls_cipher: None, - } + ClientInfo::default() } #[test] @@ -819,9 +815,8 @@ mod tests { fn request_info_https_from_client_info_tls_protocol() { let req = build_request(Method::GET, "https://test.example.com/page"); let client_info = ClientInfo { - client_ip: None, tls_protocol: Some("TLSv1.3".to_string()), - tls_cipher: None, + ..ClientInfo::default() }; let info = RequestInfo::from_request(&req, &client_info); @@ -836,9 +831,8 @@ mod tests { fn request_info_https_from_client_info_tls_cipher() { let req = build_request(Method::GET, "https://test.example.com/page"); let client_info = ClientInfo { - client_ip: None, - tls_protocol: None, tls_cipher: Some("TLS_AES_128_GCM_SHA256".to_string()), + ..ClientInfo::default() }; let info = RequestInfo::from_request(&req, &client_info); diff --git a/crates/trusted-server-core/src/integrations/datadome.rs b/crates/trusted-server-core/src/integrations/datadome.rs index 79c34c83..b63d13a9 100644 --- a/crates/trusted-server-core/src/integrations/datadome.rs +++ b/crates/trusted-server-core/src/integrations/datadome.rs @@ -63,17 +63,22 @@ use fastly::http::{header, Method, StatusCode}; use fastly::{Request, Response}; use regex::Regex; use serde::Deserialize; +use serde_json::Value as JsonValue; +use url::Url; use validator::Validate; use crate::backend::BackendConfig; use crate::error::TrustedServerError; use crate::integrations::{ AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, - IntegrationEndpoint, IntegrationProxy, IntegrationRegistration, + 
IntegrationEndpoint, IntegrationHeadInjector, IntegrationHtmlContext, IntegrationProxy, + IntegrationRegistration, IntegrationRequestFilter, RequestFilterDecision, RequestFilterInput, }; use crate::platform::RuntimeServices; use crate::settings::{IntegrationConfig, Settings}; +mod protection; + const DATADOME_INTEGRATION_ID: &str = "datadome"; /// Regex pattern for matching and rewriting `DataDome` URLs in script content. @@ -100,6 +105,7 @@ static DATADOME_URL_PATTERN: LazyLock = LazyLock::new(|| { /// Configuration for `DataDome` integration. #[derive(Debug, Clone, Deserialize, Validate)] +#[serde(deny_unknown_fields)] pub struct DataDomeConfig { /// Enable/disable the integration #[serde(default = "default_enabled")] @@ -125,6 +131,56 @@ pub struct DataDomeConfig { /// Whether to rewrite `DataDome` script URLs in HTML to first-party paths #[serde(default = "default_rewrite_sdk")] pub rewrite_sdk: bool, + + /// Whether to call `DataDome` Protection API before route matching. + #[serde(default)] + pub enable_protection: bool, + + /// Runtime secret store containing the `DataDome` server-side key. + #[serde(default = "default_server_side_key_secret_store")] + pub server_side_key_secret_store: String, + + /// Secret name containing the `DataDome` server-side key. + #[serde(default = "default_server_side_key_secret_name")] + pub server_side_key_secret_name: String, + + /// Base URL for the `DataDome` Protection API. + #[serde(default = "default_protection_api_origin")] + #[validate(url)] + pub protection_api_origin: String, + + /// First-byte timeout for Protection API calls, in milliseconds. + #[serde(default = "default_timeout_ms")] + #[validate(range(min = 1, max = 10000))] + pub timeout_ms: u32, + + /// Regex for URLs to exclude from Protection API validation. + #[serde(default = "default_url_pattern_exclusion")] + pub url_pattern_exclusion: String, + + /// Regex for URLs to include in Protection API validation. 
+ #[serde(default)] + pub url_pattern_inclusion: String, + + /// Reserved flag for future GraphQL payload extraction. + #[serde(default)] + pub enable_graphql_support: bool, + + /// `DataDome` client-side key used for auto-injecting the browser tag. + #[serde(default)] + pub client_side_key: String, + + /// Whether to auto-inject the `DataDome` browser tag when a client-side key exists. + #[serde(default = "default_inject_client_side_tag")] + pub inject_client_side_tag: bool, + + /// URL used for the injected `DataDome` browser tag. + #[serde(default = "default_client_side_tag_url")] + pub client_side_tag_url: String, + + /// Options assigned to `window.ddoptions` before loading the browser tag. + #[serde(default = "default_client_side_configuration")] + pub client_side_configuration: JsonValue, } fn default_enabled() -> bool { @@ -147,6 +203,38 @@ fn default_rewrite_sdk() -> bool { true } +fn default_protection_api_origin() -> String { + "https://api-fastly.datadome.co".to_string() +} + +fn default_server_side_key_secret_store() -> String { + "datadome".to_string() +} + +fn default_server_side_key_secret_name() -> String { + "server_side_key".to_string() +} + +fn default_timeout_ms() -> u32 { + 1500 +} + +fn default_url_pattern_exclusion() -> String { + r"\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$".to_string() +} + +fn default_inject_client_side_tag() -> bool { + true +} + +fn default_client_side_tag_url() -> String { + "/integrations/datadome/tags.js".to_string() +} + +fn default_client_side_configuration() -> JsonValue { + serde_json::json!({ "ajaxListenerPath": true }) +} + impl Default for DataDomeConfig { fn default() -> Self { Self { @@ -155,6 +243,18 @@ impl Default for DataDomeConfig { api_origin: default_api_origin(), cache_ttl_seconds: default_cache_ttl(), rewrite_sdk: default_rewrite_sdk(), + enable_protection: false, + 
server_side_key_secret_store: default_server_side_key_secret_store(), + server_side_key_secret_name: default_server_side_key_secret_name(), + protection_api_origin: default_protection_api_origin(), + timeout_ms: default_timeout_ms(), + url_pattern_exclusion: default_url_pattern_exclusion(), + url_pattern_inclusion: String::new(), + enable_graphql_support: false, + client_side_key: String::new(), + inject_client_side_tag: default_inject_client_side_tag(), + client_side_tag_url: default_client_side_tag_url(), + client_side_configuration: default_client_side_configuration(), } } } @@ -168,11 +268,92 @@ impl IntegrationConfig for DataDomeConfig { /// `DataDome` integration implementation. pub struct DataDomeIntegration { config: DataDomeConfig, + protection_exclusion: Option, + protection_inclusion: Option, } impl DataDomeIntegration { + #[cfg(test)] fn new(config: DataDomeConfig) -> Arc { - Arc::new(Self { config }) + Self::try_new(config).expect("should create DataDome integration") + } + + fn try_new(mut config: DataDomeConfig) -> Result, Report> { + config.server_side_key_secret_store = + config.server_side_key_secret_store.trim().to_string(); + config.server_side_key_secret_name = config.server_side_key_secret_name.trim().to_string(); + config.protection_api_origin = config.protection_api_origin.trim().to_string(); + + if config.enable_protection { + if config.server_side_key_secret_store.is_empty() + || config.server_side_key_secret_name.is_empty() + { + return Err(Report::new(Self::error( + "server_side_key_secret_store and server_side_key_secret_name are required when enable_protection is true", + ))); + } + Self::validate_protection_api_origin(&config.protection_api_origin)?; + } + + if config.enable_graphql_support { + log::warn!("[datadome] enable_graphql_support is reserved and ignored in v1"); + } + + let protection_exclusion = + Self::compile_optional_regex(&config.url_pattern_exclusion, "url_pattern_exclusion")?; + let protection_inclusion = + 
Self::compile_optional_regex(&config.url_pattern_inclusion, "url_pattern_inclusion")?; + + Ok(Arc::new(Self { + config, + protection_exclusion, + protection_inclusion, + })) + } + + fn validate_protection_api_origin(origin: &str) -> Result<(), Report> { + let parsed = Url::parse(origin).map_err(|err| { + Report::new(Self::error(format!("Invalid protection_api_origin: {err}"))) + })?; + + if !parsed.scheme().eq_ignore_ascii_case("https") { + return Err(Report::new(Self::error( + "protection_api_origin must use https when enable_protection is true", + ))); + } + if parsed.host_str().is_none() { + return Err(Report::new(Self::error( + "protection_api_origin must include a host", + ))); + } + if !parsed.username().is_empty() || parsed.password().is_some() { + return Err(Report::new(Self::error( + "protection_api_origin must not include credentials", + ))); + } + if !matches!(parsed.path(), "" | "/") + || parsed.query().is_some() + || parsed.fragment().is_some() + { + return Err(Report::new(Self::error( + "protection_api_origin must be an origin URL without path, query, or fragment", + ))); + } + + Ok(()) + } + + fn compile_optional_regex( + pattern: &str, + name: &str, + ) -> Result, Report> { + if pattern.trim().is_empty() { + return Ok(None); + } + + Regex::new(&format!("(?i:{pattern})")) + .map(Some) + .map_err(|err| Report::new(Self::error(format!("Invalid {name}: {err}")))) } fn error(message: impl Into) -> TrustedServerError { @@ -423,6 +604,55 @@ impl IntegrationProxy for DataDomeIntegration { } } +#[async_trait(?Send)] +impl IntegrationRequestFilter for DataDomeIntegration { + fn integration_id(&self) -> &'static str { + DATADOME_INTEGRATION_ID + } + + async fn filter_request( + &self, + input: RequestFilterInput<'_>, + ) -> Result> { + Ok(self.filter_protection_request(input).await) + } +} + +impl IntegrationHeadInjector for DataDomeIntegration { + fn integration_id(&self) -> &'static str { + DATADOME_INTEGRATION_ID + } + + fn head_inserts(&self, _ctx: 
&IntegrationHtmlContext<'_>) -> Vec { + if !self.config.inject_client_side_tag || self.config.client_side_key.trim().is_empty() { + return Vec::new(); + } + + let key = serde_json::to_string(&self.config.client_side_key) + .unwrap_or_else(|err| { + log::warn!("[datadome] Failed to serialize client-side key: {err}"); + "\"\"".to_string() + }) + .replace("window.ddjskey={key};window.ddoptions={options};" + )] + } +} + impl IntegrationAttributeRewriter for DataDomeIntegration { fn integration_id(&self) -> &'static str { DATADOME_INTEGRATION_ID @@ -477,7 +707,7 @@ fn build( config.rewrite_sdk ); - Ok(Some(DataDomeIntegration::new(config))) + Ok(Some(DataDomeIntegration::try_new(config)?)) } /// Register the `DataDome` integration with Trusted Server. @@ -493,12 +723,16 @@ pub fn register( return Ok(None); }; - Ok(Some( - IntegrationRegistration::builder(DATADOME_INTEGRATION_ID) - .with_proxy(integration.clone()) - .with_attribute_rewriter(integration) - .build(), - )) + let mut builder = IntegrationRegistration::builder(DATADOME_INTEGRATION_ID) + .with_proxy(integration.clone()) + .with_attribute_rewriter(integration.clone()) + .with_head_injector(integration.clone()); + + if integration.config.enable_protection { + builder = builder.with_request_filter(integration); + } + + Ok(Some(builder.build())) } #[cfg(test)] @@ -512,6 +746,7 @@ mod tests { api_origin: "https://api-js.datadome.co".to_string(), cache_ttl_seconds: 3600, rewrite_sdk: true, + ..DataDomeConfig::default() } } @@ -686,6 +921,148 @@ mod tests { ); } + fn html_context_for_tests( + document_state: &crate::integrations::IntegrationDocumentState, + ) -> IntegrationHtmlContext<'_> { + IntegrationHtmlContext { + request_host: "publisher.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + document_state, + } + } + + #[test] + fn protection_enabled_requires_server_side_key_secret_store() { + let mut config = test_config(); + config.enable_protection = true; + 
config.server_side_key_secret_store = " ".to_string(); + + let err = match DataDomeIntegration::try_new(config) { + Ok(_) => panic!("should reject empty store"), + Err(err) => err, + }; + assert!( + format!("{err:?}").contains("server_side_key_secret_store"), + "should mention secret store config" + ); + } + + #[test] + fn protection_enabled_requires_server_side_key_secret_name() { + let mut config = test_config(); + config.enable_protection = true; + config.server_side_key_secret_name = " ".to_string(); + + let err = match DataDomeIntegration::try_new(config) { + Ok(_) => panic!("should reject empty name"), + Err(err) => err, + }; + assert!( + format!("{err:?}").contains("server_side_key_secret_name"), + "should mention secret name config" + ); + } + + #[test] + fn protection_enabled_requires_https_protection_api_origin() { + let mut config = test_config(); + config.enable_protection = true; + config.protection_api_origin = "http://api-fastly.datadome.co".to_string(); + + let err = match DataDomeIntegration::try_new(config) { + Ok(_) => panic!("should reject plaintext Protection API origin"), + Err(err) => err, + }; + + assert!( + format!("{err:?}").contains("must use https"), + "should require HTTPS for the server-side key transport" + ); + } + + #[test] + fn protection_enabled_requires_origin_only_protection_api_origin() { + for origin in [ + "https://api-fastly.datadome.co/custom", + "https://api-fastly.datadome.co?region=test", + "https://api-fastly.datadome.co#fragment", + "https://user:pass@api-fastly.datadome.co", + ] { + let mut config = test_config(); + config.enable_protection = true; + config.protection_api_origin = origin.to_string(); + + let err = match DataDomeIntegration::try_new(config) { + Ok(_) => panic!("should reject non-origin Protection API URL: {origin}"), + Err(err) => err, + }; + + assert!( + format!("{err:?}").contains("protection_api_origin"), + "should explain rejected Protection API origin {origin}: {err:?}" + ); + } + } + + #[test] + 
fn protection_enabled_accepts_https_protection_api_origin_with_trailing_slash() { + let mut config = test_config(); + config.enable_protection = true; + config.protection_api_origin = "https://api-fastly.datadome.co/".to_string(); + + DataDomeIntegration::try_new(config) + .expect("should accept HTTPS origin URL with optional trailing slash"); + } + + #[test] + fn head_injector_emits_client_side_tag_when_key_configured() { + let mut config = test_config(); + config.client_side_key = "test-client-key".to_string(); + config.client_side_configuration = serde_json::json!({ "ajaxListenerPath": true }); + let integration = DataDomeIntegration::new(config); + let document_state = crate::integrations::IntegrationDocumentState::default(); + let ctx = html_context_for_tests(&document_state); + + let inserts = integration.head_inserts(&ctx); + + assert_eq!(inserts.len(), 1, "should emit one combined DataDome insert"); + assert!( + inserts[0].contains("window.ddjskey=\"test-client-key\""), + "should serialize the configured client-side key" + ); + assert!( + inserts[0].contains("window.ddoptions={\"ajaxListenerPath\":true}"), + "should serialize DataDome client-side options" + ); + assert!( + inserts[0].contains(""), + "should load the configured DataDome tag URL" + ); + } + + #[test] + fn head_injector_omits_client_side_tag_when_disabled_or_blank() { + let mut blank_key = test_config(); + blank_key.client_side_key = " ".to_string(); + let integration = DataDomeIntegration::new(blank_key); + let document_state = crate::integrations::IntegrationDocumentState::default(); + let ctx = html_context_for_tests(&document_state); + assert!( + integration.head_inserts(&ctx).is_empty(), + "should not inject a tag without a client-side key" + ); + + let mut disabled = test_config(); + disabled.client_side_key = "test-client-key".to_string(); + disabled.inject_client_side_tag = false; + let integration = DataDomeIntegration::new(disabled); + assert!( + 
integration.head_inserts(&ctx).is_empty(), + "should not inject a tag when injection is disabled" + ); + } + #[test] fn extract_host() { assert_eq!( diff --git a/crates/trusted-server-core/src/integrations/datadome/protection.rs b/crates/trusted-server-core/src/integrations/datadome/protection.rs new file mode 100644 index 00000000..1ebd3ae8 --- /dev/null +++ b/crates/trusted-server-core/src/integrations/datadome/protection.rs @@ -0,0 +1,743 @@ +use std::collections::HashMap; +use std::sync::{Arc, LazyLock, Mutex}; +use std::time::Duration; + +use edgezero_core::body::Body as EdgeBody; +use edgezero_core::http::{request_builder, HeaderMap, HeaderName}; +use error_stack::{Report, ResultExt}; +use fastly::http::{header, Method, StatusCode}; +use fastly::Response; +use url::Url; + +use crate::error::TrustedServerError; +use crate::integrations::{ + HeaderMutation, RequestFilterDecision, RequestFilterEffects, RequestFilterInput, +}; +use crate::platform::{PlatformBackendSpec, PlatformHttpRequest, RuntimeServices, StoreName}; +use crate::redacted::Redacted; + +use super::DataDomeIntegration; + +const VALIDATE_REQUEST_PATH: &str = "/validate-request"; +const REQUEST_MODULE_NAME: &str = "Trusted-Server-Rust"; +const MODULE_VERSION: &str = env!("CARGO_PKG_VERSION"); +const HEADER_DATADOME_RESPONSE: &str = "x-datadomeresponse"; +const HEADER_DATADOME_REQUEST_HEADERS: &str = "x-datadome-request-headers"; +const HEADER_DATADOME_HEADERS: &str = "x-datadome-headers"; +const HEADER_DATADOME_CLIENT_ID: &str = "x-datadome-clientid"; +const HEADER_DATADOME_X_SET_COOKIE: &str = "x-datadome-x-set-cookie"; +const DATADOME_COOKIE_NAME: &str = "datadome"; + +#[derive(Debug, Clone, Eq, Hash, PartialEq)] +struct DataDomeServerSideKeyCacheKey { + secret_store: String, + secret_name: String, +} + +static DATADOME_SERVER_SIDE_KEY_CACHE: LazyLock< + Mutex>>>, +> = LazyLock::new(|| Mutex::new(HashMap::new())); + +impl DataDomeIntegration { + pub(super) async fn filter_protection_request( + 
&self, + input: RequestFilterInput<'_>, + ) -> RequestFilterDecision { + if !self.config.enable_protection || !self.is_request_protected(input.request) { + return RequestFilterDecision::Continue(RequestFilterEffects::default()); + } + + match self.filter_protection_request_inner(input).await { + Ok(decision) => decision, + Err(err) => { + log::warn!("[datadome] Protection API failed open: {err:?}"); + RequestFilterDecision::Continue(RequestFilterEffects::default()) + } + } + } + + async fn filter_protection_request_inner( + &self, + input: RequestFilterInput<'_>, + ) -> Result> { + let api_url = self.protection_validate_url(); + let backend_name = self.ensure_protection_backend(input.services, &api_url)?; + let server_side_key = self.load_server_side_key(input.services)?; + let payload = self.build_protection_payload(&input, server_side_key.as_ref()); + let encoded_body = form_encode(&payload.fields); + + let mut builder = request_builder() + .method(Method::POST.as_str()) + .uri(api_url.as_str()) + .header( + header::CONTENT_TYPE.as_str(), + "application/x-www-form-urlencoded", + ) + .header( + header::CONTENT_LENGTH.as_str(), + encoded_body.len().to_string(), + ); + + if payload.uses_header_client_id { + builder = builder.header(HEADER_DATADOME_X_SET_COOKIE, "true"); + } + + let request = builder + .body(EdgeBody::from(encoded_body)) + .change_context(Self::error( + "Failed to build DataDome Protection API request", + ))?; + + let platform_response = input + .services + .http_client() + .send(PlatformHttpRequest::new(request, backend_name)) + .await + .change_context(Self::error("Failed to call DataDome Protection API"))?; + + Ok(self.classify_protection_response(platform_response.response)) + } + + fn is_request_protected(&self, req: &fastly::Request) -> bool { + if req.get_method() == Method::OPTIONS { + return false; + } + + let path = req.get_path(); + if is_internal_path(path) { + return false; + } + + let target = format!("{}{}", request_host(req), path); + 
+ if let Some(inclusion) = &self.protection_inclusion { + if !inclusion.is_match(&target) { + return false; + } + } + + if let Some(exclusion) = &self.protection_exclusion { + if exclusion.is_match(&target) { + return false; + } + } + + true + } + + fn protection_validate_url(&self) -> String { + format!( + "{}{}", + self.config.protection_api_origin.trim_end_matches('/'), + VALIDATE_REQUEST_PATH + ) + } + + fn ensure_protection_backend( + &self, + services: &RuntimeServices, + api_url: &str, + ) -> Result> { + let parsed = Url::parse(api_url) + .change_context(Self::error("Invalid DataDome Protection API URL"))?; + let host = parsed + .host_str() + .ok_or_else(|| Report::new(Self::error("Missing DataDome Protection API host")))?; + let spec = PlatformBackendSpec { + scheme: parsed.scheme().to_string(), + host: host.to_string(), + port: parsed.port(), + certificate_check: true, + first_byte_timeout: Duration::from_millis(u64::from(self.config.timeout_ms)), + }; + + services.backend().ensure(&spec).change_context(Self::error( + "Failed to register DataDome Protection API backend", + )) + } + + fn load_server_side_key( + &self, + services: &RuntimeServices, + ) -> Result>, Report> { + let cache_key = server_side_key_cache_key(self); + if let Some(key) = DATADOME_SERVER_SIDE_KEY_CACHE + .lock() + .expect("should lock DataDome server-side key cache") + .get(&cache_key) + .cloned() + { + return Ok(key); + } + + let store_name = StoreName::from(self.config.server_side_key_secret_store.as_str()); + let key = services + .secret_store() + .get_string(&store_name, &self.config.server_side_key_secret_name) + .change_context(Self::error( + "Failed to read DataDome server-side key from secret store", + ))?; + let key = key.trim().to_string(); + if key.is_empty() { + return Err(Report::new(Self::error( + "DataDome server-side key secret must not be empty", + ))); + } + + let key = Arc::new(Redacted::new(key)); + let mut cache = DATADOME_SERVER_SIDE_KEY_CACHE + .lock() + 
.expect("should lock DataDome server-side key cache"); + Ok(Arc::clone(cache.entry(cache_key).or_insert(key))) + } + + fn build_protection_payload( + &self, + input: &RequestFilterInput<'_>, + server_side_key: &Redacted, + ) -> ProtectionPayload { + let req = input.request; + let client_info = input.services.client_info(); + let mut fields = Vec::new(); + let header_client_id = header_value(req, HEADER_DATADOME_CLIENT_ID); + let cookie_header = header_value(req, header::COOKIE.as_str()); + let cookie_client_id = parse_cookie_value(&cookie_header, DATADOME_COOKIE_NAME); + let client_id = if header_client_id.is_empty() { + cookie_client_id.unwrap_or_default() + } else { + header_client_id.clone() + }; + + push_field(&mut fields, "Key", server_side_key.expose()); + push_field( + &mut fields, + "IP", + client_info + .client_ip + .map(|ip| ip.to_string()) + .unwrap_or_default(), + ); + push_header_field(&mut fields, req, "Accept", header::ACCEPT.as_str()); + push_header_field(&mut fields, req, "AcceptCharset", "accept-charset"); + push_header_field( + &mut fields, + req, + "AcceptEncoding", + header::ACCEPT_ENCODING.as_str(), + ); + push_header_field( + &mut fields, + req, + "AcceptLanguage", + header::ACCEPT_LANGUAGE.as_str(), + ); + push_field( + &mut fields, + "AuthorizationLen", + header_value(req, header::AUTHORIZATION.as_str()) + .len() + .to_string(), + ); + push_header_field( + &mut fields, + req, + "CacheControl", + header::CACHE_CONTROL.as_str(), + ); + push_field(&mut fields, "ClientID", client_id); + push_header_field(&mut fields, req, "Connection", header::CONNECTION.as_str()); + push_header_field( + &mut fields, + req, + "ContentType", + header::CONTENT_TYPE.as_str(), + ); + push_field(&mut fields, "CookiesLen", cookie_header.len().to_string()); + push_header_field(&mut fields, req, "From", "from"); + push_field(&mut fields, "HeadersList", headers_list(req)); + push_field(&mut fields, "Host", request_host(req)); + push_field(&mut fields, "Method", 
req.get_method().as_str()); + push_field(&mut fields, "ModuleVersion", MODULE_VERSION); + push_header_field(&mut fields, req, "Origin", header::ORIGIN.as_str()); + push_field(&mut fields, "Port", "0"); + push_header_field( + &mut fields, + req, + "PostParamLen", + header::CONTENT_LENGTH.as_str(), + ); + push_header_field(&mut fields, req, "Pragma", header::PRAGMA.as_str()); + push_field(&mut fields, "Protocol", req.get_url().scheme()); + push_header_field(&mut fields, req, "Referer", header::REFERER.as_str()); + push_field(&mut fields, "Request", request_path_and_query(req)); + push_field(&mut fields, "RequestModuleName", REQUEST_MODULE_NAME); + push_header_field( + &mut fields, + req, + "SecCHDeviceMemory", + "sec-ch-device-memory", + ); + push_header_field(&mut fields, req, "SecCHUA", "sec-ch-ua"); + push_header_field(&mut fields, req, "SecCHUAArch", "sec-ch-ua-arch"); + push_header_field( + &mut fields, + req, + "SecCHUAFullVersionList", + "sec-ch-ua-full-version-list", + ); + push_header_field(&mut fields, req, "SecCHUAMobile", "sec-ch-ua-mobile"); + push_header_field(&mut fields, req, "SecCHUAModel", "sec-ch-ua-model"); + push_header_field(&mut fields, req, "SecCHUAPlatform", "sec-ch-ua-platform"); + push_header_field(&mut fields, req, "SecFetchDest", "sec-fetch-dest"); + push_header_field(&mut fields, req, "SecFetchMode", "sec-fetch-mode"); + push_header_field(&mut fields, req, "SecFetchSite", "sec-fetch-site"); + push_header_field( + &mut fields, + req, + "SecFetchStorageAccess", + "sec-fetch-storage-access", + ); + push_header_field(&mut fields, req, "SecFetchUser", "sec-fetch-user"); + push_field(&mut fields, "ServerHostname", request_host(req)); + push_field( + &mut fields, + "ServerName", + client_info.server_hostname.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "ServerRegion", + client_info.server_region.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "TimeRequest", + 
chrono::Utc::now().timestamp_micros().to_string(), + ); + push_header_field(&mut fields, req, "TrueClientIP", "true-client-ip"); + push_header_field(&mut fields, req, "UserAgent", header::USER_AGENT.as_str()); + push_header_field(&mut fields, req, "Via", header::VIA.as_str()); + push_header_field(&mut fields, req, "XForwardedForIP", "x-forwarded-for"); + push_header_field(&mut fields, req, "X-Real-IP", "x-real-ip"); + push_header_field(&mut fields, req, "X-Requested-With", "x-requested-with"); + push_field( + &mut fields, + "TlsProtocol", + client_info.tls_protocol.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "TlsCipher", + client_info.tls_cipher.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "JA4", + client_info.tls_ja4.as_deref().unwrap_or_default(), + ); + push_field( + &mut fields, + "H2Fingerprint", + client_info.h2_fingerprint.as_deref().unwrap_or_default(), + ); + + ProtectionPayload { + fields, + uses_header_client_id: !header_client_id.is_empty(), + } + } + + fn classify_protection_response( + &self, + response: edgezero_core::http::Response, + ) -> RequestFilterDecision { + let (parts, body) = response.into_parts(); + let status = parts.status; + let Some(datadome_status) = datadome_response_status(&parts.headers) else { + log::warn!("[datadome] Protection API response missing X-DataDomeResponse"); + return RequestFilterDecision::Continue(RequestFilterEffects::default()); + }; + + if datadome_status != status.as_u16() { + log::warn!( + "[datadome] Protection API status/header mismatch: status={} header={}", + status.as_u16(), + datadome_status + ); + return RequestFilterDecision::Continue(RequestFilterEffects::default()); + } + + let effects = RequestFilterEffects { + request_headers: extract_header_mutations( + &parts.headers, + HEADER_DATADOME_REQUEST_HEADERS, + ), + response_headers: extract_header_mutations(&parts.headers, HEADER_DATADOME_HEADERS), + }; + + if status == edgezero_core::http::StatusCode::OK { 
+ return RequestFilterDecision::Continue(effects); + } + + if matches!(status.as_u16(), 301 | 302 | 401 | 403 | 429) { + if body.is_stream() { + log::warn!("[datadome] Protection API challenge body was streaming; failing open"); + return RequestFilterDecision::Continue(RequestFilterEffects::default()); + } + let body_bytes = body.into_bytes(); + let fastly_status = + StatusCode::from_u16(status.as_u16()).unwrap_or(StatusCode::FORBIDDEN); + let mut challenge = Response::from_status(fastly_status); + if !body_bytes.is_empty() { + challenge.set_body(body_bytes.as_ref()); + } + return RequestFilterDecision::Respond { + response: Box::new(challenge), + effects, + }; + } + + log::warn!( + "[datadome] Protection API returned fail-open status {}", + status.as_u16() + ); + RequestFilterDecision::Continue(RequestFilterEffects::default()) + } +} + +struct ProtectionPayload { + fields: Vec<(String, String)>, + uses_header_client_id: bool, +} + +fn server_side_key_cache_key(integration: &DataDomeIntegration) -> DataDomeServerSideKeyCacheKey { + DataDomeServerSideKeyCacheKey { + secret_store: integration.config.server_side_key_secret_store.clone(), + secret_name: integration.config.server_side_key_secret_name.clone(), + } +} + +#[cfg(test)] +fn clear_datadome_server_side_key_cache_for_tests() { + DATADOME_SERVER_SIDE_KEY_CACHE + .lock() + .expect("should lock DataDome server-side key cache") + .clear(); +} + +fn is_internal_path(path: &str) -> bool { + path.starts_with("/static/tsjs=") + || path.starts_with("/integrations/") + || path.starts_with("/first-party/") + || path == "/.well-known/trusted-server.json" + || path == "/verify-signature" + || path.starts_with("/admin/") + || path.starts_with("/_ts/admin/") + || path == "/_ts/api/v1/identify" + || path == "/_ts/api/v1/batch-sync" +} + +fn request_host(req: &fastly::Request) -> String { + req.get_header_str(header::HOST.as_str()) + .or_else(|| req.get_url().host_str()) + .unwrap_or_default() + .to_string() +} + +fn 
request_path_and_query(req: &fastly::Request) -> String { + match req.get_query_str() { + Some(query) => format!("{}?{}", req.get_path(), query), + None => req.get_path().to_string(), + } +} + +fn header_value(req: &fastly::Request, name: &str) -> String { + req.get_header_str(name).unwrap_or_default().to_string() +} + +fn headers_list(req: &fastly::Request) -> String { + req.get_headers() + .map(|(name, _)| name.as_str()) + .collect::>() + .join(",") +} + +fn push_header_field( + fields: &mut Vec<(String, String)>, + req: &fastly::Request, + field_name: &'static str, + header_name: &str, +) { + push_field(fields, field_name, header_value(req, header_name)); +} + +fn push_field(fields: &mut Vec<(String, String)>, key: &'static str, value: impl AsRef) { + let value = value.as_ref(); + if value.is_empty() { + return; + } + + fields.push((key.to_string(), truncate_field(key, value))); +} + +fn form_encode(fields: &[(String, String)]) -> String { + fields + .iter() + .map(|(key, value)| { + format!( + "{}={}", + urlencoding::encode(key), + urlencoding::encode(value) + ) + }) + .collect::>() + .join("&") +} + +fn datadome_response_status(headers: &HeaderMap) -> Option { + headers + .get(HEADER_DATADOME_RESPONSE) + .and_then(|value| value.to_str().ok()) + .and_then(|value| value.parse::().ok()) +} + +fn extract_header_mutations(headers: &HeaderMap, pointer_header: &str) -> Vec { + let mut mutations = Vec::new(); + + for pointer_value in headers.get_all(pointer_header) { + let Ok(pointer_value) = pointer_value.to_str() else { + continue; + }; + + for header_name in pointer_value.split_whitespace() { + if header_name.eq_ignore_ascii_case(HEADER_DATADOME_HEADERS) + || header_name.eq_ignore_ascii_case(HEADER_DATADOME_REQUEST_HEADERS) + || header_name.eq_ignore_ascii_case(HEADER_DATADOME_RESPONSE) + { + continue; + } + + let Ok(parsed_name) = HeaderName::from_bytes(header_name.as_bytes()) else { + log::warn!("[datadome] Ignoring invalid pointer header name: {header_name}"); + 
continue; + }; + + for value in headers.get_all(&parsed_name) { + let Ok(value) = value.to_str() else { + continue; + }; + if parsed_name + .as_str() + .eq_ignore_ascii_case(header::SET_COOKIE.as_str()) + { + mutations.push(HeaderMutation::append(parsed_name.as_str(), value)); + } else { + mutations.push(HeaderMutation::set(parsed_name.as_str(), value)); + } + } + } + } + + mutations +} + +fn parse_cookie_value(cookie_header: &str, name: &str) -> Option { + for pair in cookie_header.split(';') { + let trimmed = pair.trim(); + let Some((cookie_name, cookie_value)) = trimmed.split_once('=') else { + continue; + }; + if cookie_name == name { + let unquoted = cookie_value.trim_matches('"'); + return Some( + urlencoding::decode(unquoted) + .map(std::borrow::Cow::into_owned) + .unwrap_or_else(|_| unquoted.to_string()), + ); + } + } + + None +} + +fn truncate_field(key: &str, value: &str) -> String { + let limit = field_limit(key); + if limit == 0 { + return value.to_string(); + } + + truncate_utf8(value, limit) +} + +fn field_limit(key: &str) -> i32 { + match key.to_ascii_lowercase().as_str() { + "jsonrpcversion" + | "secchdevicememory" + | "secchuamobile" + | "secfetchstorageaccess" + | "secfetchuser" => 8, + "mcpparamsclientinfoversion" | "mcpprotocolversion" | "secchuaarch" => 16, + "secchuaplatform" | "secfetchdest" | "secfetchmode" => 32, + "contenttype" + | "jsonrpcrequestid" + | "mcpmethod" + | "mcpparamsclientinfoname" + | "mcpparamstoolname" + | "mcpsessionid" + | "secfetchsite" + | "tlscipher" => 64, + "acceptcharset" + | "acceptencoding" + | "cachecontrol" + | "connection" + | "from" + | "graphqloperationname" + | "pragma" + | "secchua" + | "secchuamodel" + | "trueclientip" + | "userid" + | "x-real-ip" + | "x-requested-with" + | "productid" => 128, + "acceptlanguage" | "secchuafullversionlist" | "via" => 256, + "accept" | "clientid" | "headerslist" | "host" | "origin" | "serverhostname" + | "servername" | "signature" | "signatureagent" => 512, + 
"xforwardedforip" => -512, + "useragent" => 768, + "cookieslist" | "referer" => 1024, + "request" | "signatureinput" => 2048, + _ => 0, + } +} + +fn truncate_utf8(value: &str, limit: i32) -> String { + let max = limit.unsigned_abs() as usize; + if value.len() <= max { + return value.to_string(); + } + + if limit > 0 { + let mut end = 0; + for (idx, ch) in value.char_indices() { + let next = idx + ch.len_utf8(); + if next > max { + break; + } + end = next; + } + value[..end].to_string() + } else { + let mut start = value.len(); + let mut used = 0; + for (idx, ch) in value.char_indices().rev() { + let next = used + ch.len_utf8(); + if next > max { + break; + } + used = next; + start = idx; + } + value[start..].to_string() + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use crate::integrations::datadome::DataDomeConfig; + use crate::platform::test_support::{ + build_services_with_config_and_secret, HashMapSecretStore, NoopConfigStore, NoopSecretStore, + }; + + use super::*; + + fn protection_integration() -> Arc { + let config = DataDomeConfig { + enabled: true, + enable_protection: true, + ..DataDomeConfig::default() + }; + DataDomeIntegration::try_new(config).expect("should create integration") + } + + #[test] + fn load_server_side_key_reads_secret_store() { + clear_datadome_server_side_key_cache_for_tests(); + let mut secrets = HashMap::new(); + secrets.insert("server_side_key".to_string(), b"secret-from-store".to_vec()); + let services = build_services_with_config_and_secret( + NoopConfigStore, + HashMapSecretStore::new(secrets), + ); + let integration = protection_integration(); + + let key = integration + .load_server_side_key(&services) + .expect("should load server-side key"); + + assert_eq!(key.expose(), "secret-from-store"); + } + + #[test] + fn load_server_side_key_errors_when_secret_missing() { + clear_datadome_server_side_key_cache_for_tests(); + let services = build_services_with_config_and_secret(NoopConfigStore, 
NoopSecretStore); + let config = DataDomeConfig { + enabled: true, + enable_protection: true, + server_side_key_secret_name: "missing_server_side_key".to_string(), + ..DataDomeConfig::default() + }; + let integration = DataDomeIntegration::try_new(config).expect("should create integration"); + + let result = integration.load_server_side_key(&services); + + assert!(result.is_err(), "should error when secret is missing"); + } + + #[test] + fn extract_header_mutations_appends_set_cookie_and_sets_other_headers() { + let mut headers = HeaderMap::new(); + headers.insert( + HEADER_DATADOME_HEADERS, + edgezero_core::http::HeaderValue::from_static("Set-Cookie X-DD-B"), + ); + headers.append( + header::SET_COOKIE.as_str(), + edgezero_core::http::HeaderValue::from_static("datadome=abc; Path=/"), + ); + headers.insert("x-dd-b", edgezero_core::http::HeaderValue::from_static("1")); + + let mutations = extract_header_mutations(&headers, HEADER_DATADOME_HEADERS); + + assert_eq!( + mutations, + vec![ + HeaderMutation::append("set-cookie", "datadome=abc; Path=/"), + HeaderMutation::set("x-dd-b", "1"), + ], + "should append Set-Cookie while replacing non-cookie headers" + ); + } + + #[test] + fn parse_cookie_value_decodes_datadome_cookie() { + let value = parse_cookie_value("a=1; datadome=abc%20123; b=2", "datadome") + .expect("should parse datadome cookie"); + assert_eq!(value, "abc 123"); + } + + #[test] + fn truncate_utf8_preserves_char_boundaries() { + assert_eq!(truncate_utf8("ééé", 4), "éé"); + assert_eq!(truncate_utf8("ééé", -4), "éé"); + } + + #[test] + fn form_encode_url_encodes_values() { + let encoded = form_encode(&[("Key".to_string(), "a b+c".to_string())]); + assert_eq!(encoded, "Key=a%20b%2Bc"); + } +} diff --git a/crates/trusted-server-core/src/integrations/mod.rs b/crates/trusted-server-core/src/integrations/mod.rs index e9438b32..23bf7bf3 100644 --- a/crates/trusted-server-core/src/integrations/mod.rs +++ b/crates/trusted-server-core/src/integrations/mod.rs @@ 
-20,12 +20,14 @@ pub mod sourcepoint; pub mod testlight; pub use registry::{ - AttributeRewriteAction, AttributeRewriteOutcome, IntegrationAttributeContext, - IntegrationAttributeRewriter, IntegrationDocumentState, IntegrationEndpoint, - IntegrationHeadInjector, IntegrationHtmlContext, IntegrationHtmlPostProcessor, - IntegrationMetadata, IntegrationProxy, IntegrationRegistration, IntegrationRegistrationBuilder, - IntegrationRegistry, IntegrationScriptContext, IntegrationScriptRewriter, ProxyDispatchInput, - ScriptRewriteAction, + AttributeRewriteAction, AttributeRewriteOutcome, HeaderMutation, HeaderMutationMode, + IntegrationAttributeContext, IntegrationAttributeRewriter, IntegrationDocumentState, + IntegrationEndpoint, IntegrationHeadInjector, IntegrationHtmlContext, + IntegrationHtmlPostProcessor, IntegrationMetadata, IntegrationProxy, IntegrationRegistration, + IntegrationRegistrationBuilder, IntegrationRegistry, IntegrationRequestFilter, + IntegrationScriptContext, IntegrationScriptRewriter, ProxyDispatchInput, RequestFilterDecision, + RequestFilterEffects, RequestFilterInput, RequestFilterRegistryInput, + RequestFilterRegistryOutcome, ScriptRewriteAction, }; type IntegrationBuilder = diff --git a/crates/trusted-server-core/src/integrations/registry.rs b/crates/trusted-server-core/src/integrations/registry.rs index d32eea74..d48e0abd 100644 --- a/crates/trusted-server-core/src/integrations/registry.rs +++ b/crates/trusted-server-core/src/integrations/registry.rs @@ -324,6 +324,165 @@ pub trait IntegrationProxy: Send + Sync { } } +/// Input passed to integration request filters. +pub struct RequestFilterInput<'a> { + pub settings: &'a Settings, + pub services: &'a RuntimeServices, + pub request: &'a Request, +} + +/// How a header mutation should be applied. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum HeaderMutationMode { + Set, + Append, +} + +/// Header mutation requested by an integration filter. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct HeaderMutation { + pub name: String, + pub value: String, + pub mode: HeaderMutationMode, +} + +impl HeaderMutation { + #[must_use] + pub fn set(name: impl Into, value: impl Into) -> Self { + Self { + name: name.into(), + value: value.into(), + mode: HeaderMutationMode::Set, + } + } + + #[must_use] + pub fn append(name: impl Into, value: impl Into) -> Self { + Self { + name: name.into(), + value: value.into(), + mode: HeaderMutationMode::Append, + } + } +} + +/// Request and response effects returned by request filters. +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct RequestFilterEffects { + pub request_headers: Vec, + pub response_headers: Vec, +} + +impl RequestFilterEffects { + fn extend(&mut self, next: Self) { + self.request_headers.extend(next.request_headers); + self.response_headers.extend(next.response_headers); + } + + fn apply_to_request(&self, req: &mut Request) { + for mutation in &self.request_headers { + apply_header_mutation_to_request(req, mutation); + } + } + + pub fn apply_to_response(&self, response: &mut Response) { + for mutation in &self.response_headers { + apply_header_mutation_to_response(response, mutation); + } + } +} + +/// Decision returned by an integration request filter. +pub enum RequestFilterDecision { + Continue(RequestFilterEffects), + Respond { + response: Box, + effects: RequestFilterEffects, + }, +} + +/// Input passed to [`IntegrationRegistry::filter_request`]. +pub struct RequestFilterRegistryInput<'a> { + pub settings: &'a Settings, + pub services: &'a RuntimeServices, + pub req: &'a mut Request, +} + +/// Outcome returned by [`IntegrationRegistry::filter_request`]. +pub enum RequestFilterRegistryOutcome { + Continue(RequestFilterEffects), + Respond { + response: Box, + effects: RequestFilterEffects, + }, +} + +/// Trait for integration-provided pre-routing request filters. 
+#[async_trait(?Send)] +pub trait IntegrationRequestFilter: Send + Sync { + /// Identifier for logging/diagnostics. + fn integration_id(&self) -> &'static str; + + /// Filter an incoming request before normal route matching. + async fn filter_request( + &self, + input: RequestFilterInput<'_>, + ) -> Result>; +} + +fn is_forbidden_filter_header(name: &str) -> bool { + let lower = name.to_ascii_lowercase(); + matches!( + lower.as_str(), + "connection" + | "keep-alive" + | "proxy-authenticate" + | "proxy-authorization" + | "te" + | "trailer" + | "transfer-encoding" + | "upgrade" + | "content-length" + | "host" + ) || lower.starts_with("x-ts-") +} + +fn apply_header_mutation_to_request(req: &mut Request, mutation: &HeaderMutation) { + if is_forbidden_filter_header(&mutation.name) { + log::warn!( + "Skipping forbidden request-filter header: {}", + mutation.name + ); + return; + } + + match mutation.mode { + HeaderMutationMode::Set => req.set_header(mutation.name.as_str(), mutation.value.as_str()), + HeaderMutationMode::Append => { + req.append_header(mutation.name.as_str(), mutation.value.as_str()) + } + } +} + +fn apply_header_mutation_to_response(response: &mut Response, mutation: &HeaderMutation) { + if is_forbidden_filter_header(&mutation.name) { + log::warn!( + "Skipping forbidden response-filter header: {}", + mutation.name + ); + return; + } + + match mutation.mode { + HeaderMutationMode::Set => { + response.set_header(mutation.name.as_str(), mutation.value.as_str()) + } + HeaderMutationMode::Append => { + response.append_header(mutation.name.as_str(), mutation.value.as_str()) + } + } +} + /// Trait for integration-provided HTML attribute rewrite hooks. pub trait IntegrationAttributeRewriter: Send + Sync { /// Identifier for logging/diagnostics. 
@@ -398,6 +557,7 @@ pub struct IntegrationRegistration { pub script_rewriters: Vec>, pub html_post_processors: Vec>, pub head_injectors: Vec>, + pub request_filters: Vec>, } impl IntegrationRegistration { @@ -422,6 +582,7 @@ impl IntegrationRegistrationBuilder { script_rewriters: Vec::new(), html_post_processors: Vec::new(), head_injectors: Vec::new(), + request_filters: Vec::new(), }, } } @@ -462,6 +623,12 @@ impl IntegrationRegistrationBuilder { self } + #[must_use] + pub fn with_request_filter(mut self, filter: Arc) -> Self { + self.registration.request_filters.push(filter); + self + } + /// Mark this integration's JS module for deferred loading via /// ` + +``` -| Option | Type | Default | Description | -| ------------------- | ------- | ---------------------------- | --------------------------------------------------------- | -| `enabled` | boolean | `false` | Enable the DataDome integration | -| `sdk_origin` | string | `https://js.datadome.co` | DataDome SDK origin URL (for tags.js) | -| `api_origin` | string | `https://api-js.datadome.co` | DataDome signal collection API origin URL (for /js/\*) | -| `cache_ttl_seconds` | integer | `3600` | Cache TTL for tags.js (1 hour default) | -| `rewrite_sdk` | boolean | `true` | Rewrite DataDome script URLs in HTML to first-party paths | +If your site already manages the DataDome tag, disable auto-injection: -## Usage +```toml +[integrations.datadome] +inject_client_side_tag = false +``` -### Publisher Page Setup +### Manual setup -Update your page to load DataDome through Trusted Server: +You can also load DataDome manually through the first-party path: ```html ``` -If `rewrite_sdk` is enabled, Trusted Server will automatically rewrite any existing DataDome script tags in your HTML: +If `rewrite_sdk` is enabled, Trusted Server rewrites existing DataDome script tags in HTML: ```html @@ -65,34 +130,94 @@ If `rewrite_sdk` is enabled, Trusted Server will automatically rewrite any exist ``` +## Server-side Protection API + 
+When `enable_protection = true`, Trusted Server calls DataDome before normal route matching. DataDome can return: + +- **Allow**: continue routing and optionally enrich the upstream request. +- **Challenge**: return the DataDome response directly without contacting the publisher origin. +- **Fail-open condition**: continue routing without DataDome effects when the Protection API times out, returns malformed instructions, or returns an unexpected status. + +The configured `server_side_key_secret_store` and `server_side_key_secret_name` must resolve to a non-empty secret when server-side protection is enabled. If the secret cannot be read, DataDome protection fails open for that request. + +### Protected traffic + +A request is protected when all of the following are true: + +1. The DataDome integration is enabled. +2. `enable_protection = true`. +3. The method is not `OPTIONS`. +4. The path is not one of Trusted Server's internal routes. +5. The `host + pathname` matches `url_pattern_inclusion`, when configured. +6. The `host + pathname` does not match `url_pattern_exclusion`, when configured. + +Static assets are excluded by default using a case-insensitive file-extension regex. Trusted Server internal routes such as `/static/tsjs=`, `/integrations/`, `/first-party/`, admin routes, discovery routes, and signature-verification routes are also excluded by default. + +Auction traffic at `/auction` is protected by default. + +### Header handling + +DataDome can return pointer headers that identify which headers Trusted Server should copy: + +| Pointer header | Applied to | +| ---------------------------- | ------------------------------------------ | +| `X-DataDome-request-headers` | Request forwarded to Trusted Server/origin | +| `X-DataDome-headers` | Final browser response | + +Trusted Server copies only the named headers. Pointer headers themselves are not forwarded. `Set-Cookie` is appended, while other copied headers are set/replaced. 
Unsafe hop-by-hop, framing, host, and internal `x-ts-*` headers are rejected. + +DataDome downstream response headers are applied after EC response finalization and generic Trusted Server response headers so DataDome challenge/cache/cookie headers win. + +### GraphQL limitation + +`enable_graphql_support` is reserved for future request-body inspection. Trusted Server v1 does not parse GraphQL bodies for DataDome payload enrichment. + ## Endpoints -The integration exposes the following routes: +The first-party layer exposes these routes: | Method | Path | Description | | ---------- | -------------------------------- | --------------------- | | `GET` | `/integrations/datadome/tags.js` | DataDome SDK script | | `GET/POST` | `/integrations/datadome/js/*` | Signal collection API | -## How It Works +## How it works ```mermaid sequenceDiagram participant Browser participant TS as Trusted Server + participant DD as DataDome Protection API participant SDK as js.datadome.co participant API as api-js.datadome.co + participant Origin as Publisher origin + + Browser->>TS: GET /page + TS->>DD: POST /validate-request + alt DataDome allows + DD-->>TS: 200 + header instructions + TS->>Origin: Forward enriched request + Origin-->>TS: Page response + TS-->>Browser: Final response + DataDome headers + else DataDome challenges + DD-->>TS: Challenge response + TS-->>Browser: Challenge response + DataDome headers + else DataDome unavailable + TS->>Origin: Fail open and continue + Origin-->>TS: Page response + TS-->>Browser: Final response + end Browser->>TS: GET /integrations/datadome/tags.js TS->>SDK: GET /tags.js SDK-->>TS: JavaScript SDK Note over TS: Rewrite internal URLs - TS-->>Browser: Modified SDK (first-party URLs) + TS-->>Browser: Modified SDK Browser->>TS: POST /integrations/datadome/js/ TS->>API: POST /js/ @@ -100,14 +225,7 @@ sequenceDiagram TS-->>Browser: Response ``` -### Request Flow - -1. **SDK Loading**: Browser requests `/integrations/datadome/tags.js` -2. 
**Proxy & Rewrite**: Trusted Server fetches from `js.datadome.co`, rewrites internal URLs to first-party paths -3. **Signal Collection**: SDK sends signals to `/integrations/datadome/js/` -4. **Transparent Proxy**: Trusted Server forwards to `api-js.datadome.co`, returns response - -## Environment Variables +## Environment variables Override configuration via environment variables: @@ -117,11 +235,15 @@ TRUSTED_SERVER__INTEGRATIONS__DATADOME__SDK_ORIGIN=https://js.datadome.co TRUSTED_SERVER__INTEGRATIONS__DATADOME__API_ORIGIN=https://api-js.datadome.co TRUSTED_SERVER__INTEGRATIONS__DATADOME__CACHE_TTL_SECONDS=3600 TRUSTED_SERVER__INTEGRATIONS__DATADOME__REWRITE_SDK=true +TRUSTED_SERVER__INTEGRATIONS__DATADOME__ENABLE_PROTECTION=true +TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY_SECRET_STORE=datadome +TRUSTED_SERVER__INTEGRATIONS__DATADOME__SERVER_SIDE_KEY_SECRET_NAME=server_side_key +TRUSTED_SERVER__INTEGRATIONS__DATADOME__CLIENT_SIDE_KEY=your-client-side-key ``` -## Client-Side Script Guard +## Client-side script guard -For single-page applications (SPAs) and frameworks like Next.js that dynamically insert script tags, the integration includes a client-side guard. When the `datadome` module is included in your tsjs bundle, it automatically intercepts dynamically inserted DataDome scripts and rewrites them to use first-party paths. +For single-page applications and frameworks like Next.js that dynamically insert script tags, the integration includes a client-side guard. When the `datadome` module is included in your TSJS bundle, it intercepts dynamically inserted DataDome scripts and rewrites them to use first-party paths. The guard handles: @@ -129,18 +251,11 @@ The guard handles: - `` elements - `` elements -This ensures DataDome scripts are always loaded through first-party context, even when inserted dynamically by client-side JavaScript. - -## Notes - -- **No Captcha Support**: This integration currently focuses on signal collection. 
CAPTCHA functionality may require additional configuration. -- **Cache Headers**: The SDK response includes caching headers based on `cache_ttl_seconds`. -- **Origin Headers**: Trusted Server forwards appropriate headers to DataDome for proper request context. -- **URL Rewriting**: Both `js.datadome.co` and `api-js.datadome.co` URLs in the SDK are rewritten to first-party paths. +This keeps DataDome scripts routed through first-party context, even when inserted dynamically by client-side JavaScript. ## Troubleshooting -### Script Not Loading +### Script not loading Check that the integration is enabled: @@ -149,19 +264,35 @@ Check that the integration is enabled: enabled = true ``` -### Signals Not Sending +If you rely on auto-injection, verify `client_side_key` is non-empty and `inject_client_side_tag = true`. + +### Signals not sending Verify that signal collection routes are working: ```bash -curl -X POST https://your-domain.com/integrations/datadome/js/check +curl -X POST https://www.example.com/integrations/datadome/js/check +``` + +### Server-side protection not running + +Check that both fields are configured: + +```toml +[integrations.datadome] +enabled = true +enable_protection = true +server_side_key_secret_store = "datadome" +server_side_key_secret_name = "server_side_key" ``` -### HTML Rewriting Not Working +Also verify the request is not excluded by the default internal/static route exclusions or your custom inclusion/exclusion regexes. + +### HTML rewriting not working Ensure `rewrite_sdk = true` and that your pages are being proxied through Trusted Server's HTML processing pipeline. 
-## See Also +## See also - [DataDome First-Party Integration Docs](https://docs.datadome.co/docs/integrations#first-party-javascript-tag) - [Integrations Overview](/guide/integrations-overview) diff --git a/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md new file mode 100644 index 00000000..1c06d9fb --- /dev/null +++ b/docs/superpowers/specs/2026-06-11-datadome-server-side-protection-design.md @@ -0,0 +1,754 @@ +# DataDome Server-Side Protection API Integration + +**Issue:** #317 +**Date:** 2026-06-11 +**Status:** In Progress + +## Problem + +Trusted Server already has a DataDome first-party proxy integration for the +client-side JavaScript tag and signal collection API. That layer improves +client-side signal delivery by routing DataDome browser traffic through the +publisher domain, but it does not perform server-side request validation before +requests reach Trusted Server routes or the publisher origin. + +DataDome's Fastly Compute module adds that missing layer by calling the +DataDome Protection API before forwarding traffic. The Protection API returns a +request decision and header-mutation instructions. Trusted Server needs an +implementation of that behavior in Rust that is not tied to DataDome's Fastly +JavaScript SDK. + +## Goals + +- Add a pre-routing integration hook that can block/challenge requests before + origin routing. +- Implement DataDome Protection API validation with fail-open behavior. +- Support DataDome pointer headers: + - upstream request enrichment for allowed requests + - downstream response headers/cookies for allowed and challenged requests +- Protect publisher-origin traffic and auction traffic by default. +- Exclude static assets and Trusted Server internal routes by default. +- Keep the Protection API client logic platform-neutral where possible by using + `RuntimeServices`, `PlatformBackend`, and `PlatformHttpClient`. 
+- Auto-inject the DataDome client-side tag when a client-side key is configured. +- Preserve the existing DataDome first-party proxy and URL-rewrite behavior. + +## Non-Goals + +- No GraphQL body parsing in the initial implementation. The config can reserve + a flag for it, but request-body inspection is deferred. +- No hard dependency on DataDome's JavaScript Fastly Compute package. +- No new edge-provider-specific behavior in `trusted-server-core` beyond the + existing `fastly::Request` integration surfaces. +- No replay-protection or MCP-specific fields in v1. +- No automatic de-duplication when a publisher already manually loads the + DataDome tag. The explicit `inject_client_side_tag = false` escape hatch is + sufficient for v1. +- No literal DataDome server-side secret value in `trusted-server.toml`. + Operators configure the runtime secret store and secret name, and the key is + read from Secret Store at request time with process-local caching. + +## Decisions from Design Discussion + +1. **Protection scope:** protect publisher-origin and auction traffic by + default. Default-exclude Trusted Server internal routes and static assets. +2. **Endpoint default:** default to DataDome's Fastly-specific Protection API + endpoint from the official Fastly Compute docs, while allowing override. +3. **Header precedence:** apply DataDome downstream headers last so DataDome + cookies/cache/challenge headers are not overwritten by generic finalization. +4. **GraphQL support:** defer. +5. **Client-side tag:** auto-inject when a client-side key is configured. +6. **Methods:** protect every non-`OPTIONS` method, including `HEAD`, when the + URL is otherwise in scope. +7. **Secret handling:** read the DataDome server-side key from runtime Secret + Store using configured store/name fields. Do not store the literal key in + `trusted-server.toml`. +8. **Timeout:** use `1500ms` as the default Protection API timeout for v1. +9. 
**Duplicate tag handling:** do not attempt automatic duplicate-tag + detection in v1; operators can disable injection with + `inject_client_side_tag = false`. + +## Current State + +Implementation branch status as of 2026-06-12: + +- Added the generic integration request-filter model in + `crates/trusted-server-core/src/integrations/registry.rs`. +- Wired the Fastly adapter to run request filters after basic auth and before + route matching in `crates/trusted-server-adapter-fastly/src/main.rs`. +- Added DataDome server-side configuration fields and validation in + `crates/trusted-server-core/src/integrations/datadome.rs`. +- Added the DataDome Protection API helper module at + `crates/trusted-server-core/src/integrations/datadome/protection.rs`. +- Added client-side tag auto-injection through `IntegrationHeadInjector`. +- Extended `ClientInfo` and Fastly runtime services with JA4, H2 fingerprint, + edge hostname, and edge region fields. +- Updated `trusted-server.toml` with the new DataDome configuration fields. +- Updated `docs/guide/integrations/datadome.md` with the first-party, + server-side protection, fail-open, header-enrichment, auto-injection, and + GraphQL-v1 limitation behavior. + +Known remaining work before the PR is ready: + +- Fix formatting and clippy blockers introduced by the implementation. +- Add the spec-driven registry, DataDome config, protection matching, payload, + response classification, and route tests listed in this document. +- Run the full CI gate after fixes: + - `cargo fmt --all -- --check` + - `cargo clippy --workspace --all-targets --all-features -- -D warnings` + - `cargo test --workspace` + - JS/doc checks as applicable + +Verification snapshot: + +- `cargo test --workspace` passed on 2026-06-12 for the current branch state. +- `cargo fmt --all -- --check` failed due to formatting drift. 
+- `cargo clippy --package trusted-server-core --all-targets --all-features -- -D warnings` + failed due to clippy issues in the new DataDome/request-filter code. + +Baseline DataDome integration before this work: + +- File: `crates/trusted-server-core/src/integrations/datadome.rs` +- Provides: + - `/integrations/datadome/tags.js` SDK proxy + - `/integrations/datadome/js/*` signal collection proxy + - HTML attribute rewriting for DataDome script URLs +- Registered: + - `IntegrationProxy` + - `IntegrationAttributeRewriter` + +Baseline integration registry before this work supported proxies, +attribute/script rewriters, HTML post-processors, and head injectors. It did not +have a pre-routing request-filter hook. + +Baseline Fastly routing flow before this work in +`crates/trusted-server-adapter-fastly/src/main.rs`: + +```text +sanitize forwarded headers +→ extract request context +→ batch-sync special case +→ build EC context +→ enforce basic auth +→ route matching +→ publisher origin fallback +→ EC/final response headers +``` + +The new request filter should run after successful basic auth and before route +matching. + +## Proposed Architecture + +### 1. Request Filter Hook + +Add a new integration hook in +`crates/trusted-server-core/src/integrations/registry.rs`. + +The hook must be richer than `Option` because DataDome can allow a +request while still requiring request and response header mutations. 
+ +Suggested public model: + +```rust +#[async_trait(?Send)] +pub trait IntegrationRequestFilter: Send + Sync { + fn integration_id(&self) -> &'static str; + + async fn filter_request( + &self, + input: RequestFilterInput<'_>, + ) -> Result<RequestFilterDecision, Report<TrustedServerError>>; +} + +pub struct RequestFilterInput<'a> { + pub settings: &'a Settings, + pub services: &'a RuntimeServices, + pub request: &'a Request, +} + +pub enum RequestFilterDecision { + Continue(RequestFilterEffects), + Respond { + response: Response, + effects: RequestFilterEffects, + }, +} + +#[derive(Default)] +pub struct RequestFilterEffects { + pub request_headers: Vec<HeaderMutation>, + pub response_headers: Vec<HeaderMutation>, +} + +pub struct HeaderMutation { + pub name: String, + pub value: String, + pub mode: HeaderMutationMode, +} + +pub enum HeaderMutationMode { + Set, + Append, +} +``` + +Important behavior: + +- Filters run in registration order. +- On `Continue`, request header mutations are applied immediately before the + next filter and before route matching. +- Response header mutations are accumulated and applied to the final response. +- On `Respond`, routing short-circuits with that response while preserving any + downstream response header effects that must be applied after finalization. +- DataDome transport/API failures should not bubble out as registry errors; + DataDome should convert them to `Continue(Default::default())` to preserve + fail-open behavior. + +### 2. 
Registry Integration + +Extend these types: + +- `IntegrationRegistration` +- `IntegrationRegistrationBuilder` +- `IntegrationRegistryInner` +- `IntegrationRegistry` +- `IntegrationMetadata` + +Add builder method: + +```rust +.with_request_filter(integration.clone()) +``` + +Add registry runner, for example: + +```rust +pub async fn filter_request( + &self, + input: RequestFilterRegistryInput<'_>, +) -> Result<RequestFilterRegistryOutcome, Report<TrustedServerError>> +``` + +The registry outcome should contain either an immediate response plus response +header mutations, or a continue decision with accumulated response header +mutations. + +### 3. Fastly Route Hook + +In `route_request()`, run filters after normal basic auth succeeds and before +`path` / `method` are captured for route matching. + +```text +basic auth ok +→ integration_registry.filter_request(...) + → Respond { response, effects }: finalize response, apply DataDome headers last, return + → Continue(effects): request is enriched; route normally; remember response effects +→ route matching +→ EC finalize +→ generic finalize_response +→ apply request-filter response headers last +``` + +Streaming publisher responses need the same treatment before headers are +committed via `stream_to_client()`. + +### 4. Header Mutation Semantics + +DataDome pointer headers are internal instructions and must not be forwarded. +Only headers named by the pointers should be copied. + +| Pointer header | Destination | +| ---------------------------- | -------------------------------------------------- | +| `X-DataDome-request-headers` | Request forwarded to Trusted Server route / origin | +| `X-DataDome-headers` | Response returned to browser | + +Rules: + +- `Set-Cookie` mutations use append mode. +- Other headers use set/replace mode. +- Pointer headers themselves are never forwarded. 
+- Header mutations must reject hop-by-hop, request-target, body framing, and + Trusted Server internal headers such as `Connection`, `Transfer-Encoding`, + `Content-Length`, `Host`, and `x-ts-*`. +- DataDome downstream headers are applied after `ec_finalize_response()` and + `finalize_response()`. + +## DataDome Protection Design + +### Configuration + +Extend `[integrations.datadome]` with server-side protection and client-side +injection fields. + +```toml +[integrations.datadome] +enabled = false + +# Existing first-party proxy layer +sdk_origin = "https://js.datadome.co" +api_origin = "https://api-js.datadome.co" +cache_ttl_seconds = 3600 +rewrite_sdk = true + +# New server-side protection layer +enable_protection = false +server_side_key_secret_store = "datadome" +server_side_key_secret_name = "server_side_key" +protection_api_origin = "https://api-fastly.datadome.co" +timeout_ms = 1500 +url_pattern_exclusion = "\\.(avi|flv|mka|mkv|mov|mp4|mpeg|mpg|mp3|flac|ogg|ogm|opus|wav|webm|webp|bmp|gif|ico|jpeg|jpg|png|svg|svgz|swf|eot|otf|ttf|woff|woff2|css|less|js|map)$" +url_pattern_inclusion = "" +enable_graphql_support = false + +# New client-side tag injection layer +client_side_key = "" +inject_client_side_tag = true +client_side_tag_url = "/integrations/datadome/tags.js" +client_side_configuration = { ajaxListenerPath = true } +``` + +Notes: + +- The literal server-side key is not stored in Rust config. Rust config stores + only `server_side_key_secret_store` and `server_side_key_secret_name`. +- `server_side_key_secret_store` and `server_side_key_secret_name` are required + only when `enable_protection = true`. +- The DataDome server-side key is read from Secret Store through + `RuntimeServices::secret_store()` and cached per process by configured + store/name. +- `client_side_key` is optional. Auto-injection emits a tag only when + `inject_client_side_tag = true` and `client_side_key` is non-empty; an empty + key is a valid no-op. 
+- `protection_api_origin` remains configurable for regional/static endpoint + selection. +- `url_pattern_exclusion` and `url_pattern_inclusion` match `host + pathname`, + not query string, mirroring the official Fastly module behavior. +- Static-asset exclusion should be case-insensitive so uppercase file + extensions such as `.PNG` are skipped. +- `enable_graphql_support` is reserved but should remain unsupported or ignored + with a warning until the deferred body-handling work is implemented. + +### Protection Scope + +A request is protected when: + +1. DataDome integration is enabled. +2. `enable_protection = true`. +3. The method is not `OPTIONS`; all other methods, including `HEAD`, are + eligible for protection. +4. The URL does not match the default internal/static exclusions. +5. If `url_pattern_inclusion` is configured, `host + pathname` matches it. +6. If `url_pattern_exclusion` is configured, `host + pathname` does not match it. + +Default internal exclusions should include: + +- `/static/tsjs=` +- `/integrations/` +- `/first-party/` +- `/.well-known/trusted-server.json` +- `/verify-signature` +- `/admin/` +- `/_ts/admin/` +- `/_ts/api/v1/identify` +- `/_ts/api/v1/batch-sync` +- CORS preflight `OPTIONS` requests + +Auction traffic at `/auction` is intentionally protected by default. + +### Protection API Request + +Add a DataDome protection helper module, either as a nested module in +`datadome.rs` or as: + +`crates/trusted-server-core/src/integrations/datadome/protection.rs` + +Responsibilities: + +1. Decide whether a request should be protected. +2. Build the form-encoded Protection API payload. +3. Send `POST /validate-request` through platform services. +4. Classify the API response. +5. Extract pointer-header mutations. +6. Return a request-filter decision. + +Use platform abstractions for the outbound call: + +- Parse `protection_api_origin` with `url`. +- Build a `PlatformBackendSpec` with `first_byte_timeout = timeout_ms`. 
+- Resolve/register backend with `RuntimeServices::backend().ensure(...)`. +- Send an `edgezero_core::http::Request` through + `RuntimeServices::http_client().send(...)`. + +Request headers: + +```text +Content-Type: application/x-www-form-urlencoded +Content-Length: <payload length in bytes> +X-DataDome-X-Set-Cookie: true # only when X-DataDome-ClientID is used +``` + +Payload fields should include the core fields from DataDome's official module: + +- `Key` +- `IP` +- `Method` +- `Protocol` +- `Host` +- `ServerHostname` +- `Request` as path plus query +- `RequestModuleName` +- `ModuleVersion` +- `TimeRequest` +- `ClientID` +- `CookiesLen` +- `HeadersList` +- common request headers: + - `Accept` + - `Accept-Charset` + - `Accept-Encoding` + - `Accept-Language` + - `AuthorizationLen` + - `Cache-Control` + - `Connection` + - `Content-Type` + - `From` + - `Origin` + - `PostParamLen` + - `Pragma` + - `Referer` + - `User-Agent` + - `Via` + - `X-Forwarded-For` + - `X-Real-IP` + - `X-Requested-With` + - Sec-CH and Sec-Fetch headers supported by the official module +- TLS/client metadata when available from `RuntimeServices::client_info()` + +`ClientID` source priority: + +1. `X-DataDome-ClientID` request header +2. `datadome` cookie + +When `X-DataDome-ClientID` is used, send +`X-DataDome-X-Set-Cookie: true` to the Protection API. + +Encoding and size rules: + +- URL-encode all values. +- Omit empty fields. +- Apply per-field truncation before encoding. +- Keep the global payload under DataDome's documented limit. 
+ +### Client Metadata + +Current `RuntimeServices::client_info()` exposes: + +- client IP +- TLS protocol +- TLS cipher + +For better DataDome signal quality, extend `ClientInfo` with optional fields +that adapters can populate when available: + +```rust +pub struct ClientInfo { + pub client_ip: Option, + pub tls_protocol: Option, + pub tls_cipher: Option, + pub tls_ja4: Option, + pub h2_fingerprint: Option, + pub server_hostname: Option, + pub server_region: Option, +} +``` + +Fastly can populate `tls_ja4` and `h2_fingerprint` from the request APIs already +used by the JA4/debug device-signal code. Other adapters may leave these fields +empty. + +### Protection API Response + +Before acting on a response, validate that the HTTP status code matches the +`X-DataDomeResponse` header. + +| Status | Meaning | Behavior | +| ------ | --------- | ---------------------------------------------- | +| `200` | Allow | Continue routing with request/response effects | +| `301` | Challenge | Return DataDome response directly | +| `302` | Challenge | Return DataDome response directly | +| `401` | Challenge | Return DataDome response directly | +| `403` | Challenge | Return DataDome response directly | +| `429` | Challenge | Return DataDome response directly | +| other | Fail-open | Continue without effects | + +If status/header mismatch, missing `X-DataDomeResponse`, timeout, network error, +backend error, malformed headers, or any unexpected Protection API behavior: +fail open and continue without effects. + +### Challenge Responses + +For challenge statuses: + +1. Build a response using DataDome's API response status and body. +2. Copy only headers listed in `X-DataDome-headers`. +3. Append `Set-Cookie` values. +4. Do not contact the publisher origin. +5. Still run Trusted Server response finalization, then apply DataDome headers + last. + +### Allowed Requests + +For allow status `200`: + +1. 
Copy headers listed in `X-DataDome-request-headers` into the request before + Trusted Server route matching. +2. Accumulate headers listed in `X-DataDome-headers` for the final browser + response. +3. Continue normal route matching. +4. Apply accumulated DataDome downstream headers after EC and generic response + finalization. + +## Client-Side Auto-Injection + +Implement `IntegrationHeadInjector` for DataDome when `client_side_key` is +configured and `inject_client_side_tag = true`. + +Injected snippet should run before the TSJS bundle and configure DataDome's +client-side tag: + +```html + + +``` + +Rust implementation requirements: + +- Serialize `client_side_key`, `client_side_configuration`, and + `client_side_tag_url` with `serde_json`. +- Escape `