diff --git a/crates/common/src/integrations/google_tag_manager.rs b/crates/common/src/integrations/google_tag_manager.rs new file mode 100644 index 0000000..52881a5 --- /dev/null +++ b/crates/common/src/integrations/google_tag_manager.rs @@ -0,0 +1,767 @@ +//! Google Tag Manager integration for first-party tag delivery. +//! +//! Proxies GTM scripts and Google Analytics beacons through the publisher's +//! domain, improving tracking accuracy and ad-blocker resistance. +//! +//! # Endpoints +//! +//! | Method | Path | Description | +//! |--------|------|-------------| +//! | `GET` | `.../gtm.js` | Proxies and rewrites the GTM script | +//! | `GET` | `.../gtag/js` | Proxies the gtag script | +//! | `GET/POST` | `.../collect` | Proxies GA analytics beacons | +//! | `GET/POST` | `.../g/collect` | Proxies GA4 analytics beacons | + +use std::sync::Arc; + +use async_trait::async_trait; +use error_stack::{Report, ResultExt}; +use fastly::http::StatusCode; +use fastly::{Request, Response}; +use once_cell::sync::Lazy; +use regex::Regex; +use serde::{Deserialize, Serialize}; +use validator::Validate; + +use crate::error::TrustedServerError; +use crate::integrations::{ + AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, + IntegrationEndpoint, IntegrationProxy, IntegrationRegistration, IntegrationScriptContext, + IntegrationScriptRewriter, ScriptRewriteAction, +}; +use crate::proxy::{proxy_request, ProxyRequestConfig}; +use crate::settings::{IntegrationConfig, Settings}; + +const GTM_INTEGRATION_ID: &str = "google_tag_manager"; +const DEFAULT_UPSTREAM: &str = "https://www.googletagmanager.com"; + +/// Regex pattern for matching and rewriting GTM and Google Analytics URLs. 
+/// +/// Handles all URL variants: +/// - `https://www.googletagmanager.com/gtm.js?id=...` +/// - `//www.googletagmanager.com/gtm.js?id=...` +/// - `https://www.google-analytics.com/collect` +/// - `//www.google-analytics.com/g/collect` +/// +/// The replacement target is `/integrations/google_tag_manager`. +static GTM_URL_PATTERN: Lazy = Lazy::new(|| { + Regex::new(r"(https?:)?//www\.(googletagmanager|google-analytics)\.com") + .expect("GTM URL regex should compile") +}); + +#[derive(Debug, Clone, Deserialize, Serialize, Validate)] +pub struct GoogleTagManagerConfig { + #[serde(default = "default_enabled")] + pub enabled: bool, + /// GTM Container ID (e.g., "GTM-XXXXXX"). + #[validate(length(min = 1))] + pub container_id: String, + /// Upstream URL for GTM (defaults to `https://www.googletagmanager.com`). + #[serde(default = "default_upstream")] + #[validate(url)] + pub upstream_url: String, +} + +impl IntegrationConfig for GoogleTagManagerConfig { + fn is_enabled(&self) -> bool { + self.enabled + } +} + +fn default_enabled() -> bool { + false +} + +fn default_upstream() -> String { + DEFAULT_UPSTREAM.to_string() +} + +pub struct GoogleTagManagerIntegration { + config: GoogleTagManagerConfig, +} + +impl GoogleTagManagerIntegration { + fn new(config: GoogleTagManagerConfig) -> Arc { + Arc::new(Self { config }) + } + + fn error(message: impl Into) -> TrustedServerError { + TrustedServerError::Integration { + integration: GTM_INTEGRATION_ID.to_string(), + message: message.into(), + } + } + + fn upstream_url(&self) -> &str { + if self.config.upstream_url.is_empty() { + DEFAULT_UPSTREAM + } else { + &self.config.upstream_url + } + } + + /// Rewrite GTM and Google Analytics URLs to first-party proxy paths. + /// + /// Uses [`GTM_URL_PATTERN`] to handle all URL variants (https, protocol-relative) + /// for both `googletagmanager.com` and `google-analytics.com`. 
+ fn rewrite_gtm_urls(content: &str) -> String { + let replacement = format!("/integrations/{}", GTM_INTEGRATION_ID); + GTM_URL_PATTERN + .replace_all(content, replacement.as_str()) + .into_owned() + } +} + +fn build(settings: &Settings) -> Option> { + let config = match settings.integration_config::(GTM_INTEGRATION_ID) { + Ok(Some(config)) => config, + Ok(None) => return None, + Err(err) => { + log::error!("Failed to load GTM integration config: {err:?}"); + return None; + } + }; + + Some(GoogleTagManagerIntegration::new(config)) +} + +#[must_use] +pub fn register(settings: &Settings) -> Option { + let integration = build(settings)?; + Some( + IntegrationRegistration::builder(GTM_INTEGRATION_ID) + .with_proxy(integration.clone()) + .with_attribute_rewriter(integration.clone()) + .with_script_rewriter(integration) + .build(), + ) +} + +#[async_trait(?Send)] +impl IntegrationProxy for GoogleTagManagerIntegration { + fn integration_name(&self) -> &'static str { + GTM_INTEGRATION_ID + } + + fn routes(&self) -> Vec { + vec![ + // Proxy for the main GTM script + self.get("/gtm.js"), + // Proxy for the gtag script (if used) + self.get("/gtag/js"), + // Analytics beacons (GA4/UA) + // The GTM script is rewritten to point these beacons to our proxy. 
+ self.get("/collect"), + self.post("/collect"), + self.get("/g/collect"), + self.post("/g/collect"), + ] + } + + async fn handle( + &self, + settings: &Settings, + req: Request, + ) -> Result> { + let path = req.get_path().to_string(); + let method = req.get_method(); + log::debug!("Handling GTM request: {} {}", method, path); + + let upstream_base = self.upstream_url(); + + // Construct full target URL + let mut target_url = if path.ends_with("/gtm.js") { + format!("{}/gtm.js", upstream_base) + } else if path.ends_with("/gtag/js") { + format!("{}/gtag/js", upstream_base) + } else if path.ends_with("/collect") { + // Analytics beacons always go to google-analytics.com, not the + // configurable upstream_url (which is for googletagmanager.com). + if path.contains("/g/") { + "https://www.google-analytics.com/g/collect".to_string() + } else { + "https://www.google-analytics.com/collect".to_string() + } + } else { + return Ok(Response::from_status(StatusCode::NOT_FOUND)); + }; + + // Append query params if present, or add default ID for gtm.js + if let Some(query) = req.get_url().query() { + target_url = format!("{}?{}", target_url, query); + } else if path.ends_with("/gtm.js") { + target_url = format!("{}?id={}", target_url, self.config.container_id); + } + + log::debug!("Proxying to upstream: {}", target_url); + + let mut proxy_config = ProxyRequestConfig::new(&target_url); + + // If we are fetching gtm.js, we intend to rewrite the body. + // We must ensure the upstream returns uncompressed content. + if path.ends_with("/gtm.js") { + proxy_config = proxy_config.with_header( + fastly::http::header::ACCEPT_ENCODING, + fastly::http::HeaderValue::from_static("identity"), + ); + } + + let mut response = proxy_request(settings, req, proxy_config) + .await + .change_context(Self::error("Failed to proxy GTM request"))?; + + // If we are serving gtm.js, rewrite internal URLs to route beacons through us. 
+ if path.ends_with("/gtm.js") { + if !response.get_status().is_success() { + log::warn!("GTM upstream returned status {}", response.get_status()); + return Ok(response); + } + log::debug!("Rewriting GTM script content"); + let body_str = response.take_body_str(); + let rewritten_body = Self::rewrite_gtm_urls(&body_str); + + response = Response::from_body(rewritten_body) + .with_header( + fastly::http::header::CONTENT_TYPE, + "application/javascript; charset=utf-8", + ) + .with_header(fastly::http::header::CACHE_CONTROL, "public, max-age=3600"); + } + + Ok(response) + } +} + +impl IntegrationAttributeRewriter for GoogleTagManagerIntegration { + fn integration_id(&self) -> &'static str { + GTM_INTEGRATION_ID + } + + fn handles_attribute(&self, attribute: &str) -> bool { + matches!(attribute, "src" | "href") + } + + fn rewrite( + &self, + _attr_name: &str, + attr_value: &str, + _ctx: &IntegrationAttributeContext<'_>, + ) -> AttributeRewriteAction { + if attr_value.contains("googletagmanager.com/gtm.js") { + AttributeRewriteAction::replace(Self::rewrite_gtm_urls(attr_value)) + } else { + AttributeRewriteAction::keep() + } + } +} + +impl IntegrationScriptRewriter for GoogleTagManagerIntegration { + fn integration_id(&self) -> &'static str { + GTM_INTEGRATION_ID + } + + fn selector(&self) -> &'static str { + "script" // Match all scripts to find inline GTM snippets + } + + fn rewrite(&self, content: &str, _ctx: &IntegrationScriptContext<'_>) -> ScriptRewriteAction { + // Look for the GTM snippet pattern. 
+ // Standard snippet contains: "googletagmanager.com/gtm.js" + if content.contains("googletagmanager.com/gtm.js") { + return ScriptRewriteAction::replace(Self::rewrite_gtm_urls(content)); + } + + ScriptRewriteAction::keep() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::html_processor::{create_html_processor, HtmlProcessorConfig}; + use crate::integrations::{ + AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, + IntegrationDocumentState, IntegrationRegistry, IntegrationScriptContext, + IntegrationScriptRewriter, ScriptRewriteAction, + }; + use crate::settings::Settings; + use crate::streaming_processor::{Compression, PipelineConfig, StreamingPipeline}; + use crate::test_support::tests::crate_test_settings_str; + use std::io::Cursor; + + #[test] + fn test_rewrite_gtm_urls() { + // All URL patterns should be rewritten via the shared regex + let input = r#" + var a = "https://www.googletagmanager.com/gtm.js"; + var b = "//www.googletagmanager.com/gtm.js"; + var c = "https://www.google-analytics.com/collect"; + var d = "//www.google-analytics.com/g/collect"; + var e = "http://www.googletagmanager.com/gtm.js"; + "#; + + let result = GoogleTagManagerIntegration::rewrite_gtm_urls(input); + + assert!(result.contains("/integrations/google_tag_manager/gtm.js")); + assert!(result.contains("/integrations/google_tag_manager/collect")); + assert!(result.contains("/integrations/google_tag_manager/g/collect")); + assert!(!result.contains("www.googletagmanager.com")); + assert!(!result.contains("www.google-analytics.com")); + } + + #[test] + fn test_rewrite_preserves_non_gtm_urls() { + let input = r#"var x = "https://example.com/script.js";"#; + let result = GoogleTagManagerIntegration::rewrite_gtm_urls(input); + assert_eq!(input, result); + } + + #[test] + fn test_attribute_rewriter() { + let config = GoogleTagManagerConfig { + enabled: true, + container_id: "GTM-TEST".to_string(), + upstream_url: 
"https://www.googletagmanager.com".to_string(), + }; + let integration = GoogleTagManagerIntegration::new(config); + + let ctx = IntegrationAttributeContext { + attribute_name: "src", + request_host: "example.com", + request_scheme: "https", + origin_host: "origin.example.com", + }; + + // Case 1: Standard HTTPS URL + let action = IntegrationAttributeRewriter::rewrite( + &*integration, + "src", + "https://www.googletagmanager.com/gtm.js?id=GTM-TEST", + &ctx, + ); + if let AttributeRewriteAction::Replace(val) = action { + assert_eq!(val, "/integrations/google_tag_manager/gtm.js?id=GTM-TEST"); + } else { + panic!("Expected Replace action for HTTPS URL, got {:?}", action); + } + + // Case 2: Protocol-relative URL + let action = IntegrationAttributeRewriter::rewrite( + &*integration, + "src", + "//www.googletagmanager.com/gtm.js?id=GTM-TEST", + &ctx, + ); + if let AttributeRewriteAction::Replace(val) = action { + assert_eq!(val, "/integrations/google_tag_manager/gtm.js?id=GTM-TEST"); + } else { + panic!( + "Expected Replace action for protocol-relative URL, got {:?}", + action + ); + } + + // Case 3: Other URL (should be kept) + let action = IntegrationAttributeRewriter::rewrite( + &*integration, + "src", + "https://other.com/script.js", + &ctx, + ); + assert!(matches!(action, AttributeRewriteAction::Keep)); + } + + #[test] + fn test_script_rewriter() { + let config = GoogleTagManagerConfig { + enabled: true, + container_id: "GTM-TEST".to_string(), + upstream_url: "https://www.googletagmanager.com".to_string(), + }; + let integration = GoogleTagManagerIntegration::new(config); + let doc_state = IntegrationDocumentState::default(); + + let ctx = IntegrationScriptContext { + selector: "script", + request_host: "example.com", + request_scheme: "https", + origin_host: "origin.example.com", + is_last_in_text_node: true, + document_state: &doc_state, + }; + + // Case 1: Inline GTM snippet + let snippet = r#"(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': +new 
Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0], +j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src= +'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f); +})(window,document,'script','dataLayer','GTM-XXXX');"#; + + let action = IntegrationScriptRewriter::rewrite(&*integration, snippet, &ctx); + if let ScriptRewriteAction::Replace(val) = action { + assert!(val.contains("/integrations/google_tag_manager/gtm.js")); + assert!(!val.contains("https://www.googletagmanager.com/gtm.js")); + } else { + panic!("Expected Replace action for GTM snippet, got {:?}", action); + } + + // Case 2: Protocol relative + let snippet_proto = r#"j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;"#; + let action = IntegrationScriptRewriter::rewrite(&*integration, snippet_proto, &ctx); + if let ScriptRewriteAction::Replace(val) = action { + assert!(val.contains("/integrations/google_tag_manager/gtm.js")); + assert!(!val.contains("//www.googletagmanager.com/gtm.js")); + } else { + panic!( + "Expected Replace action for proto-relative snippet, got {:?}", + action + ); + } + + // Case 3: Irrelevant script + let other_script = "console.log('hello');"; + let action = IntegrationScriptRewriter::rewrite(&*integration, other_script, &ctx); + assert!(matches!(action, ScriptRewriteAction::Keep)); + } + + #[test] + fn test_default_configuration() { + let config = GoogleTagManagerConfig { + enabled: default_enabled(), + container_id: "GTM-DEFAULT".to_string(), + upstream_url: default_upstream(), + }; + + assert!(!config.enabled); + assert_eq!(config.upstream_url, "https://www.googletagmanager.com"); + } + + #[test] + fn test_upstream_url_logic() { + // Default upstream + let config_default = GoogleTagManagerConfig { + enabled: true, + container_id: "GTM-123".to_string(), + upstream_url: "".to_string(), // Empty string should fallback to default in accessor + }; + let integration_default = 
GoogleTagManagerIntegration::new(config_default); + assert_eq!( + integration_default.upstream_url(), + "https://www.googletagmanager.com" + ); + + // Custom upstream + let config_custom = GoogleTagManagerConfig { + enabled: true, + container_id: "GTM-123".to_string(), + upstream_url: "https://gtm.example.com".to_string(), + }; + let integration_custom = GoogleTagManagerIntegration::new(config_custom); + assert_eq!(integration_custom.upstream_url(), "https://gtm.example.com"); + } + + #[test] + fn test_routes_registered() { + let config = GoogleTagManagerConfig { + enabled: true, + container_id: "GTM-TEST".to_string(), + upstream_url: default_upstream(), + }; + let integration = GoogleTagManagerIntegration::new(config); + let routes = integration.routes(); + + // GTM.js, Gtag.js, and 4 Collect endpoints (GET/POST for standard & dual-tagging) + assert_eq!(routes.len(), 6); + + assert!(routes + .iter() + .any(|r| r.path == "/integrations/google_tag_manager/gtm.js")); + assert!(routes + .iter() + .any(|r| r.path == "/integrations/google_tag_manager/gtag/js")); + assert!(routes + .iter() + .any(|r| r.path == "/integrations/google_tag_manager/collect")); + assert!(routes + .iter() + .any(|r| r.path == "/integrations/google_tag_manager/g/collect")); + } + + #[test] + fn test_handle_response_rewriting() { + let original_body = r#" + var x = "https://www.google-analytics.com/collect"; + var y = "https://www.googletagmanager.com/gtm.js"; + "#; + + let rewritten = GoogleTagManagerIntegration::rewrite_gtm_urls(original_body); + + assert!(rewritten.contains("/integrations/google_tag_manager/collect")); + assert!(rewritten.contains("/integrations/google_tag_manager/gtm.js")); + assert!(!rewritten.contains("https://www.google-analytics.com")); + } + + fn make_settings() -> Settings { + Settings::from_toml(&crate_test_settings_str()).expect("should parse settings") + } + + fn config_from_settings( + settings: &Settings, + registry: &IntegrationRegistry, + ) -> HtmlProcessorConfig 
{ + HtmlProcessorConfig::from_settings( + settings, + registry, + "origin.example.com", + "test.example.com", + "https", + ) + } + + #[test] + fn test_config_parsing() { + let toml_str = r#" +[publisher] +domain = "test-publisher.com" +cookie_domain = ".test-publisher.com" +origin_url = "https://origin.test-publisher.com" +proxy_secret = "test-secret" + +[synthetic] +counter_store = "test-counter-store" +opid_store = "test-opid-store" +secret_key = "test-secret-key" +template = "{{client_ip}}:{{user_agent}}" + +[integrations.google_tag_manager] +enabled = true +container_id = "GTM-PARSED" +upstream_url = "https://custom.gtm.example" +"#; + let settings = Settings::from_toml(toml_str).expect("should parse TOML"); + let config = settings + .integration_config::(GTM_INTEGRATION_ID) + .expect("should get config") + .expect("should be enabled"); + + assert!(config.enabled); + assert_eq!(config.container_id, "GTM-PARSED"); + assert_eq!(config.upstream_url, "https://custom.gtm.example"); + } + + #[test] + fn test_config_defaults() { + let toml_str = r#" +[publisher] +domain = "test-publisher.com" +cookie_domain = ".test-publisher.com" +origin_url = "https://origin.test-publisher.com" +proxy_secret = "test-secret" + +[synthetic] +counter_store = "test-counter-store" +opid_store = "test-opid-store" +secret_key = "test-secret-key" +template = "{{client_ip}}:{{user_agent}}" + +[integrations.google_tag_manager] +container_id = "GTM-DEFAULT" +"#; + let settings = Settings::from_toml(toml_str).expect("should parse TOML"); + let config = settings + .integration_config::(GTM_INTEGRATION_ID) + .expect("should get config"); + + // Default is now false, so integration_config returns None for disabled + // When we explicitly parse the config with container_id but no enabled field, + // the config is present but disabled + assert!( + config.is_none(), + "Config with default enabled=false should return None from integration_config" + ); + } + + #[test] + fn 
test_html_processor_pipeline_rewrites_gtm() { + let html = r#" + + "#; + + let mut settings = make_settings(); + // Enable GTM + settings + .integrations + .insert_config( + "google_tag_manager", + &serde_json::json!({ + "enabled": true, + "container_id": "GTM-TEST", + "upstream_url": "https://www.googletagmanager.com" + }), + ) + .expect("should update gtm config"); + + let registry = IntegrationRegistry::new(&settings).expect("should create registry"); + let config = config_from_settings(&settings, ®istry); + let processor = create_html_processor(config); + let pipeline_config = PipelineConfig { + input_compression: Compression::None, + output_compression: Compression::None, + chunk_size: 8192, + }; + let mut pipeline = StreamingPipeline::new(pipeline_config, processor); + + let mut output = Vec::new(); + let result = pipeline.process(Cursor::new(html.as_bytes()), &mut output); + assert!(result.is_ok()); + + let processed = String::from_utf8_lossy(&output); + + // Verify rewrite happened + assert!(processed.contains("/integrations/google_tag_manager/gtm.js?id=GTM-TEST")); + assert!(!processed.contains("https://www.googletagmanager.com/gtm.js")); + } + + #[test] + fn test_html_processing_with_fixture() { + // 1. Configure Settings with GTM enabled + let mut settings = make_settings(); + + // Use the ID from the fixture: GTM-522ZT3X6 + settings + .integrations + .insert_config( + "google_tag_manager", + &serde_json::json!({ + "enabled": true, + "container_id": "GTM-522ZT3X6", + "upstream_url": "https://www.googletagmanager.com" + }), + ) + .expect("should update gtm config"); + + // 2. 
Setup Pipeline + let registry = IntegrationRegistry::new(&settings).expect("should create registry"); + let config = config_from_settings(&settings, ®istry); + let processor = create_html_processor(config); + let pipeline_config = PipelineConfig { + input_compression: Compression::None, + output_compression: Compression::None, + chunk_size: 8192, + }; + let mut pipeline = StreamingPipeline::new(pipeline_config, processor); + + // 3. Load Fixture + // Path is relative to this file: ../html_processor.test.html + let html_content = include_str!("../html_processor.test.html"); + + // 4. Run Pipeline + let mut output = Vec::new(); + let result = pipeline.process(Cursor::new(html_content.as_bytes()), &mut output); + assert!( + result.is_ok(), + "Pipeline processing failed: {:?}", + result.err() + ); + + let processed = String::from_utf8_lossy(&output); + + // 5. Assertions + + // a. Link Preload Rewrite: + // Original: + + + + + + "#; + + let mut output = Vec::new(); + pipeline + .process(Cursor::new(html_input.as_bytes()), &mut output) + .expect("should process"); + let processed = String::from_utf8_lossy(&output); + + let expected_src = "/integrations/google_tag_manager/gtm.js"; + + assert!( + processed.contains(expected_src), + "Inline script src not rewritten" + ); + + assert!( + !processed.contains("j.src='https://www.googletagmanager.com/gtm.js"), + "Original src should be gone" + ); + } + + #[test] + fn test_error_helper() { + let err = GoogleTagManagerIntegration::error("test failure"); + match err { + TrustedServerError::Integration { + integration, + message, + } => { + assert_eq!(integration, "google_tag_manager"); + assert_eq!(message, "test failure"); + } + other => panic!("Expected Integration error, got {:?}", other), + } + } +} diff --git a/crates/common/src/integrations/mod.rs b/crates/common/src/integrations/mod.rs index 464c36d..7d57757 100644 --- a/crates/common/src/integrations/mod.rs +++ b/crates/common/src/integrations/mod.rs @@ -6,6 +6,7 @@ pub 
mod adserver_mock; pub mod aps; pub mod datadome; pub mod didomi; +pub mod google_tag_manager; pub mod lockr; pub mod nextjs; pub mod permutive; @@ -31,6 +32,7 @@ pub(crate) fn builders() -> &'static [IntegrationBuilder] { permutive::register, lockr::register, didomi::register, + google_tag_manager::register, datadome::register, ] } diff --git a/docs/guide/integrations/google_tag_manager.md b/docs/guide/integrations/google_tag_manager.md new file mode 100644 index 0000000..5b91876 --- /dev/null +++ b/docs/guide/integrations/google_tag_manager.md @@ -0,0 +1,165 @@ +# Google Tag Manager Integration + +**Category**: Tag Management +**Status**: Production +**Type**: First-Party Tag Gateway + +## Overview + +The Google Tag Manager (GTM) integration enables Trusted Server to act as a first-party proxy for GTM scripts and analytics beacons. This improves performance, tracking accuracy, and privacy control by serving these assets from your own domain. + +## What is the Tag Gateway? + +The Tag Gateway intercepts requests for GTM scripts (`gtm.js`) and Google Analytics beacons (`collect`). Instead of the user's browser connecting directly to Google content servers, it connects to your Trusted Server. Trusted Server then fetches the content from Google and serves it back to the user. + +**Benefits**: + +- **Bypass Ad Blockers**: Serving scripts from a first-party domain can prevent them from being blocked by some ad blockers and privacy extensions. +- **Extended Cookie Life**: First-party cookies set by these scripts are more durable in environments like Safari (ITP). +- **Performance**: Utilize edge caching for scripts. +- **Privacy Control**: Strips client IP addresses before forwarding data to Google. 
+ +## Configuration + +Add the GTM configuration to `trusted-server.toml`: + +```toml +[integrations.google_tag_manager] +enabled = true +container_id = "GTM-XXXXXX" +# upstream_url = "https://www.googletagmanager.com" # Optional override +``` + +### Configuration Options + +| Field | Type | Required | Description | +| -------------- | ------- | -------- | --------------------------------------------- | +| `enabled` | boolean | No | Enable/disable integration (default: `false`) | +| `container_id` | string | Yes | Your GTM Container ID (e.g., `GTM-A1B2C3`) | +| `upstream_url` | string | No | Custom upstream URL (advanced usage) | + +## How It Works + +```mermaid +flowchart TD + user["User Browser"] + server["Trusted Server"] + google["Google Servers<br/>(gtm.js, collect)"] + + user -- "1. Request HTML" --> server + server -- "2. Rewrite HTML<br/>(src=/integrations/...)" --> user + user -- "3. Request Script<br/>(gtm.js w/ ID)" --> server + server -- "4. Fetch Script" --> google + google -- "5. Return Script" --> server + server -- "6. Rewrite Script Content<br/>(replace www.google-analytics.com)" --> user + user -- "7. Send Beacon<br/>(/collect w/ data)" --> server + server -- "8. Proxy Beacon" --> google +``` + +### 1. Script Rewriting + +When Trusted Server processes an HTML response, it automatically rewrites GTM script tags to point to the local proxy: + +**Before:** + +```html +<script src="https://www.googletagmanager.com/gtm.js?id=GTM-XXXXXX"></script> +``` + +**After:** + +```html +<script src="/integrations/google_tag_manager/gtm.js?id=GTM-XXXXXX"></script> +``` + +### 2. Script Proxying + +The proxy intercepts requests for the GTM library and modifies it on-the-fly. This is critical for First-Party context. + +1. **Fetch**: Retrieves the original `gtm.js` from Google. +2. **Rewrite**: Replaces hardcoded references to `www.google-analytics.com` and `www.googletagmanager.com` with the local proxy path. +3. **Serve**: Returns the modified script with correct caching headers. + +### 3. Beacon Proxying + +Analytics data (events, pageviews) normally sent to `google-analytics.com/collect` are now routed to: + +`https://your-server.com/integrations/google_tag_manager/collect` + +Trusted Server acts as a gateway, stripping client IP addresses (privacy) before forwarding the data to Google. + +## Core Endpoints + +### `GET .../gtm.js` - Script Proxy + +Proxies the Google Tag Manager library. + +**Request**: + +``` +GET /integrations/google_tag_manager/gtm.js?id=GTM-XXXXXX +``` + +**Behavior**: + +- Proxies to `https://www.googletagmanager.com/gtm.js` +- Rewrites internal URLs to use the first-party proxy +- Sets `Accept-Encoding: identity` during fetch to ensure rewriteable text response + +### `GET/POST .../collect` - Analytics Beacon + +Proxies analytics events (GA4/UA). + +**Request**: + +``` +POST /integrations/google_tag_manager/g/collect?v=2&... +``` + +**Behavior**: + +- Proxies to `https://www.google-analytics.com/g/collect` +- Forwarding: User-Agent, Referer, Payload +- Privacy: Does NOT forward client IP (Google sees Trusted Server IP) + +## Performance & Caching + +### Compression + +The integration requires the upstream `gtm.js` to be uncompressed to perform string replacement. 
Trusted Server fetches it with `Accept-Encoding: identity`. + +_Note: Trusted Server will re-compress the response (gzip/brotli) before sending it to the user if the `compression` feature is enabled._ + +### Direct Proxying + +Beacon requests (`/collect`) are proxied directly using streaming, minimizing latency overhead. + +## Manual Verification + +You can verify the integration using `curl`: + +**Test Script Result**: + +```bash +curl -v "http://localhost:8080/integrations/google_tag_manager/gtm.js?id=GTM-XXXXXX" +``` + +_Expected_: `200 OK`. Body should contain `/integrations/google_tag_manager` instead of `google-analytics.com`. + +**Test Beacon Result**: + +```bash +curl -v -X POST "http://localhost:8080/integrations/google_tag_manager/g/collect?v=2&tid=G-TEST" +``` + +_Expected_: `200 OK` (or 204). + +## Implementation Details + +See [crates/common/src/integrations/google_tag_manager.rs](https://github.com/IABTechLab/trusted-server/blob/main/crates/common/src/integrations/google_tag_manager.rs). + +## Next Steps + +- Review [Prebid Integration](/guide/integrations/prebid) for header bidding. +- Check [Configuration Guide](/guide/configuration) for other integration settings. +- Learn more about [Synthetic IDs](/guide/synthetic-ids) which are generated alongside this integration. diff --git a/trusted-server.toml b/trusted-server.toml index cdad1f5..2449798 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -114,6 +114,11 @@ pub_id = "your-aps-publisher-id" endpoint = "https://origin-mocktioneer.cdintel.com/e/dtb/bid" timeout_ms = 1000 +[integrations.google_tag_manager] +enabled = false +container_id = "GTM-XXXXXX" +# upstream_url = "https://www.googletagmanager.com" + [integrations.adserver_mock] enabled = false endpoint = "https://origin-mocktioneer.cdintel.com/adserver/mediate"