From 30a2f2210d2970ce5e8c6c3a32cf41f3ee1893c2 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 27 May 2026 17:10:08 +0530 Subject: [PATCH 01/26] feat(ecs_cluster): Ravion-managed wildcard domain (opt-in) use_ravion_managed_domains toggle: ravion_certificate (shared_wildcard) issues *.-.; this module owns the public ALB HTTPS listener with that cert as default (alb submodule skips its HTTPS listener + cert ARNs); opens SG 443. Outputs the wildcard fqdn + listener arn + cert arn + aws account/region for ecs_service to nest under. Provider pinned ravion.com/ravion/ravion. Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_cluster/load_balancers.tf | 8 ++- compute/ecs_cluster/outputs.tf | 35 ++++++++++- compute/ecs_cluster/ravion_domains.tf | 86 +++++++++++++++++++++++++++ compute/ecs_cluster/variables.tf | 28 +++++++++ compute/ecs_cluster/versions.tf | 6 ++ 5 files changed, 158 insertions(+), 5 deletions(-) create mode 100644 compute/ecs_cluster/ravion_domains.tf diff --git a/compute/ecs_cluster/load_balancers.tf b/compute/ecs_cluster/load_balancers.tf index 9eaec97..5537c1c 100644 --- a/compute/ecs_cluster/load_balancers.tf +++ b/compute/ecs_cluster/load_balancers.tf @@ -14,13 +14,15 @@ module "public_alb" { subnet_ids = var.public_subnet_ids internal = false - # Listener configuration + # Listener configuration. In Ravion-managed mode this module owns the HTTPS + # listener (ravion_domains.tf) with the Ravion wildcard cert as default, so + # the alb submodule skips its own HTTPS listener + customer cert ARNs. enable_http_listener = true - enable_https_listener = var.public_alb_enable_https + enable_https_listener = var.public_alb_enable_https && !local.enable_ravion_domain http_to_https_redirect = var.public_alb_enable_https # SSL/TLS - certificate_arns = var.public_alb_certificate_arns + certificate_arns = local.enable_ravion_domain ? 
[] : var.public_alb_certificate_arns ssl_policy = var.public_alb_ssl_policy # ALB settings diff --git a/compute/ecs_cluster/outputs.tf b/compute/ecs_cluster/outputs.tf index 8d5a5cd..ead1f3b 100644 --- a/compute/ecs_cluster/outputs.tf +++ b/compute/ecs_cluster/outputs.tf @@ -120,8 +120,10 @@ output "public_alb_http_listener_arn" { } output "public_alb_https_listener_arn" { - description = "The ARN of the public ALB HTTPS listener (null if HTTPS disabled)." - value = var.enable_public_alb && var.public_alb_enable_https ? module.public_alb[0].https_listener_arn : null + description = "The ARN of the public ALB HTTPS listener (Ravion-owned when use_ravion_managed_domains; null if HTTPS disabled)." + value = (var.enable_public_alb && var.public_alb_enable_https) ? ( + local.enable_ravion_domain ? aws_lb_listener.ravion_https[0].arn : module.public_alb[0].https_listener_arn + ) : null } ################################################################################ @@ -240,3 +242,32 @@ output "region" { description = "The AWS region where the resources are deployed." value = local.region } + +################################################################################ +# Ravion-managed domains +################################################################################ + +output "ravion_cluster_certificate_id" { + description = "Ravion managed-certificate id for the cluster wildcard (null unless use_ravion_managed_domains)." + value = local.enable_ravion_domain ? ravion_certificate.cluster[0].id : null +} + +output "ravion_cluster_domain_fqdn" { + description = "Cluster wildcard apex FQDN. Pass to ecs_service as cluster_parent_fqdn." + value = local.enable_ravion_domain ? ravion_certificate.cluster[0].fqdn : null +} + +output "ravion_cluster_cert_arn" { + description = "ACM ARN of the cluster wildcard cert." + value = local.enable_ravion_domain ? 
ravion_certificate.cluster[0].cert_arn : null +} + +output "ravion_aws_account_id" { + description = "Pass-through Ravion AwsAccount row id for ecs_service Mode B." + value = var.ravion_aws_account_id +} + +output "ravion_aws_region" { + description = "Pass-through Ravion cert region for ecs_service Mode B." + value = local.enable_ravion_domain ? coalesce(var.ravion_aws_region, local.region) : null +} diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf new file mode 100644 index 0000000..35a74a8 --- /dev/null +++ b/compute/ecs_cluster/ravion_domains.tf @@ -0,0 +1,86 @@ +################################################################################ +# Ravion-managed cluster domain (opt-in) +################################################################################ +# When var.use_ravion_managed_domains = true, Ravion issues a wildcard cert +# `*.-.` (+ apex) and this module owns the public ALB +# HTTPS listener with that cert as its default. Services pass the outputs +# (ravion_cluster_domain_fqdn, public_alb_https_listener_arn, ...) to +# ecs_service to nest their domains under the wildcard. +# +# The listener lives here (not in the alb submodule) to avoid a DAG cycle: +# aws_lb.this -> ravion_certificate.cluster (targets the ALB) -> +# aws_lb_listener.ravion_https (uses the cert). ravion_certificate with +# role=shared_wildcard blocks until ISSUED, so cert_arn is valid at listener +# create time. + +locals { + enable_ravion_domain = var.use_ravion_managed_domains && var.enable_public_alb +} + +resource "ravion_certificate" "cluster" { + count = local.enable_ravion_domain ? 
1 : 0 + + role = "shared_wildcard" + wildcard = true + name = coalesce(var.ravion_cluster_name, var.name) + aws_account_id = var.ravion_aws_account_id + aws_region = coalesce(var.ravion_aws_region, local.region) + + lifecycle { + precondition { + condition = !var.use_ravion_managed_domains || var.enable_public_alb + error_message = "use_ravion_managed_domains requires enable_public_alb = true." + } + precondition { + condition = !var.use_ravion_managed_domains || (var.ravion_aws_account_id != null && var.ravion_aws_account_id != "") + error_message = "ravion_aws_account_id (aws_*) is required when use_ravion_managed_domains = true." + } + } +} + +resource "aws_lb_listener" "ravion_https" { + count = local.enable_ravion_domain && var.public_alb_enable_https ? 1 : 0 + + load_balancer_arn = module.public_alb[0].alb_arn + port = 443 + protocol = "HTTPS" + ssl_policy = var.public_alb_ssl_policy + certificate_arn = ravion_certificate.cluster[0].cert_arn + + default_action { + type = "fixed-response" + fixed_response { + content_type = "text/plain" + message_body = "Not found" + status_code = "404" + } + } + + tags = merge(var.tags, { Name = "${var.name}-pub-https" }) +} + +# The alb submodule only opens port 443 when it owns the HTTPS listener; in +# Ravion mode it does not, so open it here (mirrors the submodule's rules). +resource "aws_vpc_security_group_ingress_rule" "ravion_https_ipv4" { + for_each = local.enable_ravion_domain && var.public_alb_enable_https ? toset(var.public_alb_ingress_cidr_blocks) : toset([]) + + security_group_id = module.public_alb[0].security_group_id + description = "Allow HTTPS from ${each.value} (Ravion-owned listener)" + cidr_ipv4 = each.value + from_port = 443 + to_port = 443 + ip_protocol = "tcp" + tags = var.tags +} + +resource "aws_vpc_security_group_ingress_rule" "ravion_https_ipv6" { + for_each = local.enable_ravion_domain && var.public_alb_enable_https ? 
toset(["::/0"]) : toset([]) + + security_group_id = module.public_alb[0].security_group_id + description = "Allow HTTPS from ${each.value} (Ravion-owned listener)" + cidr_ipv6 = each.value + from_port = 443 + to_port = 443 + ip_protocol = "tcp" + tags = var.tags +} diff --git a/compute/ecs_cluster/variables.tf b/compute/ecs_cluster/variables.tf index c7e37d7..4e419d4 100644 --- a/compute/ecs_cluster/variables.tf +++ b/compute/ecs_cluster/variables.tf @@ -629,3 +629,31 @@ variable "region" { description = "AWS region. When null, the provider's configured region is used." default = null } + +################################################################################ +# Ravion-managed domains (optional) +################################################################################ + +variable "use_ravion_managed_domains" { + type = bool + description = "Allocate a Ravion-managed wildcard domain for the cluster and have Ravion own the public ALB HTTPS listener cert. Requires enable_public_alb = true." + default = false +} + +variable "ravion_cluster_name" { + type = string + description = "Free-form name leaf for the cluster's Ravion wildcard domain (becomes -.). Defaults to var.name." + default = null +} + +variable "ravion_aws_account_id" { + type = string + description = "Ravion AwsAccount row id (aws_*) the wildcard ACM cert is issued in. Required when use_ravion_managed_domains = true." + default = null +} + +variable "ravion_aws_region" { + type = string + description = "AWS region the cluster wildcard cert lives in. Defaults to the module region." + default = null +} diff --git a/compute/ecs_cluster/versions.tf b/compute/ecs_cluster/versions.tf index bec739b..72b0a66 100644 --- a/compute/ecs_cluster/versions.tf +++ b/compute/ecs_cluster/versions.tf @@ -12,6 +12,12 @@ terraform { source = "hashicorp/aws" version = ">= 6.0" } + # Ravion domains provider — only exercised when + # var.use_ravion_managed_domains = true (see ravion_domains.tf). 
+ ravion = { + source = "ravion.com/ravion/ravion" + version = ">= 0.1.0" + } } } From fb4da3beefcebd203e9a930bbe3f98969b1d0d5d Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 27 May 2026 17:14:22 +0530 Subject: [PATCH 02/26] feat(ecs_service): Ravion-managed domains (auto-FQDN + custom certs) cluster_parent_fqdn enables Ravion domains. Mode A (no domains): ravion_domain auto-FQDN . rides the cluster wildcard via a listener rule. Mode B (domains): per-service ravion_certificate (<=10 SANs) attached to the cluster listener + ravion_domain custom routing records; auto-FQDN retires once customs are healthy (ravion_auto_domain_status). Skips caller listener rules in Ravion mode. Outputs auto fqdn/url + custom cert arn. Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_service/listener_rules.tf | 5 +- compute/ecs_service/outputs.tf | 19 +++++ compute/ecs_service/ravion_domains.tf | 104 ++++++++++++++++++++++++++ compute/ecs_service/variables.tf | 52 +++++++++++++ compute/ecs_service/versions.tf | 4 + 5 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 compute/ecs_service/ravion_domains.tf diff --git a/compute/ecs_service/listener_rules.tf b/compute/ecs_service/listener_rules.tf index 7d238a1..6f24198 100644 --- a/compute/ecs_service/listener_rules.tf +++ b/compute/ecs_service/listener_rules.tf @@ -4,7 +4,10 @@ ################################################################################ resource "aws_lb_listener_rule" "alb" { - for_each = local.enable_load_balancer ? { + # In Ravion-managed mode, Ravion owns the listener rule (ravion_domains.tf); + # caller-supplied rules are skipped to avoid priority collisions on the + # shared listener. + for_each = local.enable_load_balancer && !local.ravion_managed ? 
{ for idx, rule in var.load_balancer_attachment.listener_rules : idx => rule } : {} diff --git a/compute/ecs_service/outputs.tf b/compute/ecs_service/outputs.tf index a21ab18..7add7ef 100644 --- a/compute/ecs_service/outputs.tf +++ b/compute/ecs_service/outputs.tf @@ -249,3 +249,22 @@ output "region" { } + +################################################################################ +# Ravion-managed domains +################################################################################ + +output "ravion_domain_fqdn" { + description = "Auto-FQDN under the cluster wildcard (null in Mode B after cutover)." + value = local.ravion_auto_live ? ravion_domain.auto[0].fqdn : null +} + +output "ravion_domain_url" { + description = "https URL for the auto-FQDN." + value = local.ravion_auto_live ? ravion_domain.auto[0].url : null +} + +output "ravion_custom_cert_arn" { + description = "ACM ARN of the per-service custom cert (Mode B only)." + value = local.ravion_mode_b ? ravion_certificate.svc[0].cert_arn : null +} diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf new file mode 100644 index 0000000..3dcfc37 --- /dev/null +++ b/compute/ecs_service/ravion_domains.tf @@ -0,0 +1,104 @@ +################################################################################ +# Ravion-managed service domain (optional) +################################################################################ +# Wired when cluster_parent_fqdn is set (piped from ecs_cluster). +# +# Mode A (domains = []): an auto-FQDN <name>.<cluster_parent_fqdn> rides the cluster +# wildcard cert — only a listener rule + routing record, no per-service cert. +# Mode B (domains = [...]): a per-service cert (<=10 SANs) attached to the +# cluster listener via Ravion; the auto-FQDN stays until the customs are +# healthy, then ravion_auto_domain_status flips retired -> auto is destroyed. 
+ +locals { + ravion_managed = var.cluster_parent_fqdn != null && var.cluster_parent_fqdn != "" + ravion_mode_b = local.ravion_managed && length(var.domains) > 0 + ravion_retired = local.ravion_mode_b ? try(data.ravion_auto_domain_status.auto[0].retired, false) : false + ravion_auto_live = local.ravion_managed && !local.ravion_retired + + ravion_priority = var.ravion_listener_rule_priority > 0 ? var.ravion_listener_rule_priority : ((parseint(substr(sha256(var.name), 0, 4), 16) % 49000) + 1000) + + ravion_host_headers = concat( + [for d in ravion_domain.auto : d.fqdn], + local.ravion_mode_b ? var.domains : [], + ) + + ravion_target_group_arn = ( + length(aws_lb_target_group.this) > 0 ? aws_lb_target_group.this[0].arn : ( + length(aws_lb_target_group.tg_1) > 0 ? aws_lb_target_group.tg_1[0].arn : null + ) + ) +} + +data "ravion_auto_domain_status" "auto" { + count = local.ravion_managed ? 1 : 0 + + parent_domain_id = var.cluster_parent_fqdn + name = var.name +} + +# Mode A auto-FQDN under the cluster wildcard (no per-service cert). +resource "ravion_domain" "auto" { + count = local.ravion_auto_live ? 1 : 0 + + name = var.name + parent_fqdn = var.cluster_parent_fqdn +} + +# Mode B per-service certificate (<=10 SANs), attached to the cluster listener. +resource "ravion_certificate" "svc" { + count = local.ravion_mode_b ? 1 : 0 + + role = "instance" + domains = var.domains + aws_account_id = var.ravion_aws_account_id + aws_region = coalesce(var.ravion_aws_region, local.region) + target_arn = var.cluster_https_listener_arn + + lifecycle { + precondition { + condition = !local.ravion_mode_b || (var.ravion_aws_account_id != null && var.ravion_aws_account_id != "") + error_message = "ravion_aws_account_id is required when domains is non-empty." + } + precondition { + condition = !local.ravion_mode_b || (var.cluster_https_listener_arn != null && var.cluster_https_listener_arn != "") + error_message = "cluster_https_listener_arn is required when domains is non-empty." 
+ } + precondition { + condition = length(var.domains) <= 10 + error_message = "A service may declare at most 10 custom domains (one cert per service)." + } + } +} + +# Mode B routing records the customer must add (one per custom FQDN). +resource "ravion_domain" "custom" { + for_each = local.ravion_mode_b ? toset(var.domains) : toset([]) + + name = each.value + target_dns_name = var.cluster_alb_dns_name + target_zone_id = var.cluster_alb_zone_id +} + +# Single listener rule routing all of this service's hostnames to its target +# group. Blue/green controllers flip the action externally. +resource "aws_lb_listener_rule" "ravion" { + count = local.ravion_managed && var.cluster_https_listener_arn != null && length(local.ravion_host_headers) > 0 ? 1 : 0 + + listener_arn = var.cluster_https_listener_arn + priority = local.ravion_priority + + condition { + host_header { + values = local.ravion_host_headers + } + } + + action { + type = "forward" + target_group_arn = local.ravion_target_group_arn + } + + lifecycle { + ignore_changes = [action] + } +} diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 13f70cd..5a92296 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -599,3 +599,55 @@ variable "region" { description = "AWS region. When null, the provider's configured region is used." default = null } + +################################################################################ +# Ravion-managed domains (optional) +################################################################################ + +variable "cluster_parent_fqdn" { + type = string + description = "Cluster wildcard apex FQDN (pipe from ecs_cluster.ravion_cluster_domain_fqdn). Set to enable Ravion-managed domains for this service." + default = null +} + +variable "cluster_https_listener_arn" { + type = string + description = "Cluster ALB HTTPS listener ARN (pipe from ecs_cluster.public_alb_https_listener_arn). 
Required when cluster_parent_fqdn is set." + default = null +} + +variable "ravion_listener_rule_priority" { + type = number + description = "Listener rule priority (1-50000). 0 = auto-derive from sha256(name)." + default = 0 +} + +variable "domains" { + type = list(string) + description = "Customer FQDNs (Mode B). Empty = Mode A (auto-FQDN under the cluster wildcard). Max 10." + default = [] +} + +variable "cluster_alb_dns_name" { + type = string + description = "Cluster ALB DNS name (required for Mode B routing records)." + default = null +} + +variable "cluster_alb_zone_id" { + type = string + description = "Cluster ALB hosted zone id (required for Mode B routing records)." + default = null +} + +variable "ravion_aws_account_id" { + type = string + description = "Ravion AwsAccount row id (aws_*). Required for Mode B." + default = null +} + +variable "ravion_aws_region" { + type = string + description = "AWS region the per-service cert lives in. Defaults to the module region." + default = null +} diff --git a/compute/ecs_service/versions.tf b/compute/ecs_service/versions.tf index bec739b..7145e0b 100644 --- a/compute/ecs_service/versions.tf +++ b/compute/ecs_service/versions.tf @@ -12,6 +12,10 @@ terraform { source = "hashicorp/aws" version = ">= 6.0" } + ravion = { + source = "ravion.com/ravion/ravion" + version = ">= 0.1.0" + } } } From 67660a57c68e37cdfb4856518b4873ec198b26ec Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 27 May 2026 17:18:47 +0530 Subject: [PATCH 03/26] feat(static_site): Ravion-managed domains (CloudFront, us-east-1) use_ravion_managed_domains: ravion_certificate (instance, target_arn = the CloudFront distribution ARN, region us-east-1) covering custom domains or a generated auto-FQDN; ravion_domain custom routing records (ALIAS to the distribution, CloudFront zone Z2FDTNDATAQYW2). Configure var.distributions without aliases/cert in this mode (Ravion sets them server-side). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- hosting/static_site/outputs.tf | 10 +++++ hosting/static_site/ravion_domains.tf | 53 +++++++++++++++++++++++++++ hosting/static_site/variables.tf | 22 +++++++++++ hosting/static_site/versions.tf | 4 ++ 4 files changed, 89 insertions(+) create mode 100644 hosting/static_site/ravion_domains.tf diff --git a/hosting/static_site/outputs.tf b/hosting/static_site/outputs.tf index f2fdd22..27ae7dd 100644 --- a/hosting/static_site/outputs.tf +++ b/hosting/static_site/outputs.tf @@ -138,3 +138,13 @@ output "region" { description = "The AWS region where the resources are deployed." value = local.region } + +output "ravion_certificate_arn" { + description = "ACM ARN of the Ravion-managed viewer cert (null unless use_ravion_managed_domains)." + value = var.use_ravion_managed_domains ? ravion_certificate.site[0].cert_arn : null +} + +output "ravion_fqdn" { + description = "Primary FQDN Ravion manages for the site." + value = var.use_ravion_managed_domains ? ravion_certificate.site[0].fqdn : null +} diff --git a/hosting/static_site/ravion_domains.tf b/hosting/static_site/ravion_domains.tf new file mode 100644 index 0000000..9cf44bc --- /dev/null +++ b/hosting/static_site/ravion_domains.tf @@ -0,0 +1,53 @@ +################################################################################ +# Ravion-managed domains for the static site (opt-in) +################################################################################ +# When use_ravion_managed_domains = true, Ravion owns the CloudFront viewer +# certificate + aliases server-side (attached via target_arn = the distribution +# ARN). The cert MUST live in us-east-1 (CloudFront requirement). +# +# domains = [] -> an auto-FQDN -. (instance cert). +# domains = [...] -> a per-site cert over those FQDNs + CUSTOMER routing +# records the user adds (ALIAS to the distribution domain). 
+# +# IMPORTANT: in Ravion mode, configure var.distributions WITHOUT aliases/ACM +# cert so the cdn submodule leaves the default CloudFront cert in place; Ravion +# swaps the viewer cert + sets aliases via UpdateDistribution. See OPEN_QUESTIONS +# (B-static) for the cdn-submodule ignore_changes follow-up that makes this +# drift-free across applies. + +locals { + ravion_static_enabled = var.use_ravion_managed_domains + ravion_distribution_arn = local.ravion_static_enabled ? try(values(module.cdn.distribution_arns)[0], null) : null + ravion_distribution_domain = local.ravion_static_enabled ? try(values(module.cdn.distribution_domain_names)[0], null) : null +} + +resource "ravion_certificate" "site" { + count = local.ravion_static_enabled ? 1 : 0 + + role = "instance" + domains = length(var.domains) > 0 ? var.domains : null + name = length(var.domains) == 0 ? var.name : null + aws_account_id = var.ravion_aws_account_id + aws_region = "us-east-1" + target_arn = local.ravion_distribution_arn + + lifecycle { + precondition { + condition = !var.use_ravion_managed_domains || (var.ravion_aws_account_id != null && var.ravion_aws_account_id != "") + error_message = "ravion_aws_account_id (aws_*) is required when use_ravion_managed_domains = true." + } + precondition { + condition = length(var.domains) <= 10 + error_message = "A static site may declare at most 10 custom domains." + } + } +} + +# CUSTOMER routing records (one per custom FQDN): ALIAS to the distribution. +resource "ravion_domain" "custom" { + for_each = local.ravion_static_enabled ? 
toset(var.domains) : toset([]) + + name = each.value + target_dns_name = local.ravion_distribution_domain + target_zone_id = "Z2FDTNDATAQYW2" # CloudFront's global hosted zone id +} diff --git a/hosting/static_site/variables.tf b/hosting/static_site/variables.tf index f185fa3..59c2766 100644 --- a/hosting/static_site/variables.tf +++ b/hosting/static_site/variables.tf @@ -430,3 +430,25 @@ variable "region" { description = "AWS region. When null, the provider's configured region is used." default = null } + +################################################################################ +# Ravion-managed domains (optional) +################################################################################ + +variable "use_ravion_managed_domains" { + type = bool + description = "Have Ravion own the CloudFront viewer cert + aliases (attached server-side). Configure var.distributions without aliases/ACM cert in this mode." + default = false +} + +variable "domains" { + type = list(string) + description = "Customer FQDNs for the site. Empty = a Ravion auto-FQDN. Max 10. Only used when use_ravion_managed_domains = true." + default = [] +} + +variable "ravion_aws_account_id" { + type = string + description = "Ravion AwsAccount row id (aws_*). Required when use_ravion_managed_domains = true." 
+ default = null +} diff --git a/hosting/static_site/versions.tf b/hosting/static_site/versions.tf index f2ea0cd..0c97189 100644 --- a/hosting/static_site/versions.tf +++ b/hosting/static_site/versions.tf @@ -8,5 +8,9 @@ terraform { source = "hashicorp/aws" version = ">= 6.0" } + ravion = { + source = "ravion.com/ravion/ravion" + version = ">= 0.1.0" + } } } From cddbf960f30653008aa1419beccc669e02e61c07 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 27 May 2026 18:25:48 +0530 Subject: [PATCH 04/26] feat(ecs): Ravion-managed domains support private ALB, not just public MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cluster only wired the Ravion wildcard cert + 443 HTTPS listener on the public ALB, so private services (web-private / private-network-server) had no Ravion-owned listener to attach to. Mirror the public wiring onto the private ALB — one wildcard cert backs both listeners (an ACM ARN can default many). ecs_cluster: - enable_ravion_domain now triggers on public OR private ALB (was public-only); precondition requires at least one ALB instead of mandating the public one. - Add aws_lb_listener.ravion_https_private on the private ALB (same cluster cert) + a private-ALB 443 ingress rule; private alb submodule now skips its own HTTPS listener/cert in Ravion mode, same as the public submodule. - public/private_alb_https_listener_arn outputs surface the Ravion-owned listener when present (length()-guarded so a private-only cluster is valid). ecs_service: - Generalize cluster_https_listener_arn / cluster_alb_dns_name / cluster_alb_zone_id descriptions: pipe the public OR private ALB outputs per the service's visibility. The resources were already visibility-agnostic; only the docs hardcoded "public". 
Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_cluster/load_balancers.tf | 8 +-- compute/ecs_cluster/outputs.tf | 8 +-- compute/ecs_cluster/ravion_domains.tf | 72 +++++++++++++++++++++------ compute/ecs_service/variables.tf | 6 +-- 4 files changed, 69 insertions(+), 25 deletions(-) diff --git a/compute/ecs_cluster/load_balancers.tf b/compute/ecs_cluster/load_balancers.tf index 5537c1c..7d22138 100644 --- a/compute/ecs_cluster/load_balancers.tf +++ b/compute/ecs_cluster/load_balancers.tf @@ -56,13 +56,15 @@ module "private_alb" { subnet_ids = var.private_subnet_ids internal = true - # Listener configuration + # Listener configuration. In Ravion-managed mode this module owns the HTTPS + # listener (ravion_domains.tf) with the Ravion wildcard cert as default, so + # the alb submodule skips its own HTTPS listener + customer cert ARNs. enable_http_listener = true - enable_https_listener = var.private_alb_enable_https + enable_https_listener = var.private_alb_enable_https && !local.enable_ravion_domain http_to_https_redirect = var.private_alb_enable_https # SSL/TLS - certificate_arns = var.private_alb_certificate_arns + certificate_arns = local.enable_ravion_domain ? [] : var.private_alb_certificate_arns ssl_policy = var.private_alb_ssl_policy # ALB settings diff --git a/compute/ecs_cluster/outputs.tf b/compute/ecs_cluster/outputs.tf index ead1f3b..317c46d 100644 --- a/compute/ecs_cluster/outputs.tf +++ b/compute/ecs_cluster/outputs.tf @@ -122,7 +122,7 @@ output "public_alb_http_listener_arn" { output "public_alb_https_listener_arn" { description = "The ARN of the public ALB HTTPS listener (Ravion-owned when use_ravion_managed_domains; null if HTTPS disabled)." value = (var.enable_public_alb && var.public_alb_enable_https) ? ( - local.enable_ravion_domain ? aws_lb_listener.ravion_https[0].arn : module.public_alb[0].https_listener_arn + length(aws_lb_listener.ravion_https) > 0 ? 
aws_lb_listener.ravion_https[0].arn : module.public_alb[0].https_listener_arn ) : null } @@ -166,8 +166,10 @@ output "private_alb_http_listener_arn" { } output "private_alb_https_listener_arn" { - description = "The ARN of the private ALB HTTPS listener (null if HTTPS disabled)." - value = var.enable_private_alb && var.private_alb_enable_https ? module.private_alb[0].https_listener_arn : null + description = "The ARN of the private ALB HTTPS listener (Ravion-owned when use_ravion_managed_domains; null if HTTPS disabled)." + value = (var.enable_private_alb && var.private_alb_enable_https) ? ( + length(aws_lb_listener.ravion_https_private) > 0 ? aws_lb_listener.ravion_https_private[0].arn : module.private_alb[0].https_listener_arn + ) : null } ################################################################################ diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 35a74a8..5b8ffe6 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -1,20 +1,24 @@ ################################################################################ # Ravion-managed cluster domain (opt-in) ################################################################################ -# When var.use_ravion_managed_domains = true, Ravion issues a wildcard cert -# `*.-.` (+ apex) and this module owns the public ALB -# HTTPS listener with that cert as its default. Services pass the outputs -# (ravion_cluster_domain_fqdn, public_alb_https_listener_arn, ...) to -# ecs_service to nest their domains under the wildcard. +# When var.use_ravion_managed_domains = true, Ravion issues ONE wildcard cert +# `*.-.` (+ apex) and this module owns the cluster ALB +# HTTPS listener(s) with that cert as the default. The same cert backs BOTH the +# public and the private ALB listener (an ACM cert ARN can default many +# listeners), so public AND private services nest their domains under the one +# wildcard. 
Services pass the matching outputs to ecs_service: +# - public service -> public_alb_https_listener_arn + public_alb_dns_name/zone +# - private service -> private_alb_https_listener_arn + private_alb_dns_name/zone # -# The listener lives here (not in the alb submodule) to avoid a DAG cycle: -# aws_lb.this -> ravion_certificate.cluster (targets the ALB) -> -# aws_lb_listener.ravion_https (uses the cert). ravion_certificate with -# role=shared_wildcard blocks until ISSUED, so cert_arn is valid at listener -# create time. +# The listeners live here (not in the alb submodule) to avoid a DAG cycle: +# aws_lb.this -> ravion_certificate.cluster -> aws_lb_listener.ravion_https* +# (uses the cert). ravion_certificate with role=shared_wildcard blocks until +# ISSUED, so cert_arn is valid at listener create time. locals { - enable_ravion_domain = var.use_ravion_managed_domains && var.enable_public_alb + enable_ravion_domain = var.use_ravion_managed_domains && (var.enable_public_alb || var.enable_private_alb) + enable_ravion_public_listener = local.enable_ravion_domain && var.enable_public_alb && var.public_alb_enable_https + enable_ravion_private_listener = local.enable_ravion_domain && var.enable_private_alb && var.private_alb_enable_https } resource "ravion_certificate" "cluster" { @@ -28,8 +32,8 @@ resource "ravion_certificate" "cluster" { lifecycle { precondition { - condition = !var.use_ravion_managed_domains || var.enable_public_alb - error_message = "use_ravion_managed_domains requires enable_public_alb = true." + condition = !var.use_ravion_managed_domains || var.enable_public_alb || var.enable_private_alb + error_message = "use_ravion_managed_domains requires at least one ALB (enable_public_alb or enable_private_alb)." } precondition { condition = !var.use_ravion_managed_domains || (var.ravion_aws_account_id != null && var.ravion_aws_account_id != "") @@ -38,8 +42,9 @@ resource "ravion_certificate" "cluster" { } } +# Public ALB Ravion listener. 
resource "aws_lb_listener" "ravion_https" { - count = local.enable_ravion_domain && var.public_alb_enable_https ? 1 : 0 + count = local.enable_ravion_public_listener ? 1 : 0 load_balancer_arn = module.public_alb[0].alb_arn port = 443 @@ -59,10 +64,32 @@ resource "aws_lb_listener" "ravion_https" { tags = merge(var.tags, { Name = "${var.name}-pub-https" }) } +# Private ALB Ravion listener (same wildcard cert as the public one). +resource "aws_lb_listener" "ravion_https_private" { + count = local.enable_ravion_private_listener ? 1 : 0 + + load_balancer_arn = module.private_alb[0].alb_arn + port = 443 + protocol = "HTTPS" + ssl_policy = var.private_alb_ssl_policy + certificate_arn = ravion_certificate.cluster[0].cert_arn + + default_action { + type = "fixed-response" + fixed_response { + content_type = "text/plain" + message_body = "Not found" + status_code = "404" + } + } + + tags = merge(var.tags, { Name = "${var.name}-priv-https" }) +} + # The alb submodule only opens port 443 when it owns the HTTPS listener; in # Ravion mode it does not, so open it here (mirrors the submodule's rules). resource "aws_vpc_security_group_ingress_rule" "ravion_https_ipv4" { - for_each = local.enable_ravion_domain && var.public_alb_enable_https ? toset(var.public_alb_ingress_cidr_blocks) : toset([]) + for_each = local.enable_ravion_public_listener ? toset(var.public_alb_ingress_cidr_blocks) : toset([]) security_group_id = module.public_alb[0].security_group_id description = "Allow HTTPS from ${each.value} (Ravion-owned listener)" @@ -74,7 +101,7 @@ resource "aws_vpc_security_group_ingress_rule" "ravion_https_ipv4" { } resource "aws_vpc_security_group_ingress_rule" "ravion_https_ipv6" { - for_each = local.enable_ravion_domain && var.public_alb_enable_https ? toset(["::/0"]) : toset([]) + for_each = local.enable_ravion_public_listener ? 
toset(["::/0"]) : toset([]) security_group_id = module.public_alb[0].security_group_id description = "Allow HTTPS from ${each.value} (Ravion-owned listener)" @@ -84,3 +111,16 @@ resource "aws_vpc_security_group_ingress_rule" "ravion_https_ipv6" { ip_protocol = "tcp" tags = var.tags } + +# Private ALB 443 ingress (mirrors the public rules for the private listener). +resource "aws_vpc_security_group_ingress_rule" "ravion_https_private_ipv4" { + for_each = local.enable_ravion_private_listener ? toset(var.private_alb_ingress_cidr_blocks) : toset([]) + + security_group_id = module.private_alb[0].security_group_id + description = "Allow HTTPS from ${each.value} (Ravion-owned listener)" + cidr_ipv4 = each.value + from_port = 443 + to_port = 443 + ip_protocol = "tcp" + tags = var.tags +} diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 5a92296..61dca16 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -612,7 +612,7 @@ variable "cluster_parent_fqdn" { variable "cluster_https_listener_arn" { type = string - description = "Cluster ALB HTTPS listener ARN (pipe from ecs_cluster.public_alb_https_listener_arn). Required when cluster_parent_fqdn is set." + description = "Cluster ALB HTTPS listener ARN this service attaches to. Pipe ecs_cluster.public_alb_https_listener_arn for a public service, or private_alb_https_listener_arn for a private one. Required when cluster_parent_fqdn is set." default = null } @@ -630,13 +630,13 @@ variable "domains" { variable "cluster_alb_dns_name" { type = string - description = "Cluster ALB DNS name (required for Mode B routing records)." + description = "Cluster ALB DNS name for Mode B routing records — public_alb_dns_name for a public service, private_alb_dns_name for a private one. Must match the ALB whose listener is in cluster_https_listener_arn." 
default = null } variable "cluster_alb_zone_id" { type = string - description = "Cluster ALB hosted zone id (required for Mode B routing records)." + description = "Cluster ALB hosted zone id for Mode B routing records — public_alb_zone_id for a public service, private_alb_zone_id for a private one. Must match the ALB whose listener is in cluster_https_listener_arn." default = null } From 10b1227ad14bb153e521ac0f376c3c6b070f4767 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 27 May 2026 18:44:39 +0530 Subject: [PATCH 05/26] use my proxy --- compute/ecs_cluster/versions.tf | 2 +- compute/ecs_service/versions.tf | 2 +- hosting/static_site/versions.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compute/ecs_cluster/versions.tf b/compute/ecs_cluster/versions.tf index 72b0a66..9b7904b 100644 --- a/compute/ecs_cluster/versions.tf +++ b/compute/ecs_cluster/versions.tf @@ -15,7 +15,7 @@ terraform { # Ravion domains provider — only exercised when # var.use_ravion_managed_domains = true (see ravion_domains.tf). 
ravion = { - source = "ravion.com/ravion/ravion" + source = "providers.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } diff --git a/compute/ecs_service/versions.tf b/compute/ecs_service/versions.tf index 7145e0b..4fa4334 100644 --- a/compute/ecs_service/versions.tf +++ b/compute/ecs_service/versions.tf @@ -13,7 +13,7 @@ terraform { version = ">= 6.0" } ravion = { - source = "ravion.com/ravion/ravion" + source = "providers.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } diff --git a/hosting/static_site/versions.tf b/hosting/static_site/versions.tf index 0c97189..9773ab3 100644 --- a/hosting/static_site/versions.tf +++ b/hosting/static_site/versions.tf @@ -9,7 +9,7 @@ terraform { version = ">= 6.0" } ravion = { - source = "ravion.com/ravion/ravion" + source = "providers.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } From e1f338a0aa4165eb75414375b449da20b47a0212 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 27 May 2026 22:53:29 +0530 Subject: [PATCH 06/26] fix(ecs_cluster): name the Ravion wildcard from the module instance given id The cluster wildcard FQDN used var.name (the project-environment slug, e.g. testttsss-prod-modules), not the user-facing instance given id (elysia-ecs-cluster). Declare module_instance_given_id (injected by the runner as TF_VAR_module_instance_given_id) and default the cert leaf to it, so the wildcard becomes -.. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_cluster/ravion_domains.tf | 2 +- compute/ecs_cluster/variables.tf | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 5b8ffe6..8ca991b 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -26,7 +26,7 @@ resource "ravion_certificate" "cluster" { role = "shared_wildcard" wildcard = true - name = coalesce(var.ravion_cluster_name, var.name) + name = coalesce(var.ravion_cluster_name, var.module_instance_given_id, var.name) aws_account_id = var.ravion_aws_account_id aws_region = coalesce(var.ravion_aws_region, local.region) diff --git a/compute/ecs_cluster/variables.tf b/compute/ecs_cluster/variables.tf index 4e419d4..c30619c 100644 --- a/compute/ecs_cluster/variables.tf +++ b/compute/ecs_cluster/variables.tf @@ -642,7 +642,13 @@ variable "use_ravion_managed_domains" { variable "ravion_cluster_name" { type = string - description = "Free-form name leaf for the cluster's Ravion wildcard domain (becomes -.). Defaults to var.name." + description = "Free-form name leaf for the cluster's Ravion wildcard domain (becomes -.). Defaults to the module instance given id." + default = null +} + +variable "module_instance_given_id" { + type = string + description = "The module instance's user-facing given id (injected by the runner as TF_VAR_module_instance_given_id). Used as the default leaf for the Ravion wildcard domain." default = null } From 74d0889e7eaab780278bc22ee734032735d217b1 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Wed, 27 May 2026 23:38:16 +0530 Subject: [PATCH 07/26] fix(ecs_service): auto-FQDN leaf from module instance given id Match the ecs_cluster fix: the service auto-domain (.) used var.name (project-env slug) instead of the user-facing instance given id. 
Declare module_instance_given_id (runner-injected) and default the leaf to it, so the auto-FQDN becomes .. Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_service/ravion_domains.tf | 4 ++-- compute/ecs_service/variables.tf | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 3dcfc37..2f7a5ec 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -33,14 +33,14 @@ data "ravion_auto_domain_status" "auto" { count = local.ravion_managed ? 1 : 0 parent_domain_id = var.cluster_parent_fqdn - name = var.name + name = coalesce(var.module_instance_given_id, var.name) } # Mode A auto-FQDN under the cluster wildcard (no per-service cert). resource "ravion_domain" "auto" { count = local.ravion_auto_live ? 1 : 0 - name = var.name + name = coalesce(var.module_instance_given_id, var.name) parent_fqdn = var.cluster_parent_fqdn } diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 61dca16..7a4daf7 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -646,6 +646,12 @@ variable "ravion_aws_account_id" { default = null } +variable "module_instance_given_id" { + type = string + description = "The module instance's user-facing given id (injected by the runner as TF_VAR_module_instance_given_id). Used as the auto-FQDN leaf under the cluster wildcard." + default = null +} + variable "ravion_aws_region" { type = string description = "AWS region the per-service cert lives in. Defaults to the module region." From 0fa3e667013314ede46c52947ed3c07da3df21d5 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 28 May 2026 00:49:58 +0530 Subject: [PATCH 08/26] feat(ecs_cluster): point wildcard cert at the cluster ALB ravion_certificate.cluster now passes target_dns_name/target_zone_id (public ALB if present, else private) so Ravion publishes a *. 
ALIAS to the cluster ALB. Service auto-FQDNs riding the wildcard then resolve with no per-service DNS. Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_cluster/ravion_domains.tf | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 8ca991b..320309d 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -30,6 +30,13 @@ resource "ravion_certificate" "cluster" { aws_account_id = var.ravion_aws_account_id aws_region = coalesce(var.ravion_aws_region, local.region) + # Ravion publishes a *. ALIAS to this ALB so service auto-FQDNs + # (.) resolve under the cluster wildcard. Public ALB if present, + # else private. (A single wildcard record serves one ALB; mixed public+private + # clusters route to the public one.) + target_dns_name = var.enable_public_alb ? module.public_alb[0].alb_dns_name : (var.enable_private_alb ? module.private_alb[0].alb_dns_name : null) + target_zone_id = var.enable_public_alb ? module.public_alb[0].alb_zone_id : (var.enable_private_alb ? module.private_alb[0].alb_zone_id : null) + lifecycle { precondition { condition = !var.use_ravion_managed_domains || var.enable_public_alb || var.enable_private_alb From a695922ed21a4dbd07225f0f9908d0d4dbdf978a Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 28 May 2026 11:11:25 +0530 Subject: [PATCH 09/26] fix(ecs_service): use name_prefix for TG so attribute changes don't collide Fixed name + create_before_destroy = true means any attribute change that forces TG replacement (e.g. container_port change) fails apply because the new TG can't share the same name as the existing one. Switching to name_prefix lets AWS allocate a unique suffix on each create. 
Co-Authored-By: Claude Opus 4.7 --- compute/ecs_service/target_groups.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compute/ecs_service/target_groups.tf b/compute/ecs_service/target_groups.tf index 44a3bdf..9aca924 100644 --- a/compute/ecs_service/target_groups.tf +++ b/compute/ecs_service/target_groups.tf @@ -5,7 +5,7 @@ resource "aws_lb_target_group" "this" { count = local.enable_load_balancer && var.deployment_type == "rolling" ? 1 : 0 - name = "${substr(var.name, 0, min(length(var.name), 28))}-tg" + name_prefix = substr(var.name, 0, 6) port = var.load_balancer_attachment.target_group.port protocol = var.load_balancer_attachment.target_group.protocol vpc_id = var.vpc_id From 291cac29cfc5a31d30746378e319714535eac7be Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 28 May 2026 15:29:39 +0530 Subject: [PATCH 10/26] feat(static_site): add routing="raw" for object sites MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Existing 'spa' and 'filesystem' modes append /index.html to extensionless paths, which is wrong for object sites whose URLs ARE the S3 keys — namely a Terraform provider registry, where the viewer requests `/v1/providers///versions` and must get back the literal JSON file at that key, not `/<...>/versions/index.html`. `raw` is a 1:1 viewer-URI → // mapping. KVS-driven versioning still applies. Use for terraform registries, S3-like content APIs, or any case where viewer URLs must equal S3 keys 1:1. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- hosting/static_site/functions/rewrite.js | 18 +++++++++++++++++- hosting/static_site/variables.tf | 6 +++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/hosting/static_site/functions/rewrite.js b/hosting/static_site/functions/rewrite.js index 9943901..ac25213 100644 --- a/hosting/static_site/functions/rewrite.js +++ b/hosting/static_site/functions/rewrite.js @@ -26,6 +26,15 @@ // /.well-known/foo // -> //.well-known/foo (dotted seg) // /foo[/] -> //foo/ (clean URLs) +// raw routing: +// / -> // (pass through verbatim) +// /foo -> //foo +// /foo.js -> //foo.js +// /v1/providers///versions +// -> //v1/providers///versions +// Use raw for object sites serving extensionless paths literally (e.g. a +// terraform provider registry where the URL is the S3 key). KVS-based +// versioning still applies — only the /index.html appending is skipped. // // Tokens substituted at apply time via templatefile(): // ${kvs_id}, ${default_version}, ${index_document}, ${routing} @@ -69,7 +78,14 @@ async function handler(event) { // canonical case. var hasDottedSegment = uri.indexOf('/.') >= 0; - if (uri === '/') { + if (ROUTING === 'raw') { + // 1:1 URI → // with no /index.html appending. Object sites + // (a terraform provider registry is the motivating case) MUST serve + // /v1/providers///versions as the literal file at that + // key, not as /<...>/versions/index.html. KVS-driven version + // selection still happens through the // prefix. 
+ request.uri = '/' + version + uri; + } else if (uri === '/') { request.uri = '/' + version + '/' + INDEX_DOCUMENT; } else if (hasExtension || hasDottedSegment) { request.uri = '/' + version + uri; diff --git a/hosting/static_site/variables.tf b/hosting/static_site/variables.tf index 59c2766..9760b17 100644 --- a/hosting/static_site/variables.tf +++ b/hosting/static_site/variables.tf @@ -25,12 +25,12 @@ variable "tags" { variable "routing" { type = string - description = "URI rewriting style applied at the edge before the version prefix is added. 'spa' rewrites every non-asset path to //index.html so a client-side router takes over. 'filesystem' rewrites /foo and /foo/ to //foo/index.html and serves /foo.js etc. as-is. Both styles are versioned identically." + description = "URI rewriting style applied at the edge before the version prefix is added. 'spa' rewrites every non-asset path to //index.html so a client-side router takes over. 'filesystem' rewrites /foo and /foo/ to //foo/index.html and serves /foo.js etc. as-is. 'raw' is a 1:1 viewer-URI -> // mapping with NO /index.html appending — use it for object sites that serve specific extensionless paths verbatim (terraform provider registries, S3-like content APIs). All three styles are versioned identically through KVS." default = "spa" validation { - condition = contains(["spa", "filesystem"], var.routing) - error_message = "The routing must be 'spa' or 'filesystem'." + condition = contains(["spa", "filesystem", "raw"], var.routing) + error_message = "The routing must be 'spa', 'filesystem', or 'raw'." 
} } From 2f78c63e61c8b74b28c6a02c6aa5ff10cca095db Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 28 May 2026 17:18:27 +0530 Subject: [PATCH 11/26] feat: pull ravion provider from CloudFront-hosted registry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move all required_providers source references from `providers.siddharthsuresh.dev/ravion/ravion` (cloudflared → local registry on a laptop) to `provider-cf.siddharthsuresh.dev/ravion/ravion` (CloudFront → S3, multi-version, KMS-signed). The local-cloudflared path stays alive during the cutover so existing stacks pinning the old hostname keep working until they're migrated or destroyed. Migration note: existing stacks have provider addresses recorded in their cloud-backend state under the old hostname. A subsequent apply will need a state-replace-provider pass, OR be done as part of a destroy+recreate. New stacks pick up the new hostname automatically. Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_cluster/versions.tf | 2 +- compute/ecs_service/versions.tf | 2 +- hosting/static_site/versions.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compute/ecs_cluster/versions.tf b/compute/ecs_cluster/versions.tf index 9b7904b..bed1016 100644 --- a/compute/ecs_cluster/versions.tf +++ b/compute/ecs_cluster/versions.tf @@ -15,7 +15,7 @@ terraform { # Ravion domains provider — only exercised when # var.use_ravion_managed_domains = true (see ravion_domains.tf). 
ravion = { - source = "providers.siddharthsuresh.dev/ravion/ravion" + source = "provider-cf.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } diff --git a/compute/ecs_service/versions.tf b/compute/ecs_service/versions.tf index 4fa4334..fb15899 100644 --- a/compute/ecs_service/versions.tf +++ b/compute/ecs_service/versions.tf @@ -13,7 +13,7 @@ terraform { version = ">= 6.0" } ravion = { - source = "providers.siddharthsuresh.dev/ravion/ravion" + source = "provider-cf.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } diff --git a/hosting/static_site/versions.tf b/hosting/static_site/versions.tf index 9773ab3..8d30acf 100644 --- a/hosting/static_site/versions.tf +++ b/hosting/static_site/versions.tf @@ -9,7 +9,7 @@ terraform { version = ">= 6.0" } ravion = { - source = "providers.siddharthsuresh.dev/ravion/ravion" + source = "provider-cf.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } From 638ab1408bb1f158c7a9283adc82c59d3f6e1a5c Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 28 May 2026 17:37:35 +0530 Subject: [PATCH 12/26] revert: keep ravion provider source on providers.siddharthsuresh.dev MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The runner's terraformrc network-mirror config is hardcoded for the canonical hostname (providers.siddharthsuresh.dev). Pointing modules at provider-cf.siddharthsuresh.dev triggers terraform's "requires authentication credentials" error since the runner doesn't emit a credentials/mirror block for that host. The proper migration is stage B — keep the source unchanged and point the canonical hostname's DNS at CloudFront — not a per-module source rewrite. Reverting `2f78c63`. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- compute/ecs_cluster/versions.tf | 2 +- compute/ecs_service/versions.tf | 2 +- hosting/static_site/versions.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compute/ecs_cluster/versions.tf b/compute/ecs_cluster/versions.tf index bed1016..9b7904b 100644 --- a/compute/ecs_cluster/versions.tf +++ b/compute/ecs_cluster/versions.tf @@ -15,7 +15,7 @@ terraform { # Ravion domains provider — only exercised when # var.use_ravion_managed_domains = true (see ravion_domains.tf). ravion = { - source = "provider-cf.siddharthsuresh.dev/ravion/ravion" + source = "providers.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } diff --git a/compute/ecs_service/versions.tf b/compute/ecs_service/versions.tf index fb15899..4fa4334 100644 --- a/compute/ecs_service/versions.tf +++ b/compute/ecs_service/versions.tf @@ -13,7 +13,7 @@ terraform { version = ">= 6.0" } ravion = { - source = "provider-cf.siddharthsuresh.dev/ravion/ravion" + source = "providers.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } diff --git a/hosting/static_site/versions.tf b/hosting/static_site/versions.tf index 8d30acf..9773ab3 100644 --- a/hosting/static_site/versions.tf +++ b/hosting/static_site/versions.tf @@ -9,7 +9,7 @@ terraform { version = ">= 6.0" } ravion = { - source = "provider-cf.siddharthsuresh.dev/ravion/ravion" + source = "providers.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } From 3a4f1f77c72337d43e865d140b6d72c80d2a8464 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 28 May 2026 17:46:50 +0530 Subject: [PATCH 13/26] Revert "revert: keep ravion provider source on providers.siddharthsuresh.dev" This reverts commit 638ab1408bb1f158c7a9283adc82c59d3f6e1a5c. 
--- compute/ecs_cluster/versions.tf | 2 +- compute/ecs_service/versions.tf | 2 +- hosting/static_site/versions.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/compute/ecs_cluster/versions.tf b/compute/ecs_cluster/versions.tf index 9b7904b..bed1016 100644 --- a/compute/ecs_cluster/versions.tf +++ b/compute/ecs_cluster/versions.tf @@ -15,7 +15,7 @@ terraform { # Ravion domains provider — only exercised when # var.use_ravion_managed_domains = true (see ravion_domains.tf). ravion = { - source = "providers.siddharthsuresh.dev/ravion/ravion" + source = "provider-cf.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } diff --git a/compute/ecs_service/versions.tf b/compute/ecs_service/versions.tf index 4fa4334..fb15899 100644 --- a/compute/ecs_service/versions.tf +++ b/compute/ecs_service/versions.tf @@ -13,7 +13,7 @@ terraform { version = ">= 6.0" } ravion = { - source = "providers.siddharthsuresh.dev/ravion/ravion" + source = "provider-cf.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } diff --git a/hosting/static_site/versions.tf b/hosting/static_site/versions.tf index 9773ab3..8d30acf 100644 --- a/hosting/static_site/versions.tf +++ b/hosting/static_site/versions.tf @@ -9,7 +9,7 @@ terraform { version = ">= 6.0" } ravion = { - source = "providers.siddharthsuresh.dev/ravion/ravion" + source = "provider-cf.siddharthsuresh.dev/ravion/ravion" version = ">= 0.1.0" } } From 21cf2d5c22d23472e3a2c5ecb49b3ba1551bb30b Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 28 May 2026 21:34:27 +0530 Subject: [PATCH 14/26] Revert "feat(static_site): add routing="raw" for object sites" This reverts commit 291cac29cfc5a31d30746378e319714535eac7be. 
--- hosting/static_site/functions/rewrite.js | 18 +----------------- hosting/static_site/variables.tf | 6 +++--- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/hosting/static_site/functions/rewrite.js b/hosting/static_site/functions/rewrite.js index ac25213..9943901 100644 --- a/hosting/static_site/functions/rewrite.js +++ b/hosting/static_site/functions/rewrite.js @@ -26,15 +26,6 @@ // /.well-known/foo // -> //.well-known/foo (dotted seg) // /foo[/] -> //foo/ (clean URLs) -// raw routing: -// / -> // (pass through verbatim) -// /foo -> //foo -// /foo.js -> //foo.js -// /v1/providers///versions -// -> //v1/providers///versions -// Use raw for object sites serving extensionless paths literally (e.g. a -// terraform provider registry where the URL is the S3 key). KVS-based -// versioning still applies — only the /index.html appending is skipped. // // Tokens substituted at apply time via templatefile(): // ${kvs_id}, ${default_version}, ${index_document}, ${routing} @@ -78,14 +69,7 @@ async function handler(event) { // canonical case. var hasDottedSegment = uri.indexOf('/.') >= 0; - if (ROUTING === 'raw') { - // 1:1 URI → // with no /index.html appending. Object sites - // (a terraform provider registry is the motivating case) MUST serve - // /v1/providers///versions as the literal file at that - // key, not as /<...>/versions/index.html. KVS-driven version - // selection still happens through the // prefix. 
- request.uri = '/' + version + uri; - } else if (uri === '/') { + if (uri === '/') { request.uri = '/' + version + '/' + INDEX_DOCUMENT; } else if (hasExtension || hasDottedSegment) { request.uri = '/' + version + uri; diff --git a/hosting/static_site/variables.tf b/hosting/static_site/variables.tf index 9760b17..59c2766 100644 --- a/hosting/static_site/variables.tf +++ b/hosting/static_site/variables.tf @@ -25,12 +25,12 @@ variable "tags" { variable "routing" { type = string - description = "URI rewriting style applied at the edge before the version prefix is added. 'spa' rewrites every non-asset path to //index.html so a client-side router takes over. 'filesystem' rewrites /foo and /foo/ to //foo/index.html and serves /foo.js etc. as-is. 'raw' is a 1:1 viewer-URI -> // mapping with NO /index.html appending — use it for object sites that serve specific extensionless paths verbatim (terraform provider registries, S3-like content APIs). All three styles are versioned identically through KVS." + description = "URI rewriting style applied at the edge before the version prefix is added. 'spa' rewrites every non-asset path to //index.html so a client-side router takes over. 'filesystem' rewrites /foo and /foo/ to //foo/index.html and serves /foo.js etc. as-is. Both styles are versioned identically." default = "spa" validation { - condition = contains(["spa", "filesystem", "raw"], var.routing) - error_message = "The routing must be 'spa', 'filesystem', or 'raw'." + condition = contains(["spa", "filesystem"], var.routing) + error_message = "The routing must be 'spa' or 'filesystem'." 
} } From c315c8f63c398580536f46e50f0b59e063e3d7f5 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Fri, 29 May 2026 13:39:20 +0530 Subject: [PATCH 15/26] feat(domains): unify cluster ALB HTTPS listener + surface managed-domains state ecs_cluster always owns the public/private HTTPS listener at a stable TF address so toggling use_ravion_managed_domains is an in-place certificate swap, not a destroy+create across two addresses. Only the default cert (Ravion wildcard vs the customer's first ARN), the SNI cert set, and ravion_certificate.cluster change on toggle. - alb submodule: additive force_http_to_https_redirect keeps the HTTP->HTTPS redirect when a parent owns port 443; redirect deduped into locals - new ravion_managed_domains_enabled output for service-level show/hide - moved blocks for the in-root renames + submodule->root migration - focused tests/listeners.tftest.hcl (8/8 pass) Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_cluster/listeners.tf | 204 ++++++++++++++ compute/ecs_cluster/load_balancers.tf | 36 ++- compute/ecs_cluster/outputs.tf | 17 +- compute/ecs_cluster/ravion_domains.tf | 106 +------ .../ecs_cluster/tests/listeners.tftest.hcl | 263 ++++++++++++++++++ networking/alb/README.md | 1 + networking/alb/listeners.tf | 9 +- networking/alb/locals.tf | 6 + networking/alb/variables.tf | 6 + 9 files changed, 524 insertions(+), 124 deletions(-) create mode 100644 compute/ecs_cluster/listeners.tf create mode 100644 compute/ecs_cluster/tests/listeners.tftest.hcl diff --git a/compute/ecs_cluster/listeners.tf b/compute/ecs_cluster/listeners.tf new file mode 100644 index 0000000..d61fcde --- /dev/null +++ b/compute/ecs_cluster/listeners.tf @@ -0,0 +1,204 @@ +################################################################################ +# Cluster ALB HTTPS listeners +################################################################################ +# ecs_cluster ALWAYS owns the cluster ALB HTTPS listener(s) (the alb submodule +# never 
creates them — see load_balancers.tf) so that toggling +# var.use_ravion_managed_domains is an IN-PLACE certificate swap on a stable TF +# address rather than a destroy+create across two addresses. Only the default +# certificate SOURCE changes by mode: +# +# - use_ravion_managed_domains = true -> the Ravion wildcard cert +# (ravion_certificate.cluster, see ravion_domains.tf) is the default cert on +# BOTH listeners; public/private services nest their auto-FQDNs under it. +# - use_ravion_managed_domains = false -> the listener uses the customer's +# first public/private_alb_certificate_arns entry as default and attaches +# the rest for SNI. +# +# The listeners live here (not in the alb submodule) to avoid a DAG cycle: +# aws_lb.this -> ravion_certificate.cluster -> aws_lb_listener.public_https +# (uses the cert). ravion_certificate with role=shared_wildcard blocks until +# ISSUED, so cert_arn is valid at listener create time. + +# Public ALB HTTPS listener. Mode-independent address: created whenever the +# public ALB has HTTPS enabled. +resource "aws_lb_listener" "public_https" { + count = var.enable_public_alb && var.public_alb_enable_https ? 1 : 0 + + load_balancer_arn = module.public_alb[0].alb_arn + port = 443 + protocol = "HTTPS" + ssl_policy = var.public_alb_ssl_policy + # try(...) defers to the precondition below for the clean error when BYO mode + # has no cert ARN, instead of a cryptic index-out-of-range. + certificate_arn = local.enable_ravion_domain ? 
ravion_certificate.cluster[0].cert_arn : try(var.public_alb_certificate_arns[0], null) + + default_action { + type = "fixed-response" + fixed_response { + content_type = "text/plain" + message_body = "Not found" + status_code = "404" + } + } + + lifecycle { + precondition { + condition = local.enable_ravion_domain || length(var.public_alb_certificate_arns) >= 1 + error_message = "public_alb_certificate_arns must include at least one ACM certificate ARN when public_alb_enable_https = true and use_ravion_managed_domains = false." + } + } + + tags = merge(local.tags, { Name = "${var.name}-pub-https" }) +} + +# Customer SNI certs for the public listener (BYO mode only; the Ravion wildcard +# needs no extra SNI certs). Gated on the listener existing so the slice is +# never evaluated when HTTPS / the ALB is off (mirrors the alb submodule's +# `additional` idiom and avoids slice([], 1, 0) on the default config). +resource "aws_lb_listener_certificate" "public_sni" { + # length > 1 keeps slice() self-safe (never slice([], 1, 0)) independent of the + # listener precondition: only the 2nd+ ARNs become SNI certs. + for_each = (var.enable_public_alb && var.public_alb_enable_https && !local.enable_ravion_domain && length(var.public_alb_certificate_arns) > 1) ? toset(slice(var.public_alb_certificate_arns, 1, length(var.public_alb_certificate_arns))) : toset([]) + + listener_arn = aws_lb_listener.public_https[0].arn + certificate_arn = each.value +} + +# Private ALB HTTPS listener (same Ravion wildcard cert as the public one in +# managed mode; the customer's first private cert ARN otherwise). +resource "aws_lb_listener" "private_https" { + count = var.enable_private_alb && var.private_alb_enable_https ? 1 : 0 + + load_balancer_arn = module.private_alb[0].alb_arn + port = 443 + protocol = "HTTPS" + ssl_policy = var.private_alb_ssl_policy + certificate_arn = local.enable_ravion_domain ? 
ravion_certificate.cluster[0].cert_arn : try(var.private_alb_certificate_arns[0], null) + + default_action { + type = "fixed-response" + fixed_response { + content_type = "text/plain" + message_body = "Not found" + status_code = "404" + } + } + + lifecycle { + precondition { + condition = local.enable_ravion_domain || length(var.private_alb_certificate_arns) >= 1 + error_message = "private_alb_certificate_arns must include at least one ACM certificate ARN when private_alb_enable_https = true and use_ravion_managed_domains = false." + } + } + + tags = merge(local.tags, { Name = "${var.name}-priv-https" }) +} + +# Customer SNI certs for the private listener (BYO mode only). +resource "aws_lb_listener_certificate" "private_sni" { + for_each = (var.enable_private_alb && var.private_alb_enable_https && !local.enable_ravion_domain && length(var.private_alb_certificate_arns) > 1) ? toset(slice(var.private_alb_certificate_arns, 1, length(var.private_alb_certificate_arns))) : toset([]) + + listener_arn = aws_lb_listener.private_https[0].arn + certificate_arn = each.value +} + +################################################################################ +# 443 ingress +################################################################################ +# The alb submodule only opens 443 when it owns the HTTPS listener; it no longer +# does, so ecs_cluster opens 443 here in BOTH modes (mirrors the submodule's +# rules). Mode-independent so toggling use_ravion_managed_domains never churns +# the SG rules. +resource "aws_vpc_security_group_ingress_rule" "public_https_ipv4" { + for_each = var.enable_public_alb && var.public_alb_enable_https ? 
toset(var.public_alb_ingress_cidr_blocks) : toset([]) + + security_group_id = module.public_alb[0].security_group_id + description = "Allow HTTPS from ${each.value}" + cidr_ipv4 = each.value + from_port = 443 + to_port = 443 + ip_protocol = "tcp" + tags = local.tags +} + +resource "aws_vpc_security_group_ingress_rule" "public_https_ipv6" { + for_each = var.enable_public_alb && var.public_alb_enable_https ? toset(["::/0"]) : toset([]) + + security_group_id = module.public_alb[0].security_group_id + description = "Allow HTTPS from ${each.value}" + cidr_ipv6 = each.value + from_port = 443 + to_port = 443 + ip_protocol = "tcp" + tags = local.tags +} + +# Private ALB 443 ingress (mirrors the public rules for the private listener). +resource "aws_vpc_security_group_ingress_rule" "private_https_ipv4" { + for_each = var.enable_private_alb && var.private_alb_enable_https ? toset(var.private_alb_ingress_cidr_blocks) : toset([]) + + security_group_id = module.private_alb[0].security_group_id + description = "Allow HTTPS from ${each.value}" + cidr_ipv4 = each.value + from_port = 443 + to_port = 443 + ip_protocol = "tcp" + tags = local.tags +} + +resource "aws_vpc_security_group_ingress_rule" "private_https_ipv6" { + for_each = var.enable_private_alb && var.private_alb_enable_https ? 
toset(["::/0"]) : toset([]) + + security_group_id = module.private_alb[0].security_group_id + description = "Allow HTTPS from ${each.value}" + cidr_ipv6 = each.value + from_port = 443 + to_port = 443 + ip_protocol = "tcp" + tags = local.tags +} + +################################################################################ +# State moves +################################################################################ +# Renames within ecs_cluster (clusters already in Ravion mode keep their state): +moved { + from = aws_lb_listener.ravion_https + to = aws_lb_listener.public_https +} + +moved { + from = aws_lb_listener.ravion_https_private + to = aws_lb_listener.private_https +} + +moved { + from = aws_vpc_security_group_ingress_rule.ravion_https_ipv4 + to = aws_vpc_security_group_ingress_rule.public_https_ipv4 +} + +moved { + from = aws_vpc_security_group_ingress_rule.ravion_https_ipv6 + to = aws_vpc_security_group_ingress_rule.public_https_ipv6 +} + +moved { + from = aws_vpc_security_group_ingress_rule.ravion_https_private_ipv4 + to = aws_vpc_security_group_ingress_rule.private_https_ipv4 +} + +# BYO clusters with existing state had their HTTPS listener inside the alb +# submodule; refactoring it out to the root is expressed with a cross-module +# moved block (supported "refactor out of a module" pattern). The submodule's +# 443 SG ingress rules came from a for_each in the security-groups module and +# cannot be moved this way — those are a one-time destroy+create on the BYO +# migration (acceptable: nothing is in prod yet). 
+moved { + from = module.public_alb[0].aws_lb_listener.https[0] + to = aws_lb_listener.public_https[0] +} + +moved { + from = module.private_alb[0].aws_lb_listener.https[0] + to = aws_lb_listener.private_https[0] +} diff --git a/compute/ecs_cluster/load_balancers.tf b/compute/ecs_cluster/load_balancers.tf index 7d22138..17145a6 100644 --- a/compute/ecs_cluster/load_balancers.tf +++ b/compute/ecs_cluster/load_balancers.tf @@ -14,15 +14,19 @@ module "public_alb" { subnet_ids = var.public_subnet_ids internal = false - # Listener configuration. In Ravion-managed mode this module owns the HTTPS - # listener (ravion_domains.tf) with the Ravion wildcard cert as default, so - # the alb submodule skips its own HTTPS listener + customer cert ARNs. - enable_http_listener = true - enable_https_listener = var.public_alb_enable_https && !local.enable_ravion_domain - http_to_https_redirect = var.public_alb_enable_https + # Listener configuration. ecs_cluster ALWAYS owns the HTTPS listener (in + # ravion_domains.tf) so toggling use_ravion_managed_domains is an in-place + # cert swap rather than a destroy+create across TF addresses. The alb + # submodule therefore never creates its own HTTPS listener nor holds cert + # ARNs; force_http_to_https_redirect keeps the HTTP listener redirecting to + # the parent-owned 443 listener. + enable_http_listener = true + enable_https_listener = false + http_to_https_redirect = var.public_alb_enable_https + force_http_to_https_redirect = var.public_alb_enable_https # SSL/TLS - certificate_arns = local.enable_ravion_domain ? [] : var.public_alb_certificate_arns + certificate_arns = [] ssl_policy = var.public_alb_ssl_policy # ALB settings @@ -56,15 +60,19 @@ module "private_alb" { subnet_ids = var.private_subnet_ids internal = true - # Listener configuration. 
In Ravion-managed mode this module owns the HTTPS - # listener (ravion_domains.tf) with the Ravion wildcard cert as default, so - # the alb submodule skips its own HTTPS listener + customer cert ARNs. - enable_http_listener = true - enable_https_listener = var.private_alb_enable_https && !local.enable_ravion_domain - http_to_https_redirect = var.private_alb_enable_https + # Listener configuration. ecs_cluster ALWAYS owns the HTTPS listener (in + # ravion_domains.tf) so toggling use_ravion_managed_domains is an in-place + # cert swap rather than a destroy+create across TF addresses. The alb + # submodule therefore never creates its own HTTPS listener nor holds cert + # ARNs; force_http_to_https_redirect keeps the HTTP listener redirecting to + # the parent-owned 443 listener. + enable_http_listener = true + enable_https_listener = false + http_to_https_redirect = var.private_alb_enable_https + force_http_to_https_redirect = var.private_alb_enable_https # SSL/TLS - certificate_arns = local.enable_ravion_domain ? [] : var.private_alb_certificate_arns + certificate_arns = [] ssl_policy = var.private_alb_ssl_policy # ALB settings diff --git a/compute/ecs_cluster/outputs.tf b/compute/ecs_cluster/outputs.tf index 317c46d..d639303 100644 --- a/compute/ecs_cluster/outputs.tf +++ b/compute/ecs_cluster/outputs.tf @@ -120,10 +120,8 @@ output "public_alb_http_listener_arn" { } output "public_alb_https_listener_arn" { - description = "The ARN of the public ALB HTTPS listener (Ravion-owned when use_ravion_managed_domains; null if HTTPS disabled)." - value = (var.enable_public_alb && var.public_alb_enable_https) ? ( - length(aws_lb_listener.ravion_https) > 0 ? aws_lb_listener.ravion_https[0].arn : module.public_alb[0].https_listener_arn - ) : null + description = "The ARN of the public ALB HTTPS listener (ecs_cluster-owned; null if HTTPS disabled)." + value = (var.enable_public_alb && var.public_alb_enable_https) ? 
aws_lb_listener.public_https[0].arn : null } ################################################################################ @@ -166,10 +164,8 @@ output "private_alb_http_listener_arn" { } output "private_alb_https_listener_arn" { - description = "The ARN of the private ALB HTTPS listener (Ravion-owned when use_ravion_managed_domains; null if HTTPS disabled)." - value = (var.enable_private_alb && var.private_alb_enable_https) ? ( - length(aws_lb_listener.ravion_https_private) > 0 ? aws_lb_listener.ravion_https_private[0].arn : module.private_alb[0].https_listener_arn - ) : null + description = "The ARN of the private ALB HTTPS listener (ecs_cluster-owned; null if HTTPS disabled)." + value = (var.enable_private_alb && var.private_alb_enable_https) ? aws_lb_listener.private_https[0].arn : null } ################################################################################ @@ -273,3 +269,8 @@ output "ravion_aws_region" { description = "Pass-through Ravion cert region for ecs_service Mode B." value = local.enable_ravion_domain ? coalesce(var.ravion_aws_region, local.region) : null } + +output "ravion_managed_domains_enabled" { + description = "True when the cluster owns a Ravion wildcard cert + HTTPS listener (use_ravion_managed_domains AND at least one ALB). Services read this to show/hide managed-domain fields." + value = local.enable_ravion_domain +} diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 320309d..5f7e7e6 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -2,23 +2,16 @@ # Ravion-managed cluster domain (opt-in) ################################################################################ # When var.use_ravion_managed_domains = true, Ravion issues ONE wildcard cert -# `*.-.` (+ apex) and this module owns the cluster ALB -# HTTPS listener(s) with that cert as the default. 
The same cert backs BOTH the -# public and the private ALB listener (an ACM cert ARN can default many -# listeners), so public AND private services nest their domains under the one -# wildcard. Services pass the matching outputs to ecs_service: -# - public service -> public_alb_https_listener_arn + public_alb_dns_name/zone -# - private service -> private_alb_https_listener_arn + private_alb_dns_name/zone -# -# The listeners live here (not in the alb submodule) to avoid a DAG cycle: -# aws_lb.this -> ravion_certificate.cluster -> aws_lb_listener.ravion_https* -# (uses the cert). ravion_certificate with role=shared_wildcard blocks until -# ISSUED, so cert_arn is valid at listener create time. +# `*.-.` (+ apex). That cert becomes the default cert +# on the cluster ALB HTTPS listener(s) (see listeners.tf) — a single ACM cert +# can default both the public and the private listener, so public AND private +# services nest their domains under the one wildcard. The cert also publishes a +# `*.` ALIAS to the cluster ALB so service auto-FQDNs (.) +# resolve. When the flag is off, this resource is absent and the listeners fall +# back to the customer-supplied certificate ARNs. locals { - enable_ravion_domain = var.use_ravion_managed_domains && (var.enable_public_alb || var.enable_private_alb) - enable_ravion_public_listener = local.enable_ravion_domain && var.enable_public_alb && var.public_alb_enable_https - enable_ravion_private_listener = local.enable_ravion_domain && var.enable_private_alb && var.private_alb_enable_https + enable_ravion_domain = var.use_ravion_managed_domains && (var.enable_public_alb || var.enable_private_alb) } resource "ravion_certificate" "cluster" { @@ -48,86 +41,3 @@ resource "ravion_certificate" "cluster" { } } } - -# Public ALB Ravion listener. -resource "aws_lb_listener" "ravion_https" { - count = local.enable_ravion_public_listener ? 
1 : 0 - - load_balancer_arn = module.public_alb[0].alb_arn - port = 443 - protocol = "HTTPS" - ssl_policy = var.public_alb_ssl_policy - certificate_arn = ravion_certificate.cluster[0].cert_arn - - default_action { - type = "fixed-response" - fixed_response { - content_type = "text/plain" - message_body = "Not found" - status_code = "404" - } - } - - tags = merge(var.tags, { Name = "${var.name}-pub-https" }) -} - -# Private ALB Ravion listener (same wildcard cert as the public one). -resource "aws_lb_listener" "ravion_https_private" { - count = local.enable_ravion_private_listener ? 1 : 0 - - load_balancer_arn = module.private_alb[0].alb_arn - port = 443 - protocol = "HTTPS" - ssl_policy = var.private_alb_ssl_policy - certificate_arn = ravion_certificate.cluster[0].cert_arn - - default_action { - type = "fixed-response" - fixed_response { - content_type = "text/plain" - message_body = "Not found" - status_code = "404" - } - } - - tags = merge(var.tags, { Name = "${var.name}-priv-https" }) -} - -# The alb submodule only opens port 443 when it owns the HTTPS listener; in -# Ravion mode it does not, so open it here (mirrors the submodule's rules). -resource "aws_vpc_security_group_ingress_rule" "ravion_https_ipv4" { - for_each = local.enable_ravion_public_listener ? toset(var.public_alb_ingress_cidr_blocks) : toset([]) - - security_group_id = module.public_alb[0].security_group_id - description = "Allow HTTPS from ${each.value} (Ravion-owned listener)" - cidr_ipv4 = each.value - from_port = 443 - to_port = 443 - ip_protocol = "tcp" - tags = var.tags -} - -resource "aws_vpc_security_group_ingress_rule" "ravion_https_ipv6" { - for_each = local.enable_ravion_public_listener ? 
toset(["::/0"]) : toset([]) - - security_group_id = module.public_alb[0].security_group_id - description = "Allow HTTPS from ${each.value} (Ravion-owned listener)" - cidr_ipv6 = each.value - from_port = 443 - to_port = 443 - ip_protocol = "tcp" - tags = var.tags -} - -# Private ALB 443 ingress (mirrors the public rules for the private listener). -resource "aws_vpc_security_group_ingress_rule" "ravion_https_private_ipv4" { - for_each = local.enable_ravion_private_listener ? toset(var.private_alb_ingress_cidr_blocks) : toset([]) - - security_group_id = module.private_alb[0].security_group_id - description = "Allow HTTPS from ${each.value} (Ravion-owned listener)" - cidr_ipv4 = each.value - from_port = 443 - to_port = 443 - ip_protocol = "tcp" - tags = var.tags -} diff --git a/compute/ecs_cluster/tests/listeners.tftest.hcl b/compute/ecs_cluster/tests/listeners.tftest.hcl new file mode 100644 index 0000000..d37f322 --- /dev/null +++ b/compute/ecs_cluster/tests/listeners.tftest.hcl @@ -0,0 +1,263 @@ +# Cluster ALB HTTPS listener tests (sprint #1: unify the HTTPS listener so +# toggling use_ravion_managed_domains is an in-place cert swap, not a +# destroy+create). Self-contained: mocks both providers, EC2 disabled +# throughout so these runs are independent of the EC2 capacity-provider tests. +# Run with: tofu test + +# Valid ARNs are required: aws_lb_listener validates load_balancer_arn / +# certificate_arn at plan, and the auto-fabricated mock values aren't ARNs. 
+mock_provider "aws" {
+  override_resource {
+    target = module.public_alb.aws_lb.this
+    values = {
+      dns_name   = "test-public-alb-123456789.us-east-1.elb.amazonaws.com"
+      zone_id    = "Z35SXDOTRQ7X7K"
+      arn_suffix = "app/test-public-alb/1234567890123456"
+      arn        = "arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/test-public-alb/1234567890123456"
+    }
+  }
+
+  override_resource {
+    target = module.private_alb.aws_lb.this
+    values = {
+      dns_name   = "test-private-alb-123456789.us-east-1.elb.amazonaws.com"
+      zone_id    = "Z35SXDOTRQ7X7K"
+      arn_suffix = "app/test-private-alb/1234567890123457"
+      arn        = "arn:aws:elasticloadbalancing:us-east-1:123456789012:loadbalancer/app/test-private-alb/1234567890123457"
+    }
+  }
+
+  override_resource {
+    target = module.public_alb.aws_security_group.this
+    values = {
+      id  = "sg-publicalb123456"
+      arn = "arn:aws:ec2:us-east-1:123456789012:security-group/sg-publicalb123456"
+    }
+  }
+
+  override_resource {
+    target = module.private_alb.aws_security_group.this
+    values = {
+      id  = "sg-privatealb123456"
+      arn = "arn:aws:ec2:us-east-1:123456789012:security-group/sg-privatealb123456"
+    }
+  }
+
+  override_resource {
+    target = aws_lb_listener.public_https
+    values = {
+      arn = "arn:aws:elasticloadbalancing:us-east-1:123456789012:listener/app/test-public-alb/1234567890123456/6543210987654321"
+    }
+  }
+
+  override_resource {
+    target = aws_lb_listener.private_https
+    values = {
+      arn = "arn:aws:elasticloadbalancing:us-east-1:123456789012:listener/app/test-private-alb/1234567890123457/6543210987654322"
+    }
+  }
+}
+
+# Configuring ravion_certificate against the real control plane needs a
+# DomainProvider JWT, so it is mocked to keep tests hermetic. cert_arn is a valid
+# ACM ARN so the listener's certificate_arn passes provider validation.
+mock_provider "ravion" {
+  override_resource {
+    target = ravion_certificate.cluster
+    values = {
+      status   = "ISSUED"
+      fqdn     = "*.test-cluster-abcd.ravion.app"
+      cert_arn = "arn:aws:acm:us-east-1:123456789012:certificate/99999999-9999-9999-9999-999999999999"
+      id       = "cert_test"
+    }
+  }
+}
+
+variables {
+  vpc_id             = "vpc-12345678"
+  public_subnet_ids  = ["subnet-public1", "subnet-public2"]
+  private_subnet_ids = ["subnet-private1", "subnet-private2"]
+  name               = "test-cluster"
+}
+
+################################################################################
+# BYO certificate mode (use_ravion_managed_domains = false, the default)
+################################################################################
+
+# One cert ARN: the HTTPS listener lives at the ecs_cluster root address, the
+# alb submodule reports no HTTPS listener, and no SNI certs are attached.
+run "byo_public_https_single_cert" {
+  command = plan
+
+  variables {
+    public_alb_certificate_arns = ["arn:aws:acm:us-east-1:111122223333:certificate/aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"]
+    public_alb_enable_https     = true
+    enable_public_alb           = true
+  }
+
+  assert {
+    condition     = module.public_alb[0].https_listener_arn == null
+    error_message = "The alb submodule must NOT own the HTTPS listener (its https_listener_arn output is null)"
+  }
+
+  assert {
+    condition     = length(aws_lb_listener.public_https) == 1
+    error_message = "ecs_cluster must own the public HTTPS listener"
+  }
+
+  assert {
+    condition     = length(aws_lb_listener_certificate.public_sni) == 0
+    error_message = "A single cert ARN yields no SNI certificates"
+  }
+
+  assert {
+    condition     = aws_lb_listener.public_https[0].certificate_arn == "arn:aws:acm:us-east-1:111122223333:certificate/aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
+    error_message = "BYO mode must use the customer's first cert ARN as the default cert"
+  }
+}
+
+# Several cert ARNs: every ARN after the first is attached for SNI.
+run "byo_public_https_sni" {
+  command = plan
+
+  variables {
+    enable_public_alb       = true
+    public_alb_enable_https = true
+    public_alb_certificate_arns = [
+      "arn:aws:acm:us-east-1:111122223333:certificate/11111111-1111-1111-1111-111111111111",
+      "arn:aws:acm:us-east-1:111122223333:certificate/22222222-2222-2222-2222-222222222222",
+      "arn:aws:acm:us-east-1:111122223333:certificate/33333333-3333-3333-3333-333333333333",
+    ]
+  }
+
+  assert {
+    condition     = length(aws_lb_listener_certificate.public_sni) == 2
+    error_message = "The 2nd+ cert ARNs must be attached as SNI certificates"
+  }
+}
+
+# HTTPS enabled in BYO mode with no cert ARN must fail the precondition (the
+# clean error, not a cryptic index-out-of-range).
+run "byo_public_https_requires_cert" {
+  command = plan
+
+  variables {
+    enable_public_alb       = true
+    public_alb_enable_https = true
+    # no public_alb_certificate_arns; use_ravion_managed_domains defaults false
+  }
+
+  expect_failures = [aws_lb_listener.public_https]
+}
+
+# Private ALB BYO parity: the same four checks as byo_public_https_single_cert
+# (root-owned listener, submodule owns none, first cert ARN is the default, no
+# SNI certs), so the private listener's cert selection is covered as well.
+run "byo_private_https_single_cert" {
+  command = plan
+
+  variables {
+    enable_private_alb           = true
+    private_alb_enable_https     = true
+    private_alb_certificate_arns = ["arn:aws:acm:us-east-1:111122223333:certificate/bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"]
+  }
+
+  assert {
+    condition     = length(aws_lb_listener.private_https) == 1
+    error_message = "ecs_cluster must own the private HTTPS listener"
+  }
+
+  assert {
+    condition     = module.private_alb[0].https_listener_arn == null
+    error_message = "The alb submodule must NOT own the private HTTPS listener (its https_listener_arn output is null)"
+  }
+
+  # certificate_arn comes straight from the input variable in BYO mode, so it is
+  # known at plan time just like the public assertion.
+  assert {
+    condition     = aws_lb_listener.private_https[0].certificate_arn == "arn:aws:acm:us-east-1:111122223333:certificate/bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb"
+    error_message = "BYO mode must use the customer's first private cert ARN as the default cert"
+  }
+
+  assert {
+    condition     = length(aws_lb_listener_certificate.private_sni) == 0
+    error_message = "A single private cert ARN yields no SNI certificates"
+  }
+}
+
+# Private multi-cert: the 2nd+ are attached for SNI.
+run "byo_private_https_sni" {
+  command = plan
+
+  variables {
+    private_alb_certificate_arns = [
+      "arn:aws:acm:us-east-1:111122223333:certificate/44444444-4444-4444-4444-444444444444",
+      "arn:aws:acm:us-east-1:111122223333:certificate/55555555-5555-5555-5555-555555555555",
+    ]
+    private_alb_enable_https = true
+    enable_private_alb       = true
+  }
+
+  assert {
+    condition     = length(aws_lb_listener_certificate.private_sni) == 1
+    error_message = "The 2nd+ private cert ARNs must be attached as SNI certificates"
+  }
+}
+
+# BYO mode, private HTTPS on, no cert ARN supplied: the precondition must fail.
+run "byo_private_https_requires_cert" {
+  expect_failures = [aws_lb_listener.private_https]
+
+  command = plan
+
+  variables {
+    # no private_alb_certificate_arns; use_ravion_managed_domains defaults false
+    private_alb_enable_https = true
+    enable_private_alb       = true
+  }
+}
+
+################################################################################
+# Ravion-managed mode (use_ravion_managed_domains = true)
+################################################################################
+
+# The issued wildcard cert is the listener default: no SNI certs and no
+# customer cert ARN needed. The listener keeps the BYO-mode address (the
+# toggle is an in-place cert swap, not a destroy+create).
+run "ravion_managed_public_https" {
+  command = plan
+
+  variables {
+    use_ravion_managed_domains = true
+    ravion_aws_account_id      = "aws_testaccount"
+    enable_public_alb          = true
+    public_alb_enable_https    = true
+  }
+
+  assert {
+    condition     = length(aws_lb_listener.public_https) == 1
+    error_message = "The HTTPS listener exists at the same address in managed mode"
+  }
+
+  assert {
+    condition     = length(aws_lb_listener_certificate.public_sni) == 0
+    error_message = "Managed mode attaches no customer SNI certs"
+  }
+
+  assert {
+    condition     = length(ravion_certificate.cluster) == 1
+    error_message = "Ravion wildcard cert must be created in managed mode"
+  }
+}
+
+################################################################################
+# HTTP-only: no HTTPS listener, and the SNI slice is never evaluated
+################################################################################
+
+# Guards against the slice([], 1, 0) crash regressing: when HTTPS is off and
+# no certs are given, the SNI for_each has to short-circuit to an empty set
+# instead of evaluating the slice.
+run "public_alb_http_only_no_sni_eval" { + command = plan + + variables { + enable_public_alb = true + public_alb_enable_https = false + } + + assert { + condition = length(aws_lb_listener.public_https) == 0 + error_message = "No HTTPS listener when public_alb_enable_https = false" + } + + assert { + condition = length(aws_lb_listener_certificate.public_sni) == 0 + error_message = "SNI set must be empty (slice not evaluated) when HTTPS is off" + } +} diff --git a/networking/alb/README.md b/networking/alb/README.md index 822819b..f7032bf 100644 --- a/networking/alb/README.md +++ b/networking/alb/README.md @@ -269,6 +269,7 @@ spec: | http_listener_port | The port for the HTTP listener | `number` | `80` | no | | https_listener_port | The port for the HTTPS listener | `number` | `443` | no | | http_to_https_redirect | Redirect HTTP traffic to HTTPS (when both listeners enabled) | `bool` | `true` | no | +| force_http_to_https_redirect | Redirect HTTP->HTTPS even when this module does not own the HTTPS listener (used when a parent module owns port 443) | `bool` | `false` | no | ### SSL/TLS diff --git a/networking/alb/listeners.tf b/networking/alb/listeners.tf index cf0f931..58ec031 100644 --- a/networking/alb/listeners.tf +++ b/networking/alb/listeners.tf @@ -9,10 +9,11 @@ resource "aws_lb_listener" "http" { port = var.http_listener_port protocol = "HTTP" - # If HTTPS is enabled and redirect is enabled, redirect to HTTPS - # Otherwise, return a fixed response + # Redirect to HTTPS when local.redirect_http_to_https (this module owns the + # HTTPS listener, or a parent owns 443 via force_http_to_https_redirect); + # otherwise return a fixed response. dynamic "default_action" { - for_each = var.http_to_https_redirect && local.create_https_listener ? [1] : [] + for_each = local.redirect_http_to_https ? 
[1] : [] content { type = "redirect" redirect { @@ -24,7 +25,7 @@ resource "aws_lb_listener" "http" { } dynamic "default_action" { - for_each = !var.http_to_https_redirect || !local.create_https_listener ? [1] : [] + for_each = local.redirect_http_to_https ? [] : [1] content { type = "fixed-response" fixed_response { diff --git a/networking/alb/locals.tf b/networking/alb/locals.tf index bb3721f..f797ad1 100644 --- a/networking/alb/locals.tf +++ b/networking/alb/locals.tf @@ -23,6 +23,12 @@ locals { # Listener configuration create_http_listener = var.enable_http_listener create_https_listener = var.enable_https_listener + + # The HTTP listener redirects to HTTPS when redirect is requested and either + # this module owns the HTTPS listener OR a parent module owns 443 + # (force_http_to_https_redirect). Otherwise the HTTP listener returns the + # fixed response. + redirect_http_to_https = var.http_to_https_redirect && (local.create_https_listener || var.force_http_to_https_redirect) } diff --git a/networking/alb/variables.tf b/networking/alb/variables.tf index f3118fd..cc1b59f 100644 --- a/networking/alb/variables.tf +++ b/networking/alb/variables.tf @@ -164,6 +164,12 @@ variable "http_to_https_redirect" { default = true } +variable "force_http_to_https_redirect" { + type = bool + description = "Redirect HTTP->HTTPS even when this module does not own the HTTPS listener (used when the parent owns it)." 
+ default = false +} + ################################################################################ # SSL/TLS ################################################################################ From 64eb79e709cc44ac628fc52e040f16fdce31e894 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Fri, 29 May 2026 14:17:09 +0530 Subject: [PATCH 16/26] feat(domains): collapse ecs_service Mode A/B into per-entry apex classification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each domains entry is classified per-entry instead of all-or-nothing: . (one label under the cluster apex) rides the cluster wildcard cert via SNI (no per-service cert, no DNS record); everything else gets one per-service instance cert + a customer routing record. Empty list falls back to the auto-FQDN .. Removes the ravion_auto_domain_status retirement flow — the domains list is the single source of truth. Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_service/outputs.tf | 12 ++-- compute/ecs_service/ravion_domains.tf | 95 ++++++++++++++++----------- compute/ecs_service/variables.tf | 2 +- 3 files changed, 63 insertions(+), 46 deletions(-) diff --git a/compute/ecs_service/outputs.tf b/compute/ecs_service/outputs.tf index 7add7ef..4a699db 100644 --- a/compute/ecs_service/outputs.tf +++ b/compute/ecs_service/outputs.tf @@ -255,16 +255,16 @@ output "region" { ################################################################################ output "ravion_domain_fqdn" { - description = "Auto-FQDN under the cluster wildcard (null in Mode B after cutover)." - value = local.ravion_auto_live ? ravion_domain.auto[0].fqdn : null + description = "Primary FQDN for this service (first entry in the domains list; the auto-FQDN under the cluster wildcard when present). Null when the cluster has no Ravion-managed domains." + value = length(local.effective_domains) > 0 ? 
local.effective_domains[0] : null } output "ravion_domain_url" { - description = "https URL for the auto-FQDN." - value = local.ravion_auto_live ? ravion_domain.auto[0].url : null + description = "https URL for the primary FQDN." + value = length(local.effective_domains) > 0 ? "https://${local.effective_domains[0]}" : null } output "ravion_custom_cert_arn" { - description = "ACM ARN of the per-service custom cert (Mode B only)." - value = local.ravion_mode_b ? ravion_certificate.svc[0].cert_arn : null + description = "ACM ARN of the per-service instance cert covering the custom (non-wildcard) domains. Null when there are none." + value = length(local.custom_domains) > 0 ? ravion_certificate.svc[0].cert_arn : null } diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 2f7a5ec..dd19a53 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -1,26 +1,48 @@ ################################################################################ -# Ravion-managed service domain (optional) +# Ravion-managed service domains ################################################################################ -# Wired when cluster_parent_fqdn is set (piped from ecs_cluster). +# Wired when cluster_parent_fqdn is set (piped from ecs_cluster). The `domains` +# list is the single source of truth — each entry is classified by whether the +# cluster wildcard cert covers it: # -# Mode A (domains = []): an auto-FQDN . rides the cluster -# wildcard cert — only a listener rule + routing record, no per-service cert. -# Mode B (domains = [...]): a per-service cert (<=10 SANs) attached to the -# cluster listener via Ravion; the auto-FQDN stays until the customs are -# healthy, then ravion_auto_domain_status flips retired -> auto is destroyed. +# - wildcard-covered (., exactly one label under the cluster apex): +# nests under the cluster wildcard cert via SNI. 
No per-service cert, and no +# per-domain DNS record — the cluster's `*.` ALIAS already routes it. +# - custom (anything else — external FQDNs, or names deeper than one label +# under the apex the wildcard can't cover): covered by ONE per-service +# instance ACM cert (<=10 SANs) attached to the cluster listener, plus a +# routing record the customer adds. +# +# When `domains` is empty the service still gets an auto-FQDN +# `.` (a wildcard-covered entry), so a service with no custom +# domains is reachable out of the box. The frontend pre-fills this same value +# into the domains list as the default; clearing it opts out. locals { - ravion_managed = var.cluster_parent_fqdn != null && var.cluster_parent_fqdn != "" - ravion_mode_b = local.ravion_managed && length(var.domains) > 0 - ravion_retired = local.ravion_mode_b ? try(data.ravion_auto_domain_status.auto[0].retired, false) : false - ravion_auto_live = local.ravion_managed && !local.ravion_retired + ravion_managed = var.cluster_parent_fqdn != null && var.cluster_parent_fqdn != "" + apex = local.ravion_managed ? var.cluster_parent_fqdn : "" - ravion_priority = var.ravion_listener_rule_priority > 0 ? var.ravion_listener_rule_priority : ((parseint(substr(sha256(var.name), 0, 4), 16) % 49000) + 1000) + # Auto-FQDN used when the domains list is empty (matches the frontend default). + auto_fqdn = local.ravion_managed ? "${coalesce(var.module_instance_given_id, var.name)}.${local.apex}" : "" - ravion_host_headers = concat( - [for d in ravion_domain.auto : d.fqdn], - local.ravion_mode_b ? var.domains : [], - ) + # The effective list: the user's domains, or the auto-FQDN when empty. + effective_domains = local.ravion_managed ? (length(var.domains) > 0 ? var.domains : [local.auto_fqdn]) : [] + + # Per-entry classification. wildcard-covered = "." with exactly one + # label below the apex (the only shape the `*.` cert + ALIAS cover). 
+ wildcard_covered = [ + for d in local.effective_domains : d + if endswith(d, ".${local.apex}") && !strcontains(trimsuffix(d, ".${local.apex}"), ".") + ] + custom_domains = [ + for d in local.effective_domains : d + if !(endswith(d, ".${local.apex}") && !strcontains(trimsuffix(d, ".${local.apex}"), ".")) + ] + + # All of this service's hostnames route to its target group via one rule. + ravion_host_headers = local.effective_domains + + ravion_priority = var.ravion_listener_rule_priority > 0 ? var.ravion_listener_rule_priority : ((parseint(substr(sha256(var.name), 0, 4), 16) % 49000) + 1000) ravion_target_group_arn = ( length(aws_lb_target_group.this) > 0 ? aws_lb_target_group.this[0].arn : ( @@ -29,50 +51,45 @@ locals { ) } -data "ravion_auto_domain_status" "auto" { - count = local.ravion_managed ? 1 : 0 - - parent_domain_id = var.cluster_parent_fqdn - name = coalesce(var.module_instance_given_id, var.name) -} - -# Mode A auto-FQDN under the cluster wildcard (no per-service cert). -resource "ravion_domain" "auto" { - count = local.ravion_auto_live ? 1 : 0 +# Wildcard-covered domains (incl. the auto-FQDN): nest under the cluster +# wildcard. No per-service cert; the cluster `*.` ALIAS routes them. +resource "ravion_domain" "wildcard" { + for_each = toset(local.wildcard_covered) - name = coalesce(var.module_instance_given_id, var.name) - parent_fqdn = var.cluster_parent_fqdn + name = trimsuffix(each.value, ".${local.apex}") + parent_fqdn = local.apex } -# Mode B per-service certificate (<=10 SANs), attached to the cluster listener. +# Per-service certificate covering the custom (non-wildcard) domains (<=10 SANs), +# attached to the cluster listener via Ravion. resource "ravion_certificate" "svc" { - count = local.ravion_mode_b ? 1 : 0 + count = length(local.custom_domains) > 0 ? 
1 : 0 role = "instance" - domains = var.domains + domains = local.custom_domains aws_account_id = var.ravion_aws_account_id aws_region = coalesce(var.ravion_aws_region, local.region) target_arn = var.cluster_https_listener_arn lifecycle { precondition { - condition = !local.ravion_mode_b || (var.ravion_aws_account_id != null && var.ravion_aws_account_id != "") - error_message = "ravion_aws_account_id is required when domains is non-empty." + condition = length(local.custom_domains) == 0 || (var.ravion_aws_account_id != null && var.ravion_aws_account_id != "") + error_message = "ravion_aws_account_id is required when the domains list includes a custom (non-wildcard) domain." } precondition { - condition = !local.ravion_mode_b || (var.cluster_https_listener_arn != null && var.cluster_https_listener_arn != "") - error_message = "cluster_https_listener_arn is required when domains is non-empty." + condition = length(local.custom_domains) == 0 || (var.cluster_https_listener_arn != null && var.cluster_https_listener_arn != "") + error_message = "cluster_https_listener_arn is required when the domains list includes a custom (non-wildcard) domain." } precondition { - condition = length(var.domains) <= 10 - error_message = "A service may declare at most 10 custom domains (one cert per service)." + condition = length(local.custom_domains) <= 10 + error_message = "A service may declare at most 10 custom (non-wildcard) domains (one cert per service)." } } } -# Mode B routing records the customer must add (one per custom FQDN). +# Routing records the customer must add for each custom domain (one per FQDN). resource "ravion_domain" "custom" { - for_each = local.ravion_mode_b ? 
toset(var.domains) : toset([]) + for_each = toset(local.custom_domains) name = each.value target_dns_name = var.cluster_alb_dns_name diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 7a4daf7..2c6720d 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -624,7 +624,7 @@ variable "ravion_listener_rule_priority" { variable "domains" { type = list(string) - description = "Customer FQDNs (Mode B). Empty = Mode A (auto-FQDN under the cluster wildcard). Max 10." + description = "Service FQDNs. Each entry that is one label under the cluster apex (.) rides the cluster wildcard cert; any other (custom/external) entry is covered by a per-service instance cert (max 10 custom). Empty = an auto-FQDN . under the cluster wildcard." default = [] } From ce3eac120708f6bfcef6c3b875bb3be16051ba16 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Fri, 29 May 2026 14:19:09 +0530 Subject: [PATCH 17/26] fix(ecs_service): normalize domain entries before apex classification Lowercase + strip trailing dot/whitespace + drop empties, and require a non-empty leaf for the wildcard bucket, so mixed-case / trailing-dot / empty-leaf entries classify correctly and never yield an invalid ALB host header. Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_service/ravion_domains.tf | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index dd19a53..3ad99f5 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -20,23 +20,31 @@ locals { ravion_managed = var.cluster_parent_fqdn != null && var.cluster_parent_fqdn != "" - apex = local.ravion_managed ? var.cluster_parent_fqdn : "" + apex = local.ravion_managed ? lower(var.cluster_parent_fqdn) : "" # Auto-FQDN used when the domains list is empty (matches the frontend default). auto_fqdn = local.ravion_managed ? 
"${coalesce(var.module_instance_given_id, var.name)}.${local.apex}" : "" - # The effective list: the user's domains, or the auto-FQDN when empty. - effective_domains = local.ravion_managed ? (length(var.domains) > 0 ? var.domains : [local.auto_fqdn]) : [] - - # Per-entry classification. wildcard-covered = "." with exactly one - # label below the apex (the only shape the `*.` cert + ALIAS cover). + # The effective list: the user's domains (or the auto-FQDN when empty), + # normalized — lowercased, trailing dot + surrounding whitespace stripped, + # empties dropped. Keeps classification consistent with DNS case-insensitivity + # and the backend's lowercase sanitizeLabel. + effective_domains = local.ravion_managed ? [ + for d in(length(var.domains) > 0 ? var.domains : [local.auto_fqdn]) : + lower(trimsuffix(trimspace(d), ".")) if trimspace(d) != "" + ] : [] + + # Per-entry classification. wildcard-covered = "." with a non-empty + # single label below the apex (the only shape the `*.` cert + ALIAS + # cover). The non-empty-leaf guard keeps a malformed "." out of the + # wildcard bucket (an empty leaf would produce an invalid ALB host header). wildcard_covered = [ for d in local.effective_domains : d - if endswith(d, ".${local.apex}") && !strcontains(trimsuffix(d, ".${local.apex}"), ".") + if endswith(d, ".${local.apex}") && length(trimsuffix(d, ".${local.apex}")) > 0 && !strcontains(trimsuffix(d, ".${local.apex}"), ".") ] custom_domains = [ for d in local.effective_domains : d - if !(endswith(d, ".${local.apex}") && !strcontains(trimsuffix(d, ".${local.apex}"), ".")) + if !(endswith(d, ".${local.apex}") && length(trimsuffix(d, ".${local.apex}")) > 0 && !strcontains(trimsuffix(d, ".${local.apex}"), ".")) ] # All of this service's hostnames route to its target group via one rule. 
From 1a4577fe3624c546cbb098bc07c67ccec8e6f877 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Fri, 29 May 2026 16:51:03 +0530 Subject: [PATCH 18/26] feat(ecs_service): reject in-apex non-single-label domains at plan time Domains under the cluster apex that aren't a single-label <label>.<apex> (the bare apex, or names more than one label deep) can't ride the *.<apex> wildcard cert and can't be satisfied with a customer record (the record would live in the Ravion-managed zone). Add an invalid_apex_domains local + a lifecycle.precondition on ravion_certificate.svc that fails the plan with the offending entries and the fix, instead of silently mis-routing them into a per-service cert + an unwritable routing record. Pairs with the server-side RejectCustomDomainUnderApex backstop for direct-API callers. Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_service/ravion_domains.tf | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 3ad99f5..4bb2df4 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -47,6 +47,19 @@ locals { if !(endswith(d, ".${local.apex}") && length(trimsuffix(d, ".${local.apex}")) > 0 && !strcontains(trimsuffix(d, ".${local.apex}"), ".")) ] + # Domains under the cluster apex that are NOT a single-label `.`: + # the bare apex itself, or a name more than one label deep. The `*.` + # wildcard cert covers exactly one label, and the customer cannot add records + # to the Ravion-managed zone, so these can never be satisfied — they fall into + # custom_domains today and would silently emit a per-service cert + an + # unwritable routing record. Fail the plan instead (the server-side + # RejectCustomDomainUnderApex is the same backstop for direct-API callers). 
+ invalid_apex_domains = [ + for d in local.custom_domains : d + if d == local.apex || endswith(d, ".${local.apex}") + ] + invalid_apex_domains_msg = join(", ", local.invalid_apex_domains) + # All of this service's hostnames route to its target group via one rule. ravion_host_headers = local.effective_domains @@ -80,6 +93,10 @@ resource "ravion_certificate" "svc" { target_arn = var.cluster_https_listener_arn lifecycle { + precondition { + condition = length(local.invalid_apex_domains) == 0 + error_message = "Domains under the cluster apex must be a single label that rides the cluster wildcard, like checkout.${local.apex}. These entries are the bare apex or more than one label deep, so the wildcard certificate does not cover them and their routing record would have to live in the Ravion-managed zone (which you cannot edit): ${local.invalid_apex_domains_msg}. Use a single-label name under the apex, or a domain in a DNS zone you control." + } precondition { condition = length(local.custom_domains) == 0 || (var.ravion_aws_account_id != null && var.ravion_aws_account_id != "") error_message = "ravion_aws_account_id is required when the domains list includes a custom (non-wildcard) domain." From 9c97fcdeebeb9f02b63f7da3357609d2344ae584 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Fri, 29 May 2026 23:58:39 +0530 Subject: [PATCH 19/26] fix(managed-domains): review findings B3/M13/M14/#39/#40 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - B3: mock the ravion provider in the pre-existing ecs_cluster + ecs_service basic.tftest.hcl so the suites stop aborting on "Missing Ravion API key" (declaring the provider configures it even when all ravion resources are count=0 on the BYO path). - M13: split the ravion listener rule's host headers into chunks of <=5 values (AWS ALB's per-rule condition-value quota), each chunk with its own priority. 
- M14: keep the rolling target group's pre-branch stable name substr(var.name,0,28)+"-tg" instead of name_prefix — avoids the one-time ForceNew that deadlocks against the listener rule's ignore_changes=[action], and stays within ALB's 32-char TG-name limit. - #39: gate the ravion listener rule (and cert/domain) on enable_load_balancer so it can't be created with a null target_group_arn. - #40: widen the auto-derived rule-priority hash entropy to cut collisions on the shared cluster listener. Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_cluster/tests/basic.tftest.hcl | 7 ++++ compute/ecs_service/ravion_domains.tf | 48 ++++++++++++++++++---- compute/ecs_service/target_groups.tf | 11 ++++- compute/ecs_service/tests/basic.tftest.hcl | 6 +++ 4 files changed, 62 insertions(+), 10 deletions(-) diff --git a/compute/ecs_cluster/tests/basic.tftest.hcl b/compute/ecs_cluster/tests/basic.tftest.hcl index 2deb274..5458a45 100644 --- a/compute/ecs_cluster/tests/basic.tftest.hcl +++ b/compute/ecs_cluster/tests/basic.tftest.hcl @@ -170,6 +170,13 @@ mock_provider "aws" { } } +# Default (BYO) runs never create ravion_certificate.cluster (count = 0), but +# Terraform still configures the ravion provider because the module declares it. +# An empty mock prevents the provider's real Configure (which requires +# RAVION_API_KEY) from failing the plan. listeners.tftest.hcl mocks it with +# overrides because those runs actually issue the wildcard cert. +mock_provider "ravion" {} + variables { name = "test-cluster" vpc_id = "vpc-12345678" diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 4bb2df4..f5367fe 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -60,10 +60,21 @@ locals { ] invalid_apex_domains_msg = join(", ", local.invalid_apex_domains) - # All of this service's hostnames route to its target group via one rule. 
- ravion_host_headers = local.effective_domains - - ravion_priority = var.ravion_listener_rule_priority > 0 ? var.ravion_listener_rule_priority : ((parseint(substr(sha256(var.name), 0, 4), 16) % 49000) + 1000) + # All of this service's hostnames route to its target group. AWS ALB allows at + # most 5 values in a single rule condition, so the host headers are split into + # chunks of <=5 — one aws_lb_listener_rule per chunk (see below), each with its + # own derived priority. (chunklist([], 5) == [], handled by the rule's guard.) + ravion_host_headers = local.effective_domains + ravion_host_header_chunks = chunklist(local.ravion_host_headers, 5) + + # Base listener-rule priority. When ravion_listener_rule_priority is 0 (the + # default) it is derived from sha256(name) using 12 hex chars (~48 bits) so the + # collision probability stays low across many services sharing the cluster + # listener; mod 48000 (instead of 49000) leaves headroom below the ALB max of + # 50000 for the per-chunk offset (priority = base + chunk index). On a residual + # collision ("priority already in use") set ravion_listener_rule_priority + # explicitly to a free value. + ravion_priority = var.ravion_listener_rule_priority > 0 ? var.ravion_listener_rule_priority : ((parseint(substr(sha256(var.name), 0, 12), 16) % 48000) + 1000) ravion_target_group_arn = ( length(aws_lb_target_group.this) > 0 ? aws_lb_target_group.this[0].arn : ( @@ -121,17 +132,21 @@ resource "ravion_domain" "custom" { target_zone_id = var.cluster_alb_zone_id } -# Single listener rule routing all of this service's hostnames to its target -# group. Blue/green controllers flip the action externally. +# One listener rule per chunk of <=5 host headers (AWS ALB's per-condition value +# quota), together routing all of this service's hostnames to its target group. +# Each chunk gets its own priority (base + chunk index). Blue/green controllers +# flip the action externally. 
resource "aws_lb_listener_rule" "ravion" { - count = local.ravion_managed && var.cluster_https_listener_arn != null && length(local.ravion_host_headers) > 0 ? 1 : 0 + for_each = local.ravion_managed && var.cluster_https_listener_arn != null && length(local.ravion_host_headers) > 0 ? { + for idx, chunk in local.ravion_host_header_chunks : idx => chunk + } : {} listener_arn = var.cluster_https_listener_arn - priority = local.ravion_priority + priority = local.ravion_priority + tonumber(each.key) condition { host_header { - values = local.ravion_host_headers + values = each.value } } @@ -141,6 +156,21 @@ resource "aws_lb_listener_rule" "ravion" { } lifecycle { + # A Ravion-managed service forwards its hostnames to its own target group, so + # it must have a load balancer attachment. Without one ravion_target_group_arn + # is null, which would otherwise surface as a cryptic provider-side + # "target_group_arn must not be empty" at apply. + precondition { + condition = !local.ravion_managed || local.enable_load_balancer + error_message = "A Ravion-managed service (cluster_parent_fqdn set) requires an enabled load_balancer_attachment so its hostnames have a target group to forward to." + } ignore_changes = [action] } } + +# Earlier revisions created a single count-based rule; migrate that instance to +# the first for_each chunk so adopting the chunked layout is not a destroy+create. +moved { + from = aws_lb_listener_rule.ravion[0] + to = aws_lb_listener_rule.ravion["0"] +} diff --git a/compute/ecs_service/target_groups.tf b/compute/ecs_service/target_groups.tf index 9aca924..120b0c3 100644 --- a/compute/ecs_service/target_groups.tf +++ b/compute/ecs_service/target_groups.tf @@ -5,7 +5,16 @@ resource "aws_lb_target_group" "this" { count = local.enable_load_balancer && var.deployment_type == "rolling" ? 
1 : 0 - name_prefix = substr(var.name, 0, 6) + # Stable name (the EXACT pre-branch expression) rather than name_prefix: the + # ECS service ignores load_balancer changes and the listener rules ignore + # action, so neither will repoint to a replacement TG. A name_prefix forces a + # one-time ForceNew on existing rolling services, and the old TG can never be + # released ("in use by listener rule"/ECS service) -> apply deadlock. Keeping + # the original stable name means no replacement at all. The substr/28 cap is + # load-bearing: it both matches the currently-deployed name (so no ForceNew) + # and keeps the TG name within ALB's 32-char limit. (Blue/green tg_1/tg_2 are + # already stably named and are flipped, never replaced.) + name = "${substr(var.name, 0, min(length(var.name), 28))}-tg" port = var.load_balancer_attachment.target_group.port protocol = var.load_balancer_attachment.target_group.protocol vpc_id = var.vpc_id diff --git a/compute/ecs_service/tests/basic.tftest.hcl b/compute/ecs_service/tests/basic.tftest.hcl index 4216a22..2a3acbd 100644 --- a/compute/ecs_service/tests/basic.tftest.hcl +++ b/compute/ecs_service/tests/basic.tftest.hcl @@ -5,6 +5,12 @@ # Mock provider for testing mock_provider "aws" {} +# These runs leave cluster_parent_fqdn unset, so ravion_domains.tf creates no +# ravion resources. Terraform still configures the ravion provider because the +# module declares it, so an empty mock prevents its real Configure (which +# requires RAVION_API_KEY) from failing the plan. 
+mock_provider "ravion" {} + ################################################################################ # Variables for Tests ################################################################################ From 6591dd8d7e4dd8085f612bb31f0f38733fdba2ae Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Sat, 30 May 2026 20:53:42 +0530 Subject: [PATCH 20/26] fix(ecs_cluster): create_before_destroy on cluster wildcard cert for in-place rotation Rotating the shared_wildcard cert (any RequiresReplace change, e.g. a renamed apex) destroyed the old cert before swapping the listener, hitting ACM ResourceInUse and deadlocking. create_before_destroy issues the new cert and swaps it onto the listener in-place before deleting the old one. Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_cluster/ravion_domains.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 5f7e7e6..982935f 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -31,6 +31,14 @@ resource "ravion_certificate" "cluster" { target_zone_id = var.enable_public_alb ? module.public_alb[0].alb_zone_id : (var.enable_private_alb ? module.private_alb[0].alb_zone_id : null) lifecycle { + # Rotating the cluster wildcard cert (any RequiresReplace change, e.g. a + # renamed apex) must issue the new cert and swap it onto the HTTPS + # listener(s) BEFORE the old one is torn down. Without this, terraform + # destroys the old cert first while it is still the listener's default — + # ACM returns ResourceInUse and the rotation deadlocks. create_before_destroy + # makes it new -> listener in-place swap -> delete old (now detached). 
+ create_before_destroy = true + precondition { condition = !var.use_ravion_managed_domains || var.enable_public_alb || var.enable_private_alb error_message = "use_ravion_managed_domains requires at least one ALB (enable_public_alb or enable_private_alb)." From bbe5d8c732614b2143dbbdf1cad0ca973acafa3c Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Sun, 31 May 2026 03:27:53 +0530 Subject: [PATCH 21/26] feat(ecs_cluster): reject duplicate wildcard apex via ravion_dns_collision_check A second cluster claiming an apex another cluster already owns silently hijacked the *. routing ALIAS. Add a plan-time precondition backed by the ravion_dns_collision_check data source (the backend resolves the name leaf to *.. and reports a collision only when a DIFFERENT module instance owns it, so a self re-apply passes). The allocator enforces the same rule server-side as an apply-time backstop. Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_cluster/ravion_domains.tf | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 982935f..62d7dd1 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -14,6 +14,16 @@ locals { enable_ravion_domain = var.use_ravion_managed_domains && (var.enable_public_alb || var.enable_private_alb) } +# Plan-time guard against two clusters claiming the same wildcard apex. The +# backend resolves the bare name leaf to the managed wildcard (*..) +# and reports collides=true ONLY when a DIFFERENT module instance already owns +# that domain — a re-apply of THIS cluster does not collide with itself. The +# allocator enforces the same rule server-side as an apply-time backstop. +data "ravion_dns_collision_check" "cluster" { + count = local.enable_ravion_domain ? 
1 : 0 + fqdn = coalesce(var.ravion_cluster_name, var.module_instance_given_id, var.name) +} + resource "ravion_certificate" "cluster" { count = local.enable_ravion_domain ? 1 : 0 @@ -47,5 +57,9 @@ resource "ravion_certificate" "cluster" { condition = !var.use_ravion_managed_domains || (var.ravion_aws_account_id != null && var.ravion_aws_account_id != "") error_message = "ravion_aws_account_id (aws_*) is required when use_ravion_managed_domains = true." } + precondition { + condition = !coalesce(one(data.ravion_dns_collision_check.cluster[*].collides), false) + error_message = "Cluster wildcard apex is already claimed by another cluster: a managed *.. domain owned by a different module instance already exists. Pick a unique ravion_cluster_name." + } } } From 8e85aeed9d7cc4b9b2d2935867f8ce1429f8b0e7 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Mon, 1 Jun 2026 14:06:49 +0530 Subject: [PATCH 22/26] feat(ecs_service): plan-time parent-apex authorization + require provider >= 1.0.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a ravion_parent_apex_check data source + a precondition on the service's nested wildcard domains: the plan fails if cluster_parent_fqdn points at an apex the service isn't entitled to (e.g. another cluster's apex). The control plane enforces the same rule at apply (Dns:PARENT_APEX_UNAUTHORIZED), so this only surfaces the failure earlier — users never write the guard. The empty-domains auto-FQDN fallback (effective_domains) is unchanged: a service with no custom domains still gets . and stays accessible. Bump the ravion provider pin to >= 1.0.0 across ecs_service, ecs_cluster, and static_site (the build that ships the parent-apex data source + the server-side apex guards). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_cluster/versions.tf | 2 +- compute/ecs_service/ravion_domains.tf | 20 ++++++++++++++++++++ compute/ecs_service/versions.tf | 2 +- hosting/static_site/versions.tf | 2 +- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/compute/ecs_cluster/versions.tf b/compute/ecs_cluster/versions.tf index bed1016..cbae5c4 100644 --- a/compute/ecs_cluster/versions.tf +++ b/compute/ecs_cluster/versions.tf @@ -16,7 +16,7 @@ terraform { # var.use_ravion_managed_domains = true (see ravion_domains.tf). ravion = { source = "provider-cf.siddharthsuresh.dev/ravion/ravion" - version = ">= 0.1.0" + version = ">= 1.0.0" } } } diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index f5367fe..77742dd 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -83,6 +83,16 @@ locals { ) } +# Plan-time authorization guard: a service may only nest its auto-domains under +# a cluster wildcard apex it is entitled to (its own cluster's wildcard, in the +# same environment). Fails the plan with a clear message if cluster_parent_fqdn +# was pointed at another cluster's apex. The control plane enforces the same rule +# at apply (Dns:PARENT_APEX_UNAUTHORIZED), so this only moves the failure earlier. +data "ravion_parent_apex_check" "cluster" { + count = local.ravion_managed && length(local.wildcard_covered) > 0 ? 1 : 0 + parent_fqdn = local.apex +} + # Wildcard-covered domains (incl. the auto-FQDN): nest under the cluster # wildcard. No per-service cert; the cluster `*.` ALIAS routes them. resource "ravion_domain" "wildcard" { @@ -90,6 +100,16 @@ resource "ravion_domain" "wildcard" { name = trimsuffix(each.value, ".${local.apex}") parent_fqdn = local.apex + + lifecycle { + precondition { + # try(...) 
allows when the check is skipped (count 0) or the apex isn't yet + # resolvable (first apply before the cluster exists) — the apply-time guard + # takes over there. + condition = try(one(data.ravion_parent_apex_check.cluster[*].authorized), true) + error_message = "This service may not nest ${each.value} under ${local.apex}: it is not a live cluster wildcard in this environment. Ensure cluster_parent_fqdn points at your own cluster's wildcard apex." + } + } } # Per-service certificate covering the custom (non-wildcard) domains (<=10 SANs), diff --git a/compute/ecs_service/versions.tf b/compute/ecs_service/versions.tf index fb15899..680df02 100644 --- a/compute/ecs_service/versions.tf +++ b/compute/ecs_service/versions.tf @@ -14,7 +14,7 @@ terraform { } ravion = { source = "provider-cf.siddharthsuresh.dev/ravion/ravion" - version = ">= 0.1.0" + version = ">= 1.0.0" } } } diff --git a/hosting/static_site/versions.tf b/hosting/static_site/versions.tf index 8d30acf..5c1d482 100644 --- a/hosting/static_site/versions.tf +++ b/hosting/static_site/versions.tf @@ -10,7 +10,7 @@ terraform { } ravion = { source = "provider-cf.siddharthsuresh.dev/ravion/ravion" - version = ">= 0.1.0" + version = ">= 1.0.0" } } } From ac14c22d5af72d5e4231cd89a863d97bd5d29ce9 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Mon, 1 Jun 2026 14:36:57 +0530 Subject: [PATCH 23/26] feat(ecs_cluster): surface wildcard-apex dependents via ravion_apex_dependents Wire the second new data source: read the live service domains nested under the cluster wildcard apex and expose them as the ravion_cluster_dependent_domains output for the UI / safe-teardown orchestration. Deliberately an output, not a precondition: a cluster legitimately has dependents during normal operation and Terraform can't scope a precondition to destroy-time, so a dependent_count == 0 gate would block every apply. The control plane already refuses a wildcard-cert teardown while dependents exist (Dns:CERT_APEX_IN_USE). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_cluster/outputs.tf | 5 +++++ compute/ecs_cluster/ravion_domains.tf | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/compute/ecs_cluster/outputs.tf b/compute/ecs_cluster/outputs.tf index d639303..cb8ecbd 100644 --- a/compute/ecs_cluster/outputs.tf +++ b/compute/ecs_cluster/outputs.tf @@ -274,3 +274,8 @@ output "ravion_managed_domains_enabled" { description = "True when the cluster owns a Ravion wildcard cert + HTTPS listener (use_ravion_managed_domains AND at least one ALB). Services read this to show/hide managed-domain fields." value = local.enable_ravion_domain } + +output "ravion_cluster_dependent_domains" { + description = "Live service domains nested under the cluster wildcard apex (they ride its cert/ALIAS). Tearing the cluster down while these exist is refused by the control plane (Dns:CERT_APEX_IN_USE)." + value = local.enable_ravion_domain ? one(data.ravion_apex_dependents.cluster[*].dependents) : [] +} diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 62d7dd1..5588f4d 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -63,3 +63,15 @@ resource "ravion_certificate" "cluster" { } } } + +# Live service domains nested under this cluster's wildcard apex (they ride its +# cert + `*.` ALIAS). Surfaced via the ravion_cluster_dependent_domains +# output for the UI / safe-teardown orchestration. NOT used as a precondition: +# a cluster legitimately has dependents during normal operation and Terraform +# can't scope a precondition to destroy-time, so it would block every apply. +# The control plane already refuses a teardown while dependents exist +# (Dns:CERT_APEX_IN_USE), which is the real backstop. +data "ravion_apex_dependents" "cluster" { + count = local.enable_ravion_domain ? 
1 : 0 + apex = ravion_certificate.cluster[0].fqdn +} From 3a3cdc541354a45a123d072a8b8c048ed7b70f29 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Mon, 1 Jun 2026 19:28:26 +0530 Subject: [PATCH 24/26] docs(ecs_service): parent-apex guard wording reflects reference-based rule The control plane now authorizes parent-apex nesting from a signed token claim (the clusters the run references), not same-environment. Update the guard comment + precondition error message to say "references that cluster" instead of "in this environment." Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_service/ravion_domains.tf | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 77742dd..1437c6c 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -84,10 +84,11 @@ locals { } # Plan-time authorization guard: a service may only nest its auto-domains under -# a cluster wildcard apex it is entitled to (its own cluster's wildcard, in the -# same environment). Fails the plan with a clear message if cluster_parent_fqdn -# was pointed at another cluster's apex. The control plane enforces the same rule -# at apply (Dns:PARENT_APEX_UNAUTHORIZED), so this only moves the failure earlier. +# a cluster wildcard apex it actually references in its config. Fails the plan +# with a clear message if cluster_parent_fqdn was pointed at another cluster's +# apex the run doesn't reference. The control plane enforces the same rule at +# apply against a signed token claim (Dns:PARENT_APEX_UNAUTHORIZED), so this +# only moves the failure earlier. data "ravion_parent_apex_check" "cluster" { count = local.ravion_managed && length(local.wildcard_covered) > 0 ? 1 : 0 parent_fqdn = local.apex @@ -107,7 +108,7 @@ resource "ravion_domain" "wildcard" { # resolvable (first apply before the cluster exists) — the apply-time guard # takes over there. 
condition = try(one(data.ravion_parent_apex_check.cluster[*].authorized), true) - error_message = "This service may not nest ${each.value} under ${local.apex}: it is not a live cluster wildcard in this environment. Ensure cluster_parent_fqdn points at your own cluster's wildcard apex." + error_message = "This service may not nest ${each.value} under ${local.apex}: this deployment does not reference that cluster. Set cluster_parent_fqdn from your own cluster's ravion_cluster_domain_fqdn output." } } } From b1a577915a97e52f3e2afa7cf3d0bf9e2868ad3c Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Mon, 1 Jun 2026 23:35:28 +0530 Subject: [PATCH 25/26] feat(ecs): pass module_instance_id to Ravion certs/domains ravion_certificate/ravion_domain now require module_instance_id so the resources work outside a Ravion stack run (service-account API key) as well as inside it. Add a module_instance_id variable to ecs_cluster + ecs_service and forward it to every ravion_certificate/ravion_domain block (cluster wildcard cert; service wildcard/custom domains + instance cert). Inside a stack run the runner injects TF_VAR_module_instance_id, so this is populated automatically; external/API-key runs set it explicitly. The control plane still prefers the signed token's instance when a stack-run JWT is present. Co-Authored-By: Claude Opus 4.8 (1M context) --- compute/ecs_cluster/ravion_domains.tf | 11 ++++++----- compute/ecs_cluster/variables.tf | 6 ++++++ compute/ecs_service/ravion_domains.tf | 23 +++++++++++++---------- compute/ecs_service/variables.tf | 6 ++++++ 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 5588f4d..6b4b81e 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -27,11 +27,12 @@ data "ravion_dns_collision_check" "cluster" { resource "ravion_certificate" "cluster" { count = local.enable_ravion_domain ? 
1 : 0 - role = "shared_wildcard" - wildcard = true - name = coalesce(var.ravion_cluster_name, var.module_instance_given_id, var.name) - aws_account_id = var.ravion_aws_account_id - aws_region = coalesce(var.ravion_aws_region, local.region) + role = "shared_wildcard" + wildcard = true + name = coalesce(var.ravion_cluster_name, var.module_instance_given_id, var.name) + module_instance_id = var.module_instance_id + aws_account_id = var.ravion_aws_account_id + aws_region = coalesce(var.ravion_aws_region, local.region) # Ravion publishes a *. ALIAS to this ALB so service auto-FQDNs # (.) resolve under the cluster wildcard. Public ALB if present, diff --git a/compute/ecs_cluster/variables.tf b/compute/ecs_cluster/variables.tf index c30619c..85571a5 100644 --- a/compute/ecs_cluster/variables.tf +++ b/compute/ecs_cluster/variables.tf @@ -652,6 +652,12 @@ variable "module_instance_given_id" { default = null } +variable "module_instance_id" { + type = string + description = "The Ravion module instance id (minst_*) that owns this cluster's Ravion-managed certificate. Injected by the runner as TF_VAR_module_instance_id inside a stack run; set it explicitly for external/API-key runs. Required when use_ravion_managed_domains = true." + default = null +} + variable "ravion_aws_account_id" { type = string description = "Ravion AwsAccount row id (aws_*) the wildcard ACM cert is issued in. Required when use_ravion_managed_domains = true." 
diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index 1437c6c..b8cdbaa 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -99,8 +99,9 @@ data "ravion_parent_apex_check" "cluster" { resource "ravion_domain" "wildcard" { for_each = toset(local.wildcard_covered) - name = trimsuffix(each.value, ".${local.apex}") - parent_fqdn = local.apex + name = trimsuffix(each.value, ".${local.apex}") + module_instance_id = var.module_instance_id + parent_fqdn = local.apex lifecycle { precondition { @@ -118,11 +119,12 @@ resource "ravion_domain" "wildcard" { resource "ravion_certificate" "svc" { count = length(local.custom_domains) > 0 ? 1 : 0 - role = "instance" - domains = local.custom_domains - aws_account_id = var.ravion_aws_account_id - aws_region = coalesce(var.ravion_aws_region, local.region) - target_arn = var.cluster_https_listener_arn + role = "instance" + domains = local.custom_domains + module_instance_id = var.module_instance_id + aws_account_id = var.ravion_aws_account_id + aws_region = coalesce(var.ravion_aws_region, local.region) + target_arn = var.cluster_https_listener_arn lifecycle { precondition { @@ -148,9 +150,10 @@ resource "ravion_certificate" "svc" { resource "ravion_domain" "custom" { for_each = toset(local.custom_domains) - name = each.value - target_dns_name = var.cluster_alb_dns_name - target_zone_id = var.cluster_alb_zone_id + name = each.value + module_instance_id = var.module_instance_id + target_dns_name = var.cluster_alb_dns_name + target_zone_id = var.cluster_alb_zone_id } # One listener rule per chunk of <=5 host headers (AWS ALB's per-condition value diff --git a/compute/ecs_service/variables.tf b/compute/ecs_service/variables.tf index 2c6720d..ec7c668 100644 --- a/compute/ecs_service/variables.tf +++ b/compute/ecs_service/variables.tf @@ -652,6 +652,12 @@ variable "module_instance_given_id" { default = null } +variable "module_instance_id" { + type = string 
+ description = "The Ravion module instance id (minst_*) that owns this service's Ravion-managed domains/certificate. Injected by the runner as TF_VAR_module_instance_id inside a stack run; set it explicitly for external/API-key runs. Required when use_ravion_managed_domains = true." + default = null +} + variable "ravion_aws_region" { type = string description = "AWS region the per-service cert lives in. Defaults to the module region." From 7fffc23c726135a092ed2481bb2c2bed12fbf8a0 Mon Sep 17 00:00:00 2001 From: Siddharth Suresh Date: Thu, 4 Jun 2026 11:46:44 +0530 Subject: [PATCH 26/26] naming fixes --- compute/ecs_cluster/listeners.tf | 10 +++++----- compute/ecs_cluster/outputs.tf | 6 +++--- compute/ecs_cluster/ravion_domains.tf | 8 ++++---- compute/ecs_cluster/tests/basic.tftest.hcl | 2 +- compute/ecs_cluster/tests/listeners.tftest.hcl | 8 ++++---- compute/ecs_service/outputs.tf | 2 +- compute/ecs_service/ravion_domains.tf | 8 ++++---- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/compute/ecs_cluster/listeners.tf b/compute/ecs_cluster/listeners.tf index d61fcde..4c0785f 100644 --- a/compute/ecs_cluster/listeners.tf +++ b/compute/ecs_cluster/listeners.tf @@ -8,15 +8,15 @@ # certificate SOURCE changes by mode: # # - use_ravion_managed_domains = true -> the Ravion wildcard cert -# (ravion_certificate.cluster, see ravion_domains.tf) is the default cert on +# (ravion_aws_acm_certificate.cluster, see ravion_domains.tf) is the default cert on # BOTH listeners; public/private services nest their auto-FQDNs under it. # - use_ravion_managed_domains = false -> the listener uses the customer's # first public/private_alb_certificate_arns entry as default and attaches # the rest for SNI. # # The listeners live here (not in the alb submodule) to avoid a DAG cycle: -# aws_lb.this -> ravion_certificate.cluster -> aws_lb_listener.public_https -# (uses the cert). 
ravion_certificate with role=shared_wildcard blocks until +# aws_lb.this -> ravion_aws_acm_certificate.cluster -> aws_lb_listener.public_https +# (uses the cert). ravion_aws_acm_certificate with role=shared_wildcard blocks until # ISSUED, so cert_arn is valid at listener create time. # Public ALB HTTPS listener. Mode-independent address: created whenever the @@ -30,7 +30,7 @@ resource "aws_lb_listener" "public_https" { ssl_policy = var.public_alb_ssl_policy # try(...) defers to the precondition below for the clean error when BYO mode # has no cert ARN, instead of a cryptic index-out-of-range. - certificate_arn = local.enable_ravion_domain ? ravion_certificate.cluster[0].cert_arn : try(var.public_alb_certificate_arns[0], null) + certificate_arn = local.enable_ravion_domain ? ravion_aws_acm_certificate.cluster[0].arn : try(var.public_alb_certificate_arns[0], null) default_action { type = "fixed-response" @@ -73,7 +73,7 @@ resource "aws_lb_listener" "private_https" { port = 443 protocol = "HTTPS" ssl_policy = var.private_alb_ssl_policy - certificate_arn = local.enable_ravion_domain ? ravion_certificate.cluster[0].cert_arn : try(var.private_alb_certificate_arns[0], null) + certificate_arn = local.enable_ravion_domain ? ravion_aws_acm_certificate.cluster[0].arn : try(var.private_alb_certificate_arns[0], null) default_action { type = "fixed-response" diff --git a/compute/ecs_cluster/outputs.tf b/compute/ecs_cluster/outputs.tf index cb8ecbd..4f59dcc 100644 --- a/compute/ecs_cluster/outputs.tf +++ b/compute/ecs_cluster/outputs.tf @@ -247,17 +247,17 @@ output "region" { output "ravion_cluster_certificate_id" { description = "Ravion managed-certificate id for the cluster wildcard (null unless use_ravion_managed_domains)." - value = local.enable_ravion_domain ? ravion_certificate.cluster[0].id : null + value = local.enable_ravion_domain ? ravion_aws_acm_certificate.cluster[0].id : null } output "ravion_cluster_domain_fqdn" { description = "Cluster wildcard apex FQDN. 
Pass to ecs_service as cluster_parent_fqdn." - value = local.enable_ravion_domain ? ravion_certificate.cluster[0].fqdn : null + value = local.enable_ravion_domain ? ravion_aws_acm_certificate.cluster[0].domain_name : null } output "ravion_cluster_cert_arn" { description = "ACM ARN of the cluster wildcard cert." - value = local.enable_ravion_domain ? ravion_certificate.cluster[0].cert_arn : null + value = local.enable_ravion_domain ? ravion_aws_acm_certificate.cluster[0].arn : null } output "ravion_aws_account_id" { diff --git a/compute/ecs_cluster/ravion_domains.tf b/compute/ecs_cluster/ravion_domains.tf index 6b4b81e..581203b 100644 --- a/compute/ecs_cluster/ravion_domains.tf +++ b/compute/ecs_cluster/ravion_domains.tf @@ -20,11 +20,11 @@ locals { # that domain — a re-apply of THIS cluster does not collide with itself. The # allocator enforces the same rule server-side as an apply-time backstop. data "ravion_dns_collision_check" "cluster" { - count = local.enable_ravion_domain ? 1 : 0 - fqdn = coalesce(var.ravion_cluster_name, var.module_instance_given_id, var.name) + count = local.enable_ravion_domain ? 1 : 0 + domain_name = coalesce(var.ravion_cluster_name, var.module_instance_given_id, var.name) } -resource "ravion_certificate" "cluster" { +resource "ravion_aws_acm_certificate" "cluster" { count = local.enable_ravion_domain ? 1 : 0 role = "shared_wildcard" @@ -74,5 +74,5 @@ resource "ravion_certificate" "cluster" { # (Dns:CERT_APEX_IN_USE), which is the real backstop. data "ravion_apex_dependents" "cluster" { count = local.enable_ravion_domain ? 
1 : 0 - apex = ravion_certificate.cluster[0].fqdn + apex = ravion_aws_acm_certificate.cluster[0].domain_name } diff --git a/compute/ecs_cluster/tests/basic.tftest.hcl b/compute/ecs_cluster/tests/basic.tftest.hcl index 5458a45..16a1506 100644 --- a/compute/ecs_cluster/tests/basic.tftest.hcl +++ b/compute/ecs_cluster/tests/basic.tftest.hcl @@ -170,7 +170,7 @@ mock_provider "aws" { } } -# Default (BYO) runs never create ravion_certificate.cluster (count = 0), but +# Default (BYO) runs never create ravion_aws_acm_certificate.cluster (count = 0), but # Terraform still configures the ravion provider because the module declares it. # An empty mock prevents the provider's real Configure (which requires # RAVION_API_KEY) from failing the plan. listeners.tftest.hcl mocks it with diff --git a/compute/ecs_cluster/tests/listeners.tftest.hcl b/compute/ecs_cluster/tests/listeners.tftest.hcl index d37f322..2b727a7 100644 --- a/compute/ecs_cluster/tests/listeners.tftest.hcl +++ b/compute/ecs_cluster/tests/listeners.tftest.hcl @@ -58,16 +58,16 @@ mock_provider "aws" { } } -# ravion_certificate needs a DomainProvider JWT to configure against the real +# ravion_aws_acm_certificate needs a DomainProvider JWT to configure against the real # control plane; mock it so tests are hermetic. The cert_arn override is a valid # ACM ARN so the listener's certificate_arn passes provider validation. 
mock_provider "ravion" { override_resource { - target = ravion_certificate.cluster + target = ravion_aws_acm_certificate.cluster values = { id = "cert_test" - cert_arn = "arn:aws:acm:us-east-1:123456789012:certificate/99999999-9999-9999-9999-999999999999" - fqdn = "*.test-cluster-abcd.ravion.app" + arn = "arn:aws:acm:us-east-1:123456789012:certificate/99999999-9999-9999-9999-999999999999" + domain_name = "*.test-cluster-abcd.ravion.app" status = "ISSUED" } } @@ -221,7 +221,7 @@ run "ravion_managed_public_https" { } assert { - condition = length(ravion_certificate.cluster) == 1 + condition = length(ravion_aws_acm_certificate.cluster) == 1 error_message = "Ravion wildcard cert must be created in managed mode" } diff --git a/compute/ecs_service/outputs.tf b/compute/ecs_service/outputs.tf index 4a699db..3f95214 100644 --- a/compute/ecs_service/outputs.tf +++ b/compute/ecs_service/outputs.tf @@ -266,5 +266,5 @@ output "ravion_domain_url" { output "ravion_custom_cert_arn" { description = "ACM ARN of the per-service instance cert covering the custom (non-wildcard) domains. Null when there are none." - value = length(local.custom_domains) > 0 ? ravion_certificate.svc[0].cert_arn : null + value = length(local.custom_domains) > 0 ? ravion_aws_acm_certificate.svc[0].arn : null } diff --git a/compute/ecs_service/ravion_domains.tf b/compute/ecs_service/ravion_domains.tf index b8cdbaa..775536f 100644 --- a/compute/ecs_service/ravion_domains.tf +++ b/compute/ecs_service/ravion_domains.tf @@ -90,8 +90,8 @@ locals { # apply against a signed token claim (Dns:PARENT_APEX_UNAUTHORIZED), so this # only moves the failure earlier. data "ravion_parent_apex_check" "cluster" { - count = local.ravion_managed && length(local.wildcard_covered) > 0 ? 1 : 0 - parent_fqdn = local.apex + count = local.ravion_managed && length(local.wildcard_covered) > 0 ? 1 : 0 + parent_domain_name = local.apex } # Wildcard-covered domains (incl.
the auto-FQDN): nest under the cluster @@ -101,7 +101,7 @@ resource "ravion_domain" "wildcard" { name = trimsuffix(each.value, ".${local.apex}") module_instance_id = var.module_instance_id - parent_fqdn = local.apex + parent_domain_name = local.apex lifecycle { precondition { @@ -116,7 +116,7 @@ resource "ravion_domain" "wildcard" { # Per-service certificate covering the custom (non-wildcard) domains (<=10 SANs), # attached to the cluster listener via Ravion. -resource "ravion_certificate" "svc" { +resource "ravion_aws_acm_certificate" "svc" { count = length(local.custom_domains) > 0 ? 1 : 0 role = "instance"