From 215246544770bb4473f1f1c8f100f7e991c2bd17 Mon Sep 17 00:00:00 2001 From: aram price Date: Thu, 30 Apr 2026 16:21:33 -0700 Subject: [PATCH 1/3] Add job to ensure expected GCP integration network is created MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Why --- The bats and test-stemcells-ipv4 jobs assume a GCP subnetwork named stemcell-builder-integration-${SUBNET_INT} exists in the bosh-concourse VPC, with a /24 at 10.100.${SUBNET_INT}.0/24, private Google access, and IPV4_ONLY stack type. They also require a matching ingress firewall rule (all-protocol, source CIDR → tags test-stemcells-bats/bat) so that compilation VMs and BAT deployment VMs can reach the BOSH director's NATS server. Until now both resources had to be created and maintained out of band; their absence caused consistent compilation-VM agent timeouts (builds 466–475). What ---- * ci/tasks/gcp/ensure-integration-network.sh - Authenticates via GCP_JSON_KEY service account. - Derives SUBNET_NAME and SUBNET_CIDR from SUBNET_INT. - Captures stderr via mktemp temp file (cleaned up by trap on EXIT) so that gcloud failures are classified: "was not found" → create the resource; anything else → print the error and exit non-zero. This prevents auth/permission/transient API errors from being silently misinterpreted as "resource missing". - Subnetwork: single gcloud describe call captures exit code (for existence) and attributes (for drift detection). Validates network, ipCidrRange, privateIpGoogleAccess, and stackType; exits non-zero with a clear diff on any mismatch. - Firewall rule: same stderr-capture pattern. Validates network, direction, allowed[0].IPProtocol (must be "all"), sourceRanges[0], and disabled (must be False) in one describe call. Validates targetTags in a second describe call, sorting both sides before comparison to be order-insensitive. Both 'test-stemcells-bats' and 'bat' tags are required, mirroring the existing stemcell-builder-integration-22 rule. 
* ci/tasks/gcp/ensure-integration-network.yml - Concourse task definition. All params (GCP_JSON_KEY, GCP_PROJECT_ID, GCP_REGION, GCP_NETWORK_NAME, SUBNET_INT) are required; no defaults, values are provided explicitly by the pipeline. * ci/pipelines/builder.yml - New infrastructure group containing the new job. - New job ensure-integration-network: * serial: true, manual trigger only. * Gets bosh-stemcells-ci and bosh-integration-image, then runs the task with GCP_REGION=europe-north2 and GCP_NETWORK_NAME=bosh-concourse passed explicitly. * No passed: constraint on existing jobs; run on demand when the subnet/firewall needs to be created or reconciled. Verification ------------ * ytt -f ci/pipelines/builder.yml -f ci/pipelines/vars.yml renders successfully. * fly validate-pipeline -c <rendered pipeline> reports "looks good". Co-authored-by: Cursor --- ci/pipelines/builder.yml | 23 ++++- ci/tasks/gcp/ensure-integration-network.sh | 98 +++++++++++++++++++++ ci/tasks/gcp/ensure-integration-network.yml | 15 ++++ 3 files changed, 135 insertions(+), 1 deletion(-) create mode 100755 ci/tasks/gcp/ensure-integration-network.sh create mode 100644 ci/tasks/gcp/ensure-integration-network.yml diff --git a/ci/pipelines/builder.yml b/ci/pipelines/builder.yml index ea994eceda..0a7400bd63 100644 --- a/ci/pipelines/builder.yml +++ b/ci/pipelines/builder.yml @@ -44,6 +44,9 @@ groups: - name: docker jobs: - build-os-image-stemcell-builder +- name: infrastructure + jobs: + - ensure-integration-network #@yaml/text-templated-strings jobs: @@ -89,6 +92,25 @@ jobs: get_params: skip_download: true +#! Manually triggered job that idempotently ensures the GCP subnetwork and +#! firewall rule consumed by deploy-director / cleanup-bats-vms / prepare-bats +#! in the test-stemcells-ipv4 and bats jobs below exist. GCP is the source of +#! truth — no state file is required. 
+- name: ensure-integration-network + serial: true + plan: + - get: bosh-stemcells-ci + - get: bosh-integration-image + - task: ensure-integration-network + file: bosh-stemcells-ci/ci/tasks/gcp/ensure-integration-network.yml + image: bosh-integration-image + params: + GCP_JSON_KEY: ((gcp_json_key)) + GCP_PROJECT_ID: ((gcp_project_id)) + GCP_REGION: europe-north2 + GCP_NETWORK_NAME: bosh-concourse + SUBNET_INT: (@= data.values.stemcell_details.subnet_int @) + - name: process-high-critical-cves serial_groups: [log-cves] plan: @@ -885,7 +907,6 @@ resource_types: type: registry-image source: repository: frodenas/gcs-resource - #@yaml/text-templated-strings resources: - name: daily diff --git a/ci/tasks/gcp/ensure-integration-network.sh b/ci/tasks/gcp/ensure-integration-network.sh new file mode 100755 index 0000000000..a422b9abb8 --- /dev/null +++ b/ci/tasks/gcp/ensure-integration-network.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +set -eu -o pipefail + +: "${GCP_JSON_KEY:?}" +: "${GCP_PROJECT_ID:?}" +: "${GCP_REGION:?}" +: "${GCP_NETWORK_NAME:?}" +: "${SUBNET_INT:?}" + +echo "${GCP_JSON_KEY}" | gcloud auth activate-service-account --key-file - --project "${GCP_PROJECT_ID}" + +SUBNET_NAME="stemcell-builder-integration-${SUBNET_INT}" +SUBNET_CIDR="10.100.${SUBNET_INT}.0/24" + +# 'bat' => BATS created VM tag +# 'test-stemcells-bats' => director, and compilation VM tag +FIREWALL_TAGS="bat,test-stemcells-bats" + +gcloud_stderr="$(mktemp)" +trap 'rm -f "${gcloud_stderr}"' EXIT + +echo "Checking for subnet '${SUBNET_NAME}' in region '${GCP_REGION}'..." 
+current_subnet="$(gcloud compute networks subnets describe "${SUBNET_NAME}" \ + --region="${GCP_REGION}" \ + --project="${GCP_PROJECT_ID}" \ + --format='csv[no-heading](network.basename(),ipCidrRange,privateIpGoogleAccess,stackType)' \ + 2>"${gcloud_stderr}")" && subnet_exists=true || subnet_exists=false + +if ${subnet_exists}; then + expected_subnet="${GCP_NETWORK_NAME},${SUBNET_CIDR},True,IPV4_ONLY" + if [[ "${current_subnet}" != "${expected_subnet}" ]]; then + echo "ERROR: Subnet '${SUBNET_NAME}' exists but is misconfigured." + echo " Expected: ${expected_subnet}" + echo " Actual: ${current_subnet}" + exit 1 + fi + echo "Subnet '${SUBNET_NAME}' already exists and matches expected configuration." +elif grep -q "was not found" "${gcloud_stderr}"; then + echo "Creating subnet '${SUBNET_NAME}'..." + gcloud compute networks subnets create "${SUBNET_NAME}" \ + --network="${GCP_NETWORK_NAME}" \ + --region="${GCP_REGION}" \ + --range="${SUBNET_CIDR}" \ + --enable-private-ip-google-access \ + --stack-type=IPV4_ONLY \ + --project="${GCP_PROJECT_ID}" + echo "Subnet '${SUBNET_NAME}' created." +else + echo "ERROR: gcloud describe failed for subnet '${SUBNET_NAME}':" + cat "${gcloud_stderr}" >&2 + exit 1 +fi + +echo "Checking for firewall rule '${SUBNET_NAME}'..." +current_fw="$(gcloud compute firewall-rules describe "${SUBNET_NAME}" \ + --project="${GCP_PROJECT_ID}" \ + --format='csv[no-heading](network.basename(),direction,allowed[0].IPProtocol,sourceRanges[0],disabled)' \ + 2>"${gcloud_stderr}")" && fw_exists=true || fw_exists=false + +if ${fw_exists}; then + expected_fw="${GCP_NETWORK_NAME},INGRESS,all,${SUBNET_CIDR},False" + if [[ "${current_fw}" != "${expected_fw}" ]]; then + echo "ERROR: Firewall rule '${SUBNET_NAME}' exists but is misconfigured." 
+ echo " Expected: ${expected_fw}" + echo " Actual: ${current_fw}" + exit 1 + fi + # Validate target tags independently; sort before comparing since order is not deterministic + current_tags="$(gcloud compute firewall-rules describe "${SUBNET_NAME}" \ + --project="${GCP_PROJECT_ID}" \ + --format='value(targetTags.list())' \ + | tr ',;' '\n' | LC_ALL=C sort | tr '\n' ',' | sed 's/,$//')" + expected_tags="$(printf '%s\n' ${FIREWALL_TAGS//,/ } | LC_ALL=C sort | tr '\n' ',' | sed 's/,$//')" + if [[ "${current_tags}" != "${expected_tags}" ]]; then + echo "ERROR: Firewall rule '${SUBNET_NAME}' has wrong target tags." + echo " Expected: ${expected_tags}" + echo " Actual: ${current_tags}" + exit 1 + fi + echo "Firewall rule '${SUBNET_NAME}' already exists and matches expected configuration." +elif grep -q "was not found" "${gcloud_stderr}"; then + echo "Creating firewall rule '${SUBNET_NAME}'..." + gcloud compute firewall-rules create "${SUBNET_NAME}" \ + --network="${GCP_NETWORK_NAME}" \ + --project="${GCP_PROJECT_ID}" \ + --direction=INGRESS \ + --priority=1000 \ + --allow=all \ + --source-ranges="${SUBNET_CIDR}" \ + --target-tags="${FIREWALL_TAGS}" + echo "Firewall rule '${SUBNET_NAME}' created." +else + echo "ERROR: gcloud describe failed for firewall rule '${SUBNET_NAME}':" + cat "${gcloud_stderr}" >&2 + exit 1 +fi + +echo "Integration network '${SUBNET_NAME}' is ready." 
diff --git a/ci/tasks/gcp/ensure-integration-network.yml b/ci/tasks/gcp/ensure-integration-network.yml new file mode 100644 index 0000000000..5ec8faf0e1 --- /dev/null +++ b/ci/tasks/gcp/ensure-integration-network.yml @@ -0,0 +1,15 @@ +--- +platform: linux + +inputs: + - name: bosh-stemcells-ci + +params: + GCP_JSON_KEY: + GCP_PROJECT_ID: + GCP_REGION: + GCP_NETWORK_NAME: + SUBNET_INT: + +run: + path: bosh-stemcells-ci/ci/tasks/gcp/ensure-integration-network.sh From 19ac1aaa88cc487b8e83e1037ce3a8f094bfb84c Mon Sep 17 00:00:00 2001 From: aram price Date: Fri, 8 May 2026 22:55:33 +0200 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- ci/tasks/gcp/ensure-integration-network.sh | 62 +++++++++++++--------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/ci/tasks/gcp/ensure-integration-network.sh b/ci/tasks/gcp/ensure-integration-network.sh index a422b9abb8..d8db6f4f5d 100755 --- a/ci/tasks/gcp/ensure-integration-network.sh +++ b/ci/tasks/gcp/ensure-integration-network.sh @@ -20,33 +20,41 @@ gcloud_stderr="$(mktemp)" trap 'rm -f "${gcloud_stderr}"' EXIT echo "Checking for subnet '${SUBNET_NAME}' in region '${GCP_REGION}'..." 
-current_subnet="$(gcloud compute networks subnets describe "${SUBNET_NAME}" \ - --region="${GCP_REGION}" \ +existing_subnet_name="$(gcloud compute networks subnets list \ + --regions="${GCP_REGION}" \ --project="${GCP_PROJECT_ID}" \ - --format='csv[no-heading](network.basename(),ipCidrRange,privateIpGoogleAccess,stackType)' \ - 2>"${gcloud_stderr}")" && subnet_exists=true || subnet_exists=false + --filter="name=('${SUBNET_NAME}')" \ + --format='value(name)' \ + 2>"${gcloud_stderr}")" && subnet_lookup_ok=true || subnet_lookup_ok=false -if ${subnet_exists}; then - expected_subnet="${GCP_NETWORK_NAME},${SUBNET_CIDR},True,IPV4_ONLY" - if [[ "${current_subnet}" != "${expected_subnet}" ]]; then - echo "ERROR: Subnet '${SUBNET_NAME}' exists but is misconfigured." - echo " Expected: ${expected_subnet}" - echo " Actual: ${current_subnet}" - exit 1 +if ${subnet_lookup_ok}; then + if [[ -n "${existing_subnet_name}" ]]; then + current_subnet="$(gcloud compute networks subnets describe "${SUBNET_NAME}" \ + --region="${GCP_REGION}" \ + --project="${GCP_PROJECT_ID}" \ + --format='csv[no-heading](network.basename(),ipCidrRange,privateIpGoogleAccess,stackType)' \ + 2>"${gcloud_stderr}")" + expected_subnet="${GCP_NETWORK_NAME},${SUBNET_CIDR},True,IPV4_ONLY" + if [[ "${current_subnet}" != "${expected_subnet}" ]]; then + echo "ERROR: Subnet '${SUBNET_NAME}' exists but is misconfigured." + echo " Expected: ${expected_subnet}" + echo " Actual: ${current_subnet}" + exit 1 + fi + echo "Subnet '${SUBNET_NAME}' already exists and matches expected configuration." + else + echo "Creating subnet '${SUBNET_NAME}'..." + gcloud compute networks subnets create "${SUBNET_NAME}" \ + --network="${GCP_NETWORK_NAME}" \ + --region="${GCP_REGION}" \ + --range="${SUBNET_CIDR}" \ + --enable-private-ip-google-access \ + --stack-type=IPV4_ONLY \ + --project="${GCP_PROJECT_ID}" + echo "Subnet '${SUBNET_NAME}' created." fi - echo "Subnet '${SUBNET_NAME}' already exists and matches expected configuration." 
-elif grep -q "was not found" "${gcloud_stderr}"; then - echo "Creating subnet '${SUBNET_NAME}'..." - gcloud compute networks subnets create "${SUBNET_NAME}" \ - --network="${GCP_NETWORK_NAME}" \ - --region="${GCP_REGION}" \ - --range="${SUBNET_CIDR}" \ - --enable-private-ip-google-access \ - --stack-type=IPV4_ONLY \ - --project="${GCP_PROJECT_ID}" - echo "Subnet '${SUBNET_NAME}' created." else - echo "ERROR: gcloud describe failed for subnet '${SUBNET_NAME}':" + echo "ERROR: gcloud subnet lookup failed for subnet '${SUBNET_NAME}':" cat "${gcloud_stderr}" >&2 exit 1 fi @@ -69,7 +77,13 @@ if ${fw_exists}; then current_tags="$(gcloud compute firewall-rules describe "${SUBNET_NAME}" \ --project="${GCP_PROJECT_ID}" \ --format='value(targetTags.list())' \ - | tr ',;' '\n' | LC_ALL=C sort | tr '\n' ',' | sed 's/,$//')" + 2>"${gcloud_stderr}" \ + | tr ',;' '\n' | LC_ALL=C sort | tr '\n' ',' | sed 's/,$//')" && current_tags_read=true || current_tags_read=false + if ! ${current_tags_read}; then + echo "ERROR: gcloud describe failed while reading target tags for firewall rule '${SUBNET_NAME}':" + cat "${gcloud_stderr}" >&2 + exit 1 + fi expected_tags="$(printf '%s\n' ${FIREWALL_TAGS//,/ } | LC_ALL=C sort | tr '\n' ',' | sed 's/,$//')" if [[ "${current_tags}" != "${expected_tags}" ]]; then echo "ERROR: Firewall rule '${SUBNET_NAME}' has wrong target tags." 
From a470dc6ca3690c4fd19d3c7e3f866e1e738ff0f6 Mon Sep 17 00:00:00 2001 From: Ned Petrov Date: Tue, 12 May 2026 08:25:20 +0300 Subject: [PATCH 3/3] Harden firewall existence check and validation in ensure-integration-network --- ci/tasks/gcp/ensure-integration-network.sh | 111 +++++++++++++-------- 1 file changed, 69 insertions(+), 42 deletions(-) diff --git a/ci/tasks/gcp/ensure-integration-network.sh b/ci/tasks/gcp/ensure-integration-network.sh index d8db6f4f5d..6f38676044 100755 --- a/ci/tasks/gcp/ensure-integration-network.sh +++ b/ci/tasks/gcp/ensure-integration-network.sh @@ -60,51 +60,78 @@ else fi echo "Checking for firewall rule '${SUBNET_NAME}'..." -current_fw="$(gcloud compute firewall-rules describe "${SUBNET_NAME}" \ +existing_fw_name="$(gcloud compute firewall-rules list \ --project="${GCP_PROJECT_ID}" \ - --format='csv[no-heading](network.basename(),direction,allowed[0].IPProtocol,sourceRanges[0],disabled)' \ - 2>"${gcloud_stderr}")" && fw_exists=true || fw_exists=false - -if ${fw_exists}; then - expected_fw="${GCP_NETWORK_NAME},INGRESS,all,${SUBNET_CIDR},False" - if [[ "${current_fw}" != "${expected_fw}" ]]; then - echo "ERROR: Firewall rule '${SUBNET_NAME}' exists but is misconfigured." - echo " Expected: ${expected_fw}" - echo " Actual: ${current_fw}" - exit 1 - fi - # Validate target tags independently; sort before comparing since order is not deterministic - current_tags="$(gcloud compute firewall-rules describe "${SUBNET_NAME}" \ - --project="${GCP_PROJECT_ID}" \ - --format='value(targetTags.list())' \ - 2>"${gcloud_stderr}" \ - | tr ',;' '\n' | LC_ALL=C sort | tr '\n' ',' | sed 's/,$//')" && current_tags_read=true || current_tags_read=false - if ! 
${current_tags_read}; then - echo "ERROR: gcloud describe failed while reading target tags for firewall rule '${SUBNET_NAME}':" - cat "${gcloud_stderr}" >&2 - exit 1 - fi - expected_tags="$(printf '%s\n' ${FIREWALL_TAGS//,/ } | LC_ALL=C sort | tr '\n' ',' | sed 's/,$//')" - if [[ "${current_tags}" != "${expected_tags}" ]]; then - echo "ERROR: Firewall rule '${SUBNET_NAME}' has wrong target tags." - echo " Expected: ${expected_tags}" - echo " Actual: ${current_tags}" - exit 1 + --filter="name=('${SUBNET_NAME}')" \ + --format='value(name)' \ + 2>"${gcloud_stderr}")" && fw_lookup_ok=true || fw_lookup_ok=false + +if ${fw_lookup_ok}; then + if [[ -n "${existing_fw_name}" ]]; then + current_fw_json="$(gcloud compute firewall-rules describe "${SUBNET_NAME}" \ + --project="${GCP_PROJECT_ID}" \ + --format=json \ + 2>"${gcloud_stderr}")" + + # Validate network, direction, disabled + actual_network="$(echo "${current_fw_json}" | jq -r '.network | split("/") | last')" + actual_direction="$(echo "${current_fw_json}" | jq -r '.direction')" + actual_disabled="$(echo "${current_fw_json}" | jq -r '.disabled')" + + if [[ "${actual_network}" != "${GCP_NETWORK_NAME}" ]] || \ + [[ "${actual_direction}" != "INGRESS" ]] || \ + [[ "${actual_disabled}" != "false" ]]; then + echo "ERROR: Firewall rule '${SUBNET_NAME}' exists but is misconfigured." + echo " Expected network=${GCP_NETWORK_NAME}, direction=INGRESS, disabled=false" + echo " Actual network=${actual_network}, direction=${actual_direction}, disabled=${actual_disabled}" + exit 1 + fi + + # Validate allowed (should be exactly [{IPProtocol: "all"}]) + actual_allowed="$(echo "${current_fw_json}" | jq -c '[.allowed[] | {protocol: .IPProtocol, ports: (.ports // [])}] | sort_by(.protocol)')" + expected_allowed='[{"protocol":"all","ports":[]}]' + if [[ "${actual_allowed}" != "${expected_allowed}" ]]; then + echo "ERROR: Firewall rule '${SUBNET_NAME}' has wrong allowed configuration." 
+ echo " Expected: ${expected_allowed}" + echo " Actual: ${actual_allowed}" + exit 1 + fi + + # Validate sourceRanges (should be exactly the subnet CIDR) + actual_ranges="$(echo "${current_fw_json}" | jq -c '(.sourceRanges // []) | sort')" + expected_ranges="$(printf '["%s"]' "${SUBNET_CIDR}")" + if [[ "${actual_ranges}" != "${expected_ranges}" ]]; then + echo "ERROR: Firewall rule '${SUBNET_NAME}' has wrong source ranges." + echo " Expected: ${expected_ranges}" + echo " Actual: ${actual_ranges}" + exit 1 + fi + + # Validate targetTags (order-insensitive) + actual_tags="$(echo "${current_fw_json}" | jq -c '(.targetTags // []) | sort')" + expected_tags="$(printf '%s\n' ${FIREWALL_TAGS//,/ } | jq -R . | jq -sc 'sort')" + if [[ "${actual_tags}" != "${expected_tags}" ]]; then + echo "ERROR: Firewall rule '${SUBNET_NAME}' has wrong target tags." + echo " Expected: ${expected_tags}" + echo " Actual: ${actual_tags}" + exit 1 + fi + + echo "Firewall rule '${SUBNET_NAME}' already exists and matches expected configuration." + else + echo "Creating firewall rule '${SUBNET_NAME}'..." + gcloud compute firewall-rules create "${SUBNET_NAME}" \ + --network="${GCP_NETWORK_NAME}" \ + --project="${GCP_PROJECT_ID}" \ + --direction=INGRESS \ + --priority=1000 \ + --allow=all \ + --source-ranges="${SUBNET_CIDR}" \ + --target-tags="${FIREWALL_TAGS}" + echo "Firewall rule '${SUBNET_NAME}' created." fi - echo "Firewall rule '${SUBNET_NAME}' already exists and matches expected configuration." -elif grep -q "was not found" "${gcloud_stderr}"; then - echo "Creating firewall rule '${SUBNET_NAME}'..." - gcloud compute firewall-rules create "${SUBNET_NAME}" \ - --network="${GCP_NETWORK_NAME}" \ - --project="${GCP_PROJECT_ID}" \ - --direction=INGRESS \ - --priority=1000 \ - --allow=all \ - --source-ranges="${SUBNET_CIDR}" \ - --target-tags="${FIREWALL_TAGS}" - echo "Firewall rule '${SUBNET_NAME}' created." 
else - echo "ERROR: gcloud describe failed for firewall rule '${SUBNET_NAME}':" + echo "ERROR: gcloud firewall-rules lookup failed for '${SUBNET_NAME}':" cat "${gcloud_stderr}" >&2 exit 1 fi