diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml new file mode 100644 index 0000000000..c2ec8d2d86 --- /dev/null +++ b/.github/actionlint.yaml @@ -0,0 +1,4 @@ +self-hosted-runner: + labels: + - large +config-variables: null diff --git a/.github/actions/append-encrypted-artifacts-help/README.md b/.github/actions/append-encrypted-artifacts-help/README.md new file mode 100644 index 0000000000..46ff813227 --- /dev/null +++ b/.github/actions/append-encrypted-artifacts-help/README.md @@ -0,0 +1,3 @@ +# append-encrypted-artifacts-help + +Appends `.github/scripts/templates/encrypted-artifacts-help.md` to `GITHUB_STEP_SUMMARY`. diff --git a/.github/actions/append-encrypted-artifacts-help/action.yml b/.github/actions/append-encrypted-artifacts-help/action.yml new file mode 100644 index 0000000000..4a4040c48f --- /dev/null +++ b/.github/actions/append-encrypted-artifacts-help/action.yml @@ -0,0 +1,15 @@ +name: Append encrypted artifacts help +description: Append encrypted artifact decryption instructions to the job summary. +inputs: + template: + description: Help template path. + required: false + default: .github/scripts/templates/encrypted-artifacts-help.md +runs: + using: composite + steps: + - name: Append encrypted artifacts help + shell: bash + env: + TEMPLATE: ${{ inputs.template }} + run: cat "$TEMPLATE" >> "$GITHUB_STEP_SUMMARY" diff --git a/.github/actions/gen-run-id/README.md b/.github/actions/gen-run-id/README.md new file mode 100644 index 0000000000..027e99d795 --- /dev/null +++ b/.github/actions/gen-run-id/README.md @@ -0,0 +1,3 @@ +# gen-run-id + +Generates the `date_start` and `randuuid4c` outputs used to name E2E namespaces and artifacts. 
diff --git a/.github/actions/gen-run-id/action.yml b/.github/actions/gen-run-id/action.yml new file mode 100644 index 0000000000..76a706b91b --- /dev/null +++ b/.github/actions/gen-run-id/action.yml @@ -0,0 +1,18 @@ +name: Generate E2E run id +description: Generate timestamp and short random suffix for E2E workflow runs. +outputs: + date_start: + description: Timestamp in %Y%m%d-%H%M%S format. + value: ${{ steps.vars.outputs.date-start }} + randuuid4c: + description: Four random hexadecimal characters. + value: ${{ steps.vars.outputs.randuuid4c }} +runs: + using: composite + steps: + - name: Generate run id + id: vars + shell: bash + run: | + echo "date-start=$(date +%Y%m%d-%H%M%S)" >> "$GITHUB_OUTPUT" + echo "randuuid4c=$(openssl rand -hex 2)" >> "$GITHUB_OUTPUT" diff --git a/.github/actions/gpg-encrypt-and-upload/README.md b/.github/actions/gpg-encrypt-and-upload/README.md new file mode 100644 index 0000000000..fa7bd8ec96 --- /dev/null +++ b/.github/actions/gpg-encrypt-and-upload/README.md @@ -0,0 +1,5 @@ +# gpg-encrypt-and-upload + +Encrypts artifacts with GPG symmetric AES256 encryption and uploads the resulting `.gpg` file. + +Set `archive: "true"` for directory or multi-path inputs that should be zipped before encryption. Set `archive: "false"` for a single file such as a kubeconfig. diff --git a/.github/actions/gpg-encrypt-and-upload/action.yml b/.github/actions/gpg-encrypt-and-upload/action.yml new file mode 100644 index 0000000000..0647c82648 --- /dev/null +++ b/.github/actions/gpg-encrypt-and-upload/action.yml @@ -0,0 +1,81 @@ +name: GPG encrypt and upload +description: Encrypt a file or archived paths with GPG and upload the encrypted artifact. +inputs: + path: + description: File path to encrypt, or paths to zip when archive is true. + required: true + passphrase: + description: GPG symmetric encryption passphrase. + required: true + artifact_name: + description: Base artifact name without .gpg suffix. 
+ required: true + working-directory: + description: Directory used for archive path resolution. + required: false + default: "." + archive: + description: Zip the provided paths before encryption. + required: false + default: "true" + retention-days: + description: Artifact retention in days. + required: false + default: "3" + overwrite: + description: Whether to overwrite an existing artifact. + required: false + default: "true" + include-hidden-files: + description: Whether upload-artifact includes hidden files. + required: false + default: "true" +runs: + using: composite + steps: + - name: Encrypt artifact + id: encrypt + shell: bash + env: + ARTIFACT_NAME: ${{ inputs.artifact_name }} + ARCHIVE: ${{ inputs.archive }} + GPG_PASSPHRASE: ${{ inputs.passphrase }} + INPUT_PATH: ${{ inputs.path }} + WORKING_DIRECTORY: ${{ inputs.working-directory }} + run: | + if [ "$ARCHIVE" = "true" ]; then + pushd "$WORKING_DIRECTORY" + # INPUT_PATH intentionally supports a whitespace-separated path list. 
+          zip -r "$RUNNER_TEMP/${ARTIFACT_NAME}.zip" $INPUT_PATH
+          popd
+          input_file="$RUNNER_TEMP/${ARTIFACT_NAME}.zip"
+          encrypted_file="$RUNNER_TEMP/${ARTIFACT_NAME}.zip.gpg"
+          upload_name="${ARTIFACT_NAME}.zip.gpg"
+        else
+          input_file="$INPUT_PATH"
+          encrypted_file="$RUNNER_TEMP/${ARTIFACT_NAME}.gpg"
+          upload_name="${ARTIFACT_NAME}.gpg"
+        fi
+
+        # The passphrase is fed on stdin (fd 0) instead of --passphrase so it
+        # never appears in the gpg command line / process list. stdin is free
+        # because the data to encrypt is passed as a file argument.
+        gpg --symmetric --batch --yes --pinentry-mode loopback \
+          --passphrase-fd 0 \
+          --cipher-algo AES256 \
+          --output "$encrypted_file" \
+          "$input_file" <<< "$GPG_PASSPHRASE"
+
+        # The intermediate zip is plaintext; drop it once it is encrypted.
+        if [ "$ARCHIVE" = "true" ]; then
+          rm -f "$input_file"
+        fi
+
+        echo "encrypted_path=$encrypted_file" >> "$GITHUB_OUTPUT"
+        echo "upload_name=$upload_name" >> "$GITHUB_OUTPUT"
+
+    - name: Upload encrypted artifact
+      uses: actions/upload-artifact@v7
+      with:
+        name: ${{ steps.encrypt.outputs.upload_name }}
+        path: ${{ steps.encrypt.outputs.encrypted_path }}
+        overwrite: ${{ inputs.overwrite }}
+        include-hidden-files: ${{ inputs.include-hidden-files }}
+        retention-days: ${{ inputs.retention-days }}
+        archive: false
diff --git a/.github/actions/install-d8/action.yml b/.github/actions/install-d8/action.yml
index 79c56e8f8a..31c7d504a9 100644
--- a/.github/actions/install-d8/action.yml
+++ b/.github/actions/install-d8/action.yml
@@ -2,14 +2,49 @@ name: Install deckhouse-cli
 description: Install deckhouse-cli
 inputs:
   version:
-    description: version of deckhouse-cli (like v0.25.1)
+    description: version of deckhouse-cli (like v0.29.24)
     required: false
     default: v0.29.24
+  max-attempts:
+    description: maximum number of install attempts on transient failures
+    required: false
+    default: "5"
+  retry-delay:
+    description: fixed delay between retries in seconds
+    required: false
+    default: "10"
 runs:
   using: "composite"
   steps:
     - name: Install deckhouse-cli
       shell: bash
+      env:
+        D8_VERSION: ${{ inputs.version }}
+        MAX_ATTEMPTS: ${{ inputs.max-attempts }}
+        RETRY_DELAY: ${{ inputs.retry-delay }}
       run: |
-        sh -c "$(curl -fsSL https://raw.githubusercontent.com/deckhouse/deckhouse-cli/main/tools/install.sh)" "" --version ${{ inputs.version }}
-        echo "/opt/deckhouse/bin" >> $GITHUB_PATH
+        # GitHub downloads can intermittently fail with 5xx errors or hang
+        # both when fetching install.sh and when install.sh itself downloads
+        # the d8 binary. The outer loop retries the whole install (covering
+        # both downloads), while the curl timeouts turn a hung install.sh
+        # fetch into a fast failure that the loop can retry.
+        #
+        # install.sh is saved to a temporary file and executed only after curl
+        # reports success: piping curl straight into sh would run whatever
+        # prefix of the script arrived before a mid-transfer failure.
+        install_script="$(mktemp)"
+        trap 'rm -f "${install_script}"' EXIT
+
+        for ((attempt = 1; attempt <= MAX_ATTEMPTS; attempt++)); do
+          if curl -fsSL \
+            --connect-timeout 10 --max-time 60 \
+            --output "${install_script}" \
+            https://raw.githubusercontent.com/deckhouse/deckhouse-cli/main/tools/install.sh \
+            && sh "${install_script}" --version "${D8_VERSION}"; then
+            echo "deckhouse-cli installed successfully on attempt ${attempt}"
+            echo "/opt/deckhouse/bin" >> "${GITHUB_PATH}"
+            exit 0
+          fi
+
+          # No sleep after the last attempt: there is nothing left to retry.
+          if [ "${attempt}" -lt "${MAX_ATTEMPTS}" ]; then
+            echo "::warning::Install attempt ${attempt}/${MAX_ATTEMPTS} failed, retrying in ${RETRY_DELAY}s..."
+            sleep "${RETRY_DELAY}"
+          fi
+        done
+
+        echo "::error::Failed to install deckhouse-cli after ${MAX_ATTEMPTS} attempts"
+        exit 1
diff --git a/.github/actions/registry-login/README.md b/.github/actions/registry-login/README.md
new file mode 100644
index 0000000000..6e6d7da7a0
--- /dev/null
+++ b/.github/actions/registry-login/README.md
@@ -0,0 +1,5 @@
+# registry-login
+
+Parses a base64-encoded dockerconfigjson secret, logs in to its registry, and exposes the parsed registry host as the `registry` output.
+
+The action keeps the existing E2E workflow parsing approach based on `base64 -d`, `jq`, a second `base64 -d`, and `cut`.
diff --git a/.github/actions/registry-login/action.yml b/.github/actions/registry-login/action.yml
new file mode 100644
index 0000000000..81f0e72b1f
--- /dev/null
+++ b/.github/actions/registry-login/action.yml
@@ -0,0 +1,28 @@
+name: Registry login
+description: Parse a dockerconfigjson secret, log in to the registry, and expose the registry host.
+inputs:
+  docker_cfg:
+    description: Base64-encoded dockerconfigjson content.
+    required: true
+outputs:
+  registry:
+    description: Registry host parsed from the dockerconfigjson.
+    value: ${{ steps.login.outputs.registry }}
+runs:
+  using: composite
+  steps:
+    - name: Log in to private registry
+      id: login
+      shell: bash
+      env:
+        DOCKER_CFG: ${{ inputs.docker_cfg }}
+      run: |
+        # Decode the secret once; every field below is read from this JSON.
+        DOCKER_CFG_JSON=$(base64 -d <<< "$DOCKER_CFG")
+
+        # Only the first auths entry is used. Iterating over all entries would
+        # yield one value per registry and turn REGISTRY/USERNAME/PASSWORD
+        # into multi-line strings for configs with several registries.
+        REGISTRY=$(jq -r '.auths | to_entries | first | .key // empty' <<< "$DOCKER_CFG_JSON")
+        # The auth field is itself base64 of "username:password".
+        AUTH=$(jq -r '.auths | to_entries | first | .value.auth // empty' <<< "$DOCKER_CFG_JSON" | base64 -d)
+        USERNAME=$(cut -d ':' -f1 <<< "$AUTH")
+        # -f2- keeps passwords that contain ':' intact.
+        PASSWORD=$(cut -d ':' -f2- <<< "$AUTH")
+
+        # Fail with a clear message instead of masking/logging in with empty values.
+        if [ -z "$REGISTRY" ] || [ -z "$USERNAME" ] || [ -z "$PASSWORD" ]; then
+          echo "::error::docker_cfg does not contain a registry entry with credentials"
+          exit 1
+        fi
+
+        echo "::add-mask::$USERNAME"
+        echo "::add-mask::$PASSWORD"
+        # printf writes the password verbatim, whatever characters it contains.
+        printf '%s' "$PASSWORD" | docker login "$REGISTRY" --username "$USERNAME" --password-stdin
+
+        echo "registry=$REGISTRY" >> "$GITHUB_OUTPUT"
diff --git a/.github/actions/setup-e2e-toolchain/README.md b/.github/actions/setup-e2e-toolchain/README.md
new file mode 100644
index 0000000000..e323d66812
--- /dev/null
+++ b/.github/actions/setup-e2e-toolchain/README.md
@@ -0,0 +1,6 @@
+# setup-e2e-toolchain
+
+Installs the common E2E workflow toolchain: checkout, Task, deckhouse-cli (`d8`), and kubectl.
+
+Use `checkout: "false"` for jobs that already checked out the repository before calling this action.
+Set `install-htpasswd: "true"` for jobs that need the `htpasswd` utility from `apache2-utils`.
diff --git a/.github/actions/setup-e2e-toolchain/action.yml b/.github/actions/setup-e2e-toolchain/action.yml
new file mode 100644
index 0000000000..1fb7ac9b91
--- /dev/null
+++ b/.github/actions/setup-e2e-toolchain/action.yml
@@ -0,0 +1,67 @@
+name: Setup E2E toolchain
+description: Checkout repository and install common E2E CLI tools.
+inputs:
+  checkout:
+    description: Run actions/checkout before installing tools.
+    required: false
+    default: "true"
+  task-version:
+    description: go-task version to install.
+    required: false
+    default: 3.x
+  d8-version:
+    description: deckhouse-cli version to install.
+    required: false
+    default: v0.29.24
+  install-kubectl:
+    description: Install kubectl via azure/setup-kubectl.
+    required: false
+    default: "true"
+  install-htpasswd:
+    description: Install htpasswd via apache2-utils.
+    required: false
+    default: "false"
+  github-token:
+    description: GitHub token passed to go-task/setup-task (defaults to the workflow token).
+    required: false
+    # Default to the workflow token so go-task/setup-task receives a token
+    # even when the caller does not pass one explicitly.
+    default: ${{ github.token }}
+runs:
+  using: composite
+  steps:
+    - name: Checkout
+      if: inputs.checkout == 'true'
+      uses: actions/checkout@v6
+
+    - name: Install Task
+      uses: go-task/setup-task@v2
+      with:
+        version: ${{ inputs.task-version }}
+        repo-token: ${{ inputs.github-token }}
+
+    # The d8 binary is cached per version and runner OS; the install step
+    # below runs only when the cache has no entry for that key.
+    - name: Restore d8 cache
+      id: d8-cache
+      uses: actions/cache@v4
+      with:
+        path: /opt/deckhouse/bin/d8
+        key: d8-${{ inputs.d8-version }}-${{ runner.os }}
+
+    - name: Setup d8
+      if: steps.d8-cache.outputs.cache-hit != 'true'
+      uses: ./.github/actions/install-d8
+      with:
+        version: ${{ inputs.d8-version }}
+
+    # Runs unconditionally: on a cache hit the install step is skipped, so
+    # this is the only place the d8 directory is added to PATH.
+    - name: Add d8 to PATH
+      shell: bash
+      run: echo "/opt/deckhouse/bin" >> "$GITHUB_PATH"
+
+    - name: Install kubectl CLI
+      if: inputs.install-kubectl == 'true'
+      uses: azure/setup-kubectl@v4
+
+    - name: Install htpasswd utility
+      if: inputs.install-htpasswd == 'true'
+      shell: bash
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y apache2-utils
diff --git a/.github/actions/use-nested-kubeconfig/README.md
b/.github/actions/use-nested-kubeconfig/README.md
new file mode 100644
index 0000000000..408e89e26c
--- /dev/null
+++ b/.github/actions/use-nested-kubeconfig/README.md
@@ -0,0 +1,3 @@
+# use-nested-kubeconfig
+
+Decodes the nested-cluster kubeconfig used by E2E workflows into `~/.kube/config`, fixes permissions, selects the requested context, and can wait for `kubectl get nodes` to succeed.
diff --git a/.github/actions/use-nested-kubeconfig/action.yml b/.github/actions/use-nested-kubeconfig/action.yml
new file mode 100644
index 0000000000..0447f59f6f
--- /dev/null
+++ b/.github/actions/use-nested-kubeconfig/action.yml
@@ -0,0 +1,58 @@
+name: Use nested kubeconfig
+description: Decode a nested-cluster kubeconfig and optionally wait for the API.
+inputs:
+  kubeconfig:
+    description: Double-base64 encoded kubeconfig.
+    required: true
+  context:
+    description: Context to select after writing kubeconfig.
+    required: false
+    default: nested-e2e-nested-sa
+  check-api:
+    description: Run kubectl get nodes with retries.
+    required: false
+    default: "true"
+  attempts:
+    description: Number of kubectl get nodes attempts.
+    required: false
+    default: "30"
+  delay-seconds:
+    description: Delay between API check attempts.
+    required: false
+    default: "10"
+runs:
+  using: composite
+  steps:
+    - name: Configure nested kubeconfig
+      shell: bash
+      env:
+        KUBECONFIG_B64: ${{ inputs.kubeconfig }}
+        KUBECONTEXT: ${{ inputs.context }}
+        CHECK_API: ${{ inputs.check-api }}
+        ATTEMPTS: ${{ inputs.attempts }}
+        DELAY_SECONDS: ${{ inputs.delay-seconds }}
+      run: |
+        # Restrict permissions before the credentials are written, so the file
+        # is never readable by other users between creation and chmod.
+        umask 077
+        mkdir -p ~/.kube
+        echo "[INFO] Configure kubeconfig for nested cluster"
+        # The input is base64 of base64, hence the two decode passes.
+        printf '%s' "$KUBECONFIG_B64" | base64 -d | base64 -d > ~/.kube/config
+        # Still needed when ~/.kube/config already existed with wider permissions.
+        chmod 600 ~/.kube/config
+
+        # An empty context input keeps whatever current-context the file defines.
+        if [ -n "$KUBECONTEXT" ]; then
+          kubectl config use-context "$KUBECONTEXT"
+        fi
+
+        if [ "$CHECK_API" != "true" ]; then
+          exit 0
+        fi
+
+        for i in $(seq 1 "$ATTEMPTS"); do
+          echo "[INFO] Check nested kube-api availability ${i}/${ATTEMPTS}"
+          if kubectl get nodes; then
+            echo "[SUCCESS] Nested kube-api is available"
+            exit 0
+          fi
+          # Do not delay the failure report after the final attempt.
+          if [ "$i" -lt "$ATTEMPTS" ]; then
+            sleep "$DELAY_SECONDS"
+          fi
+        done
+
+        echo "[ERROR] Nested kube-api is not available"
+        exit 1
diff --git a/.github/scripts/bash/e2e/cleanup-nightly-resources.sh b/.github/scripts/bash/e2e/cleanup-nightly-resources.sh
new file mode 100644
index 0000000000..60ba28546b
--- /dev/null
+++ b/.github/scripts/bash/e2e/cleanup-nightly-resources.sh
@@ -0,0 +1,85 @@
+#!/usr/bin/env bash
+
+# Copyright 2026 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -Eeuo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=.github/scripts/bash/e2e/common.sh
+source "${SCRIPT_DIR}/common.sh"
+
+# Tunables, overridable from the environment.
+LABEL_SELECTOR="${LABEL_SELECTOR:-test=nightly-e2e}"
+KEEP_HOURS="${KEEP_HOURS:-47}"
+FRIDAY_KEEP_HOURS="${FRIDAY_KEEP_HOURS:-71}"
+
+# Single reference point so every resource is aged against the same "now".
+current_date_seconds="$(date -u +%s)"
+
+# Print one compact JSON object ({name, created_at}) per resource of the given
+# kind that matches LABEL_SELECTOR.
+collect_items_json() {
+  local resource="$1"
+
+  kubectl get "${resource}" -l "${LABEL_SELECTOR}" -o json \
+    | jq -c '.items[] | {name: .metadata.name, created_at: .metadata.creationTimestamp}'
+}
+
+# Print "keep" or "delete" for a resource created at the given timestamp.
+# A resource is kept while it is younger than KEEP_HOURS, or younger than
+# FRIDAY_KEEP_HOURS when it was created on a Friday (UTC).
+should_keep() {
+  local created_at="$1"
+  local resource_created_at_seconds
+  local age_seconds
+  local weekday_of_day
+
+  # An unparseable timestamp must never lead to a deletion. Without this guard
+  # the empty result evaluates to 0 in the arithmetic below, the age becomes
+  # huge and the verdict is "delete".
+  if ! resource_created_at_seconds="$(date -d "${created_at}" -u +%s 2>/dev/null)"; then
+    echo "[WARNING] Cannot parse creation timestamp '${created_at}', keeping the resource" >&2
+    echo "keep"
+    return 0
+  fi
+
+  age_seconds="$(( current_date_seconds - resource_created_at_seconds ))"
+  # %u: ISO weekday, 1 = Monday ... 5 = Friday.
+  weekday_of_day="$(date -d "${created_at}" -u +%u)"
+
+  if [ "${age_seconds}" -lt "$(( KEEP_HOURS * 3600 ))" ]; then
+    echo "keep"
+    return 0
+  fi
+
+  if [ "${weekday_of_day}" -eq 5 ] && [ "${age_seconds}" -lt "$(( FRIDAY_KEEP_HOURS * 3600 ))" ]; then
+    echo "keep"
+    return 0
+  fi
+
+  echo "delete"
+}
+
+# Delete every labelled resource of the given kind that should_keep marks for
+# deletion. Best effort: a failed listing or deletion is reported but does not
+# abort the script.
+cleanup_kind() {
+  local kind="$1"
+  local item
+  local name
+  local created_at
+  local decision
+
+  echo "[INFO] Process ${kind} with label ${LABEL_SELECTOR}"
+  collect_items_json "${kind}" | while read -r item; do
+    name="$(echo "${item}" | jq -r '.name')"
+    created_at="$(echo "${item}" | jq -r '.created_at')"
+    [ -z "${name}" ] && continue
+
+    decision="$(should_keep "${created_at}")"
+    # Fail closed: only an explicit "delete" verdict removes a resource.
+    if [ "${decision}" != "delete" ]; then
+      printf "%-63s %22s\n" "[INFO] Keep ${kind}/${name}:" "created_at ${created_at}"
+      continue
+    fi
+
+    printf "%-63s %22s\n" "[INFO] Delete ${kind}/${name}:" "created_at ${created_at}"
+    kubectl delete "${kind}" "${name}" --timeout=300s || true
+  done || echo "[WARNING] Failed to list or process ${kind}" >&2
+}
+
+cleanup_kind "namespaces"
+echo " "
+cleanup_kind "vmclass"
diff --git a/.github/scripts/bash/e2e/collect-cloud-init-logs.sh b/.github/scripts/bash/e2e/collect-cloud-init-logs.sh
new file mode 100644
index 0000000000..da9f70043f
--- /dev/null
+++ b/.github/scripts/bash/e2e/collect-cloud-init-logs.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# Copyright 2026 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Copies /var/log/cloud-init*.log from the first VM labelled
+# group=PREFIX-master in NAMESPACE into ./SETUP_CLUSTER_TYPE_PATH/tmp/.
+
+set -Eeuo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=.github/scripts/bash/e2e/common.sh
+source "${SCRIPT_DIR}/common.sh"
+# shellcheck source=.github/scripts/bash/e2e/d8-ssh.sh
+source "${SCRIPT_DIR}/d8-ssh.sh"
+
+if [ "$#" -ne 4 ]; then
+  echo "[ERROR] Usage: $0 NAMESPACE PREFIX DEFAULT_USER SETUP_CLUSTER_TYPE_PATH" >&2
+  exit 1
+fi
+
+NAMESPACE="$1"
+PREFIX="$2"
+DEFAULT_USER="$3"
+setup_cluster_type_path="$4"
+export NAMESPACE DEFAULT_USER
+
+# Name of the first VM carrying the master group label.
+nested_master=$(kubectl -n "${NAMESPACE}" get vm -l "group=${PREFIX}-master" -o jsonpath="{.items[0].metadata.name}")
+
+d8vscp "${DEFAULT_USER}@${nested_master}.$NAMESPACE:/var/log/cloud-init*.log" "./${setup_cluster_type_path}/tmp/"
diff --git a/.github/scripts/bash/e2e/common.sh b/.github/scripts/bash/e2e/common.sh
index 24899f23e8..9fbfc0186a 100644
--- a/.github/scripts/bash/e2e/common.sh
+++ b/.github/scripts/bash/e2e/common.sh
@@ -1,5 +1,7 @@
 #!/usr/bin/env bash
 
+set -Eeuo pipefail
+
 on_error() {
   local exit_code=$?
echo "[ERROR] Command failed with exit code ${exit_code} at line ${BASH_LINENO[0]}: ${BASH_COMMAND}" >&2 diff --git a/.github/scripts/bash/e2e/configure-csi-nfs.sh b/.github/scripts/bash/e2e/configure-csi-nfs.sh new file mode 100644 index 0000000000..7ebce81d2f --- /dev/null +++ b/.github/scripts/bash/e2e/configure-csi-nfs.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +# Copyright 2026 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -Eeuo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=.github/scripts/bash/e2e/common.sh +source "${SCRIPT_DIR}/common.sh" + +nfs_ready() { + local count=90 + local controller + local csi_controller + local csi_node_desired + local csi_node_ready + + for i in $(seq 1 "${count}"); do + echo "[INFO] Check d8-csi-nfs pods (attempt ${i}/${count})" + controller="$(kubectl -n d8-csi-nfs get deploy controller -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0")" + csi_controller="$(kubectl -n d8-csi-nfs get deploy csi-controller -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0")" + csi_node_desired="$(kubectl -n d8-csi-nfs get ds csi-node -o jsonpath='{.status.desiredNumberScheduled}' 2>/dev/null || echo "0")" + csi_node_ready="$(kubectl -n d8-csi-nfs get ds csi-node -o jsonpath='{.status.numberReady}' 2>/dev/null || echo "0")" + + if [[ "${controller}" -ge 1 && "${csi_controller}" -ge 1 && "${csi_node_desired}" -gt 0 && "${csi_node_ready}" -eq 
"${csi_node_desired}" ]]; then + echo "[SUCCESS] NFS CSI is ready (controller=${controller}, csi-controller=${csi_controller}, csi-node=${csi_node_ready}/${csi_node_desired})" + return 0 + fi + + echo "[WARNING] NFS CSI not ready: controller=${controller}, csi-controller=${csi_controller}, csi-node=${csi_node_ready}/${csi_node_desired}" + if (( i % 5 == 0 )); then + echo "[DEBUG] Pods in d8-csi-nfs:" + kubectl -n d8-csi-nfs get pods || echo "[WARNING] Failed to retrieve pods" + echo "[DEBUG] Deployments in d8-csi-nfs:" + kubectl -n d8-csi-nfs get deploy || echo "[WARNING] Failed to retrieve deployments" + echo "[DEBUG] DaemonSets in d8-csi-nfs:" + kubectl -n d8-csi-nfs get ds || echo "[WARNING] Failed to retrieve daemonsets" + echo "[DEBUG] csi-nfs module status:" + kubectl get modules csi-nfs -o wide || echo "[WARNING] Failed to retrieve module" + fi + sleep 10 + done + + echo "[ERROR] NFS CSI did not become ready in time" + kubectl -n d8-csi-nfs get pods || true + exit 1 +} + +echo "[INFO] Apply csi-nfs ModuleConfig, ModulePullOverride, snapshot-controller" +kubectl apply -f mc.yaml + +echo "[INFO] Wait for csi-nfs module to be ready" +kubectl wait --for=jsonpath='{.status.phase}'=Ready modules csi-nfs --timeout=300s + +echo "[INFO] Wait for csi-nfs pods to be ready" +nfs_ready + +echo "[INFO] Apply NFSStorageClass" +envsubst < storageclass.yaml | kubectl apply -f - + +configure_default_sc="${CONFIGURE_DEFAULT_SC:-true}" +if [[ "${configure_default_sc}" == "true" ]]; then + echo "[INFO] Configure default storage class" + # The workflow runs this script from test/dvp-static-cluster/storage/nfs. 
+ ./default-sc-configure.sh +else + echo "[INFO] Skip default storage class configuration" +fi + +echo "[INFO] Show existing storageclasses" +kubectl get storageclass diff --git a/.github/scripts/bash/e2e/configure-sds-local-volume.sh b/.github/scripts/bash/e2e/configure-sds-local-volume.sh new file mode 100644 index 0000000000..da54a5b34c --- /dev/null +++ b/.github/scripts/bash/e2e/configure-sds-local-volume.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +# Copyright 2026 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -Eeuo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=.github/scripts/bash/e2e/common.sh +source "${SCRIPT_DIR}/common.sh" +# shellcheck source=.github/scripts/bash/e2e/deckhouse.sh +source "${SCRIPT_DIR}/deckhouse.sh" + +sds_local_volume_ready() { + local count=90 + local local_volume_status + local csi_node_desired + local csi_node_ready + local deploy_count + local controller_ready + + for i in $(seq 1 "${count}"); do + local_volume_status="$(kubectl get modules sds-local-volume -o jsonpath='{.status.phase}' 2>/dev/null || echo "False")" + csi_node_desired="$(kubectl -n d8-sds-local-volume get ds csi-node -o jsonpath='{.status.desiredNumberScheduled}' 2>/dev/null || echo "0")" + csi_node_ready="$(kubectl -n d8-sds-local-volume get ds csi-node -o jsonpath='{.status.numberReady}' 2>/dev/null || echo "0")" + deploy_count="$(kubectl -n d8-sds-local-volume get deploy -o name 2>/dev/null | wc -l | tr -d ' ')" + controller_ready=false + + if [[ "${deploy_count}" -gt 0 ]] && kubectl -n d8-sds-local-volume wait --for=condition=Available deploy --all --timeout=10s >/dev/null 2>&1; then + controller_ready=true + fi + + if [[ "${local_volume_status}" == "Ready" && "${csi_node_desired}" -gt 0 && "${csi_node_ready}" -eq "${csi_node_desired}" && "${controller_ready}" == "true" ]]; then + echo "[SUCCESS] sds-local-volume is ready (module=${local_volume_status}, csi-node=${csi_node_ready}/${csi_node_desired}, deployments=${deploy_count})" + kubectl get modules sds-local-volume + kubectl -n d8-sds-local-volume get pods + return 0 + fi + + echo "[INFO] Waiting for sds-local-volume to be ready (attempt ${i}/${count})" + echo "[WARNING] Current state: module=${local_volume_status}, csi-node=${csi_node_ready}/${csi_node_desired}, deployments=${deploy_count}, controller_ready=${controller_ready}" + if (( i % 5 == 0 )); then + kubectl get ns d8-sds-local-volume || true + kubectl get modules sds-local-volume -o wide || true + 
kubectl -n d8-sds-local-volume get pods || true + kubectl -n d8-sds-local-volume get ds || true + kubectl -n d8-sds-local-volume get deploy || true + d8 s queue list | head -n 25 || true + fi + sleep 10 + done + + echo "[ERROR] sds-local-volume did not become ready in time" + kubectl get modules sds-local-volume -o wide || true + kubectl -n d8-sds-local-volume get pods || true + d8 s queue list || true + echo "::group::deckhouse logs" + d8 s logs | tail -n 100 + echo "::endgroup::" + exit 1 +} + +echo "[INFO] Apply sds-local-volume ModuleConfig" +kubectl apply -f mc.yaml + +echo "[INFO] Wait for sds-local-volume module queue" +d8_queue +kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sds-local-volume --timeout=300s +sds_local_volume_ready + +chmod +x ./lsc-gen.sh +./lsc-gen.sh + +echo "[INFO] Show resulting local storage classes" +kubectl get localstorageclass || true diff --git a/.github/scripts/bash/e2e/configure-sds-replicated.sh b/.github/scripts/bash/e2e/configure-sds-replicated.sh new file mode 100644 index 0000000000..deec4479fc --- /dev/null +++ b/.github/scripts/bash/e2e/configure-sds-replicated.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash + +# Copyright 2026 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+set -Eeuo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=.github/scripts/bash/e2e/wait-sds-replicated.sh
+source "${SCRIPT_DIR}/wait-sds-replicated.sh"
+
+d8_queue
+
+kubectl apply -f ../sds-node-configurator/mc.yaml
+kubectl apply -f mc.yaml
+
+echo "[INFO] Wait for sds-node-configurator"
+kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sds-node-configurator --timeout=300s
+
+echo "[INFO] Wait for sds-replicated-volume to be ready"
+sds_replicated_ready
+kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sds-replicated-volume --timeout=300s
+
+echo "[INFO] Wait BlockDevice are ready"
+blockdevices_ready
+
+echo "[INFO] Wait pods and webhooks sds-replicated pods"
+sds_pods_ready
+
+chmod +x ../sds-node-configurator/lvg-gen.sh
+../sds-node-configurator/lvg-gen.sh
+
+chmod +x rsc-gen.sh
+./rsc-gen.sh
+
+echo "[INFO] Show existing storageclasses"
+# List once and filter the captured text. With pipefail,
+# `kubectl ... | grep -q` can report failure even on a match: grep exits at
+# the first hit and kubectl may then be terminated by SIGPIPE, which would
+# print a false "No nested storageclasses" warning.
+nested_storageclasses="$(kubectl get storageclass | grep nested || true)"
+if [ -z "${nested_storageclasses}" ]; then
+  echo "[WARNING] No nested storageclasses"
+else
+  echo "${nested_storageclasses}"
+  echo "[SUCCESS] Done"
+fi
diff --git a/.github/scripts/bash/e2e/configure-virtualization-release.sh b/.github/scripts/bash/e2e/configure-virtualization-release.sh
new file mode 100644
index 0000000000..2ca1b39cef
--- /dev/null
+++ b/.github/scripts/bash/e2e/configure-virtualization-release.sh
@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+
+# Copyright 2026 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +set -Eeuo pipefail + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=.github/scripts/bash/e2e/common.sh +source "${SCRIPT_DIR}/common.sh" + +require_env DEV_REGISTRY_DOCKER_CFG +require_env CURRENT_RELEASE + +required_env_value() { + local name="$1" + + require_env "${name}" + printf '%s' "${!name}" +} + +dev_registry_docker_cfg="$(required_env_value DEV_REGISTRY_DOCKER_CFG)" +current_release="$(required_env_value CURRENT_RELEASE)" + +REGISTRY="$(base64 -d <<< "${dev_registry_docker_cfg}" | jq '.auths | to_entries | .[] | .key' -r)" + +echo "[INFO] Apply ModuleSource prod config" +kubectl apply -f - </dev/null; then + kubectl get vmclass/generic -o json | jq 'del(.status) | del(.metadata) | .metadata = {"name":"generic-for-e2e","annotations":{"virtualmachineclass.virtualization.deckhouse.io/is-default-class":"true"}} ' | kubectl create -f - +fi + +echo "[INFO] Showing existing vmclasses" +kubectl get vmclass diff --git a/.github/scripts/bash/e2e/d8-ssh.sh b/.github/scripts/bash/e2e/d8-ssh.sh new file mode 100644 index 0000000000..6480b455d7 --- /dev/null +++ b/.github/scripts/bash/e2e/d8-ssh.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +# Copyright 2026 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+set -Eeuo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=.github/scripts/bash/e2e/common.sh
+source "${SCRIPT_DIR}/common.sh"
+
+# Run a command on a VM over `d8 v ssh`.
+# Usage: d8vssh [host] command
+# With a single argument the host is taken from $nested_master.
+# Requires DEFAULT_USER and NAMESPACE in the environment.
+d8vssh() {
+  require_env DEFAULT_USER
+  require_env NAMESPACE
+
+  local default_user="${DEFAULT_USER:-}"
+  local namespace="${NAMESPACE:-}"
+  local host
+  local cmd
+
+  case "$#" in
+    1)
+      require_env nested_master
+      host="${nested_master:-}"
+      cmd="$1"
+      ;;
+    2)
+      host="$1"
+      cmd="$2"
+      ;;
+    *)
+      echo "[ERROR] Usage: d8vssh [host] command" >&2
+      return 1
+      ;;
+  esac
+
+  d8 v ssh -i ./tmp/ssh/cloud \
+    --local-ssh=true \
+    --local-ssh-opts="-o StrictHostKeyChecking=no" \
+    --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \
+    --local-ssh-opts="-o ServerAliveInterval=15" \
+    --local-ssh-opts="-o ServerAliveCountMax=8" \
+    --local-ssh-opts="-o ConnectTimeout=10" \
+    "${default_user}@${host}.${namespace}" \
+    -c "$cmd"
+}
+
+# Copy files with `d8 v scp`.
+# Usage: d8vscp source dest
+d8vscp() {
+  local source=$1
+  local dest=$2
+
+  d8 v scp -i ./tmp/ssh/cloud \
+    --local-ssh=true \
+    --local-ssh-opts="-o StrictHostKeyChecking=no" \
+    --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \
+    "$source" "$dest"
+  echo "d8vscp: ${source} -> ${dest} - done"
+}
diff --git a/.github/scripts/bash/e2e/deckhouse.sh b/.github/scripts/bash/e2e/deckhouse.sh
index f5534c17c5..bafc0d657c 100644
--- a/.github/scripts/bash/e2e/deckhouse.sh
+++ b/.github/scripts/bash/e2e/deckhouse.sh
@@ -7,6 +7,46 @@ show_deckhouse_state() {
   d8 s queue list | head -n25 || true
 }
 
+# Print the number that precedes " active" in `d8 s queue list` output.
+# Prints nothing on stdout when the list cannot be retrieved or parsed; the
+# warning goes to stderr so callers capturing stdout get a clean value and the
+# message still reaches the job log.
+d8_queue_list() {
+  d8 s queue list | grep -Po '([0-9]+)(?= active)' || echo "[WARNING] Failed to retrieve list queue" >&2
+}
+
+# Poll until d8_queue_list reports 0, up to 90 attempts 10 seconds apart.
+# Returns 0 once the queue is clear and 1 when the attempts are exhausted.
+d8_queue() {
+  local count=90
+  local delay=10
+  local queue_count
+
+  for i in $(seq 1 "$count"); do
+    queue_count="$(d8_queue_list)"
+    if [ -n "$queue_count" ] && [ "$queue_count" = "0" ]; then
+      echo "[SUCCESS] Queue is clear"
+      return 0
+    fi
+
+    echo "[INFO] Wait until queues are empty ${i}/${count}"
+    if (( i % 5 == 0 )); then
+      echo "[INFO] Show queue list"
+      d8 s queue list | head -n25 || echo "[WARNING] Failed to retrieve list queue"
+      echo " "
+    fi
+
+    if (( i % 10 == 0 )); then
+      echo "[INFO] deckhouse logs"
+      echo "::group::deckhouse logs"
+      # Diagnostics only: a failed log fetch must not abort the wait loop
+      # under errexit/pipefail or leave the log group unclosed.
+      d8 s logs | tail -n 100 || true
+      echo "::endgroup::"
+      echo " "
+    fi
+
+    # No sleep after the final attempt.
+    if [ "$i" -lt "$count" ]; then
+      sleep "$delay"
+    fi
+  done
+
+  echo "[ERROR] Deckhouse queue is not clear after ${count} attempts"
+  return 1
+}
+
 wait_for_deckhouse_queue() {
   local count=60
   local delay=10
diff --git a/.github/scripts/bash/e2e/detect-k8s-version.sh b/.github/scripts/bash/e2e/detect-k8s-version.sh
new file mode 100644
index 0000000000..9200c7ddd2
--- /dev/null
+++ b/.github/scripts/bash/e2e/detect-k8s-version.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+
+# Copyright 2026 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
set -Eeuo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=.github/scripts/bash/e2e/common.sh
source "${SCRIPT_DIR}/common.sh"

# Detects the Kubernetes server version and appends three key=value lines to
# the file given as the only argument:
#   server-version  gitVersion reported by the API server
#   usb-supported   true when the server is >= 1.34, false otherwise
#   label-filter    "!usb-precheck" when USB is unsupported, empty otherwise
if [ "$#" -ne 1 ]; then
  echo "[ERROR] Usage: $0 <github-output-file>" >&2
  exit 1
fi

github_output="$1"

version_json="$(kubectl version -o json)"
server_version="$(echo "${version_json}" | jq -r '.serverVersion.gitVersion')"
# Keep digits only: the minor field may carry a suffix such as "34+".
server_major="$(echo "${version_json}" | jq -r '.serverVersion.major' | tr -cd '0-9')"
server_minor="$(echo "${version_json}" | jq -r '.serverVersion.minor' | tr -cd '0-9')"

if [[ -z "${server_major}" || -z "${server_minor}" ]]; then
  echo "[ERROR] Failed to parse Kubernetes server version: ${server_version}" >&2
  exit 1
fi

label_filter=""
usb_supported=false

# 10# forces base 10 so a value with a leading zero is never read as octal.
if (( 10#${server_major} > 1 || (10#${server_major} == 1 && 10#${server_minor} >= 34) )); then
  usb_supported=true
  echo "[INFO] Kubernetes server version ${server_version} supports USB E2E tests"
else
  label_filter="!usb-precheck"
  echo "[INFO] Kubernetes server version ${server_version} does not support USB E2E tests"
  echo "[INFO] USB-labeled specs will be excluded with label filter: ${label_filter}"
fi

{
  echo "server-version=${server_version}"
  echo "usb-supported=${usb_supported}"
  echo "label-filter=${label_filter}"
} >> "${github_output}"

# ---------------------------------------------------------------------------
# File: .github/scripts/bash/e2e/gen-nested-kubeconfig.sh
#   (new file, mode 100644, index 0000000000..9fbf5908a8)
# ---------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright 2026 Flant JSC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -Eeuo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=.github/scripts/bash/e2e/common.sh
source "${SCRIPT_DIR}/common.sh"
# shellcheck source=.github/scripts/bash/e2e/d8-ssh.sh
source "${SCRIPT_DIR}/d8-ssh.sh"

# Generates a kubeconfig inside the nested cluster, copies it to the runner and
# writes it (base64-encoded twice) as `kubeconfig=` to the output file.
# Arguments, in order: kubeconfig path, namespace, VM group prefix, SSH user,
# output file.
if [ "$#" -ne 5 ]; then
  echo "[ERROR] Usage: $0 <kubeconfig-path> <namespace> <prefix> <default-user> <github-output-file>" >&2
  exit 1
fi

kubeconfig_path="$1"
NAMESPACE="$2"
PREFIX="$3"
DEFAULT_USER="$4"
github_output="$5"
# d8vssh reads these two through require_env.
export NAMESPACE DEFAULT_USER

# The jsonpath lookup fails when no VM carries the label; turn that into an
# explicit error instead of a bare kubectl message.
nested_master="$(kubectl -n "${NAMESPACE}" get vm -l "group=${PREFIX}-master" -o jsonpath="{.items[0].metadata.name}")" || nested_master=""
if [[ -z "${nested_master}" ]]; then
  echo "[ERROR] No VM with label group=${PREFIX}-master found in namespace ${NAMESPACE}" >&2
  exit 1
fi

echo "[INFO] Copy script for generating kubeconfig in nested cluster"
echo "[INFO] Copy scripts/gen-kubeconfig.sh to master"
d8vscp "./scripts/gen-kubeconfig.sh" "${DEFAULT_USER}@${nested_master}.${NAMESPACE}:/tmp/gen-kubeconfig.sh"
echo ""
d8vscp "./scripts/deckhouse-queue.sh" "${DEFAULT_USER}@${nested_master}.${NAMESPACE}:/tmp/deckhouse-queue.sh"
echo ""

echo "[INFO] Set file exec permissions"
d8vssh 'chmod +x /tmp/{gen-kubeconfig.sh,deckhouse-queue.sh}'
d8vssh 'ls -la /tmp/'
echo "[INFO] Check d8 queue in nested cluster"
d8vssh 'sudo /tmp/deckhouse-queue.sh'

echo "[INFO] Generate kube conf in nested cluster"
echo "[INFO] Run gen-kubeconfig.sh in nested cluster"
d8vssh "sudo /tmp/gen-kubeconfig.sh nested-sa nested nested-e2e /${kubeconfig_path}"
echo ""

echo "[INFO] Copy kubeconfig to runner"
echo "[INFO] ${DEFAULT_USER}@${nested_master}.$NAMESPACE:/${kubeconfig_path} ./${kubeconfig_path}"
d8vscp "${DEFAULT_USER}@${nested_master}.$NAMESPACE:/${kubeconfig_path}" "./${kubeconfig_path}"

echo "[INFO] Set rights for kubeconfig"
echo "[INFO] sudo chown 1001:1001 ${kubeconfig_path}"
sudo chown 1001:1001 "${kubeconfig_path}"
echo " "

echo "[INFO] Kubeconf to github output"
# Encoded twice on purpose: the consumer (prepare-artifact.sh) decodes with
# `base64 -d | base64 -d`. Keep the two sides in sync.
CONFIG=$(base64 -w 0 < "${kubeconfig_path}")
CONFIG=$(echo "${CONFIG}" | base64 -w 0)
echo "kubeconfig=$CONFIG" >> "$github_output"

# ---------------------------------------------------------------------------
# File: .github/scripts/bash/e2e/power-off-nested-vms.sh
#   (new file, mode 100755, index 0000000000..b27585effa)
# ---------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright 2026 Flant JSC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -Eeuo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=.github/scripts/bash/e2e/common.sh
source "${SCRIPT_DIR}/common.sh"

# Constants (nested cluster: 1 master + 3 workers x2)
REQUIRED_MEM_GI=86
REQUIRED_CPU=26
MIN_MEM_GI_PER_NODE=12
MIN_CPU_PER_NODE=4
MIN_NODES_FOR_PLACEMENT=3
POWER_OFF_POLL_INTERVAL_SEC=10
POWER_OFF_WAIT_TIMEOUT_SEC=180

# mem_to_gi QUANTITY
# Prints QUANTITY converted to Gi (a bc-formatted number).
# Suffixes are matched case-insensitively: t/ti, g/gi, m/mi, k/ki; a bare
# number is treated as bytes. Decimal suffixes (T, G, M, K) are approximated
# as their binary counterparts. Anything else prints 0 and a warning on stderr
# so an unparseable value is visible instead of silently counting as nothing.
mem_to_gi() {
  local q="$1" q_lower
  q_lower=$(echo "$q" | tr '[:upper:]' '[:lower:]')
  if [[ "$q_lower" =~ ^([0-9]+\.?[0-9]*)ti?$ ]]; then
    echo "scale=4; ${BASH_REMATCH[1]} * 1024" | bc
  elif [[ "$q_lower" =~ ^([0-9]+\.?[0-9]*)gi?$ ]]; then
    echo "${BASH_REMATCH[1]}"
  elif [[ "$q_lower" =~ ^([0-9]+\.?[0-9]*)mi?$ ]]; then
    echo "scale=4; ${BASH_REMATCH[1]} / 1024" | bc
  elif [[ "$q_lower" =~ ^([0-9]+\.?[0-9]*)ki?$ ]]; then
    echo "scale=6; ${BASH_REMATCH[1]} / 1024 / 1024" | bc
  elif [[ "$q" =~ ^[0-9]+\.?[0-9]*$ ]]; then
    echo "scale=6; $q / 1024 / 1024 / 1024" | bc
  else
    echo "[WARN] mem_to_gi: unrecognized memory quantity '${q}', counting it as 0" >&2
    echo "0"
  fi
}

# cpu_to_cores QUANTITY
# Prints QUANTITY in cores: a trailing m/M means millicores (divided by 1000),
# anything else is printed unchanged. Defaults to 0 when no argument is given.
cpu_to_cores() {
  local q="${1:-0}" q_lower
  q_lower=$(echo "$q" | tr '[:upper:]' '[:lower:]')
  if [[ "$q_lower" == *m ]]; then
    echo "scale=4; ${q%[mM]} / 1000" | bc
  else
    echo "${q:-0}"
  fi
}

# Floating-point comparisons through bc: succeed when "$1 > $2" / "$1 <= $2".
float_gt() { (($(echo "$1 > $2" | bc))); }
float_le() { (($(echo "$1 <= $2" | bc))); }

# Space-separated names of nodes labelled as workers; read once at start-up.
worker_nodes=$(kubectl get nodes -l node-role.kubernetes.io/worker -o jsonpath='{.items[*].metadata.name}')

# Prints one tab-separated line: total free Gi, total free CPU, and the number
# of worker nodes whose free memory and CPU both reach the per-node minimums.
# "Free" is allocatable minus the requests of Running/Pending pods on the node
# (containers and initContainers are both summed), floored at 0.
# Per-node diagnostics go to stderr so they do not pollute the result line.
gather_node_resources() {
  local available_mem_gi=0
  local available_cpu=0
  local nodes_meeting_min=0
  local node node_json alloc_mem_gi alloc_cpu pods_json requested_mem_gi requested_cpu
  local node_free_mem node_free_cpu node_ok_mem node_ok_cpu qty

  for node in $worker_nodes; do
    [[ -n "$node" ]] || continue
    node_json=$(kubectl get node "$node" -o json 2>/dev/null) || true
    if [[ -z "$node_json" ]]; then
      echo "[WARN] Node $node: could not get node spec, skipping" >&2
      continue
    fi

    alloc_mem_gi=$(mem_to_gi "$(echo "$node_json" | jq -r '.status.allocatable.memory // "0"')")
    alloc_cpu=$(cpu_to_cores "$(echo "$node_json" | jq -r '.status.allocatable.cpu // "0"')")

    # A failed pod listing must not be read as "nothing is requested": that
    # would report the node as completely free. Skip the node instead, the
    # same way a failed node lookup is handled above.
    pods_json=$(kubectl get pods -A --field-selector spec.nodeName="$node" -o json 2>/dev/null) || true
    if [[ -z "$pods_json" ]]; then
      echo "[WARN] Node $node: could not list pods, skipping" >&2
      continue
    fi

    requested_mem_gi=0
    requested_cpu=0

    while read -r qty; do
      [[ -z "$qty" ]] && continue
      requested_mem_gi=$(echo "$requested_mem_gi + $(mem_to_gi "$qty")" | bc)
    done < <(echo "$pods_json" | jq -r '
      .items[]
      | select(.status.phase == "Running" or .status.phase == "Pending")
      | [(.spec.containers[]? | try .resources.requests.memory catch null), (.spec.initContainers[]? | try .resources.requests.memory catch null)]
      | .[] | . // "0"
    ')

    while read -r qty; do
      [[ -z "$qty" ]] && continue
      requested_cpu=$(echo "$requested_cpu + $(cpu_to_cores "$qty")" | bc)
    done < <(echo "$pods_json" | jq -r '
      .items[]
      | select(.status.phase == "Running" or .status.phase == "Pending")
      | [(.spec.containers[]? | try .resources.requests.cpu catch null), (.spec.initContainers[]? | try .resources.requests.cpu catch null)]
      | .[] | . // "0"
    ')

    node_free_mem=$(echo "x = $alloc_mem_gi - $requested_mem_gi; if (x < 0) 0 else x" | bc 2>/dev/null || echo "0")
    node_free_cpu=$(echo "x = $alloc_cpu - $requested_cpu; if (x < 0) 0 else x" | bc 2>/dev/null || echo "0")

    available_mem_gi=$(echo "$available_mem_gi + $node_free_mem" | bc)
    available_cpu=$(echo "$available_cpu + $node_free_cpu" | bc)

    node_ok_mem=$(echo "$node_free_mem >= $MIN_MEM_GI_PER_NODE" | bc)
    node_ok_cpu=$(echo "$node_free_cpu >= $MIN_CPU_PER_NODE" | bc)
    if [[ "$node_ok_mem" -eq 1 && "$node_ok_cpu" -eq 1 ]]; then
      nodes_meeting_min=$((nodes_meeting_min + 1))
    else
      echo "[INFO] Node $node: does not meet placement min - free ${node_free_mem} Gi RAM, ${node_free_cpu} CPU (required: >= ${MIN_MEM_GI_PER_NODE} Gi, >= ${MIN_CPU_PER_NODE} CPU)" >&2
    fi
  done

  printf '%s\t%s\t%s\n' "$available_mem_gi" "$available_cpu" "$nodes_meeting_min"
}

# Re-reads cluster resources and updates the globals used by main:
# available_mem_gi, available_cpu, nodes_meeting_min, deficit_mem, deficit_cpu,
# total_sufficient and placement_sufficient.
refresh_resource_state() {
  IFS=$'\t' read -r available_mem_gi available_cpu nodes_meeting_min < <(gather_node_resources)
  deficit_mem=$(echo "$REQUIRED_MEM_GI - $available_mem_gi" | bc 2>/dev/null || echo "$REQUIRED_MEM_GI")
  deficit_cpu=$(echo "$REQUIRED_CPU - $available_cpu" | bc 2>/dev/null || echo "$REQUIRED_CPU")

  total_sufficient=false
  if float_le "$deficit_mem" 0 && float_le "$deficit_cpu" 0; then
    total_sufficient=true
  fi

  placement_sufficient=false
  if [[ $nodes_meeting_min -ge $MIN_NODES_FOR_PLACEMENT ]]; then
    placement_sufficient=true
  fi
}

# Prints one TSV line per power-off candidate:
# namespace, name, spec memory size, spec cpu cores, spec cpu coreFraction.
# Excluded: namespaces matching ^nightly-e2e- or containing static-cse, VMs
# whose labels mention e2e-cluster/do-not-stop-vm-on-e2e-run, and VMs already
# in phase Stopped.
get_vms_candidates() {
  kubectl get vm -A -o json | jq -r '
    .items[]
    | select(.metadata.namespace | test("^nightly-e2e-|static-cse") | not)
    | select(.metadata.labels | tostring | test("e2e-cluster/do-not-stop-vm-on-e2e-run") | not)
    | select(.status.phase != "Stopped")
    | [.metadata.namespace, .metadata.name, (.spec.memory.size // "0"), (.spec.cpu.cores // 0), (.spec.cpu.coreFraction // "100%")]
    | @tsv
  '
}

# Reads candidate lines from stdin, prefixes each with its memory in Gi and
# prints them sorted by that value, largest first.
sort_by_mem_desc() {
  local ns name mem_qty cores core_frac mem_gi

  while IFS=$'\t' read -r ns name mem_qty cores core_frac; do
    [[ -n "$ns" ]] || continue
    mem_gi=$(mem_to_gi "$mem_qty")
    printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$mem_gi" "$ns" "$name" "$mem_qty" "$cores" "$core_frac"
  done | sort -t$'\t' -k1,1 -rn
}

# vm_cpu_from_cores_and_fraction CORES CORE_FRACTION
# Prints CORES scaled by CORE_FRACTION when it has the form "<int>%";
# any other fraction value is treated as 100%.
vm_cpu_from_cores_and_fraction() {
  local cores="$1" core_frac="$2" frac_pct=100
  [[ "$core_frac" =~ ^([0-9]+)%$ ]] && frac_pct="${BASH_REMATCH[1]}"
  echo "scale=2; $cores * $frac_pct / 100" | bc
}

# print_power_off_plan CANDIDATES_FILE
# Logs the candidates in file order with per-VM and cumulative memory/CPU.
print_power_off_plan() {
  local plan_index=0 cumulative_mem=0 cumulative_cpu=0
  local vm_mem_gi ns name mem_qty cores core_frac vm_cpu

  echo "[INFO] Planned power-off order with projected VM-spec resources:"
  echo "[INFO] Projection is based on VM spec memory/cpu; actual placement improvement depends on where workloads are running."

  while IFS=$'\t' read -r vm_mem_gi ns name mem_qty cores core_frac; do
    [[ -n "$ns" ]] || continue
    plan_index=$((plan_index + 1))
    vm_cpu=$(vm_cpu_from_cores_and_fraction "$cores" "$core_frac")
    cumulative_mem=$(echo "$cumulative_mem + $vm_mem_gi" | bc)
    cumulative_cpu=$(echo "$cumulative_cpu + $vm_cpu" | bc)
    echo "[PLAN] ${plan_index}. ${ns}/${name} -> ${vm_mem_gi} Gi RAM, ${vm_cpu} CPU (cumulative: ${cumulative_mem} Gi RAM, ${cumulative_cpu} CPU)"
  done < "$1"

  if [[ $plan_index -eq 0 ]]; then
    echo "[WARN] No VM candidates available for power off"
  fi
}

# count_stopped_requested_vms REQUESTED_FILE
# Prints "<stopped>\t<total>" for the namespace/name pairs listed in the file.
# A VM whose phase cannot be read is counted as not stopped.
count_stopped_requested_vms() {
  local requested_vms_file="$1"
  local stopped_requested=0 total_requested=0
  local ns name phase

  while IFS=$'\t' read -r ns name; do
    [[ -n "$ns" ]] || continue
    total_requested=$((total_requested + 1))
    phase=$(kubectl get vm -n "$ns" "$name" -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown")
    if [[ "$phase" == "Stopped" ]]; then
      stopped_requested=$((stopped_requested + 1))
    fi
  done < "$requested_vms_file"

  printf '%s\t%s\n' "$stopped_requested" "$total_requested"
}

# Succeeds (0) while either the placement or the total requirement is unmet.
still_need_to_free() {
  if ! $placement_sufficient; then return 0; fi
  if ! $total_sufficient; then return 0; fi
  return 1
}

# Entry point: exits 0 when resources already suffice; otherwise requests
# power-off for the candidates, waits (bounded by POWER_OFF_WAIT_TIMEOUT_SEC)
# for resources to free up, and exits 1 if they are still insufficient.
main() {
  refresh_resource_state
  echo "[INFO] Workers: free ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min} (need at least ${MIN_NODES_FOR_PLACEMENT})"
  echo "[INFO] Required: ${REQUIRED_MEM_GI} Gi, ${REQUIRED_CPU} CPU; need >= ${MIN_NODES_FOR_PLACEMENT} nodes with >= ${MIN_MEM_GI_PER_NODE} Gi and >= ${MIN_CPU_PER_NODE} CPU"
  echo " "

  if $total_sufficient && $placement_sufficient; then
    echo "[INFO] Resources sufficient (total + placement), no VMs to power off"
    exit 0
  fi

  if $total_sufficient; then
    echo "[INFO] Cluster has enough free memory and cpu."
  else
    shortage_parts=""
    float_gt "$deficit_mem" 0 && shortage_parts="${deficit_mem} Gi RAM"
    float_gt "$deficit_cpu" 0 && shortage_parts="${shortage_parts:+$shortage_parts, }${deficit_cpu} CPU"
    echo "[INFO] Resources shortage: need to free ${shortage_parts}. Proceed with power off some VMs to free cluster resources."
  fi
  if $placement_sufficient; then
    echo "[INFO] Cluster has enough available nodes."
  else
    echo "[INFO] Available nodes shortage: only ${nodes_meeting_min} node(s) meet free resources requirement, expect at least ${MIN_NODES_FOR_PLACEMENT} available nodes. Proceed with power off some VMs to free resources."
  fi
  echo "[Note] Will ignore VMs in 'nightly-e2e-*', 'static-cse' namespaces, and VMs with the 'e2e-cluster/do-not-stop-vm-on-e2e-run' label."
  echo "[INFO] Power off candidates sorted by memory (largest first); stop when enough resources are freed."

  vm_candidates_file=$(mktemp)
  requested_vms_file=$(mktemp)
  trap 'rm -f "$vm_candidates_file" "$requested_vms_file"' EXIT
  get_vms_candidates | sort_by_mem_desc > "$vm_candidates_file"
  print_power_off_plan "$vm_candidates_file"

  # NOTE(review): this loop requests power-off for every candidate; only the
  # wait loop below stops early once resources suffice. Confirm that this is
  # what the "stop when enough resources are freed" message is meant to say.
  requested_count=0
  while IFS=$'\t' read -r vm_mem_gi ns name mem_qty cores core_frac; do
    [[ -n "$ns" ]] || continue
    vm_cpu=$(vm_cpu_from_cores_and_fraction "$cores" "$core_frac")

    echo "[INFO] Request power off for vm $ns/$name (${vm_mem_gi} Gi, ${vm_cpu} CPU)"
    if ! kubectl patch vm -n "$ns" "$name" --type=merge -p '{"spec":{"runPolicy":"AlwaysOff"}}'; then
      echo "[WARN] Failed to power off vm $ns/$name, skip it and continue with next candidate"
      continue
    fi
    printf '%s\t%s\n' "$ns" "$name" >> "$requested_vms_file"
    requested_count=$((requested_count + 1))
  done < "$vm_candidates_file"

  if [[ $requested_count -eq 0 ]]; then
    echo "[ERROR] No running VM candidates available for power off, but resources are still insufficient."
    echo "[ERROR] Human intervention is required."
    rm -f "$vm_candidates_file" "$requested_vms_file"
    trap - EXIT
    exit 1
  fi

  echo "[INFO] Requested power off for ${requested_count} VM(s). Waiting up to ${POWER_OFF_WAIT_TIMEOUT_SEC}s and checking cluster resources every ${POWER_OFF_POLL_INTERVAL_SEC}s."

  wait_elapsed=0
  prev_nodes_meeting_min="$nodes_meeting_min"
  while true; do
    refresh_resource_state
    IFS=$'\t' read -r stopped_requested total_requested < <(count_stopped_requested_vms "$requested_vms_file")
    echo "[INFO] Current workers free: ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min}"
    echo "[INFO] Requested VMs stopped: ${stopped_requested}/${total_requested}; waited ${wait_elapsed}s/${POWER_OFF_WAIT_TIMEOUT_SEC}s"
    if [[ $prev_nodes_meeting_min -lt $MIN_NODES_FOR_PLACEMENT && $nodes_meeting_min -ge $MIN_NODES_FOR_PLACEMENT ]]; then
      echo "[INFO] Placement now sufficient: ${nodes_meeting_min} nodes with >= ${MIN_MEM_GI_PER_NODE} Gi and >= ${MIN_CPU_PER_NODE} CPU"
    fi
    prev_nodes_meeting_min="$nodes_meeting_min"

    if ! still_need_to_free; then
      break
    fi

    if [[ $total_requested -gt 0 && $stopped_requested -eq $total_requested ]]; then
      echo "[INFO] All requested VMs are already stopped; no need to wait further."
      break
    fi

    if [[ $wait_elapsed -ge $POWER_OFF_WAIT_TIMEOUT_SEC ]]; then
      break
    fi

    sleep "$POWER_OFF_POLL_INTERVAL_SEC"
    wait_elapsed=$((wait_elapsed + POWER_OFF_POLL_INTERVAL_SEC))
  done

  rm -f "$vm_candidates_file" "$requested_vms_file"
  trap - EXIT

  echo "[INFO] Final workers free: ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min}"

  if still_need_to_free; then
    echo "[ERROR] Stopping VMs did not free enough resources. Human intervention is required."
    exit 1
  fi
}

main "$@"

# ---------------------------------------------------------------------------
# File: .github/scripts/bash/e2e/prepare-artifact.sh
#   (new file, mode 100644, index 0000000000..0e1233ab75)
# ---------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright 2026 Flant JSC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -Eeuo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=.github/scripts/bash/e2e/common.sh
source "${SCRIPT_DIR}/common.sh"

# Prepares a cluster-setup directory for upload: replaces registry credentials
# in the generated YAML files with "None" and, when a doubly base64-encoded
# kubeconfig is passed as the second argument, writes it to <dir>/kube-config.
# Every step is best-effort: a missing file is reported and skipped.
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
  echo "[ERROR] Usage: $0 <setup-cluster-type-path> [kubeconfig-b64]" >&2
  exit 1
fi

setup_cluster_type_path="$1"
kubeconfig_b64="${2:-}"

sudo chown -fR 1001:1001 "${setup_cluster_type_path}" || true
yq e '.deckhouse.registryDockerCfg = "None"' -i "./${setup_cluster_type_path}/values.yaml" || true
yq e 'select(.kind == "InitConfiguration").deckhouse.registryDockerCfg = "None"' -i "./${setup_cluster_type_path}/tmp/config.yaml" || echo "The config.yaml file is not generated, skipping"
yq e '.discovered.registry_url = "None"' -i "./${setup_cluster_type_path}/tmp/discovered-values.yaml" || echo "The discovered-values.yaml file is not generated, skipping editing registry_url"
yq e '.discovered.registry_auth = "None"' -i "./${setup_cluster_type_path}/tmp/discovered-values.yaml" || echo "The discovered-values.yaml file is not generated, skipping editing registry_auth"

if [ -n "${kubeconfig_b64}" ]; then
  kubeconfig_file="./${setup_cluster_type_path}/kube-config"
  if ! echo "${kubeconfig_b64}" | base64 -d | base64 -d > "${kubeconfig_file}"; then
    # The redirection creates the file before decoding starts; do not leave a
    # truncated or empty kube-config in the artifact.
    rm -f "${kubeconfig_file}"
    echo "kubeconfig not available, skipping"
  fi
else
  echo "kubeconfig not available, skipping"
fi

# ---------------------------------------------------------------------------
# File: .github/scripts/bash/e2e/render-dvp-static-values.sh
#   (new file, mode 100644, index 0000000000..c47ed84b97)
# ---------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright 2026 Flant JSC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -Eeuo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=.github/scripts/bash/e2e/common.sh
source "${SCRIPT_DIR}/common.sh"

# Renders values.yaml from values.yaml.tmpl and writes the registry URL and
# auth taken from DEV_REGISTRY_DOCKER_CFG into tmp/discovered-values.yaml.
# Runs relative to the current directory.
require_env NAMESPACE
require_env STORAGE_TYPE
require_env DECKHOUSE_CHANNEL
require_env POD_SUBNET_CIDR
require_env SERVICE_SUBNET_CIDR
require_env K8S_VERSION
require_env PROD_IO_REGISTRY_DOCKER_CFG
require_env VIRTUALIZATION_IMAGE_URL
require_env DEFAULT_USER
require_env APT_MIRROR_ENABLED
require_env APT_MIRROR_NAME
require_env APT_MIRROR_URL
require_env CLUSTER_CONFIG_WORKERS_MEMORY
require_env ADDITIONAL_DISK_SIZE
require_env NESTED_CLUSTER_NETWORK_NAME
require_env DEV_REGISTRY_DOCKER_CFG

# first(...) keeps a single name even if several StorageClasses carry the
# default annotation; otherwise the value would contain newlines and be
# substituted into the template as-is.
default_storage_class="$(kubectl get storageclass -o json \
  | jq -r 'first(.items[] | select(.metadata.annotations."storageclass.kubernetes.io/is-default-class" == "true") | .metadata.name)')"

if [[ -z "${default_storage_class}" ]]; then
  echo "No default StorageClass found in the cluster" >&2
  exit 1
fi

export DEFAULT_STORAGE_CLASS="${default_storage_class}"

# Derive the envsubst whitelist from the template so it never drifts: only the
# placeholders actually used in values.yaml.tmpl are substituted, everything else
# is left intact.
envsubst_variables="$(grep -oE '\$\{[A-Z0-9_]+\}' values.yaml.tmpl | sort -u | tr '\n' ' ')"

envsubst "${envsubst_variables}" \
  < values.yaml.tmpl > values.yaml

mkdir -p tmp
touch tmp/discovered-values.yaml

# shellcheck disable=SC2153,SC2154
dev_registry_docker_cfg="$(base64 -d <<< "${DEV_REGISTRY_DOCKER_CFG}")"
registry="$(jq -r '.auths | to_entries[0].key' <<< "${dev_registry_docker_cfg}")"
auth="$(jq -r '.auths | to_entries[0].value.auth' <<< "${dev_registry_docker_cfg}")"

# strenv() inserts the values as plain strings; env() would parse them as YAML
# first and could retype a value that looks like a number or boolean.
REGISTRY="${registry}" AUTH="${auth}" yq eval --inplace \
  '.discovered.registry_url = strenv(REGISTRY) | .discovered.registry_auth = strenv(AUTH)' \
  tmp/discovered-values.yaml

# ---------------------------------------------------------------------------
# File: .github/scripts/bash/e2e/run-release-e2e.sh
#   (new file, mode 100644, index 0000000000..c8c8ec6a17)
# ---------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright 2026 Flant JSC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -Eeuo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=.github/scripts/bash/e2e/common.sh
source "${SCRIPT_DIR}/common.sh"

# Read a required env var by name and print its value.
# Indirect expansion (${!name}) keeps shellcheck from flagging the env vars as unassigned.
required_env_value() {
  local name="$1"

  require_env "${name}"
  printf '%s' "${!name}"
}

require_env STORAGE_CLASS_NAME
require_env E2E_CONFIG

# Required values are assigned to variables before use: a command substitution
# embedded in an `echo` argument cannot stop the script when it fails, while a
# plain assignment propagates the failure under `set -e`.
# (Assumes require_env fails for a missing variable - see common.sh.)
release_test_phase="$(required_env_value RELEASE_TEST_PHASE)"
runner_temp="$(required_env_value RUNNER_TEMP)"
csi="$(required_env_value CSI)"

echo "[INFO] Release test phase: ${release_test_phase}"
echo "[INFO] Storage type: ${csi}"
echo ""

case "${release_test_phase}" in
  pre-upgrade)
    require_env RELEASE_UPGRADE_CONTEXT_PATH
    current_release="$(required_env_value CURRENT_RELEASE)"
    echo "[INFO] Current release tag: ${current_release}"
    echo "[INFO] Verifying virtualization module is running"
    kubectl get modules virtualization
    kubectl get mpo virtualization
    echo ""
    echo "[INFO] Running dedicated release suite"
    echo "[INFO] Resources will be intentionally left in the cluster for the upgrade test"
    ;;
  post-upgrade)
    require_env RELEASE_UPGRADE_STARTED_AT
    new_release="$(required_env_value NEW_RELEASE)"
    echo "[INFO] New release tag: ${new_release}"
    echo "[INFO] Verifying virtualization module is running with new release"
    kubectl get modules virtualization || true
    kubectl get mpo virtualization || true
    echo ""
    release_namespace="$(required_env_value RELEASE_NAMESPACE)"
    echo "[INFO] Reusing namespace: ${release_namespace}"
    ;;
  *)
    echo "[ERROR] Unsupported RELEASE_TEST_PHASE: ${release_test_phase}" >&2
    exit 1
    ;;
esac

# test/e2e is a separate Go module: "go tool ginkgo" and the ./release suite
# both resolve relative to it, so run from there.
echo "[INFO] Changing directory to ./test/e2e/"
cd ./test/e2e/
ginkgo_result="$(mktemp -p "${runner_temp}")"
ginkgo_exit_code=0
# With pipefail the pipeline status reflects a ginkgo failure even though tee
# is the last command; `||` records it instead of letting `set -e` abort.
go tool ginkgo \
  -v --race --timeout=45m \
  ./release | tee "${ginkgo_result}" || ginkgo_exit_code=$?
echo "[INFO] Exit code: ${ginkgo_exit_code}"

if [ "${release_test_phase}" = "post-upgrade" ]; then
  echo "[INFO] Cluster is intentionally left running (no cleanup)"
fi

exit "${ginkgo_exit_code}"

# ---------------------------------------------------------------------------
# File: .github/scripts/bash/e2e/show-nested-cluster-info.sh
#   (new file, mode 100644, index 0000000000..5ee10f2d24)
# ---------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright 2026 Flant JSC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -Eeuo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=.github/scripts/bash/e2e/common.sh
source "${SCRIPT_DIR}/common.sh"
# shellcheck source=.github/scripts/bash/e2e/d8-ssh.sh
source "${SCRIPT_DIR}/d8-ssh.sh"

# Prints the pods, VMs and VDs of a namespace, then checks SSH access to the
# master VM by printing its os-release and hostname.
# Arguments, in order: namespace, VM group prefix, SSH user.
if [ "$#" -ne 3 ]; then
  echo "[ERROR] Usage: $0 <namespace> <prefix> <default-user>" >&2
  exit 1
fi

NAMESPACE="$1"
PREFIX="$2"
DEFAULT_USER="$3"
# d8vssh reads these two through require_env.
export NAMESPACE DEFAULT_USER

# The jsonpath lookup fails when no VM carries the label; turn that into an
# explicit error instead of a bare kubectl message.
nested_master="$(kubectl -n "${NAMESPACE}" get vm -l "group=${PREFIX}-master" -o jsonpath="{.items[0].metadata.name}")" || nested_master=""
if [[ -z "${nested_master}" ]]; then
  echo "[ERROR] No VM with label group=${PREFIX}-master found in namespace ${NAMESPACE}" >&2
  exit 1
fi

echo "[INFO] Pods in namespace $NAMESPACE"
kubectl get pods -n "${NAMESPACE}"
echo ""

echo "[INFO] VMs in namespace $NAMESPACE"
kubectl get vm -n "${NAMESPACE}"
echo ""

echo "[INFO] VDs in namespace $NAMESPACE"
kubectl get vd -n "${NAMESPACE}"
echo ""

echo "Check connection to master"
d8vssh "${nested_master}" 'echo master os-release: ; cat /etc/os-release; echo " "; echo master hostname: ; hostname'
echo ""

# ---------------------------------------------------------------------------
# File: .github/scripts/bash/e2e/verify-image-digests.sh
#   (new file, mode 100644, index 0000000000..d161eebfe9)
# ---------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright 2026 Flant JSC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -Eeuo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=.github/scripts/bash/e2e/common.sh
source "${SCRIPT_DIR}/common.sh"

# Waits until every digest listed in images_digests.json of the new module
# image appears in the JSON of the pods in d8-virtualization.
# Rechecks every 5 s, up to 120 times; exits 1 on timeout.
require_env NEW_RELEASE
require_env DEV_MODULE_SOURCE

# Read a required env var by name and print its value.
required_env_value() {
  local name="$1"

  require_env "${name}"
  printf '%s' "${!name}"
}

new_release="$(required_env_value NEW_RELEASE)"
dev_module_source="$(required_env_value DEV_MODULE_SOURCE)"

MODULE_IMAGE="${dev_module_source}/virtualization:${new_release}"
echo "[INFO] Extracting images_digests.json from virtualization:${new_release}"
images_hash="$(crane export "${MODULE_IMAGE}" - | tar -Oxf - images_digests.json)"
echo "[INFO] Expected image digests:"
echo "::group::images_digests.json"
echo "${images_hash}" | jq .
echo "::endgroup::"

# The audit image is only checked when audit is enabled in the ModuleConfig.
audit_status="$(kubectl get mc virtualization -o=jsonpath='{.spec.settings.audit.enabled}' 2>/dev/null || true)"
audit_image_skip="true"
if [[ "${audit_status}" == "true" ]]; then
  audit_image_skip="false"
fi

SKIP_IMAGES=()
if [[ "${audit_image_skip}" == "true" ]]; then
  SKIP_IMAGES+=("virtualizationAudit")
fi
SKIP_IMAGES+=("virtualizationDraUsb")

# is_skipped_image NAME
# Succeeds when NAME is non-empty and listed in SKIP_IMAGES.
is_skipped_image() {
  local img="$1"
  local skip

  if [[ -z "${img}" ]]; then
    return 1
  fi

  for skip in "${SKIP_IMAGES[@]}"; do
    if [[ "${img}" == "${skip}" ]]; then
      return 0
    fi
  done

  return 1
}

retry_count=0
max_retries=120
sleep_interval=5

while true; do
  all_hashes_found=true

  # A failed listing must not abort the retry loop: treat it as "no pods seen"
  # so every digest counts as missing and the next attempt retries.
  if ! v12n_pods="$(kubectl -n d8-virtualization get pods -o json | jq -c .)"; then
    echo "[WARNING] Failed to list pods in d8-virtualization, will retry"
    v12n_pods=""
  fi

  while IFS= read -r image_entry; do
    image="$(echo "${image_entry}" | jq -r '.key')"
    hash="$(echo "${image_entry}" | jq -r '.value')"

    if [[ "${image,,}" =~ (libguestfs|predeletehook) ]]; then
      continue
    fi

    if is_skipped_image "${image}"; then
      echo "- SKIP ${image}"
      continue
    fi

    # Here-string instead of `echo ... | grep -q`: grep -q exits on the first
    # match, and under pipefail the writer's SIGPIPE (status 141) would turn a
    # successful match into a false MISS. -F matches the digest literally.
    if grep -qF -- "${hash}" <<< "${v12n_pods}"; then
      echo "- OK ${image} ${hash}"
    else
      echo "- MISS ${image} ${hash}"
      all_hashes_found=false
    fi
  done < <(echo "${images_hash}" | jq -c '. | to_entries | sort_by(.key)[]')

  if [[ "${all_hashes_found}" == "true" ]]; then
    echo "[SUCCESS] All image hashes found in pods after upgrade to ${new_release}"
    break
  fi

  retry_count=$((retry_count + 1))
  echo "[INFO] Some hashes are missing, rechecking... Attempt: ${retry_count}/${max_retries}"

  if [[ "${retry_count}" -ge "${max_retries}" ]]; then
    echo "[ERROR] Timeout reached after $((retry_count * sleep_interval))s. Some image hashes are still missing."
    echo "::group::pods in d8-virtualization"
    kubectl -n d8-virtualization get pods -o wide || true
    echo "::endgroup::"
    exit 1
  fi

  sleep "${sleep_interval}"
done

# ---------------------------------------------------------------------------
# File: .github/scripts/bash/e2e/wait-nodenetworkinterfaces.sh
#   (new file, mode 100644, index 0000000000..b379623ab4)
# ---------------------------------------------------------------------------
#!/usr/bin/env bash

# Copyright 2026 Flant JSC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+
+set -Eeuo pipefail
+# Wait until the number of nodenetworkinterfaces in operationalState "Up" reaches twice the node count.
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=.github/scripts/bash/e2e/common.sh
+source "${SCRIPT_DIR}/common.sh"
+# Poll budget: up to `count` attempts, `wait_time_seconds` apart.
+count=60
+success=false
+wait_time_seconds=5
+
+for i in $(seq 1 "$count"); do
+  nodes=$(kubectl get nodes -o name | wc -l) || true
+  actual=$(kubectl get nodenetworkinterfaces -o json | jq -r '.items[] | select(.status.operationalState == "Up") | .metadata.name' | wc -l) || true
+  expected=$((nodes * 2))
+
+  echo "[INFO] Attempt $i/$count: expected=$expected, actual=$actual"
+  # With zero nodes (or a failed node listing) expected is 0, which must not count as success.
+  if [ "$nodes" -gt 0 ] && [ "$actual" -ge "$expected" ]; then
+    echo "[SUCCESS] All nodenetworkinterfaces are present (expected=$expected, actual=$actual)"
+    kubectl get nodenetworkinterfaces || true
+    success=true
+    break
+  fi
+
+  if (( i % 5 == 0 )); then
+    echo "::group::[DEBUG] show namespaces d8-sdn"
+    kubectl -n d8-sdn get pods || true
+    echo "::endgroup::"
+
+    echo "::group::[DEBUG] show nodenetworkinterfaces d8-sdn"
+    kubectl get nodenetworkinterfaces || true
+    echo "::endgroup::"
+  fi
+
+  # No sleep after the final attempt; the message reports the real interval.
+  if (( i < count )); then
+    echo "[INFO] Retrying in ${wait_time_seconds} seconds..."
+    sleep "$wait_time_seconds"
+  fi
+done
+
+if [ "$success" = false ]; then
+  echo "[ERROR] Failed to get all nodenetworkinterfaces after $count attempts (expected=$expected)"
+  echo "[DEBUG] Show namespaces d8-sdn"
+  kubectl -n d8-sdn get pods || true
+  echo "[DEBUG] Show nodenetworkinterfaces d8-sdn"
+  kubectl get nodenetworkinterfaces || true
+  exit 1
+fi
diff --git a/.github/scripts/bash/e2e/wait-sds-replicated.sh b/.github/scripts/bash/e2e/wait-sds-replicated.sh
new file mode 100644
index 0000000000..d5d2bc81cd
--- /dev/null
+++ b/.github/scripts/bash/e2e/wait-sds-replicated.sh
@@ -0,0 +1,157 @@
+#!/usr/bin/env bash
+
+# Copyright 2026 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -Eeuo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=.github/scripts/bash/e2e/common.sh
+source "${SCRIPT_DIR}/common.sh"
+# shellcheck source=.github/scripts/bash/e2e/deckhouse.sh
+source "${SCRIPT_DIR}/deckhouse.sh"
+# sds_replicated_ready: wait (60 x 10s) for namespace d8-sds-replicated-volume to reach phase Active; returns 1 with diagnostics on timeout.
+sds_replicated_ready() {
+  local count=60
+  local sds_replicated_volume_status
+
+  for i in $(seq 1 "$count"); do
+    sds_replicated_volume_status="$(kubectl get ns d8-sds-replicated-volume -o jsonpath='{.status.phase}' || echo "False")"
+
+    if [[ "$sds_replicated_volume_status" = "Active" ]]; then
+      echo "[SUCCESS] Namespaces sds-replicated-volume are Active"
+      kubectl get ns d8-sds-replicated-volume
+      return 0
+    fi
+
+    echo "[INFO] Waiting 10s for sds-replicated-volume namespace to be ready (attempt ${i}/${count})"
+    if (( i % 5 == 0 )); then
+      echo "[INFO] Show namespaces sds-replicated-volume"
+      kubectl get ns | grep sds-replicated-volume || echo "Namespaces sds-replicated-volume are not ready"
+      echo "[DEBUG] Show queue (first 25 lines)"
+      d8 s queue list | head -n25 || echo "No queues"
+    fi
+    sleep 10
+  done
+
+  echo "[ERROR] Namespaces sds-replicated-volume are not ready after ${count} attempts"
+  echo "[DEBUG] Show namespaces sds"
+  kubectl get ns | grep sds || echo "Namespaces sds-replicated-volume are not ready"
+  echo "[DEBUG] Show queue"
+  echo "::group::Show queue"
+  d8 s queue list || echo "No queues"
+  echo "::endgroup::"
+  echo "[DEBUG] Show deckhouse logs"
+  echo "::group::deckhouse logs"
+  d8 s logs | tail -n 100 || echo "[WARNING] Failed to get deckhouse logs"
+  echo "::endgroup::"
+  return 1
+}
+# sds_pods_ready: wait (100 x 10s) until Running linstor-node and csi-node pods each reach the worker count; returns 1 on timeout or when no worker exists.
+sds_pods_ready() {
+  local count=100 linstor_node csi_node workers
+
+  workers="$(kubectl get nodes -o name | grep -c worker || true)"
+  workers=$((workers))
+  if [[ "$workers" -eq 0 ]]; then
+    echo "[ERROR] No worker nodes found"
+    return 1
+  fi
+  echo "[INFO] Wait while linstor-node csi-node webhooks pods are ready"
+  for i in $(seq 1 "$count"); do
+    linstor_node="$(kubectl -n d8-sds-replicated-volume get pods | grep -c "linstor-node.*Running" || true)"
+    csi_node="$(kubectl -n d8-sds-replicated-volume get pods | grep -c "csi-node.*Running" || true)"
+
+    echo "[INFO] Check if sds-replicated pods are ready"
+    if [[ "$linstor_node" -ge "$workers" && "$csi_node" -ge "$workers" ]]; then
+      echo "[SUCCESS] sds-replicated-volume is ready"
+      return 0
+    fi
+
+    echo "[WARNING] Not all pods are ready, linstor_node=${linstor_node}, csi_node=${csi_node}"
+    echo "[INFO] Waiting 10s for pods to be ready (attempt ${i}/${count})"
+    if (( i % 5 == 0 )); then
+      echo "[DEBUG] Get pods"
+      kubectl -n d8-sds-replicated-volume get pods || true
+      echo "[DEBUG] Show queue (first 25 lines)"
+      d8 s queue list | head -n 25 || echo "Failed to retrieve list queue"
+      echo " "
+    fi
+    sleep 10
+  done
+
+  echo "[ERROR] sds-replicated-volume is not ready after ${count} attempts"
+  echo "[DEBUG] Get pods"
+  echo "::group::sds-replicated-volume pods"
+  kubectl -n d8-sds-replicated-volume get pods || true
+  echo "::endgroup::"
+  echo "[DEBUG] Show queue"
+  echo "::group::Show queue"
+  d8 s queue list || echo "Failed to retrieve list queue"
+  echo "::endgroup::"
+  echo "[DEBUG] Show deckhouse logs"
+  echo "::group::deckhouse logs"
+  d8 s logs | tail -n 100 || echo "[WARNING] Failed to get deckhouse logs"
+  echo "::endgroup::"
+  return 1
+}
+# blockdevices_ready: wait (60 x 10s) until the blockdevice count reaches the worker count; returns 1 on timeout or when no worker exists.
+blockdevices_ready() {
+  local count=60
+  local workers
+  local blockdevices
+
+  workers="$(kubectl get nodes -o name | grep -c worker || true)"
+  workers=$((workers))
+
+  if [[ "$workers" -eq 0 ]]; then
+    echo "[ERROR] No worker nodes found"
+    return 1
+  fi
+
+  for i in $(seq 1 "$count"); do
+    blockdevices="$(kubectl get blockdevice -o name | wc -l | tr -d ' ' || true)"
+    blockdevices=$((blockdevices))
+    if [[ "$blockdevices" -ge "$workers" ]]; then
+      echo "[SUCCESS] Blockdevices is greater or equal to $workers"
+      kubectl get blockdevice
+      return 0
+    fi
+
+    echo "[INFO] Wait 10 sec until blockdevices is greater or equal to $workers (attempt ${i}/${count})"
+    if (( i % 5 == 0 )); then
+      echo "[DEBUG] Show queue (first 25 lines)"
+      d8 s queue list | head -n25 || echo "No queues"
+    fi
+
+    sleep 10
+  done
+
+  echo "[ERROR] Blockdevices is less than ${workers} after ${count} attempts"
+  echo "[DEBUG] Show cluster nodes"
+  kubectl get nodes || echo "[WARNING] Failed to get cluster nodes"
+  echo "[DEBUG] Show blockdevices"
+  kubectl get blockdevice || echo "[WARNING] Failed to get blockdevices"
+  echo "[DEBUG] Show sds namespaces"
+  kubectl get ns | grep sds || echo "[WARNING] Namespace sds is not found"
+  echo "[DEBUG] Show pods in sds-replicated-volume"
+  echo "::group::pods in sds-replicated-volume"
+  kubectl -n d8-sds-replicated-volume get pods || echo "[WARNING] Failed to get pods in sds-replicated-volume"
+  echo "::endgroup::"
+  echo "[DEBUG] Show deckhouse logs"
+  echo "::group::deckhouse logs"
+  d8 s logs | tail -n 100 || echo "[WARNING] Failed to get deckhouse logs"
+  echo "::endgroup::"
+  return 1
+}
diff --git a/.github/scripts/bash/e2e/wait-virtualization-ready.sh b/.github/scripts/bash/e2e/wait-virtualization-ready.sh
new file mode 100644
index 0000000000..0b633a6a71
--- /dev/null
+++ b/.github/scripts/bash/e2e/wait-virtualization-ready.sh
@@ -0,0 +1,188 @@
+#!/usr/bin/env bash
+
+# Copyright 2026 Flant JSC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -Eeuo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=.github/scripts/bash/e2e/common.sh
+source "${SCRIPT_DIR}/common.sh"
+# shellcheck source=.github/scripts/bash/e2e/deckhouse.sh
+source "${SCRIPT_DIR}/deckhouse.sh"
+# debug_output: best-effort dump of module, dvcr, storage, node and deckhouse state; every command is guarded so one failure cannot cut the dump short.
+debug_output() {
+  local nodes
+
+  echo "[ERROR] Virtualization module deploy failed"
+  echo "[DEBUG] Show describe virtualization module"
+  echo "::group::describe virtualization module"
+  kubectl describe modules virtualization || true
+  echo "::endgroup::"
+  echo "[DEBUG] Show namespace d8-virtualization"
+  kubectl get ns d8-virtualization || true
+  echo "[DEBUG] Show pods in namespace d8-virtualization"
+  kubectl -n d8-virtualization get pods || true
+  echo "[DEBUG] Show dvcr info"
+  echo "::group::dvcr pod describe"
+  kubectl -n d8-virtualization describe pod -l app=dvcr || true
+  echo "::endgroup::"
+  echo " "
+  echo "::group::dvcr pod yaml"
+  kubectl -n d8-virtualization get pods -l app=dvcr -o yaml || true
+  echo "::endgroup::"
+  echo " "
+  echo "::group::dvcr deployment yaml"
+  kubectl -n d8-virtualization get deployment -l app=dvcr -o yaml || true
+  echo "::endgroup::"
+  echo " "
+  echo "::group::dvcr deployment describe"
+  kubectl -n d8-virtualization describe deployment -l app=dvcr || true
+  echo "::endgroup::"
+  echo " "
+  echo "::group::dvcr service yaml"
+  kubectl -n d8-virtualization get service -l app=dvcr -o yaml || true
+  echo "::endgroup::"
+  echo " "
+  echo "[DEBUG] Show pvc in namespace d8-virtualization"
+  kubectl get pvc -n d8-virtualization || true
+  echo "[DEBUG] Show cluster StorageClasses"
+  kubectl get storageclasses || true
+  echo "[DEBUG] Show cluster nodes"
+  kubectl get node || true
+
+  echo "[DEBUG] Show cluster node yaml and describe"
+  nodes="$(kubectl get no -o jsonpath='{range .items[?(@.metadata.name)]}{.metadata.name}{"\n"}{end}' || true)"
+  for node in $nodes; do
+    echo "::group::show cluster node ${node} yaml"
+    kubectl get node "$node" -o yaml || true
+    echo "::endgroup::"
+    echo "::group::show cluster node ${node} describe"
+    kubectl describe node "$node" || true
+    echo "::endgroup::"
+  done
+
+  echo "[DEBUG] Show queue (first 25 lines)"
+  d8 s queue list | head -n 25 || echo "[WARNING] Failed to retrieve list queue"
+  echo "[DEBUG] Show deckhouse logs"
+  echo "::group::deckhouse logs"
+  d8 s logs | tail -n 100 || echo "[WARNING] Failed to get deckhouse logs"
+  echo "::endgroup::"
+}
+# virtualization_ready: wait (90 x 10s) for the virtualization module phase to become Ready; on timeout runs debug_output and returns 1.
+virtualization_ready() {
+  local count=90
+  local virtualization_status
+  # A failed status lookup yields an empty status and is retried instead of tripping errexit.
+  for i in $(seq 1 "$count"); do
+    virtualization_status="$(kubectl get modules virtualization -o jsonpath='{.status.phase}' || true)"
+    if [ "$virtualization_status" = "Ready" ]; then
+      echo "[SUCCESS] Virtualization module is ready"
+      kubectl get modules virtualization || true
+      kubectl -n d8-virtualization get pods || true
+      kubectl get vmclass || echo "[WARNING] no vmclasses found"
+      return 0
+    fi
+
+    echo "[INFO] Waiting 10s for Virtualization module to be ready (attempt ${i}/${count})"
+
+    if (( i % 5 == 0 )); then
+      echo " "
+      echo "[DEBUG] Show additional info"
+      kubectl get ns d8-virtualization || echo "[WARNING] Namespace virtualization is not ready"
+      echo " "
+      kubectl -n d8-virtualization get pods || echo "[WARNING] Pods in namespace virtualization is not ready"
+      kubectl get pvc -n d8-virtualization || echo "[WARNING] PVC in namespace virtualization is not ready"
+      echo " "
+      echo "d8-virtualization module status: ${virtualization_status}"
+      echo " "
+    fi
+    sleep 10
+  done
+
+  debug_output
+  return 1
+}
+# virt_handler_ready: wait (180 x 10s) until Running virt-handler pods reach the worker count, recounting workers each attempt; returns 1 on timeout.
+virt_handler_ready() {
+  local count=180
+  local virt_handler_ready
+  local workers
+  local time_wait=10
+
+  for i in $(seq 1 "$count"); do
+    workers="$(kubectl get nodes -o name | grep -c worker || true)"
+    workers=$((workers))
+    if [[ "$workers" -eq 0 ]]; then
+      echo "[WARNING] No worker nodes found, keep waiting"
+      echo "[INFO] Wait ${time_wait}s virt-handler pods are ready (attempt ${i}/${count})"
+      sleep "$time_wait"
+      continue
+    fi
+
+    virt_handler_ready="$(kubectl -n d8-virtualization get pods | grep -c "virt-handler.*Running" || true)"
+
+    if [[ "$virt_handler_ready" -ge "$workers" ]]; then
+      echo "[SUCCESS] virt-handlers pods are ready ${virt_handler_ready}/${workers}"
+      return 0
+    fi
+
+    echo "[INFO] virt-handler pods ${virt_handler_ready}/${workers}"
+    echo "[INFO] Wait ${time_wait}s virt-handler pods are ready (attempt ${i}/${count})"
+    if (( i % 5 == 0 )); then
+      echo "[DEBUG] Show pods in namespace d8-virtualization"
+      echo "::group::virtualization pods"
+      kubectl -n d8-virtualization get pods || echo "[WARNING] No pods in virtualization namespace found"
+      echo "::endgroup::"
+      echo "[DEBUG] Show cluster nodes"
+      echo "::group::cluster nodes"
+      kubectl get node || echo "[WARNING] Failed to get cluster nodes"
+      echo "::endgroup::"
+    fi
+    sleep "$time_wait"
+  done
+
+  debug_output
+  return 1
+}
+# enable_maintenance_mode STORAGE_TYPE: patch the virtualization ModuleConfig, plus the storage module matching replicated|nfs|local, to NoResourceReconciliation.
+enable_maintenance_mode() {
+  if [ "$#" -ne 1 ]; then
+    echo "[ERROR] Usage: enable_maintenance_mode <storage_type>" >&2
+    return 1
+  fi
+
+  local storage_type="$1"
+
+  echo "[INFO] Switch virtualization module to maintenance mode"
+  kubectl patch mc virtualization --type merge --patch '{"spec":{"maintenance":"NoResourceReconciliation"}}'
+
+  case "${storage_type}" in
+    replicated)
+      echo "[INFO] Switch sds-replicated-volume module to maintenance mode"
+      kubectl patch mc sds-replicated-volume --type merge --patch '{"spec":{"maintenance":"NoResourceReconciliation"}}'
+      ;;
+    nfs)
+      echo "[INFO] Switch csi-nfs module to maintenance mode"
+      kubectl patch mc csi-nfs --type merge --patch '{"spec":{"maintenance":"NoResourceReconciliation"}}'
+      ;;
+    local)
+      echo "[INFO] Switch sds-local-volume module to maintenance mode"
+      kubectl patch mc sds-local-volume --type merge --patch '{"spec":{"maintenance":"NoResourceReconciliation"}}'
+      ;;
+    *)
+      echo "[INFO] No storage module maintenance mode patch for storage type: ${storage_type}"
+      ;;
+  esac
+}
diff --git
a/.github/scripts/templates/encrypted-artifacts-help.md b/.github/scripts/templates/encrypted-artifacts-help.md new file mode 100644 index 0000000000..0e13edec02 --- /dev/null +++ b/.github/scripts/templates/encrypted-artifacts-help.md @@ -0,0 +1,37 @@ +## Encrypted artifacts + +Some uploaded artifacts in this workflow are encrypted with GPG symmetric encryption. + +Secret used for decryption passphrase: +- `E2E_ARTIFACTS_GPG_PASSPHRASE` + +Encrypted artifact types: +- `*-generated-files-*.zip.gpg` +- `*-generated-files-ssh-*.zip.gpg` +- `*-generated-files-kubeconfig-*.gpg` +- `*-release-generated-files-*.zip.gpg` +- `*-release-generated-files-ssh-*.zip.gpg` +- `*-release-generated-files-kubeconfig-*.gpg` + +Decrypt examples: + +```bash +# zip.gpg artifact +gpg --decrypt --batch --yes --pinentry-mode loopback \ + --passphrase "$E2E_ARTIFACTS_GPG_PASSPHRASE" \ + --output artifact.zip \ + artifact.zip.gpg + +unzip -o artifact.zip + +# same, but with simultaneous decryption and extraction of the whole archive +gpg --decrypt --batch --yes --pinentry-mode loopback \ + --passphrase "$E2E_ARTIFACTS_GPG_PASSPHRASE" \ + artifact.zip.gpg > artifact.zip && unzip -o artifact.zip + +# single-file .gpg artifact +gpg --decrypt --batch --yes --pinentry-mode loopback \ + --passphrase "$E2E_ARTIFACTS_GPG_PASSPHRASE" \ + --output kube-config \ + artifact.gpg +``` diff --git a/.github/workflows/dev_validation.yaml b/.github/workflows/dev_validation.yaml index 3d600131a4..757d9a35a0 100644 --- a/.github/workflows/dev_validation.yaml +++ b/.github/workflows/dev_validation.yaml @@ -50,7 +50,7 @@ jobs: - .helmignore - Chart.yaml - Taskfile.yaml - route_forge: + vm_route_forge: - 'images/vm-route-forge/bpf/route_watcher.c' no_cyrillic: @@ -103,6 +103,59 @@ jobs: run: | task validation:doc-changes + shellcheck: + if: "!contains(github.event.pull_request.labels.*.name, 'validation/skip/shellcheck')" + runs-on: ubuntu-latest + name: Validation shellcheck + steps: + - uses: actions/checkout@v4 
+ with: + fetch-depth: 0 + ref: ${{ github.event.pull_request.head.sha }} + + - name: Run shellcheck + uses: ludeeus/action-shellcheck@2.0.0 + with: + scandir: .github/scripts/bash/e2e + additional_files: >- + api/scripts/update-codegen.sh + images/virtualization-artifact/hack/args.sh + images/virtualization-artifact/hack/dlv.sh + images/virtualization-artifact/hack/pyroscope.sh + check_together: "yes" + severity: warning + ignore_paths: >- + vendor + images/cdi-artifact + images/virt-api/__virt + images/virt-controller/__virt + images/virt-handler/__virt + images/virt-launcher/__virt + + actionlint: + if: "!contains(github.event.pull_request.labels.*.name, 'validation/skip/actionlint')" + runs-on: ubuntu-latest + name: Validation actionlint + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + ref: ${{ github.event.pull_request.head.sha }} + + - name: Install actionlint + run: | + curl -sSfL \ + https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash \ + -o /tmp/download-actionlint.bash + bash /tmp/download-actionlint.bash 1.7.7 + + - name: Run actionlint + run: | + shellcheck_path="$(command -v shellcheck)" + ./actionlint -color -shellcheck="${shellcheck_path}" \ + .github/workflows/dev_validation.yaml \ + .github/workflows/e2e*.yml + # Run helm templates validation on changes in related files and without the skip labels. 
helm_templates: needs: paths_filter @@ -142,48 +195,39 @@ jobs: - { component: api, go-version: "1.25.10" } steps: - - name: Set skip flag - run: | - # Only run vm-route-forge if relevant files were changed - if [[ "${{ needs.paths_filter.outputs.vm_route_forge }}" == "true" ]]; then - echo "route_forge_skip=false" >> $GITHUB_ENV - else - echo "route_forge_skip=true" >> $GITHUB_ENV - fi - - name: Setup Go ${{ matrix.components.go-version }} - if: matrix.components.component != 'vm-route-forge' || env.route_forge_skip != 'true' + if: matrix.components.component != 'vm-route-forge' || needs.paths_filter.outputs.vm_route_forge == 'true' uses: actions/setup-go@v5 with: go-version: ${{ matrix.components.go-version }} - name: Install Task - if: matrix.components.component != 'vm-route-forge' || env.route_forge_skip != 'true' + if: matrix.components.component != 'vm-route-forge' || needs.paths_filter.outputs.vm_route_forge == 'true' uses: go-task/setup-task@v2 with: version: 3.x repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Install dependencies - if: matrix.components.component != 'vm-route-forge' || env.route_forge_skip != 'true' + if: matrix.components.component != 'vm-route-forge' || needs.paths_filter.outputs.vm_route_forge == 'true' run: | echo "Install packages" export DEBIAN_FRONTEND=noninteractive sudo apt-get update sudo apt-get install -y -qq \ - llvm linux-headers-$(uname -r) clang \ + llvm "linux-headers-$(uname -r)" clang \ libbpf-dev uuid-runtime \ gcc-multilib yq sudo apt-get clean - uses: actions/checkout@v4 - if: matrix.components.component != 'vm-route-forge' || env.route_forge_skip != 'true' + if: matrix.components.component != 'vm-route-forge' || needs.paths_filter.outputs.vm_route_forge == 'true' with: fetch-depth: 0 ref: ${{ github.event.pull_request.head.sha }} - name: Run check gen files - ${{ matrix.components.component }} - if: matrix.components.component != 'vm-route-forge' || env.route_forge_skip != 'true' + if: matrix.components.component != 
'vm-route-forge' || needs.paths_filter.outputs.vm_route_forge == 'true' run: | function check_diffs() { local folder=$1 @@ -215,8 +259,9 @@ jobs: ;; api) cd ./api - export GOPATH=$(go env GOPATH) - echo "GOPATH=$GOPATH" >> $GITHUB_ENV + GOPATH="$(go env GOPATH)" + export GOPATH + echo "GOPATH=$GOPATH" >> "$GITHUB_ENV" echo "Installing k8s tools..." go install tool diff --git a/.github/workflows/e2e-matrix.yml b/.github/workflows/e2e-matrix.yml deleted file mode 100644 index 421ef0cd57..0000000000 --- a/.github/workflows/e2e-matrix.yml +++ /dev/null @@ -1,533 +0,0 @@ -# Copyright 2025 Flant JSC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: E2E Matrix Tests (nested clusters) - -on: - workflow_dispatch: - -concurrency: - group: "${{ github.workflow }}-${{ github.event.number || github.ref }}" - cancel-in-progress: true - -defaults: - run: - shell: bash - -jobs: - cleanup-nested-clusters: - name: Cleanup nested clusters - runs-on: ubuntu-latest - steps: - - name: Configure kubectl via azure/k8s-set-context@v4 - uses: azure/k8s-set-context@v4 - with: - method: kubeconfig - context: e2e-cluster-nightly-e2e-virt-sa - kubeconfig: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} - - name: Delete nested clusters - run: | - current_date_seconds="$(date -u +%s)" - FORMAT="%-63s %22s\n" - - # 47h = ~2 days with CI delay; 71h = ~3 days for Friday clusters - KEEP_HOURS=47 - FRIDAY_KEEP_HOURS=71 - - collect_items_json() { - local resource="$1" - kubectl get "${resource}" -l test=nightly-e2e -o json \ - | jq -c '.items[] | {name: .metadata.name, created_at: .metadata.creationTimestamp}' - } - - should_keep() { - local created_at="$1" - local recourse_created_at_seconds age_seconds weekday_of_day - - recourse_created_at_seconds="$(date -d "${created_at}" -u +%s)" - age_seconds="$(( current_date_seconds - recourse_created_at_seconds ))" - weekday_of_day="$(date -d "${created_at}" -u +%u)" - - if [ "${age_seconds}" -lt "$(( KEEP_HOURS * 3600 ))" ]; then - echo "keep" - return 0 - fi - - if [ "${weekday_of_day}" -eq 5 ] && [ "${age_seconds}" -lt "$(( FRIDAY_KEEP_HOURS * 3600 ))" ]; then - echo "keep" - return 0 - fi - - echo "delete" - return 0 - } - - cleanup_kind() { - local kind="$1" - local item name created_at decision parsed - - echo "[INFO] Process ${kind} with label test=nightly-e2e" - collect_items_json "${kind}" | while read -r item; do - name=$(echo $item | jq -r '.name') - created_at=$(echo $item | jq -r '.created_at') - [ -z "${name}" ] && continue - - decision="$(should_keep "${created_at}")" - if [ "${decision}" = "keep" ]; then - printf "$FORMAT" "[INFO] Keep ${kind}/${name}:" "created_at ${created_at}" 
- continue - fi - printf "$FORMAT" "[INFO] Delete ${kind}/${name}:" "created_at ${created_at}" - kubectl delete "${kind}" "${name}" --timeout=300s || true - done || true - } - - cleanup_kind "namespaces" - echo " " - cleanup_kind "vmclass" - - power-off-vms-for-nested: - name: Power off VMs for nested clusters - needs: cleanup-nested-clusters - runs-on: ubuntu-latest - steps: - - name: Configure kubectl via azure/k8s-set-context@v4 - uses: azure/k8s-set-context@v4 - with: - method: kubeconfig - context: e2e-cluster-nightly-e2e-virt-sa - kubeconfig: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} - - name: Power off VMs to free resources for nested cluster setup - run: | - set -euo pipefail - - # Constants (nested cluster: 1 master + 3 workers x2) - REQUIRED_MEM_GI=86 - REQUIRED_CPU=26 - MIN_MEM_GI_PER_NODE=12 - MIN_CPU_PER_NODE=4 - MIN_NODES_FOR_PLACEMENT=3 - POWER_OFF_POLL_INTERVAL_SEC=10 - POWER_OFF_WAIT_TIMEOUT_SEC=180 - - # Helpers: Kubernetes quantity -> numeric (portable, no bash 4+) - mem_to_gi() { - local q="$1" q_lower - q_lower=$(echo "$q" | tr '[:upper:]' '[:lower:]') - if [[ "$q_lower" =~ ^([0-9]+\.?[0-9]*)gi?$ ]]; then - echo "${BASH_REMATCH[1]}" - elif [[ "$q_lower" =~ ^([0-9]+\.?[0-9]*)mi?$ ]]; then - echo "scale=4; ${BASH_REMATCH[1]} / 1024" | bc - elif [[ "$q_lower" =~ ^([0-9]+\.?[0-9]*)ki?$ ]]; then - echo "scale=6; ${BASH_REMATCH[1]} / 1024 / 1024" | bc - elif [[ "$q" =~ ^[0-9]+\.?[0-9]*$ ]]; then - echo "scale=6; $q / 1024 / 1024 / 1024" | bc - else - echo "0" - fi - } - - cpu_to_cores() { - local q="${1:-0}" q_lower - q_lower=$(echo "$q" | tr '[:upper:]' '[:lower:]') - if [[ "$q_lower" == *m ]]; then - echo "scale=4; ${q%[mM]} / 1000" | bc - else - echo "${q:-0}" - fi - } - - # Compare two numbers (bc outputs 0 or 1; (( )) treats 0 as false) - float_ge() { (( $(echo "$1 >= $2" | bc) )); } - float_gt() { (( $(echo "$1 > $2" | bc) )); } - float_le() { (( $(echo "$1 <= $2" | bc) )); } - float_lt() { (( $(echo "$1 < $2" | bc) )); } - - # Gather free 
resources like the scheduler: allocatable - sum(pod requests) per node. - worker_nodes=$(kubectl get nodes -l node-role.kubernetes.io/worker -o jsonpath='{.items[*].metadata.name}') - gather_node_resources() { - local available_mem_gi=0 - local available_cpu=0 - local nodes_meeting_min=0 - local node node_json alloc_mem_gi alloc_cpu pods_json requested_mem_gi requested_cpu - local node_free_mem node_free_cpu node_ok_mem node_ok_cpu - - for node in $worker_nodes; do - [[ -n "$node" ]] || continue - node_json=$(kubectl get node "$node" -o json 2>/dev/null) || true - if [[ -z "$node_json" ]]; then - echo "[WARN] Node $node: could not get node spec, skipping" >&2 - continue - fi - - alloc_mem_gi=$(mem_to_gi "$(echo "$node_json" | jq -r '.status.allocatable.memory // "0"')") - alloc_cpu=$(cpu_to_cores "$(echo "$node_json" | jq -r '.status.allocatable.cpu // "0"')") - - pods_json=$(kubectl get pods -A --field-selector spec.nodeName="$node" -o json 2>/dev/null) || true - requested_mem_gi=0 - requested_cpu=0 - if [[ -n "$pods_json" ]]; then - while read -r qty; do - [[ -z "$qty" ]] && continue - requested_mem_gi=$(echo "$requested_mem_gi + $(mem_to_gi "$qty")" | bc) - done < <(echo "$pods_json" | jq -r ' - .items[] - | select(.status.phase == "Running" or .status.phase == "Pending") - | [(.spec.containers[]? | try .resources.requests.memory catch null), (.spec.initContainers[]? | try .resources.requests.memory catch null)] - | .[] | . // "0" - ') - - while read -r qty; do - [[ -z "$qty" ]] && continue - requested_cpu=$(echo "$requested_cpu + $(cpu_to_cores "$qty")" | bc) - done < <(echo "$pods_json" | jq -r ' - .items[] - | select(.status.phase == "Running" or .status.phase == "Pending") - | [(.spec.containers[]? | try .resources.requests.cpu catch null), (.spec.initContainers[]? | try .resources.requests.cpu catch null)] - | .[] | . 
// "0" - ') - fi - - node_free_mem=$(echo "x = $alloc_mem_gi - $requested_mem_gi; if (x < 0) 0 else x" | bc 2>/dev/null || echo "0") - node_free_cpu=$(echo "x = $alloc_cpu - $requested_cpu; if (x < 0) 0 else x" | bc 2>/dev/null || echo "0") - - available_mem_gi=$(echo "$available_mem_gi + $node_free_mem" | bc) - available_cpu=$(echo "$available_cpu + $node_free_cpu" | bc) - - node_ok_mem=$(echo "$node_free_mem >= $MIN_MEM_GI_PER_NODE" | bc) - node_ok_cpu=$(echo "$node_free_cpu >= $MIN_CPU_PER_NODE" | bc) - if [[ "$node_ok_mem" -eq 1 && "$node_ok_cpu" -eq 1 ]]; then - nodes_meeting_min=$((nodes_meeting_min + 1)) - else - echo "[INFO] Node $node: does not meet placement min — free ${node_free_mem} Gi RAM, ${node_free_cpu} CPU (required: >= ${MIN_MEM_GI_PER_NODE} Gi, >= ${MIN_CPU_PER_NODE} CPU)" >&2 - fi - done - - printf '%s\t%s\t%s\n' "$available_mem_gi" "$available_cpu" "$nodes_meeting_min" - } - - refresh_resource_state() { - IFS=$'\t' read -r available_mem_gi available_cpu nodes_meeting_min < <(gather_node_resources) - deficit_mem=$(echo "$REQUIRED_MEM_GI - $available_mem_gi" | bc 2>/dev/null || echo "$REQUIRED_MEM_GI") - deficit_cpu=$(echo "$REQUIRED_CPU - $available_cpu" | bc 2>/dev/null || echo "$REQUIRED_CPU") - - total_sufficient=false - if float_le "$deficit_mem" 0 && float_le "$deficit_cpu" 0; then - total_sufficient=true - fi - - placement_sufficient=false - if [[ $nodes_meeting_min -ge $MIN_NODES_FOR_PLACEMENT ]]; then - placement_sufficient=true - fi - } - - refresh_resource_state - echo "[INFO] Workers: free ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min} (need at least ${MIN_NODES_FOR_PLACEMENT})" - echo "[INFO] Required: ${REQUIRED_MEM_GI} Gi, ${REQUIRED_CPU} CPU; need >= ${MIN_NODES_FOR_PLACEMENT} nodes with >= ${MIN_MEM_GI_PER_NODE} Gi and >= ${MIN_CPU_PER_NODE} CPU" - echo " " - - if $total_sufficient && $placement_sufficient; then - echo "[INFO] Resources sufficient (total + 
placement), no VMs to power off" - exit 0 - fi - - if $total_sufficient; then - echo "[INFO] Cluster has enough free memory and cpu." - else - shortage_parts="" - float_gt "$deficit_mem" 0 && shortage_parts="${deficit_mem} Gi RAM" - float_gt "$deficit_cpu" 0 && shortage_parts="${shortage_parts:+$shortage_parts, }${deficit_cpu} CPU" - echo "[INFO] Resources shortage: need to free ${shortage_parts}. Proceed with power off some VMs to free cluster resources." - fi - if $placement_sufficient; then - echo "[INFO] Cluster has enough available nodes." - else - echo "[INFO] Available nodes shortage: only ${nodes_meeting_min} node(s) meet free resources requirement, expect at least ${MIN_NODES_FOR_PLACEMENT} available nodes. Proceed with power off some VMs to free resources." - fi - echo "[Note] Will ignore VMs in 'nightly-e2e-*', 'static-cse' namespaces, and VMs with the 'e2e-cluster/do-not-stop-vm-on-e2e-run' label." - echo "[INFO] Power off candidates sorted by memory (largest first); stop when enough resources are freed." 
- - # Power off VMs until we have enough (exclude nightly-e2e-*, static-cse, do-not-stop) - get_vms_candidates() { - kubectl get vm -A -o json | jq -r ' - .items[] - | select(.metadata.namespace | test("^nightly-e2e-|static-cse") | not) - | select(.metadata.labels | tostring | test("e2e-cluster/do-not-stop-vm-on-e2e-run") | not) - | select(.status.phase != "Stopped") - | [.metadata.namespace, .metadata.name, (.spec.memory.size // "0"), (.spec.cpu.cores // 0), (.spec.cpu.coreFraction // "100%")] - | @tsv - ' - } - - # Sort by memory descending (largest first) so we free the most with fewer power-offs - sort_by_mem_desc() { - while IFS=$'\t' read -r ns name mem_qty cores core_frac; do - [[ -n "$ns" ]] || continue - mem_gi=$(mem_to_gi "$mem_qty") - printf '%s\t%s\t%s\t%s\t%s\t%s\n' "$mem_gi" "$ns" "$name" "$mem_qty" "$cores" "$core_frac" - done | sort -t$'\t' -k1,1 -rn - } - - vm_cpu_from_cores_and_fraction() { - local cores="$1" core_frac="$2" frac_pct=100 - [[ "$core_frac" =~ ^([0-9]+)%$ ]] && frac_pct="${BASH_REMATCH[1]}" - echo "scale=2; $cores * $frac_pct / 100" | bc - } - - print_power_off_plan() { - local plan_index=0 cumulative_mem=0 cumulative_cpu=0 - local vm_mem_gi ns name mem_qty cores core_frac vm_cpu - - echo "[INFO] Planned power-off order with projected VM-spec resources:" - echo "[INFO] Projection is based on VM spec memory/cpu; actual placement improvement depends on where workloads are running." - - while IFS=$'\t' read -r vm_mem_gi ns name mem_qty cores core_frac; do - [[ -n "$ns" ]] || continue - plan_index=$((plan_index + 1)) - vm_cpu=$(vm_cpu_from_cores_and_fraction "$cores" "$core_frac") - cumulative_mem=$(echo "$cumulative_mem + $vm_mem_gi" | bc) - cumulative_cpu=$(echo "$cumulative_cpu + $vm_cpu" | bc) - echo "[PLAN] ${plan_index}. 
${ns}/${name} -> ${vm_mem_gi} Gi RAM, ${vm_cpu} CPU (cumulative: ${cumulative_mem} Gi RAM, ${cumulative_cpu} CPU)" - done < "$1" - - if [[ $plan_index -eq 0 ]]; then - echo "[WARN] No VM candidates available for power off" - fi - } - - count_stopped_requested_vms() { - local requested_vms_file="$1" - local stopped_requested=0 total_requested=0 - local ns name phase - - while IFS=$'\t' read -r ns name; do - [[ -n "$ns" ]] || continue - total_requested=$((total_requested + 1)) - phase=$(kubectl get vm -n "$ns" "$name" -o jsonpath='{.status.phase}' 2>/dev/null || echo "Unknown") - if [[ "$phase" == "Stopped" ]]; then - stopped_requested=$((stopped_requested + 1)) - fi - done < "$requested_vms_file" - - printf '%s\t%s\n' "$stopped_requested" "$total_requested" - } - - # Keep powering off while current cluster state still does not satisfy placement or total resources. - still_need_to_free() { - if ! $placement_sufficient; then return 0; fi - if ! $total_sufficient; then return 0; fi - return 1 - } - - vm_candidates_file=$(mktemp) - requested_vms_file=$(mktemp) - trap 'rm -f "$vm_candidates_file" "$requested_vms_file"' EXIT - get_vms_candidates | sort_by_mem_desc > "$vm_candidates_file" - print_power_off_plan "$vm_candidates_file" - - requested_count=0 - while IFS=$'\t' read -r vm_mem_gi ns name mem_qty cores core_frac; do - [[ -n "$ns" ]] || continue - vm_cpu=$(vm_cpu_from_cores_and_fraction "$cores" "$core_frac") - - echo "[INFO] Request power off for vm $ns/$name (${vm_mem_gi} Gi, ${vm_cpu} CPU)" - if ! 
kubectl patch vm -n "$ns" "$name" --type=merge -p '{"spec":{"runPolicy":"AlwaysOff"}}'; then - echo "[WARN] Failed to power off vm $ns/$name, skip it and continue with next candidate" - continue - fi - printf '%s\t%s\n' "$ns" "$name" >> "$requested_vms_file" - requested_count=$((requested_count + 1)) - done < "$vm_candidates_file" - - if [[ $requested_count -eq 0 ]]; then - echo "[ERROR] No running VM candidates available for power off, but resources are still insufficient." - echo "[ERROR] Human intervention is required." - rm -f "$vm_candidates_file" "$requested_vms_file" - trap - EXIT - exit 1 - fi - - echo "[INFO] Requested power off for ${requested_count} VM(s). Waiting up to ${POWER_OFF_WAIT_TIMEOUT_SEC}s and checking cluster resources every ${POWER_OFF_POLL_INTERVAL_SEC}s." - - wait_elapsed=0 - prev_nodes_meeting_min="$nodes_meeting_min" - while true; do - refresh_resource_state - IFS=$'\t' read -r stopped_requested total_requested < <(count_stopped_requested_vms "$requested_vms_file") - echo "[INFO] Current workers free: ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min}" - echo "[INFO] Requested VMs stopped: ${stopped_requested}/${total_requested}; waited ${wait_elapsed}s/${POWER_OFF_WAIT_TIMEOUT_SEC}s" - if [[ $prev_nodes_meeting_min -lt $MIN_NODES_FOR_PLACEMENT && $nodes_meeting_min -ge $MIN_NODES_FOR_PLACEMENT ]]; then - echo "[INFO] Placement now sufficient: ${nodes_meeting_min} nodes with >= ${MIN_MEM_GI_PER_NODE} Gi and >= ${MIN_CPU_PER_NODE} CPU" - fi - prev_nodes_meeting_min="$nodes_meeting_min" - - if ! still_need_to_free; then - break - fi - - if [[ $total_requested -gt 0 && $stopped_requested -eq $total_requested ]]; then - echo "[INFO] All requested VMs are already stopped; no need to wait further." 
- break - fi - - if [[ $wait_elapsed -ge $POWER_OFF_WAIT_TIMEOUT_SEC ]]; then - break - fi - - sleep "$POWER_OFF_POLL_INTERVAL_SEC" - wait_elapsed=$((wait_elapsed + POWER_OFF_POLL_INTERVAL_SEC)) - done - - rm -f "$vm_candidates_file" "$requested_vms_file" - trap - EXIT - - echo "[INFO] Final workers free: ${available_mem_gi} Gi RAM, ${available_cpu} CPU; nodes with enough free resources for placement: ${nodes_meeting_min}" - - if still_need_to_free; then - echo "[ERROR] Stopping VMs did not free enough resources. Human intervention is required." - exit 1 - fi - set-vars: - name: Set vars - needs: power-off-vms-for-nested - runs-on: ubuntu-latest - outputs: - date_start: ${{ steps.vars.outputs.date-start }} - randuuid4c: ${{ steps.vars.outputs.randuuid4c }} - steps: - - name: Set vars - id: vars - run: | - echo "date-start=$(date +%Y%m%d-%H%M%S)" >> $GITHUB_OUTPUT - echo "randuuid4c=$(openssl rand -hex 2)" >> $GITHUB_OUTPUT - - e2e-replicated: - name: E2E Pipeline (Replicated) - needs: - - set-vars - uses: ./.github/workflows/e2e-reusable-pipeline.yml - with: - storage_type: replicated - pipeline_job_name: "E2E Pipeline (Replicated)" - nested_storageclass_name: nested-thin-r1 - nested_cluster_network_name: cn-4006-for-e2e-test - branch: main - virtualization_tag: main - deckhouse_channel: alpha - default_user: cloud - go_version: "1.25.10" - e2e_timeout: "3.5h" - date_start: ${{ needs.set-vars.outputs.date_start }} - randuuid4c: ${{ needs.set-vars.outputs.randuuid4c }} - cluster_config_workers_memory: "9Gi" - cluster_config_k8s_version: "1.34" - apt_mirror_enabled: true - secrets: - DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} - VIRT_E2E_NIGHTLY_SA_TOKEN: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} - PROD_IO_REGISTRY_DOCKER_CFG: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} - BOOTSTRAP_DEV_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }} - E2E_ARTIFACTS_GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} - - e2e-nfs: - name: E2E Pipeline (NFS) - needs: 
- - set-vars - uses: ./.github/workflows/e2e-reusable-pipeline.yml - with: - storage_type: nfs - pipeline_job_name: "E2E Pipeline (NFS)" - nested_storageclass_name: nfs - nested_cluster_network_name: cn-4006-for-e2e-test - branch: main - virtualization_tag: main - deckhouse_channel: alpha - default_user: cloud - go_version: "1.24.13" - e2e_timeout: "3.5h" - date_start: ${{ needs.set-vars.outputs.date_start }} - randuuid4c: ${{ needs.set-vars.outputs.randuuid4c }} - cluster_config_workers_memory: "9Gi" - cluster_config_k8s_version: "Automatic" - apt_mirror_enabled: true - secrets: - DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} - VIRT_E2E_NIGHTLY_SA_TOKEN: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} - PROD_IO_REGISTRY_DOCKER_CFG: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} - BOOTSTRAP_DEV_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }} - E2E_ARTIFACTS_GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} - - report-to-channel: - runs-on: ubuntu-latest - name: End-to-End tests report - needs: - - e2e-replicated - - e2e-nfs - if: ${{ always()}} - steps: - - uses: actions/checkout@v4 - - - name: Download E2E report artifacts - uses: actions/download-artifact@v8 - continue-on-error: true - id: download-artifacts-pattern - with: - pattern: "e2e-report-*" - path: downloaded-artifacts/ - merge-multiple: false - - - name: Set up Python for chart rendering - uses: actions/setup-python@v5 - with: - python-version: "3.12" - cache: pip - cache-dependency-path: .github/scripts/python/requirements.txt - - - name: Install Python chart deps - run: python -m pip install -r .github/scripts/python/requirements.txt - - - name: Generate messenger chart files - run: >- - python .github/scripts/python/e2e_report/charts.py messenger-all - --reports-dir downloaded-artifacts - --out-dir tmp/messenger-charts - --manifest tmp/messenger-charts/manifest.json - - - name: Send results to channel - id: render-report - uses: actions/github-script@v7 - env: - EXPECTED_STORAGE_TYPES: 
'["replicated","nfs"]' - LOOP_API_BASE_URL: ${{ secrets.LOOP_API_BASE_URL }} - LOOP_CHANNEL_ID: ${{ secrets.LOOP_CHANNEL_ID }} - LOOP_TOKEN: ${{ secrets.LOOP_TOKEN }} - with: - script: | - const renderMessengerReport = require('./.github/scripts/js/e2e/report/messenger-report'); - await renderMessengerReport({core}); - - - name: Render top-5 slowest Describes per cluster - run: >- - python .github/scripts/python/e2e_report/charts.py top - --reports-dir downloaded-artifacts - --out-dir tmp/charts - --top-n 5 - - - name: Upload top-5 slowest Describe charts - uses: actions/upload-artifact@v4 - with: - name: e2e-report-slowest-by-describe - path: tmp/charts/ - if-no-files-found: warn diff --git a/.github/workflows/e2e-nightly-reusable-pipeline.yml b/.github/workflows/e2e-nightly-reusable-pipeline.yml new file mode 100644 index 0000000000..bc2e7a307f --- /dev/null +++ b/.github/workflows/e2e-nightly-reusable-pipeline.yml @@ -0,0 +1,702 @@ +# Copyright 2025 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: E2E Nightly Pipeline (Reusable) + +on: + workflow_call: + inputs: + date_start: + required: true + type: string + description: "Date start" + randuuid4c: + required: true + type: string + description: "Random UUID first 4 chars" + cluster_config_k8s_version: + required: false + type: string + default: "Automatic" + description: "Set k8s version for cluster config, like 1.34, 1.36 (without patch version)" + cluster_config_workers_memory: + required: false + type: string + default: "8Gi" + description: "Set memory for workers node in cluster config" + cluster_config_additional_disk_size: + required: false + type: string + default: "50Gi" + description: "Set additional disk size for workers node in cluster config" + storage_type: + required: true + type: string + description: "Storage type (ceph or replicated or etc.)" + nested_storageclass_name: + required: true + type: string + description: "Nested storage class name" + branch: + required: false + type: string + default: "main" + description: "Branch to use" + virtualization_tag: + required: false + type: string + default: "main" + description: "Virtualization tag" + virtualization_image_url: + required: false + type: string + default: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img" + description: "Virtualization image url (default noble-server-cloudimg-amd64.img)" + deckhouse_channel: + required: false + type: string + default: "alpha" + description: "Deckhouse release channel" + pod_subnet_cidr: + required: false + type: string + default: "10.88.0.0/16" + description: "Pod subnet CIDR" + service_subnet_cidr: + required: false + type: string + default: "10.99.0.0/16" + description: "Service subnet CIDR" + default_user: + required: false + type: string + default: "ubuntu" + description: "Default user for vms" + go_version: + required: false + type: string + default: "1.24.6" + description: "Go version" + e2e_timeout: + required: false + type: string + default: "3h" + 
description: "E2E tests timeout" + e2e_focus_tests: + required: false + type: string + default: "" + description: "E2E tests focus tests like 'VirtualMachineConfiguration' and so on (by default all tests are run)" + nested_cluster_network_name: + required: false + type: string + default: "cn-4006-for-e2e-test" + description: "ClusterNetwork name for nested VM additional network interface" + apt_mirror_enabled: + required: false + type: boolean + default: true + description: "Use custom APT mirror inside VMs cloud-init (Ubuntu). When false, VMs use stock Ubuntu repositories." + apt_mirror_name: + required: false + type: string + default: "hetzner" + description: "APT mirror short name (used as /etc/apt/sources.list.d/.list file name)" + apt_mirror_url: + required: false + type: string + default: "https://mirror.hetzner.com/ubuntu/packages" + description: "APT mirror base URL (without trailing slash)" + pipeline_job_name: + required: false + type: string + default: "" + description: "Display name of the calling pipeline job in the parent workflow (e.g. 'E2E Pipeline (Replicated)'). Used to resolve per-stage job URLs in the report." 
+ secrets: + DEV_REGISTRY_DOCKER_CFG: + required: true + VIRT_E2E_NIGHTLY_SA_TOKEN: + required: true + PROD_IO_REGISTRY_DOCKER_CFG: + required: true + BOOTSTRAP_DEV_PROXY: + required: true + E2E_ARTIFACTS_GPG_PASSPHRASE: + required: true + outputs: + artifact-name: + description: "Name of the uploaded artifact with E2E report" + value: ${{ jobs.prepare-report.outputs.artifact-name }} + report_kind: + description: "E2E report kind for the cluster" + value: ${{ jobs.prepare-report.outputs.report_kind }} + status: + description: "E2E report status for the cluster" + value: ${{ jobs.prepare-report.outputs.status }} + failed_stage: + description: "Failed or final stage name for the cluster" + value: ${{ jobs.prepare-report.outputs.failed_stage }} + failed_stage_label: + description: "Human-readable failed or final stage label for the cluster" + value: ${{ jobs.prepare-report.outputs.failed_stage_label }} + workflow_run_url: + description: "Workflow run URL for the cluster pipeline" + value: ${{ jobs.prepare-report.outputs.workflow_run_url }} + branch: + description: "Branch used for the cluster pipeline" + value: ${{ jobs.prepare-report.outputs.branch }} + +env: + BRANCH: ${{ inputs.branch }} + VIRTUALIZATION_TAG: ${{ inputs.virtualization_tag }} + DECKHOUSE_CHANNEL: ${{ inputs.deckhouse_channel }} + DEFAULT_USER: ${{ inputs.default_user }} + GO_VERSION: ${{ inputs.go_version }} + SETUP_CLUSTER_TYPE_PATH: test/dvp-static-cluster + E2E_SCRIPT_DIR: ${{ github.workspace }}/.github/scripts/bash/e2e + K8S_VERSION: ${{ inputs.cluster_config_k8s_version }} + STORAGE_TYPE: ${{ inputs.storage_type }} + E2E_START_TIME: ${{ inputs.date_start }} + +defaults: + run: + shell: bash + +jobs: + bootstrap: + name: Bootstrap cluster + runs-on: ubuntu-latest + concurrency: + group: "${{ github.workflow }}-${{ github.event.number || github.ref }}-${{ inputs.storage_type }}" + cancel-in-progress: true + outputs: + kubeconfig: ${{ steps.generate-kubeconfig.outputs.kubeconfig }} + namespace: 
${{ steps.vars.outputs.namespace }} + steps: + - uses: actions/checkout@v6 + + - name: Set outputs + env: + RANDUUID4C: ${{ inputs.randuuid4c }} + STORAGE_TYPE: ${{ inputs.storage_type }} + id: vars + run: | + GIT_SHORT_HASH=$(git rev-parse --short HEAD) + + namespace="nightly-e2e-$STORAGE_TYPE-$GIT_SHORT_HASH-$RANDUUID4C" + + echo "namespace=$namespace" >> "$GITHUB_OUTPUT" + echo "sha_short=$GIT_SHORT_HASH" >> "$GITHUB_OUTPUT" + + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain + with: + checkout: "false" + install-kubectl: "false" + install-htpasswd: "true" + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Log in to private registry + id: registry-login + uses: ./.github/actions/registry-login + with: + docker_cfg: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} + + - name: Configure kubectl via azure/k8s-set-context@v4 + uses: azure/k8s-set-context@v4 + with: + method: kubeconfig + context: e2e-cluster-nightly-e2e-virt-sa + kubeconfig: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} + + - name: Generate values.yaml + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + env: + NAMESPACE: ${{ steps.vars.outputs.namespace }} + STORAGE_TYPE: ${{ inputs.storage_type }} + DECKHOUSE_CHANNEL: ${{ env.DECKHOUSE_CHANNEL }} + POD_SUBNET_CIDR: ${{ inputs.pod_subnet_cidr }} + SERVICE_SUBNET_CIDR: ${{ inputs.service_subnet_cidr }} + K8S_VERSION: ${{ env.K8S_VERSION }} + PROD_IO_REGISTRY_DOCKER_CFG: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} + DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} + VIRTUALIZATION_IMAGE_URL: ${{ inputs.virtualization_image_url }} + DEFAULT_USER: ${{ env.DEFAULT_USER }} + APT_MIRROR_ENABLED: ${{ inputs.apt_mirror_enabled }} + APT_MIRROR_NAME: ${{ inputs.apt_mirror_name }} + APT_MIRROR_URL: ${{ inputs.apt_mirror_url }} + CLUSTER_CONFIG_WORKERS_MEMORY: ${{ inputs.cluster_config_workers_memory }} + ADDITIONAL_DISK_SIZE: ${{ inputs.cluster_config_additional_disk_size }} + ENABLED_MODULES: "" + 
NESTED_CLUSTER_NETWORK_NAME: ${{ inputs.nested_cluster_network_name }} + run: bash "${E2E_SCRIPT_DIR}/render-dvp-static-values.sh" + + - name: Bootstrap cluster [infra-deploy] + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + run: | + task infra-deploy + + - name: Bootstrap cluster [dhctl-bootstrap] + id: dhctl-bootstrap + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + run: | + if [[ $(yq eval '.deckhouse.proxyEnabled' values.yaml) == true ]]; then + export HTTP_PROXY="${{ secrets.BOOTSTRAP_DEV_PROXY }}" + export HTTPS_PROXY="${{ secrets.BOOTSTRAP_DEV_PROXY }}" + echo "Proxy settings - configured" + fi + + task dhctl-bootstrap + echo "[SUCCESS] Done" + timeout-minutes: 60 + + - name: Bootstrap cluster [show-connection-info] + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + run: | + task show-connection-info + + - name: Save ssh to secrets in cluster + env: + NAMESPACE: ${{ steps.vars.outputs.namespace }} + if: always() && steps.dhctl-bootstrap.outcome == 'success' + run: | + kubectl -n "$NAMESPACE" create secret generic ssh-key --from-file="${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/ssh/cloud" + + - name: Get info about nested cluster and master VM + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + env: + NAMESPACE: ${{ steps.vars.outputs.namespace }} + STORAGE_TYPE: ${{ inputs.storage_type }} + DEFAULT_USER: ${{ env.DEFAULT_USER }} + run: | + bash "${E2E_SCRIPT_DIR}/show-nested-cluster-info.sh" \ + "${NAMESPACE}" \ + "${STORAGE_TYPE}" \ + "${DEFAULT_USER}" + + - name: Generate nested kubeconfig + id: generate-kubeconfig + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + env: + KUBE_CONFIG_PATH: tmp/kube.config + NAMESPACE: ${{ steps.vars.outputs.namespace }} + STORAGE_TYPE: ${{ inputs.storage_type }} + DEFAULT_USER: ${{ env.DEFAULT_USER }} + run: | + bash "${E2E_SCRIPT_DIR}/gen-nested-kubeconfig.sh" \ + "${KUBE_CONFIG_PATH}" \ + "${NAMESPACE}" \ + "${STORAGE_TYPE}" \ + "${DEFAULT_USER}" \ + "$GITHUB_OUTPUT" + + - name: cloud-init 
logs + if: failure() && steps.dhctl-bootstrap.outcome == 'failure' # explicit failure(): a bare outcome check is implicitly and-ed with success() and would never run + env: + NAMESPACE: ${{ steps.vars.outputs.namespace }} + STORAGE_TYPE: ${{ inputs.storage_type }} + DEFAULT_USER: ${{ env.DEFAULT_USER }} + SETUP_CLUSTER_TYPE_PATH: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + run: | + bash "${E2E_SCRIPT_DIR}/collect-cloud-init-logs.sh" \ + "${NAMESPACE}" \ + "${STORAGE_TYPE}" \ + "${DEFAULT_USER}" \ + "${SETUP_CLUSTER_TYPE_PATH}" + + - name: Prepare artifact + if: success() || failure() + env: + SETUP_CLUSTER_TYPE_PATH: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + KUBECONFIG_B64: ${{ steps.generate-kubeconfig.outputs.kubeconfig }} + run: | + bash "${E2E_SCRIPT_DIR}/prepare-artifact.sh" \ + "${SETUP_CLUSTER_TYPE_PATH}" \ + "${KUBECONFIG_B64}" + + - name: Encrypt and upload generated files artifact + if: success() || failure() + uses: ./.github/actions/gpg-encrypt-and-upload + with: + path: tmp values.yaml + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + passphrase: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} + artifact_name: ${{ env.STORAGE_TYPE }}-generated-files-${{ env.E2E_START_TIME }} + retention-days: 3 + + - name: Encrypt and upload ssh config artifact + if: always() + uses: ./.github/actions/gpg-encrypt-and-upload + with: + path: ssh + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp + passphrase: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} + artifact_name: ${{ env.STORAGE_TYPE }}-generated-files-ssh-${{ env.E2E_START_TIME }} + retention-days: 3 + + - name: Encrypt and upload kubeconfig artifact + if: always() + uses: ./.github/actions/gpg-encrypt-and-upload + with: + path: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/kube-config + archive: "false" + passphrase: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} + artifact_name: ${{ env.STORAGE_TYPE }}-generated-files-kubeconfig-${{ env.E2E_START_TIME }} + retention-days: 3 + + - name: Add encrypted artifacts help to job summary + if: always() + uses: ./.github/actions/append-encrypted-artifacts-help + + configure-sdn: + name:
Configure SDN + runs-on: ubuntu-latest + needs: bootstrap + steps: + - uses: actions/checkout@v6 + + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain + with: + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Check nested kube-api via generated kubeconfig + uses: ./.github/actions/use-nested-kubeconfig + with: + kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} + - name: Enable SDN + run: | + # Keep SDN enable and readiness checks in a script because this step needs + # several retries around Deckhouse webhooks and detailed SDN diagnostics. + bash "${E2E_SCRIPT_DIR}/enable-sdn.sh" + + - name: Wait for nodenetworkinterfaces to be ready + run: bash "${E2E_SCRIPT_DIR}/wait-nodenetworkinterfaces.sh" + + - name: Configure ClusterNetwork + run: | + extraNic=$(kubectl get nodenetworkinterfaces -l network.deckhouse.io/interface-type=NIC -o json | jq -r '.items[] | select(.status.operationalState == "Up") | select(.status.ifName != "eno1" and .status.ifName != "enp1s0") | .metadata.name') + + for nic in $extraNic; do + echo "[INFO] Label nodenetworkinterface $nic nic-group=extra" + kubectl label nodenetworkinterfaces "$nic" nic-group=extra + done + + kubectl get nodenetworkinterface -l nic-group=extra + + # Use a retry wrapper because ClusterNetwork admission can race with the + # SDN webhook endpoint right after the module becomes Ready. 
+ cat <<'EOF' | bash "${E2E_SCRIPT_DIR}/apply-clusternetworks.sh" + --- + apiVersion: network.deckhouse.io/v1alpha1 + kind: ClusterNetwork + metadata: + name: cn-4006-for-e2e-test + spec: + parentNodeNetworkInterfaces: + labelSelector: + matchLabels: + nic-group: extra + type: Access + --- + apiVersion: network.deckhouse.io/v1alpha1 + kind: ClusterNetwork + metadata: + name: cn-4007-for-e2e-test + spec: + parentNodeNetworkInterfaces: + labelSelector: + matchLabels: + nic-group: extra + type: VLAN + vlan: + id: 4007 + EOF + + echo "[INFO] Wait for ClusterNetwork cn-4006-for-e2e-test to be ready" + kubectl wait clusternetworks.network.deckhouse.io --for=condition=Ready cn-4006-for-e2e-test --timeout=120s + + echo "[INFO] Wait for ClusterNetwork cn-4007-for-e2e-test to be ready" + kubectl wait clusternetworks.network.deckhouse.io --for=condition=Ready cn-4007-for-e2e-test --timeout=120s + + configure-storage: + name: Configure storage + runs-on: ubuntu-latest + needs: + - configure-sdn + - bootstrap + steps: + - uses: actions/checkout@v6 + + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain + with: + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Check nested kube-api via generated kubeconfig + uses: ./.github/actions/use-nested-kubeconfig + with: + kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} + - name: Configure replicated storage + id: storage-replicated-setup + if: ${{ inputs.storage_type == 'replicated' }} + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/sds-replicated + run: bash "${E2E_SCRIPT_DIR}/configure-sds-replicated.sh" + + - name: Configure NFS storage + if: ${{ inputs.storage_type == 'nfs' }} + id: storage-nfs-setup + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/nfs + env: + NAMESPACE: ${{ needs.bootstrap.outputs.namespace }} + run: bash "${E2E_SCRIPT_DIR}/configure-csi-nfs.sh" + + configure-virtualization: + name: Configure Virtualization + runs-on: ubuntu-latest 
+ needs: + - bootstrap + - configure-storage + steps: + - uses: actions/checkout@v6 + + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain + with: + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Setup kubeconfig + uses: ./.github/actions/use-nested-kubeconfig + with: + kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} + check-api: "false" + - name: Configure Virtualization + env: + DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} + NESTED_STORAGE_CLASS_NAME: ${{ inputs.nested_storageclass_name }} + VIRTUALIZATION_TAG: ${{ env.VIRTUALIZATION_TAG }} + run: | + # Keep virtualization configuration in a script because it waits for + # Deckhouse queue/source propagation before applying ModuleConfig. + bash "${E2E_SCRIPT_DIR}/configure-virtualization.sh" + - name: Wait for Virtualization to be ready + env: + STORAGE_TYPE: ${{ inputs.storage_type }} + run: | + source "${E2E_SCRIPT_DIR}/wait-virtualization-ready.sh" + + echo " " + echo "[INFO] Waiting for Virtualization module to be ready" + d8_queue + + virtualization_ready + + echo "[INFO] Checking Virtualization module deployments" + kubectl -n d8-virtualization wait --for=condition=Available deploy --all --timeout 900s + echo "[INFO] Checking virt-handler pods " + virt_handler_ready + + enable_maintenance_mode "${STORAGE_TYPE}" + + e2e-test: + name: E2E test + runs-on: ubuntu-latest + needs: + - bootstrap + - configure-storage + - configure-virtualization + steps: + - uses: actions/checkout@v6 + + - name: Set up Go ${{ env.GO_VERSION }} + uses: actions/setup-go@v5 + with: + go-version: "${{ env.GO_VERSION }}" + + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain + with: + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Install ginkgo + working-directory: ./test/e2e/ + run: | + echo "Install ginkgo" + go install tool + + - name: Setup kubeconfig + uses: ./.github/actions/use-nested-kubeconfig + with: + 
kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} + check-api: "false" + - name: Download dependencies + working-directory: ./test/e2e/ + run: | + echo "Download dependencies" + go mod download + + - name: Detect Kubernetes version for E2E + id: detect-k8s-version + run: bash "${E2E_SCRIPT_DIR}/detect-k8s-version.sh" "$GITHUB_OUTPUT" + + - name: Create vmclass for e2e tests + run: bash "${E2E_SCRIPT_DIR}/create-e2e-vmclass.sh" + + - name: Run E2E + id: e2e-report + env: + TIMEOUT: ${{ inputs.e2e_timeout }} + CSI: ${{ inputs.storage_type }} + STORAGE_CLASS_NAME: ${{ inputs.nested_storageclass_name }} + LABELS: ${{ steps.detect-k8s-version.outputs.label-filter }} + SERVER_K8S_VERSION: ${{ steps.detect-k8s-version.outputs.server-version }} + USB_SUPPORTED: ${{ steps.detect-k8s-version.outputs.usb-supported }} + FOCUS: ${{ inputs.e2e_focus_tests }} + working-directory: ./test/e2e/ + run: task run:ci + + - name: Upload summary test results (json) + uses: actions/upload-artifact@v7 + id: e2e-report-artifact + if: always() && steps.e2e-report.outcome != 'skipped' + with: + name: e2e-test-results-${{ inputs.storage_type }}-${{ github.run_id }}-${{ inputs.date_start }} + path: | + test/e2e/e2e_report_*.json + test/e2e/e2e_output_*.log + if-no-files-found: ignore + overwrite: true + retention-days: 3 + + - name: Upload resources from failed tests + uses: actions/upload-artifact@v7 + if: always() && steps.e2e-report.outcome != 'skipped' + with: + name: DUMP-${{ env.STORAGE_TYPE }}-${{ env.E2E_START_TIME }} + path: ${{ runner.temp }}/e2e_failed/ + if-no-files-found: ignore + retention-days: 3 + + prepare-report: + name: Prepare E2E report + runs-on: ubuntu-latest + needs: + - bootstrap + - configure-sdn + - configure-storage + - configure-virtualization + - e2e-test + if: always() + outputs: + artifact-name: ${{ steps.set-artifact-name.outputs.artifact-name }} + report_kind: ${{ steps.determine-stage.outputs.report_kind }} + status: ${{ steps.determine-stage.outputs.status 
}} + failed_stage: ${{ steps.determine-stage.outputs.failed_stage }} + failed_stage_label: ${{ steps.determine-stage.outputs.failed_stage_label }} + workflow_run_url: ${{ steps.determine-stage.outputs.workflow_run_url }} + branch: ${{ steps.determine-stage.outputs.branch }} + steps: + - uses: actions/checkout@v6 + + - name: Download E2E test results if available + uses: actions/download-artifact@v8 + continue-on-error: true + with: + name: e2e-test-results-${{ inputs.storage_type }}-${{ github.run_id }}-${{ inputs.date_start }} + path: test/e2e/ + + - name: Determine failed stage and prepare report + id: determine-stage + uses: actions/github-script@v7 + env: + STORAGE_TYPE: ${{ inputs.storage_type }} + PIPELINE_JOB_NAME: ${{ inputs.pipeline_job_name }} + NEEDS_CONTEXT: ${{ toJSON(needs) }} + with: + script: | + const buildClusterReport = require('./.github/scripts/js/e2e/report/cluster-report'); + await buildClusterReport({ + core, + context, + github, + }); + + - name: Upload E2E report artifact + id: upload-artifact + uses: actions/upload-artifact@v7 + with: + name: e2e-report-${{ inputs.storage_type }}-${{ github.run_id }}-${{ inputs.date_start }} + path: ${{ steps.determine-stage.outputs.report_file }} + overwrite: true + retention-days: 3 + + - name: Set artifact name output + id: set-artifact-name + run: | + ARTIFACT_NAME="e2e-report-${{ inputs.storage_type }}-${{ github.run_id }}-${{ inputs.date_start }}" + echo "artifact-name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT" + echo "[INFO] Artifact name: $ARTIFACT_NAME" + + undeploy-cluster: + name: Undeploy cluster + runs-on: ubuntu-latest + needs: + - bootstrap + - configure-sdn + - configure-storage + - configure-virtualization + - e2e-test + if: cancelled() || success() + steps: + - uses: actions/checkout@v6 + + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain + with: + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Download artifacts + uses: 
actions/download-artifact@v8 + with: + name: ${{ env.STORAGE_TYPE }}-generated-files-${{ env.E2E_START_TIME }}.zip.gpg + path: ${{ runner.temp }}/encrypted-generated-files + + - name: Decrypt generated files artifact + env: + GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} + ARTIFACT_NAME: ${{ env.STORAGE_TYPE }}-generated-files-${{ env.E2E_START_TIME }} + run: | + artifact_path="${{ runner.temp }}/encrypted-generated-files/${ARTIFACT_NAME}.zip.gpg" + gpg --decrypt --batch --yes --pinentry-mode loopback \ + --passphrase "$GPG_PASSPHRASE" \ + --output "${RUNNER_TEMP}/${ARTIFACT_NAME}.zip" \ + "$artifact_path" + unzip -o "${RUNNER_TEMP}/${ARTIFACT_NAME}.zip" -d "${{ env.SETUP_CLUSTER_TYPE_PATH }}" + + - name: Configure kubectl via azure/k8s-set-context@v4 + uses: azure/k8s-set-context@v4 + with: + method: kubeconfig + context: e2e-cluster-nightly-e2e-virt-sa + kubeconfig: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} + + - name: infra-undeploy + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + run: | + task infra-undeploy diff --git a/.github/workflows/e2e-nightly.yml b/.github/workflows/e2e-nightly.yml new file mode 100644 index 0000000000..e79fc34175 --- /dev/null +++ b/.github/workflows/e2e-nightly.yml @@ -0,0 +1,201 @@ +# Copyright 2025 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: E2E Nightly + +on: + workflow_dispatch: + +concurrency: + group: "${{ github.workflow }}-${{ github.event.number || github.ref }}" + cancel-in-progress: true + +defaults: + run: + shell: bash + +jobs: + cleanup-nested-clusters: + name: Cleanup nested clusters + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Configure kubectl via azure/k8s-set-context@v4 + uses: azure/k8s-set-context@v4 + with: + method: kubeconfig + context: e2e-cluster-nightly-e2e-virt-sa + kubeconfig: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} + + - name: Delete nested clusters + env: + LABEL_SELECTOR: test=nightly-e2e + KEEP_HOURS: "47" + FRIDAY_KEEP_HOURS: "71" + run: bash .github/scripts/bash/e2e/cleanup-nightly-resources.sh + + power-off-vms-for-nested: + name: Power off VMs for nested clusters + needs: cleanup-nested-clusters + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v6 + + - name: Configure kubectl via azure/k8s-set-context@v4 + uses: azure/k8s-set-context@v4 + with: + method: kubeconfig + context: e2e-cluster-nightly-e2e-virt-sa + kubeconfig: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} + + - name: Power off VMs to free resources for nested cluster setup + run: bash .github/scripts/bash/e2e/power-off-nested-vms.sh # via bash, like the other script steps; no dependence on the executable bit + + set-vars: + name: Set vars + needs: power-off-vms-for-nested + runs-on: ubuntu-latest + outputs: + date_start: ${{ steps.vars.outputs.date_start }} + randuuid4c: ${{ steps.vars.outputs.randuuid4c }} + steps: + - uses: actions/checkout@v6 + + - name: Set vars + id: vars + uses: ./.github/actions/gen-run-id + + e2e-replicated: + name: E2E Pipeline (Replicated) + needs: + - set-vars + uses: ./.github/workflows/e2e-nightly-reusable-pipeline.yml + with: + storage_type: replicated + pipeline_job_name: "E2E Pipeline (Replicated)" + nested_storageclass_name: nested-thin-r1 + nested_cluster_network_name: cn-4006-for-e2e-test + branch: main + virtualization_tag: main + deckhouse_channel: alpha +
default_user: cloud + go_version: "1.25.10" + e2e_timeout: "3.5h" + date_start: ${{ needs.set-vars.outputs.date_start }} + randuuid4c: ${{ needs.set-vars.outputs.randuuid4c }} + cluster_config_workers_memory: "9Gi" + cluster_config_additional_disk_size: "50Gi" + cluster_config_k8s_version: "1.34" + apt_mirror_enabled: true + secrets: + DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} + VIRT_E2E_NIGHTLY_SA_TOKEN: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} + PROD_IO_REGISTRY_DOCKER_CFG: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} + BOOTSTRAP_DEV_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }} + E2E_ARTIFACTS_GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} + + e2e-nfs: + name: E2E Pipeline (NFS) + needs: + - set-vars + uses: ./.github/workflows/e2e-nightly-reusable-pipeline.yml + with: + storage_type: nfs + pipeline_job_name: "E2E Pipeline (NFS)" + nested_storageclass_name: nfs + nested_cluster_network_name: cn-4006-for-e2e-test + branch: main + virtualization_tag: main + deckhouse_channel: alpha + default_user: cloud + go_version: "1.24.13" + e2e_timeout: "3.5h" + date_start: ${{ needs.set-vars.outputs.date_start }} + randuuid4c: ${{ needs.set-vars.outputs.randuuid4c }} + cluster_config_workers_memory: "9Gi" + cluster_config_additional_disk_size: "50Gi" + cluster_config_k8s_version: "Automatic" + apt_mirror_enabled: true + secrets: + DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} + VIRT_E2E_NIGHTLY_SA_TOKEN: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} + PROD_IO_REGISTRY_DOCKER_CFG: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} + BOOTSTRAP_DEV_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }} + E2E_ARTIFACTS_GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} + + report-to-channel: + runs-on: ubuntu-latest + name: End-to-End tests report + needs: + - e2e-replicated + - e2e-nfs + if: ${{ always()}} + steps: + - uses: actions/checkout@v6 + + - name: Download E2E report artifacts + uses: actions/download-artifact@v8 + continue-on-error: true 
+ id: download-artifacts-pattern + with: + pattern: "e2e-report-*" + path: downloaded-artifacts/ + merge-multiple: false + + - name: Set up Python for chart rendering + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + cache-dependency-path: .github/scripts/python/requirements.txt + + - name: Install Python chart deps + run: python -m pip install -r .github/scripts/python/requirements.txt + + - name: Generate messenger chart files + run: >- + python .github/scripts/python/e2e_report/charts.py messenger-all + --reports-dir downloaded-artifacts + --out-dir tmp/messenger-charts + --manifest tmp/messenger-charts/manifest.json + + - name: Send results to channel + id: render-report + uses: actions/github-script@v7 + env: + EXPECTED_STORAGE_TYPES: '["replicated","nfs"]' + LOOP_API_BASE_URL: ${{ secrets.LOOP_API_BASE_URL }} + LOOP_CHANNEL_ID: ${{ secrets.LOOP_CHANNEL_ID }} + LOOP_TOKEN: ${{ secrets.LOOP_TOKEN }} + with: + script: | + const renderMessengerReport = require('./.github/scripts/js/e2e/report/messenger-report'); + await renderMessengerReport({core}); + + - name: Render top-5 slowest Describes per cluster + run: >- + python .github/scripts/python/e2e_report/charts.py top + --reports-dir downloaded-artifacts + --out-dir tmp/charts + --top-n 5 + + - name: Upload top-5 slowest Describe charts + uses: actions/upload-artifact@v7 + with: + name: e2e-report-slowest-by-describe + path: tmp/charts/ + if-no-files-found: warn diff --git a/.github/workflows/e2e-reusable-pipeline.yml b/.github/workflows/e2e-reusable-pipeline.yml deleted file mode 100644 index 9a452bb0b0..0000000000 --- a/.github/workflows/e2e-reusable-pipeline.yml +++ /dev/null @@ -1,1546 +0,0 @@ -# Copyright 2025 Flant JSC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -name: E2E Pipeline (Reusable) - -on: - workflow_call: - inputs: - date_start: - required: true - type: string - description: "Date start" - randuuid4c: - required: true - type: string - description: "Random UUID first 4 chars" - cluster_config_k8s_version: - required: false - type: string - default: "Automatic" - description: "Set k8s version for cluster config, like 1.34, 1.36 (without patch version)" - cluster_config_workers_memory: - required: false - type: string - default: "8Gi" - description: "Set memory for workers node in cluster config" - storage_type: - required: true - type: string - description: "Storage type (ceph or replicated or etc.)" - nested_storageclass_name: - required: true - type: string - description: "Nested storage class name" - branch: - required: false - type: string - default: "main" - description: "Branch to use" - virtualization_tag: - required: false - type: string - default: "main" - description: "Virtualization tag" - virtualization_image_url: - required: false - type: string - default: "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img" - description: "Virtualization image url (default noble-server-cloudimg-amd64.img)" - deckhouse_channel: - required: false - type: string - default: "alpha" - description: "Deckhouse release channel" - pod_subnet_cidr: - required: false - type: string - default: "10.88.0.0/16" - description: "Pod subnet CIDR" - service_subnet_cidr: - required: false - type: string - default: "10.99.0.0/16" - description: "Service subnet CIDR" - default_user: - required: false - 
type: string - default: "ubuntu" - description: "Default user for vms" - go_version: - required: false - type: string - default: "1.24.6" - description: "Go version" - e2e_timeout: - required: false - type: string - default: "3h" - description: "E2E tests timeout" - e2e_focus_tests: - required: false - type: string - default: "" - description: "E2E tests focus tests like 'VirtualMachineConfiguration' and so on (by default all tests are run)" - nested_cluster_network_name: - required: false - type: string - default: "cn-4006-for-e2e-test" - description: "ClusterNetwork name for nested VM additional network interface" - apt_mirror_enabled: - required: false - type: boolean - default: true - description: "Use custom APT mirror inside VMs cloud-init (Ubuntu). When false, VMs use stock Ubuntu repositories." - apt_mirror_name: - required: false - type: string - default: "hetzner" - description: "APT mirror short name (used as /etc/apt/sources.list.d/.list file name)" - apt_mirror_url: - required: false - type: string - default: "https://mirror.hetzner.com/ubuntu/packages" - description: "APT mirror base URL (without trailing slash)" - pipeline_job_name: - required: false - type: string - default: "" - description: "Display name of the calling pipeline job in the parent workflow (e.g. 'E2E Pipeline (Replicated)'). Used to resolve per-stage job URLs in the report." 
- secrets: - DEV_REGISTRY_DOCKER_CFG: - required: true - VIRT_E2E_NIGHTLY_SA_TOKEN: - required: true - PROD_IO_REGISTRY_DOCKER_CFG: - required: true - BOOTSTRAP_DEV_PROXY: - required: true - E2E_ARTIFACTS_GPG_PASSPHRASE: - required: true - outputs: - artifact-name: - description: "Name of the uploaded artifact with E2E report" - value: ${{ jobs.prepare-report.outputs.artifact-name }} - report_kind: - description: "E2E report kind for the cluster" - value: ${{ jobs.prepare-report.outputs.report_kind }} - status: - description: "E2E report status for the cluster" - value: ${{ jobs.prepare-report.outputs.status }} - failed_stage: - description: "Failed or final stage name for the cluster" - value: ${{ jobs.prepare-report.outputs.failed_stage }} - failed_stage_label: - description: "Human-readable failed or final stage label for the cluster" - value: ${{ jobs.prepare-report.outputs.failed_stage_label }} - workflow_run_url: - description: "Workflow run URL for the cluster pipeline" - value: ${{ jobs.prepare-report.outputs.workflow_run_url }} - branch: - description: "Branch used for the cluster pipeline" - value: ${{ jobs.prepare-report.outputs.branch }} - -env: - BRANCH: ${{ inputs.branch }} - VIRTUALIZATION_TAG: ${{ inputs.virtualization_tag }} - DECKHOUSE_CHANNEL: ${{ inputs.deckhouse_channel }} - DEFAULT_USER: ${{ inputs.default_user }} - GO_VERSION: ${{ inputs.go_version }} - SETUP_CLUSTER_TYPE_PATH: test/dvp-static-cluster - K8S_VERSION: ${{ inputs.cluster_config_k8s_version }} - STORAGE_TYPE: ${{ inputs.storage_type }} - E2E_START_TIME: ${{ inputs.date_start }} - -defaults: - run: - shell: bash - -jobs: - bootstrap: - name: Bootstrap cluster - runs-on: ubuntu-latest - concurrency: - group: "${{ github.workflow }}-${{ github.event.number || github.ref }}-${{ inputs.storage_type }}" - cancel-in-progress: true - outputs: - kubeconfig: ${{ steps.generate-kubeconfig.outputs.kubeconfig }} - namespace: ${{ steps.vars.outputs.namespace }} - steps: - - uses: 
actions/checkout@v4 - - - name: Set outputs - env: - RANDUUID4C: ${{ inputs.randuuid4c }} - STORAGE_TYPE: ${{ inputs.storage_type }} - id: vars - run: | - GIT_SHORT_HASH=$(git rev-parse --short HEAD) - - namespace="nightly-e2e-$STORAGE_TYPE-$GIT_SHORT_HASH-$RANDUUID4C" - - echo "namespace=$namespace" >> $GITHUB_OUTPUT - echo "sha_short=$GIT_SHORT_HASH" >> $GITHUB_OUTPUT - - REGISTRY=$(base64 -d <<< ${{secrets.PROD_IO_REGISTRY_DOCKER_CFG}} | jq '.auths | to_entries | .[] | .key' -r) - echo "registry=$REGISTRY" >> $GITHUB_OUTPUT - - - name: Install htpasswd utility - run: | - sudo apt-get update - sudo apt-get install -y apache2-utils - - - name: Install Task - uses: go-task/setup-task@v2 - with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Log in to private registry - env: - REGISTRY: ${{ steps.vars.outputs.registry }} - run: | - USERNAME=$(base64 -d <<< "${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }}" | jq -r '.auths | to_entries | .[] | .value.auth' | base64 -d | cut -d ':' -f1) - PASSWORD=$(base64 -d <<< "${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }}" | jq -r '.auths | to_entries | .[] | .value.auth' | base64 -d | cut -d ':' -f2) - echo "::add-mask::$USERNAME" - echo "::add-mask::$PASSWORD" - echo "$PASSWORD" | docker login "$REGISTRY" --username "$USERNAME" --password-stdin - - - name: Configure kubectl via azure/k8s-set-context@v4 - uses: azure/k8s-set-context@v4 - with: - method: kubeconfig - context: e2e-cluster-nightly-e2e-virt-sa - kubeconfig: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} - - - name: Generate values.yaml - working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} - run: | - defaultStorageClass=$(kubectl get storageclass -o json \ - | jq -r '.items[] | select(.metadata.annotations."storageclass.kubernetes.io/is-default-class" == "true") | .metadata.name') - - cat < values.yaml - namespace: ${{ steps.vars.outputs.namespace }} - 
storageType: ${{ inputs.storage_type }} - storageClass: ${defaultStorageClass} - sa: dkp-sa - deckhouse: - channel: ${{ env.DECKHOUSE_CHANNEL }} - podSubnetCIDR: ${{ inputs.pod_subnet_cidr }} - serviceSubnetCIDR: ${{ inputs.service_subnet_cidr }} - kubernetesVersion: ${{ env.K8S_VERSION }} - registryDockerCfg: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} - bundle: Default - proxyEnabled: false - image: - url: ${{ inputs.virtualization_image_url }} - defaultUser: ${{ env.DEFAULT_USER }} - bootloader: BIOS - ingressHosts: - - api - - grafana - - dex - - prometheus - - console - - virtualization - instances: - aptMirror: - enabled: ${{ inputs.apt_mirror_enabled }} - name: ${{ inputs.apt_mirror_name }} - url: ${{ inputs.apt_mirror_url }} - masterNodes: - count: 1 - cfg: - rootDiskSize: 60Gi - cpu: - cores: 4 - coreFraction: 50% - memory: - size: 12Gi - additionalNodes: - - name: worker - count: 3 - cfg: - cpu: - cores: 6 - coreFraction: 50% - memory: - size: ${{ inputs.cluster_config_workers_memory }} - additionalDisks: - - size: 50Gi - networkConfig: - clusterNetworkName: ${{ inputs.nested_cluster_network_name }} - EOF - - mkdir -p tmp - touch tmp/discovered-values.yaml - - export REGISTRY=$(base64 -d <<< ${{secrets.DEV_REGISTRY_DOCKER_CFG}} | jq '.auths | to_entries | .[] | .key' -r) - export AUTH=$(base64 -d <<< ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} | jq '.auths | to_entries | .[] | .value.auth' -r) - - yq eval --inplace '.discovered.registry_url = env(REGISTRY)' tmp/discovered-values.yaml - yq eval --inplace '.discovered.registry_auth = env(AUTH)' tmp/discovered-values.yaml - - - name: Bootstrap cluster [infra-deploy] - working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} - run: | - task infra-deploy - - name: Bootstrap cluster [dhctl-bootstrap] - id: dhctl-bootstrap - working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} - run: | - if [[ $(yq eval '.deckhouse.proxyEnabled' values.yaml) == true ]]; then - export HTTP_PROXY="${{ secrets.BOOTSTRAP_DEV_PROXY }}" - 
export HTTPS_PROXY="${{ secrets.BOOTSTRAP_DEV_PROXY }}" - echo "Proxy settings - configured" - fi - - task dhctl-bootstrap - echo "[SUCCESS] Done" - timeout-minutes: 60 - - name: Bootstrap cluster [show-connection-info] - working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} - run: | - task show-connection-info - - - name: Save ssh to secrets in cluster - env: - NAMESPACE: ${{ steps.vars.outputs.namespace }} - if: always() && steps.dhctl-bootstrap.outcome == 'success' - run: | - kubectl -n $NAMESPACE create secret generic ssh-key --from-file=${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/ssh/cloud - - - name: Get info about nested cluster and master VM - working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} - env: - NAMESPACE: ${{ steps.vars.outputs.namespace }} - PREFIX: ${{ inputs.storage_type }} - run: | - nested_master=$(kubectl -n ${NAMESPACE} get vm -l group=${PREFIX}-master -o jsonpath="{.items[0].metadata.name}") - - d8vssh() { - local host=$1 - local cmd=$2 - d8 v ssh -i ./tmp/ssh/cloud \ - --local-ssh=true \ - --local-ssh-opts="-o StrictHostKeyChecking=no" \ - --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \ - --local-ssh-opts="-o ServerAliveInterval=15" \ - --local-ssh-opts="-o ServerAliveCountMax=8" \ - --local-ssh-opts="-o ConnectTimeout=10" \ - ${DEFAULT_USER}@${host}.${NAMESPACE} \ - -c "$cmd" - } - - echo "[INFO] Pods in namespace $NAMESPACE" - kubectl get pods -n "${NAMESPACE}" - echo "" - - echo "[INFO] VMs in namespace $NAMESPACE" - kubectl get vm -n "${NAMESPACE}" - echo "" - - echo "[INFO] VDs in namespace $NAMESPACE" - kubectl get vd -n "${NAMESPACE}" - echo "" - - echo "Check connection to master" - d8vssh "${nested_master}" 'echo master os-release: ; cat /etc/os-release; echo " "; echo master hostname: ; hostname' - echo "" - - - name: Generate nested kubeconfig - id: generate-kubeconfig - working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} - env: - kubeConfigPath: tmp/kube.config - NAMESPACE: ${{ steps.vars.outputs.namespace }} - PREFIX: ${{ 
inputs.storage_type }} - run: | - nested_master=$(kubectl -n ${NAMESPACE} get vm -l group=${PREFIX}-master -o jsonpath="{.items[0].metadata.name}") - - d8vscp() { - local source=$1 - local dest=$2 - d8 v scp -i ./tmp/ssh/cloud \ - --local-ssh=true \ - --local-ssh-opts="-o StrictHostKeyChecking=no" \ - --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \ - "$source" "$dest" - echo "d8vscp: $source -> $dest - done" - } - - d8vssh() { - local cmd=$1 - d8 v ssh -i ./tmp/ssh/cloud \ - --local-ssh=true \ - --local-ssh-opts="-o StrictHostKeyChecking=no" \ - --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \ - --local-ssh-opts="-o ServerAliveInterval=15" \ - --local-ssh-opts="-o ServerAliveCountMax=8" \ - --local-ssh-opts="-o ConnectTimeout=10" \ - ${DEFAULT_USER}@${nested_master}.${NAMESPACE} \ - -c "$cmd" - } - - echo "[INFO] Copy script for generating kubeconfig in nested cluster" - echo "[INFO] Copy scripts/gen-kubeconfig.sh to master" - d8vscp "./scripts/gen-kubeconfig.sh" "${DEFAULT_USER}@${nested_master}.${NAMESPACE}:/tmp/gen-kubeconfig.sh" - echo "" - d8vscp "./scripts/deckhouse-queue.sh" "${DEFAULT_USER}@${nested_master}.${NAMESPACE}:/tmp/deckhouse-queue.sh" - echo "" - - echo "[INFO] Set file exec permissions" - d8vssh 'chmod +x /tmp/{gen-kubeconfig.sh,deckhouse-queue.sh}' - d8vssh 'ls -la /tmp/' - echo "[INFO] Check d8 queue in nested cluster" - d8vssh 'sudo /tmp/deckhouse-queue.sh' - - echo "[INFO] Generate kube conf in nested cluster" - echo "[INFO] Run gen-kubeconfig.sh in nested cluster" - d8vssh "sudo /tmp/gen-kubeconfig.sh nested-sa nested nested-e2e /${kubeConfigPath}" - echo "" - - echo "[INFO] Copy kubeconfig to runner" - echo "[INFO] ${DEFAULT_USER}@${nested_master}.$NAMESPACE:/${kubeConfigPath} ./${kubeConfigPath}" - d8vscp "${DEFAULT_USER}@${nested_master}.$NAMESPACE:/${kubeConfigPath}" "./${kubeConfigPath}" - - echo "[INFO] Set rights for kubeconfig" - echo "[INFO] sudo chown 1001:1001 ${kubeConfigPath}" - sudo chown 1001:1001 ${kubeConfigPath} - 
echo " " - - echo "[INFO] Kubeconf to github output" - CONFIG=$(cat ${kubeConfigPath} | base64 -w 0) - CONFIG=$(echo $CONFIG | base64 -w 0) - echo "kubeconfig=$CONFIG" >> $GITHUB_OUTPUT - - - name: cloud-init logs - if: steps.dhctl-bootstrap.outcome == 'failure' - env: - NAMESPACE: ${{ steps.vars.outputs.namespace }} - PREFIX: ${{ inputs.storage_type }} - run: | - nested_master=$(kubectl -n ${NAMESPACE} get vm -l group=${PREFIX}-master -o jsonpath="{.items[0].metadata.name}") - - d8vscp() { - local source=$1 - local dest=$2 - d8 v scp -i ./tmp/ssh/cloud \ - --local-ssh=true \ - --local-ssh-opts="-o StrictHostKeyChecking=no" \ - --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \ - "$source" "$dest" - echo "d8vscp: $source -> $dest - done" - } - - d8vscp "${DEFAULT_USER}@${nested_master}.$NAMESPACE:/var/log/cloud-init*.log" "./${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/" - - - name: Prepare artifact - if: success() || failure() - run: | - sudo chown -fR 1001:1001 ${{ env.SETUP_CLUSTER_TYPE_PATH }} || true - yq e '.deckhouse.registryDockerCfg = "None"' -i ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/values.yaml || true - yq e 'select(.kind == "InitConfiguration").deckhouse.registryDockerCfg = "None"' -i ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/config.yaml || echo "The config.yaml file is not generated, skipping" - yq e '.discovered.registry_url = "None"' -i ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/discovered-values.yaml || echo "The discovered-values.yaml file is not generated, skipping editing registry_url" - yq e '.discovered.registry_auth = "None"' -i ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/discovered-values.yaml || echo "The discovered-values.yaml file is not generated, skipping editing registry_auth" - echo "${{ steps.generate-kubeconfig.outputs.kubeconfig }}" | base64 -d | base64 -d > ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/kube-config || echo "kubeconfig not available, skipping" - - - name: Encrypt generated files artifact - if: success() || failure() - env: - 
GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} - ARTIFACT_NAME: ${{ env.STORAGE_TYPE }}-generated-files-${{ env.E2E_START_TIME }} - run: | - pushd ${{ env.SETUP_CLUSTER_TYPE_PATH }} - zip -r $RUNNER_TEMP/${ARTIFACT_NAME}.zip tmp values.yaml - popd - gpg --symmetric --batch --yes --pinentry-mode loopback \ - --passphrase "$GPG_PASSPHRASE" \ - --cipher-algo AES256 \ - --output $RUNNER_TEMP/${ARTIFACT_NAME}.zip.gpg \ - $RUNNER_TEMP/${ARTIFACT_NAME}.zip - rm -f $RUNNER_TEMP/${ARTIFACT_NAME}.zip - - - name: Upload generated files - uses: actions/upload-artifact@v7 - if: success() || failure() - with: - path: ${{ runner.temp }}/${{ env.STORAGE_TYPE }}-generated-files-${{ env.E2E_START_TIME }}.zip.gpg - overwrite: true - include-hidden-files: true - retention-days: 3 - archive: false - - - name: Encrypt ssh config artifact - if: always() - env: - GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} - ARTIFACT_NAME: ${{ env.STORAGE_TYPE }}-generated-files-ssh-${{ env.E2E_START_TIME }} - run: | - pushd ${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp - zip -r $RUNNER_TEMP/${ARTIFACT_NAME}.zip ssh - popd - gpg --symmetric --batch --yes --pinentry-mode loopback \ - --passphrase "$GPG_PASSPHRASE" \ - --cipher-algo AES256 \ - --output $RUNNER_TEMP/${ARTIFACT_NAME}.zip.gpg \ - $RUNNER_TEMP/${ARTIFACT_NAME}.zip - rm -f $RUNNER_TEMP/${ARTIFACT_NAME}.zip - - - name: Upload ssh config - uses: actions/upload-artifact@v7 - if: always() - with: - path: ${{ runner.temp }}/${{ env.STORAGE_TYPE }}-generated-files-ssh-${{ env.E2E_START_TIME }}.zip.gpg - overwrite: true - include-hidden-files: true - retention-days: 3 - archive: false - - - name: Encrypt kubeconfig artifact - if: always() - env: - GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} - ARTIFACT_NAME: ${{ env.STORAGE_TYPE }}-generated-files-kubeconfig-${{ env.E2E_START_TIME }} - run: | - gpg --symmetric --batch --yes --pinentry-mode loopback \ - --passphrase "$GPG_PASSPHRASE" \ - --cipher-algo AES256 \ - 
--output $RUNNER_TEMP/${ARTIFACT_NAME}.gpg \ - ${{ env.SETUP_CLUSTER_TYPE_PATH }}/kube-config - - - name: Upload kubeconfig - uses: actions/upload-artifact@v7 - with: - path: ${{ runner.temp }}/${{ env.STORAGE_TYPE }}-generated-files-kubeconfig-${{ env.E2E_START_TIME }}.gpg - overwrite: true - include-hidden-files: true - retention-days: 3 - archive: false - - - name: Add encrypted artifacts help to job summary - if: always() - run: | - cat >> "$GITHUB_STEP_SUMMARY" <<'EOF' - ## Encrypted artifacts - - Some uploaded artifacts in this workflow are encrypted with GPG symmetric encryption. - - Secret used for decryption passphrase: - - `E2E_ARTIFACTS_GPG_PASSPHRASE` - - Encrypted artifact types: - - `*-generated-files-*.zip.gpg` - - `*-generated-files-ssh-*.zip.gpg` - - `*-generated-files-kubeconfig-*.gpg` - - Decrypt commands: - - ```bash - # zip.gpg artifact - gpg --decrypt --batch --yes --pinentry-mode loopback \ - --passphrase "$E2E_ARTIFACTS_GPG_PASSPHRASE" \ - --output artifact.zip \ - artifact.zip.gpg - - unzip -o artifact.zip - - # same, but with simultaneous decryption and extraction of the whole archive - gpg --decrypt --batch --yes --pinentry-mode loopback \ - --passphrase "$E2E_ARTIFACTS_GPG_PASSPHRASE" \ - artifact.zip.gpg > artifact.zip && unzip -o artifact.zip - - # single-file .gpg artifact - gpg --decrypt --batch --yes --pinentry-mode loopback \ - --passphrase "$E2E_ARTIFACTS_GPG_PASSPHRASE" \ - --output kube-config \ - artifact.gpg - ``` - EOF - - configure-sdn: - name: Configure SDN - runs-on: ubuntu-latest - needs: bootstrap - steps: - - uses: actions/checkout@v4 - - - name: Install Task - uses: go-task/setup-task@v2 - with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Install kubectl CLI - uses: azure/setup-kubectl@v4 - - - name: Check nested kube-api via generated kubeconfig - run: | - mkdir -p ~/.kube - echo 
"[INFO] Configure kubeconfig for nested cluster" - echo "${{ needs.bootstrap.outputs.kubeconfig }}" | base64 -d | base64 -d > ~/.kube/config - - echo "[INFO] Show paths and files content" - ls -la ~/.kube - echo "[INFO] Set permissions for kubeconfig" - chmod 600 ~/.kube/config - - echo "[INFO] Show current kubeconfig context" - kubectl config get-contexts - - echo "[INFO] Show nodes in cluster" - # `kubectl get nodes` may return error, so we need to retry. - count=30 - success=false - for i in $(seq 1 $count); do - echo "[INFO] Attempt $i/$count..." - if kubectl get nodes; then - echo "[SUCCESS] Successfully retrieved nodes." - success=true - break - fi - - if [ $i -lt $count ]; then - echo "[INFO] Retrying in 10 seconds..." - sleep 10 - fi - done - - if [ "$success" = false ]; then - echo "[ERROR] Failed to retrieve nodes after $count attempts." - exit 1 - fi - - name: Enable SDN - run: | - # Keep SDN enable and readiness checks in a script because this step needs - # several retries around Deckhouse webhooks and detailed SDN diagnostics. 
- bash .github/scripts/bash/e2e/enable-sdn.sh - - - name: Wait for nodenetworkinterfaces to be ready - run: | - count=60 - success=false - wait_time_seconds=5 - - for i in $(seq 1 $count); do - nodes=$(kubectl get nodes -o name | wc -l) - actual=$(kubectl get nodenetworkinterfaces -o json | jq -r '.items[] | select(.status.operationalState == "Up") | .metadata.name' | wc -l) || true - expected=$((nodes * 2)) - - echo "[INFO] Attempt $i/$count: expected=$expected, actual=$actual" - - if [ "$actual" -ge "$expected" ]; then - echo "[SUCCESS] All nodenetworkinterfaces are present (expected=$expected, actual=$actual)" - kubectl get nodenetworkinterfaces - success=true - break - fi - - if (( i % 5 == 0 )) ; then - echo ::group::📝 [DEBUG] show namespaces d8-sdn - kubectl -n d8-sdn get pods || true - echo ::endgroup:: - - echo ::group::📝 [DEBUG] show nodenetworkinterfaces d8-sdn - kubectl get nodenetworkinterfaces || true - echo ::endgroup:: - - echo "[INFO] Retrying in 10 seconds..." - sleep $wait_time_seconds - elif [ $i -lt $count ]; then - echo "[INFO] Retrying in 10 seconds..." 
- sleep $wait_time_seconds - fi - done - - if [ "$success" = false ]; then - echo "[ERROR] Failed to get all nodenetworkinterfaces after $count attempts (expected=$expected)" - echo "[DEBUG] Show namespaces d8-sdn" - kubectl -n d8-sdn get pods || true - echo "[DEBUG] Show nodenetworkinterfaces d8-sdn" - kubectl get nodenetworkinterfaces || true - exit 1 - fi - - - name: Configure ClusterNetwork - run: | - extraNic=$(kubectl get nodenetworkinterfaces -l network.deckhouse.io/interface-type=NIC -o json | jq -r '.items[] | select(.status.operationalState == "Up") | select(.status.ifName != "eno1" and .status.ifName != "enp1s0") | .metadata.name') - - for nic in $extraNic; do - echo "[INFO] Label nodenetworkinterface $nic nic-group=extra" - kubectl label nodenetworkinterfaces $nic nic-group=extra - done - - kubectl get nodenetworkinterface -l nic-group=extra - - # Use a retry wrapper because ClusterNetwork admission can race with the - # SDN webhook endpoint right after the module becomes Ready. 
- cat <<'EOF' | bash .github/scripts/bash/e2e/apply-clusternetworks.sh - --- - apiVersion: network.deckhouse.io/v1alpha1 - kind: ClusterNetwork - metadata: - name: cn-4006-for-e2e-test - spec: - parentNodeNetworkInterfaces: - labelSelector: - matchLabels: - nic-group: extra - type: Access - --- - apiVersion: network.deckhouse.io/v1alpha1 - kind: ClusterNetwork - metadata: - name: cn-4007-for-e2e-test - spec: - parentNodeNetworkInterfaces: - labelSelector: - matchLabels: - nic-group: extra - type: VLAN - vlan: - id: 4007 - EOF - - echo "[INFO] Wait for ClusterNetwork cn-4006-for-e2e-test to be ready" - kubectl wait clusternetworks.network.deckhouse.io --for=condition=Ready cn-4006-for-e2e-test --timeout=120s - - echo "[INFO] Wait for ClusterNetwork cn-4007-for-e2e-test to be ready" - kubectl wait clusternetworks.network.deckhouse.io --for=condition=Ready cn-4007-for-e2e-test --timeout=120s - - configure-storage: - name: Configure storage - runs-on: ubuntu-latest - needs: - - configure-sdn - - bootstrap - steps: - - uses: actions/checkout@v4 - - - name: Install Task - uses: go-task/setup-task@v2 - with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Install kubectl CLI - uses: azure/setup-kubectl@v4 - - - name: Check nested kube-api via generated kubeconfig - run: | - mkdir -p ~/.kube - echo "[INFO] Configure kubeconfig for nested cluster" - echo "${{ needs.bootstrap.outputs.kubeconfig }}" | base64 -d | base64 -d > ~/.kube/config - - echo "[INFO] Show paths and files content" - ls -la ~/.kube - echo "[INFO] Set permissions for kubeconfig" - chmod 600 ~/.kube/config - - echo "[INFO] Show current kubeconfig context" - kubectl config get-contexts - - echo "[INFO] Show nodes in cluster" - # `kubectl get nodes` may return error, so we need to retry. - count=30 - success=false - for i in $(seq 1 $count); do - echo "[INFO] Attempt $i/$count..." 
- if kubectl get nodes; then - echo "[SUCCESS] Successfully retrieved nodes." - success=true - break - fi - - if [ $i -lt $count ]; then - echo "[INFO] Retrying in 10 seconds..." - sleep 10 - fi - done - - if [ "$success" = false ]; then - echo "[ERROR] Failed to retrieve nodes after $count attempts." - exit 1 - fi - - - name: Configure replicated storage - id: storage-replicated-setup - if: ${{ inputs.storage_type == 'replicated' }} - working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/sds-replicated - run: | - d8_queue_list() { - d8 s queue list | grep -Po '([0-9]+)(?= active)' || echo "[WARNING] Failed to retrieve list queue" - } - - d8_queue() { - local count=90 - local queue_count - - for i in $(seq 1 $count) ; do - queue_count=$(d8_queue_list) - if [ -n "$queue_count" ] && [ "$queue_count" = "0" ]; then - echo "[SUCCESS] Queue is clear" - return 0 - fi - - echo "[INFO] Wait until queues are empty ${i}/${count}" - if (( i % 5 == 0 )); then - echo "[INFO] Show queue list" - d8 s queue list | head -n25 || echo "[WARNING] Failed to retrieve list queue" - echo " " - fi - - if (( i % 10 == 0 )); then - echo "[INFO] deckhouse logs" - echo "::group::📝 deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - echo " " - fi - sleep 10 - done - } - - sds_replicated_ready() { - local count=60 - for i in $(seq 1 $count); do - - sds_replicated_volume_status=$(kubectl get ns d8-sds-replicated-volume -o jsonpath='{.status.phase}' || echo "False") - - if [[ "${sds_replicated_volume_status}" = "Active" ]]; then - echo "[SUCCESS] Namespaces sds-replicated-volume are Active" - kubectl get ns d8-sds-replicated-volume - return 0 - fi - - echo "[INFO] Waiting 10s for sds-replicated-volume namespace to be ready (attempt ${i}/${count})" - if (( i % 5 == 0 )); then - echo "[INFO] Show namespaces sds-replicated-volume" - kubectl get ns | grep sds-replicated-volume || echo "Namespaces sds-replicated-volume are not ready" - echo "[DEBUG] Show queue (first 25 lines)" - 
d8 s queue list | head -n25 || echo "No queues" - fi - sleep 10 - done - - echo "[ERROR] Namespaces sds-replicated-volume are not ready after ${count} attempts" - echo "[DEBUG] Show namespaces sds" - kubectl get ns | grep sds || echo "Namespaces sds-replicated-volume are not ready" - echo "[DEBUG] Show queue" - echo "::group::📦 Show queue" - d8 s queue list || echo "No queues" - echo "::endgroup::" - echo "[DEBUG] Show deckhouse logs" - echo "::group::📝 deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - exit 1 - } - - sds_pods_ready() { - local count=100 - local linstor_node - local csi_node - local webhooks - local workers=$(kubectl get nodes -o name | grep worker | wc -l || true) - workers=$((workers)) - - echo "[INFO] Wait while linstor-node csi-node webhooks pods are ready" - for i in $(seq 1 $count); do - linstor_node=$(kubectl -n d8-sds-replicated-volume get pods | grep "linstor-node.*Running" | wc -l || true) - csi_node=$(kubectl -n d8-sds-replicated-volume get pods | grep "csi-node.*Running" | wc -l || true) - - echo "[INFO] Check if sds-replicated pods are ready" - if [[ ${linstor_node} -ge ${workers} && ${csi_node} -ge ${workers} ]]; then - echo "[SUCCESS] sds-replicated-volume is ready" - return 0 - fi - - echo "[WARNING] Not all pods are ready, linstor_node=${linstor_node}, csi_node=${csi_node}" - echo "[INFO] Waiting 10s for pods to be ready (attempt ${i}/${count})" - if (( i % 5 == 0 )); then - echo "[DEBUG] Get pods" - kubectl -n d8-sds-replicated-volume get pods || true - echo "[DEBUG] Show queue (first 25 lines)" - d8 s queue list | head -n 25 || echo "Failed to retrieve list queue" - echo " " - fi - sleep 10 - done - - echo "[ERROR] sds-replicated-volume is not ready after ${count} attempts" - echo "[DEBUG] Get pods" - echo "::group::📦 sds-replicated-volume pods" - kubectl -n d8-sds-replicated-volume get pods || true - echo "::endgroup::" - echo "[DEBUG] Show queue" - echo "::group::📦 Show queue" - d8 s queue list || echo "Failed to 
retrieve list queue" - echo "::endgroup::" - echo "[DEBUG] Show deckhouse logs" - echo "::group::📝 deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - exit 1 - } - - blockdevices_ready() { - local count=60 - workers=$(kubectl get nodes -o name | grep worker | wc -l) - workers=$((workers)) - - if [[ $workers -eq 0 ]]; then - echo "[ERROR] No worker nodes found" - exit 1 - fi - - for i in $(seq 1 $count); do - blockdevices=$(kubectl get blockdevice -o name | wc -l || true) - if [ $blockdevices -ge $workers ]; then - echo "[SUCCESS] Blockdevices is greater or equal to $workers" - kubectl get blockdevice - return 0 - fi - - echo "[INFO] Wait 10 sec until blockdevices is greater or equal to $workers (attempt ${i}/${count})" - if (( i % 5 == 0 )); then - echo "[DEBUG] Show queue (first 25 lines)" - d8 s queue list | head -n25 || echo "No queues" - fi - - sleep 10 - done - - echo "[ERROR] Blockdevices is not 3" - echo "[DEBUG] Show cluster nodes" - kubectl get nodes || echo "[WARNING] Failed to get cluster nodes" - echo "[DEBUG] Show blockdevices" - kubectl get blockdevice || echo "[WARNING] Failed to get blockdevices" - echo "[DEBUG] Show sds namespaces" - kubectl get ns | grep sds || echo "[WARNING] Namespace sds is not found" - echo "[DEBUG] Show pods in sds-replicated-volume" - echo "::group::📦 pods in sds-replicated-volume" - kubectl -n d8-sds-replicated-volume get pods || echo "[WARNING] Failed to get pods in sds-replicated-volume" - echo "::endgroup::" - echo "[DEBUG] Show deckhouse logs" - echo "::group::📝 deckhouse logs" - d8 s logs | tail -n 100 || echo "[WARNING] Failed to get deckhouse logs" - echo "::endgroup::" - exit 1 - } - - d8_queue - - kubectl apply -f ../sds-node-configurator/mc.yaml - kubectl apply -f mc.yaml - echo "[INFO] Wait for sds-node-configurator" - kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sds-node-configurator --timeout=300s - - echo "[INFO] Wait for sds-replicated-volume to be ready" - sds_replicated_ready - 
kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sds-replicated-volume --timeout=300s - - echo "[INFO] Wait BlockDevice are ready" - blockdevices_ready - - echo "[INFO] Wait pods and webhooks sds-replicated pods" - sds_pods_ready - - chmod +x ../sds-node-configurator/lvg-gen.sh - ../sds-node-configurator/lvg-gen.sh - - chmod +x rsc-gen.sh - ./rsc-gen.sh - - echo "[INFO] Show existing storageclasses" - if ! kubectl get storageclass | grep -q nested; then - echo "[WARNING] No nested storageclasses" - else - kubectl get storageclass | grep nested - echo "[SUCCESS] Done" - fi - - - name: Configure NFS storage - if: ${{ inputs.storage_type == 'nfs' }} - id: storage-nfs-setup - working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/nfs - env: - NAMESPACE: ${{ needs.bootstrap.outputs.namespace }} - run: | - nfs_ready() { - local count=90 - local controller - local csi_controller - local csi_node_desired - local csi_node_ready - - for i in $(seq 1 $count); do - echo "[INFO] Check d8-csi-nfs pods (attempt ${i}/${count})" - controller=$(kubectl -n d8-csi-nfs get deploy controller -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") - csi_controller=$(kubectl -n d8-csi-nfs get deploy csi-controller -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") - csi_node_desired=$(kubectl -n d8-csi-nfs get ds csi-node -o jsonpath='{.status.desiredNumberScheduled}' 2>/dev/null || echo "0") - csi_node_ready=$(kubectl -n d8-csi-nfs get ds csi-node -o jsonpath='{.status.numberReady}' 2>/dev/null || echo "0") - - if [[ "$controller" -ge 1 && "$csi_controller" -ge 1 && "$csi_node_desired" -gt 0 && "$csi_node_ready" -eq "$csi_node_desired" ]]; then - echo "[SUCCESS] NFS CSI is ready (controller=${controller}, csi-controller=${csi_controller}, csi-node=${csi_node_ready}/${csi_node_desired})" - return 0 - fi - - echo "[WARNING] NFS CSI not ready: controller=${controller}, csi-controller=${csi_controller}, csi-node=${csi_node_ready}/${csi_node_desired}" - if 
(( i % 5 == 0 )); then - echo "[DEBUG] Pods in d8-csi-nfs:" - kubectl -n d8-csi-nfs get pods || echo "[WARNING] Failed to retrieve pods" - echo "[DEBUG] Deployments in d8-csi-nfs:" - kubectl -n d8-csi-nfs get deploy || echo "[WARNING] Failed to retrieve deployments" - echo "[DEBUG] DaemonSets in d8-csi-nfs:" - kubectl -n d8-csi-nfs get ds || echo "[WARNING] Failed to retrieve daemonsets" - echo "[DEBUG] csi-nfs module status:" - kubectl get modules csi-nfs -o wide || echo "[WARNING] Failed to retrieve module" - fi - sleep 10 - done - - echo "[ERROR] NFS CSI did not become ready in time" - kubectl -n d8-csi-nfs get pods || true - exit 1 - } - - echo "[INFO] Apply csi-nfs ModuleConfig, ModulePullOverride, snapshot-controller" - kubectl apply -f mc.yaml - - echo "[INFO] Wait for csi-nfs module to be ready" - kubectl wait --for=jsonpath='{.status.phase}'=Ready modules csi-nfs --timeout=300s - - echo "[INFO] Wait for csi-nfs pods to be ready" - nfs_ready - - echo "[INFO] Apply NFSStorageClass" - envsubst < storageclass.yaml | kubectl apply -f - - - echo "[INFO] Configure default storage class" - ./default-sc-configure.sh - - - echo "[INFO] Show existing storageclasses" - kubectl get storageclass - - configure-virtualization: - name: Configure Virtualization - runs-on: ubuntu-latest - needs: - - bootstrap - - configure-storage - steps: - - uses: actions/checkout@v4 - - name: Install kubectl CLI - uses: azure/setup-kubectl@v4 - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Check kubeconfig - run: | - echo "[INFO] Configure kube config" - mkdir -p ~/.kube - echo "${{ needs.bootstrap.outputs.kubeconfig }}" | base64 -d | base64 -d > ~/.kube/config - chmod 600 ~/.kube/config - kubectl config use-context nested-e2e-nested-sa - - - name: Configure Virtualization - env: - DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} - NESTED_STORAGE_CLASS_NAME: ${{ inputs.nested_storageclass_name }} - 
VIRTUALIZATION_TAG: ${{ env.VIRTUALIZATION_TAG }} - run: | - # Keep virtualization configuration in a script because it waits for - # Deckhouse queue/source propagation before applying ModuleConfig. - bash .github/scripts/bash/e2e/configure-virtualization.sh - - name: Wait for Virtualization to be ready - run: | - d8_queue_list() { - d8 s queue list | grep -Po '([0-9]+)(?= active)' || echo "Failed to retrieve list queue" - } - - debug_output() { - local NODES - - echo "[ERROR] Virtualization module deploy failed" - echo "[DEBUG] Show describe virtualization module" - echo "::group::📦 describe virtualization module" - kubectl describe modules virtualization || true - echo "::endgroup::" - echo "[DEBUG] Show namespace d8-virtualization" - kubectl get ns d8-virtualization || true - echo "[DEBUG] Show pods in namespace d8-virtualization" - kubectl -n d8-virtualization get pods || true - echo "[DEBUG] Show dvcr info" - echo "::group::📦 dvcr pod describe" - kubectl -n d8-virtualization describe pod -l app=dvcr || true - echo "::endgroup::" - echo " " - echo "::group::📦 dvcr pod yaml" - kubectl -n d8-virtualization get pods -l app=dvcr -o yaml || true - echo "::endgroup::" - echo " " - echo "::group::📦 dvcr deployment yaml" - kubectl -n d8-virtualization get deployment -l app=dvcr -o yaml || true - echo "::endgroup::" - echo " " - echo "::group::📦 dvcr deployment describe" - kubectl -n d8-virtualization describe deployment -l app=dvcr || true - echo "::endgroup::" - echo " " - echo "::group::📦 dvcr service yaml" - kubectl -n d8-virtualization get service -l app=dvcr -o yaml || true - echo "::endgroup::" - echo " " - echo "[DEBUG] Show pvc in namespace d8-virtualization" - kubectl get pvc -n d8-virtualization || true - echo "[DEBUG] Show cluster StorageClasses" - kubectl get storageclasses || true - echo "[DEBUG] Show cluster nodes" - kubectl get node || true - - echo "[DEBUG] Show cluster node yaml and describe" - NODES=$(kubectl get no -o jsonpath='{range 
.items[?(@.metadata.name)]}{.metadata.name}{"\n"}{end}') - for node in $NODES; do - echo "::group::📝 show cluster node $node yaml" - kubectl get node $node -o yaml - echo "::endgroup::" - echo "::group::📝 show cluster node $node describe" - kubectl describe node $node - echo "::endgroup::" - done - - echo "[DEBUG] Show queue (first 25 lines)" - d8 s queue list | head -n 25 || echo "[WARNING] Failed to retrieve list queue" - echo "[DEBUG] Show deckhouse logs" - echo "::group::📝 deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - } - - d8_queue() { - local count=90 - local queue_count - - for i in $(seq 1 $count) ; do - queue_count=$(d8_queue_list) - if [ -n "$queue_count" ] && [ "$queue_count" = "0" ]; then - echo "[SUCCESS] Queue is clear" - return 0 - fi - - echo "[INFO] Wait until queues are empty ${i}/${count}" - if (( i % 5 == 0 )); then - echo "[INFO] Show queue list" - d8 s queue list | head -n25 || echo "[WARNING] Failed to retrieve list queue" - echo " " - fi - - if (( i % 10 == 0 )); then - echo "[INFO] deckhouse logs" - echo "::group::📝 deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - echo " " - fi - sleep 10 - done - } - - virtualization_ready() { - local count=90 - local virtualization_status - - for i in $(seq 1 $count) ; do - virtualization_status=$(kubectl get modules virtualization -o jsonpath='{.status.phase}') - if [ "$virtualization_status" == "Ready" ]; then - echo "[SUCCESS] Virtualization module is ready" - kubectl get modules virtualization - kubectl -n d8-virtualization get pods - kubectl get vmclass || echo "[WARNING] no vmclasses found" - return 0 - fi - - echo "[INFO] Waiting 10s for Virtualization module to be ready (attempt $i/$count)" - - if (( i % 5 == 0 )); then - echo " " - echo "[DEBUG] Show additional info" - kubectl get ns d8-virtualization || echo "[WARNING] Namespace virtualization is not ready" - echo " " - kubectl -n d8-virtualization get pods || echo "[WARNING] Pods in namespace virtualization 
is not ready" - kubectl get pvc -n d8-virtualization || echo "[WARNING] PVC in namespace virtualization is not ready" - echo " " - fi - sleep 10 - done - - debug_output - exit 1 - } - - virt_handler_ready() { - local count=180 - local virt_handler_ready - local workers - local time_wait=10 - - for i in $(seq 1 $count); do - workers=$(kubectl get nodes -o name | grep worker | wc -l || true) - workers=$((workers)) - if [[ $workers -eq 0 ]]; then - echo "[WARNING] No worker nodes found, keep waiting" - echo "[INFO] Wait ${time_wait}s virt-handler pods are ready (attempt $i/$count)" - sleep ${time_wait} - continue - fi - - virt_handler_ready=$(kubectl -n d8-virtualization get pods | grep "virt-handler.*Running" | wc -l || true) - - if [[ $virt_handler_ready -ge $workers ]]; then - echo "[SUCCESS] virt-handlers pods are ready $virt_handler_ready/$workers" - return 0 - fi - - echo "[INFO] virt-handler pods $virt_handler_ready/$workers" - echo "[INFO] Wait ${time_wait}s virt-handler pods are ready (attempt $i/$count)" - if (( i % 5 == 0 )); then - echo "[DEBUG] Show pods in namespace d8-virtualization" - echo "::group::📦 virtualization pods" - kubectl -n d8-virtualization get pods || echo "[WARNING] No pods in virtualization namespace found" - echo "::endgroup::" - echo "[DEBUG] Show cluster nodes" - echo "::group::📦 cluster nodes" - kubectl get node || echo "[WARNING] Failed to get cluster nodes" - echo "::endgroup::" - fi - sleep ${time_wait} - done - - debug_output - exit 1 - } - - echo " " - echo "[INFO] Waiting for Virtualization module to be ready" - d8_queue - - virtualization_ready - - echo "[INFO] Checking Virtualization module deployments" - kubectl -n d8-virtualization wait --for=condition=Available deploy --all --timeout 900s - echo "[INFO] Checking virt-handler pods " - virt_handler_ready - - echo "[INFO] Switch virtualization module to maintenance mode" - kubectl patch mc virtualization --type merge --patch 
'{"spec":{"maintenance":"NoResourceReconciliation"}}' - if [[ "${{ inputs.storage_type }}" == "replicated" ]]; then - echo "[INFO] Switch sds-replicated-volume module to maintenance mode" - kubectl patch mc sds-replicated-volume --type merge --patch '{"spec":{"maintenance":"NoResourceReconciliation"}}' - elif [[ "${{ inputs.storage_type }}" == "nfs" ]]; then - echo "[INFO] Switch csi-nfs module to maintenance mode" - kubectl patch mc csi-nfs --type merge --patch '{"spec":{"maintenance":"NoResourceReconciliation"}}' - fi - - e2e-test: - name: E2E test - runs-on: ubuntu-latest - needs: - - bootstrap - - configure-storage - - configure-virtualization - steps: - - uses: actions/checkout@v4 - - - name: Set up Go ${{ env.GO_VERSION }} - uses: actions/setup-go@v5 - with: - go-version: "${{ env.GO_VERSION }}" - - - name: Install Task - uses: go-task/setup-task@v2 - with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Install ginkgo - working-directory: ./test/e2e/ - run: | - echo "Install ginkgo" - go install tool - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Install kubectl CLI - uses: azure/setup-kubectl@v4 - - - name: Setup kubeconfig - run: | - mkdir -p ~/.kube - echo "${{ needs.bootstrap.outputs.kubeconfig }}" | base64 -d | base64 -d > ~/.kube/config - chmod 600 ~/.kube/config - echo "[INFO] Test cluster connection by showing existing vmclass" - kubectl get vmclass - - - name: Download dependencies - working-directory: ./test/e2e/ - run: | - echo "Download dependencies" - go mod download - - - name: Detect Kubernetes version for E2E - id: detect-k8s-version - run: | - set -euo pipefail - - VERSION_JSON=$(kubectl version -o json) - SERVER_VERSION=$(echo "$VERSION_JSON" | jq -r '.serverVersion.gitVersion') - SERVER_MAJOR=$(echo "$VERSION_JSON" | jq -r '.serverVersion.major' | tr -cd '0-9') - SERVER_MINOR=$(echo "$VERSION_JSON" | jq -r '.serverVersion.minor' | tr -cd '0-9') - 
- if [[ -z "$SERVER_MAJOR" || -z "$SERVER_MINOR" ]]; then - echo "[ERROR] Failed to parse Kubernetes server version: $SERVER_VERSION" - exit 1 - fi - - LABEL_FILTER="" - USB_SUPPORTED=false - - if (( SERVER_MAJOR > 1 || (SERVER_MAJOR == 1 && SERVER_MINOR >= 34) )); then - USB_SUPPORTED=true - echo "[INFO] Kubernetes server version $SERVER_VERSION supports USB E2E tests" - else - LABEL_FILTER="!usb-precheck" - echo "[INFO] Kubernetes server version $SERVER_VERSION does not support USB E2E tests" - echo "[INFO] USB-labeled specs will be excluded with label filter: $LABEL_FILTER" - fi - - echo "server-version=$SERVER_VERSION" >> "$GITHUB_OUTPUT" - echo "usb-supported=$USB_SUPPORTED" >> "$GITHUB_OUTPUT" - echo "label-filter=$LABEL_FILTER" >> "$GITHUB_OUTPUT" - - - name: Create vmclass for e2e tests - run: | - if ! (kubectl get vmclass generic-for-e2e 2>/dev/null); then - kubectl get vmclass/generic -o json | jq 'del(.status) | del(.metadata) | .metadata = {"name":"generic-for-e2e","annotations":{"virtualmachineclass.virtualization.deckhouse.io/is-default-class":"true"}} ' | kubectl create -f - - fi - - echo "[INFO] Showing exists vmclasses" - kubectl get vmclass - - - name: Run E2E - id: e2e-report - env: - TIMEOUT: ${{ inputs.e2e_timeout }} - CSI: ${{ inputs.storage_type }} - STORAGE_CLASS_NAME: ${{ inputs.nested_storageclass_name }} - LABELS: ${{ steps.detect-k8s-version.outputs.label-filter }} - SERVER_K8S_VERSION: ${{ steps.detect-k8s-version.outputs.server-version }} - USB_SUPPORTED: ${{ steps.detect-k8s-version.outputs.usb-supported }} - working-directory: ./test/e2e/ - run: | - DATE=$(date +"%Y-%m-%d") - e2e_report_file="e2e_report_${CSI}_${DATE}.json" - e2e_output_file="e2e_output_${CSI}_${DATE}.log" - FOCUS="${{ inputs.e2e_focus_tests }}" - - cp -a legacy/testdata /tmp/testdata - - echo "[INFO] Kubernetes server version: ${SERVER_K8S_VERSION}" - echo "[INFO] USB E2E supported: ${USB_SUPPORTED}" - if [ -n "${LABELS:-}" ]; then - echo "[INFO] Applying Ginkgo 
label filter: ${LABELS}" - fi - - ./scripts/precheck-prepare_ci.sh - - set +e - GINKGO_ARGS=( - -v - --race - --timeout="$TIMEOUT" - --json-report="$e2e_report_file" - ) - - if [ -n "${LABELS:-}" ]; then - GINKGO_ARGS+=(--label-filter="$LABELS") - fi - - if [ -n "$FOCUS" ]; then - GINKGO_ARGS+=(--focus="$FOCUS") - fi - - go tool ginkgo "${GINKGO_ARGS[@]}" . 2>&1 | tee "$e2e_output_file" - GINKGO_EXIT_CODE=${PIPESTATUS[0]} - set -e - - echo "[INFO] Exit code: $GINKGO_EXIT_CODE" - exit $GINKGO_EXIT_CODE - - - name: Upload summary test results (json) - uses: actions/upload-artifact@v7 - id: e2e-report-artifact - if: always() && steps.e2e-report.outcome != 'skipped' - with: - name: e2e-test-results-${{ inputs.storage_type }}-${{ github.run_id }}-${{ inputs.date_start }} - path: | - test/e2e/e2e_report_*.json - test/e2e/e2e_output_*.log - if-no-files-found: ignore - overwrite: true - retention-days: 3 - - - name: Upload resources from failed tests - uses: actions/upload-artifact@v7 - if: always() && steps.e2e-report.outcome != 'skipped' - with: - name: DUMP-${{ env.STORAGE_TYPE }}-${{ env.E2E_START_TIME }} - path: ${{ runner.temp }}/e2e_failed/ - if-no-files-found: ignore - retention-days: 3 - - prepare-report: - name: Prepare E2E report - runs-on: ubuntu-latest - needs: - - bootstrap - - configure-sdn - - configure-storage - - configure-virtualization - - e2e-test - if: always() - outputs: - artifact-name: ${{ steps.set-artifact-name.outputs.artifact-name }} - report_kind: ${{ steps.determine-stage.outputs.report_kind }} - status: ${{ steps.determine-stage.outputs.status }} - failed_stage: ${{ steps.determine-stage.outputs.failed_stage }} - failed_stage_label: ${{ steps.determine-stage.outputs.failed_stage_label }} - workflow_run_url: ${{ steps.determine-stage.outputs.workflow_run_url }} - branch: ${{ steps.determine-stage.outputs.branch }} - steps: - - uses: actions/checkout@v4 - - - name: Download E2E test results if available - uses: actions/download-artifact@v8 - 
continue-on-error: true - with: - name: e2e-test-results-${{ inputs.storage_type }}-${{ github.run_id }}-${{ inputs.date_start }} - path: test/e2e/ - - - name: Determine failed stage and prepare report - id: determine-stage - uses: actions/github-script@v7 - env: - STORAGE_TYPE: ${{ inputs.storage_type }} - PIPELINE_JOB_NAME: ${{ inputs.pipeline_job_name }} - NEEDS_CONTEXT: ${{ toJSON(needs) }} - with: - script: | - const buildClusterReport = require('./.github/scripts/js/e2e/report/cluster-report'); - await buildClusterReport({ - core, - context, - github, - }); - - - name: Upload E2E report artifact - id: upload-artifact - uses: actions/upload-artifact@v7 - with: - name: e2e-report-${{ inputs.storage_type }}-${{ github.run_id }}-${{ inputs.date_start }} - path: ${{ steps.determine-stage.outputs.report_file }} - overwrite: true - retention-days: 3 - - - name: Set artifact name output - id: set-artifact-name - run: | - ARTIFACT_NAME="e2e-report-${{ inputs.storage_type }}-${{ github.run_id }}-${{ inputs.date_start }}" - echo "artifact-name=$ARTIFACT_NAME" >> $GITHUB_OUTPUT - echo "[INFO] Artifact name: $ARTIFACT_NAME" - - undeploy-cluster: - name: Undeploy cluster - runs-on: ubuntu-latest - needs: - - bootstrap - - configure-sdn - - configure-storage - - configure-virtualization - - e2e-test - if: cancelled() || success() - steps: - - uses: actions/checkout@v4 - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Install Task - uses: go-task/setup-task@v2 - with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Download artifacts - uses: actions/download-artifact@v8 - with: - name: ${{ env.STORAGE_TYPE }}-generated-files-${{ env.E2E_START_TIME }}.zip.gpg - path: ${{ runner.temp }}/encrypted-generated-files - - - name: Decrypt generated files artifact - env: - GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} - ARTIFACT_NAME: ${{ env.STORAGE_TYPE }}-generated-files-${{ 
env.E2E_START_TIME }} - run: | - artifact_path=${{ runner.temp }}/encrypted-generated-files/${ARTIFACT_NAME}.zip.gpg - gpg --decrypt --batch --yes --pinentry-mode loopback \ - --passphrase "$GPG_PASSPHRASE" \ - --output $RUNNER_TEMP/${ARTIFACT_NAME}.zip \ - "$artifact_path" - unzip -o $RUNNER_TEMP/${ARTIFACT_NAME}.zip -d ${{ env.SETUP_CLUSTER_TYPE_PATH }} - - - name: Configure kubectl via azure/k8s-set-context@v4 - uses: azure/k8s-set-context@v4 - with: - method: kubeconfig - context: e2e-cluster-nightly-e2e-virt-sa - kubeconfig: ${{ secrets.VIRT_E2E_NIGHTLY_SA_TOKEN }} - - - name: infra-undeploy - working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} - run: | - task infra-undeploy diff --git a/.github/workflows/e2e-test-releases-reusable-pipeline.yml b/.github/workflows/e2e-test-releases-reusable-pipeline.yml index 6946ab790b..4a8ebd8caa 100644 --- a/.github/workflows/e2e-test-releases-reusable-pipeline.yml +++ b/.github/workflows/e2e-test-releases-reusable-pipeline.yml @@ -43,6 +43,11 @@ on: type: string default: "8Gi" description: "Set memory for workers node in cluster config" + cluster_config_additional_disk_size: + required: false + type: string + default: "250Gi" + description: "Set additional disk size for workers node in cluster config" storage_type: required: true type: string @@ -122,6 +127,7 @@ env: DEFAULT_USER: ${{ inputs.default_user }} GO_VERSION: ${{ inputs.go_version }} SETUP_CLUSTER_TYPE_PATH: test/dvp-static-cluster + E2E_SCRIPT_DIR: ${{ github.workspace }}/.github/scripts/bash/e2e K8S_VERSION: ${{ inputs.cluster_config_k8s_version }} STORAGE_TYPE: ${{ inputs.storage_type }} E2E_START_TIME: ${{ inputs.date_start }} @@ -153,37 +159,22 @@ jobs: namespace="release-test-$STORAGE_TYPE-$GIT_SHORT_HASH-$RANDUUID4C" - echo "namespace=$namespace" >> $GITHUB_OUTPUT - echo "sha_short=$GIT_SHORT_HASH" >> $GITHUB_OUTPUT - - REGISTRY=$(base64 -d <<< ${{secrets.PROD_IO_REGISTRY_DOCKER_CFG}} | jq '.auths | to_entries | .[] | .key' -r) - echo 
"registry=$REGISTRY" >> $GITHUB_OUTPUT - - - name: Install htpasswd utility - run: | - sudo apt-get update - sudo apt-get install -y apache2-utils + echo "namespace=$namespace" >> "$GITHUB_OUTPUT" + echo "sha_short=$GIT_SHORT_HASH" >> "$GITHUB_OUTPUT" - - name: Install Task - uses: go-task/setup-task@v2 + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + checkout: "false" + install-kubectl: "false" + install-htpasswd: "true" + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Log in to private registry - env: - REGISTRY: ${{ steps.vars.outputs.registry }} - run: | - USERNAME=$(base64 -d <<< "${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }}" | jq -r '.auths | to_entries | .[] | .value.auth' | base64 -d | cut -d ':' -f1) - PASSWORD=$(base64 -d <<< "${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }}" | jq -r '.auths | to_entries | .[] | .value.auth' | base64 -d | cut -d ':' -f2) - echo "::add-mask::$USERNAME" - echo "::add-mask::$PASSWORD" - echo "$PASSWORD" | docker login "$REGISTRY" --username "$USERNAME" --password-stdin + id: registry-login + uses: ./.github/actions/registry-login + with: + docker_cfg: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} - name: Configure kubectl via azure/k8s-set-context@v4 uses: azure/k8s-set-context@v4 @@ -194,73 +185,25 @@ jobs: - name: Generate values.yaml working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} - run: | - defaultStorageClass=$(kubectl get storageclass -o json \ - | jq -r '.items[] | select(.metadata.annotations."storageclass.kubernetes.io/is-default-class" == "true") | .metadata.name') - - cat < values.yaml - namespace: ${{ steps.vars.outputs.namespace }} - storageType: ${{ inputs.storage_type }} - storageClass: ${defaultStorageClass} - sa: dkp-sa - enabledModules: - - console - deckhouse: - channel: ${{ env.DECKHOUSE_CHANNEL }} - 
podSubnetCIDR: ${{ inputs.pod_subnet_cidr }} - serviceSubnetCIDR: ${{ inputs.service_subnet_cidr }} - kubernetesVersion: ${{ env.K8S_VERSION }} - registryDockerCfg: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} - bundle: Default - proxyEnabled: false - image: - url: ${{ inputs.virtualization_image_url }} - defaultUser: ${{ env.DEFAULT_USER }} - bootloader: BIOS - ingressHosts: - - api - - grafana - - dex - - prometheus - - console - - virtualization - instances: - aptMirror: - enabled: ${{ inputs.apt_mirror_enabled }} - name: ${{ inputs.apt_mirror_name }} - url: ${{ inputs.apt_mirror_url }} - masterNodes: - count: 1 - cfg: - rootDiskSize: 60Gi - cpu: - cores: 4 - coreFraction: 50% - memory: - size: 12Gi - additionalNodes: - - name: worker - count: 3 - cfg: - cpu: - cores: 6 - coreFraction: 50% - memory: - size: ${{ inputs.cluster_config_workers_memory }} - additionalDisks: - - size: 250Gi - networkConfig: - clusterNetworkName: ${{ inputs.nested_cluster_network_name }} - EOF - - mkdir -p tmp - touch tmp/discovered-values.yaml - - export REGISTRY=$(base64 -d <<< ${{secrets.DEV_REGISTRY_DOCKER_CFG}} | jq '.auths | to_entries | .[] | .key' -r) - export AUTH=$(base64 -d <<< ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} | jq '.auths | to_entries | .[] | .value.auth' -r) - - yq eval --inplace '.discovered.registry_url = env(REGISTRY)' tmp/discovered-values.yaml - yq eval --inplace '.discovered.registry_auth = env(AUTH)' tmp/discovered-values.yaml + env: + NAMESPACE: ${{ steps.vars.outputs.namespace }} + STORAGE_TYPE: ${{ inputs.storage_type }} + DECKHOUSE_CHANNEL: ${{ env.DECKHOUSE_CHANNEL }} + POD_SUBNET_CIDR: ${{ inputs.pod_subnet_cidr }} + SERVICE_SUBNET_CIDR: ${{ inputs.service_subnet_cidr }} + K8S_VERSION: ${{ env.K8S_VERSION }} + PROD_IO_REGISTRY_DOCKER_CFG: ${{ secrets.PROD_IO_REGISTRY_DOCKER_CFG }} + DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} + VIRTUALIZATION_IMAGE_URL: ${{ inputs.virtualization_image_url }} + DEFAULT_USER: ${{ env.DEFAULT_USER }} + 
APT_MIRROR_ENABLED: ${{ inputs.apt_mirror_enabled }} + APT_MIRROR_NAME: ${{ inputs.apt_mirror_name }} + APT_MIRROR_URL: ${{ inputs.apt_mirror_url }} + CLUSTER_CONFIG_WORKERS_MEMORY: ${{ inputs.cluster_config_workers_memory }} + ADDITIONAL_DISK_SIZE: ${{ inputs.cluster_config_additional_disk_size }} + NESTED_CLUSTER_NETWORK_NAME: ${{ inputs.nested_cluster_network_name }} + ENABLED_MODULES: console + run: bash "${E2E_SCRIPT_DIR}/render-dvp-static-values.sh" - name: Bootstrap cluster [infra-deploy] working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} @@ -269,10 +212,12 @@ jobs: - name: Bootstrap cluster [dhctl-bootstrap] id: dhctl-bootstrap working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + env: + BOOTSTRAP_DEV_PROXY: ${{ secrets.BOOTSTRAP_DEV_PROXY }} run: | if [[ $(yq eval '.deckhouse.proxyEnabled' values.yaml) == true ]]; then - export HTTP_PROXY="${{ secrets.BOOTSTRAP_DEV_PROXY }}" - export HTTPS_PROXY="${{ secrets.BOOTSTRAP_DEV_PROXY }}" + export HTTP_PROXY="$BOOTSTRAP_DEV_PROXY" + export HTTPS_PROXY="$BOOTSTRAP_DEV_PROXY" echo "Proxy settings - configured" fi @@ -281,8 +226,8 @@ jobs: timeout-minutes: 60 - name: Label cluster to prevent deletion run: | - kubectl label namespace ${{ steps.vars.outputs.namespace }} e2e-cluster/do-not-stop-vm-on-e2e-run=true - kubectl label vmclass ${{ steps.vars.outputs.namespace }}-cpu e2e-cluster/do-not-stop-vm-on-e2e-run=true + kubectl label namespace "${{ steps.vars.outputs.namespace }}" e2e-cluster/do-not-stop-vm-on-e2e-run=true + kubectl label vmclass "${{ steps.vars.outputs.namespace }}-cpu" e2e-cluster/do-not-stop-vm-on-e2e-run=true - name: Bootstrap cluster [show-connection-info] working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} run: | @@ -293,253 +238,93 @@ jobs: NAMESPACE: ${{ steps.vars.outputs.namespace }} if: always() && steps.dhctl-bootstrap.outcome == 'success' run: | - kubectl -n $NAMESPACE create secret generic ssh-key --from-file=${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/ssh/cloud + kubectl -n 
"$NAMESPACE" create secret generic ssh-key --from-file="${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/ssh/cloud" - name: Get info about nested cluster and master VM working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} env: NAMESPACE: ${{ steps.vars.outputs.namespace }} - PREFIX: ${{ inputs.storage_type }} + STORAGE_TYPE: ${{ inputs.storage_type }} + DEFAULT_USER: ${{ env.DEFAULT_USER }} run: | - nested_master=$(kubectl -n ${NAMESPACE} get vm -l group=${PREFIX}-master -o jsonpath="{.items[0].metadata.name}") - - d8vssh() { - local host=$1 - local cmd=$2 - d8 v ssh -i ./tmp/ssh/cloud \ - --local-ssh=true \ - --local-ssh-opts="-o StrictHostKeyChecking=no" \ - --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \ - ${DEFAULT_USER}@${host}.${NAMESPACE} \ - -c "$cmd" - } - - echo "[INFO] Pods in namespace $NAMESPACE" - kubectl get pods -n "${NAMESPACE}" - echo "" - - echo "[INFO] VMs in namespace $NAMESPACE" - kubectl get vm -n "${NAMESPACE}" - echo "" - - echo "[INFO] VDs in namespace $NAMESPACE" - kubectl get vd -n "${NAMESPACE}" - echo "" - - echo "Check connection to master" - d8vssh "${nested_master}" 'echo master os-release: ; cat /etc/os-release; echo " "; echo master hostname: ; hostname' - echo "" + bash "${E2E_SCRIPT_DIR}/show-nested-cluster-info.sh" \ + "${NAMESPACE}" \ + "${STORAGE_TYPE}" \ + "${DEFAULT_USER}" - name: Generate nested kubeconfig id: generate-kubeconfig working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} env: - kubeConfigPath: tmp/kube.config + KUBE_CONFIG_PATH: tmp/kube.config NAMESPACE: ${{ steps.vars.outputs.namespace }} - PREFIX: ${{ inputs.storage_type }} + STORAGE_TYPE: ${{ inputs.storage_type }} + DEFAULT_USER: ${{ env.DEFAULT_USER }} run: | - nested_master=$(kubectl -n ${NAMESPACE} get vm -l group=${PREFIX}-master -o jsonpath="{.items[0].metadata.name}") - - d8vscp() { - local source=$1 - local dest=$2 - d8 v scp -i ./tmp/ssh/cloud \ - --local-ssh=true \ - --local-ssh-opts="-o StrictHostKeyChecking=no" \ - --local-ssh-opts="-o 
UserKnownHostsFile=/dev/null" \ - "$source" "$dest" - echo "d8vscp: $source -> $dest - done" - } - - d8vssh() { - local cmd=$1 - d8 v ssh -i ./tmp/ssh/cloud \ - --local-ssh=true \ - --local-ssh-opts="-o StrictHostKeyChecking=no" \ - --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \ - ${DEFAULT_USER}@${nested_master}.${NAMESPACE} \ - -c "$cmd" - } - - echo "[INFO] Copy script for generating kubeconfig in nested cluster" - echo "[INFO] Copy scripts/gen-kubeconfig.sh to master" - d8vscp "./scripts/gen-kubeconfig.sh" "${DEFAULT_USER}@${nested_master}.${NAMESPACE}:/tmp/gen-kubeconfig.sh" - echo "" - d8vscp "./scripts/deckhouse-queue.sh" "${DEFAULT_USER}@${nested_master}.${NAMESPACE}:/tmp/deckhouse-queue.sh" - echo "" - - echo "[INFO] Set file exec permissions" - d8vssh 'chmod +x /tmp/{gen-kubeconfig.sh,deckhouse-queue.sh}' - d8vssh 'ls -la /tmp/' - echo "[INFO] Check d8 queue in nested cluster" - d8vssh 'sudo /tmp/deckhouse-queue.sh' - - echo "[INFO] Generate kube conf in nested cluster" - echo "[INFO] Run gen-kubeconfig.sh in nested cluster" - d8vssh "sudo /tmp/gen-kubeconfig.sh nested-sa nested nested-e2e /${kubeConfigPath}" - echo "" - - echo "[INFO] Copy kubeconfig to runner" - echo "[INFO] ${DEFAULT_USER}@${nested_master}.$NAMESPACE:/${kubeConfigPath} ./${kubeConfigPath}" - d8vscp "${DEFAULT_USER}@${nested_master}.$NAMESPACE:/${kubeConfigPath}" "./${kubeConfigPath}" - - echo "[INFO] Set rights for kubeconfig" - echo "[INFO] sudo chown 1001:1001 ${kubeConfigPath}" - sudo chown 1001:1001 ${kubeConfigPath} - echo " " - - echo "[INFO] Kubeconf to github output" - CONFIG=$(cat "${kubeConfigPath}" | base64 -w 0) - CONFIG=$(echo "${CONFIG}" | base64 -w 0) - echo "kubeconfig=$CONFIG" >> $GITHUB_OUTPUT + bash "${E2E_SCRIPT_DIR}/gen-nested-kubeconfig.sh" \ + "${KUBE_CONFIG_PATH}" \ + "${NAMESPACE}" \ + "${STORAGE_TYPE}" \ + "${DEFAULT_USER}" \ + "$GITHUB_OUTPUT" - name: cloud-init logs if: steps.dhctl-bootstrap.outcome == 'failure' env: NAMESPACE: ${{ 
steps.vars.outputs.namespace }} - PREFIX: ${{ inputs.storage_type }} + STORAGE_TYPE: ${{ inputs.storage_type }} + DEFAULT_USER: ${{ env.DEFAULT_USER }} + SETUP_CLUSTER_TYPE_PATH: ${{ env.SETUP_CLUSTER_TYPE_PATH }} run: | - nested_master=$(kubectl -n ${NAMESPACE} get vm -l group=${PREFIX}-master -o jsonpath="{.items[0].metadata.name}") - - d8vscp() { - local source=$1 - local dest=$2 - d8 v scp -i ./tmp/ssh/cloud \ - --local-ssh=true \ - --local-ssh-opts="-o StrictHostKeyChecking=no" \ - --local-ssh-opts="-o UserKnownHostsFile=/dev/null" \ - "$source" "$dest" - echo "d8vscp: $source -> $dest - done" - } - - d8vscp "${DEFAULT_USER}@${nested_master}.$NAMESPACE:/var/log/cloud-init*.log" "./${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/" + bash "${E2E_SCRIPT_DIR}/collect-cloud-init-logs.sh" \ + "${NAMESPACE}" \ + "${STORAGE_TYPE}" \ + "${DEFAULT_USER}" \ + "${SETUP_CLUSTER_TYPE_PATH}" - name: Prepare artifact - if: success() || failure() - run: | - sudo chown -fR 1001:1001 ${{ env.SETUP_CLUSTER_TYPE_PATH }} || true - yq e '.deckhouse.registryDockerCfg = "None"' -i ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/values.yaml || true - yq e 'select(.kind == "InitConfiguration").deckhouse.registryDockerCfg = "None"' -i ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/config.yaml || echo "The config.yaml file is not generated, skipping" - yq e '.discovered.registry_url = "None"' -i ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/discovered-values.yaml || echo "The discovered-values.yaml file is not generated, skipping editing registry_url" - yq e '.discovered.registry_auth = "None"' -i ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp/discovered-values.yaml || echo "The discovered-values.yaml file is not generated, skipping editing registry_auth" - echo "${{ steps.generate-kubeconfig.outputs.kubeconfig }}" | base64 -d | base64 -d > ./${{ env.SETUP_CLUSTER_TYPE_PATH }}/kube-config || echo "kubeconfig not available, skipping" - - - name: Encrypt generated files artifact if: success() || failure() env: - 
GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} - ARTIFACT_NAME: ${{ env.STORAGE_TYPE }}-release-generated-files-${{ env.E2E_START_TIME }} + SETUP_CLUSTER_TYPE_PATH: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + KUBECONFIG_B64: ${{ steps.generate-kubeconfig.outputs.kubeconfig }} run: | - pushd ${{ env.SETUP_CLUSTER_TYPE_PATH }} - zip -r $RUNNER_TEMP/${ARTIFACT_NAME}.zip tmp values.yaml - popd - gpg --symmetric --batch --yes --pinentry-mode loopback \ - --passphrase "$GPG_PASSPHRASE" \ - --cipher-algo AES256 \ - --output $RUNNER_TEMP/${ARTIFACT_NAME}.zip.gpg \ - $RUNNER_TEMP/${ARTIFACT_NAME}.zip - rm -f $RUNNER_TEMP/${ARTIFACT_NAME}.zip - - - name: Upload generated files - uses: actions/upload-artifact@v7 + bash "${E2E_SCRIPT_DIR}/prepare-artifact.sh" \ + "${SETUP_CLUSTER_TYPE_PATH}" \ + "${KUBECONFIG_B64}" + + - name: Encrypt and upload generated files artifact if: success() || failure() + uses: ./.github/actions/gpg-encrypt-and-upload with: - name: ${{ env.STORAGE_TYPE }}-release-generated-files-${{ env.E2E_START_TIME }}.zip.gpg - path: ${{ runner.temp }}/${{ env.STORAGE_TYPE }}-release-generated-files-${{ env.E2E_START_TIME }}.zip.gpg - overwrite: true - include-hidden-files: true + path: tmp values.yaml + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }} + passphrase: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} + artifact_name: ${{ env.STORAGE_TYPE }}-release-generated-files-${{ env.E2E_START_TIME }} retention-days: 3 - archive: false - - name: Encrypt ssh config artifact - if: always() - env: - GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} - ARTIFACT_NAME: ${{ env.STORAGE_TYPE }}-release-generated-files-ssh-${{ env.E2E_START_TIME }} - run: | - pushd ${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp - zip -r $RUNNER_TEMP/${ARTIFACT_NAME}.zip ssh - popd - gpg --symmetric --batch --yes --pinentry-mode loopback \ - --passphrase "$GPG_PASSPHRASE" \ - --cipher-algo AES256 \ - --output $RUNNER_TEMP/${ARTIFACT_NAME}.zip.gpg \ - 
$RUNNER_TEMP/${ARTIFACT_NAME}.zip - rm -f $RUNNER_TEMP/${ARTIFACT_NAME}.zip - - - name: Upload ssh config - uses: actions/upload-artifact@v7 + - name: Encrypt and upload ssh config artifact if: always() + uses: ./.github/actions/gpg-encrypt-and-upload with: - name: ${{ env.STORAGE_TYPE }}-release-generated-files-ssh-${{ env.E2E_START_TIME }}.zip.gpg - path: ${{ runner.temp }}/${{ env.STORAGE_TYPE }}-release-generated-files-ssh-${{ env.E2E_START_TIME }}.zip.gpg - overwrite: true - include-hidden-files: true + path: ssh + working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/tmp + passphrase: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} + artifact_name: ${{ env.STORAGE_TYPE }}-release-generated-files-ssh-${{ env.E2E_START_TIME }} retention-days: 3 - archive: false - - - name: Encrypt kubeconfig artifact - if: always() - env: - GPG_PASSPHRASE: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} - ARTIFACT_NAME: ${{ env.STORAGE_TYPE }}-release-generated-files-kubeconfig-${{ env.E2E_START_TIME }} - run: | - gpg --symmetric --batch --yes --pinentry-mode loopback \ - --passphrase "$GPG_PASSPHRASE" \ - --cipher-algo AES256 \ - --output $RUNNER_TEMP/${ARTIFACT_NAME}.gpg \ - ${{ env.SETUP_CLUSTER_TYPE_PATH }}/kube-config - - name: Upload kubeconfig - uses: actions/upload-artifact@v7 + - name: Encrypt and upload kubeconfig artifact if: always() + uses: ./.github/actions/gpg-encrypt-and-upload with: - name: ${{ env.STORAGE_TYPE }}-release-generated-files-kubeconfig-${{ env.E2E_START_TIME }}.gpg - path: ${{ runner.temp }}/${{ env.STORAGE_TYPE }}-release-generated-files-kubeconfig-${{ env.E2E_START_TIME }}.gpg - overwrite: true - include-hidden-files: true + path: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/kube-config + archive: "false" + passphrase: ${{ secrets.E2E_ARTIFACTS_GPG_PASSPHRASE }} + artifact_name: ${{ env.STORAGE_TYPE }}-release-generated-files-kubeconfig-${{ env.E2E_START_TIME }} retention-days: 3 - archive: false - name: Add encrypted artifacts help to job summary if: always() - 
run: | - cat >> "$GITHUB_STEP_SUMMARY" <<'EOF' - ## Encrypted artifacts - - Some uploaded artifacts in this workflow are encrypted with GPG symmetric encryption. - - Secret used for decryption passphrase: - - `E2E_ARTIFACTS_GPG_PASSPHRASE` - - Encrypted artifact types: - - `*-release-generated-files-*.zip.gpg` - - `*-release-generated-files-ssh-*.zip.gpg` - - `*-release-generated-files-kubeconfig-*.gpg` - - Decrypt commands: - - ```bash - # zip.gpg artifact - gpg --decrypt --batch --yes --pinentry-mode loopback \ - --passphrase "$E2E_ARTIFACTS_GPG_PASSPHRASE" \ - --output artifact.zip \ - artifact.zip.gpg - - unzip -o artifact.zip - - # same, but with simultaneous decryption and extraction of the whole archive - gpg --decrypt --batch --yes --pinentry-mode loopback \ - --passphrase "$E2E_ARTIFACTS_GPG_PASSPHRASE" \ - artifact.zip.gpg > artifact.zip && unzip -o artifact.zip - - # single-file .gpg artifact - gpg --decrypt --batch --yes --pinentry-mode loopback \ - --passphrase "$E2E_ARTIFACTS_GPG_PASSPHRASE" \ - --output kube-config \ - artifact.gpg - ``` - EOF + uses: ./.github/actions/append-encrypted-artifacts-help configure-sdn: name: Configure SDN @@ -548,113 +333,21 @@ jobs: steps: - uses: actions/checkout@v6 - - name: Install Task - uses: go-task/setup-task@v2 + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Install kubectl CLI - uses: azure/setup-kubectl@v4 + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Check nested kube-api via generated kubeconfig - run: | - mkdir -p ~/.kube - echo "[INFO] Configure kubeconfig for nested cluster" - echo "${{ needs.bootstrap.outputs.kubeconfig }}" | base64 -d | base64 -d > ~/.kube/config - - echo "[INFO] Show paths and files content" - ls -la ~/.kube - echo "[INFO] Set permissions for 
kubeconfig" - chmod 600 ~/.kube/config - - echo "[INFO] Show current kubeconfig context" - kubectl config get-contexts - - echo "[INFO] Show nodes in cluster" - # `kubectl get nodes` may return error, so we need to retry. - count=30 - success=false - for i in $(seq 1 $count); do - echo "[INFO] Attempt $i/$count..." - if kubectl get nodes; then - echo "[SUCCESS] Successfully retrieved nodes." - success=true - break - fi - - if [ $i -lt $count ]; then - echo "[INFO] Retrying in 10 seconds..." - sleep 10 - fi - done - - if [ "$success" = false ]; then - echo "[ERROR] Failed to retrieve nodes after $count attempts." - exit 1 - fi + uses: ./.github/actions/use-nested-kubeconfig + with: + kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} - name: Enable SDN - run: | - echo "[INFO] Enable SDN" - d8 system module enable sdn - echo "[INFO] Wait for sdn modules to be ready, timeout: 300s" - kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sdn --timeout=300s - echo "[INFO] Wait for sdn deployments to be ready, timeout: 300s" - kubectl -n d8-sdn wait --for=condition=Available deploy --all --timeout 300s - echo "[INFO] Wait for sdn daemonset agent to be ready, timeout: 300s" - kubectl -n d8-sdn rollout status daemonset agent --timeout=300s - echo "[SUCCESS] Done" + run: bash "${E2E_SCRIPT_DIR}/enable-sdn.sh" - name: Wait for nodenetworkinterfaces to be ready - run: | - count=60 - success=false - wait_time_seconds=5 - - for i in $(seq 1 $count); do - nodes=$(kubectl get nodes -o name | wc -l) - actual=$(kubectl get nodenetworkinterfaces -o json | jq -r '.items[] | select(.status.operationalState == "Up") | .metadata.name' | wc -l) || true - expected=$((nodes * 2)) - - echo "[INFO] Attempt $i/$count: expected=$expected, actual=$actual" - - if [ "$actual" -ge "$expected" ]; then - echo "[SUCCESS] All nodenetworkinterfaces are present (expected=$expected, actual=$actual)" - kubectl get nodenetworkinterfaces - success=true - break - fi - - if (( i % 5 == 0 )) ; then - 
echo "::group::[DEBUG] show namespaces d8-sdn" - kubectl -n d8-sdn get pods || true - echo "::endgroup::" - - echo "::group::[DEBUG] show nodenetworkinterfaces d8-sdn" - kubectl get nodenetworkinterfaces || true - echo "::endgroup::" - - echo "[INFO] Retrying in 10 seconds..." - sleep $wait_time_seconds - elif [ $i -lt $count ]; then - echo "[INFO] Retrying in 10 seconds..." - sleep $wait_time_seconds - fi - done - - if [ "$success" = false ]; then - echo "[ERROR] Failed to get all nodenetworkinterfaces after $count attempts (expected=$expected)" - echo "[DEBUG] Show namespaces d8-sdn" - kubectl -n d8-sdn get pods || true - echo "[DEBUG] Show nodenetworkinterfaces d8-sdn" - kubectl get nodenetworkinterfaces || true - exit 1 - fi + run: bash "${E2E_SCRIPT_DIR}/wait-nodenetworkinterfaces.sh" - name: Configure ClusterNetwork run: | @@ -662,12 +355,12 @@ jobs: for nic in $extraNic; do echo "[INFO] Label nodenetworkinterface $nic nic-group=extra" - kubectl label nodenetworkinterfaces $nic nic-group=extra + kubectl label nodenetworkinterfaces "$nic" nic-group=extra done kubectl get nodenetworkinterface -l nic-group=extra - cat <<'EOF' | kubectl apply -f - + cat <<'EOF' | bash "${E2E_SCRIPT_DIR}/apply-clusternetworks.sh" --- apiVersion: network.deckhouse.io/v1alpha1 kind: ClusterNetwork @@ -709,353 +402,26 @@ jobs: steps: - uses: actions/checkout@v6 - - name: Install Task - uses: go-task/setup-task@v2 + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain with: - version: 3.x - repo-token: ${{ secrets.GITHUB_TOKEN }} - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Install kubectl CLI - uses: azure/setup-kubectl@v4 + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Check nested kube-api via generated kubeconfig - run: | - mkdir -p ~/.kube - echo "[INFO] Configure kubeconfig for nested cluster" - echo "${{ needs.bootstrap.outputs.kubeconfig }}" | base64 -d | 
base64 -d > ~/.kube/config - - echo "[INFO] Show paths and files content" - ls -la ~/.kube - echo "[INFO] Set permissions for kubeconfig" - chmod 600 ~/.kube/config - - echo "[INFO] Show current kubeconfig context" - kubectl config get-contexts - - echo "[INFO] Show nodes in cluster" - count=30 - success=false - for i in $(seq 1 $count); do - echo "[INFO] Attempt $i/$count..." - if kubectl get nodes; then - echo "[SUCCESS] Successfully retrieved nodes." - success=true - break - fi - - if [ $i -lt $count ]; then - echo "[INFO] Retrying in 10 seconds..." - sleep 10 - fi - done - - if [ "$success" = false ]; then - echo "[ERROR] Failed to retrieve nodes after $count attempts." - exit 1 - fi - + uses: ./.github/actions/use-nested-kubeconfig + with: + kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} - name: Configure replicated storage id: storage-replicated-setup if: ${{ inputs.storage_type == 'replicated' || inputs.storage_type == 'mixed' }} working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/sds-replicated - run: | - d8_queue_list() { - d8 s queue list | grep -Po '([0-9]+)(?= active)' || echo "[WARNING] Failed to retrieve list queue" - } - - d8_queue() { - local count=90 - local queue_count - - for i in $(seq 1 $count) ; do - queue_count=$(d8_queue_list) - if [ -n "$queue_count" ] && [ "$queue_count" = "0" ]; then - echo "[SUCCESS] Queue is clear" - return 0 - fi - - echo "[INFO] Wait until queues are empty ${i}/${count}" - if (( i % 5 == 0 )); then - echo "[INFO] Show queue list" - d8 s queue list | head -n25 || echo "[WARNING] Failed to retrieve list queue" - echo " " - fi - - if (( i % 10 == 0 )); then - echo "[INFO] deckhouse logs" - echo "::group::deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - echo " " - fi - sleep 10 - done - } - - sds_replicated_ready() { - local count=60 - for i in $(seq 1 $count); do - - sds_replicated_volume_status=$(kubectl get ns d8-sds-replicated-volume -o jsonpath='{.status.phase}' || echo "False") - - 
if [[ "${sds_replicated_volume_status}" = "Active" ]]; then - echo "[SUCCESS] Namespaces sds-replicated-volume are Active" - kubectl get ns d8-sds-replicated-volume - return 0 - fi - - echo "[INFO] Waiting 10s for sds-replicated-volume namespace to be ready (attempt ${i}/${count})" - if (( i % 5 == 0 )); then - echo "[INFO] Show namespaces sds-replicated-volume" - kubectl get ns | grep sds-replicated-volume || echo "Namespaces sds-replicated-volume are not ready" - echo "[DEBUG] Show queue (first 25 lines)" - d8 s queue list | head -n25 || echo "No queues" - fi - sleep 10 - done - - echo "[ERROR] Namespaces sds-replicated-volume are not ready after ${count} attempts" - echo "[DEBUG] Show namespaces sds" - kubectl get ns | grep sds || echo "Namespaces sds-replicated-volume are not ready" - echo "[DEBUG] Show queue" - echo "::group::Show queue" - d8 s queue list || echo "No queues" - echo "::endgroup::" - echo "[DEBUG] Show deckhouse logs" - echo "::group::deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - exit 1 - } - - sds_pods_ready() { - local count=100 - local linstor_node - local csi_node - local webhooks - local workers=$(kubectl get nodes -o name | grep worker | wc -l || true) - workers=$((workers)) - - echo "[INFO] Wait while linstor-node csi-node webhooks pods are ready" - for i in $(seq 1 $count); do - linstor_node=$(kubectl -n d8-sds-replicated-volume get pods | grep "linstor-node.*Running" | wc -l || true) - csi_node=$(kubectl -n d8-sds-replicated-volume get pods | grep "csi-node.*Running" | wc -l || true) - - echo "[INFO] Check if sds-replicated pods are ready" - if [[ ${linstor_node} -ge ${workers} && ${csi_node} -ge ${workers} ]]; then - echo "[SUCCESS] sds-replicated-volume is ready" - return 0 - fi - - echo "[WARNING] Not all pods are ready, linstor_node=${linstor_node}, csi_node=${csi_node}" - echo "[INFO] Waiting 10s for pods to be ready (attempt ${i}/${count})" - if (( i % 5 == 0 )); then - echo "[DEBUG] Get pods" - kubectl -n 
d8-sds-replicated-volume get pods || true - echo "[DEBUG] Show queue (first 25 lines)" - d8 s queue list | head -n 25 || echo "Failed to retrieve list queue" - echo " " - fi - sleep 10 - done - - echo "[ERROR] sds-replicated-volume is not ready after ${count} attempts" - echo "[DEBUG] Get pods" - echo "::group::sds-replicated-volume pods" - kubectl -n d8-sds-replicated-volume get pods || true - echo "::endgroup::" - echo "[DEBUG] Show queue" - echo "::group::Show queue" - d8 s queue list || echo "Failed to retrieve list queue" - echo "::endgroup::" - echo "[DEBUG] Show deckhouse logs" - echo "::group::deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - exit 1 - } - - blockdevices_ready() { - local count=60 - workers=$(kubectl get nodes -o name | grep worker | wc -l) - workers=$((workers)) - - if [[ $workers -eq 0 ]]; then - echo "[ERROR] No worker nodes found" - exit 1 - fi - - for i in $(seq 1 $count); do - blockdevices=$(kubectl get blockdevice -o name | wc -l || true) - if [ $blockdevices -ge $workers ]; then - echo "[SUCCESS] Blockdevices is greater or equal to $workers" - kubectl get blockdevice - return 0 - fi - - echo "[INFO] Wait 10 sec until blockdevices is greater or equal to $workers (attempt ${i}/${count})" - if (( i % 5 == 0 )); then - echo "[DEBUG] Show queue (first 25 lines)" - d8 s queue list | head -n25 || echo "No queues" - fi - - sleep 10 - done - - echo "[ERROR] Blockdevices is not 3" - echo "[DEBUG] Show cluster nodes" - kubectl get nodes - echo "[DEBUG] Show blockdevices" - kubectl get blockdevice - echo "[DEBUG] Show sds namespaces" - kubectl get ns | grep sds || echo "ns sds is not found" - echo "[DEBUG] Show pods in sds-replicated-volume" - echo "::group::pods in sds-replicated-volume" - kubectl -n d8-sds-replicated-volume get pods || true - echo "::endgroup::" - echo "[DEBUG] Show deckhouse logs" - echo "::group::deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - exit 1 - } - - d8_queue - - kubectl apply -f 
../sds-node-configurator/mc.yaml - kubectl apply -f mc.yaml - echo "[INFO] Wait for sds-node-configurator" - kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sds-node-configurator --timeout=300s - - echo "[INFO] Wait for sds-replicated-volume to be ready" - sds_replicated_ready - kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sds-replicated-volume --timeout=300s - - echo "[INFO] Wait BlockDevice are ready" - blockdevices_ready - - echo "[INFO] Wait pods and webhooks sds-replicated pods" - sds_pods_ready - - chmod +x ../sds-node-configurator/lvg-gen.sh - ../sds-node-configurator/lvg-gen.sh - - echo "[INFO] Configure ReplicatedStorageClass and set default nested-thin-r1" - chmod +x rsc-gen.sh - ./rsc-gen.sh - - echo "[INFO] Show existing storageclasses" - if ! kubectl get storageclass | grep -q nested; then - echo "[WARNING] No nested storageclasses" - else - kubectl get storageclass | grep nested - echo "[SUCCESS] Done" - fi + run: bash "${E2E_SCRIPT_DIR}/configure-sds-replicated.sh" - name: Configure sds-local-volume if: ${{ inputs.storage_type == 'local' || inputs.storage_type == 'mixed' }} working-directory: ${{ env.SETUP_CLUSTER_TYPE_PATH }}/storage/sds-local-volume - run: | - d8_queue_list() { - d8 s queue list | grep -Po '([0-9]+)(?= active)' || echo "[WARNING] Failed to retrieve list queue" - } - - d8_queue() { - local count=90 - local queue_count - - for i in $(seq 1 $count) ; do - queue_count=$(d8_queue_list) - if [ -n "$queue_count" ] && [ "$queue_count" = "0" ]; then - echo "[SUCCESS] Queue is clear" - return 0 - fi - - echo "[INFO] Wait until queues are empty ${i}/${count}" - if (( i % 5 == 0 )); then - echo "[INFO] Show queue list" - d8 s queue list | head -n25 || echo "[WARNING] Failed to retrieve list queue" - echo " " - fi - - if (( i % 10 == 0 )); then - echo "[INFO] deckhouse logs" - echo "::group::deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - echo " " - fi - sleep 10 - done - } - - 
sds_local_volume_ready() { - local count=90 - local local_volume_status - local csi_node_desired - local csi_node_ready - local deploy_count - local controller_ready - - for i in $(seq 1 $count); do - local_volume_status=$(kubectl get modules sds-local-volume -o jsonpath='{.status.phase}' 2>/dev/null || echo "False") - csi_node_desired=$(kubectl -n d8-sds-local-volume get ds csi-node -o jsonpath='{.status.desiredNumberScheduled}' 2>/dev/null || echo "0") - csi_node_ready=$(kubectl -n d8-sds-local-volume get ds csi-node -o jsonpath='{.status.numberReady}' 2>/dev/null || echo "0") - deploy_count=$(kubectl -n d8-sds-local-volume get deploy -o name 2>/dev/null | wc -l | tr -d ' ') - controller_ready=false - - if [[ "${deploy_count}" -gt 0 ]] && kubectl -n d8-sds-local-volume wait --for=condition=Available deploy --all --timeout=10s >/dev/null 2>&1; then - controller_ready=true - fi - - if [[ "${local_volume_status}" == "Ready" && "${csi_node_desired}" -gt 0 && "${csi_node_ready}" -eq "${csi_node_desired}" && "${controller_ready}" == "true" ]]; then - echo "[SUCCESS] sds-local-volume is ready (module=${local_volume_status}, csi-node=${csi_node_ready}/${csi_node_desired}, deployments=${deploy_count})" - kubectl get modules sds-local-volume - kubectl -n d8-sds-local-volume get pods - return 0 - fi - - echo "[INFO] Waiting for sds-local-volume to be ready (attempt ${i}/${count})" - echo "[WARNING] Current state: module=${local_volume_status}, csi-node=${csi_node_ready}/${csi_node_desired}, deployments=${deploy_count}, controller_ready=${controller_ready}" - if (( i % 5 == 0 )); then - kubectl get ns d8-sds-local-volume || true - kubectl get modules sds-local-volume -o wide || true - kubectl -n d8-sds-local-volume get pods || true - kubectl -n d8-sds-local-volume get ds || true - kubectl -n d8-sds-local-volume get deploy || true - d8 s queue list | head -n 25 || true - fi - sleep 10 - done - - echo "[ERROR] sds-local-volume did not become ready in time" - kubectl get 
modules sds-local-volume -o wide || true - kubectl -n d8-sds-local-volume get pods || true - d8 s queue list || true - echo "::group::deckhouse logs" - d8 s logs | tail -n 100 - echo "::endgroup::" - exit 1 - } - - echo "[INFO] Apply sds-local-volume ModuleConfig" - kubectl apply -f mc.yaml - - echo "[INFO] Wait for sds-local-volume module queue" - d8_queue - kubectl wait --for=jsonpath='{.status.phase}'=Ready modules sds-local-volume --timeout=300s - sds_local_volume_ready - - chmod +x ./lsc-gen.sh - ./lsc-gen.sh - - echo "[INFO] Show resulting local storage classes" - kubectl get localstorageclass || true + run: bash "${E2E_SCRIPT_DIR}/configure-sds-local-volume.sh" - name: Configure NFS storage if: ${{ inputs.storage_type == 'nfs' || inputs.storage_type == 'mixed' }} @@ -1064,64 +430,8 @@ jobs: env: NAMESPACE: ${{ needs.bootstrap.outputs.namespace }} STORAGE_TYPE: ${{ inputs.storage_type }} - run: | - nfs_ready() { - local count=90 - local controller - local csi_controller - local csi_node_desired - local csi_node_ready - - for i in $(seq 1 $count); do - echo "[INFO] Check d8-csi-nfs pods (attempt ${i}/${count})" - controller=$(kubectl -n d8-csi-nfs get deploy controller -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") - csi_controller=$(kubectl -n d8-csi-nfs get deploy csi-controller -o jsonpath='{.status.readyReplicas}' 2>/dev/null || echo "0") - csi_node_desired=$(kubectl -n d8-csi-nfs get ds csi-node -o jsonpath='{.status.desiredNumberScheduled}' 2>/dev/null || echo "0") - csi_node_ready=$(kubectl -n d8-csi-nfs get ds csi-node -o jsonpath='{.status.numberReady}' 2>/dev/null || echo "0") - - if [[ "$controller" -ge 1 && "$csi_controller" -ge 1 && "$csi_node_desired" -gt 0 && "$csi_node_ready" -eq "$csi_node_desired" ]]; then - echo "[SUCCESS] NFS CSI is ready (controller=${controller}, csi-controller=${csi_controller}, csi-node=${csi_node_ready}/${csi_node_desired})" - return 0 - fi - - echo "[WARNING] NFS CSI not ready: 
controller=${controller}, csi-controller=${csi_controller}, csi-node=${csi_node_ready}/${csi_node_desired}" - if (( i % 5 == 0 )); then - echo "[DEBUG] Pods in d8-csi-nfs:" - kubectl -n d8-csi-nfs get pods || echo "[WARNING] Failed to retrieve pods" - echo "[DEBUG] Deployments in d8-csi-nfs:" - kubectl -n d8-csi-nfs get deploy || echo "[WARNING] Failed to retrieve deployments" - echo "[DEBUG] DaemonSets in d8-csi-nfs:" - kubectl -n d8-csi-nfs get ds || echo "[WARNING] Failed to retrieve daemonsets" - echo "[DEBUG] csi-nfs module status:" - kubectl get modules csi-nfs -o wide || echo "[WARNING] Failed to retrieve module" - fi - sleep 10 - done - - echo "[ERROR] NFS CSI did not become ready in time" - kubectl -n d8-csi-nfs get pods || true - exit 1 - } - - echo "[INFO] Apply csi-nfs ModuleConfig, ModulePullOverride, snapshot-controller" - kubectl apply -f mc.yaml - - echo "[INFO] Wait for csi-nfs module to be ready" - kubectl wait --for=jsonpath='{.status.phase}'=Ready modules csi-nfs --timeout=300s - - echo "[INFO] Wait for csi-nfs pods to be ready" - nfs_ready - - echo "[INFO] Apply NFSStorageClass" - envsubst < storageclass.yaml | kubectl apply -f - - - if [[ "${STORAGE_TYPE}" != "mixed" ]]; then - echo "[INFO] Configure default storage class as ${STORAGE_TYPE}" - ./default-sc-configure.sh - fi - - echo "[INFO] Show existing storageclasses" - kubectl get storageclass + CONFIGURE_DEFAULT_SC: ${{ inputs.storage_type != 'mixed' }} + run: bash "${E2E_SCRIPT_DIR}/configure-csi-nfs.sh" configure-virtualization: name: Configure Virtualization (current-release) @@ -1131,241 +441,26 @@ jobs: - configure-storage steps: - uses: actions/checkout@v6 - - name: Install kubectl CLI - uses: azure/setup-kubectl@v4 - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - name: Check kubeconfig - run: | - echo "[INFO] Configure kube config" - mkdir -p ~/.kube - echo "${{ needs.bootstrap.outputs.kubeconfig }}" | base64 -d | base64 
-d > ~/.kube/config - chmod 600 ~/.kube/config - kubectl config use-context nested-e2e-nested-sa + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain + with: + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} + - name: Setup kubeconfig + uses: ./.github/actions/use-nested-kubeconfig + with: + kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} + check-api: "false" - name: Configure Virtualization - run: | - REGISTRY=$(base64 -d <<< "${{secrets.DEV_REGISTRY_DOCKER_CFG}}" | jq '.auths | to_entries | .[] | .key' -r) - - echo "[INFO] Apply ModuleSource prod config" - kubectl apply -f -< ~/.kube/config - chmod 600 ~/.kube/config - kubectl config use-context nested-e2e-nested-sa - + uses: ./.github/actions/use-nested-kubeconfig + with: + kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} + check-api: "false" - name: Install ginkgo working-directory: ./test/e2e/ run: | @@ -1422,44 +513,18 @@ jobs: go mod download - name: Create vmclass for release e2e tests - run: | - if ! 
(kubectl get vmclass generic-for-e2e 2>/dev/null); then - kubectl get vmclass/generic -o json | jq 'del(.status) | del(.metadata) | .metadata = {"name":"generic-for-e2e","annotations":{"virtualmachineclass.virtualization.deckhouse.io/is-default-class":"true"}} ' | kubectl create -f - - fi - - echo "[INFO] Showing existing vmclasses" - kubectl get vmclass + run: bash "${E2E_SCRIPT_DIR}/create-e2e-vmclass.sh" - name: "Run E2E tests on current-release" id: release-e2e env: + CURRENT_RELEASE: ${{ env.CURRENT_RELEASE }} CSI: ${{ inputs.storage_type }} STORAGE_CLASS_NAME: ${{ inputs.nested_storageclass_name }} E2E_CONFIG: ${{ github.workspace }}/test/e2e/default_config.yaml RELEASE_TEST_PHASE: pre-upgrade RELEASE_UPGRADE_CONTEXT_PATH: ${{ runner.temp }}/release-upgrade-context.json - run: | - echo "[INFO] Current release tag: ${{ env.CURRENT_RELEASE }}" - echo "[INFO] Storage type: ${{ inputs.storage_type }}" - echo "" - echo "[INFO] Verifying virtualization module is running" - kubectl get modules virtualization - kubectl get mpo virtualization - echo "" - echo "[INFO] Running dedicated release suite" - echo "[INFO] Resources will be intentionally left in the cluster for the upgrade test" - cd ./test/e2e/ - GINKGO_RESULT=$(mktemp -p "$RUNNER_TEMP") - junit_report="$GITHUB_WORKSPACE/test/e2e/release_current_suite.xml" - set +e - go tool ginkgo \ - -v --race --timeout=45m \ - --junit-report="$junit_report" \ - ./release | tee "$GINKGO_RESULT" - GINKGO_EXIT_CODE=$? 
- set -e - echo "[INFO] Exit code: $GINKGO_EXIT_CODE" - exit $GINKGO_EXIT_CODE + run: bash "${E2E_SCRIPT_DIR}/run-release-e2e.sh" - name: Export release upgrade context id: export-release-context @@ -1470,16 +535,6 @@ jobs: cat "${context_path}" echo "release_namespace=$(jq -r '.namespace' "${context_path}")" >> "$GITHUB_OUTPUT" - - name: Upload current-release test results - uses: actions/upload-artifact@v7 - if: always() && steps.release-e2e.outcome != 'skipped' - with: - name: current-release-e2e-results-${{ github.run_id }} - path: test/e2e/release_current_suite.xml - if-no-files-found: ignore - overwrite: true - retention-days: 3 - - name: Upload resources from failed current-release tests uses: actions/upload-artifact@v7 if: always() && steps.release-e2e.outcome != 'skipped' @@ -1500,20 +555,17 @@ jobs: steps: - uses: actions/checkout@v6 - - name: Install kubectl CLI - uses: azure/setup-kubectl@v4 - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain + with: + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Setup kubeconfig - run: | - mkdir -p ~/.kube - echo "${{ needs.bootstrap.outputs.kubeconfig }}" | base64 -d | base64 -d > ~/.kube/config - chmod 600 ~/.kube/config - + uses: ./.github/actions/use-nested-kubeconfig + with: + kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} + check-api: "false" - name: Show current MPO state run: | echo "[INFO] Current ModulePullOverride before patching:" @@ -1538,86 +590,10 @@ jobs: registry_password: ${{ secrets.DEV_MODULES_REGISTRY_PASSWORD }} - name: Verify image digests in pods after upgrade - run: | - MODULE_IMAGE="${{ vars.DEV_MODULE_SOURCE }}/virtualization:${{ env.NEW_RELEASE }}" - echo "[INFO] Extracting images_digests.json from virtualization:${{ env.NEW_RELEASE }}" - images_hash=$(crane export "${MODULE_IMAGE}" - | tar -Oxf - images_digests.json) - echo 
"[INFO] Expected image digests:" - echo "::group::images_digests.json" - echo "$images_hash" | jq . - echo "::endgroup::" - - audit_status=$(kubectl get mc virtualization -o=jsonpath='{.spec.settings.audit.enabled}' 2>/dev/null || true) - audit_image_skip="true" - if [ -n "$audit_status" ] && [ "$audit_status" == "true" ]; then - audit_image_skip="false" - fi - - SKIP_IMAGES=() - if [ "$audit_image_skip" == "true" ]; then - SKIP_IMAGES+=("virtualizationAudit") - fi - SKIP_IMAGES+=("virtualizationDraUsb") - - is_skipped_image() { - local img="$1" - if [ ${#img} -eq 0 ]; then return 1; fi - for skip in "${SKIP_IMAGES[@]}"; do - if [[ "$img" == "$skip" ]]; then - return 0 - fi - done - return 1 - } - - retry_count=0 - max_retries=120 - sleep_interval=5 - - while true; do - all_hashes_found=true - - v12n_pods=$(kubectl -n d8-virtualization get pods -o json | jq -c) - - while IFS= read -r image_entry; do - image=$(echo "$image_entry" | jq -r '.key') - hash=$(echo "$image_entry" | jq -r '.value') - - if [[ "${image,,}" =~ (libguestfs|predeletehook) ]]; then - continue - fi - - if is_skipped_image "$image"; then - echo "- SKIP $image" - continue - fi - - if echo "$v12n_pods" | grep -q "$hash"; then - echo "- OK $image $hash" - else - echo "- MISS $image $hash" - all_hashes_found=false - fi - done < <(echo "$images_hash" | jq -c '. | to_entries | sort_by(.key)[]') - - if [ "$all_hashes_found" = true ]; then - echo "[SUCCESS] All image hashes found in pods after upgrade to ${{ env.NEW_RELEASE }}" - break - fi - - retry_count=$((retry_count + 1)) - echo "[INFO] Some hashes are missing, rechecking... Attempt: ${retry_count}/${max_retries}" - - if [ "$retry_count" -ge "$max_retries" ]; then - echo "[ERROR] Timeout reached after $((retry_count * sleep_interval))s. Some image hashes are still missing." 
- echo "::group::pods in d8-virtualization" - kubectl -n d8-virtualization get pods -o wide || true - echo "::endgroup::" - exit 1 - fi - - sleep "$sleep_interval" - done + env: + DEV_MODULE_SOURCE: ${{ vars.DEV_MODULE_SOURCE }} + NEW_RELEASE: ${{ env.NEW_RELEASE }} + run: bash "${E2E_SCRIPT_DIR}/verify-image-digests.sh" - name: Show ModulePullOverride state after upgrade run: | @@ -1636,13 +612,11 @@ jobs: steps: - uses: actions/checkout@v6 - - name: Install kubectl CLI - uses: azure/setup-kubectl@v4 - - - name: Setup d8 - uses: ./.github/actions/install-d8 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Setup E2E toolchain + uses: ./.github/actions/setup-e2e-toolchain + with: + checkout: "false" + github-token: ${{ secrets.GITHUB_TOKEN }} - name: Setup Go uses: actions/setup-go@v5 @@ -1662,31 +636,17 @@ jobs: go mod download - name: Setup kubeconfig - run: | - mkdir -p ~/.kube - echo "${{ needs.bootstrap.outputs.kubeconfig }}" | base64 -d | base64 -d > ~/.kube/config - chmod 600 ~/.kube/config - kubectl config use-context nested-e2e-nested-sa - + uses: ./.github/actions/use-nested-kubeconfig + with: + kubeconfig: ${{ needs.bootstrap.outputs.kubeconfig }} + check-api: "false" - name: "Run E2E tests on new-release" env: + NEW_RELEASE: ${{ env.NEW_RELEASE }} CSI: ${{ inputs.storage_type }} STORAGE_CLASS_NAME: ${{ inputs.nested_storageclass_name }} E2E_CONFIG: ${{ github.workspace }}/test/e2e/default_config.yaml RELEASE_TEST_PHASE: post-upgrade RELEASE_NAMESPACE: ${{ needs.test-current-release.outputs.release_namespace }} RELEASE_UPGRADE_STARTED_AT: ${{ needs.patch-modulepulloverride.outputs.upgrade_started_at }} - run: | - echo "[INFO] New release tag: ${{ env.NEW_RELEASE }}" - echo "[INFO] Storage type: ${{ inputs.storage_type }}" - echo "" - echo "[INFO] Verifying virtualization module is running with new release" - kubectl get modules virtualization || true - kubectl get mpo virtualization || true - echo "" - echo "[INFO] Reusing namespace: 
${RELEASE_NAMESPACE}" - cd ./test/e2e/ - go tool ginkgo \ - -v --race --timeout=45m \ - ./release - echo "[INFO] Cluster is intentionally left running (no cleanup)" + run: bash "${E2E_SCRIPT_DIR}/run-release-e2e.sh" diff --git a/.github/workflows/e2e-test-releases.yml b/.github/workflows/e2e-test-releases.yml index 63aa7b7f2a..50d7acc5c7 100644 --- a/.github/workflows/e2e-test-releases.yml +++ b/.github/workflows/e2e-test-releases.yml @@ -46,14 +46,14 @@ jobs: name: Set vars runs-on: ubuntu-latest outputs: - date_start: ${{ steps.vars.outputs.date-start }} + date_start: ${{ steps.vars.outputs.date_start }} randuuid4c: ${{ steps.vars.outputs.randuuid4c }} steps: + - uses: actions/checkout@v6 + - name: Set vars id: vars - run: | - echo "date-start=$(date +%Y%m%d-%H%M%S)" >> $GITHUB_OUTPUT - echo "randuuid4c=$(openssl rand -hex 2)" >> $GITHUB_OUTPUT + uses: ./.github/actions/gen-run-id resolve-release-inputs: name: Resolve release inputs @@ -167,7 +167,7 @@ jobs: steps: - name: Setup Docker config run: | - echo "DOCKER_CONFIG=$(mktemp -d)" >> $GITHUB_ENV + echo "DOCKER_CONFIG=$(mktemp -d)" >> "$GITHUB_ENV" - name: Print vars run: | @@ -176,7 +176,7 @@ jobs: echo MODULES_MODULE_NAME=${{ vars.MODULE_NAME }} echo MODULES_MODULE_TAG=${{ matrix.module_tag }} echo CHECKOUT_REF=${{ matrix.checkout_ref }} - echo DOCKER_CONFIG=$DOCKER_CONFIG + echo "DOCKER_CONFIG=$DOCKER_CONFIG" - uses: actions/checkout@v4 with: @@ -222,6 +222,7 @@ jobs: date_start: ${{ needs.set-vars.outputs.date_start }} randuuid4c: ${{ needs.set-vars.outputs.randuuid4c }} cluster_config_workers_memory: "9Gi" + cluster_config_additional_disk_size: "250Gi" cluster_config_k8s_version: "1.34" secrets: DEV_REGISTRY_DOCKER_CFG: ${{ secrets.DEV_REGISTRY_DOCKER_CFG }} diff --git a/.shellcheckrc b/.shellcheckrc new file mode 100644 index 0000000000..72c1748832 --- /dev/null +++ b/.shellcheckrc @@ -0,0 +1,3 @@ +shell=bash +external-sources=true +enable=quote-safe-variables,deprecate-which,check-unassigned-uppercase 
diff --git a/Taskfile.yaml b/Taskfile.yaml index e698b063ed..9b1611c071 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -163,9 +163,38 @@ tasks: cmds: - task: lint:doc-ru - task: lint:prettier:yaml + - task: lint:shellcheck + - task: lint:actionlint - task: virtualization-controller:dvcr:lint - task: virtualization-controller:lint + lint:shellcheck: + desc: "Run shellcheck for CI shell scripts." + cmds: + - | + docker run --rm \ + -v "$PWD:/mnt" \ + -w /mnt \ + koalaman/shellcheck-alpine:v0.10.0 \ + shellcheck \ + .github/scripts/bash/e2e/*.sh \ + api/scripts/update-codegen.sh \ + images/virtualization-artifact/hack/args.sh \ + images/virtualization-artifact/hack/dlv.sh \ + images/virtualization-artifact/hack/pyroscope.sh + + lint:actionlint: + desc: "Run actionlint for E2E GitHub workflows." + cmds: + - | + docker run --rm \ + -v "$PWD:/repo" \ + -w /repo \ + rhysd/actionlint:1.7.7 \ + -color \ + -shellcheck= \ + .github/workflows/e2e*.yml + lint:doc-ru: desc: "Check the correspondence between description fields in the original crd and the Russian language version" cmds: diff --git a/api/scripts/update-codegen.sh b/api/scripts/update-codegen.sh index fccf4f01da..b07d310fc5 100755 --- a/api/scripts/update-codegen.sh +++ b/api/scripts/update-codegen.sh @@ -43,6 +43,7 @@ function source::settings { "NodeUSBDevice" "USBDevice") + # shellcheck source=/dev/null source "${CODEGEN_PKG}/kube_codegen.sh" } @@ -86,7 +87,7 @@ function generate::crds { if ! 
[[ " ${ALLOWED_RESOURCE_GEN_CRD[*]} " =~ [[:space:]]$(cat "$file" | yq '.spec.names.kind')[[:space:]] ]]; then continue fi - cp "$file" "${ROOT}/crds/$(echo $file | awk -Fio_ '{print $2}')" + cp "$file" "${ROOT}/crds/$(echo "$file" | awk -Fio_ '{print $2}')" done } diff --git a/images/virtualization-artifact/hack/args.sh b/images/virtualization-artifact/hack/args.sh index 481043edc2..98c08cfa03 100644 --- a/images/virtualization-artifact/hack/args.sh +++ b/images/virtualization-artifact/hack/args.sh @@ -36,7 +36,7 @@ function parse_flag() { local DEFAULT="${3:-}" local RESULT="" - for f in ${FLAGS[*]}; do + for f in "${FLAGS[@]}"; do case "${f}" in --${NAME}=*|-${SHORT_NAME}=*) RESULT="${f#*=}" diff --git a/images/virtualization-artifact/hack/dlv.sh b/images/virtualization-artifact/hack/dlv.sh index 9881789165..665a341911 100755 --- a/images/virtualization-artifact/hack/dlv.sh +++ b/images/virtualization-artifact/hack/dlv.sh @@ -109,6 +109,7 @@ DIR="$(dirname "$0")" ROOT="${DIR}/../../../" cd "$ROOT" +# shellcheck source=images/virtualization-artifact/hack/args.sh source "${DIR}/args.sh" set_flags_args "$@" diff --git a/images/virtualization-artifact/hack/pyroscope.sh b/images/virtualization-artifact/hack/pyroscope.sh index 80028a3428..af5e1b3e5c 100755 --- a/images/virtualization-artifact/hack/pyroscope.sh +++ b/images/virtualization-artifact/hack/pyroscope.sh @@ -88,6 +88,7 @@ function stop-pyroscope() { docker compose -f "${DOCKER_COMPOSE_FILE_PYROSCOPE_ONLY}" down } +# shellcheck source=images/virtualization-artifact/hack/args.sh source "${DIR}/args.sh" set_flags_args "$@" diff --git a/test/dvp-static-cluster/Taskfile.yaml b/test/dvp-static-cluster/Taskfile.yaml index 8b13c6e7f1..5fadae2635 100644 --- a/test/dvp-static-cluster/Taskfile.yaml +++ b/test/dvp-static-cluster/Taskfile.yaml @@ -94,19 +94,6 @@ tasks: cmds: - helm template static-dvp-over-dvp-infra ./charts/infra -f values.yaml -f {{ .DISCOVERED_VALUES_FILE }} >> {{ .TMP_DIR }}/infra.yaml - # Temporary 
workaround (TODO: remove afte fixing the issue). - # This task is needed to fix the issue with cluster migration while setting up and running tests. - infra-pin-vms: - desc: Pin VMs to nodes - cmds: - - | - for vm in $(kubectl -n {{ .NAMESPACE }} get vm -o name); do - echo "Getting VM $vm node" - pinNode=$(kubectl -n {{ .NAMESPACE }} get vm $vm jsonpath='{.status.nodeName}') - echo "Pinning VM $vm to node $pinNode" - kubectl -n {{ .NAMESPACE }} patch vm $vm --type merge --patch '{"spec":{"nodeSelector":{"kubernetes.io/hostname":"'${pinNode}'"}}}' - done - infra-deploy: deps: - render-infra diff --git a/test/dvp-static-cluster/values.yaml.tmpl b/test/dvp-static-cluster/values.yaml.tmpl new file mode 100644 index 0000000000..f267eee50b --- /dev/null +++ b/test/dvp-static-cluster/values.yaml.tmpl @@ -0,0 +1,51 @@ +namespace: ${NAMESPACE} +storageType: ${STORAGE_TYPE} +storageClass: ${DEFAULT_STORAGE_CLASS} +sa: dkp-sa +enabledModules: [${ENABLED_MODULES}] +deckhouse: + channel: ${DECKHOUSE_CHANNEL} + podSubnetCIDR: ${POD_SUBNET_CIDR} + serviceSubnetCIDR: ${SERVICE_SUBNET_CIDR} + kubernetesVersion: ${K8S_VERSION} + registryDockerCfg: ${PROD_IO_REGISTRY_DOCKER_CFG} + bundle: Default + proxyEnabled: false +image: + url: ${VIRTUALIZATION_IMAGE_URL} + defaultUser: ${DEFAULT_USER} + bootloader: BIOS +ingressHosts: + - api + - grafana + - dex + - prometheus + - console + - virtualization +instances: + aptMirror: + enabled: ${APT_MIRROR_ENABLED} + name: ${APT_MIRROR_NAME} + url: ${APT_MIRROR_URL} + masterNodes: + count: 1 + cfg: + rootDiskSize: 60Gi + cpu: + cores: 4 + coreFraction: 50% + memory: + size: 12Gi + additionalNodes: + - name: worker + count: 3 + cfg: + cpu: + cores: 6 + coreFraction: 50% + memory: + size: ${CLUSTER_CONFIG_WORKERS_MEMORY} + additionalDisks: + - size: ${ADDITIONAL_DISK_SIZE} + networkConfig: + clusterNetworkName: ${NESTED_CLUSTER_NETWORK_NAME} diff --git a/test/e2e/Taskfile.yaml b/test/e2e/Taskfile.yaml index ba503e6e96..43c54742bd 100644 --- 
a/test/e2e/Taskfile.yaml +++ b/test/e2e/Taskfile.yaml @@ -33,16 +33,13 @@ tasks: fi run:ci: - desc: "Separate task to run e2e tests in the CI environment" - env: - FOCUS: "VirtualMachineAdditionalNetworkInterfaces" + desc: "Run e2e tests in GitHub Actions" deps: - copy - kubectl - d8 - - precheck:prepare cmds: - - ./scripts/task_run_ci.sh + - bash ./scripts/e2e-ci.sh precheck:prepare: desc: "Generate JSON report via ginkgo dry-run for precheck preparation" diff --git a/test/e2e/scripts/e2e-ci.sh b/test/e2e/scripts/e2e-ci.sh new file mode 100644 index 0000000000..6a0856717a --- /dev/null +++ b/test/e2e/scripts/e2e-ci.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +# Copyright 2026 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -Eeuo pipefail + +TIMEOUT="${TIMEOUT:-3h}" +FOCUS="${FOCUS:-}" +LABELS="${LABELS:-}" +CSI="${CSI:-unknown}" + +date_tag="$(date +"%Y-%m-%d")" +e2e_report_file="e2e_report_${CSI}_${date_tag}.json" +e2e_output_file="e2e_output_${CSI}_${date_tag}.log" + +echo "[INFO] Kubernetes server version: ${SERVER_K8S_VERSION:-unknown}" +echo "[INFO] USB E2E supported: ${USB_SUPPORTED:-unknown}" +if [ -n "${LABELS}" ]; then + echo "[INFO] Applying Ginkgo label filter: ${LABELS}" +fi + +./scripts/precheck-prepare_ci.sh + +set +e +ginkgo_args=( + -v + --race + --timeout="${TIMEOUT}" + --json-report="${e2e_report_file}" +) + +if [ -n "${LABELS}" ]; then + ginkgo_args+=(--label-filter="${LABELS}") +fi + +if [ -n "${FOCUS}" ]; then + ginkgo_args+=(--focus="${FOCUS}") +fi + +go tool ginkgo "${ginkgo_args[@]}" . 2>&1 | tee "${e2e_output_file}" +ginkgo_exit_code="${PIPESTATUS[0]}" +set -e + +echo "[INFO] Exit code: ${ginkgo_exit_code}" +exit "${ginkgo_exit_code}" diff --git a/test/e2e/scripts/task_run_ci.sh b/test/e2e/scripts/task_run_ci.sh deleted file mode 100755 index 33e05b0bdb..0000000000 --- a/test/e2e/scripts/task_run_ci.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2025 Flant JSC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -GINKGO_RESULT=$(mktemp) - -DATE=$(date +"%Y-%m-%d") -echo "DATE=$DATE" >> $GITHUB_ENV -START_TIME=$(date +"%H:%M:%S") -echo "START_TIME=$START_TIME" >> $GITHUB_ENV - -go tool ginkgo -v \ - --race \ - --focus=$FOCUS \ - --timeout=$TIMEOUT | tee $GINKGO_RESULT -EXIT_CODE="${PIPESTATUS[0]}" -RESULT=$(sed -e "s/\x1b\[[0-9;]*m//g" $GINKGO_RESULT | grep --color=never -E "FAIL!|SUCCESS!") -if [[ $RESULT == FAIL!* || $EXIT_CODE -ne "0" ]]; then - RESULT_STATUS=":x: FAIL!" -elif [[ $RESULT == SUCCESS!* ]]; then - RESULT_STATUS=":white_check_mark: SUCCESS!" -else - RESULT_STATUS=":question: UNKNOWN" - EXIT_CODE=1 -fi - -PASSED=$(echo "$RESULT" | grep -oP "\d+(?= Passed)") -FAILED=$(echo "$RESULT" | grep -oP "\d+(?= Failed)") -PENDING=$(echo "$RESULT" | grep -oP "\d+(?= Pending)") -SKIPPED=$(echo "$RESULT" | grep -oP "\d+(?= Skipped)") - -SUMMARY=$(jq -n \ - --arg csi "$CSI" \ - --arg date "$DATE" \ - --arg startTime "$START_TIME" \ - --arg branch "$GITHUB_REF_NAME" \ - --arg status "$RESULT_STATUS" \ - --argjson passed "$PASSED" \ - --argjson failed "$FAILED" \ - --argjson pending "$PENDING" \ - --argjson skipped "$SKIPPED" \ - --arg link "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" \ - '{ - CSI: $csi, - Date: $date, - StartTime: $startTime, - Branch: $branch, - Status: $status, - Passed: $passed, - Failed: $failed, - Pending: $pending, - Skipped: $skipped, - Link: $link - }' -) - -echo "$SUMMARY" -echo "SUMMARY=$(echo "$SUMMARY" | jq -c .)" >> $GITHUB_ENV -exit $EXIT_CODE