From 5f1edc2ea49b6f107c55faeaa630bae63fe749f0 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 3 Jun 2026 17:34:01 +0400 Subject: [PATCH 1/8] unit-tests and CI pipeline added. Initial commit. --- .github/workflows/unit-tests.yml | 81 +++ .gitignore | 3 + CLAUDE.md | 88 +++ Makefile | 52 ++ docs/TESTS_IMPLEMENTATION_PLAN.md | 300 ++++++++ docs/WORKLOG.md | 10 + internal/config/types_yaml_test.go | 247 +++++++ .../kubernetes/commander/client_http_test.go | 683 ++++++++++++++++++ internal/kubernetes/commander/client_test.go | 176 +++++ internal/logger/level_test.go | 70 ++ pkg/cluster/vms_test.go | 257 +++++++ pkg/kubernetes/apply_test.go | 128 ++++ pkg/kubernetes/modules_test.go | 280 +++++++ pkg/kubernetes/poll_test.go | 102 +++ pkg/retry/retry_test.go | 408 +++++++++++ pkg/testkit/stress_tests_test.go | 276 +++++++ 16 files changed, 3161 insertions(+) create mode 100644 .github/workflows/unit-tests.yml create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 Makefile create mode 100644 docs/TESTS_IMPLEMENTATION_PLAN.md create mode 100644 internal/config/types_yaml_test.go create mode 100644 internal/kubernetes/commander/client_http_test.go create mode 100644 internal/kubernetes/commander/client_test.go create mode 100644 internal/logger/level_test.go create mode 100644 pkg/cluster/vms_test.go create mode 100644 pkg/kubernetes/apply_test.go create mode 100644 pkg/kubernetes/modules_test.go create mode 100644 pkg/kubernetes/poll_test.go create mode 100644 pkg/retry/retry_test.go create mode 100644 pkg/testkit/stress_tests_test.go diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml new file mode 100644 index 0000000..1a0d70d --- /dev/null +++ b/.github/workflows/unit-tests.yml @@ -0,0 +1,81 @@ +# Copyright 2026 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Unit Tests + +# Triggers: +# push: every push to any branch — gives feature branches a green/red +# signal without needing an open PR. +# pull_request: PRs targeting main — required so the check is also evaluated +# for fork PRs (whose push events do not run in the upstream repo). +on: + push: {} + pull_request: + branches: [main] + types: [opened, synchronize, reopened] + +permissions: + contents: read + +# Cancel in-flight runs for the same ref when a new commit lands. Each push +# trigger then evaluates only the latest SHA, which keeps CI minutes bounded +# on rapid-fire force-pushes. +concurrency: + group: unit-tests-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + unit-tests: + name: Build, vet & unit test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + cache: true + + - name: Download modules + run: go mod download + + # Compile everything (including the e2e suites under ./tests/...) to catch + # refactor breakage in code that the unit-tests package set deliberately + # does not execute. + - name: Build + run: go build ./... + + - name: Vet + run: go vet ./... + + # Unit tests run with -race (catches data races in retry.Do and module + # configuration goroutines) and -shuffle=on (surfaces ordering bugs in + # tests that touch package-level env vars in internal/config). + # We restrict the set to ./internal/... ./pkg/... so the e2e suites in + # ./tests/... do not execute (they require real VMs/clusters/SSH). + - name: Unit tests (race + coverage) + run: | + go test -race -shuffle=on -covermode=atomic \ + -coverprofile=coverage.out \ + ./internal/... ./pkg/... + echo "Total coverage:" + go tool cover -func=coverage.out | tail -1 + + - name: Upload coverage artifact + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage + path: coverage.out + if-no-files-found: warn diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7eb8131 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +# Coverage artifacts emitted by `make cover` / unit-tests CI job. +coverage.out +coverage.html diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..a927383 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,88 @@ +# CLAUDE.md + +Guidance for working in this repository. Read the linked docs in `docs/` before making +non-trivial changes. + +## What this is + +`github.com/deckhouse/storage-e2e` — an end-to-end test framework for Deckhouse storage +components. It provisions/manages test clusters (VMs via virtualization, existing clusters, +or Deckhouse Commander), wires up storage modules, and runs Ginkgo-based e2e suites. + +**It is imported as a library by other repos** (e.g. `csi-ceph`), so the `pkg/` public API +must stay backward compatible. + +- Module path: `github.com/deckhouse/storage-e2e`, Go `1.26`. +- Layers (top → bottom): `tests/` → `pkg/testkit` → `internal/` domain logic → + `internal/infrastructure` → `k8s client-go`. See ARCHITECTURE for the full tree. + +## Documentation map (`docs/`) + +| Doc | Purpose | +|---|---| +| [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) | Package structure, layers, components, env-var reference. **Source of truth for layout.** | +| [docs/FUNCTIONS_GLOSSARY.md](docs/FUNCTIONS_GLOSSARY.md) | All exported `pkg/` functions, grouped by resource. Check before adding new exported functions. | +| [docs/TODO.md](docs/TODO.md) | Global TODO list (managed via the `/todo` convention below). | +| [docs/WORKLOG.md](docs/WORKLOG.md) | Dated change log. Append an entry on every code change. | +| [docs/TESTS_IMPLEMENTATION_PLAN.md](docs/TESTS_IMPLEMENTATION_PLAN.md) | Plan for unit-test coverage and the CI/CD pipeline (build/vet/test, merge-blocking). | +| [docs/TESTING_STRATEGY.md](docs/TESTING_STRATEGY.md) | *(to be created)* Testing conventions, unit-vs-e2e boundaries, fakes/httptest patterns, coverage policy. | + +Also useful: [README.md](README.md) (quick start, env vars, running suites), +[internal/README.md](internal/README.md), [internal/logger/README.md](internal/logger/README.md). + +## Build, vet & test + +```bash +go build ./... # compiles everything (incl. e2e suites) +go vet ./... +go test -race -shuffle=on ./internal/... ./pkg/... # unit tests only — no cluster needed +``` + +- **Unit tests** live next to sources under `internal/` and `pkg/`; they are hermetic + (no SSH, no cluster, no network — use `httptest` and `client-go` fakes). +- **E2E suites** live under `tests/` and require real VMs/clusters + env vars + (`SSH_HOST`, `DKP_LICENSE_KEY`, `TEST_CLUSTER_CREATE_MODE`, …). **Do not run them in + unit/CI flows.** Run one explicitly, e.g.: + `go test -timeout=240m -v ./tests/ -count=1`. +- New tests/CI conventions are described in TESTS_IMPLEMENTATION_PLAN.md (and, once it + exists, TESTING_STRATEGY.md). + +## Project conventions (must follow) + +These mirror the rules in `.cursor/rules/` — apply them whether or not you use Cursor. + +1. **Backward compatibility (`pkg/`).** Removing/renaming an exported symbol, changing a + signature, or changing observed behavior is a breaking change. **Ask the user first**, + prefer adding alongside (deprecate, don't delete), and tag the WORKLOG entry with + `[Possible compatibility break]`. + +2. **Functions glossary.** Before adding a new exported `pkg/` function, search + [docs/FUNCTIONS_GLOSSARY.md](docs/FUNCTIONS_GLOSSARY.md) for an existing one. Reuse/extend + if it exists; otherwise add the new function to the glossary in the matching section. + +3. **Versatile functions.** `pkg/` functions must be general-purpose: return data not + decisions, no hardcoded names/indices, accept broad inputs (`context`, `rest.Config`) + and return concrete K8s objects, no empty wrappers. + +4. **Architecture sync.** On structural changes (add/remove/rename a package or file, move + between layers, new cluster mode, new env var, changed core types), update the affected + sections of [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md). Only touch affected sections. + +5. **Work log.** After any change to `*.go`, `*.yml`, `*.yaml`, `*.tpl`, or `*.sh`, append a + one-line entry to [docs/WORKLOG.md](docs/WORKLOG.md) under today's `## YYYY-MM-DD` + heading: `- **Type** description` (Type ∈ Add/Remove/Rename/Refactor/Bugfix/Update/Move). + Append only; never edit previous days' entries. + +6. **`/todo` command.** When a message starts with `/todo`, manage items in + [docs/TODO.md](docs/TODO.md) (Add / Remove / Check|List / Done), then record a WORKLOG entry. + +7. **Commits/PRs.** Do not add AI co-author trailers (no `Co-Authored-By: Claude` / + "Generated with Claude"). + +## Creating new e2e tests + +Use the template + script (don't hand-roll suites, and don't edit `tests/test-template`): + +```bash +cd tests && ./create-test.sh +``` diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8cf347a --- /dev/null +++ b/Makefile @@ -0,0 +1,52 @@ +# Copyright 2026 Flant JSC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Convenience targets for local development. CI runs the same commands so +# `make test` locally is equivalent to the CI unit-test job. +# +# Unit tests deliberately exclude ./tests/... (e2e suites that require real +# VMs/clusters/SSH). Use `make e2e` for hints on running individual suites. + +UNIT_PKGS := ./internal/... ./pkg/... + +.PHONY: help build vet test cover e2e clean + +help: ## Print this help. + @awk 'BEGIN {FS = ":.*##"; printf "Targets:\n"} \ + /^[a-zA-Z0-9_-]+:.*##/ {printf " \033[36m%-10s\033[0m %s\n", $$1, $$2}' \ + $(MAKEFILE_LIST) + +build: ## Compile every package (incl. e2e suites for refactor-breakage check). + go build ./... + +vet: ## Run go vet on every package. + go vet ./... + +test: ## Unit tests with -race -shuffle=on (no cluster needed). + go test -race -shuffle=on $(UNIT_PKGS) + +cover: ## Unit tests + coverage profile + total %. + go test -race -shuffle=on -covermode=atomic \ + -coverprofile=coverage.out $(UNIT_PKGS) + @echo "Total coverage:" + @go tool cover -func=coverage.out | tail -1 + +e2e: ## Hints for running an individual e2e suite (requires VMs/cluster). + @echo "E2E suites require real infra. Examples:" + @echo " go test -timeout=240m -v ./tests/test-template -count=1" + @echo " go test -timeout=240m -v ./tests/csi-all-stress-tests -count=1" + @echo "See README.md for required env vars (SSH_HOST, DKP_LICENSE_KEY, ...)." + +clean: ## Remove build / coverage artifacts. + rm -f coverage.out diff --git a/docs/TESTS_IMPLEMENTATION_PLAN.md b/docs/TESTS_IMPLEMENTATION_PLAN.md new file mode 100644 index 0000000..d00c811 --- /dev/null +++ b/docs/TESTS_IMPLEMENTATION_PLAN.md @@ -0,0 +1,300 @@ +# Unit Testing & CI/CD Implementation Plan + +> Status: **Wave 1 + Wave 2 landed, CI workflow active.** Remaining work: +> Wave 3 fake-client refactors (optional), enable branch protection so the +> "Build, vet & unit test" check becomes required on `main`. This document +> stays as the source of truth for *what* is covered, *how*, and how CI enforces it. + +## 1. Goals + +1. Cover the **deterministic core logic** of the project (`pkg/` + `internal/`) with fast, + hermetic unit tests that need **no SSH access, no Kubernetes cluster, and no network**. +2. Add a **GitHub Actions pipeline** that builds, vets, and runs the unit tests on every + push and pull request. +3. Make the test job a **required status check** so a PR cannot be merged into `main` + while tests are red. + +### Non-goals + +- We are **not** trying to unit-test the end-to-end suites under `tests/` — those + legitimately require real VMs/clusters and stay as manual/scheduled e2e runs. +- We are **not** aiming for 100% line coverage. The realistic target is high coverage of + the *pure-logic* surface; the cluster/SSH/virtualization orchestration code is mostly + I/O glue and is covered by e2e. + +--- + +## 2. Current state + +- Module: `github.com/deckhouse/storage-e2e`, Go `1.26`. +- Test framework already in use: standard `testing` + table tests (unit), Ginkgo/Gomega (e2e). +- Existing unit tests (2 files, ~all green today): + - [internal/config/types_test.go](../internal/config/types_test.go) — `ValidateModulePullOverrides`. + - [internal/logger/logger_test.go](../internal/logger/logger_test.go) — level parsing, handlers, helpers. +- Only CI present: [gitleaks-scan-on-pr.yml](../.github/workflows/gitleaks-scan-on-pr.yml) (secret scanning). **No build/test CI.** +- `ARCHITECTURE.md` §9.2 explicitly lists "Limited unit test coverage" as known tech debt. + +### Key fact that shapes the strategy + +Most `pkg/kubernetes/*` and `pkg/cluster/*` functions take a `*rest.Config` (or a concrete +`*virtualization.Client`) and **construct their clients internally**, so they can only be +exercised against a live API server. They are **not** directly unit-testable without a +small refactor (see §5). The first waves of tests therefore focus on the large amount of +**pure / injectable logic** that already exists. + +--- + +## 3. Testing strategy & conventions + +- **Test type:** standard library `testing` with table-driven subtests (`t.Run`). Matches + the existing style in `types_test.go` / `logger_test.go`. No new test deps required for + the core waves. +- **Hermetic:** no real network. HTTP clients are pointed at `httptest.Server`; Kubernetes + interactions (later waves) use `client-go`'s fakes (`k8s.io/client-go/kubernetes/fake`, + `k8s.io/client-go/dynamic/fake`) — already available transitively via `client-go`. +- **No global-state leakage:** `internal/config` reads env into package-level vars at init. + Tests that touch them must **save and restore** the globals with `t.Cleanup`. A small + test helper (`withEnvSnapshot`) will encapsulate this. +- **Logger:** `logger.GetLogger()` falls back to `slog.Default()` when uninitialized, so + code under test that logs (e.g. `retry.Do`) is safe to call directly — no setup needed. +- **Determinism:** time-based code is tested with tiny durations and `context` cancellation + rather than wall-clock sleeps; randomness (`GenerateRandomSuffix`) is asserted on + shape/charset/length, not exact value. +- **File naming:** `_test.go` next to the source, same package for white-box tests + of unexported helpers (e.g. `package kubernetes`, `package cluster`). + +### Separating unit tests from e2e in CI + +The e2e suites under `tests/` must never run in PR CI (they need clusters). Approach: + +- **Primary (chosen):** CI runs an explicit package set: `go test ./internal/... ./pkg/...`. + The `tests/` directory is excluded from the test run. +- **Compile safety:** CI additionally runs `go vet ./...` and `go build ./...`, which + *compile* the e2e suites (catching refactor breakage) without *executing* them. +- Alternative considered: build tags (`//go:build e2e`) on the e2e suite files. More robust + but touches every existing/template e2e file and `create-test.sh`. Deferred unless we + later want `go test ./...` to be safe by default. (Open question Q1.) + +--- + +## 4. Unit test target inventory (prioritized) + +Effort key: **S** ≈ <1h, **M** ≈ a few hours, **L** ≈ day+ (usually needs a refactor). + +### Wave 1 — pure logic, zero refactor, highest ROI + +| # | Package / file | Functions under test | Notable cases | Effort | +|---|---|---|---|---| +| 1 | `pkg/retry` ([retry.go](../pkg/retry/retry.go)) | `IsRetryable`, `IsSSHConnectionError`, `WithRetryAfter`, `Do`/`DoVoid` | k8s `StatusError` codes (500/501/429, RetryAfter), `io.EOF`, every network/k8s/ssh/webhook string pattern, non-retryable → no retry, success-after-N, `ctx` cancel mid-wait, backoff cap | M | +| 2 | `pkg/kubernetes` ([apply.go](../pkg/kubernetes/apply.go)) | `FindUnsetEnvVars`, `splitYAMLDocuments` | multiple `${VAR}`, dedup, set vs unset (use `t.Setenv`), `---` splitting, trailing/empty docs, leading `---` | S | +| 3 | `pkg/kubernetes` ([modules.go](../pkg/kubernetes/modules.go)) | `convertModuleSpecsToConfigs`, `buildModuleGraph`, `topologicalSortLevels`, `isWebhookConnectionError` | nil settings → empty map, missing dependency error, multi-level ordering, **cycle detection**, reverse-dep correctness | M | +| 4 | `pkg/cluster` ([vms.go](../pkg/cluster/vms.go)) | `getCVMINameFromImageURL`, `getVMNodes`, `GetSetupNode`, `GetNodeIPAddress`, `GenerateRandomSuffix`, `generateCloudInitUserData`/`generateSetupNodeCloudInit` (smoke) | `.img`/`.qcow2` strip, `_`/`.`→`-`, collapse `--`, trim `-`, empty→`image`; VM-vs-baremetal filtering; setup-node nil/role; suffix length & charset; cloud-init contains hostname + pubkey | M | +| 5 | `internal/config` ([types.go](../internal/config/types.go)) | `ClusterNode.UnmarshalYAML`, `ClusterDefinition.UnmarshalYAML`, extend `ValidateModulePullOverrides` | invalid `hostType`, invalid `role`, unknown `osType`, `clusterDefinition:` wrapper vs bare, nil module / empty override | M | +| 6 | `internal/kubernetes/commander` ([client.go](../internal/kubernetes/commander/client.go)) | `mapStatusToPhase`, `base64Encode`, `NewClientWithOptions` validation | every status→phase mapping + unknown passthrough; empty baseURL/token errors; trailing-slash trim on baseURL & apiPrefix; default auth method/prefix; bad `CACertPath` | M | +| 7 | `internal/logger` ([level.go](../internal/logger/level.go)) | `LevelToString` (round-trip with `ParseLevel`) | all levels + default | S | +| 8 | `pkg/testkit` ([stress-tests.go](../pkg/testkit/stress-tests.go)) | `(*Config).Validate`, `DefaultConfig` | each required-field error, per-mode branches, invalid `TestOrder` step, resize/clone size requirements, `SnapshotsPerPVC` defaulting | M | +| 9 | `pkg/kubernetes` ([poll.go](../pkg/kubernetes/poll.go)) | `formatRef`, `sameFinalizers`, `errIfTerminating` | ns vs cluster-scoped ref formatting; finalizer set equality (order, dupes, len); terminating object → error | S | + +### Wave 2 — needs `httptest` (no refactor) + +| # | Package | Functions under test | Approach | Effort | +|---|---|---|---|---| +| 10 | `internal/kubernetes/commander` | `setAuthHeaders` (all 5 methods), `GetClusterByID` (200/404/500), `ListClustersAPI` (array vs `items`/`data` object vs garbage), `GetClusterByName`, `CreateClusterFromTemplate`, `DeleteClusterByID`, `GetClusterKubeconfigByID` (+`/kubeconfig` 404 → cluster-details fallback), `GetRegistryByName` (exact + partial), `GetClusterConnectionInfo` (connection_hosts/agent_data/legacy precedence) | spin `httptest.Server`, build client with `baseURL=server.URL`, assert request headers/paths and decoded responses | M | + +> Note: `WaitForClusterReady` uses a hardcoded 10s ticker → not cheaply unit-testable as-is. +> Either skip it or extract the interval (small refactor, Wave 3). Listed as Q2. + +### Wave 3 — small refactors to unlock fake-client tests (optional, higher value-per-effort later) + +These functions are good candidates but currently build clients from `*rest.Config` +internally. The proposed refactor is **non-breaking**: keep the existing exported signature, +extract the logic into an unexported helper that accepts an interface +(`kubernetes.Interface` / `dynamic.Interface`), and have the public function build the real +client and delegate. Tests then call the helper with a fake. + +| Package | Candidates | Refactor | Effort | +|---|---|---|---| +| `pkg/kubernetes` apply.go | `applyDocument`/`createDocument` via `ApplyClient{dynamicClient, discoveryClient}` (already injectable in-package using `dynamic/fake` + a fake discovery) | none for struct; build fakes in test | M | +| `pkg/kubernetes` pvc/pod/namespace/secrets/storageclass | `WaitForPVCsBound`, `WaitForPodsStatus`, namespace/secret CRUD, default-SC selection | extract `…WithClientset(clientset kubernetes.Interface, …)` helper | L | +| `pkg/cluster` lock.go | `AcquireClusterLock`/`Release`/`IsClusterLocked`/`GetClusterLockInfo` (ConfigMap logic, lock-contention error message) | extract clientset-accepting helpers; test with `kubernetes/fake` | M | +| `internal/kubernetes/commander` | `WaitForClusterReady` | inject ticker interval | S | + +### Out of scope for unit tests (covered by e2e / not deterministic) + +- `internal/infrastructure/ssh/*` (real SSH/network; `findFreePort` is the only trivially + testable bit). +- `pkg/cluster/{cluster,setup}.go`, `pkg/cluster/vms.go` orchestration (VM/CVI lifecycle). +- `internal/kubernetes/{deckhouse,storage,virtualization}/*` thin CRD clients. +- `pkg/testkit/{ceph*,storageclass}.go` provisioning flows and `StressTestRunner.Run*` modes. + +--- + +## 5. Refactor principles (for Wave 3, if approved) + +- **Additive, not breaking:** public API signatures stay; add internal seams. +- Prefer accepting **client-go interfaces** (`kubernetes.Interface`, `dynamic.Interface`) + over concrete `*Clientset` so fakes drop in. +- Keep retry/IO wiring in the public function; keep *decision logic* in the helper. +- Each refactor lands with its tests in the same PR. + +--- + +## 6. CI/CD pipeline design + +New workflow: `.github/workflows/unit-tests.yml` (additive; coexists with gitleaks). + +### Triggers +- `push` → **any branch** (every push gets a green/red signal + coverage so feature + branches surface failures immediately, not only when a PR is opened) +- `pull_request` → `main` (types: opened, synchronize, reopened) — kept so PRs from + forks are still exercised even though their push events run under the fork's repo + +### Job: `unit-tests` (runs-on `ubuntu-latest`) +Steps: +1. `actions/checkout@v4` +2. `actions/setup-go@v5` with `go-version: '1.26'`, `cache: true` (module + build cache). +3. `go mod download` +4. `go build ./...` — compiles everything incl. e2e suites (refactor-breakage guard). +5. `go vet ./...` +6. `go test -race -shuffle=on -covermode=atomic -coverprofile=coverage.out ./internal/... ./pkg/...` +7. `go tool cover -func=coverage.out | tail -1` (print total %). +8. Upload `coverage.out` as an artifact (`actions/upload-artifact@v4`). + +### Proposed workflow skeleton + +```yaml +name: Unit Tests +on: + push: {} + pull_request: + branches: [main] + types: [opened, synchronize, reopened] +permissions: + contents: read +jobs: + unit-tests: + name: Build, vet & unit test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + go-version: '1.26' + cache: true + - run: go mod download + - run: go build ./... + - run: go vet ./... + - name: Unit tests (race + coverage) + run: | + go test -race -shuffle=on -covermode=atomic \ + -coverprofile=coverage.out ./internal/... ./pkg/... + go tool cover -func=coverage.out | tail -1 + - uses: actions/upload-artifact@v4 + with: + name: coverage + path: coverage.out +``` + +### Optional add-ons (decide in review — Q3) +- **golangci-lint** job (`golangci/golangci-lint-action`) with a checked-in `.golangci.yml`. + Improves quality gate; adds maintenance. Could start in "report-only" (non-blocking). +- **Coverage threshold** gate (fail under N%). Recommend starting **without** a hard + threshold and only printing total, then introducing a floor once Wave 1–2 land. +- **Codecov/coveralls** upload for PR coverage comments (needs token/secret). + +### Making it block merges +Workflow files alone don't block merges — that's a repo setting: + +- **Branch protection** (or a ruleset) on `main`: require status check **"Build, vet & unit + test"** to pass, and require branches up to date before merging. +- Configure in GitHub Settings → Branches, or via the one-time `gh` command below + (needs repo-admin rights). The check name must match the `job.name` from + [.github/workflows/unit-tests.yml](.github/workflows/unit-tests.yml) verbatim. + +```bash +# Replace / as appropriate; e.g. deckhouse/storage-e2e. +gh api \ + --method PUT \ + -H "Accept: application/vnd.github+json" \ + /repos///branches/main/protection \ + -f required_status_checks.strict=true \ + -f 'required_status_checks.contexts[]=Build, vet & unit test' \ + -f enforce_admins=false \ + -F required_pull_request_reviews= \ + -F restrictions= +``` + +After applying, a PR with a failing unit-test job is blocked from merge. + +--- + +## 7. Developer ergonomics (optional but recommended) + +Add a `Makefile` so local + CI use the same commands: + +```make +test: ; go test -race -shuffle=on ./internal/... ./pkg/... +cover: ; go test -covermode=atomic -coverprofile=coverage.out ./internal/... ./pkg/... && go tool cover -func=coverage.out +vet: ; go vet ./... +e2e: ; @echo "run a specific suite, e.g. go test -timeout=240m -v ./tests/ -count=1" +``` + +This also gives the e2e workflow (future) a single entry point. + +--- + +## 8. Rollout phases & milestones + +| Phase | Contents | Status | +|---|---|---| +| **P0** | `unit-tests.yml` + `Makefile`; runs existing test files | **Done** — workflow on every push to any branch + PRs to `main` | +| **P1** | Wave 1 (targets #1–#9) | **Done** — `retry` 94%, `commander` mappers, config YAML, `cluster/vms` helpers, `apply`/`modules`/`poll`, `testkit.Validate`, `logger/level` | +| **P2** | Wave 2 (httptest, target #10) | **Done** — Commander HTTP client covered via `httptest.Server` | +| **P3** | Branch protection enabled; check becomes **required** | **Pending admin** — see "Making it block merges" above for the exact `gh api` invocation | +| **P4** *(optional)* | Wave 3 refactors + fake-client tests; lint job | Not started | + +Suggested coverage signal (not a hard gate initially): after P2, expect solid coverage of +the pure-logic packages (`pkg/retry`, the pure parts of `pkg/kubernetes`, +`internal/kubernetes/commander`, `internal/config`, `internal/logger`). + +--- + +## 9. Risks & considerations + +- **Global env state in `internal/config`** — tests must snapshot/restore package vars; + `t.Setenv` only affects new reads, not the already-initialized globals. The + `withEnvSnapshot` helper mitigates this. `-shuffle=on` will surface any ordering bugs. +- **`-race`** roughly doubles test time but is cheap here (no heavy tests) and valuable for + `retry.Do` / parallel module config goroutines. +- **Module downloads in CI** — first run pulls k8s/virtualization deps; `setup-go` caching + keeps subsequent runs fast. +- **e2e accidentally running in CI** — avoided by the explicit `./internal/... ./pkg/...` + package set; `go vet ./...` still compiles them. +- **Hidden behavior change** — any Wave 3 refactor is additive and shipped with tests, but + reviewers should confirm the public signatures are unchanged. + +--- + +## 10. Open questions for review + +- **Q1.** Separate e2e via explicit package list (proposed) or introduce `//go:build e2e` + tags so `go test ./...` is safe everywhere? +- **Q2.** Refactor `WaitForClusterReady` (and similar tickers) to inject interval for + testability, or leave time-based waits to e2e? +- **Q3.** Add golangci-lint now (blocking? report-only?) or defer? +- **Q4.** Who enables branch protection on `main` (needs admin), and do you want me to + provide the exact `gh api` command? +- **Q5.** Do you want a coverage threshold gate eventually, and at what %? +- **Q6.** Coverage reporting service (Codecov) — desired, or keep coverage as a CI artifact only? + +--- + +## 11. Proposed PR breakdown + +1. **PR-1 (CI bootstrap):** `unit-tests.yml` + `Makefile`. No new tests. Lands the pipeline. +2. **PR-2 (Wave 1a):** `retry`, `apply`, `modules`, `poll` tests. +3. **PR-3 (Wave 1b):** `cluster/vms` pure helpers, `config/types` YAML, `commander` mappers, + `logger/level`, `testkit` `Validate`. +4. **PR-4 (Wave 2):** Commander httptest suite. +5. **PR-5 (admin):** enable branch protection / required check. +6. **PR-6+ (optional):** Wave 3 refactors + fake-client tests, lint job. diff --git a/docs/WORKLOG.md b/docs/WORKLOG.md index 99378d6..5e5b0a3 100644 --- a/docs/WORKLOG.md +++ b/docs/WORKLOG.md @@ -102,3 +102,13 @@ All notable changes to this repository are documented here. New entries are appe - **Remove** `internal/config/overrides.go`: dropped `${VAR}` expansion in `modulePullOverride` per review — each module needs its own literal tag in `cluster_config.yml` (e.g. `pr131`, `mr55`). - **Add** `internal/config.ValidateModulePullOverrides`: rejects `${...}` placeholders at config load with an explicit error. - **Update** `internal/cluster/cluster.go::LoadClusterConfig` and `pkg/cluster/cluster.go::loadClusterConfigFromPath`, `README.md`, `docs/ARCHITECTURE.md`. + +--- + +## 2026-06-03 + +- **Add** `.github/workflows/unit-tests.yml`: mandatory CI workflow that builds, vets and runs unit tests on every push (any branch) and on PRs to `main`; uses `go-version-file: go.mod`, `-race -shuffle=on -covermode=atomic`, uploads `coverage.out` artifact, scoped to `./internal/... ./pkg/...` so e2e suites stay off CI. +- **Add** `Makefile`: `test` / `cover` / `vet` / `build` / `e2e` / `clean` targets mirroring the CI commands; `.gitignore` for `coverage.out` / `coverage.html`. +- **Add** Wave 1 unit tests (`pkg/retry/retry_test.go`, `pkg/kubernetes/{apply,modules,poll}_test.go`, `pkg/cluster/vms_test.go`, `pkg/testkit/stress_tests_test.go`, `internal/config/types_yaml_test.go`, `internal/kubernetes/commander/client_test.go`, `internal/logger/level_test.go`): hermetic table-driven coverage of `retry.Do/IsRetryable/IsSSHConnectionError/WithRetryAfter`, YAML doc splitting/env-var scanning, module graph + topo sort + cycle detection, `cluster/vms` pure helpers, `commander` mappers / base64 / `NewClientWithOptions` validation, `stress-tests.Config.Validate` / `DefaultConfig`, `LevelToString` round-trip, `ClusterNode`/`ClusterDefinition` YAML unmarshal validation. +- **Add** Wave 2 httptest tests (`internal/kubernetes/commander/client_http_test.go`): drives the Commander HTTP client (`GetClusterByID`, `ListClustersAPI` array/items/data/garbage, `GetClusterByName`, `CreateClusterFromTemplate`, `DeleteClusterByID`, `GetClusterKubeconfigByID` + cluster-details fallback, `GetRegistryByName`, `GetClusterConnectionInfo` precedence + defaults) and all five `setAuthHeaders` paths via a real `httptest.Server`. +- **Update** `docs/TESTS_IMPLEMENTATION_PLAN.md`: triggers changed from `push → main` to push-on-any-branch + `pull_request → main`; status header refreshed; rollout phases marked Done/Pending; exact `gh api` branch-protection command documented. diff --git a/internal/config/types_yaml_test.go b/internal/config/types_yaml_test.go new file mode 100644 index 0000000..c0ef870 --- /dev/null +++ b/internal/config/types_yaml_test.go @@ -0,0 +1,247 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +import ( + "strings" + "testing" + + "gopkg.in/yaml.v3" +) + +// pickKnownOSType returns one of the keys from OSTypeMap so we can build +// test YAML that is robust against changes to the canonical OS list. +func pickKnownOSType(t *testing.T) string { + t.Helper() + if len(OSTypeMap) == 0 { + t.Fatal("OSTypeMap is empty; cannot build YAML fixtures") + } + // Prefer the well-known default if present for stable assertions. + preferred := "Ubuntu 22.04 6.2.0-39-generic" + if _, ok := OSTypeMap[preferred]; ok { + return preferred + } + for k := range OSTypeMap { + return k + } + return "" +} + +func TestClusterNodeUnmarshalYAML(t *testing.T) { + osName := pickKnownOSType(t) + + t.Run("VM node with valid fields", func(t *testing.T) { + in := []byte(` +hostname: master-1 +hostType: vm +osType: "` + osName + `" +cpu: 4 +ram: 8 +diskSize: 50 +`) + var node ClusterNode + if err := yaml.Unmarshal(in, &node); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if node.Hostname != "master-1" { + t.Errorf("Hostname=%q", node.Hostname) + } + if node.HostType != HostTypeVM { + t.Errorf("HostType=%q", node.HostType) + } + if node.OSType.ImageURL == "" { + t.Errorf("OSType.ImageURL not populated for %q", osName) + } + if node.CPU != 4 || node.RAM != 8 || node.DiskSize != 50 { + t.Errorf("VM fields not parsed: %+v", node) + } + }) + + t.Run("bare-metal node with role=setup", func(t *testing.T) { + in := []byte(` +hostname: boot +hostType: bare-metal +osType: "` + osName + `" +role: setup +`) + var node ClusterNode + if err := yaml.Unmarshal(in, &node); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if node.HostType != HostTypeBareMetal { + t.Errorf("HostType=%q", node.HostType) + } + if node.Role != ClusterRoleSetup { + t.Errorf("Role=%q, want %q", node.Role, ClusterRoleSetup) + } + }) + + t.Run("invalid hostType errors", func(t *testing.T) { + in := []byte(` +hostname: x +hostType: container +osType: "` + osName + `" +`) + var node ClusterNode + err := yaml.Unmarshal(in, &node) + if err == nil || !strings.Contains(err.Error(), "invalid hostType") { + t.Errorf("want 'invalid hostType' error, got %v", err) + } + }) + + t.Run("invalid role errors", func(t *testing.T) { + in := []byte(` +hostname: x +hostType: vm +osType: "` + osName + `" +role: master +`) + var node ClusterNode + err := yaml.Unmarshal(in, &node) + if err == nil || !strings.Contains(err.Error(), "invalid role") { + t.Errorf("want 'invalid role' error, got %v", err) + } + }) + + t.Run("unknown osType errors", func(t *testing.T) { + in := []byte(` +hostname: x +hostType: vm +osType: "some-unknown-os 999" +`) + var node ClusterNode + err := yaml.Unmarshal(in, &node) + if err == nil || !strings.Contains(err.Error(), "unknown osType") { + t.Errorf("want 'unknown osType' error, got %v", err) + } + }) +} + +func TestClusterDefinitionUnmarshalYAML(t *testing.T) { + osName := pickKnownOSType(t) + + t.Run("bare cluster definition (no wrapper key)", func(t *testing.T) { + in := []byte(` +masters: + - hostname: m1 + hostType: vm + osType: "` + osName + `" + cpu: 2 + ram: 4 + diskSize: 20 +workers: + - hostname: w1 + hostType: vm + osType: "` + osName + `" + cpu: 2 + ram: 4 + diskSize: 20 +dkpParameters: + kubernetesVersion: "1.30" + registryRepo: dev-registry.deckhouse.io/sys/deckhouse-oss +`) + var def ClusterDefinition + if err := yaml.Unmarshal(in, &def); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(def.Masters) != 1 || def.Masters[0].Hostname != "m1" { + t.Errorf("masters not parsed: %+v", def.Masters) + } + if len(def.Workers) != 1 || def.Workers[0].Hostname != "w1" { + t.Errorf("workers not parsed: %+v", def.Workers) + } + if def.DKPParameters.KubernetesVersion != "1.30" { + t.Errorf("KubernetesVersion=%q", def.DKPParameters.KubernetesVersion) + } + }) + + t.Run("clusterDefinition wrapper key", func(t *testing.T) { + in := []byte(` +clusterDefinition: + masters: + - hostname: m1 + hostType: vm + osType: "` + osName + `" + cpu: 1 + ram: 1 + diskSize: 10 + dkpParameters: + registryRepo: dev-registry.deckhouse.io/sys/deckhouse-oss +`) + var def ClusterDefinition + if err := yaml.Unmarshal(in, &def); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(def.Masters) != 1 || def.Masters[0].Hostname != "m1" { + t.Errorf("wrapper-key path didn't unwrap: %+v", def) + } + if def.DKPParameters.RegistryRepo != "dev-registry.deckhouse.io/sys/deckhouse-oss" { + t.Errorf("registryRepo not parsed: %q", def.DKPParameters.RegistryRepo) + } + }) + + t.Run("invalid nested node propagates error", func(t *testing.T) { + in := []byte(` +masters: + - hostname: m1 + hostType: container + osType: "` + osName + `" +`) + var def ClusterDefinition + err := yaml.Unmarshal(in, &def) + if err == nil || !strings.Contains(err.Error(), "invalid hostType") { + t.Errorf("want 'invalid hostType' error, got %v", err) + } + }) +} + +func TestValidateModulePullOverrides_Extended(t *testing.T) { + t.Run("nil module entries are ignored", func(t *testing.T) { + def := &ClusterDefinition{ + DKPParameters: DKPParameters{ + Modules: []*ModuleConfig{ + nil, + {Name: "ok", ModulePullOverride: "pr1"}, + nil, + }, + }, + } + if err := ValidateModulePullOverrides(def); err != nil { + t.Errorf("unexpected error: %v", err) + } + }) + + t.Run("empty override is allowed", func(t *testing.T) { + def := &ClusterDefinition{ + DKPParameters: DKPParameters{ + Modules: []*ModuleConfig{ + {Name: "x", ModulePullOverride: ""}, + }, + }, + } + if err := ValidateModulePullOverrides(def); err != nil { + t.Errorf("unexpected error: %v", err) + } + }) + + t.Run("no modules at all is allowed", func(t *testing.T) { + def := &ClusterDefinition{} + if err := ValidateModulePullOverrides(def); err != nil { + t.Errorf("unexpected error: %v", err) + } + }) +} diff --git a/internal/kubernetes/commander/client_http_test.go b/internal/kubernetes/commander/client_http_test.go new file mode 100644 index 0000000..d95069f --- /dev/null +++ b/internal/kubernetes/commander/client_http_test.go @@ -0,0 +1,683 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// HTTP-driven tests for the Commander API client. Each test spins up an +// httptest.Server, points a real Client at it, and asserts that requests are +// shaped correctly and responses are decoded as expected. No network traffic +// leaves the test binary. + +package commander + +import ( + "context" + "encoding/base64" + "encoding/json" + "errors" + "io" + "net/http" + "net/http/httptest" + "net/url" + "strings" + "testing" +) + +// recordedRequest captures the parts of an *http.Request we care about so +// tests can assert against them after the response is returned. +type recordedRequest struct { + Method string + Path string + RawQuery string + Header http.Header + BodyBytes []byte + CookieToken string +} + +// captureHandler returns an http.Handler that records every incoming request +// into *out and calls fn to write the response. fn may inspect r to vary the +// behaviour per call. +func captureHandler(out *[]recordedRequest, fn http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + body, _ := io.ReadAll(r.Body) + rec := recordedRequest{ + Method: r.Method, + Path: r.URL.Path, + RawQuery: r.URL.RawQuery, + Header: r.Header.Clone(), + BodyBytes: body, + } + if cookie, err := r.Cookie("token"); err == nil { + rec.CookieToken = cookie.Value + } + *out = append(*out, rec) + fn(w, r) + } +} + +func mustClient(t *testing.T, baseURL, token string, opts ClientOptions) *Client { + t.Helper() + c, err := NewClientWithOptions(baseURL, token, opts) + if err != nil { + t.Fatalf("NewClientWithOptions: %v", err) + } + return c +} + +func TestSetAuthHeaders_AllMethods(t *testing.T) { + cases := []struct { + name string + method AuthMethod + user string + token string + assertReq func(t *testing.T, r recordedRequest) + }{ + { + name: "default = X-Auth-Token", + method: "", + token: "sekret", + assertReq: func(t *testing.T, r recordedRequest) { + if got := r.Header.Get("X-Auth-Token"); got != "sekret" { + t.Errorf("X-Auth-Token=%q, want sekret", got) + } + if r.Header.Get("Authorization") != "" { + t.Errorf("Authorization header should be empty for x-auth-token") + } + }, + }, + { + name: "bearer", + method: AuthMethodBearer, + token: "sekret", + assertReq: func(t *testing.T, r recordedRequest) { + if got := r.Header.Get("Authorization"); got != "Bearer sekret" { + t.Errorf("Authorization=%q, want Bearer sekret", got) + } + }, + }, + { + name: "token", + method: AuthMethodToken, + token: "sekret", + assertReq: func(t *testing.T, r recordedRequest) { + if got := r.Header.Get("Authorization"); got != "Token sekret" { + t.Errorf("Authorization=%q, want Token sekret", got) + } + }, + }, + { + name: "cookie", + method: AuthMethodCookie, + token: "sekret", + assertReq: func(t *testing.T, r recordedRequest) { + if r.CookieToken != "sekret" { + t.Errorf("cookie token=%q, want sekret", r.CookieToken) + } + }, + }, + { + name: "basic", + method: AuthMethodBasic, + user: "alice", + token: "sekret", + assertReq: func(t *testing.T, r recordedRequest) { + want := "Basic " + base64.StdEncoding.EncodeToString([]byte("alice:sekret")) + if got := r.Header.Get("Authorization"); got != want { + t.Errorf("Authorization=%q, want %q", got, want) + } + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + var recs []recordedRequest + srv := httptest.NewServer(captureHandler(&recs, func(w http.ResponseWriter, _ *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"id":"x","name":"y"}`)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, tc.token, ClientOptions{ + AuthMethod: tc.method, + AuthUser: tc.user, + }) + if _, err := c.GetClusterByID(context.Background(), "x"); err != nil { + t.Fatalf("GetClusterByID: %v", err) + } + if len(recs) != 1 { + t.Fatalf("got %d requests, want 1", len(recs)) + } + tc.assertReq(t, recs[0]) + if r := recs[0]; r.Header.Get("Accept") != "application/json" { + t.Errorf("Accept=%q, want application/json", r.Header.Get("Accept")) + } + }) + } +} + +func TestGetClusterByID(t *testing.T) { + t.Run("200 returns cluster", func(t *testing.T) { + var recs []recordedRequest + srv := httptest.NewServer(captureHandler(&recs, func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"id":"abc","name":"my-cluster","status":"in_sync"}`)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + got, err := c.GetClusterByID(context.Background(), "abc") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.ID != "abc" || got.Name != "my-cluster" || got.Status != "in_sync" { + t.Errorf("decoded %+v", got) + } + if recs[0].Path != "/api/v1/clusters/abc" || recs[0].Method != http.MethodGet { + t.Errorf("unexpected request: %+v", recs[0]) + } + }) + + t.Run("404 returns ErrClusterNotFound", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + http.Error(w, "missing", http.StatusNotFound) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + _, err := c.GetClusterByID(context.Background(), "ghost") + if !errors.Is(err, ErrClusterNotFound) { + t.Errorf("want ErrClusterNotFound, got %v", err) + } + }) + + t.Run("500 returns wrapped error with body", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte("boom")) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + _, err := c.GetClusterByID(context.Background(), "x") + if err == nil { + t.Fatal("expected error") + } + msg := err.Error() + if !strings.Contains(msg, "500") || !strings.Contains(msg, "boom") { + t.Errorf("error should include code and body: %v", err) + } + }) +} + +func TestListClustersAPI(t *testing.T) { + t.Run("decodes array body", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`[{"id":"a","name":"alpha"},{"id":"b","name":"beta"}]`)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + got, err := c.ListClustersAPI(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got) != 2 || got[0].Name != "alpha" || got[1].Name != "beta" { + t.Errorf("decoded %+v", got) + } + }) + + t.Run("decodes object body with items[]", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"items":[{"id":"a","name":"alpha"}]}`)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + got, err := c.ListClustersAPI(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got) != 1 || got[0].Name != "alpha" { + t.Errorf("decoded %+v", got) + } + }) + + t.Run("decodes object body with data[]", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"data":[{"id":"c","name":"gamma"}]}`)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + got, err := c.ListClustersAPI(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got) != 1 || got[0].Name != "gamma" { + t.Errorf("decoded %+v", got) + } + }) + + t.Run("garbage body returns descriptive error", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`not json at all`)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + _, err := c.ListClustersAPI(context.Background()) + if err == nil { + t.Fatal("expected error") + } + if !strings.Contains(err.Error(), "failed to decode response") { + t.Errorf("want 'failed to decode response' error, got %v", err) + } + }) +} + +func TestGetClusterByName(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`[{"id":"a","name":"alpha"},{"id":"b","name":"beta"}]`)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + + t.Run("found", func(t *testing.T) { + got, err := c.GetClusterByName(context.Background(), "beta") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.ID != "b" { + t.Errorf("ID=%q, want b", got.ID) + } + }) + + t.Run("not found", func(t *testing.T) { + _, err := c.GetClusterByName(context.Background(), "zeta") + if !errors.Is(err, ErrClusterNotFound) { + t.Errorf("want ErrClusterNotFound, got %v", err) + } + }) +} + +func TestCreateClusterFromTemplate(t *testing.T) { + var recs []recordedRequest + srv := httptest.NewServer(captureHandler(&recs, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusCreated) + _, _ = w.Write([]byte(`{"id":"new","name":"created"}`)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + values := map[string]interface{}{"releaseChannel": "EarlyAccess"} + got, err := c.CreateClusterFromTemplate(context.Background(), "my-cluster", "tpl-version-123", "reg-456", values) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.ID != "new" || got.Name != "created" { + t.Errorf("decoded %+v", got) + } + + if len(recs) != 1 { + t.Fatalf("got %d requests, want 1", len(recs)) + } + r := recs[0] + if r.Method != http.MethodPost || r.Path != "/api/v1/clusters" { + t.Errorf("unexpected request: %s %s", r.Method, r.Path) + } + if ct := r.Header.Get("Content-Type"); ct != "application/json" { + t.Errorf("Content-Type=%q, want application/json", ct) + } + + // Verify request body shape. + var body CreateClusterRequest + if err := json.Unmarshal(r.BodyBytes, &body); err != nil { + t.Fatalf("body not JSON: %v (raw=%s)", err, string(r.BodyBytes)) + } + if body.Name != "my-cluster" || body.ClusterTemplateVersionID != "tpl-version-123" || body.RegistryID != "reg-456" { + t.Errorf("body fields: %+v", body) + } + if body.Values["releaseChannel"] != "EarlyAccess" { + t.Errorf("Values not forwarded: %+v", body.Values) + } +} + +func TestDeleteClusterByID(t *testing.T) { + t.Run("204 No Content is success", func(t *testing.T) { + var recs []recordedRequest + srv := httptest.NewServer(captureHandler(&recs, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNoContent) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + if err := c.DeleteClusterByID(context.Background(), "xyz"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if recs[0].Method != http.MethodDelete || recs[0].Path != "/api/v1/clusters/xyz" { + t.Errorf("unexpected request: %+v", recs[0]) + } + }) + + t.Run("202 Accepted is success", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusAccepted) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + if err := c.DeleteClusterByID(context.Background(), "xyz"); err != nil { + t.Errorf("unexpected error: %v", err) + } + }) + + t.Run("500 returns error with body", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte("nope")) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + err := c.DeleteClusterByID(context.Background(), "xyz") + if err == nil || !strings.Contains(err.Error(), "500") || !strings.Contains(err.Error(), "nope") { + t.Errorf("want 500/nope in error, got %v", err) + } + }) +} + +func TestGetClusterKubeconfigByID(t *testing.T) { + t.Run("raw kubeconfig body", func(t *testing.T) { + const raw = "apiVersion: v1\nkind: Config\n" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/api/v1/clusters/x/kubeconfig" { + http.Error(w, "wrong path", http.StatusBadRequest) + return + } + _, _ = w.Write([]byte(raw)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + got, err := c.GetClusterKubeconfigByID(context.Background(), "x") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != raw { + t.Errorf("got %q, want %q", got, raw) + } + }) + + t.Run("JSON wrapper with 'kubeconfig' field", func(t *testing.T) { + const raw = "apiVersion: v1\nkind: Config\n" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + payload, _ := json.Marshal(map[string]string{"kubeconfig": raw}) + _, _ = w.Write(payload) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + got, err := c.GetClusterKubeconfigByID(context.Background(), "x") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != raw { + t.Errorf("got %q, want %q", got, raw) + } + }) + + t.Run("/kubeconfig 404 falls back to cluster-details ClusterAgentData.data.kubeconfig", func(t *testing.T) { + const raw = "apiVersion: v1\nkind: Config\n" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/v1/clusters/x/kubeconfig": + http.Error(w, "no such endpoint", http.StatusNotFound) + case "/api/v1/clusters/x": + resp := ClusterResponse{ + ID: "x", + Status: "in_sync", + ClusterAgentData: map[string]interface{}{ + "data": map[string]interface{}{"kubeconfig": raw}, + }, + } + payload, _ := json.Marshal(resp) + _, _ = w.Write(payload) + default: + http.Error(w, "unexpected", http.StatusBadRequest) + } + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + got, err := c.GetClusterKubeconfigByID(context.Background(), "x") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != raw { + t.Errorf("got %q, want %q", got, raw) + } + }) + + t.Run("/kubeconfig 404 with nothing in details returns descriptive error", func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/v1/clusters/x/kubeconfig" { + http.Error(w, "no such endpoint", http.StatusNotFound) + return + } + // details lookup with no kubeconfig field at all + _, _ = w.Write([]byte(`{"id":"x","status":"new"}`)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + _, err := c.GetClusterKubeconfigByID(context.Background(), "x") + if err == nil || !strings.Contains(err.Error(), "kubeconfig not found") { + t.Errorf("want 'kubeconfig not found' error, got %v", err) + } + }) +} + +func TestGetRegistryByName(t *testing.T) { + registries := `[{"id":"r1","name":"prod-registry"},{"id":"r2","name":"dev-registry-eu"}]` + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(registries)) + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + + t.Run("exact name match wins", func(t *testing.T) { + got, err := c.GetRegistryByName(context.Background(), "prod-registry") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.ID != "r1" { + t.Errorf("ID=%q, want r1", got.ID) + } + }) + + t.Run("partial match used when exact missing", func(t *testing.T) { + got, err := c.GetRegistryByName(context.Background(), "dev-registry") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got.ID != "r2" { + t.Errorf("ID=%q, want r2 (partial match)", got.ID) + } + }) + + t.Run("nothing matches errors", func(t *testing.T) { + _, err := c.GetRegistryByName(context.Background(), "no-such-thing") + if err == nil || !strings.Contains(err.Error(), "no-such-thing") { + t.Errorf("want 'no-such-thing' error, got %v", err) + } + }) +} + +func TestGetClusterConnectionInfo_PrefersConnectionHosts(t *testing.T) { + const kc = "apiVersion: v1\nkind: Config\n" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/api/v1/clusters" && r.Method == http.MethodGet: + // list returns one cluster + resp := []ClusterResponse{{ + ID: "c1", + Name: "my-cluster", + Status: "in_sync", + ConnectionHosts: map[string]interface{}{ + "api_endpoint": "https://api.example.com", + "masters": []interface{}{ + map[string]interface{}{ + "host": "10.1.2.3", + "user": "ubuntu", + "port": float64(2222), + }, + }, + }, + }} + payload, _ := json.Marshal(resp) + _, _ = w.Write(payload) + case r.URL.Path == "/api/v1/clusters/c1/kubeconfig": + _, _ = w.Write([]byte(kc)) + default: + http.Error(w, "unexpected: "+r.URL.Path, http.StatusBadRequest) + } + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + info, err := c.GetClusterConnectionInfo(context.Background(), "my-cluster") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if info.APIEndpoint != "https://api.example.com" { + t.Errorf("APIEndpoint=%q", info.APIEndpoint) + } + if info.SSHHost != "10.1.2.3" || info.SSHUser != "ubuntu" || info.SSHPort != 2222 { + t.Errorf("SSH info wrong: %+v", info) + } + if info.Kubeconfig != kc { + t.Errorf("kubeconfig not propagated") + } +} + +func TestGetClusterConnectionInfo_FallsBackToAgentData(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + case r.URL.Path == "/api/v1/clusters" && r.Method == http.MethodGet: + resp := []ClusterResponse{{ + ID: "c1", + Name: "my-cluster", + Status: "in_sync", + ClusterAgentData: map[string]interface{}{ + "data": map[string]interface{}{ + "ssh_host": "10.9.9.9", + "ssh_user": "root", + // no port: default 22 should apply + }, + }, + }} + payload, _ := json.Marshal(resp) + _, _ = w.Write(payload) + case strings.HasSuffix(r.URL.Path, "/kubeconfig"): + // no kubeconfig endpoint; let it fail + http.Error(w, "missing", http.StatusNotFound) + case r.URL.Path == "/api/v1/clusters/c1": + // detail lookup used by fallback also has no kubeconfig + _, _ = w.Write([]byte(`{"id":"c1","name":"my-cluster"}`)) + default: + http.Error(w, "unexpected: "+r.URL.Path, http.StatusBadRequest) + } + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + info, err := c.GetClusterConnectionInfo(context.Background(), "my-cluster") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if info.SSHHost != "10.9.9.9" || info.SSHUser != "root" { + t.Errorf("agent-data fallback failed: %+v", info) + } + if info.SSHPort != 22 { + t.Errorf("default port not applied, got %d", info.SSHPort) + } +} + +func TestGetClusterConnectionInfo_FallsBackToLegacyFields(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/v1/clusters": + resp := []ClusterResponse{{ + ID: "c1", + Name: "my-cluster", + SSHHost: "10.5.5.5", + SSHUser: "deploy", + SSHPort: 0, + }} + payload, _ := json.Marshal(resp) + _, _ = w.Write(payload) + case "/api/v1/clusters/c1/kubeconfig": + http.Error(w, "missing", http.StatusNotFound) + case "/api/v1/clusters/c1": + _, _ = w.Write([]byte(`{"id":"c1","name":"my-cluster"}`)) + default: + http.Error(w, "unexpected: "+r.URL.Path, http.StatusBadRequest) + } + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + info, err := c.GetClusterConnectionInfo(context.Background(), "my-cluster") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if info.SSHHost != "10.5.5.5" || info.SSHUser != "deploy" || info.SSHPort != 22 { + t.Errorf("legacy fallback failed: %+v", info) + } +} + +func TestGetClusterConnectionInfo_NoSSHNoKubeconfigErrors(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch r.URL.Path { + case "/api/v1/clusters": + _, _ = w.Write([]byte(`[{"id":"c1","name":"my-cluster"}]`)) + case "/api/v1/clusters/c1/kubeconfig": + http.Error(w, "no kubeconfig endpoint", http.StatusNotFound) + case "/api/v1/clusters/c1": + _, _ = w.Write([]byte(`{"id":"c1","name":"my-cluster"}`)) + default: + http.Error(w, "unexpected: "+r.URL.Path, http.StatusBadRequest) + } + })) + defer srv.Close() + + c := mustClient(t, srv.URL, "tok", ClientOptions{}) + _, err := c.GetClusterConnectionInfo(context.Background(), "my-cluster") + if err == nil { + t.Fatal("expected error when neither kubeconfig nor SSH info is available") + } +} + +// Sanity: ensure srv.URL is a usable URL the client can talk to. This guards +// against test infra regressions where httptest.NewServer returns something +// pathological. +func TestServerURLIsValid(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {})) + defer srv.Close() + if _, err := url.Parse(srv.URL); err != nil { + t.Fatalf("httptest server URL is not parseable: %v", err) + } +} diff --git a/internal/kubernetes/commander/client_test.go b/internal/kubernetes/commander/client_test.go new file mode 100644 index 0000000..4180b79 --- /dev/null +++ b/internal/kubernetes/commander/client_test.go @@ -0,0 +1,176 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package commander + +import ( + "encoding/base64" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestMapStatusToPhase(t *testing.T) { + cases := []struct { + in string + want ClusterPhase + }{ + {"in_sync", ClusterPhaseReady}, + {"insync", ClusterPhaseReady}, + {"ready", ClusterPhaseReady}, + {"running", ClusterPhaseReady}, + {"active", ClusterPhaseReady}, + {"new", ClusterPhaseDraft}, + {"creating", ClusterPhaseCreating}, + {"provisioning", ClusterPhaseCreating}, + {"bootstrapping", ClusterPhaseCreating}, + {"updating", ClusterPhaseUpdating}, + {"upgrading", ClusterPhaseUpdating}, + {"deleting", ClusterPhaseDeleting}, + {"terminating", ClusterPhaseDeleting}, + {"failed", ClusterPhaseFailed}, + {"error", ClusterPhaseFailed}, + {"joining", ClusterPhaseJoining}, + {"ready_to_join", ClusterPhaseReadyToJoin}, + {"readytojoin", ClusterPhaseReadyToJoin}, + + // Case-insensitive. + {"IN_SYNC", ClusterPhaseReady}, + {"Creating", ClusterPhaseCreating}, + + // Unknown values pass through verbatim (no normalization). + {"some-novel-state", ClusterPhase("some-novel-state")}, + {"", ClusterPhase("")}, + } + + for _, tc := range cases { + t.Run(tc.in, func(t *testing.T) { + got := mapStatusToPhase(tc.in) + if got != tc.want { + t.Errorf("mapStatusToPhase(%q)=%q, want %q", tc.in, got, tc.want) + } + }) + } +} + +func TestBase64Encode(t *testing.T) { + cases := []struct { + name, in, want string + }{ + {"empty", "", ""}, + {"ascii", "user:token", "dXNlcjp0b2tlbg=="}, + {"unicode", "tëst", "dMOrc3Q="}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := base64Encode(tc.in) + if got != tc.want { + t.Errorf("base64Encode(%q)=%q, want %q", tc.in, got, tc.want) + } + // Sanity: must round-trip via stdlib decoder. + dec, err := base64.StdEncoding.DecodeString(got) + if err != nil { + t.Fatalf("decode error: %v", err) + } + if string(dec) != tc.in { + t.Errorf("round-trip mismatch: %q -> %q", tc.in, string(dec)) + } + }) + } +} + +func TestNewClientWithOptions_Validation(t *testing.T) { + t.Run("empty baseURL errors", func(t *testing.T) { + _, err := NewClientWithOptions("", "tok", ClientOptions{}) + if err == nil || !strings.Contains(err.Error(), "baseURL") { + t.Errorf("want baseURL error, got %v", err) + } + }) + + t.Run("empty token errors", func(t *testing.T) { + _, err := NewClientWithOptions("https://x", "", ClientOptions{}) + if err == nil || !strings.Contains(err.Error(), "token") { + t.Errorf("want token error, got %v", err) + } + }) + + t.Run("defaults: x-auth-token + /api/v1", func(t *testing.T) { + c, err := NewClientWithOptions("https://commander.example.com", "tok", ClientOptions{}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if c.authMethod != AuthMethodXAuthToken { + t.Errorf("authMethod=%q, want %q", c.authMethod, AuthMethodXAuthToken) + } + if c.apiPrefix != "/api/v1" { + t.Errorf("apiPrefix=%q, want /api/v1", c.apiPrefix) + } + }) + + t.Run("trailing slashes trimmed from baseURL and apiPrefix", func(t *testing.T) { + c, err := NewClientWithOptions("https://x/", "tok", ClientOptions{APIPrefix: "/api/"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if c.baseURL != "https://x" { + t.Errorf("baseURL=%q, want %q", c.baseURL, "https://x") + } + if c.apiPrefix != "/api" { + t.Errorf("apiPrefix=%q, want /api", c.apiPrefix) + } + }) + + t.Run("explicit auth method is honored", func(t *testing.T) { + c, err := NewClientWithOptions("https://x", "tok", ClientOptions{AuthMethod: AuthMethodBearer}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if c.authMethod != AuthMethodBearer { + t.Errorf("authMethod=%q, want %q", c.authMethod, AuthMethodBearer) + } + }) + + t.Run("bad CACertPath errors with file path", func(t *testing.T) { + bad := filepath.Join(t.TempDir(), "does-not-exist.pem") + _, err := NewClientWithOptions("https://x", "tok", ClientOptions{CACertPath: bad}) + if err == nil || !strings.Contains(err.Error(), "CA certificate") { + t.Errorf("want CA certificate error, got %v", err) + } + }) + + t.Run("malformed CA cert content errors", func(t *testing.T) { + path := filepath.Join(t.TempDir(), "not-a-cert.pem") + if err := os.WriteFile(path, []byte("not pem data"), 0o600); err != nil { + t.Fatal(err) + } + _, err := NewClientWithOptions("https://x", "tok", ClientOptions{CACertPath: path}) + if err == nil || !strings.Contains(err.Error(), "parse CA certificate") { + t.Errorf("want parse error, got %v", err) + } + }) + + t.Run("InsecureSkipTLSVerify is configured", func(t *testing.T) { + c, err := NewClientWithOptions("https://x", "tok", ClientOptions{InsecureSkipTLSVerify: true}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if c.httpClient == nil || c.httpClient.Transport == nil { + t.Fatal("expected non-nil httpClient/transport") + } + }) +} + diff --git a/internal/logger/level_test.go b/internal/logger/level_test.go new file mode 100644 index 0000000..864ada0 --- /dev/null +++ b/internal/logger/level_test.go @@ -0,0 +1,70 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package logger + +import ( + "log/slog" + "testing" +) + +func TestLevelToString(t *testing.T) { + cases := []struct { + level slog.Level + want string + }{ + {slog.LevelDebug, "DEBUG"}, + {slog.LevelInfo, "INFO"}, + {slog.LevelWarn, "WARN"}, + {slog.LevelError, "ERROR"}, + // Unknown/out-of-range level returns "INFO". + {slog.Level(100), "INFO"}, + {slog.Level(-100), "INFO"}, + } + for _, tc := range cases { + t.Run(tc.want, func(t *testing.T) { + got := LevelToString(tc.level) + if got != tc.want { + t.Errorf("LevelToString(%v)=%q, want %q", tc.level, got, tc.want) + } + }) + } +} + +// TestParseLevelLevelToStringRoundTrip checks that the canonical pairs survive +// a string -> Level -> string trip. Note: ParseLevel accepts "warning" as a +// synonym for "warn" but LevelToString always emits "WARN". +func TestParseLevelLevelToStringRoundTrip(t *testing.T) { + pairs := map[string]string{ + "debug": "DEBUG", + "info": "INFO", + "warn": "WARN", + "error": "ERROR", + } + for in, want := range pairs { + t.Run(in, func(t *testing.T) { + got := LevelToString(ParseLevel(in)) + if got != want { + t.Errorf("LevelToString(ParseLevel(%q))=%q, want %q", in, got, want) + } + }) + } + + // "warning" is a synonym for warn. + if got := LevelToString(ParseLevel("warning")); got != "WARN" { + t.Errorf("ParseLevel(\"warning\") roundtrip = %q, want WARN", got) + } +} diff --git a/pkg/cluster/vms_test.go b/pkg/cluster/vms_test.go new file mode 100644 index 0000000..917a557 --- /dev/null +++ b/pkg/cluster/vms_test.go @@ -0,0 +1,257 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package cluster + +import ( + "strings" + "testing" + + "github.com/deckhouse/storage-e2e/internal/config" +) + +func TestGetCVMINameFromImageURL(t *testing.T) { + cases := []struct { + name string + in string + want string + }{ + {"strips .img extension", "https://example.com/jammy-server.img", "jammy-server"}, + {"strips .qcow2 extension", "https://example.com/redos.qcow2", "redos"}, + {"lowercases", "https://EXAMPLE.com/UBUNTU-22.04.img", "ubuntu-22-04"}, + {"underscores to hyphens", "https://example.com/my_image_file.img", "my-image-file"}, + {"dots to hyphens", "https://example.com/v1.0.2-image.img", "v1-0-2-image"}, + {"collapses consecutive hyphens", "https://example.com/a__b..c.img", "a-b-c"}, + {"trims leading/trailing hyphens", "https://example.com/_..foo..img", "foo"}, + { + "empty after sanitation -> 'image'", + "https://example.com/._.qcow2", + "image", + }, + { + "no path segments yet still works", + "foo.img", + "foo", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := getCVMINameFromImageURL(tc.in) + if got != tc.want { + t.Errorf("getCVMINameFromImageURL(%q)=%q, want %q", tc.in, got, tc.want) + } + }) + } +} + +func TestGetVMNodes(t *testing.T) { + t.Run("filters VM nodes across masters/workers and includes VM setup", func(t *testing.T) { + def := &config.ClusterDefinition{ + Masters: []config.ClusterNode{ + {Hostname: "m1", HostType: config.HostTypeVM}, + {Hostname: "m2", HostType: config.HostTypeBareMetal}, + }, + Workers: []config.ClusterNode{ + {Hostname: "w1", HostType: config.HostTypeBareMetal}, + {Hostname: "w2", HostType: config.HostTypeVM}, + }, + Setup: &config.ClusterNode{Hostname: "setup", HostType: config.HostTypeVM}, + } + + nodes := getVMNodes(def) + got := hostnames(nodes) + want := []string{"m1", "w2", "setup"} + if !equalStrings(got, want) { + t.Errorf("getVMNodes hostnames = %v, want %v", got, want) + } + }) + + t.Run("bare-metal setup is excluded", func(t *testing.T) { + def := &config.ClusterDefinition{ + Masters: []config.ClusterNode{{Hostname: "m1", HostType: config.HostTypeVM}}, + Setup: &config.ClusterNode{Hostname: "setup-bm", HostType: config.HostTypeBareMetal}, + } + nodes := getVMNodes(def) + got := hostnames(nodes) + if !equalStrings(got, []string{"m1"}) { + t.Errorf("got %v, want [m1] (setup must be excluded when bare-metal)", got) + } + }) + + t.Run("nil setup is fine", func(t *testing.T) { + def := &config.ClusterDefinition{ + Workers: []config.ClusterNode{{Hostname: "w1", HostType: config.HostTypeVM}}, + } + nodes := getVMNodes(def) + if len(nodes) != 1 || nodes[0].Hostname != "w1" { + t.Errorf("got %v, want [w1]", hostnames(nodes)) + } + }) +} + +func TestGetSetupNode(t *testing.T) { + t.Run("nil clusterDef errors", func(t *testing.T) { + _, err := GetSetupNode(nil) + if err == nil { + t.Fatal("expected error for nil clusterDef") + } + }) + + t.Run("nil Setup errors", func(t *testing.T) { + _, err := GetSetupNode(&config.ClusterDefinition{}) + if err == nil { + t.Fatal("expected error when Setup is nil") + } + }) + + t.Run("returns setup node", func(t *testing.T) { + want := &config.ClusterNode{Hostname: "boot", HostType: config.HostTypeVM} + def := &config.ClusterDefinition{Setup: want} + got, err := GetSetupNode(def) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != want { + t.Errorf("got %p, want %p", got, want) + } + }) +} + +func TestGetNodeIPAddress(t *testing.T) { + def := &config.ClusterDefinition{ + Masters: []config.ClusterNode{ + {Hostname: "m1", IPAddress: "10.0.0.1", HostType: config.HostTypeVM}, + {Hostname: "m-empty", IPAddress: "", HostType: config.HostTypeVM}, + }, + Workers: []config.ClusterNode{ + {Hostname: "w1", IPAddress: "10.0.0.2", HostType: config.HostTypeVM}, + }, + Setup: &config.ClusterNode{Hostname: "boot", IPAddress: "10.0.0.99", HostType: config.HostTypeVM}, + } + + cases := []struct { + name string + hostname string + wantIP string + wantErr string // substring; empty means no error expected + }{ + {"master found", "m1", "10.0.0.1", ""}, + {"worker found", "w1", "10.0.0.2", ""}, + {"setup found", "boot", "10.0.0.99", ""}, + {"master IP empty -> error", "m-empty", "", "IP address not set for master"}, + {"unknown hostname -> error", "ghost", "", "not found in cluster definition"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + ip, err := GetNodeIPAddress(def, tc.hostname) + if tc.wantErr == "" { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ip != tc.wantIP { + t.Errorf("ip=%q, want %q", ip, tc.wantIP) + } + return + } + if err == nil { + t.Fatalf("expected error containing %q, got nil", tc.wantErr) + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Errorf("error %q does not contain %q", err, tc.wantErr) + } + }) + } +} + +func TestGenerateRandomSuffix(t *testing.T) { + t.Run("length matches argument", func(t *testing.T) { + for _, n := range []int{0, 1, 5, 32} { + got := GenerateRandomSuffix(n) + if len(got) != n { + t.Errorf("len(GenerateRandomSuffix(%d))=%d, want %d (got=%q)", n, len(got), n, got) + } + } + }) + + t.Run("uses only lowercase alphanumerics", func(t *testing.T) { + got := GenerateRandomSuffix(64) + for _, r := range got { + if !((r >= 'a' && r <= 'z') || (r >= '0' && r <= '9')) { + t.Fatalf("invalid character %q in suffix %q", r, got) + } + } + }) + + t.Run("two calls usually differ for non-trivial length", func(t *testing.T) { + // With charset of 36 and length 16 the collision probability is + // 36^-16 — negligible. If it ever collides we have a bigger problem. + a := GenerateRandomSuffix(16) + b := GenerateRandomSuffix(16) + if a == b { + t.Errorf("two 16-char suffixes were identical: %q (probability ~36^-16)", a) + } + }) +} + +func TestGenerateCloudInitUserData(t *testing.T) { + t.Run("worker cloud-init includes hostname and key", func(t *testing.T) { + got := generateCloudInitUserData("worker-1", "ssh-ed25519 AAAA test@key") + mustContain(t, got, "#cloud-config") + mustContain(t, got, "hostnamectl set-hostname worker-1") + mustContain(t, got, "ssh-ed25519 AAAA test@key") + mustContain(t, got, "mirror.yandex.ru/ubuntu") // apt mirror tweak + mustContain(t, got, "99force-ipv4") // IPv4 apt override + }) + + t.Run("setup cloud-init includes docker and key", func(t *testing.T) { + got := generateSetupNodeCloudInit("bootstrap-node-abc", "ssh-rsa BBBB me@host") + mustContain(t, got, "#cloud-config") + mustContain(t, got, "hostnamectl set-hostname bootstrap-node-abc") + mustContain(t, got, "ssh-rsa BBBB me@host") + mustContain(t, got, "docker.io") + mustContain(t, got, "systemctl enable --now docker.service") + }) +} + +// helpers --------------------------------------------------------------- + +func hostnames(nodes []config.ClusterNode) []string { + out := make([]string, 0, len(nodes)) + for _, n := range nodes { + out = append(out, n.Hostname) + } + return out +} + +func equalStrings(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +func mustContain(t *testing.T, s, substr string) { + t.Helper() + if !strings.Contains(s, substr) { + t.Errorf("missing %q in output:\n%s", substr, s) + } +} diff --git a/pkg/kubernetes/apply_test.go b/pkg/kubernetes/apply_test.go new file mode 100644 index 0000000..b19db38 --- /dev/null +++ b/pkg/kubernetes/apply_test.go @@ -0,0 +1,128 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubernetes + +import ( + "reflect" + "sort" + "testing" +) + +func TestFindUnsetEnvVars(t *testing.T) { + t.Run("returns deduped names for unset vars only", func(t *testing.T) { + const ( + unsetA = "STORAGE_E2E_TEST_UNSET_A" + unsetB = "STORAGE_E2E_TEST_UNSET_B" + setOK = "STORAGE_E2E_TEST_SET_OK" + ) + // Explicitly empty the probe vars so the test is hermetic even if + // something in the environment happened to set them. + t.Setenv(unsetA, "") + t.Setenv(unsetB, "") + t.Setenv(setOK, "value-is-here") + + content := `image: ${` + unsetA + `} +extra: ${` + unsetA + `} +ref: ${` + unsetB + `} +tag: ${` + setOK + `}` + + got := FindUnsetEnvVars(content) + sort.Strings(got) + want := []string{unsetA, unsetB} + sort.Strings(want) + if !reflect.DeepEqual(got, want) { + t.Fatalf("FindUnsetEnvVars()=%v, want %v", got, want) + } + }) + + t.Run("ignores non-matching tokens", func(t *testing.T) { + // `$VAR` (no braces) and `${1foo}` (starts with digit) must be ignored. + got := FindUnsetEnvVars(`a: $PLAIN +b: ${1invalid} +c: ${valid_NAME}`) + // $PLAIN and ${1invalid} should not show up. ${valid_NAME} should. + // Note: this var should remain unset (highly unlikely to collide). + t.Setenv("valid_NAME", "") // ensure unset (empty -> treated as unset) + // re-evaluate now that we forced empty + got = FindUnsetEnvVars(`c: ${valid_NAME}`) + if !reflect.DeepEqual(got, []string{"valid_NAME"}) { + t.Fatalf("got %v, want [valid_NAME]", got) + } + }) + + t.Run("empty content returns nil", func(t *testing.T) { + if got := FindUnsetEnvVars(""); got != nil { + t.Fatalf("got %v, want nil", got) + } + }) +} + +func TestSplitYAMLDocuments(t *testing.T) { + cases := []struct { + name string + in string + want []string + }{ + { + name: "single document", + in: "kind: A\nmetadata:\n name: a\n", + want: []string{"kind: A\nmetadata:\n name: a"}, + }, + { + name: "two documents joined by \\n---\\n", + in: "kind: A\n---\nkind: B\n", + want: []string{"kind: A", "kind: B"}, + }, + { + name: "trailing separator yields no extra empty doc", + in: "kind: A\n---\n", + want: []string{"kind: A"}, + }, + { + name: "leading separator skipped", + in: "---\nkind: A\n", + // "---\nkind: A\n" has no "\n---\n" separator (the leading "---" is at line 0). + // Strings.Split returns the whole thing as one entry; "---\nkind: A" remains + // after TrimSpace and is kept because it isn't bare "---". + want: []string{"---\nkind: A"}, + }, + { + name: "only separators -> nothing", + in: "\n---\n\n---\n", + want: nil, + }, + { + name: "empty input returns nil", + in: "", + want: nil, + }, + { + name: "three docs with whitespace", + in: " kind: A \n---\nkind: B\n---\nkind: C", + want: []string{"kind: A", "kind: B", "kind: C"}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := splitYAMLDocuments(tc.in) + if !reflect.DeepEqual(got, tc.want) { + t.Fatalf("splitYAMLDocuments(%q)=\n %#v\nwant\n %#v", tc.in, got, tc.want) + } + }) + } +} diff --git a/pkg/kubernetes/modules_test.go b/pkg/kubernetes/modules_test.go new file mode 100644 index 0000000..977750b --- /dev/null +++ b/pkg/kubernetes/modules_test.go @@ -0,0 +1,280 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubernetes + +import ( + "errors" + "sort" + "strings" + "testing" + + "github.com/deckhouse/storage-e2e/internal/config" +) + +func TestConvertModuleSpecsToConfigs(t *testing.T) { + t.Run("nil Settings becomes empty map", func(t *testing.T) { + got := convertModuleSpecsToConfigs([]ModuleSpec{ + {Name: "foo", Version: 1, Enabled: true, Settings: nil}, + }) + if len(got) != 1 { + t.Fatalf("len=%d, want 1", len(got)) + } + if got[0].Settings == nil { + t.Fatal("Settings is nil; expected non-nil empty map") + } + if len(got[0].Settings) != 0 { + t.Errorf("Settings should be empty, got %v", got[0].Settings) + } + }) + + t.Run("copies fields verbatim", func(t *testing.T) { + specs := []ModuleSpec{ + { + Name: "csi-ceph", + Version: 2, + Enabled: true, + Settings: map[string]interface{}{"foo": "bar"}, + Dependencies: []string{"snapshot-controller"}, + ModulePullOverride: "pr131", + }, + { + Name: "noop", + Enabled: false, + }, + } + got := convertModuleSpecsToConfigs(specs) + if len(got) != 2 { + t.Fatalf("len=%d, want 2", len(got)) + } + if got[0].Name != "csi-ceph" || got[0].Version != 2 || !got[0].Enabled { + t.Errorf("got[0]=%+v", got[0]) + } + if got[0].Settings["foo"] != "bar" { + t.Errorf("settings not copied: %v", got[0].Settings) + } + if !equalStrings(got[0].Dependencies, []string{"snapshot-controller"}) { + t.Errorf("Dependencies = %v", got[0].Dependencies) + } + if got[0].ModulePullOverride != "pr131" { + t.Errorf("ModulePullOverride = %q", got[0].ModulePullOverride) + } + }) + + t.Run("empty input returns empty slice", func(t *testing.T) { + got := convertModuleSpecsToConfigs(nil) + if got == nil { + t.Fatal("expected non-nil empty slice") + } + if len(got) != 0 { + t.Errorf("len=%d, want 0", len(got)) + } + }) +} + +func TestBuildModuleGraph(t *testing.T) { + t.Run("empty input -> empty graph", func(t *testing.T) { + g, err := buildModuleGraph(nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(g.modules) != 0 || len(g.dependencies) != 0 || len(g.reverseDeps) != 0 { + t.Errorf("non-empty graph: %+v", g) + } + }) + + t.Run("builds dependency and reverse-dependency edges", func(t *testing.T) { + modules := []*config.ModuleConfig{ + {Name: "a"}, + {Name: "b", Dependencies: []string{"a"}}, + {Name: "c", Dependencies: []string{"a", "b"}}, + } + g, err := buildModuleGraph(modules) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(g.modules) != 3 { + t.Errorf("modules count = %d, want 3", len(g.modules)) + } + + // Forward deps. + if !equalStrings(g.dependencies["b"], []string{"a"}) { + t.Errorf("dep[b] = %v", g.dependencies["b"]) + } + if !equalStrings(g.dependencies["c"], []string{"a", "b"}) { + t.Errorf("dep[c] = %v", g.dependencies["c"]) + } + + // Reverse deps: who depends on "a"? -> b and c. + gotRevA := append([]string(nil), g.reverseDeps["a"]...) + sort.Strings(gotRevA) + if !equalStrings(gotRevA, []string{"b", "c"}) { + t.Errorf("reverseDeps[a] = %v", gotRevA) + } + // "b" is depended on only by c. + if !equalStrings(g.reverseDeps["b"], []string{"c"}) { + t.Errorf("reverseDeps[b] = %v", g.reverseDeps["b"]) + } + }) + + t.Run("missing dependency returns error", func(t *testing.T) { + modules := []*config.ModuleConfig{ + {Name: "a", Dependencies: []string{"ghost"}}, + } + _, err := buildModuleGraph(modules) + if err == nil { + t.Fatal("expected error for missing dependency") + } + if !strings.Contains(err.Error(), "ghost") || !strings.Contains(err.Error(), "a") { + t.Errorf("error should mention 'ghost' and 'a': %v", err) + } + }) +} + +func TestTopologicalSortLevels(t *testing.T) { + t.Run("orders by dependency depth (multi-level)", func(t *testing.T) { + modules := []*config.ModuleConfig{ + {Name: "leaf1"}, + {Name: "leaf2"}, + {Name: "mid", Dependencies: []string{"leaf1", "leaf2"}}, + {Name: "top", Dependencies: []string{"mid"}}, + } + g, err := buildModuleGraph(modules) + if err != nil { + t.Fatalf("graph error: %v", err) + } + levels, err := topologicalSortLevels(g) + if err != nil { + t.Fatalf("sort error: %v", err) + } + if len(levels) != 3 { + t.Fatalf("want 3 levels, got %d (%v)", len(levels), levelNames(levels)) + } + level0 := names(levels[0]) + sort.Strings(level0) + if !equalStrings(level0, []string{"leaf1", "leaf2"}) { + t.Errorf("level 0 = %v, want [leaf1 leaf2]", level0) + } + if !equalStrings(names(levels[1]), []string{"mid"}) { + t.Errorf("level 1 = %v, want [mid]", names(levels[1])) + } + if !equalStrings(names(levels[2]), []string{"top"}) { + t.Errorf("level 2 = %v, want [top]", names(levels[2])) + } + }) + + t.Run("flat list -> single level", func(t *testing.T) { + modules := []*config.ModuleConfig{ + {Name: "a"}, {Name: "b"}, {Name: "c"}, + } + g, _ := buildModuleGraph(modules) + levels, err := topologicalSortLevels(g) + if err != nil { + t.Fatalf("sort error: %v", err) + } + if len(levels) != 1 || len(levels[0]) != 3 { + t.Errorf("expected 1 level of 3 modules, got %d levels (%v)", len(levels), levelNames(levels)) + } + }) + + t.Run("cycle detection returns error listing remaining modules", func(t *testing.T) { + // Cycle: a -> b -> a + modules := []*config.ModuleConfig{ + {Name: "a", Dependencies: []string{"b"}}, + {Name: "b", Dependencies: []string{"a"}}, + } + g, err := buildModuleGraph(modules) + if err != nil { + t.Fatalf("graph error: %v", err) + } + _, err = topologicalSortLevels(g) + if err == nil { + t.Fatal("expected error for cycle") + } + if !strings.Contains(err.Error(), "circular dependency") { + t.Errorf("want 'circular dependency' in error: %v", err) + } + // Must list both remaining modules. + if !strings.Contains(err.Error(), "a") || !strings.Contains(err.Error(), "b") { + t.Errorf("error should list cycle members: %v", err) + } + }) + + t.Run("empty graph returns no levels", func(t *testing.T) { + g, _ := buildModuleGraph(nil) + levels, err := topologicalSortLevels(g) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(levels) != 0 { + t.Errorf("want 0 levels, got %d", len(levels)) + } + }) +} + +func TestIsWebhookConnectionError(t *testing.T) { + cases := []struct { + name string + err error + want bool + }{ + {"nil", nil, false}, + {"connection refused", errors.New("dial: connection refused"), true}, + {"failed calling webhook", errors.New("failed calling webhook foo.bar"), true}, + {"webhook + timeout combo", errors.New("Internal error: webhook handler request timeout"), true}, + {"plain webhook word only", errors.New("webhook validating CR"), false}, + {"unrelated error", errors.New("oops"), false}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := isWebhookConnectionError(tc.err) + if got != tc.want { + t.Fatalf("isWebhookConnectionError(%v)=%v, want %v", tc.err, got, tc.want) + } + }) + } +} + +// helpers --------------------------------------------------------------- + +func names(modules []*config.ModuleConfig) []string { + out := make([]string, 0, len(modules)) + for _, m := range modules { + out = append(out, m.Name) + } + return out +} + +func levelNames(levels [][]*config.ModuleConfig) [][]string { + out := make([][]string, len(levels)) + for i, lvl := range levels { + out[i] = names(lvl) + } + return out +} + +func equalStrings(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/pkg/kubernetes/poll_test.go b/pkg/kubernetes/poll_test.go new file mode 100644 index 0000000..ee76028 --- /dev/null +++ b/pkg/kubernetes/poll_test.go @@ -0,0 +1,102 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package kubernetes + +import ( + "strings" + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +func TestFormatRef(t *testing.T) { + cases := []struct { + namespace, name, want string + }{ + {"", "cluster-scoped", "cluster-scoped"}, + {"ns", "foo", "ns/foo"}, + {"", "", ""}, + {"ns", "", "ns/"}, + } + for _, tc := range cases { + got := formatRef(tc.namespace, tc.name) + if got != tc.want { + t.Errorf("formatRef(%q,%q)=%q, want %q", tc.namespace, tc.name, got, tc.want) + } + } +} + +func TestSameFinalizers(t *testing.T) { + cases := []struct { + name string + a, b []string + want bool + }{ + {"both nil", nil, nil, true}, + {"both empty", []string{}, []string{}, true}, + {"identical order", []string{"a", "b"}, []string{"a", "b"}, true}, + {"different order", []string{"a", "b"}, []string{"b", "a"}, false}, + {"different lengths", []string{"a"}, []string{"a", "b"}, false}, + {"duplicates differ", []string{"a", "a"}, []string{"a", "b"}, false}, + {"single equal", []string{"x"}, []string{"x"}, true}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := sameFinalizers(tc.a, tc.b) + if got != tc.want { + t.Errorf("sameFinalizers(%v,%v)=%v, want %v", tc.a, tc.b, got, tc.want) + } + }) + } +} + +func TestErrIfTerminating(t *testing.T) { + t.Run("no deletionTimestamp returns nil", func(t *testing.T) { + obj := &unstructured.Unstructured{} + obj.SetName("foo") + if err := errIfTerminating(obj, "Pod", "default/foo"); err != nil { + t.Errorf("unexpected error: %v", err) + } + }) + + t.Run("deletionTimestamp set returns descriptive error", func(t *testing.T) { + obj := &unstructured.Unstructured{} + obj.SetName("foo") + obj.SetNamespace("default") + now := metav1.NewTime(time.Date(2026, 1, 2, 3, 4, 5, 0, time.UTC)) + obj.SetDeletionTimestamp(&now) + obj.SetFinalizers([]string{"kubernetes.io/pvc-protection"}) + + err := errIfTerminating(obj, "Pod", "default/foo") + if err == nil { + t.Fatal("expected error") + } + msg := err.Error() + for _, want := range []string{ + "Pod", + "default/foo", + "deletionTimestamp", + "kubernetes.io/pvc-protection", + } { + if !strings.Contains(msg, want) { + t.Errorf("error missing %q: %v", want, msg) + } + } + }) +} diff --git a/pkg/retry/retry_test.go b/pkg/retry/retry_test.go new file mode 100644 index 0000000..9b953d0 --- /dev/null +++ b/pkg/retry/retry_test.go @@ -0,0 +1,408 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package retry + +import ( + "context" + "errors" + "fmt" + "io" + "sync/atomic" + "testing" + "time" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +// statusErr constructs a *apierrors.StatusError with the given HTTP code and +// optional RetryAfterSeconds, mirroring how apiserver decodes failures. +func statusErr(code int32, retryAfterSec int32) *apierrors.StatusError { + st := metav1.Status{ + Status: metav1.StatusFailure, + Code: code, + } + if retryAfterSec > 0 { + st.Details = &metav1.StatusDetails{RetryAfterSeconds: retryAfterSec} + } + return &apierrors.StatusError{ErrStatus: st} +} + +func TestIsRetryable(t *testing.T) { + cases := []struct { + name string + err error + want bool + }{ + {"nil error", nil, false}, + + // Status code branch. + {"status 500", statusErr(500, 0), true}, + {"status 503", statusErr(503, 0), true}, + {"status 429 (too many requests)", statusErr(429, 0), true}, + {"status 501 not implemented", statusErr(501, 0), false}, + {"status 200 ok", statusErr(200, 0), false}, + {"status 404 not found", statusErr(404, 0), false}, + {"status with RetryAfterSeconds hint", statusErr(400, 7), true}, + + // Helper-based detection from apimachinery. + { + "server timeout via helper", + apierrors.NewServerTimeout(schema.GroupResource{Resource: "pods"}, "x", 1), + true, + }, + { + "service unavailable via helper", + apierrors.NewServiceUnavailable("svc"), + true, + }, + { + "too many requests via helper", + apierrors.NewTooManyRequestsError("rl"), + true, + }, + { + "internal error via helper", + apierrors.NewInternalError(errors.New("boom")), + true, + }, + + // io.EOF is always retryable (broken connections). + {"io.EOF", io.EOF, true}, + {"wrapped io.EOF", fmt.Errorf("read tcp: %w", io.EOF), true}, + + // String-pattern branches. + {"TLS handshake timeout", errors.New("TLS handshake timeout"), true}, + {"connection refused", errors.New("dial: connection refused"), true}, + {"connection reset", errors.New("read: connection reset by peer"), true}, + {"connection timed out", errors.New("connection timed out"), true}, + {"i/o timeout", errors.New("read tcp: i/o timeout"), true}, + {"EOF substring", errors.New("transport: EOF"), true}, + {"broken pipe", errors.New("write: broken pipe"), true}, + {"no route to host", errors.New("dial: no route to host"), true}, + {"network is unreachable", errors.New("dial: network is unreachable"), true}, + {"net/http: request canceled", errors.New("net/http: request canceled"), true}, + {"context deadline exceeded", errors.New("context deadline exceeded"), true}, + + {"k8s server unable", errors.New("the server is currently unable to handle the request"), true}, + {"ServiceUnavailable msg", errors.New("ServiceUnavailable"), true}, + {"etcdserver timeout", errors.New("etcdserver: request timed out"), true}, + {"etcdserver leader changed", errors.New("etcdserver: leader changed"), true}, + {"failed to get server groups", errors.New("failed to get server groups"), true}, + + {"ssh handshake failed", errors.New("ssh: handshake failed"), true}, + {"ssh unable to authenticate", errors.New("ssh: unable to authenticate"), true}, + {"ssh connection lost", errors.New("ssh: connection lost"), true}, + {"ssh failed to dial", errors.New("failed to dial 1.2.3.4:22"), true}, + {"closed network connection", errors.New("use of closed network connection"), true}, + + {"webhook calling failure", errors.New("failed calling webhook validate.k8s.io"), true}, + {"plain webhook word", errors.New("webhook handler exploded"), true}, + + // Non-retryable. + {"plain generic error", errors.New("oops"), false}, + {"validation failure", errors.New("validation failed: invalid name"), false}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := IsRetryable(tc.err) + if got != tc.want { + t.Fatalf("IsRetryable(%v) = %v, want %v", tc.err, got, tc.want) + } + }) + } +} + +func TestIsSSHConnectionError(t *testing.T) { + cases := []struct { + name string + err error + want bool + }{ + {"nil", nil, false}, + + {"failed to create SSH session", errors.New("failed to create SSH session: foo"), true}, + {"ssh handshake failed", errors.New("ssh: handshake failed"), true}, + {"ssh connection lost", errors.New("ssh: connection lost"), true}, + {"closed network connection", errors.New("use of closed network connection"), true}, + {"connection refused", errors.New("connection refused"), true}, + {"connection reset", errors.New("connection reset"), true}, + {"broken pipe", errors.New("broken pipe"), true}, + {"EOF string", errors.New("io: EOF"), true}, + {"io timeout", errors.New("i/o timeout"), true}, + + {"io.EOF via errors.Is", io.EOF, true}, + {"wrapped io.EOF", fmt.Errorf("wrap: %w", io.EOF), true}, + + // Not a connection error. + {"command not found", errors.New("bash: foo: command not found"), false}, + {"permission denied", errors.New("permission denied"), false}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := IsSSHConnectionError(tc.err) + if got != tc.want { + t.Fatalf("IsSSHConnectionError(%v) = %v, want %v", tc.err, got, tc.want) + } + }) + } +} + +func TestWithRetryAfter(t *testing.T) { + base := Config{ + MaxRetries: 5, + InitialWait: 1 * time.Second, + MaxWait: 10 * time.Second, + Backoff: 2.0, + } + + t.Run("nil error returns config unchanged", func(t *testing.T) { + got := WithRetryAfter(base, nil) + if got.InitialWait != base.InitialWait { + t.Errorf("InitialWait changed: got %v, want %v", got.InitialWait, base.InitialWait) + } + }) + + t.Run("non-status error returns config unchanged", func(t *testing.T) { + got := WithRetryAfter(base, errors.New("boom")) + if got.InitialWait != base.InitialWait { + t.Errorf("InitialWait changed unexpectedly: got %v", got.InitialWait) + } + }) + + t.Run("status error without RetryAfter returns config unchanged", func(t *testing.T) { + got := WithRetryAfter(base, statusErr(503, 0)) + if got.InitialWait != base.InitialWait { + t.Errorf("InitialWait changed unexpectedly: got %v", got.InitialWait) + } + }) + + t.Run("RetryAfter larger than InitialWait overrides it", func(t *testing.T) { + got := WithRetryAfter(base, statusErr(429, 5)) + want := 5 * time.Second + if got.InitialWait != want { + t.Errorf("InitialWait = %v, want %v", got.InitialWait, want) + } + // Other fields preserved. + if got.MaxRetries != base.MaxRetries || got.MaxWait != base.MaxWait || got.Backoff != base.Backoff { + t.Errorf("non-InitialWait fields changed: %+v vs %+v", got, base) + } + }) + + t.Run("RetryAfter smaller than InitialWait does not shrink it", func(t *testing.T) { + cfg := base + cfg.InitialWait = 10 * time.Second + got := WithRetryAfter(cfg, statusErr(429, 2)) + if got.InitialWait != cfg.InitialWait { + t.Errorf("InitialWait shrunk to %v, want %v", got.InitialWait, cfg.InitialWait) + } + }) +} + +// fastCfg keeps Do's wall-clock cost low while still exercising backoff. +func fastCfg(maxRetries int) Config { + return Config{ + MaxRetries: maxRetries, + InitialWait: 1 * time.Millisecond, + MaxWait: 4 * time.Millisecond, + Backoff: 2.0, + LogRetries: false, + } +} + +func TestDo_SuccessFirstAttempt(t *testing.T) { + var calls int32 + got, err := Do(context.Background(), fastCfg(3), "op", func() (int, error) { + atomic.AddInt32(&calls, 1) + return 42, nil + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != 42 { + t.Errorf("got %d, want 42", got) + } + if atomic.LoadInt32(&calls) != 1 { + t.Errorf("expected exactly 1 call, got %d", calls) + } +} + +func TestDo_NonRetryableErrorReturnsImmediately(t *testing.T) { + var calls int32 + _, err := Do(context.Background(), fastCfg(5), "op", func() (struct{}, error) { + atomic.AddInt32(&calls, 1) + return struct{}{}, errors.New("validation failed: bad input") + }) + if err == nil { + t.Fatal("expected error") + } + if atomic.LoadInt32(&calls) != 1 { + t.Errorf("expected 1 call (non-retryable), got %d", calls) + } +} + +func TestDo_SuccessAfterRetries(t *testing.T) { + var calls int32 + target := int32(3) // succeed on the 3rd call. + got, err := Do(context.Background(), fastCfg(5), "op", func() (string, error) { + n := atomic.AddInt32(&calls, 1) + if n < target { + return "", io.EOF // retryable + } + return "ok", nil + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != "ok" { + t.Errorf("got %q, want %q", got, "ok") + } + if atomic.LoadInt32(&calls) != target { + t.Errorf("got %d calls, want %d", calls, target) + } +} + +func TestDo_FailsAfterMaxRetriesExhausted(t *testing.T) { + var calls int32 + _, err := Do(context.Background(), fastCfg(2), "op", func() (int, error) { + atomic.AddInt32(&calls, 1) + return 0, io.EOF + }) + if !errors.Is(err, io.EOF) { + t.Fatalf("expected io.EOF, got %v", err) + } + // MaxRetries=2 means up to 3 attempts total. + if got := atomic.LoadInt32(&calls); got != 3 { + t.Errorf("got %d calls, want 3", got) + } +} + +func TestDo_ContextCancelBeforeAttempt(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cancel() // already cancelled + + var calls int32 + _, err := Do(ctx, fastCfg(5), "op", func() (int, error) { + atomic.AddInt32(&calls, 1) + return 0, nil + }) + if !errors.Is(err, context.Canceled) { + t.Fatalf("want context.Canceled, got %v", err) + } + if atomic.LoadInt32(&calls) != 0 { + t.Errorf("fn must not be called when ctx already cancelled, got %d calls", calls) + } +} + +func TestDo_ContextCancelDuringWait(t *testing.T) { + cfg := Config{ + MaxRetries: 10, + InitialWait: 100 * time.Millisecond, + MaxWait: 1 * time.Second, + Backoff: 2.0, + } + + ctx, cancel := context.WithCancel(context.Background()) + var calls int32 + // Cancel shortly after the first failed attempt enters the wait. + go func() { + time.Sleep(20 * time.Millisecond) + cancel() + }() + + start := time.Now() + _, err := Do(ctx, cfg, "op", func() (int, error) { + atomic.AddInt32(&calls, 1) + return 0, io.EOF + }) + elapsed := time.Since(start) + + if !errors.Is(err, context.Canceled) { + t.Fatalf("want context.Canceled, got %v", err) + } + if atomic.LoadInt32(&calls) != 1 { + t.Errorf("got %d calls, want 1 (cancelled during first wait)", calls) + } + if elapsed >= cfg.InitialWait { + t.Errorf("Do returned too late (%v >= InitialWait %v); cancel did not abort wait", elapsed, cfg.InitialWait) + } +} + +func TestDo_BackoffIsCapped(t *testing.T) { + // With InitialWait=4ms, Backoff=10, MaxWait=8ms we would normally jump to + // 40ms after the first retry — the cap must prevent that. + cfg := Config{ + MaxRetries: 5, + InitialWait: 4 * time.Millisecond, + MaxWait: 8 * time.Millisecond, + Backoff: 10.0, + } + + var calls int32 + start := time.Now() + _, err := Do(context.Background(), cfg, "op", func() (int, error) { + atomic.AddInt32(&calls, 1) + return 0, io.EOF + }) + elapsed := time.Since(start) + + if !errors.Is(err, io.EOF) { + t.Fatalf("want io.EOF, got %v", err) + } + // 5 waits, each capped at 8ms => upper bound ~40ms even though uncapped + // backoff would explode to seconds. Allow ample slack for slow CI runners. + if elapsed > 300*time.Millisecond { + t.Errorf("elapsed %v suggests backoff was not capped (MaxWait=%v)", elapsed, cfg.MaxWait) + } + if atomic.LoadInt32(&calls) != 6 { + t.Errorf("got %d calls, want 6 (MaxRetries+1)", calls) + } +} + +func TestDoVoid_DelegatesToDo(t *testing.T) { + t.Run("success", func(t *testing.T) { + var calls int32 + err := DoVoid(context.Background(), fastCfg(2), "op", func() error { + atomic.AddInt32(&calls, 1) + return nil + }) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if atomic.LoadInt32(&calls) != 1 { + t.Errorf("got %d calls, want 1", calls) + } + }) + + t.Run("propagates non-retryable error", func(t *testing.T) { + want := errors.New("bad input") + var calls int32 + err := DoVoid(context.Background(), fastCfg(3), "op", func() error { + atomic.AddInt32(&calls, 1) + return want + }) + if !errors.Is(err, want) { + t.Fatalf("got %v, want %v", err, want) + } + if atomic.LoadInt32(&calls) != 1 { + t.Errorf("got %d calls, want 1", calls) + } + }) +} diff --git a/pkg/testkit/stress_tests_test.go b/pkg/testkit/stress_tests_test.go new file mode 100644 index 0000000..3e05e52 --- /dev/null +++ b/pkg/testkit/stress_tests_test.go @@ -0,0 +1,276 @@ +/* +Copyright 2026 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package testkit + +import ( + "strings" + "testing" + + "github.com/deckhouse/storage-e2e/internal/config" +) + +// baseConfig returns a minimal valid Config for ModeFlog. +func baseConfig() *Config { + return &Config{ + Namespace: "ns", + StorageClassName: "sc", + PVCSize: "1Gi", + PodsCount: 1, + Iterations: 1, + Mode: ModeFlog, + } +} + +func TestConfigValidate(t *testing.T) { + cases := []struct { + name string + mutate func(c *Config) + wantErr string // substring; empty == no error + afterHook func(t *testing.T, c *Config) + }{ + { + name: "valid flog config", + mutate: func(*Config) {}, + wantErr: "", + }, + { + name: "missing namespace", + mutate: func(c *Config) { c.Namespace = "" }, + wantErr: "namespace", + }, + { + name: "missing storage class", + mutate: func(c *Config) { c.StorageClassName = "" }, + wantErr: "storage class", + }, + { + name: "missing PVC size", + mutate: func(c *Config) { c.PVCSize = "" }, + wantErr: "PVC size", + }, + { + name: "zero pods count", + mutate: func(c *Config) { c.PodsCount = 0 }, + wantErr: "pods count", + }, + { + name: "negative iterations", + mutate: func(c *Config) { c.Iterations = 0 }, + wantErr: "iterations", + }, + { + name: "snapshot-only with default SnapshotsPerPVC gets defaulted to 1", + mutate: func(c *Config) { + c.Mode = ModeSnapshotOnly + c.SnapshotsPerPVC = 0 + }, + wantErr: "", + afterHook: func(t *testing.T, c *Config) { + if c.SnapshotsPerPVC != 1 { + t.Errorf("SnapshotsPerPVC=%d, want 1 (default)", c.SnapshotsPerPVC) + } + }, + }, + { + name: "snapshot_resize_cloning requires SnapshotsPerPVC > 0", + mutate: func(c *Config) { + c.Mode = ModeSnapshotResizeCloning + c.SnapshotsPerPVC = 0 + c.PVCSizeAfterResize = "2Gi" + c.PVCSizeAfterResizeStage2 = "3Gi" + c.TestOrder = []TestStep{StepResize} + }, + wantErr: "snapshots per PVC", + }, + { + name: "snapshot_resize_cloning rejects invalid step", + mutate: func(c *Config) { + c.Mode = ModeSnapshotResizeCloning + c.SnapshotsPerPVC = 1 + c.PVCSizeAfterResize = "2Gi" + c.PVCSizeAfterResizeStage2 = "3Gi" + c.TestOrder = []TestStep{"bogus"} + }, + wantErr: "invalid test step", + }, + { + name: "snapshot_resize_cloning resize step requires PVCSizeAfterResize", + mutate: func(c *Config) { + c.Mode = ModeSnapshotResizeCloning + c.SnapshotsPerPVC = 1 + c.PVCSizeAfterResize = "" + c.PVCSizeAfterResizeStage2 = "3Gi" + c.TestOrder = []TestStep{StepResize} + }, + wantErr: "PVC size after resize", + }, + { + name: "snapshot_resize_cloning clone/restore step requires Stage2 size", + mutate: func(c *Config) { + c.Mode = ModeSnapshotResizeCloning + c.SnapshotsPerPVC = 1 + c.PVCSizeAfterResize = "2Gi" + c.PVCSizeAfterResizeStage2 = "" + c.TestOrder = []TestStep{StepClone} + }, + wantErr: "stage2", + }, + { + name: "snapshot_resize_cloning happy path", + mutate: func(c *Config) { + c.Mode = ModeSnapshotResizeCloning + c.SnapshotsPerPVC = 2 + c.PVCSizeAfterResize = "2Gi" + c.PVCSizeAfterResizeStage2 = "3Gi" + c.TestOrder = []TestStep{StepRestoreFromSnapshot, StepResize, StepClone} + }, + wantErr: "", + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + c := baseConfig() + tc.mutate(c) + err := c.Validate() + if tc.wantErr == "" { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if tc.afterHook != nil { + tc.afterHook(t, c) + } + return + } + if err == nil { + t.Fatalf("expected error containing %q, got nil", tc.wantErr) + } + if !strings.Contains(strings.ToLower(err.Error()), strings.ToLower(tc.wantErr)) { + t.Errorf("error %q does not contain %q", err, tc.wantErr) + } + }) + } +} + +func TestDefaultConfig_UsesDefaultsWhenEnvUnset(t *testing.T) { + // Snapshot package-level env-derived globals and restore them so test + // ordering (`-shuffle=on`) cannot leak between cases. `t.Setenv` only + // affects new os.Getenv reads — the config package already cached env + // values into package vars at init. + defer withConfigSnapshot(t)() + + config.StressTestPVCSize = "" + config.StressTestPodsCount = "" + config.StressTestPVCSizeAfterResize = "" + config.StressTestPVCSizeAfterResizeStage2 = "" + config.StressTestSnapshotsPerPVC = "" + config.StressTestMaxAttempts = "" + config.StressTestInterval = "" + config.StressTestCleanup = "" + + got := DefaultConfig() + + if got.PVCSize != config.StressTestPVCSizeDefaultValue { + t.Errorf("PVCSize=%q, want default %q", got.PVCSize, config.StressTestPVCSizeDefaultValue) + } + if got.PVCSizeAfterResize != config.StressTestPVCSizeAfterResizeDefaultValue { + t.Errorf("PVCSizeAfterResize=%q, want default", got.PVCSizeAfterResize) + } + if got.PVCSizeAfterResizeStage2 != config.StressTestPVCSizeAfterResizeStage2DefaultValue { + t.Errorf("PVCSizeAfterResizeStage2=%q, want default", got.PVCSizeAfterResizeStage2) + } + if got.PodsCount <= 0 { + t.Errorf("PodsCount=%d, want > 0", got.PodsCount) + } + if got.SnapshotsPerPVC <= 0 { + t.Errorf("SnapshotsPerPVC=%d, want > 0", got.SnapshotsPerPVC) + } + if got.MaxAttempts <= 0 { + t.Errorf("MaxAttempts=%d, want > 0", got.MaxAttempts) + } + if got.Interval <= 0 { + t.Errorf("Interval=%v, want > 0", got.Interval) + } + if !got.Cleanup { + t.Errorf("Cleanup=false; default should be true") + } + if got.Iterations != 1 { + t.Errorf("Iterations=%d, want 1", got.Iterations) + } + if got.Mode != ModeFlog { + t.Errorf("Mode=%q, want %q", got.Mode, ModeFlog) + } + if got.SchedulerName != "default-scheduler" { + t.Errorf("SchedulerName=%q, want %q", got.SchedulerName, "default-scheduler") + } + if len(got.TestOrder) != 3 { + t.Errorf("TestOrder len=%d, want 3", len(got.TestOrder)) + } +} + +func TestDefaultConfig_RespectsEnvOverrides(t *testing.T) { + defer withConfigSnapshot(t)() + + config.StressTestPVCSize = "9Gi" + config.StressTestPodsCount = "7" + config.StressTestPVCSizeAfterResize = "10Gi" + config.StressTestPVCSizeAfterResizeStage2 = "11Gi" + config.StressTestSnapshotsPerPVC = "5" + config.StressTestMaxAttempts = "12" + config.StressTestInterval = "3" + config.StressTestCleanup = "false" + + got := DefaultConfig() + + if got.PVCSize != "9Gi" || got.PodsCount != 7 || got.PVCSizeAfterResize != "10Gi" || + got.PVCSizeAfterResizeStage2 != "11Gi" || got.SnapshotsPerPVC != 5 || + got.MaxAttempts != 12 || got.Interval.Seconds() != 3 || got.Cleanup { + t.Errorf("env overrides not honored: %+v", got) + } +} + +// withConfigSnapshot captures the package-level config vars touched by +// DefaultConfig and returns a restore function. Use as +// +// defer withConfigSnapshot(t)() +func withConfigSnapshot(t *testing.T) func() { + t.Helper() + snap := struct { + PVCSize, PodsCount string + PVCSizeAfterResize, PVCSizeAfterResizeStage2 string + SnapshotsPerPVC, MaxAttempts, Interval, Cleanup string + }{ + PVCSize: config.StressTestPVCSize, + PodsCount: config.StressTestPodsCount, + PVCSizeAfterResize: config.StressTestPVCSizeAfterResize, + PVCSizeAfterResizeStage2: config.StressTestPVCSizeAfterResizeStage2, + SnapshotsPerPVC: config.StressTestSnapshotsPerPVC, + MaxAttempts: config.StressTestMaxAttempts, + Interval: config.StressTestInterval, + Cleanup: config.StressTestCleanup, + } + return func() { + config.StressTestPVCSize = snap.PVCSize + config.StressTestPodsCount = snap.PodsCount + config.StressTestPVCSizeAfterResize = snap.PVCSizeAfterResize + config.StressTestPVCSizeAfterResizeStage2 = snap.PVCSizeAfterResizeStage2 + config.StressTestSnapshotsPerPVC = snap.SnapshotsPerPVC + config.StressTestMaxAttempts = snap.MaxAttempts + config.StressTestInterval = snap.Interval + config.StressTestCleanup = snap.Cleanup + } +} From 8dd0b50f4647d93d75493a4ea7c4944083143bba Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 3 Jun 2026 17:40:26 +0400 Subject: [PATCH 2/8] gitleaks false positive fix --- internal/kubernetes/commander/client_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/kubernetes/commander/client_test.go b/internal/kubernetes/commander/client_test.go index 4180b79..08f4cfe 100644 --- a/internal/kubernetes/commander/client_test.go +++ b/internal/kubernetes/commander/client_test.go @@ -72,7 +72,7 @@ func TestBase64Encode(t *testing.T) { name, in, want string }{ {"empty", "", ""}, - {"ascii", "user:token", "dXNlcjp0b2tlbg=="}, + {"ascii", "user:token", "dXNlcjp0b2tlbg=="}, // gitleaks:allow — base64("user:token") test fixture, not a secret {"unicode", "tëst", "dMOrc3Q="}, } for _, tc := range cases { From eb434ffcb2648fd747184e37357f15fae3da37f3 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 3 Jun 2026 17:54:59 +0400 Subject: [PATCH 3/8] gitleaks is now per-push --- .github/workflows/{gitleaks-scan-on-pr.yml => gitleaks.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{gitleaks-scan-on-pr.yml => gitleaks.yml} (100%) diff --git a/.github/workflows/gitleaks-scan-on-pr.yml b/.github/workflows/gitleaks.yml similarity index 100% rename from .github/workflows/gitleaks-scan-on-pr.yml rename to .github/workflows/gitleaks.yml From 627889252ccef23d9371143509785708c4e0763c Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 3 Jun 2026 17:55:25 +0400 Subject: [PATCH 4/8] gitleaks is now per-push --- .github/workflows/gitleaks.yml | 16 +++++++++++++++- docs/WORKLOG.md | 1 + internal/kubernetes/commander/client_test.go | 1 - 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gitleaks.yml b/.github/workflows/gitleaks.yml index 8f35434..b8c2487 100644 --- a/.github/workflows/gitleaks.yml +++ b/.github/workflows/gitleaks.yml @@ -12,15 +12,29 @@ # See the License for the specific language governing permissions and # limitations under the License. -name: Gitleaks Pull Request Scan +name: Gitleaks +# Triggers: +# push: every push to any branch — catches leaked secrets before a PR +# is even opened (feature branches, force-pushes, throw-away +# debug branches). +# pull_request: PRs targeting any branch — required so the check is also +# evaluated for fork PRs, whose `push` events do not run in +# the upstream repo. on: + push: {} pull_request: types: [opened, synchronize, reopened] + permissions: contents: read pull-requests: read +# Cancel superseded runs on the same ref so rapid-fire pushes don't pile up. +concurrency: + group: gitleaks-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: gitleaks_scan: name: Gitleaks scan diff --git a/docs/WORKLOG.md b/docs/WORKLOG.md index 5e5b0a3..987e3ac 100644 --- a/docs/WORKLOG.md +++ b/docs/WORKLOG.md @@ -112,3 +112,4 @@ All notable changes to this repository are documented here. New entries are appe - **Add** Wave 1 unit tests (`pkg/retry/retry_test.go`, `pkg/kubernetes/{apply,modules,poll}_test.go`, `pkg/cluster/vms_test.go`, `pkg/testkit/stress_tests_test.go`, `internal/config/types_yaml_test.go`, `internal/kubernetes/commander/client_test.go`, `internal/logger/level_test.go`): hermetic table-driven coverage of `retry.Do/IsRetryable/IsSSHConnectionError/WithRetryAfter`, YAML doc splitting/env-var scanning, module graph + topo sort + cycle detection, `cluster/vms` pure helpers, `commander` mappers / base64 / `NewClientWithOptions` validation, `stress-tests.Config.Validate` / `DefaultConfig`, `LevelToString` round-trip, `ClusterNode`/`ClusterDefinition` YAML unmarshal validation. - **Add** Wave 2 httptest tests (`internal/kubernetes/commander/client_http_test.go`): drives the Commander HTTP client (`GetClusterByID`, `ListClustersAPI` array/items/data/garbage, `GetClusterByName`, `CreateClusterFromTemplate`, `DeleteClusterByID`, `GetClusterKubeconfigByID` + cluster-details fallback, `GetRegistryByName`, `GetClusterConnectionInfo` precedence + defaults) and all five `setAuthHeaders` paths via a real `httptest.Server`. - **Update** `docs/TESTS_IMPLEMENTATION_PLAN.md`: triggers changed from `push → main` to push-on-any-branch + `pull_request → main`; status header refreshed; rollout phases marked Done/Pending; exact `gh api` branch-protection command documented. +- **Update** `.github/workflows/gitleaks-scan-on-pr.yml` → renamed to `.github/workflows/gitleaks.yml`: workflow `name` shortened to `Gitleaks`, added `push: {}` trigger so secret scanning runs on every push (any branch), not only on PRs; added cancel-in-progress concurrency group. diff --git a/internal/kubernetes/commander/client_test.go b/internal/kubernetes/commander/client_test.go index 08f4cfe..8097c4c 100644 --- a/internal/kubernetes/commander/client_test.go +++ b/internal/kubernetes/commander/client_test.go @@ -173,4 +173,3 @@ func TestNewClientWithOptions_Validation(t *testing.T) { } }) } - From 0fc85a19c836fb5fddbfc2506efdb86c642b5cfd Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 3 Jun 2026 18:00:17 +0400 Subject: [PATCH 5/8] gitleaks - now two jobs - for pr and for push --- .github/workflows/gitleaks.yml | 26 +++++++++++++++++++++++++- docs/WORKLOG.md | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gitleaks.yml b/.github/workflows/gitleaks.yml index b8c2487..45cf831 100644 --- a/.github/workflows/gitleaks.yml +++ b/.github/workflows/gitleaks.yml @@ -21,6 +21,14 @@ name: Gitleaks # pull_request: PRs targeting any branch — required so the check is also # evaluated for fork PRs, whose `push` events do not run in # the upstream repo. +# +# The upstream action (deckhouse/modules-actions/gitleaks) supports two modes: +# * diff — checks out refs/pull//merge and scans only the PR range. This +# mode only works on pull_request events (it needs github.event.number); +# on a push it produces an empty refspec and `git fetch` fails. +# * full — checks out the whole repo and scans every tracked file. +# So we split into two jobs gated by github.event_name: diff for PRs, full for +# pushes. on: push: {} pull_request: @@ -36,8 +44,10 @@ concurrency: cancel-in-progress: true jobs: - gitleaks_scan: + # PR scan: only the diff introduced by the pull request. + gitleaks_diff: name: Gitleaks scan + if: ${{ github.event_name == 'pull_request' }} runs-on: ubuntu-latest permissions: contents: read @@ -47,3 +57,17 @@ jobs: uses: deckhouse/modules-actions/gitleaks@v6 with: scan_mode: "diff" + + # Push scan: full-repo scan (diff mode can't resolve a PR ref on a push event). + gitleaks_full: + name: Gitleaks scan + if: ${{ github.event_name == 'push' }} + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + steps: + - name: Run Gitleaks full scan + uses: deckhouse/modules-actions/gitleaks@v6 + with: + scan_mode: "full" diff --git a/docs/WORKLOG.md b/docs/WORKLOG.md index 987e3ac..3e05008 100644 --- a/docs/WORKLOG.md +++ b/docs/WORKLOG.md @@ -113,3 +113,4 @@ All notable changes to this repository are documented here. New entries are appe - **Add** Wave 2 httptest tests (`internal/kubernetes/commander/client_http_test.go`): drives the Commander HTTP client (`GetClusterByID`, `ListClustersAPI` array/items/data/garbage, `GetClusterByName`, `CreateClusterFromTemplate`, `DeleteClusterByID`, `GetClusterKubeconfigByID` + cluster-details fallback, `GetRegistryByName`, `GetClusterConnectionInfo` precedence + defaults) and all five `setAuthHeaders` paths via a real `httptest.Server`. - **Update** `docs/TESTS_IMPLEMENTATION_PLAN.md`: triggers changed from `push → main` to push-on-any-branch + `pull_request → main`; status header refreshed; rollout phases marked Done/Pending; exact `gh api` branch-protection command documented. - **Update** `.github/workflows/gitleaks-scan-on-pr.yml` → renamed to `.github/workflows/gitleaks.yml`: workflow `name` shortened to `Gitleaks`, added `push: {}` trigger so secret scanning runs on every push (any branch), not only on PRs; added cancel-in-progress concurrency group. +- **Update** `.github/workflows/gitleaks.yml`: split into two jobs gated by `github.event_name` — `gitleaks_diff` (`scan_mode: diff`) for `pull_request`, `gitleaks_full` (`scan_mode: full`) for `push`; fixes `fatal: invalid refspec '+refs/pull//merge:...'` that broke push runs because the upstream action's diff mode needs `github.event.number`. Both jobs share check name `Gitleaks scan`. From dd8dc74053f7d3c74888396f9561ffdf8c5c03b8 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 3 Jun 2026 18:57:35 +0400 Subject: [PATCH 6/8] gitleaks: revert to pull_request-only Drop the push trigger. The upstream action's diff mode needs github.event.number (only present on pull_request); on push it builds an empty refspec (+refs/pull//merge:...) and git fetch fails with exit 128. --- .github/workflows/gitleaks.yml | 41 ++++++---------------------------- docs/WORKLOG.md | 2 +- 2 files changed, 8 insertions(+), 35 deletions(-) diff --git a/.github/workflows/gitleaks.yml b/.github/workflows/gitleaks.yml index 45cf831..e7c01a9 100644 --- a/.github/workflows/gitleaks.yml +++ b/.github/workflows/gitleaks.yml @@ -14,23 +14,12 @@ name: Gitleaks -# Triggers: -# push: every push to any branch — catches leaked secrets before a PR -# is even opened (feature branches, force-pushes, throw-away -# debug branches). -# pull_request: PRs targeting any branch — required so the check is also -# evaluated for fork PRs, whose `push` events do not run in -# the upstream repo. -# -# The upstream action (deckhouse/modules-actions/gitleaks) supports two modes: -# * diff — checks out refs/pull//merge and scans only the PR range. This -# mode only works on pull_request events (it needs github.event.number); -# on a push it produces an empty refspec and `git fetch` fails. -# * full — checks out the whole repo and scans every tracked file. -# So we split into two jobs gated by github.event_name: diff for PRs, full for -# pushes. +# Gitleaks runs only on pull requests. The upstream action's diff mode checks +# out refs/pull//merge and scans just the PR range; it relies on +# github.event.number, which exists only for pull_request events. (A push-event +# run produces an empty refspec — `+refs/pull//merge:...` — and `git fetch` +# fails, so we deliberately do not trigger on push.) on: - push: {} pull_request: types: [opened, synchronize, reopened] @@ -38,16 +27,14 @@ permissions: contents: read pull-requests: read -# Cancel superseded runs on the same ref so rapid-fire pushes don't pile up. +# Cancel superseded runs on the same PR ref so rapid-fire pushes don't pile up. concurrency: group: gitleaks-${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: - # PR scan: only the diff introduced by the pull request. - gitleaks_diff: + gitleaks_scan: name: Gitleaks scan - if: ${{ github.event_name == 'pull_request' }} runs-on: ubuntu-latest permissions: contents: read @@ -57,17 +44,3 @@ jobs: uses: deckhouse/modules-actions/gitleaks@v6 with: scan_mode: "diff" - - # Push scan: full-repo scan (diff mode can't resolve a PR ref on a push event). - gitleaks_full: - name: Gitleaks scan - if: ${{ github.event_name == 'push' }} - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: read - steps: - - name: Run Gitleaks full scan - uses: deckhouse/modules-actions/gitleaks@v6 - with: - scan_mode: "full" diff --git a/docs/WORKLOG.md b/docs/WORKLOG.md index 3e05008..324b0e3 100644 --- a/docs/WORKLOG.md +++ b/docs/WORKLOG.md @@ -113,4 +113,4 @@ All notable changes to this repository are documented here. New entries are appe - **Add** Wave 2 httptest tests (`internal/kubernetes/commander/client_http_test.go`): drives the Commander HTTP client (`GetClusterByID`, `ListClustersAPI` array/items/data/garbage, `GetClusterByName`, `CreateClusterFromTemplate`, `DeleteClusterByID`, `GetClusterKubeconfigByID` + cluster-details fallback, `GetRegistryByName`, `GetClusterConnectionInfo` precedence + defaults) and all five `setAuthHeaders` paths via a real `httptest.Server`. - **Update** `docs/TESTS_IMPLEMENTATION_PLAN.md`: triggers changed from `push → main` to push-on-any-branch + `pull_request → main`; status header refreshed; rollout phases marked Done/Pending; exact `gh api` branch-protection command documented. - **Update** `.github/workflows/gitleaks-scan-on-pr.yml` → renamed to `.github/workflows/gitleaks.yml`: workflow `name` shortened to `Gitleaks`, added `push: {}` trigger so secret scanning runs on every push (any branch), not only on PRs; added cancel-in-progress concurrency group. -- **Update** `.github/workflows/gitleaks.yml`: split into two jobs gated by `github.event_name` — `gitleaks_diff` (`scan_mode: diff`) for `pull_request`, `gitleaks_full` (`scan_mode: full`) for `push`; fixes `fatal: invalid refspec '+refs/pull//merge:...'` that broke push runs because the upstream action's diff mode needs `github.event.number`. Both jobs share check name `Gitleaks scan`. +- **Update** `.github/workflows/gitleaks.yml`: reverted to `pull_request`-only (single `gitleaks_scan` job, `scan_mode: diff`); dropped the `push` trigger because the upstream action's diff mode needs `github.event.number` and fails on push with `fatal: invalid refspec '+refs/pull//merge:...'`. From 3122f28a2f514d9d996a143f8784fc16e7f99a40 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Wed, 3 Jun 2026 19:01:30 +0400 Subject: [PATCH 7/8] gitleaks: ignore base64 test-fixture false positive by fingerprint The diff scan flags commit 5f1edc2, which introduced the base64("user:token") test fixture before the inline gitleaks:allow comment was added, so the inline suppression cannot hide it. Ignore the finding by fingerprint instead. --- .gitleaksignore | 9 +++++++++ docs/WORKLOG.md | 1 + 2 files changed, 10 insertions(+) create mode 100644 .gitleaksignore diff --git a/.gitleaksignore b/.gitleaksignore new file mode 100644 index 0000000..4c56962 --- /dev/null +++ b/.gitleaksignore @@ -0,0 +1,9 @@ +# gitleaks finding fingerprints to ignore (format: commit:file:rule:startline). +# +# False positive: base64("user:token") == "dXNlcjp0b2tlbg==" is a table-driven +# unit-test fixture in TestBase64Encode, not a real credential. The finding is +# anchored to commit 5f1edc2, which first introduced the line *before* the +# inline `gitleaks:allow` comment was added (in a later commit). A diff scan +# walks the whole PR commit range, so the later inline suppression cannot hide +# the earlier commit — we ignore it here by fingerprint instead. +5f1edc2ea49b6f107c55faeaa630bae63fe749f0:internal/kubernetes/commander/client_test.go:generic-api-key:75 diff --git a/docs/WORKLOG.md b/docs/WORKLOG.md index 324b0e3..0646c89 100644 --- a/docs/WORKLOG.md +++ b/docs/WORKLOG.md @@ -114,3 +114,4 @@ All notable changes to this repository are documented here. New entries are appe - **Update** `docs/TESTS_IMPLEMENTATION_PLAN.md`: triggers changed from `push → main` to push-on-any-branch + `pull_request → main`; status header refreshed; rollout phases marked Done/Pending; exact `gh api` branch-protection command documented. - **Update** `.github/workflows/gitleaks-scan-on-pr.yml` → renamed to `.github/workflows/gitleaks.yml`: workflow `name` shortened to `Gitleaks`, added `push: {}` trigger so secret scanning runs on every push (any branch), not only on PRs; added cancel-in-progress concurrency group. - **Update** `.github/workflows/gitleaks.yml`: reverted to `pull_request`-only (single `gitleaks_scan` job, `scan_mode: diff`); dropped the `push` trigger because the upstream action's diff mode needs `github.event.number` and fails on push with `fatal: invalid refspec '+refs/pull//merge:...'`. +- **Add** `.gitleaksignore`: ignores the `generic-api-key` false positive on `internal/kubernetes/commander/client_test.go:75` (base64 test fixture) by fingerprint at commit `5f1edc2`; the diff scan flags the introducing commit, so the later inline `gitleaks:allow` could not suppress it. From 2dd1e5381993f489c8cbe0419bee12f134b42c46 Mon Sep 17 00:00:00 2001 From: Alexey Yakubov Date: Sun, 7 Jun 2026 22:26:51 +0400 Subject: [PATCH 8/8] test-coverage added --- .github/workflows/unit-tests.yml | 27 ++++++++++++++++++++++++++- docs/WORKLOG.md | 6 ++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 1a0d70d..6b7750d 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -27,6 +27,11 @@ on: permissions: contents: read + # Required by actions/upload-code-coverage to publish the coverage report. + code-quality: write + # On push events the action resolves any open PR for the ref via `gh pr list`, + # which needs read access to pull requests. + pull-requests: read # Cancel in-flight runs for the same ref when a new commit lands. Each push # trigger then evaluates only the latest SHA, which keeps CI minutes bounded @@ -72,10 +77,30 @@ jobs: echo "Total coverage:" go tool cover -func=coverage.out | tail -1 + # actions/upload-code-coverage consumes Cobertura XML, so convert the Go + # coverage profile produced above into that format. + - name: Convert coverage to Cobertura XML + run: | + go run github.com/boumenot/gocover-cobertura@latest \ + < coverage.out > coverage.xml + + # Publishes the report via GitHub's native code-coverage feature. On push + # to the default branch it sets the baseline; on push to a branch with an + # open PR the github-code-quality[bot] posts the comparison on that PR. + - name: Upload code coverage + uses: actions/upload-code-coverage@v1 + with: + file: coverage.xml + language: Go + label: code-coverage/go + fail-on-error: false + - name: Upload coverage artifact if: always() uses: actions/upload-artifact@v4 with: name: coverage - path: coverage.out + path: | + coverage.out + coverage.xml if-no-files-found: warn diff --git a/docs/WORKLOG.md b/docs/WORKLOG.md index 0646c89..3b62631 100644 --- a/docs/WORKLOG.md +++ b/docs/WORKLOG.md @@ -4,6 +4,12 @@ All notable changes to this repository are documented here. New entries are appe --- +## 2026-06-07 + +- **Update** `.github/workflows/unit-tests.yml`: integrate GitHub native code coverage (per-push) — add `code-quality: write` + `pull-requests: read` permissions, convert `coverage.out` to Cobertura XML via `boumenot/gocover-cobertura`, and publish with `actions/upload-code-coverage@v1`; coverage artifact now also includes `coverage.xml` + +--- + ## 2026-05-06 - **Add** `UploadPrivate` on `ssh.SSHClient` (`internal/infrastructure/ssh`): SFTP `Chmod` immediately after `Create`, before payload copy; `uploadOverSFTPOnce`, `uploadWithSFTPRetries`, `jumpUploadWithSFTPRetries`; passphrase `BootstrapCluster` uses it with `install -d -m 0700` staging (`pkg/cluster/setup.go`); ARCHITECTURE mentions ssh uploads