diff --git a/internal/planner/archive.go b/internal/planner/archive.go index a940c7c..3bc5d53 100644 --- a/internal/planner/archive.go +++ b/internal/planner/archive.go @@ -28,10 +28,21 @@ func (p *archiveNodePlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1. if node.Status.Phase == seiv1alpha1.PhaseRunning { return buildRunningPlan(node) } - return buildBasePlan(node, node.Spec.Peers, nil, &seiconfig.ConfigIntent{ + params, err := p.BuildConfigIntent(node) + if err != nil { + return nil, err + } + return buildBasePlan(node, node.Spec.Peers, nil, params) +} + +func (p *archiveNodePlanner) BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error) { + if node.Spec.Archive == nil { + return nil, fmt.Errorf("archive sub-spec is nil") + } + return &seiconfig.ConfigIntent{ Mode: seiconfig.ModeArchive, Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides(node)), node.Spec.Overrides), - }) + }, nil } func (p *archiveNodePlanner) controllerOverrides(node *seiv1alpha1.SeiNode) map[string]string { diff --git a/internal/planner/full.go b/internal/planner/full.go index f04596e..f130d9c 100644 --- a/internal/planner/full.go +++ b/internal/planner/full.go @@ -33,9 +33,9 @@ func (p *fullNodePlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Tas return buildRunningPlan(node) } fn := node.Spec.FullNode - params := &seiconfig.ConfigIntent{ - Mode: seiconfig.ModeFull, - Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides(node)), node.Spec.Overrides), + params, err := p.BuildConfigIntent(node) + if err != nil { + return nil, err } if NeedsBootstrap(node) { return buildBootstrapPlan(node, node.Spec.Peers, fn.Snapshot, params) @@ -43,6 +43,16 @@ func (p *fullNodePlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Tas return buildBasePlan(node, node.Spec.Peers, fn.Snapshot, params) } +func (p *fullNodePlanner) BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error) { + if node.Spec.FullNode == nil { + return nil, fmt.Errorf("fullNode sub-spec is nil") + } + return &seiconfig.ConfigIntent{ + Mode: seiconfig.ModeFull, + Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides(node)), node.Spec.Overrides), + }, nil +} + func (p *fullNodePlanner) controllerOverrides(node *seiv1alpha1.SeiNode) map[string]string { sg := node.Spec.FullNode.SnapshotGeneration if sg == nil || sg.Tendermint == nil { diff --git a/internal/planner/node_update_test.go b/internal/planner/node_update_test.go index 333bc66..6f5a93f 100644 --- a/internal/planner/node_update_test.go +++ b/internal/planner/node_update_test.go @@ -1,10 +1,12 @@ package planner import ( + "encoding/json" "fmt" "testing" . "github.com/onsi/gomega" + seiconfig "github.com/sei-protocol/sei-config" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -68,6 +70,8 @@ func TestBuildRunningPlan_ImageDrift_ReturnsNodeUpdatePlan(t *testing.T) { want := []string{ task.TaskTypeApplyStatefulSet, task.TaskTypeApplyService, + TaskConfigApply, + TaskConfigValidate, task.TaskTypeReplacePod, task.TaskTypeObserveImage, TaskMarkReady, @@ -82,6 +86,138 @@ func TestBuildRunningPlan_ImageDrift_ReturnsNodeUpdatePlan(t *testing.T) { } } +// configIntentFromPlan finds the config-apply task and unmarshals its +// params back into a ConfigIntent for inspection. +func configIntentFromPlan(t *testing.T, plan *seiv1alpha1.TaskPlan) *seiconfig.ConfigIntent { + t.Helper() + g := NewWithT(t) + for _, pt := range plan.Tasks { + if pt.Type != TaskConfigApply { + continue + } + g.Expect(pt.Params).NotTo(BeNil()) + var intent seiconfig.ConfigIntent + g.Expect(json.Unmarshal(pt.Params.Raw, &intent)).To(Succeed()) + return &intent + } + t.Fatal("plan has no config-apply task") + return nil +} + +// The day-2 config-apply must carry the mode planner's full ConfigIntent — +// non-empty Mode, Overrides containing the controller-managed keys — +// otherwise sei-config's ResolveIntent rejects the empty-Mode and +// TaskConfigApply fails at runtime. +func TestBuildNodeUpdatePlan_ConfigApplyHasIntentWithMode(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + node.Spec.ExternalAddress = "syncer-0-0-p2p.atlantic-2.harbor.platform.sei.io:26656" + node.Spec.Image = testImageV2 + + plan, err := buildRunningPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + + intent := configIntentFromPlan(t, plan) + g.Expect(intent.Mode).To(Equal(seiconfig.ModeFull), "Mode must be set so ResolveIntent accepts the intent") + g.Expect(intent.Overrides).To(HaveKeyWithValue( + seiconfig.KeyP2PExternalAddress, + "syncer-0-0-p2p.atlantic-2.harbor.platform.sei.io:26656", + ), "commonOverrides should propagate Spec.ExternalAddress") +} + +// Day-2 must set Incremental=true. The init path uses applyFull which +// regenerates config from mode defaults — running that on a running node +// would wipe persistent-peers, state-sync trust point, and operator-managed +// TOML keys outside the controller's overrides set. Incremental reads +// on-disk config and patches only the keys we own. +func TestBuildNodeUpdatePlan_ConfigApplyIsIncremental(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + node.Spec.Image = testImageV2 + + plan, err := buildRunningPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + + intent := configIntentFromPlan(t, plan) + g.Expect(intent.Incremental).To(BeTrue(), + "day-2 ConfigIntent must be incremental to avoid clobbering on-disk state") +} + +// Init plans must NOT set Incremental — the init path uses applyFull +// (regenerate config from mode defaults + overrides). Pinning this here +// makes accidental flips during future refactors a loud failure. +func TestBuildPlan_InitPlanConfigIntentNotIncremental(t *testing.T) { + g := NewWithT(t) + node := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{Name: "full-init", Namespace: "default", Generation: 1}, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "atlantic-2", + Image: "sei:v1.0.0", + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + // No phase — defaults to "" (init path). + } + + plan, err := (&fullNodePlanner{}).BuildPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + + intent := configIntentFromPlan(t, plan) + g.Expect(intent.Incremental).To(BeFalse(), + "init ConfigIntent must NOT be incremental — applyFull regenerates from mode defaults") +} + +// Per-mode coverage: the day-2 intent should match the mode planner's +// Mode value. Catches future drift if a new mode lands and isn't wired +// into BuildConfigIntent. +func TestBuildNodeUpdatePlan_ConfigApplyIntentMatchesModePlanner(t *testing.T) { + cases := []struct { + name string + mutate func(*seiv1alpha1.SeiNode) + wantMode seiconfig.NodeMode + }{ + { + name: "full", + mutate: func(_ *seiv1alpha1.SeiNode) {}, + wantMode: seiconfig.ModeFull, + }, + { + name: "archive", + mutate: func(n *seiv1alpha1.SeiNode) { + n.Spec.FullNode = nil + n.Spec.Archive = &seiv1alpha1.ArchiveSpec{} + }, + wantMode: seiconfig.ModeArchive, + }, + { + name: "validator", + mutate: func(n *seiv1alpha1.SeiNode) { + n.Spec.FullNode = nil + n.Spec.Validator = &seiv1alpha1.ValidatorSpec{} + }, + wantMode: seiconfig.ModeValidator, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + node := runningFullNode() + tc.mutate(node) + node.Spec.Image = testImageV2 + + plan, err := buildRunningPlan(node) + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(plan).NotTo(BeNil()) + + intent := configIntentFromPlan(t, plan) + g.Expect(intent.Mode).To(Equal(tc.wantMode)) + g.Expect(intent.Incremental).To(BeTrue()) + }) + } +} + // --- ResolvePlan condition tests --- func TestResolvePlan_NodeUpdate_SetsCondition(t *testing.T) { @@ -381,10 +517,12 @@ func TestBuildRunningPlan_ImageDriftWinsOverSidecar(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) g.Expect(plan).NotTo(BeNil()) // Image update plan ends with MarkReady, which also resolves the sidecar. - g.Expect(plan.Tasks).To(HaveLen(5), "should be full node-update plan, not one-task mark-ready") + g.Expect(plan.Tasks).To(HaveLen(7), "should be full node-update plan, not one-task mark-ready") g.Expect(planTaskTypes(plan)).To(Equal([]string{ task.TaskTypeApplyStatefulSet, task.TaskTypeApplyService, + TaskConfigApply, + TaskConfigValidate, task.TaskTypeReplacePod, task.TaskTypeObserveImage, TaskMarkReady, diff --git a/internal/planner/planner.go b/internal/planner/planner.go index 1b62a92..3831a16 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -63,6 +63,13 @@ type NodePlanner interface { Validate(node *seiv1alpha1.SeiNode) error BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) Mode() string + + // BuildConfigIntent returns the ConfigIntent the mode's plan would feed + // to TaskConfigApply (mode + merged controller / spec overrides). Shared + // by BuildPlan (init) and buildNodeUpdatePlan (day-2) so they emit + // identical config payloads for the same spec. Day-2 callers set + // Incremental on the returned value before passing it through. + BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error) } // GroupPlanner encapsulates logic for building a group-level task plan. @@ -735,18 +742,44 @@ func buildMarkReadyPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error } // buildNodeUpdatePlan constructs a plan to roll out an image update on a -// Running node. The plan applies the new StatefulSet spec, waits for the -// rollout to complete, then re-initializes the sidecar. +// Running node. The plan applies the new StatefulSet spec, re-applies +// config.toml (incremental — only the keys the controller owns), then +// cycles the pod and re-initializes the sidecar. // // FailedPhase is deliberately empty: a failure retries on the next reconcile // rather than transitioning the node out of Running. +// +// TaskConfigApply is placed before ReplacePod so the old pod's sidecar +// writes the new config.toml to the PVC; the new pod then mounts the +// PVC and reads the fresh file at seid start. external_address has no +// hot-reload path in seid today (see seictl config_reload.go), so the +// pod restart is the trigger that makes new values take effect. +// +// Incremental=true is mandatory on the day-2 intent. The init path uses +// the non-incremental applyFull, which regenerates config from mode +// defaults + overrides; running that on day 2 would wipe persistent-peers, +// state-sync trust point, and any operator-managed TOML keys that the +// controller doesn't track in overrides. applyIncremental reads on-disk +// config and patches only the overrides we own. func buildNodeUpdatePlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) { setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", fmt.Sprintf("image drift detected: spec=%s current=%s", node.Spec.Image, node.Status.CurrentImage)) + mode, err := plannerForMode(node) + if err != nil { + return nil, fmt.Errorf("resolving mode planner: %w", err) + } + intent, err := mode.BuildConfigIntent(node) + if err != nil { + return nil, fmt.Errorf("building config intent: %w", err) + } + intent.Incremental = true + prog := []string{ task.TaskTypeApplyStatefulSet, task.TaskTypeApplyService, + TaskConfigApply, + TaskConfigValidate, task.TaskTypeReplacePod, task.TaskTypeObserveImage, sidecar.TaskTypeMarkReady, @@ -755,7 +788,7 @@ func buildNodeUpdatePlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, erro planID := uuid.New().String() tasks := make([]seiv1alpha1.PlannedTask, len(prog)) for i, taskType := range prog { - t, err := buildPlannedTask(planID, taskType, i, paramsForTaskType(node, taskType, nil, nil)) + t, err := buildPlannedTask(planID, taskType, i, paramsForTaskType(node, taskType, nil, intent)) if err != nil { return nil, err } diff --git a/internal/planner/replay.go b/internal/planner/replay.go index e6ec1dc..8adb5ba 100644 --- a/internal/planner/replay.go +++ b/internal/planner/replay.go @@ -37,9 +37,9 @@ func (p *replayerPlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Tas if node.Status.Phase == seiv1alpha1.PhaseRunning { return buildRunningPlan(node) } - params := &seiconfig.ConfigIntent{ - Mode: seiconfig.ModeFull, - Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides()), node.Spec.Overrides), + params, err := p.BuildConfigIntent(node) + if err != nil { + return nil, err } if NeedsBootstrap(node) { return buildBootstrapPlan(node, node.Spec.Peers, &node.Spec.Replayer.Snapshot, params) @@ -47,6 +47,16 @@ func (p *replayerPlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Tas return buildBasePlan(node, node.Spec.Peers, &node.Spec.Replayer.Snapshot, params) } +func (p *replayerPlanner) BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error) { + if node.Spec.Replayer == nil { + return nil, fmt.Errorf("replayer sub-spec is nil") + } + return &seiconfig.ConfigIntent{ + Mode: seiconfig.ModeFull, + Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides()), node.Spec.Overrides), + }, nil +} + func (p *replayerPlanner) controllerOverrides() map[string]string { return map[string]string{ keySCAsyncCommitBuffer: "100", diff --git a/internal/planner/validator.go b/internal/planner/validator.go index 88157b5..e5b2cb0 100644 --- a/internal/planner/validator.go +++ b/internal/planner/validator.go @@ -100,12 +100,22 @@ func (p *validatorPlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Ta return buildGenesisPlan(node) } v := node.Spec.Validator - params := &seiconfig.ConfigIntent{ - Mode: seiconfig.ModeValidator, - Overrides: mergeOverrides(commonOverrides(node), node.Spec.Overrides), + params, err := p.BuildConfigIntent(node) + if err != nil { + return nil, err } if NeedsBootstrap(node) { return buildBootstrapPlan(node, node.Spec.Peers, v.Snapshot, params) } return buildBasePlan(node, node.Spec.Peers, v.Snapshot, params) } + +func (p *validatorPlanner) BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error) { + if node.Spec.Validator == nil { + return nil, fmt.Errorf("validator sub-spec is nil") + } + return &seiconfig.ConfigIntent{ + Mode: seiconfig.ModeValidator, + Overrides: mergeOverrides(commonOverrides(node), node.Spec.Overrides), + }, nil +}