Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions internal/planner/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,21 @@ func (p *archiveNodePlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.
if node.Status.Phase == seiv1alpha1.PhaseRunning {
return buildRunningPlan(node)
}
return buildBasePlan(node, node.Spec.Peers, nil, &seiconfig.ConfigIntent{
params, err := p.BuildConfigIntent(node)
if err != nil {
return nil, err
}
return buildBasePlan(node, node.Spec.Peers, nil, params)
}

func (p *archiveNodePlanner) BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error) {
if node.Spec.Archive == nil {
return nil, fmt.Errorf("archive sub-spec is nil")
}
return &seiconfig.ConfigIntent{
Mode: seiconfig.ModeArchive,
Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides(node)), node.Spec.Overrides),
})
}, nil
}

func (p *archiveNodePlanner) controllerOverrides(node *seiv1alpha1.SeiNode) map[string]string {
Expand Down
16 changes: 13 additions & 3 deletions internal/planner/full.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,26 @@ func (p *fullNodePlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Tas
return buildRunningPlan(node)
}
fn := node.Spec.FullNode
params := &seiconfig.ConfigIntent{
Mode: seiconfig.ModeFull,
Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides(node)), node.Spec.Overrides),
params, err := p.BuildConfigIntent(node)
if err != nil {
return nil, err
}
if NeedsBootstrap(node) {
return buildBootstrapPlan(node, node.Spec.Peers, fn.Snapshot, params)
}
return buildBasePlan(node, node.Spec.Peers, fn.Snapshot, params)
}

func (p *fullNodePlanner) BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error) {
if node.Spec.FullNode == nil {
return nil, fmt.Errorf("fullNode sub-spec is nil")
}
return &seiconfig.ConfigIntent{
Mode: seiconfig.ModeFull,
Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides(node)), node.Spec.Overrides),
}, nil
}

func (p *fullNodePlanner) controllerOverrides(node *seiv1alpha1.SeiNode) map[string]string {
sg := node.Spec.FullNode.SnapshotGeneration
if sg == nil || sg.Tendermint == nil {
Expand Down
140 changes: 139 additions & 1 deletion internal/planner/node_update_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package planner

import (
"encoding/json"
"fmt"
"testing"

. "github.com/onsi/gomega"
seiconfig "github.com/sei-protocol/sei-config"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

Expand Down Expand Up @@ -68,6 +70,8 @@ func TestBuildRunningPlan_ImageDrift_ReturnsNodeUpdatePlan(t *testing.T) {
want := []string{
task.TaskTypeApplyStatefulSet,
task.TaskTypeApplyService,
TaskConfigApply,
TaskConfigValidate,
task.TaskTypeReplacePod,
task.TaskTypeObserveImage,
TaskMarkReady,
Expand All @@ -82,6 +86,138 @@ func TestBuildRunningPlan_ImageDrift_ReturnsNodeUpdatePlan(t *testing.T) {
}
}

// configIntentFromPlan finds the config-apply task and unmarshals its
// params back into a ConfigIntent for inspection.
func configIntentFromPlan(t *testing.T, plan *seiv1alpha1.TaskPlan) *seiconfig.ConfigIntent {
t.Helper()
g := NewWithT(t)
for _, pt := range plan.Tasks {
if pt.Type != TaskConfigApply {
continue
}
g.Expect(pt.Params).NotTo(BeNil())
var intent seiconfig.ConfigIntent
g.Expect(json.Unmarshal(pt.Params.Raw, &intent)).To(Succeed())
return &intent
}
t.Fatal("plan has no config-apply task")
return nil
}

// The day-2 config-apply must carry the mode planner's full ConfigIntent —
// non-empty Mode, Overrides containing the controller-managed keys —
// otherwise sei-config's ResolveIntent rejects the empty-Mode and
// TaskConfigApply fails at runtime.
func TestBuildNodeUpdatePlan_ConfigApplyHasIntentWithMode(t *testing.T) {
g := NewWithT(t)
node := runningFullNode()
node.Spec.ExternalAddress = "syncer-0-0-p2p.atlantic-2.harbor.platform.sei.io:26656"
node.Spec.Image = testImageV2

plan, err := buildRunningPlan(node)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(plan).NotTo(BeNil())

intent := configIntentFromPlan(t, plan)
g.Expect(intent.Mode).To(Equal(seiconfig.ModeFull), "Mode must be set so ResolveIntent accepts the intent")
g.Expect(intent.Overrides).To(HaveKeyWithValue(
seiconfig.KeyP2PExternalAddress,
"syncer-0-0-p2p.atlantic-2.harbor.platform.sei.io:26656",
), "commonOverrides should propagate Spec.ExternalAddress")
}

// Day-2 must set Incremental=true. The init path uses applyFull which
// regenerates config from mode defaults — running that on a running node
// would wipe persistent-peers, state-sync trust point, and operator-managed
// TOML keys outside the controller's overrides set. Incremental reads
// on-disk config and patches only the keys we own.
func TestBuildNodeUpdatePlan_ConfigApplyIsIncremental(t *testing.T) {
g := NewWithT(t)
node := runningFullNode()
node.Spec.Image = testImageV2

plan, err := buildRunningPlan(node)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(plan).NotTo(BeNil())

intent := configIntentFromPlan(t, plan)
g.Expect(intent.Incremental).To(BeTrue(),
"day-2 ConfigIntent must be incremental to avoid clobbering on-disk state")
}

// Init plans must NOT set Incremental — the init path uses applyFull
// (regenerate config from mode defaults + overrides). Pinning this here
// makes accidental flips during future refactors a loud failure.
func TestBuildPlan_InitPlanConfigIntentNotIncremental(t *testing.T) {
g := NewWithT(t)
node := &seiv1alpha1.SeiNode{
ObjectMeta: metav1.ObjectMeta{Name: "full-init", Namespace: "default", Generation: 1},
Spec: seiv1alpha1.SeiNodeSpec{
ChainID: "atlantic-2",
Image: "sei:v1.0.0",
FullNode: &seiv1alpha1.FullNodeSpec{},
},
// No phase — defaults to "" (init path).
}

plan, err := (&fullNodePlanner{}).BuildPlan(node)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(plan).NotTo(BeNil())

intent := configIntentFromPlan(t, plan)
g.Expect(intent.Incremental).To(BeFalse(),
"init ConfigIntent must NOT be incremental — applyFull regenerates from mode defaults")
}

// Per-mode coverage: the day-2 intent should match the mode planner's
// Mode value. Catches future drift if a new mode lands and isn't wired
// into BuildConfigIntent.
func TestBuildNodeUpdatePlan_ConfigApplyIntentMatchesModePlanner(t *testing.T) {
cases := []struct {
name string
mutate func(*seiv1alpha1.SeiNode)
wantMode seiconfig.NodeMode
}{
{
name: "full",
mutate: func(_ *seiv1alpha1.SeiNode) {},
wantMode: seiconfig.ModeFull,
},
{
name: "archive",
mutate: func(n *seiv1alpha1.SeiNode) {
n.Spec.FullNode = nil
n.Spec.Archive = &seiv1alpha1.ArchiveSpec{}
},
wantMode: seiconfig.ModeArchive,
},
{
name: "validator",
mutate: func(n *seiv1alpha1.SeiNode) {
n.Spec.FullNode = nil
n.Spec.Validator = &seiv1alpha1.ValidatorSpec{}
},
wantMode: seiconfig.ModeValidator,
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
g := NewWithT(t)
node := runningFullNode()
tc.mutate(node)
node.Spec.Image = testImageV2

plan, err := buildRunningPlan(node)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(plan).NotTo(BeNil())

intent := configIntentFromPlan(t, plan)
g.Expect(intent.Mode).To(Equal(tc.wantMode))
g.Expect(intent.Incremental).To(BeTrue())
})
}
}

// --- ResolvePlan condition tests ---

func TestResolvePlan_NodeUpdate_SetsCondition(t *testing.T) {
Expand Down Expand Up @@ -381,10 +517,12 @@ func TestBuildRunningPlan_ImageDriftWinsOverSidecar(t *testing.T) {
g.Expect(err).NotTo(HaveOccurred())
g.Expect(plan).NotTo(BeNil())
// Image update plan ends with MarkReady, which also resolves the sidecar.
g.Expect(plan.Tasks).To(HaveLen(5), "should be full node-update plan, not one-task mark-ready")
g.Expect(plan.Tasks).To(HaveLen(7), "should be full node-update plan, not one-task mark-ready")
g.Expect(planTaskTypes(plan)).To(Equal([]string{
task.TaskTypeApplyStatefulSet,
task.TaskTypeApplyService,
TaskConfigApply,
TaskConfigValidate,
task.TaskTypeReplacePod,
task.TaskTypeObserveImage,
TaskMarkReady,
Expand Down
39 changes: 36 additions & 3 deletions internal/planner/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,13 @@ type NodePlanner interface {
Validate(node *seiv1alpha1.SeiNode) error
BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error)
Mode() string

// BuildConfigIntent returns the ConfigIntent the mode's plan would feed
// to TaskConfigApply (mode + merged controller / spec overrides). Shared
// by BuildPlan (init) and buildNodeUpdatePlan (day-2) so they emit
// identical config payloads for the same spec. Day-2 callers set
// Incremental on the returned value before passing it through.
BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error)
}

// GroupPlanner encapsulates logic for building a group-level task plan.
Expand Down Expand Up @@ -735,18 +742,44 @@ func buildMarkReadyPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error
}

// buildNodeUpdatePlan constructs a plan to roll out an image update on a
// Running node. The plan applies the new StatefulSet spec, waits for the
// rollout to complete, then re-initializes the sidecar.
// Running node. The plan applies the new StatefulSet spec, re-applies
// config.toml (incremental — only the keys the controller owns), then
// cycles the pod and re-initializes the sidecar.
//
// FailedPhase is deliberately empty: a failure retries on the next reconcile
// rather than transitioning the node out of Running.
//
// TaskConfigApply is placed before ReplacePod so the old pod's sidecar
// writes the new config.toml to the PVC; the new pod then mounts the
// PVC and reads the fresh file at seid start. external_address has no
// hot-reload path in seid today (see seictl config_reload.go), so the
// pod restart is the trigger that makes new values take effect.
//
// Incremental=true is mandatory on the day-2 intent. The init path uses
// the non-incremental applyFull, which regenerates config from mode
// defaults + overrides; running that on day 2 would wipe persistent-peers,
// state-sync trust point, and any operator-managed TOML keys that the
// controller doesn't track in overrides. applyIncremental reads on-disk
// config and patches only the overrides we own.
func buildNodeUpdatePlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) {
setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted",
fmt.Sprintf("image drift detected: spec=%s current=%s", node.Spec.Image, node.Status.CurrentImage))

mode, err := plannerForMode(node)
if err != nil {
return nil, fmt.Errorf("resolving mode planner: %w", err)
}
intent, err := mode.BuildConfigIntent(node)
if err != nil {
return nil, fmt.Errorf("building config intent: %w", err)
}
intent.Incremental = true

prog := []string{
task.TaskTypeApplyStatefulSet,
task.TaskTypeApplyService,
TaskConfigApply,
TaskConfigValidate,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Node update plan missing key validation readiness gates

High Severity

buildNodeUpdatePlan includes TaskTypeReplacePod (cycles the pod) but omits the TaskTypeValidateSigningKey, TaskTypeValidateNodeKey, and TaskTypeValidateOperatorKeyring readiness gates that both buildBasePlan and buildBootstrapPlan conditionally include before pod creation. For validator nodes with key secrets mounted as volumes, an image-drift rollout would recreate the pod without first verifying those secrets exist, risking a kubelet volume-mount failure instead of a clear controller-side validation error.

Additional Locations (1)
Fix in Cursor Fix in Web

Triggered by learned rule: Planner readiness gates must apply to all pod-cycling plans

Reviewed by Cursor Bugbot for commit 1d4dc7d. Configure here.

task.TaskTypeReplacePod,
task.TaskTypeObserveImage,
sidecar.TaskTypeMarkReady,
Expand All @@ -755,7 +788,7 @@ func buildNodeUpdatePlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, erro
planID := uuid.New().String()
tasks := make([]seiv1alpha1.PlannedTask, len(prog))
for i, taskType := range prog {
t, err := buildPlannedTask(planID, taskType, i, paramsForTaskType(node, taskType, nil, nil))
t, err := buildPlannedTask(planID, taskType, i, paramsForTaskType(node, taskType, nil, intent))
if err != nil {
return nil, err
}
Expand Down
16 changes: 13 additions & 3 deletions internal/planner/replay.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,26 @@ func (p *replayerPlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Tas
if node.Status.Phase == seiv1alpha1.PhaseRunning {
return buildRunningPlan(node)
}
params := &seiconfig.ConfigIntent{
Mode: seiconfig.ModeFull,
Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides()), node.Spec.Overrides),
params, err := p.BuildConfigIntent(node)
if err != nil {
return nil, err
}
if NeedsBootstrap(node) {
return buildBootstrapPlan(node, node.Spec.Peers, &node.Spec.Replayer.Snapshot, params)
}
return buildBasePlan(node, node.Spec.Peers, &node.Spec.Replayer.Snapshot, params)
}

func (p *replayerPlanner) BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error) {
if node.Spec.Replayer == nil {
return nil, fmt.Errorf("replayer sub-spec is nil")
}
return &seiconfig.ConfigIntent{
Mode: seiconfig.ModeFull,
Overrides: mergeOverrides(mergeOverrides(commonOverrides(node), p.controllerOverrides()), node.Spec.Overrides),
}, nil
}

func (p *replayerPlanner) controllerOverrides() map[string]string {
return map[string]string{
keySCAsyncCommitBuffer: "100",
Expand Down
16 changes: 13 additions & 3 deletions internal/planner/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,22 @@ func (p *validatorPlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Ta
return buildGenesisPlan(node)
}
v := node.Spec.Validator
params := &seiconfig.ConfigIntent{
Mode: seiconfig.ModeValidator,
Overrides: mergeOverrides(commonOverrides(node), node.Spec.Overrides),
params, err := p.BuildConfigIntent(node)
if err != nil {
return nil, err
}
if NeedsBootstrap(node) {
return buildBootstrapPlan(node, node.Spec.Peers, v.Snapshot, params)
}
return buildBasePlan(node, node.Spec.Peers, v.Snapshot, params)
}

func (p *validatorPlanner) BuildConfigIntent(node *seiv1alpha1.SeiNode) (*seiconfig.ConfigIntent, error) {
if node.Spec.Validator == nil {
return nil, fmt.Errorf("validator sub-spec is nil")
}
return &seiconfig.ConfigIntent{
Mode: seiconfig.ModeValidator,
Overrides: mergeOverrides(commonOverrides(node), node.Spec.Overrides),
}, nil
}
Loading