Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions api/v1alpha1/seinode_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,14 @@ type SeiNodeStatus struct {
// +optional
CurrentImage string `json:"currentImage,omitempty"`

// CurrentSidecarImage is the sidecar container image observed running
// on the owned StatefulSet. Stamped jointly with CurrentImage on
// rollout completion. Empty means "not yet observed" and is treated
// as no-drift so a controller upgrade doesn't fleet-roll every node
// on first reconcile.
// +optional
CurrentSidecarImage string `json:"currentSidecarImage,omitempty"`

// +listType=map
// +listMapKey=type
// +optional
Expand Down
5 changes: 4 additions & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,10 @@ func main() {
Scheme: mgr.GetScheme(),
Recorder: nodeRecorder,
Platform: platformCfg,
Planner: &planner.NodeResolver{BuildSidecarClient: buildSidecarClient},
Planner: &planner.NodeResolver{
BuildSidecarClient: buildSidecarClient,
Platform: platformCfg,
},
Comment thread
cursor[bot] marked this conversation as resolved.
PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{
ConfigFor: func(_ context.Context, node *seiv1alpha1.SeiNode) task.ExecutionConfig {
return task.ExecutionConfig{
Expand Down
8 changes: 8 additions & 0 deletions config/crd/sei.io_seinodes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -813,6 +813,14 @@ spec:
Parent controllers compare this against spec.image to determine
whether a spec change has been fully actuated.
type: string
currentSidecarImage:
description: |-
CurrentSidecarImage is the sidecar container image observed running
on the owned StatefulSet. Stamped jointly with CurrentImage on
rollout completion. Empty means "not yet observed" and is treated
as no-drift so a controller upgrade doesn't fleet-roll every node
on first reconcile.
type: string
phase:
description: Phase is the high-level lifecycle state.
enum:
Expand Down
3 changes: 2 additions & 1 deletion internal/controller/node/plan_execution_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ func newProgressionReconciler(t *testing.T, mock *mockSidecarClient, objs ...cli
Platform: platformtest.Config(),
Planner: &planner.NodeResolver{
BuildSidecarClient: func(_ *seiv1alpha1.SeiNode) (task.SidecarClient, error) { return mock, nil },
Platform: platformtest.Config(),
},
PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{
ConfigFor: func(_ context.Context, node *seiv1alpha1.SeiNode) task.ExecutionConfig {
Expand Down Expand Up @@ -796,7 +797,7 @@ func TestReconcileInitializing_SidecarClientError_Requeues(t *testing.T) {
Scheme: s,
Recorder: record.NewFakeRecorder(100),
Platform: platformtest.Config(),
Planner: &planner.NodeResolver{},
Planner: &planner.NodeResolver{Platform: platformtest.Config()},
PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{
ConfigFor: func(_ context.Context, n *seiv1alpha1.SeiNode) task.ExecutionConfig {
return task.ExecutionConfig{
Expand Down
1 change: 1 addition & 0 deletions internal/controller/node/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ func newNodeReconcilerWithSidecar(t *testing.T, mock *mockSidecarClient, objs ..
Platform: platformtest.Config(),
Planner: &planner.NodeResolver{
BuildSidecarClient: func(_ *seiv1alpha1.SeiNode) (task.SidecarClient, error) { return mock, nil },
Platform: platformtest.Config(),
},
PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{
ConfigFor: func(_ context.Context, node *seiv1alpha1.SeiNode) task.ExecutionConfig {
Expand Down
5 changes: 5 additions & 0 deletions internal/controller/nodedeployment/envtest/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,11 @@ func run(m *testing.M) (int, error) {
BuildSidecarClient: func(_ *seiv1alpha1.SeiNode) (task.SidecarClient, error) {
return stubSC, nil
},
// Must match ExecutionConfig.Platform below: ObserveImage stamps
// CurrentSidecarImage from ExecutionConfig.Platform, then the
// planner re-resolves the effective image from NodeResolver.Platform.
// Asymmetric values would loop sidecarImageDrifted on every reconcile.
Platform: platformCfg,
Comment thread
cursor[bot] marked this conversation as resolved.
},
PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNode]{
ConfigFor: func(_ context.Context, node *seiv1alpha1.SeiNode) task.ExecutionConfig {
Expand Down
6 changes: 4 additions & 2 deletions internal/planner/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
"github.com/sei-protocol/sei-k8s-controller/internal/platform"
"github.com/sei-protocol/sei-k8s-controller/internal/task"
)

// archiveNodePlanner is the NodePlanner that plannerForMode returns when
// Spec.Archive is set on the SeiNode.
type archiveNodePlanner struct {
	// platform is the platform config that buildRunningPlan hands to
	// sidecarImageDrifted and imageDriftMessage.
	platform platform.Config
}

// Mode returns seiconfig.ModeArchive as a string.
func (p *archiveNodePlanner) Mode() string { return string(seiconfig.ModeArchive) }
Expand Down Expand Up @@ -40,8 +42,8 @@ func (p *archiveNodePlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.
// buildRunningPlan returns the update plan for a Running archive node.
// Same shape as full nodes (no extra validation gates).
func (p *archiveNodePlanner) buildRunningPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) {
if imageDrifted(node) {
setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node))
if imageDrifted(node) || sidecarImageDrifted(node, p.platform) {
setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node, p.platform))
prog := []string{
task.TaskTypeApplyStatefulSet,
task.TaskTypeApplyService,
Expand Down
6 changes: 4 additions & 2 deletions internal/planner/full.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
"github.com/sei-protocol/sei-k8s-controller/internal/platform"
"github.com/sei-protocol/sei-k8s-controller/internal/task"
)

// fullNodePlanner is the NodePlanner that plannerForMode returns when
// Spec.FullNode is set on the SeiNode.
type fullNodePlanner struct {
	// platform is the platform config that buildRunningPlan hands to
	// sidecarImageDrifted and imageDriftMessage.
	platform platform.Config
}

// Mode returns seiconfig.ModeFull as a string.
func (p *fullNodePlanner) Mode() string { return string(seiconfig.ModeFull) }
Expand Down Expand Up @@ -49,8 +51,8 @@ func (p *fullNodePlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Tas
// nil if no drift. pelletier/go-toml/v2 does not preserve comments on
// re-encode — the first config-patch erases operator-added comments.
func (p *fullNodePlanner) buildRunningPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) {
if imageDrifted(node) {
setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node))
if imageDrifted(node) || sidecarImageDrifted(node, p.platform) {
setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node, p.platform))
prog := []string{
task.TaskTypeApplyStatefulSet,
task.TaskTypeApplyService,
Expand Down
106 changes: 104 additions & 2 deletions internal/planner/node_update_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,22 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
"github.com/sei-protocol/sei-k8s-controller/internal/platform"
"github.com/sei-protocol/sei-k8s-controller/internal/task"
)

const (
testImageV2 = "sei:v2.0.0"
testExternalAddrAtl = "syncer-0-0-p2p.atlantic-2.harbor.platform.sei.io:26656"
testImageV2 = "sei:v2.0.0"
testExternalAddrAtl = "syncer-0-0-p2p.atlantic-2.harbor.platform.sei.io:26656"
testSidecarImageV1 = "ghcr.io/sei-protocol/seictl@sha256:1111"
testSidecarImageV2 = "ghcr.io/sei-protocol/seictl@sha256:2222"
testSidecarOverrideV = "ghcr.io/sei-protocol/seictl@sha256:3333"
)

// platformWithSidecar builds a platform.Config whose only populated field
// is SidecarImage; every other field keeps its zero value.
func platformWithSidecar(image string) platform.Config {
	var cfg platform.Config
	cfg.SidecarImage = image
	return cfg
}

// runningFullNode returns a SeiNode in the Running phase with currentImage matching spec.image.
func runningFullNode() *seiv1alpha1.SeiNode {
return &seiv1alpha1.SeiNode{
Expand Down Expand Up @@ -165,6 +173,100 @@ func TestArchivePlanner_ImageDrift_UpdateProgression(t *testing.T) {
}))
}

// --- sidecar image drift tests ---

// sidecarDriftedNode returns runningFullNode() with
// Status.CurrentSidecarImage pre-set to testSidecarImageV1. A non-empty
// value keeps sidecarImageDrifted's "not yet observed" guard from
// short-circuiting, so callers can provoke drift through Spec.Sidecar or
// the platform config.
func sidecarDriftedNode() *seiv1alpha1.SeiNode {
	n := runningFullNode()
	n.Status.CurrentSidecarImage = testSidecarImageV1
	return n
}

// A changed platform default sidecar image (Spec.Sidecar nil) must produce
// an update plan with the task sequence asserted below.
func TestFullPlanner_SidecarDriftFromPlatformDefault_UpdateProgression(t *testing.T) {
	g := NewWithT(t)
	planner := &fullNodePlanner{platform: platformWithSidecar(testSidecarImageV2)}

	plan, err := planner.BuildPlan(sidecarDriftedNode())
	g.Expect(err).NotTo(HaveOccurred())
	g.Expect(plan).NotTo(BeNil(), "sidecar drift should trigger update plan")

	wantTasks := []string{
		task.TaskTypeApplyStatefulSet,
		task.TaskTypeApplyService,
		TaskConfigPatch,
		TaskConfigValidate,
		task.TaskTypeReplacePod,
		task.TaskTypeObserveImage,
		TaskMarkReady,
	}
	g.Expect(planTaskTypes(plan)).To(Equal(wantTasks))
}

// The platform default equals the observed image here, so the only source
// of drift is the Spec.Sidecar.Image override; it must still yield an
// update plan.
func TestFullPlanner_SidecarDriftFromOverride_UpdateProgression(t *testing.T) {
	g := NewWithT(t)

	overridden := sidecarDriftedNode()
	overridden.Spec.Sidecar = &seiv1alpha1.SidecarConfig{Image: testSidecarOverrideV}
	planner := &fullNodePlanner{platform: platformWithSidecar(testSidecarImageV1)}

	plan, err := planner.BuildPlan(overridden)
	g.Expect(err).NotTo(HaveOccurred())
	g.Expect(plan).NotTo(BeNil(), "sidecar override drift should trigger update plan")
	g.Expect(plan.Tasks).To(HaveLen(7))
}

// When seid and sidecar images drift together, one update plan is built
// and the NodeUpdateInProgress condition message mentions both images.
func TestFullPlanner_CombinedDrift_SingleUpdatePlan(t *testing.T) {
	g := NewWithT(t)

	drifted := sidecarDriftedNode()
	drifted.Spec.Image = testImageV2
	planner := &fullNodePlanner{platform: platformWithSidecar(testSidecarImageV2)}

	plan, err := planner.BuildPlan(drifted)
	g.Expect(err).NotTo(HaveOccurred())
	g.Expect(plan).NotTo(BeNil())
	g.Expect(plan.Tasks).To(HaveLen(7), "one plan covers both drifts")

	updateCond := meta.FindStatusCondition(drifted.Status.Conditions, seiv1alpha1.ConditionNodeUpdateInProgress)
	g.Expect(updateCond).NotTo(BeNil())
	for _, fragment := range []string{"seid spec=", "sidecar spec="} {
		g.Expect(updateCond.Message).To(ContainSubstring(fragment))
	}
}

// Backfill guard: a node that has never had CurrentSidecarImage stamped
// must not be treated as drifted, otherwise a controller upgrade would
// fleet-roll every node before ObserveImage backfills the field.
func TestFullPlanner_NoCurrentSidecarImage_NoDrift(t *testing.T) {
	g := NewWithT(t)
	planner := &fullNodePlanner{platform: platformWithSidecar(testSidecarImageV2)}

	plan, err := planner.BuildPlan(runningFullNode())
	g.Expect(err).NotTo(HaveOccurred())
	g.Expect(plan).To(BeNil(), "empty CurrentSidecarImage must NOT trigger drift")
}

// imageDriftMessage must identify which image drifted: seid only, sidecar
// only, or both.
func TestImageDriftMessage_NamesWhichDrifted(t *testing.T) {
	g := NewWithT(t)
	bumped := platformWithSidecar(testSidecarImageV2)
	unchanged := platformWithSidecar(testSidecarImageV1)

	// seid only: the platform sidecar image matches the observed one.
	seidNode := sidecarDriftedNode()
	seidNode.Spec.Image = testImageV2
	seidMsg := imageDriftMessage(seidNode, unchanged)
	g.Expect(seidMsg).To(And(
		ContainSubstring("image drift detected"),
		Not(ContainSubstring("sidecar"))))

	// sidecar only: spec.image untouched, platform sidecar image bumped.
	sidecarMsg := imageDriftMessage(sidecarDriftedNode(), bumped)
	g.Expect(sidecarMsg).To(ContainSubstring("sidecar image drift detected"))

	// both: spec.image and the platform sidecar image are bumped.
	bothNode := sidecarDriftedNode()
	bothNode.Spec.Image = testImageV2
	bothMsg := imageDriftMessage(bothNode, bumped)
	g.Expect(bothMsg).To(And(
		ContainSubstring("seid spec="),
		ContainSubstring("sidecar spec=")))
}

// Replayer shares the full/archive update shape — symmetry test.
func TestReplayerPlanner_ImageDrift_UpdateProgression(t *testing.T) {
g := NewWithT(t)
Expand Down
52 changes: 41 additions & 11 deletions internal/planner/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
"github.com/sei-protocol/sei-k8s-controller/internal/controller/observability"
"github.com/sei-protocol/sei-k8s-controller/internal/noderesource"
"github.com/sei-protocol/sei-k8s-controller/internal/platform"
"github.com/sei-protocol/sei-k8s-controller/internal/task"
)

Expand Down Expand Up @@ -128,6 +129,9 @@ func isConditionTrue(group *seiv1alpha1.SeiNodeDeployment, condType string) bool
type NodeResolver struct {
	// BuildSidecarClient returns the task.SidecarClient for a node.
	// Nil factory skips the sidecar probe; used by tests.
	BuildSidecarClient func(node *seiv1alpha1.SeiNode) (task.SidecarClient, error)
	// Platform supplies the controller-wide sidecar image fallback used
	// by sidecarImageDrifted when Spec.Sidecar.Image is unset.
	// plannerForMode copies it into every mode planner it constructs.
	Platform platform.Config
}

func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNode) error {
Expand All @@ -145,7 +149,7 @@ func (p *NodeResolver) ResolvePlan(ctx context.Context, node *seiv1alpha1.SeiNod

handleTerminalPlan(ctx, node)

mode, err := plannerForMode(node)
mode, err := p.plannerForMode(node)
if err != nil {
return err
}
Expand Down Expand Up @@ -261,18 +265,19 @@ func planFailureMessage(plan *seiv1alpha1.TaskPlan) string {
return unknownValue
}

// plannerForMode returns the appropriate NodePlanner based on which mode
// sub-spec is populated on the SeiNode.
func plannerForMode(node *seiv1alpha1.SeiNode) (NodePlanner, error) {
// plannerForMode returns the appropriate NodePlanner for the SeiNode's
// mode sub-spec, threaded with the resolver's Platform config so each
// planner can resolve the effective sidecar image for drift detection.
func (r *NodeResolver) plannerForMode(node *seiv1alpha1.SeiNode) (NodePlanner, error) {
switch {
case node.Spec.FullNode != nil:
return &fullNodePlanner{}, nil
return &fullNodePlanner{platform: r.Platform}, nil
case node.Spec.Archive != nil:
return &archiveNodePlanner{}, nil
return &archiveNodePlanner{platform: r.Platform}, nil
case node.Spec.Replayer != nil:
return &replayerPlanner{}, nil
return &replayerPlanner{platform: r.Platform}, nil
case node.Spec.Validator != nil:
return &validatorPlanner{}, nil
return &validatorPlanner{platform: r.Platform}, nil
default:
return nil, fmt.Errorf("no mode sub-spec set on SeiNode %s/%s", node.Namespace, node.Name)
}
Expand Down Expand Up @@ -733,10 +738,35 @@ func imageDrifted(node *seiv1alpha1.SeiNode) bool {
return node.Spec.Image != node.Status.CurrentImage
}

// sidecarImageDrifted reports whether the sidecar image the node should run
// (task.EffectiveSidecarImage) differs from Status.CurrentSidecarImage.
//
// An empty Status.CurrentSidecarImage means "not yet observed" and always
// yields false, so a controller upgrade doesn't fleet-roll every node
// before ObserveImage backfills the field.
func sidecarImageDrifted(node *seiv1alpha1.SeiNode, p platform.Config) bool {
	observed := node.Status.CurrentSidecarImage
	// Short-circuit keeps the effective image from being resolved at all
	// for nodes that have not been observed yet.
	return observed != "" && task.EffectiveSidecarImage(node, p) != observed
}

// imageDriftMessage formats the NodeUpdateInProgress message every mode
// planner stamps before an image-drift-triggered assembleUpdatePlan call.
func imageDriftMessage(node *seiv1alpha1.SeiNode) string {
return fmt.Sprintf("image drift detected: spec=%s current=%s", node.Spec.Image, node.Status.CurrentImage)
// planner stamps before an update plan. Names which image(s) drifted so an
// operator reading the condition can tell seid bumps from sidecar bumps.
func imageDriftMessage(node *seiv1alpha1.SeiNode, p platform.Config) string {
	seidDrift := imageDrifted(node)
	sidecarDrift := sidecarImageDrifted(node, p)

	if !sidecarDrift {
		// seid-only drift; also the fallback wording when neither image
		// is reported as drifted.
		return fmt.Sprintf("image drift detected: spec=%s current=%s",
			node.Spec.Image, node.Status.CurrentImage)
	}

	// The sidecar drifted: report the resolved image against the observed one.
	wantSidecar := task.EffectiveSidecarImage(node, p)
	haveSidecar := node.Status.CurrentSidecarImage
	if seidDrift {
		return fmt.Sprintf("image drift detected: seid spec=%s current=%s; sidecar spec=%s current=%s",
			node.Spec.Image, node.Status.CurrentImage,
			wantSidecar, haveSidecar)
	}
	return fmt.Sprintf("sidecar image drift detected: spec=%s current=%s",
		wantSidecar, haveSidecar)
}

// externalAddressPatch is the config.toml patch that stamps the
Expand Down
6 changes: 4 additions & 2 deletions internal/planner/replay.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
"github.com/sei-protocol/sei-k8s-controller/internal/platform"
"github.com/sei-protocol/sei-k8s-controller/internal/task"
)

// replayerPlanner is the NodePlanner that plannerForMode returns when
// Spec.Replayer is set on the SeiNode.
type replayerPlanner struct {
	// platform is the platform config that buildRunningPlan hands to
	// sidecarImageDrifted and imageDriftMessage.
	platform platform.Config
}

// Mode returns seiconfig.ModeFull as a string.
// NOTE(review): this is the same mode string fullNodePlanner reports —
// confirm that replayers are meant to share the full-node mode.
func (p *replayerPlanner) Mode() string { return string(seiconfig.ModeFull) }
Expand Down Expand Up @@ -52,8 +54,8 @@ func (p *replayerPlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Tas
// buildRunningPlan returns the update plan for a Running replayer node.
// Same shape as full and archive.
func (p *replayerPlanner) buildRunningPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) {
if imageDrifted(node) {
setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node))
if imageDrifted(node) || sidecarImageDrifted(node, p.platform) {
setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node, p.platform))
prog := []string{
task.TaskTypeApplyStatefulSet,
task.TaskTypeApplyService,
Expand Down
6 changes: 4 additions & 2 deletions internal/planner/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
"github.com/sei-protocol/sei-k8s-controller/internal/platform"
"github.com/sei-protocol/sei-k8s-controller/internal/task"
)

// validatorPlanner is the NodePlanner that plannerForMode returns when
// Spec.Validator is set on the SeiNode.
type validatorPlanner struct {
	// platform is the platform config that buildRunningPlan hands to
	// sidecarImageDrifted and imageDriftMessage.
	platform platform.Config
}

// Mode returns seiconfig.ModeValidator as a string.
func (p *validatorPlanner) Mode() string { return string(seiconfig.ModeValidator) }
Expand Down Expand Up @@ -117,8 +119,8 @@ func (p *validatorPlanner) BuildPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.Ta
// malformed secret aborts with a clear controller-side error rather than
// a kubelet volume-mount failure on the recreated pod.
func (p *validatorPlanner) buildRunningPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) {
if imageDrifted(node) {
setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node))
if imageDrifted(node) || sidecarImageDrifted(node, p.platform) {
setNodeUpdateCondition(node, metav1.ConditionTrue, "UpdateStarted", imageDriftMessage(node, p.platform))
prog := []string{
task.TaskTypeValidateSigningKey,
task.TaskTypeValidateNodeKey,
Expand Down
Loading
Loading