diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index c898cca..3222678 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -55,6 +55,14 @@ rules: - patch - update - watch +- apiGroups: + - apps + resources: + - statefulsets/status + verbs: + - get + - patch + - update - apiGroups: - batch resources: diff --git a/internal/controller/node/controller.go b/internal/controller/node/controller.go index 3d98b57..9aed533 100644 --- a/internal/controller/node/controller.go +++ b/internal/controller/node/controller.go @@ -54,6 +54,7 @@ type SeiNodeReconciler struct { // +kubebuilder:rbac:groups=sei.io,resources=seinodes/status,verbs=get;update;patch // +kubebuilder:rbac:groups=sei.io,resources=seinodes/finalizers,verbs=update // +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=apps,resources=statefulsets/status,verbs=get;patch;update // +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete diff --git a/internal/task/observe_image.go b/internal/task/observe_image.go index ca6b510..4c7e288 100644 --- a/internal/task/observe_image.go +++ b/internal/task/observe_image.go @@ -8,6 +8,7 @@ import ( appsv1 "k8s.io/api/apps/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" "github.com/sei-protocol/sei-k8s-controller/internal/platform" @@ -76,10 +77,44 @@ func (e *observeImageExecution) Execute(ctx context.Context) error { // whole pod spec), so seid and sidecar containers roll together. 
node.Status.CurrentImage = node.Spec.Image node.Status.CurrentSidecarImage = EffectiveSidecarImage(node, e.cfg.Platform) + + // Advance the StatefulSet's status.currentRevision to match + // updateRevision. Our StatefulSets use updateStrategy: OnDelete (see + // noderesource.GenerateNodeStatefulSet) because pod replacement is + // driven by the replace-pod task, not the StatefulSet controller's + // rolling-update loop. As a side effect, the StatefulSet controller + // never advances currentRevision after a successful rollout, which + // causes the kube-prometheus-mixin alert KubeStatefulSetUpdateNotRolledOut + // to latch indefinitely. We close that gap here, after rollout is + // confirmed complete by UpdatedReplicas. + if err := advanceStatefulSetCurrentRevision(ctx, e.cfg.KubeClient, sts); err != nil { + return fmt.Errorf("advancing statefulset currentRevision: %w", err) + } + e.complete() return nil } +// advanceStatefulSetCurrentRevision patches sts.Status.CurrentRevision to +// match UpdateRevision when the rollout is observed complete. No-op when +// already equal or when UpdateRevision is empty (controller hasn't +// observed the spec yet). +func advanceStatefulSetCurrentRevision(ctx context.Context, kc client.Client, sts *appsv1.StatefulSet) error { + if sts.Status.UpdateRevision == "" { + return nil + } + if sts.Status.CurrentRevision == sts.Status.UpdateRevision { + return nil + } + patch := client.MergeFrom(sts.DeepCopy()) + sts.Status.CurrentRevision = sts.Status.UpdateRevision + sts.Status.CurrentReplicas = sts.Status.UpdatedReplicas + if err := kc.Status().Patch(ctx, sts, patch); err != nil { + return err + } + return nil +} + // EffectiveSidecarImage returns the sidecar container image actually // rendered onto the StatefulSet: the per-SeiNode override if set, else // the controller-wide default from SEI_SIDECAR_IMAGE. 
Mirrors the diff --git a/internal/task/observe_image_test.go b/internal/task/observe_image_test.go index e941f4c..38af348 100644 --- a/internal/task/observe_image_test.go +++ b/internal/task/observe_image_test.go @@ -9,6 +9,7 @@ import ( appsv1 "k8s.io/api/apps/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -40,7 +41,8 @@ func observeImageCfg(t *testing.T, node *seiv1alpha1.SeiNode, sts *appsv1.Statef if err := seiv1alpha1.AddToScheme(s); err != nil { t.Fatal(err) } - builder := fake.NewClientBuilder().WithScheme(s) + builder := fake.NewClientBuilder().WithScheme(s). + WithStatusSubresource(&appsv1.StatefulSet{}) if sts != nil { builder = builder.WithObjects(sts) } @@ -187,3 +189,99 @@ func TestObserveImage_DeserializeEmptyParams(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) g.Expect(exec).NotTo(BeNil()) } + +// On rollout completion under OnDelete strategy, the StatefulSet controller +// does not advance status.currentRevision. ObserveImage patches it to +// match updateRevision so the kube-prometheus-mixin alert +// KubeStatefulSetUpdateNotRolledOut clears. 
+func TestObserveImage_RolloutComplete_AdvancesStatefulSetCurrentRevision(t *testing.T) {
+	g := NewWithT(t)
+	ctx := context.Background()
+	node := observeImageNode()
+	// Rollout is complete, yet status still reports the old revision as current.
+	status := appsv1.StatefulSetStatus{
+		ObservedGeneration: 2,
+		Replicas:           1,
+		UpdatedReplicas:    1,
+		CurrentReplicas:    0,
+		CurrentRevision:    "node-1-abc123",
+		UpdateRevision:     "node-1-def456",
+	}
+	sts := &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{Name: node.Name, Namespace: node.Namespace, Generation: 2},
+		Spec:       appsv1.StatefulSetSpec{Replicas: int32Ptr(1)},
+		Status:     status,
+	}
+	cfg := observeImageCfg(t, node, sts)
+	execution := newObserveImageExec(t, cfg)
+
+	g.Expect(execution.Execute(ctx)).To(Succeed())
+	g.Expect(execution.Status(ctx)).To(Equal(ExecutionComplete))
+
+	// Read the StatefulSet back through the client to check what was stored.
+	stored := &appsv1.StatefulSet{}
+	g.Expect(cfg.KubeClient.Get(ctx, types.NamespacedName{Name: node.Name, Namespace: node.Namespace}, stored)).To(Succeed())
+	g.Expect(stored.Status.CurrentRevision).To(Equal("node-1-def456"))
+	g.Expect(stored.Status.CurrentReplicas).To(Equal(int32(1)))
+}
+
+// When CurrentRevision already matches UpdateRevision, ObserveImage skips
+// the patch (no-op). Verified by leaving the revision strings equal and
+// confirming the task still completes without error.
+func TestObserveImage_RolloutComplete_RevisionsAlreadyMatch_NoOp(t *testing.T) {
+	g := NewWithT(t)
+	ctx := context.Background()
+	node := observeImageNode()
+	key := types.NamespacedName{Name: node.Name, Namespace: node.Namespace}
+	sts := &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{Name: node.Name, Namespace: node.Namespace, Generation: 2},
+		Spec:       appsv1.StatefulSetSpec{Replicas: int32Ptr(1)},
+		Status: appsv1.StatefulSetStatus{
+			ObservedGeneration: 2, Replicas: 1, UpdatedReplicas: 1, CurrentReplicas: 1,
+			CurrentRevision: "node-1-same", UpdateRevision: "node-1-same",
+		},
+	}
+	cfg := observeImageCfg(t, node, sts)
+	exec := newObserveImageExec(t, cfg)
+
+	before := &appsv1.StatefulSet{}
+	g.Expect(cfg.KubeClient.Get(ctx, key, before)).To(Succeed())
+	g.Expect(exec.Execute(ctx)).To(Succeed())
+	g.Expect(exec.Status(ctx)).To(Equal(ExecutionComplete))
+	// The fake client is expected to bump resourceVersion on each write, so an
+	// unchanged value indicates Execute issued no StatefulSet status patch.
+	got := &appsv1.StatefulSet{}
+	g.Expect(cfg.KubeClient.Get(ctx, key, got)).To(Succeed())
+	g.Expect(got.Status.CurrentRevision).To(Equal("node-1-same"))
+	g.Expect(got.ResourceVersion).To(Equal(before.ResourceVersion))
+}
+
+// UpdateRevision can be empty briefly when the StatefulSet controller has
+// not yet observed the new spec (despite ObservedGeneration catching up).
+// In that race window we must not blank CurrentRevision.
+func TestObserveImage_RolloutComplete_EmptyUpdateRevision_NoOp(t *testing.T) {
+	g := NewWithT(t)
+	ctx := context.Background()
+	node := observeImageNode()
+	key := types.NamespacedName{Name: node.Name, Namespace: node.Namespace}
+	// UpdateRevision is deliberately left at its zero value.
+	status := appsv1.StatefulSetStatus{
+		ObservedGeneration: 2,
+		Replicas:           1,
+		UpdatedReplicas:    1,
+		CurrentRevision:    "node-1-prior",
+	}
+	sts := &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{Name: key.Name, Namespace: key.Namespace, Generation: 2},
+		Spec:       appsv1.StatefulSetSpec{Replicas: int32Ptr(1)},
+		Status:     status,
+	}
+	cfg := observeImageCfg(t, node, sts)
+	execution := newObserveImageExec(t, cfg)
+	g.Expect(execution.Execute(ctx)).To(Succeed())
+	g.Expect(execution.Status(ctx)).To(Equal(ExecutionComplete))
+
+	stored := &appsv1.StatefulSet{}
+	g.Expect(cfg.KubeClient.Get(ctx, key, stored)).To(Succeed())
+	g.Expect(stored.Status.CurrentRevision).To(Equal("node-1-prior"))
+}
diff --git a/manifests/role.yaml b/manifests/role.yaml
index c898cca..3222678 100644
--- a/manifests/role.yaml
+++ b/manifests/role.yaml
@@ -55,6 +55,14 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - apps
+  resources:
+  - statefulsets/status
+  verbs:
+  - get
+  - patch
+  - update
 - apiGroups:
   - batch
   resources: