diff --git a/cmd/main.go b/cmd/main.go index 01d3eddd..60e8a564 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -489,6 +489,17 @@ func setupManagementControllers(mgr mcmanager.Manager, federationClient client.C return nil, fmt.Errorf("InstanceProjector: %w", err) } + // WorkloadDeploymentStatusSyncer watches WDs on the Karmada hub and + // reactively syncs their aggregated status back to the project cluster. + // This complements WorkloadDeploymentFederator.syncStatusFromDownstream, + // which only fires when the project-side WD changes (spec updates/restarts). + if err = (&controller.WorkloadDeploymentStatusSyncer{ + FederationClient: federationClient, + MCManager: mgr, + }).SetupWithManager(federationMgr); err != nil { + return nil, fmt.Errorf("WorkloadDeploymentStatusSyncer: %w", err) + } + return []manager.Runnable{federationMgr}, nil } diff --git a/internal/controller/instance_projector.go b/internal/controller/instance_projector.go index fa0b69b6..c1f5fa1d 100644 --- a/internal/controller/instance_projector.go +++ b/internal/controller/instance_projector.go @@ -5,6 +5,7 @@ package controller import ( "context" "fmt" + "maps" "strings" "time" @@ -152,9 +153,13 @@ func (r *InstanceProjector) Reconcile(ctx context.Context, req ctrl.Request) (ct if projection.Labels == nil { projection.Labels = make(map[string]string) } - for k, v := range downstreamInstance.Labels { - projection.Labels[k] = v - } + maps.Copy(projection.Labels, downstreamInstance.Labels) + // Overwrite the WD UID label with the project-cluster WD UID. The + // downstream Instance carries the cell-plane WD UID (assigned by Karmada + // when it propagated the WD), which never matches the project WD UID. + // Consumers doing label-selector lookups by WorkloadDeploymentUIDLabel + // (e.g. CLI CITY column) must see the project-side UID. + projection.Labels[computev1alpha.WorkloadDeploymentUIDLabel] = string(ownerWD.UID) projection.Spec = downstreamInstance.Spec diff --git a/internal/controller/instance_projector_test.go b/internal/controller/instance_projector_test.go index 7dcc8168..e77ed5f4 100644 --- a/internal/controller/instance_projector_test.go +++ b/internal/controller/instance_projector_test.go @@ -368,9 +368,17 @@ func TestInstanceProjector_Reconcile(t *testing.T) { require.NoError(t, err, "expected projection to exist in project namespace") - // Labels should be copied from the Karmada instance. + // Labels should be copied from the Karmada instance, with one + // deliberate exception: WorkloadDeploymentUIDLabel is overwritten with + // the project-cluster WD UID (not the edge/Karmada UID) so that + // label-selector lookups in the project cluster return correct results. if tt.karmadaInstance != nil { for k, v := range tt.karmadaInstance.Labels { + if k == computev1alpha.WorkloadDeploymentUIDLabel { + // Checked separately below — the projection must carry + // the project-cluster UID, not the cell/Karmada UID. + continue + } assert.Equal(t, v, projection.Labels[k], "projection label %q should match Karmada instance label", k) } @@ -383,10 +391,18 @@ func TestInstanceProjector_Reconcile(t *testing.T) { tt.karmadaInstance.Labels[computev1alpha.WorkloadUIDLabel], projection.Labels[computev1alpha.WorkloadUIDLabel], "WorkloadUIDLabel must be propagated to the projection") + // WorkloadDeploymentUIDLabel must carry the PROJECT-cluster WD UID, + // not the cell/Karmada UID. The projector overwrites this label after + // copying the rest. Verify the project UID is present and that the + // edge UID was NOT passed through. assert.Equal(t, - tt.karmadaInstance.Labels[computev1alpha.WorkloadDeploymentUIDLabel], + string(projTestWDUID), projection.Labels[computev1alpha.WorkloadDeploymentUIDLabel], - "WorkloadDeploymentUIDLabel must be propagated to the projection") + "WorkloadDeploymentUIDLabel must be the project-cluster WD UID on the projection") + assert.NotEqual(t, + string(projTestEdgeWDUID), + projection.Labels[computev1alpha.WorkloadDeploymentUIDLabel], + "WorkloadDeploymentUIDLabel must NOT be the edge/Karmada WD UID on the projection") assert.Equal(t, tt.karmadaInstance.Labels[computev1alpha.WorkloadDeploymentNameLabel], projection.Labels[computev1alpha.WorkloadDeploymentNameLabel], diff --git a/internal/controller/workloaddeployment_controller.go b/internal/controller/workloaddeployment_controller.go index 9b17266e..def9341a 100644 --- a/internal/controller/workloaddeployment_controller.go +++ b/internal/controller/workloaddeployment_controller.go @@ -261,7 +261,10 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates( } if apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceProgrammed) { - if instance.Status.Controller.ObservedTemplateHash == templateHash { + // Status.Controller is a pointer that the infra provider may not have + // populated yet even after it sets Programmed=True. Guard the deref to + // avoid a panic that would abort the reconcile before the status write. + if instance.Status.Controller != nil && instance.Status.Controller.ObservedTemplateHash == templateHash { currentReplicas++ } } diff --git a/internal/controller/workloaddeployment_federator.go b/internal/controller/workloaddeployment_federator.go index 9c736cf0..adb799c7 100644 --- a/internal/controller/workloaddeployment_federator.go +++ b/internal/controller/workloaddeployment_federator.go @@ -318,6 +318,13 @@ func (r *WorkloadDeploymentFederator) ensurePropagationPolicy( // syncStatusFromDownstream reads the aggregated status of the WorkloadDeployment // from the downstream namespace and writes it back to the project-namespace // object. It is a no-op when the downstream object does not yet exist. +// +// Belt-and-suspenders: WorkloadDeploymentStatusSyncer (registered with +// federationMgr) is the primary reactive path — it fires whenever Karmada +// updates the hub WD status. This call ensures status is also pulled during +// spec-change reconciles (e.g. manager restarts, propagation re-syncs). +// Removal of this call is deferred to a separate cleanup PR once the syncer +// is proven in production. func (r *WorkloadDeploymentFederator) syncStatusFromDownstream( ctx context.Context, projectClient client.Client, diff --git a/internal/controller/workloaddeployment_status_syncer.go b/internal/controller/workloaddeployment_status_syncer.go new file mode 100644 index 00000000..9ba52c3d --- /dev/null +++ b/internal/controller/workloaddeployment_status_syncer.go @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package controller + +import ( + "context" + "fmt" + "strings" + + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/manager" + mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager" + "sigs.k8s.io/multicluster-runtime/pkg/multicluster" + + computev1alpha "go.datum.net/compute/api/v1alpha" + "go.miloapis.com/milo/pkg/downstreamclient" +) + +// WorkloadDeploymentStatusSyncer watches WorkloadDeployments on the Karmada +// federation hub and syncs their aggregated status back to the corresponding +// project-cluster WorkloadDeployment. +// +// It is registered with the federationMgr (a standard manager.Manager pointed +// at the Karmada control plane) so it reacts to status changes written by +// Karmada's statusAggregation interpreter. This complements the existing +// syncStatusFromDownstream call in WorkloadDeploymentFederator, which only runs +// when the project-side WD changes (spec updates, restarts). The syncer makes +// status propagation reactive: status flows to the project cluster as soon as +// Karmada aggregates it from the cell. +// +// Namespace resolution: the Karmada WD lives in ns-. +// That namespace is labeled by WorkloadDeploymentFederator.ensureDownstreamNamespace +// with UpstreamOwnerClusterNameLabel and UpstreamOwnerNamespaceLabel. The syncer +// reads those labels to locate the project cluster and namespace, then looks up +// the project WD by name (which is stable across all planes). +type WorkloadDeploymentStatusSyncer struct { + // FederationClient reads WorkloadDeployments from the Karmada federation + // control plane. + FederationClient client.Client + + // MCManager provides project-cluster clients via GetCluster. + MCManager mcmanager.Manager +} + +// +kubebuilder:rbac:groups=compute.datumapis.com,resources=workloaddeployments/status,verbs=get;update;patch + +func (r *WorkloadDeploymentStatusSyncer) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx).WithValues("workloaddeployment", req.NamespacedName) + + // 1. Fetch the WorkloadDeployment from the Karmada federation hub. + var karmadaWD computev1alpha.WorkloadDeployment + if err := r.FederationClient.Get(ctx, req.NamespacedName, &karmadaWD); err != nil { + if apierrors.IsNotFound(err) { + // Deleted from the hub — no projection to update. + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("failed getting Karmada WorkloadDeployment: %w", err) + } + + // 2. Read the Karmada namespace to resolve the upstream cluster name. + // WorkloadDeploymentFederator.ensureDownstreamNamespace stamps the namespace + // with UpstreamOwnerClusterNameLabel ("cluster-") and + // UpstreamOwnerNamespaceLabel (project namespace name). + var ns corev1.Namespace + if err := r.FederationClient.Get(ctx, client.ObjectKey{Name: req.Namespace}, &ns); err != nil { + if apierrors.IsNotFound(err) { + // Namespace not yet labeled or removed — not an error; skip quietly. + logger.Info("Karmada namespace not found, skipping status sync", "namespace", req.Namespace) + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("failed getting Karmada namespace %q: %w", req.Namespace, err) + } + + encodedClusterName, ok := ns.Labels[downstreamclient.UpstreamOwnerClusterNameLabel] + if !ok { + // Namespace not yet stamped by the federator (e.g. WD predates the label). + logger.Info("Karmada namespace missing upstream-cluster-name label, skipping status sync", + "namespace", req.Namespace) + return ctrl.Result{}, nil + } + + // 3. Decode the cluster name ("cluster-" with "/" encoded as "_"). + clusterName := strings.TrimPrefix(encodedClusterName, "cluster-") + clusterName = strings.ReplaceAll(clusterName, "_", "/") + + // 4. Resolve the project namespace from the WD's own label (stamped by + // upsertDownstreamDeployment). Fall back to the namespace label if absent. + targetNamespace := karmadaWD.Labels[downstreamclient.UpstreamOwnerNamespaceLabel] + if targetNamespace == "" { + targetNamespace = ns.Labels[downstreamclient.UpstreamOwnerNamespaceLabel] + } + if targetNamespace == "" { + logger.Info("cannot resolve project namespace for Karmada WorkloadDeployment, skipping status sync", + "namespace", req.Namespace, "name", req.Name) + return ctrl.Result{}, nil + } + + // 5. Obtain the project cluster client. + projectCluster, err := r.MCManager.GetCluster(ctx, multicluster.ClusterName(clusterName)) + if err != nil { + return ctrl.Result{}, fmt.Errorf("failed getting project cluster %q: %w", clusterName, err) + } + projectClient := projectCluster.GetClient() + + // 6. Fetch the project-cluster WD by name. The WD name is stable across all + // planes (project, Karmada, cell) and is the correct cross-plane identifier. + var projectWD computev1alpha.WorkloadDeployment + if err := projectClient.Get(ctx, client.ObjectKey{ + Namespace: targetNamespace, + Name: req.Name, + }, &projectWD); err != nil { + if apierrors.IsNotFound(err) { + // Project WD may not exist yet (ordering race) or may have been + // deleted. Not an error — the federator will handle cleanup. + logger.Info("project WorkloadDeployment not found, skipping status sync", + "cluster", clusterName, "namespace", targetNamespace, "name", req.Name) + return ctrl.Result{}, nil + } + return ctrl.Result{}, fmt.Errorf("failed getting project WorkloadDeployment %s/%s in cluster %q: %w", + targetNamespace, req.Name, clusterName, err) + } + + // 7. No-op when status is already equal — avoid spurious writes. + if equality.Semantic.DeepEqual(projectWD.Status, karmadaWD.Status) { + return ctrl.Result{}, nil + } + + // 8. Write the Karmada-aggregated status to the project WD. + // Use Status().Update() rather than Patch() so that zero-value int32 fields + // (currentReplicas, readyReplicas) are always included in the request body. + // MergeFrom omits unchanged zero-value fields, which would silently drop + // required status sub-fields on the project side. + projectWD.Status = karmadaWD.Status + if err := projectClient.Status().Update(ctx, &projectWD); err != nil { + return ctrl.Result{}, fmt.Errorf("failed updating project WorkloadDeployment status: %w", err) + } + + logger.Info("synced WorkloadDeployment status from Karmada hub to project cluster", + "cluster", clusterName, "namespace", targetNamespace) + + return ctrl.Result{}, nil +} + +// SetupWithManager registers the WorkloadDeploymentStatusSyncer with +// federationMgr, a standard manager.Manager pointed at the Karmada federation +// control plane. It watches WorkloadDeployments on the hub and reacts when their +// status is updated by Karmada's statusAggregation interpreter. +func (r *WorkloadDeploymentStatusSyncer) SetupWithManager(federationMgr manager.Manager) error { + return ctrl.NewControllerManagedBy(federationMgr). + For(&computev1alpha.WorkloadDeployment{}). + Named("workload-deployment-status-syncer"). + Complete(r) +}