From e9798cf413fc141219f62ccd003ce911bc4c6f3c Mon Sep 17 00:00:00 2001 From: Dan Prince Date: Fri, 19 Dec 2025 13:43:04 -0500 Subject: [PATCH] Force reinstall of operator resources on release version upgrade Fixes upgrade failures from 0.4 to main caused by incompatible webhook configuration changes that trigger index out of range panics during manifest merging. When OPENSTACK_RELEASE_VERSION is bumped, the controller now: - Detects the version change by comparing against status.ReleaseVersion - Deletes all owned resources (deployments, services, serviceaccounts, configmaps) - Removes managed webhooks (validating and mutating configurations) - Requeues to recreate resources with new manifests This one-time cleanup ensures a clean slate for incompatible upgrades where the structure of resources (especially webhooks) has changed between versions. Adds ReleaseVersion field to OpenStackStatus to track the deployed version. Also, adds bounds checking to skip copying clientConfig for new webhooks that don't exist in the current configuration, allowing the merge to complete successfully. Jira: OSPRH-23865 Co-authored-by: Brendan Shephard --- .../operator.openstack.org_openstacks.yaml | 4 + api/operator/v1beta1/openstack_types.go | 3 + api/operator/v1beta1/zz_generated.deepcopy.go | 5 + .../operator.openstack.org_openstacks.yaml | 4 + .../operator/openstack_controller.go | 108 +++++++++++++++++- .../dataplane/util/image_registry_test.go | 2 +- internal/operator/bindata/merge.go | 10 +- 7 files changed, 133 insertions(+), 3 deletions(-) diff --git a/api/bases/operator.openstack.org_openstacks.yaml b/api/bases/operator.openstack.org_openstacks.yaml index 63f986df9..32ea32218 100644 --- a/api/bases/operator.openstack.org_openstacks.yaml +++ b/api/bases/operator.openstack.org_openstacks.yaml @@ -268,6 +268,10 @@ spec: for this object. format: int64 type: integer + releaseVersion: + description: ReleaseVersion - the OpenStack release version that has + been successfully deployed + type: string totalOperatorCount: description: TotalOperatorCount - the number all operators available type: integer diff --git a/api/operator/v1beta1/openstack_types.go b/api/operator/v1beta1/openstack_types.go index b8833a8b5..1918bd2a3 100644 --- a/api/operator/v1beta1/openstack_types.go +++ b/api/operator/v1beta1/openstack_types.go @@ -256,6 +256,9 @@ type OpenStackStatus struct { // ContainerImage - the container image that has been successfully deployed ContainerImage *string `json:"containerImage,omitempty"` + + // ReleaseVersion - the OpenStack release version that has been successfully deployed + ReleaseVersion *string `json:"releaseVersion,omitempty"` } // +kubebuilder:object:root=true diff --git a/api/operator/v1beta1/zz_generated.deepcopy.go b/api/operator/v1beta1/zz_generated.deepcopy.go index 64cc6b57c..601347ecf 100644 --- a/api/operator/v1beta1/zz_generated.deepcopy.go +++ b/api/operator/v1beta1/zz_generated.deepcopy.go @@ -158,6 +158,11 @@ func (in *OpenStackStatus) DeepCopyInto(out *OpenStackStatus) { *out = new(string) **out = **in } + if in.ReleaseVersion != nil { + in, out := &in.ReleaseVersion, &out.ReleaseVersion + *out = new(string) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OpenStackStatus. diff --git a/config/crd/bases/operator.openstack.org_openstacks.yaml b/config/crd/bases/operator.openstack.org_openstacks.yaml index 63f986df9..32ea32218 100644 --- a/config/crd/bases/operator.openstack.org_openstacks.yaml +++ b/config/crd/bases/operator.openstack.org_openstacks.yaml @@ -268,6 +268,10 @@ spec: for this object. format: int64 type: integer + releaseVersion: + description: ReleaseVersion - the OpenStack release version that has + been successfully deployed + type: string totalOperatorCount: description: TotalOperatorCount - the number all operators available type: integer diff --git a/internal/controller/operator/openstack_controller.go b/internal/controller/operator/openstack_controller.go index 52708237a..6cc9d7eec 100644 --- a/internal/controller/operator/openstack_controller.go +++ b/internal/controller/operator/openstack_controller.go @@ -43,6 +43,7 @@ import ( "github.com/openstack-k8s-operators/openstack-operator/internal/operator" "github.com/openstack-k8s-operators/openstack-operator/internal/operator/bindata" "github.com/pkg/errors" + admissionv1 "k8s.io/api/admissionregistration/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" discoveryv1 "k8s.io/api/discovery/v1" @@ -250,6 +251,48 @@ func (r *OpenStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, err } + // Check if OPENSTACK_RELEASE_VERSION has changed - if so, delete all owned resources + // This is a one-time fix to handle incompatible upgrades + shouldReinstall := false + if instance.Status.ReleaseVersion != nil && *instance.Status.ReleaseVersion != openstackReleaseVersion { + // Explicit version change detected + shouldReinstall = true + } else if !isNewInstance && instance.Status.ReleaseVersion == nil { + // Existing installation upgrading from version without ReleaseVersion field + shouldReinstall = true + } + + if shouldReinstall { + Log.Info("OpenStack release version changed, deleting all owned resources", + "old", instance.Status.ReleaseVersion, + "new", openstackReleaseVersion) + + if err := r.deleteAllOwnedResources(ctx, instance); err != nil { + instance.Status.Conditions.Set(condition.FalseCondition( + operatorv1beta1.OpenStackOperatorReadyCondition, + condition.ErrorReason, + condition.SeverityWarning, + operatorv1beta1.OpenStackOperatorErrorMessage, + err)) + return ctrl.Result{}, err + } + + // Reset the container image status to force re-application of CRDs and RBAC + instance.Status.ContainerImage = nil + + // Update the release version in status + instance.Status.ReleaseVersion = &openstackReleaseVersion + + // Requeue to allow resources to be deleted before recreating + Log.Info("Resources deleted, requeuing to recreate with new version") + return ctrl.Result{RequeueAfter: time.Duration(5) * time.Second}, nil + } + + // Set the release version if not set (for new installations only) + if instance.Status.ReleaseVersion == nil { + instance.Status.ReleaseVersion = &openstackReleaseVersion + } + if err := r.applyManifests(ctx, instance); err != nil { instance.Status.Conditions.Set(condition.FalseCondition( operatorv1beta1.OpenStackOperatorReadyCondition, @@ -316,6 +359,69 @@ func (r *OpenStackReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } +func deleteOwnedResources[L client.ObjectList, T any]( + ctx context.Context, + r *OpenStackReconciler, + instance client.Object, + list L, + itemsGetter func(L) []T, +) error { + log := r.GetLogger(ctx) + + err := r.List(ctx, any(list).(client.ObjectList), &client.ListOptions{Namespace: instance.GetNamespace()}) + if err != nil { + return errors.Wrap(err, "failed to list resources") + } + + for _, item := range itemsGetter(list) { + obj := any(&item).(client.Object) + if metav1.IsControlledBy(obj, instance) { + log.Info("Deleting owned resource", "kind", obj.GetObjectKind().GroupVersionKind().Kind, "name", obj.GetName()) + err := r.Delete(ctx, obj) + if err != nil && !apierrors.IsNotFound(err) { + return errors.Wrapf(err, "failed to delete %s", obj.GetName()) + } + } + } + return nil +} + +func (r *OpenStackReconciler) deleteAllOwnedResources(ctx context.Context, instance *operatorv1beta1.OpenStack) error { + Log := r.GetLogger(ctx) + Log.Info("Deleting all owned resources for release version upgrade") + + err := deleteOwnedResources(ctx, r, instance, &appsv1.DeploymentList{}, func(l *appsv1.DeploymentList) []appsv1.Deployment { return l.Items }) + if err != nil { + return err + } + + err = deleteOwnedResources(ctx, r, instance, &corev1.ServiceAccountList{}, func(l *corev1.ServiceAccountList) []corev1.ServiceAccount { return l.Items }) + if err != nil { + return err + } + + err = deleteOwnedResources(ctx, r, instance, &corev1.ServiceList{}, func(l *corev1.ServiceList) []corev1.Service { return l.Items }) + if err != nil { + return err + } + + labelSelector, _ := metav1.LabelSelectorAsSelector(&metav1.LabelSelector{ + MatchLabels: map[string]string{"openstack.openstack.org/managed": "true"}, + }) + + deleteOpts := client.DeleteAllOfOptions{ + ListOptions: client.ListOptions{LabelSelector: labelSelector}, + } + + err = r.DeleteAllOf(ctx, &admissionv1.ValidatingWebhookConfiguration{}, &deleteOpts) + if err != nil && !apierrors.IsNotFound(err) { + return errors.Wrap(err, "failed to delete validating webhooks") + } + + Log.Info("All owned resources deleted successfully") + return nil +} + func (r *OpenStackReconciler) reconcileDelete(ctx context.Context, instance *operatorv1beta1.OpenStack, helper *helper.Helper) (ctrl.Result, error) { Log := r.GetLogger(ctx) Log.Info("Reconciling OpenStack initialization resource delete") @@ -987,7 +1093,7 @@ func (r *OpenStackReconciler) postCleanupObsoleteResources(ctx context.Context, // The horizon-operator.openstack-operators has references to old roles/bindings // the code below will delete those references before continuing for _, ref := range refs { - refData := ref.(map[string]interface{}) + refData := ref.(map[string]any) Log.Info("Deleting operator reference", "Reference", ref) obj := uns.Unstructured{} obj.SetName(refData["name"].(string)) diff --git a/internal/dataplane/util/image_registry_test.go b/internal/dataplane/util/image_registry_test.go index 7027ff2bd..8755011f0 100644 --- a/internal/dataplane/util/image_registry_test.go +++ b/internal/dataplane/util/image_registry_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package util +package util //nolint:revive // util is an acceptable package name in this context import ( "context" diff --git a/internal/operator/bindata/merge.go b/internal/operator/bindata/merge.go index ab7a6d762..20ab615df 100644 --- a/internal/operator/bindata/merge.go +++ b/internal/operator/bindata/merge.go @@ -60,10 +60,18 @@ func MergeWebhookConfigurationForUpdate(current, updated *uns.Unstructured) erro gvk := updated.GroupVersionKind() if gvk.Group == "admissionregistration.k8s.io" && (gvk.Kind == "MutatingWebhookConfiguration" || gvk.Kind == "ValidatingWebhookConfiguration") { + currentWebhooks, currentExists := current.Object["webhooks"].([]interface{}) + if !currentExists { + return nil + } for i, webhook := range updated.Object["webhooks"].([]interface{}) { + // Check if the index exists in the current webhooks list + if i >= len(currentWebhooks) { + continue + } - currentClientConfig := current.Object["webhooks"].([]interface{})[i].(map[string]interface{})["clientConfig"].(map[string]interface{}) + currentClientConfig := currentWebhooks[i].(map[string]interface{})["clientConfig"].(map[string]interface{}) if currentClientConfig != nil { webhook.(map[string]interface{})["clientConfig"] = currentClientConfig }