-
Notifications
You must be signed in to change notification settings - Fork 216
OTA-1546: Reconcile cv.spec.desiredUpdate.acceptRisks #1284
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e9f3c19
aa851c2
1061efd
9cec473
7a10659
14dffd7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,16 +12,21 @@ import ( | |
| "time" | ||
|
|
||
| "github.com/blang/semver/v4" | ||
| "github.com/google/go-cmp/cmp" | ||
| "github.com/google/go-cmp/cmp/cmpopts" | ||
| "github.com/google/uuid" | ||
| "k8s.io/apimachinery/pkg/api/equality" | ||
| "k8s.io/apimachinery/pkg/api/meta" | ||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
| utilerrors "k8s.io/apimachinery/pkg/util/errors" | ||
| "k8s.io/apimachinery/pkg/util/sets" | ||
| "k8s.io/klog/v2" | ||
|
|
||
| configv1 "github.com/openshift/api/config/v1" | ||
| "github.com/openshift/cluster-version-operator/lib/resourcemerge" | ||
| "github.com/openshift/cluster-version-operator/pkg/cincinnati" | ||
| "github.com/openshift/cluster-version-operator/pkg/clusterconditions" | ||
| "github.com/openshift/cluster-version-operator/pkg/internal" | ||
| ) | ||
|
|
||
| const noArchitecture string = "NoArchitecture" | ||
|
|
@@ -50,6 +55,12 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1 | |
| channel := config.Spec.Channel | ||
| desiredArch := optr.getDesiredArchitecture(config.Spec.DesiredUpdate) | ||
| currentArch := optr.getCurrentArchitecture() | ||
| acceptRisks := sets.New[string]() | ||
| if config.Spec.DesiredUpdate != nil { | ||
| for _, risk := range config.Spec.DesiredUpdate.AcceptRisks { | ||
| acceptRisks.Insert(risk.Name) | ||
| } | ||
| } | ||
|
|
||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| // updates are only checked at most once per minimumUpdateCheckInterval or if the generation changes | ||
| optrAvailableUpdates := optr.getAvailableUpdates() | ||
|
|
@@ -129,6 +140,9 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1 | |
| optrAvailableUpdates.UpdateService = updateService | ||
| optrAvailableUpdates.Channel = channel | ||
| optrAvailableUpdates.Architecture = desiredArch | ||
| optrAvailableUpdates.ShouldReconcileAcceptRisks = optr.shouldReconcileAcceptRisks | ||
| optrAvailableUpdates.AcceptRisks = acceptRisks | ||
| optrAvailableUpdates.RiskConditions = map[string][]metav1.Condition{} | ||
| optrAvailableUpdates.ConditionRegistry = optr.conditionRegistry | ||
| optrAvailableUpdates.Condition = condition | ||
|
|
||
|
|
@@ -167,9 +181,11 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1 | |
| } | ||
|
|
||
| type availableUpdates struct { | ||
| UpdateService string | ||
| Channel string | ||
| Architecture string | ||
| UpdateService string | ||
| Channel string | ||
| Architecture string | ||
| ShouldReconcileAcceptRisks func() bool | ||
| AcceptRisks sets.Set[string] | ||
|
|
||
| // LastAttempt records the time of the most recent attempt at update | ||
| // retrieval, regardless of whether it was successful. | ||
|
|
@@ -192,6 +208,9 @@ type availableUpdates struct { | |
| ConditionRegistry clusterconditions.ConditionRegistry | ||
|
|
||
| Condition configv1.ClusterOperatorStatusCondition | ||
|
|
||
| // RiskConditions stores the condition for every risk (name, url, message, matchingRules). | ||
| RiskConditions map[string][]metav1.Condition | ||
| } | ||
|
|
||
| func (u *availableUpdates) RecentlyAttempted(interval time.Duration) bool { | ||
|
|
@@ -291,14 +310,17 @@ func (optr *Operator) getAvailableUpdates() *availableUpdates { | |
| } | ||
|
|
||
| u := &availableUpdates{ | ||
| UpdateService: optr.availableUpdates.UpdateService, | ||
| Channel: optr.availableUpdates.Channel, | ||
| Architecture: optr.availableUpdates.Architecture, | ||
| LastAttempt: optr.availableUpdates.LastAttempt, | ||
| LastSyncOrConfigChange: optr.availableUpdates.LastSyncOrConfigChange, | ||
| Current: *optr.availableUpdates.Current.DeepCopy(), | ||
| ConditionRegistry: optr.availableUpdates.ConditionRegistry, // intentionally not a copy, to preserve cache state | ||
| Condition: optr.availableUpdates.Condition, | ||
| UpdateService: optr.availableUpdates.UpdateService, | ||
| Channel: optr.availableUpdates.Channel, | ||
| Architecture: optr.availableUpdates.Architecture, | ||
| ShouldReconcileAcceptRisks: optr.shouldReconcileAcceptRisks, | ||
| AcceptRisks: optr.availableUpdates.AcceptRisks, | ||
| RiskConditions: optr.availableUpdates.RiskConditions, | ||
| LastAttempt: optr.availableUpdates.LastAttempt, | ||
| LastSyncOrConfigChange: optr.availableUpdates.LastSyncOrConfigChange, | ||
| Current: *optr.availableUpdates.Current.DeepCopy(), | ||
| ConditionRegistry: optr.availableUpdates.ConditionRegistry, // intentionally not a copy, to preserve cache state | ||
| Condition: optr.availableUpdates.Condition, | ||
| } | ||
|
|
||
| if optr.availableUpdates.Updates != nil { | ||
|
|
@@ -426,8 +448,11 @@ func (u *availableUpdates) evaluateConditionalUpdates(ctx context.Context) { | |
| vj := semver.MustParse(u.ConditionalUpdates[j].Release.Version) | ||
| return vi.GTE(vj) | ||
| }) | ||
| if err := sanityCheck(u.ConditionalUpdates); err != nil { | ||
| klog.Errorf("Sanity check failed which might impact risk evaluation: %v", err) | ||
| } | ||
| for i, conditionalUpdate := range u.ConditionalUpdates { | ||
| condition := evaluateConditionalUpdate(ctx, conditionalUpdate.Risks, u.ConditionRegistry) | ||
| condition := evaluateConditionalUpdate(ctx, conditionalUpdate.Risks, u.ConditionRegistry, u.AcceptRisks, u.ShouldReconcileAcceptRisks, u.RiskConditions) | ||
|
|
||
| if condition.Status == metav1.ConditionTrue { | ||
| u.addUpdate(conditionalUpdate.Release) | ||
|
|
@@ -441,6 +466,25 @@ func (u *availableUpdates) evaluateConditionalUpdates(ctx context.Context) { | |
| } | ||
| } | ||
|
|
||
| func sanityCheck(updates []configv1.ConditionalUpdate) error { | ||
| risks := map[string]configv1.ConditionalUpdateRisk{} | ||
| var errs []error | ||
| for _, update := range updates { | ||
| for _, risk := range update.Risks { | ||
| if v, ok := risks[risk.Name]; ok { | ||
| if diff := cmp.Diff(v, risk, cmpopts.IgnoreFields(configv1.ConditionalUpdateRisk{}, "Conditions")); diff != "" { | ||
| errs = append(errs, fmt.Errorf("found collision on risk %s: %v and %v", risk.Name, v, risk)) | ||
| } | ||
| } else if trimmed := strings.TrimSpace(risk.Name); trimmed == "" { | ||
| errs = append(errs, fmt.Errorf("found invalid name on risk %v", risk)) | ||
| } else { | ||
| risks[risk.Name] = risk | ||
| } | ||
| } | ||
| } | ||
| return utilerrors.NewAggregate(errs) | ||
| } | ||
|
|
||
| func (u *availableUpdates) addUpdate(release configv1.Release) { | ||
| for _, update := range u.Updates { | ||
| if update.Image == release.Image { | ||
|
|
@@ -478,33 +522,47 @@ func newRecommendedStatus(now, want metav1.ConditionStatus) metav1.ConditionStat | |
| } | ||
|
|
||
| const ( | ||
| recommendedReasonRisksNotExposed = "NotExposedToRisks" | ||
| recommendedReasonEvaluationFailed = "EvaluationFailed" | ||
| recommendedReasonMultiple = "MultipleReasons" | ||
| recommendedReasonRisksNotExposed = "NotExposedToRisks" | ||
| recommendedReasonAllExposedRisksAccepted = "AllExposedRisksAccepted" | ||
| recommendedReasonEvaluationFailed = "EvaluationFailed" | ||
| recommendedReasonMultiple = "MultipleReasons" | ||
|
|
||
| // recommendedReasonExposed is used instead of the original name if it does | ||
| // not match the pattern for a valid k8s condition reason. | ||
| recommendedReasonExposed = "ExposedToRisks" | ||
|
|
||
| riskConditionReasonEvaluationFailed = "EvaluationFailed" | ||
| riskConditionReasonMatch = "Match" | ||
| riskConditionReasonNotMatch = "NotMatch" | ||
| ) | ||
|
|
||
| // Reasons follow same pattern as k8s Condition Reasons | ||
| // https://github.com/openshift/api/blob/59fa376de7cb668ddb95a7ee4e9879d7f6ca2767/vendor/k8s.io/apimachinery/pkg/apis/meta/v1/types.go#L1535-L1536 | ||
| var reasonPattern = regexp.MustCompile(`^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$`) | ||
|
|
||
| func newRecommendedReason(now, want string) string { | ||
| switch now { | ||
| case recommendedReasonRisksNotExposed: | ||
| switch { | ||
| case now == recommendedReasonRisksNotExposed || | ||
| now == recommendedReasonAllExposedRisksAccepted || | ||
| now == want: | ||
| return want | ||
| case want: | ||
| case want == recommendedReasonRisksNotExposed: | ||
| return now | ||
| default: | ||
| return recommendedReasonMultiple | ||
| } | ||
| } | ||
|
|
||
| func evaluateConditionalUpdate(ctx context.Context, risks []configv1.ConditionalUpdateRisk, conditionRegistry clusterconditions.ConditionRegistry) metav1.Condition { | ||
| func evaluateConditionalUpdate( | ||
| ctx context.Context, | ||
| risks []configv1.ConditionalUpdateRisk, | ||
| conditionRegistry clusterconditions.ConditionRegistry, | ||
| acceptRisks sets.Set[string], | ||
| shouldReconcileAcceptRisks func() bool, | ||
| riskConditions map[string][]metav1.Condition, | ||
| ) metav1.Condition { | ||
| recommended := metav1.Condition{ | ||
| Type: ConditionalUpdateConditionTypeRecommended, | ||
| Type: internal.ConditionalUpdateConditionTypeRecommended, | ||
| Status: metav1.ConditionTrue, | ||
| // FIXME: ObservedGeneration? That would capture upstream/channel, but not necessarily the currently-reconciling version. | ||
| Reason: recommendedReasonRisksNotExposed, | ||
|
|
@@ -513,18 +571,39 @@ func evaluateConditionalUpdate(ctx context.Context, risks []configv1.Conditional | |
|
|
||
| var errorMessages []string | ||
| for _, risk := range risks { | ||
| riskCondition := metav1.Condition{ | ||
| Type: internal.ConditionalUpdateRiskConditionTypeApplies, | ||
| Status: metav1.ConditionFalse, | ||
| Reason: riskConditionReasonNotMatch, | ||
| } | ||
| if match, err := conditionRegistry.Match(ctx, risk.MatchingRules); err != nil { | ||
| msg := unknownExposureMessage(risk, err) | ||
| recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionUnknown) | ||
| recommended.Reason = newRecommendedReason(recommended.Reason, recommendedReasonEvaluationFailed) | ||
| errorMessages = append(errorMessages, unknownExposureMessage(risk, err)) | ||
| errorMessages = append(errorMessages, msg) | ||
| riskCondition.Status = metav1.ConditionUnknown | ||
| riskCondition.Reason = riskConditionReasonEvaluationFailed | ||
| riskCondition.Message = msg | ||
| } else if match { | ||
| recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionFalse) | ||
| wantReason := recommendedReasonExposed | ||
| if reasonPattern.MatchString(risk.Name) { | ||
| wantReason = risk.Name | ||
| riskCondition.Status = metav1.ConditionTrue | ||
| riskCondition.Reason = riskConditionReasonMatch | ||
| if shouldReconcileAcceptRisks() && acceptRisks.Has(risk.Name) { | ||
| recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionTrue) | ||
| recommended.Reason = newRecommendedReason(recommended.Reason, recommendedReasonAllExposedRisksAccepted) | ||
| recommended.Message = "The update is recommended, because either risk does not apply to this cluster or it is accepted by cluster admins." | ||
| klog.V(2).Infof("Risk with name %q is accepted by the cluster admin and thus not in the evaluation of conditional update", risk.Name) | ||
| } else { | ||
| recommended.Status = newRecommendedStatus(recommended.Status, metav1.ConditionFalse) | ||
| wantReason := recommendedReasonExposed | ||
| if reasonPattern.MatchString(risk.Name) { | ||
| wantReason = risk.Name | ||
| } | ||
| recommended.Reason = newRecommendedReason(recommended.Reason, wantReason) | ||
| errorMessages = append(errorMessages, fmt.Sprintf("%s %s", risk.Message, risk.URL)) | ||
| } | ||
| recommended.Reason = newRecommendedReason(recommended.Reason, wantReason) | ||
| errorMessages = append(errorMessages, fmt.Sprintf("%s %s", risk.Message, risk.URL)) | ||
| } | ||
| if _, ok := riskConditions[risk.Name]; !ok { | ||
| riskConditions[risk.Name] = []metav1.Condition{riskCondition} | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm worried that if this stays conditional on
That avoids needing to drop down to the per-target level when managing the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thought about this too while code. |
||
| } | ||
| } | ||
| if len(errorMessages) > 0 { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.