diff --git a/install/0000_50_cluster-update-console-plugin_10_namespace.yaml b/install/0000_50_cluster-update-console-plugin_10_namespace.yaml new file mode 100644 index 000000000..2e8383be0 --- /dev/null +++ b/install/0000_50_cluster-update-console-plugin_10_namespace.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: openshift-cluster-update-console-plugin + annotations: + kubernetes.io/description: The OpenShift cluster-update console plugin provides a web-console interface for managing ClusterVersion updates. + capability.openshift.io/name: Console + release.openshift.io/feature-set: TechPreviewNoUpgrade + exclude.release.openshift.io/internal-openshift-hosted: "true" + include.release.openshift.io/self-managed-high-availability: "true" + labels: + openshift.io/cluster-monitoring: "true" + pod-security.kubernetes.io/audit: restricted + pod-security.kubernetes.io/enforce: restricted + pod-security.kubernetes.io/warn: restricted diff --git a/install/0000_50_cluster-update-console-plugin_20_networkpolicy.yaml b/install/0000_50_cluster-update-console-plugin_20_networkpolicy.yaml new file mode 100644 index 000000000..78bc82235 --- /dev/null +++ b/install/0000_50_cluster-update-console-plugin_20_networkpolicy.yaml @@ -0,0 +1,16 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: default-deny + namespace: openshift-cluster-update-console-plugin + annotations: + kubernetes.io/description: This NetworkPolicy is used to deny all ingress and egress traffic by default in this namespace, matching all Pods, and serving as a baseline. + capability.openshift.io/name: Console + release.openshift.io/feature-set: TechPreviewNoUpgrade + exclude.release.openshift.io/internal-openshift-hosted: "true" + include.release.openshift.io/self-managed-high-availability: "true" +spec: + podSelector: {} + policyTypes: + - Ingress + - Egress diff --git a/install/0000_50_cluster-update-console-plugin_50_deployment.yaml b/install/0000_50_cluster-update-console-plugin_50_deployment.yaml new file mode 100644 index 000000000..e6ad8892f --- /dev/null +++ b/install/0000_50_cluster-update-console-plugin_50_deployment.yaml @@ -0,0 +1,67 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cluster-update-console-plugin + namespace: openshift-cluster-update-console-plugin + annotations: + kubernetes.io/description: The OpenShift cluster-update console plugin provides a web-console interface for managing ClusterVersion updates. + capability.openshift.io/name: Console + release.openshift.io/feature-set: TechPreviewNoUpgrade + exclude.release.openshift.io/internal-openshift-hosted: "true" + include.release.openshift.io/self-managed-high-availability: "true" +spec: + selector: + matchLabels: + app: cluster-update-console-plugin + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 1 + type: RollingUpdate + template: + metadata: + annotations: + target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' + openshift.io/required-scc: restricted-v3 + labels: + app: cluster-update-console-plugin + spec: + automountServiceAccountToken: false + containers: + - name: plugin + image: '{{index .Images "cluster-update-console-plugin"}}' + imagePullPolicy: IfNotPresent + ports: + - name: https + containerPort: 9001 + resources: + requests: + cpu: 20m + memory: 50Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + terminationMessagePolicy: FallbackToLogsOnError + volumeMounts: + - mountPath: /var/cert + name: cluster-update-console-plugin-cert + readOnly: true + dnsPolicy: Default + hostUsers: false + priorityClassName: system-cluster-critical + securityContext: + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + terminationGracePeriodSeconds: 30 + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/infra + operator: Exists + volumes: + - name: cluster-update-console-plugin-cert + secret: + defaultMode: 420 + secretName: cluster-update-console-plugin-cert diff --git a/install/0000_50_cluster-update-console-plugin_60_service.yaml b/install/0000_50_cluster-update-console-plugin_60_service.yaml new file mode 100644 index 000000000..cd5f7b2b4 --- /dev/null +++ b/install/0000_50_cluster-update-console-plugin_60_service.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + name: openshift-cluster-update-console-plugin + namespace: openshift-cluster-update-console-plugin + annotations: + kubernetes.io/description: The OpenShift cluster-update console plugin provides a web-console interface for managing ClusterVersion updates. + service.beta.openshift.io/serving-cert-secret-name: cluster-update-console-plugin-cert + capability.openshift.io/name: Console + release.openshift.io/feature-set: TechPreviewNoUpgrade + exclude.release.openshift.io/internal-openshift-hosted: "true" + include.release.openshift.io/self-managed-high-availability: "true" +spec: + type: ClusterIP + selector: + app: cluster-update-console-plugin + ports: + - name: https + port: 9001 + targetPort: https diff --git a/install/0000_50_cluster-update-console-plugin_80_servicemonitor.yaml b/install/0000_50_cluster-update-console-plugin_80_servicemonitor.yaml new file mode 100644 index 000000000..a4af42847 --- /dev/null +++ b/install/0000_50_cluster-update-console-plugin_80_servicemonitor.yaml @@ -0,0 +1,27 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + k8s-app: cluster-version-operator + name: cluster-version-operator + namespace: openshift-cluster-version + annotations: + kubernetes.io/description: Configure Prometheus to monitor cluster-version operator metrics. + capability.openshift.io/name: Console + release.openshift.io/feature-set: TechPreviewNoUpgrade + exclude.release.openshift.io/internal-openshift-hosted: "true" + include.release.openshift.io/self-managed-high-availability: "true" +spec: + endpoints: + - interval: 30s + port: metrics + scheme: https + tlsConfig: + serverName: cluster-version-operator.openshift-cluster-version.svc + scrapeClass: tls-client-certificate-auth + namespaceSelector: + matchNames: + - openshift-cluster-version + selector: + matchLabels: + k8s-app: cluster-version-operator diff --git a/install/0000_50_cluster-update-console-plugin_81_prometheusrule.yaml b/install/0000_50_cluster-update-console-plugin_81_prometheusrule.yaml new file mode 100644 index 000000000..fc3ddd7fa --- /dev/null +++ b/install/0000_50_cluster-update-console-plugin_81_prometheusrule.yaml @@ -0,0 +1,122 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + k8s-app: cluster-version-operator + name: cluster-version-operator + namespace: openshift-cluster-version + annotations: + kubernetes.io/description: Alerting rules for when cluster-version operator metrics call for administrator attention. + capability.openshift.io/name: Console + release.openshift.io/feature-set: TechPreviewNoUpgrade + exclude.release.openshift.io/internal-openshift-hosted: "true" + include.release.openshift.io/self-managed-high-availability: "true" +spec: + groups: + - name: cluster-version + rules: + - alert: ClusterVersionOperatorDown + annotations: + summary: Cluster version operator has disappeared from Prometheus target discovery. + description: The operator may be down or disabled. The cluster will not be kept up to date and upgrades will not be possible. Inspect the openshift-cluster-version namespace for events or changes to the cluster-version-operator deployment or pods to diagnose and repair. {{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} For more information refer to {{ label \"url\" (first $console_url ) }}/k8s/cluster/projects/openshift-cluster-version.{{ end }}{{ end }}" }} + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-version-operator/ClusterVersionOperatorDown.md + expr: | + absent(up{job="cluster-version-operator"} == 1) + for: 10m + labels: + namespace: openshift-cluster-version + severity: critical + - alert: CannotRetrieveUpdates + annotations: + summary: Cluster version operator has not retrieved updates in {{ "{{ $value | humanizeDuration }}" }}. + description: Failure to retrieve updates means that cluster administrators will need to monitor for available updates on their own or risk falling behind on security or other bugfixes. If the failure is expected, you can clear spec.channel in the ClusterVersion object to tell the cluster-version operator to not retrieve updates. Failure reason {{ "{{ with $cluster_operator_conditions := \"cluster_operator_conditions\" | query}}{{range $value := .}}{{if and (eq (label \"name\" $value) \"version\") (eq (label \"condition\" $value) \"RetrievedUpdates\") (eq (label \"endpoint\" $value) \"metrics\") (eq (value $value) 0.0)}}{{label \"reason\" $value}} {{end}}{{end}}{{end}}" }}. For more information refer to `oc get clusterversion/version -o=jsonpath="{.status.conditions[?(.type=='RetrievedUpdates')]}{'\n'}"`{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}. + expr: | + max by (namespace) + ( + ( + time()-cluster_version_operator_update_retrieval_timestamp_seconds + ) >= 3600 + and ignoring(condition, name, reason) + (cluster_operator_conditions{name="version", condition="RetrievedUpdates", endpoint="metrics", reason!="NoChannel"}) + ) + labels: + severity: warning + - alert: UpdateAvailable + annotations: + summary: Your upstream update recommendation service recommends you update your cluster. + description: For more information refer to 'oc adm upgrade'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}. + expr: | + sum by (channel, namespace, upstream) (cluster_version_available_updates) > 0 + labels: + severity: info + - alert: ClusterReleaseNotAccepted + annotations: + summary: The desired cluster release has not been accepted for at least an hour. + description: The desired cluster release has not been accepted because {{ "{{ $labels.reason }}" }}, and the cluster will continue to reconcile an earlier release instead of moving towards that desired release. For more information refer to 'oc adm upgrade'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}. + expr: | + max by (namespace, name, reason) (cluster_operator_conditions{name="version", condition="ReleaseAccepted", endpoint="metrics"} == 0) + for: 60m + labels: + severity: warning + - name: cluster-operators + rules: + - alert: ClusterNotUpgradeable + annotations: + summary: One or more cluster operators have been blocking minor or major version cluster updates for at least an hour. + description: In most cases, you will still be able to apply patch releases. Reason {{ "{{ with $cluster_operator_conditions := \"cluster_operator_conditions\" | query}}{{range $value := .}}{{if and (eq (label \"name\" $value) \"version\") (eq (label \"condition\" $value) \"Upgradeable\") (eq (label \"endpoint\" $value) \"metrics\") (eq (value $value) 0.0) (ne (len (label \"reason\" $value)) 0) }}{{label \"reason\" $value}}.{{end}}{{end}}{{end}}"}} For more information refer to 'oc adm upgrade'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}. + expr: | + max by (namespace, name, condition, endpoint) (cluster_operator_conditions{name="version", condition="Upgradeable", endpoint="metrics"} == 0) + for: 60m + labels: + severity: info + - alert: ClusterOperatorDown + annotations: + summary: Cluster operator has not been available for 10 minutes. + description: The {{ "{{ $labels.name }}" }} operator may be down or disabled because {{ "{{ $labels.reason }}" }}, and the components it manages may be unavailable or degraded. Cluster upgrades may not complete. For more information refer to '{{ "{{ if eq $labels.name \"version\" }}oc adm upgrade{{ else }}oc get -o yaml clusteroperator {{ $labels.name }}{{ end }}" }}'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDown.md + expr: | + max by (namespace, name, reason) (cluster_operator_up{job="cluster-version-operator"} == 0) + for: 10m + labels: + severity: critical + - alert: ClusterOperatorDegraded + annotations: + summary: Cluster operator has been degraded for 30 minutes. + description: The {{ "{{ $labels.name }}" }} operator is degraded because {{ "{{ $labels.reason }}" }}, and the components it manages may have reduced quality of service. Cluster upgrades may not complete. For more information refer to '{{ "{{ if eq $labels.name \"version\" }}oc adm upgrade{{ else }}oc get -o yaml clusteroperator {{ $labels.name }}{{ end }}" }}'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}. + runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/ClusterOperatorDegraded.md + expr: | + max by (namespace, name, reason) + ( + ( + cluster_operator_conditions{job="cluster-version-operator", name!="version", condition="Degraded"} + or on (namespace, name) + cluster_operator_conditions{job="cluster-version-operator", name="version", condition="Failing"} + or on (namespace, name) + group by (namespace, name) (cluster_operator_up{job="cluster-version-operator"}) + ) == 1 + ) + for: 30m + labels: + severity: warning + - alert: ClusterOperatorFlapping + annotations: + summary: Cluster operator up status is changing often. + description: The {{ "{{ $labels.name }}" }} operator behavior might cause upgrades to be unstable. For more information refer to '{{ "{{ if eq $labels.name \"version\" }}oc adm upgrade{{ else }}oc get -o yaml clusteroperator {{ $labels.name }}{{ end }}" }}'{{ "{{ with $console_url := \"console_url\" | query }}{{ if ne (len (label \"url\" (first $console_url ) ) ) 0}} or {{ label \"url\" (first $console_url ) }}/settings/cluster/{{ end }}{{ end }}" }}. + expr: | + max by (namespace, name) (changes(cluster_operator_up{job="cluster-version-operator"}[2m]) > 2) + for: 10m + labels: + severity: warning + - alert: CannotEvaluateConditionalUpdates + annotations: + summary: Cluster Version Operator cannot evaluate conditional update matches for {{ "{{ $value | humanizeDuration }}" }}. + description: Failure to evaluate conditional update matches means that Cluster Version Operator cannot decide whether an update path is recommended or not. + expr: | + max by (version, condition, status, reason) + ( + ( + time()-cluster_version_conditional_update_condition_seconds{condition="Recommended", status="Unknown"} + ) >= 3600 + ) + labels: + severity: warning diff --git a/install/0000_50_cluster-update-console-plugin_90_consoleplugin.yaml b/install/0000_50_cluster-update-console-plugin_90_consoleplugin.yaml new file mode 100644 index 000000000..69bd0d9ee --- /dev/null +++ b/install/0000_50_cluster-update-console-plugin_90_consoleplugin.yaml @@ -0,0 +1,21 @@ +apiVersion: console.openshift.io/v1 +kind: ConsolePlugin +metadata: + name: openshift-cluster-update-console-plugin + annotations: + kubernetes.io/description: The OpenShift cluster-update console plugin provides a web-console interface for managing ClusterVersion updates. + capability.openshift.io/name: Console + release.openshift.io/feature-set: TechPreviewNoUpgrade + exclude.release.openshift.io/internal-openshift-hosted: "true" + include.release.openshift.io/self-managed-high-availability: "true" +spec: + displayName: Cluster Updates + i18n: + loadType: Preload + backend: + type: Service + service: + name: openshift-cluster-update-console-plugin + namespace: openshift-cluster-update-console-plugin + port: 9001 + basePath: / diff --git a/pkg/payload/payload.go b/pkg/payload/payload.go index 7c7af838b..2430c2c0c 100644 --- a/pkg/payload/payload.go +++ b/pkg/payload/payload.go @@ -153,7 +153,7 @@ func LoadUpdate(dir, releaseImage, excludeIdentifier string, requiredFeatureSet return nil, err } - tasks := loadPayloadTasks(releaseDir, cvoDir, releaseImage, profile) + tasks := loadPayloadTasks(releaseDir, cvoDir, releaseImage, profile, payload.ImageRef) var onlyKnownCaps *configv1.ClusterVersionCapabilitiesStatus @@ -317,13 +317,14 @@ type payloadTasks struct { skipFiles sets.Set[string] } -func loadPayloadTasks(releaseDir, cvoDir, releaseImage, clusterProfile string) []payloadTasks { +func loadPayloadTasks(releaseDir, cvoDir, releaseImage, clusterProfile string, imageRef *imagev1.ImageStream) []payloadTasks { cjf := filepath.Join(releaseDir, cincinnatiJSONFile) irf := filepath.Join(releaseDir, imageReferencesFile) mrc := manifestRenderConfig{ ReleaseImage: releaseImage, ClusterProfile: clusterProfile, + Images: imagesFromImageRef(imageRef), } return []payloadTasks{{ diff --git a/pkg/payload/render.go b/pkg/payload/render.go index fc075a6e5..696aa0ea8 100644 --- a/pkg/payload/render.go +++ b/pkg/payload/render.go @@ -20,6 +20,7 @@ import ( "github.com/openshift/api/config" configv1 "github.com/openshift/api/config/v1" + imagev1 "github.com/openshift/api/image/v1" "github.com/openshift/library-go/pkg/manifest" ) @@ -38,6 +39,13 @@ func Render(outputDir, releaseImage, clusterVersionManifestPath, featureGateMani } ) + imageRef, err := loadImageReferences(releaseManifestsDir) + if err != nil { + klog.Warningf("Failed to load image references for manifest rendering: %v", err) + } else { + renderConfig.Images = imagesFromImageRef(imageRef) + } + overrides, err := parseClusterVersionManifest(clusterVersionManifestPath) if err != nil { return fmt.Errorf("error parsing cluster version manifest: %w", err) @@ -181,6 +189,21 @@ func renderDir(renderConfig manifestRenderConfig, idir, odir string, overrides [ type manifestRenderConfig struct { ReleaseImage string ClusterProfile string + Images map[string]string +} + +// imagesFromImageRef builds a map from image short names to their resolved URIs. +func imagesFromImageRef(imageRef *imagev1.ImageStream) map[string]string { + images := make(map[string]string) + if imageRef == nil { + return images + } + for _, tag := range imageRef.Spec.Tags { + if tag.From != nil && tag.From.Kind == "DockerImage" { + images[tag.Name] = tag.From.Name + } + } + return images } // renderManifest Executes go text template from `manifestBytes` with `config`. diff --git a/pkg/payload/render_test.go b/pkg/payload/render_test.go index deb475b16..64ed21e20 100644 --- a/pkg/payload/render_test.go +++ b/pkg/payload/render_test.go @@ -313,6 +313,9 @@ func Test_cvoManifests(t *testing.T) { config := manifestRenderConfig{ ReleaseImage: "quay.io/cvo/release:latest", ClusterProfile: "some-profile", + Images: map[string]string{ + "cluster-update-console-plugin": "quay.io/openshift/cluster-update-console-plugin:latest", + }, } tests := []struct {