-
Notifications
You must be signed in to change notification settings - Fork 230
USHIFT-6797: C2CC Probe remote clusters #6729
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d2ed223
1fd4d5e
f6e1153
a2b1521
815543c
8158bb2
6277f6e
9256717
6f96e91
9f7b26a
b395683
9c2451e
12d1d4a
00bf8b1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| apiVersion: rbac.authorization.k8s.io/v1 | ||
| kind: ClusterRole | ||
| metadata: | ||
| name: microshift-c2cc-probe | ||
| rules: | ||
| - apiGroups: | ||
| - microshift.io | ||
| resources: | ||
| - remoteclusters | ||
| verbs: | ||
| - get | ||
| - list | ||
| - watch | ||
| - apiGroups: | ||
| - microshift.io | ||
| resources: | ||
| - remoteclusters/status | ||
| verbs: | ||
| - update | ||
| - patch | ||
| - apiGroups: | ||
| - security.openshift.io | ||
| resources: | ||
| - securitycontextconstraints | ||
| verbs: | ||
| - use | ||
| resourceNames: | ||
| - privileged |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,12 @@ | ||
| apiVersion: rbac.authorization.k8s.io/v1 | ||
| kind: ClusterRoleBinding | ||
| metadata: | ||
| name: microshift-c2cc-probe | ||
| roleRef: | ||
| apiGroup: rbac.authorization.k8s.io | ||
| kind: ClusterRole | ||
| name: microshift-c2cc-probe | ||
| subjects: | ||
| - kind: ServiceAccount | ||
| namespace: microshift-c2cc | ||
| name: c2cc-probe |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| apiVersion: apps/v1 | ||
| kind: Deployment | ||
| metadata: | ||
| namespace: microshift-c2cc | ||
| name: c2cc-probe | ||
| labels: | ||
| app: c2cc-probe | ||
| spec: | ||
| replicas: 1 | ||
| strategy: | ||
| type: Recreate | ||
| selector: | ||
| matchLabels: | ||
| app: c2cc-probe | ||
| template: | ||
| metadata: | ||
| labels: | ||
| app: c2cc-probe | ||
| annotations: | ||
| target.workload.openshift.io/management: '{"effect": "PreferredDuringScheduling"}' | ||
| openshift.io/required-scc: privileged | ||
| spec: | ||
| serviceAccountName: c2cc-probe | ||
| containers: | ||
| - name: c2cc-probe | ||
| image: '{{ .ReleaseImage.cli }}' | ||
| imagePullPolicy: IfNotPresent | ||
| command: | ||
| - /host/usr/bin/microshift | ||
| - c2cc-probe | ||
| ports: | ||
| - containerPort: 8080 | ||
| name: probe | ||
| protocol: TCP | ||
| livenessProbe: | ||
| httpGet: | ||
| path: / | ||
| port: 8080 | ||
| initialDelaySeconds: 10 | ||
| periodSeconds: 10 | ||
|
Comment on lines
+35
to
+40
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Add a readiness probe for endpoint gating. Without a readiness probe, the Service can route traffic before the probe server is actually listening, which can cause avoidable startup failures.
Suggested change:
livenessProbe:
httpGet:
path: /
port: 8080
initialDelaySeconds: 10
periodSeconds: 10
+ readinessProbe:
+ httpGet:
+ path: /
+ port: 8080
+ initialDelaySeconds: 2
+ periodSeconds: 5
🤖 Prompt for AI Agents |
||
| resources: | ||
| requests: | ||
| cpu: 50m | ||
| memory: 64Mi | ||
| volumeMounts: | ||
| - name: microshift-binary | ||
| mountPath: /host/usr/bin/microshift | ||
| readOnly: true | ||
| volumes: | ||
| - name: microshift-binary | ||
| hostPath: | ||
| path: /usr/bin/microshift | ||
| type: File | ||
| nodeSelector: | ||
| node-role.kubernetes.io/master: "" | ||
| priorityClassName: system-cluster-critical | ||
| tolerations: | ||
| - key: node-role.kubernetes.io/master | ||
| operator: Exists | ||
| effect: NoSchedule | ||
|
Comment on lines
+54
to
+60
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
#!/bin/bash
set -euo pipefail
FILE="assets/components/c2cc/deployment.yaml"
echo "=== File exists? ==="
ls -l "$FILE" || exit 1
echo
echo "=== Lines 1-120 (context) ==="
nl -ba "$FILE" | sed -n '1,120p'
echo
echo "=== Lines 45-75 (scheduling snippet focus) ==="
nl -ba "$FILE" | sed -n '45,75p'
echo
echo "=== Search for scheduling/topology fields in this file ==="
rg -n --hidden --no-ignore-vcs "(nodeSelector|affinity|podAntiAffinity|topologySpreadConstraints|topologyKey|replicas|podDisruptionBudget|PDB|node-role.kubernetes.io/master|control-plane|master|kubernetes.io/hostname)" "$FILE" || true
echo
echo "=== Repo-wide usage of node-role.kubernetes.io/master ==="
rg -n --hidden --no-ignore-vcs "node-role\.kubernetes\.io/master" . || true
Repository: openshift/microshift
Length of output: 266
🏁 Script executed:
#!/bin/bash
set -euo pipefail
FILE="assets/components/c2cc/deployment.yaml"
echo "=== File exists? ==="
ls -l "$FILE" || exit 1
echo
echo "=== Lines 1-120 (context) ==="
awk 'NR>=1 && NR<=120 {printf "%6d %s\n", NR, $0}' "$FILE"
echo
echo "=== Lines 45-75 (scheduling snippet focus) ==="
awk 'NR>=45 && NR<=75 {printf "%6d %s\n", NR, $0}' "$FILE"
echo
echo "=== Search for scheduling/topology fields in this file ==="
rg -n --hidden --no-ignore-vcs "(nodeSelector|affinity|podAntiAffinity|topologySpreadConstraints|topologyKey|replicas|podDisruptionBudget|PDB|node-role.kubernetes.io/master|control-plane|master|kubernetes.io/hostname)" "$FILE" || true
echo
echo "=== Repo-wide usage of node-role.kubernetes.io/master ==="
rg -n --hidden --no-ignore-vcs "node-role\.kubernetes\.io/master" . || true
Repository: openshift/microshift
Length of output: 15624
🏁 Script executed:
#!/bin/bash
set -euo pipefail
FILE="assets/components/c2cc/deployment.yaml"
echo "=== File exists? ==="
ls -l "$FILE" || exit 1
echo
echo "=== Lines 1-120 (context) ==="
awk 'NR>=1 && NR<=120 {printf "%6d %s\n", NR, $0}' "$FILE"
echo
echo "=== Lines 45-75 (scheduling snippet focus) ==="
awk 'NR>=45 && NR<=75 {printf "%6d %s\n", NR, $0}' "$FILE"
echo
echo "=== Search for scheduling/topology fields in this file ==="
rg -n --hidden --no-ignore-vcs "(nodeSelector|affinity|podAntiAffinity|topologySpreadConstraints|topologyKey|replicas|podDisruptionBudget|PDB|node-role.kubernetes.io/master|control-plane|master|kubernetes.io/hostname)" "$FILE" || true
echo
echo "=== Repo-wide usage of node-role.kubernetes.io/master ==="
rg -n --hidden --no-ignore-vcs "node-role\.kubernetes\.io/master" . || true
Repository: openshift/microshift
Length of output: 15624
🏁 Script executed:
#!/bin/bash
set -euo pipefail
echo "=== Repo-wide: control-plane label usage ==="
rg -n --hidden --no-ignore-vcs "node-role\.kubernetes\.io/control-plane" . || true
echo
echo "=== Repo-wide: arbiter mentions ==="
rg -n --hidden --no-ignore-vcs "arbiter" . || true
echo
echo "=== Repo-wide: SNO mention ==="
rg -n --hidden --no-ignore-vcs "\bSNO\b|single[- ]node|single[- ]node" . || true
echo
echo "=== CI/topology job hints ==="
rg -n --hidden --no-ignore-vcs "topology|topological|spread|DoNotSchedule|hostname|podAntiAffinity" .github . || true
Repository: openshift/microshift
Length of output: 50378
🏁 Script executed:
#!/bin/bash
set -euo pipefail
echo "=== Repo-wide: control-plane label usage ==="
rg -n --hidden --no-ignore-vcs "node-role\.kubernetes\.io/control-plane" . || true
echo
echo "=== Repo-wide: arbiter mentions ==="
rg -n --hidden --no-ignore-vcs "arbiter" . || true
echo
echo "=== Repo-wide: SNO mention ==="
rg -n --hidden --no-ignore-vcs "\bSNO\b|single[- ]node|single[- ]node" . || true
echo
echo "=== CI/topology job hints ==="
rg -n --hidden --no-ignore-vcs "topology|topological|spread|DoNotSchedule|hostname|podAntiAffinity" .github . || true
Repository: openshift/microshift
Length of output: 50378
Avoid hard pinning c2cc-probe to master-labeled control-plane nodes
🤖 Prompt for AI Agents
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Won't fix — MicroShift is single-node. Every other deployment (service-ca, OVN, CSI, DNS) uses the identical
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
✏️ Learnings added
|
||
| - key: node.kubernetes.io/unreachable | ||
| operator: Exists | ||
| effect: NoExecute | ||
| tolerationSeconds: 120 | ||
| - key: node.kubernetes.io/not-ready | ||
| operator: Exists | ||
| effect: NoExecute | ||
| tolerationSeconds: 120 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| apiVersion: v1 | ||
| kind: Namespace | ||
| metadata: | ||
| name: microshift-c2cc | ||
| labels: | ||
| pod-security.kubernetes.io/enforce: privileged | ||
| pod-security.kubernetes.io/audit: privileged | ||
| pod-security.kubernetes.io/warn: privileged | ||
| annotations: | ||
| openshift.io/node-selector: "" | ||
| workload.openshift.io/allowed: "management" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| apiVersion: v1 | ||
| kind: Service | ||
| metadata: | ||
| namespace: microshift-c2cc | ||
| name: c2cc-probe | ||
| spec: | ||
| clusterIP: '{{ .ProbeServiceClusterIP }}' | ||
| ports: | ||
| - name: probe | ||
| port: 8080 | ||
| targetPort: 8080 | ||
| protocol: TCP | ||
| selector: | ||
| app: c2cc-probe |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| apiVersion: v1 | ||
| kind: ServiceAccount | ||
| metadata: | ||
| namespace: microshift-c2cc | ||
| name: c2cc-probe |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
| --- | ||
| apiVersion: apiextensions.k8s.io/v1 | ||
| kind: CustomResourceDefinition | ||
| metadata: | ||
| annotations: | ||
| controller-gen.kubebuilder.io/version: v0.18.0 | ||
| name: remoteclusters.microshift.io | ||
| spec: | ||
| group: microshift.io | ||
| names: | ||
| kind: RemoteCluster | ||
| listKind: RemoteClusterList | ||
| plural: remoteclusters | ||
| singular: remotecluster | ||
| scope: Cluster | ||
| versions: | ||
| - name: v1alpha1 | ||
| schema: | ||
| openAPIV3Schema: | ||
| description: |- | ||
| RemoteCluster represents a remote cluster's healthcheck probe target. | ||
| Created by the C2CC controller, read and updated by the probe pod. | ||
| properties: | ||
| apiVersion: | ||
| description: |- | ||
| APIVersion defines the versioned schema of this representation of an object. | ||
| Servers should convert recognized schemas to the latest internal value, and | ||
| may reject unrecognized values. | ||
| More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | ||
| type: string | ||
| kind: | ||
| description: |- | ||
| Kind is a string value representing the REST resource this object represents. | ||
| Servers may infer this from the endpoint the client submits requests to. | ||
| Cannot be updated. | ||
| In CamelCase. | ||
| More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | ||
| type: string | ||
| metadata: | ||
| type: object | ||
| spec: | ||
| properties: | ||
| probeInterval: | ||
| default: 10s | ||
| description: Interval between probe attempts (e.g. "10s", "1m"). | ||
| type: string | ||
| probeTarget: | ||
|
Comment on lines
+43
to
+47
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Enforce
🤖 Prompt for AI Agents |
||
| description: IP:port of the remote cluster's probe service (11th IP | ||
| in remote service CIDR, port 8080). | ||
| type: string | ||
| required: | ||
| - probeInterval | ||
| - probeTarget | ||
| type: object | ||
| status: | ||
| description: RemoteClusterStatus is populated by the probe pod with health | ||
| probe results. | ||
| properties: | ||
| errors: | ||
| items: | ||
| type: string | ||
| type: array | ||
| lastProbeTime: | ||
| format: date-time | ||
| type: string | ||
| lastSuccessfulProbe: | ||
| format: date-time | ||
| type: string | ||
| state: | ||
| default: NeverProbed | ||
| enum: | ||
| - NeverProbed | ||
| - Healthy | ||
| - Unhealthy | ||
| type: string | ||
| required: | ||
| - state | ||
| type: object | ||
| required: | ||
| - spec | ||
| type: object | ||
| served: true | ||
| storage: true | ||
| subresources: | ||
| status: {} | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| // +kubebuilder:object:generate=true | ||
| // +groupName=microshift.io | ||
| // +k8s:deepcopy-gen=package | ||
|
|
||
| package v1alpha1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| package v1alpha1 | ||
|
|
||
| import ( | ||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
| "k8s.io/apimachinery/pkg/runtime" | ||
| "k8s.io/apimachinery/pkg/runtime/schema" | ||
| ) | ||
|
|
||
| var ( | ||
| GroupName = "microshift.io" | ||
| GroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1alpha1"} | ||
|
|
||
| SchemeGroupVersion = GroupVersion | ||
|
|
||
| schemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) | ||
| AddToScheme = schemeBuilder.AddToScheme | ||
| ) | ||
|
|
||
| func Resource(resource string) schema.GroupResource { | ||
| return schema.GroupResource{Group: GroupName, Resource: resource} | ||
| } | ||
|
|
||
| func addKnownTypes(scheme *runtime.Scheme) error { | ||
| scheme.AddKnownTypes(GroupVersion, | ||
| &RemoteCluster{}, | ||
| &RemoteClusterList{}, | ||
| ) | ||
| metav1.AddToGroupVersion(scheme, GroupVersion) | ||
| return nil | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
| package v1alpha1 | ||
|
|
||
| import ( | ||
| metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
| ) | ||
|
|
||
| // +genclient | ||
| // +genclient:nonNamespaced | ||
| // +kubebuilder:object:root=true | ||
| // +kubebuilder:resource:scope=Cluster | ||
| // +kubebuilder:subresource:status | ||
| // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object | ||
|
|
||
| // RemoteCluster represents a remote cluster's healthcheck probe target. | ||
| // Created by the C2CC controller, read and updated by the probe pod. | ||
| type RemoteCluster struct { | ||
| metav1.TypeMeta `json:",inline"` | ||
| metav1.ObjectMeta `json:"metadata,omitempty"` | ||
|
|
||
| Spec RemoteClusterSpec `json:"spec"` | ||
| Status RemoteClusterStatus `json:"status,omitempty"` | ||
| } | ||
|
|
||
| type RemoteClusterSpec struct { | ||
| // IP:port of the remote cluster's probe service (11th IP in remote service CIDR, port 8080). | ||
| // +kubebuilder:validation:Required | ||
| ProbeTarget string `json:"probeTarget"` | ||
|
|
||
| // Interval between probe attempts (e.g. "10s", "1m"). | ||
| // +kubebuilder:default="10s" | ||
| ProbeInterval metav1.Duration `json:"probeInterval"` | ||
| } | ||
|
|
||
| // RemoteClusterStatus is populated by the probe pod with health probe results. | ||
| type RemoteClusterStatus struct { | ||
| // +kubebuilder:validation:Enum=NeverProbed;Healthy;Unhealthy | ||
| // +kubebuilder:default="NeverProbed" | ||
| State string `json:"state"` | ||
| // +optional | ||
| LastSuccessfulProbe *metav1.Time `json:"lastSuccessfulProbe,omitempty"` | ||
| // +optional | ||
| LastProbeTime *metav1.Time `json:"lastProbeTime,omitempty"` | ||
| // +optional | ||
| Errors []string `json:"errors,omitempty"` | ||
| } | ||
|
|
||
| // +kubebuilder:object:root=true | ||
| // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object | ||
|
|
||
| // RemoteClusterList contains a list of RemoteCluster resources. | ||
| type RemoteClusterList struct { | ||
| metav1.TypeMeta `json:",inline"` | ||
| metav1.ListMeta `json:"metadata,omitempty"` | ||
|
|
||
| Items []RemoteCluster `json:"items"` | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion | 🟠 Major | ⚡ Quick win
Wire `generate-crds` into the default generate/verify flow.
Right now this target is leaf-only, so the visible `make`, `make verify`, and `make verify-config` paths won't catch stale CRDs or generated clients after API changes.
Suggested change
🤖 Prompt for AI Agents