diff --git a/.gitignore b/.gitignore index 009e10da1..480a9b5b4 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,7 @@ bin charts/**/charts charts/koperator/requirements.lock - +charts/kafka-operator/ingress # Test binary, build with `go test -c` *.test diff --git a/__debug_bin b/__debug_bin new file mode 100644 index 000000000..e69de29bb diff --git a/api/v1beta1/kafkacluster_types.go b/api/v1beta1/kafkacluster_types.go index cdb7671e1..bafbdec07 100644 --- a/api/v1beta1/kafkacluster_types.go +++ b/api/v1beta1/kafkacluster_types.go @@ -157,9 +157,22 @@ type KafkaClusterSpec struct { // This is default to be true; if set to false, the Kafka cluster is in ZooKeeper mode. // +kubebuilder:default=false // +optional - KRaftMode bool `json:"kRaft"` - HeadlessServiceEnabled bool `json:"headlessServiceEnabled"` - ListenersConfig ListenersConfig `json:"listenersConfig"` + KRaftMode bool `json:"kRaft"` + HeadlessServiceEnabled bool `json:"headlessServiceEnabled"` + // localDebugEnabled is used to decide whether to create a separate loadbalancer services for the + // Kafka and Cruise Control Pods. These services will expose the internal listener ports of the Kafka + // cluster with LoadBalancer type, which can be used for running Koperator on a local machine against + // a kafkaCluster instance on a Kind Cluster. + // +kubebuilder:default=false + // +optional + LocalDebugEnabled bool `json:"localDebugEnabled"` + // Allows ScaleOps to manage Memory and CPU Resource Requests for Kafka Broker Pods. + // This Disables CPU and Memory request reconciliation from the desired state defined in + // the KafkaCluster to the current state in the Kubernetes Cluster + // +kubebuilder:default=false + // +optional + ScaleOpsEnabled bool `json:"scaleOpsEnabled"` + ListenersConfig ListenersConfig `json:"listenersConfig"` // Custom ports to expose in the container. 
Example use case: a custom kafka distribution, that includes an integrated metrics api endpoint AdditionalPorts []corev1.ContainerPort `json:"additionalPorts,omitempty"` // ZKAddresses specifies the ZooKeeper connection string diff --git a/charts/kafka-operator/crds/kafkaclusters.yaml b/charts/kafka-operator/crds/kafkaclusters.yaml index e3fd3e25d..99f432da8 100644 --- a/charts/kafka-operator/crds/kafkaclusters.yaml +++ b/charts/kafka-operator/crds/kafkaclusters.yaml @@ -23670,6 +23670,14 @@ spec: required: - internalListeners type: object + localDebugEnabled: + default: false + description: |- + localDebugEnabled is used to decide whether to create a separate loadbalancer services for the + Kafka and Cruise Control Pods. These services will expose the internal listener ports of the Kafka + cluster with LoadBalancer type, which can be used for running Koperator on a local machine against + a kafkaCluster instance on a Kind Cluster. + type: boolean monitoringConfig: description: MonitoringConfig defines the config for monitoring Kafka and Cruise Control @@ -23735,6 +23743,13 @@ spec: required: - failureThreshold type: object + scaleOpsEnabled: + default: false + description: |- + Allows ScaleOps to manage Memory and CPU Resource Requests for Kafka Broker Pods. 
+ This Disables CPU and Memory request reconciliation from the desired state defined in + the KafkaCluster to the current state in the Kubernetes Cluster + type: boolean taintedBrokersSelector: description: Selector for broker pods that need to be recycled/reconciled properties: diff --git a/comparenodes.txt b/comparenodes.txt new file mode 100644 index 000000000..944298aed --- /dev/null +++ b/comparenodes.txt @@ -0,0 +1,97 @@ + +pipeline-kafka-1101-wltvt 2/2 Running 0 21d 172.19.12.11 vmss-agent-w13v5az1-23prodva7-ehuor00000l +pipeline-kafka-1102-w4cd7 2/2 Running 0 21d 172.19.84.45 vmss-agent-w13v5az1-23prodva7-ehuor00001c +pipeline-kafka-1103-ck4kx 2/2 Running 0 21d 172.19.15.64 vmss-agent-w13v5az1-23prodva7-ehuor00000m +pipeline-kafka-1104-gqg7l 2/2 Running 0 21d 172.19.16.40 vmss-agent-w13v5az1-23prodva7-ehuor00000n +pipeline-kafka-1105-5rsfr 2/2 Running 0 21d 172.19.36.120 vmss-agent-w13v5az1-23prodva7-ehuor00000y +pipeline-kafka-1106-dgwfd 2/2 Running 0 21d 172.19.55.20 vmss-agent-w13v5az1-23prodva7-ehuor000010 +pipeline-kafka-1107-djtps 2/2 Running 0 21d 172.19.56.4 vmss-agent-w13v5az1-23prodva7-ehuor000014 +pipeline-kafka-1108-cgwwd 2/2 Running 0 21d 172.19.58.48 vmss-agent-w13v5az1-23prodva7-ehuor000013 +pipeline-kafka-1109-bkp4q 2/2 Running 0 21d 172.19.57.155 vmss-agent-w13v5az1-23prodva7-ehuor000011 +pipeline-kafka-1110-ld67w 2/2 Running 0 21d 172.19.59.103 vmss-agent-w13v5az1-23prodva7-ehuor000012 +pipeline-kafka-1111-plgmt 2/2 Running 0 21d 172.19.29.107 vmss-agent-w13v5az1-23prodva7-ehuor00000s +pipeline-kafka-1112-5kgk2 2/2 Running 0 21d 172.19.17.18 vmss-agent-w13v5az1-23prodva7-ehuor00000t +pipeline-kafka-1113-ckcml 2/2 Running 0 21d 172.19.21.49 vmss-agent-w13v5az1-23prodva7-ehuor00000u +pipeline-kafka-1114-jpml5 2/2 Running 0 21d 172.19.18.133 vmss-agent-w13v5az1-23prodva7-ehuor00000o +pipeline-kafka-1115-k8wsj 2/2 Running 0 21d 172.19.24.227 vmss-agent-w13v5az1-23prodva7-ehuor00000z +pipeline-kafka-1116-vn6n7 2/2 Running 0 21d 172.19.14.31 
vmss-agent-w13v5az1-23prodva7-ehuor00000r +pipeline-kafka-1117-fljkh 2/2 Running 0 21d 172.19.20.166 vmss-agent-w13v5az1-23prodva7-ehuor00000v +pipeline-kafka-1118-mxsxk 2/2 Running 0 21d 172.19.23.181 vmss-agent-w13v5az1-23prodva7-ehuor00000x +pipeline-kafka-1119-bk82z 2/2 Running 0 21d 172.19.22.160 vmss-agent-w13v5az1-23prodva7-ehuor00000w +pipeline-kafka-1120-bkcpp 2/2 Running 0 21d 172.19.19.58 vmss-agent-w13v5az1-23prodva7-ehuor00000q +pipeline-kafka-1121-7dpfd 2/2 Running 0 21d 172.19.13.130 vmss-agent-w13v5az1-23prodva7-ehuor00000p +pipeline-kafka-1122-tqg8g 2/2 Running 0 21d 172.19.77.183 vmss-agent-w13v5az1-23prodva7-ehuor000015 +pipeline-kafka-1123-dkcj6 2/2 Running 0 21d 172.19.78.2 vmss-agent-w13v5az1-23prodva7-ehuor000016 +pipeline-kafka-1124-xqw8d 2/2 Running 0 21d 172.19.79.165 vmss-agent-w13v5az1-23prodva7-ehuor000017 +pipeline-kafka-1125-4rprl 2/2 Running 0 21d 172.19.80.33 vmss-agent-w13v5az1-23prodva7-ehuor000018 +pipeline-kafka-1126-dcp88 2/2 Running 0 21d 172.19.81.121 vmss-agent-w13v5az1-23prodva7-ehuor000019 +pipeline-kafka-1127-s7bwn 2/2 Running 0 21d 172.19.82.25 vmss-agent-w13v5az1-23prodva7-ehuor00001a +pipeline-kafka-1128-4djgb 2/2 Running 0 21d 172.19.83.104 vmss-agent-w13v5az1-23prodva7-ehuor00001b +pipeline-kafka-1129-w7rct 2/2 Running 0 19d 172.19.124.144 vmss-agent-w13v5az1-23prodva7-ehuor00001f +pipeline-kafka-1130-kjl2c 2/2 Running 0 19d 172.19.126.250 vmss-agent-w13v5az1-23prodva7-ehuor00001h +pipeline-kafka-1131-xzjjj 2/2 Running 0 19d 172.19.125.210 vmss-agent-w13v5az1-23prodva7-ehuor00001j +pipeline-kafka-1132-vbfss 2/2 Running 0 19d 172.19.123.199 vmss-agent-w13v5az1-23prodva7-ehuor00001e +pipeline-kafka-1201-tj9gt 2/2 Running 0 21d 172.19.73.108 vmss-agent-w14v5az2-23prodva7-ehuor000009 +pipeline-kafka-1202-thrj5 2/2 Running 0 21d 172.19.76.81 vmss-agent-w14v5az2-23prodva7-ehuor00000c +pipeline-kafka-1203-nzqm9 2/2 Running 0 44h 172.19.71.85 vmss-agent-w14v5az2-23prodva7-ehuor00000a +pipeline-kafka-1204-gvvpw 2/2 Running 0 
21d 172.19.74.168 vmss-agent-w14v5az2-23prodva7-ehuor000007 +pipeline-kafka-1205-wm2mb 2/2 Running 0 21d 172.19.35.121 vmss-agent-w13v5az2-23prodva7-ehuor00000o +pipeline-kafka-1206-w4w8f 2/2 Running 0 21d 172.19.38.94 vmss-agent-w13v5az2-23prodva7-ehuor00000m +pipeline-kafka-1207-vwcm9 2/2 Running 0 21d 172.19.54.54 vmss-agent-w13v5az2-23prodva7-ehuor00000h +pipeline-kafka-1208-zdnfq 2/2 Running 0 21d 172.19.72.1 vmss-agent-w14v5az2-23prodva7-ehuor000008 +pipeline-kafka-1209-5mq52 2/2 Running 0 21d 172.19.28.103 vmss-agent-w13v5az2-23prodva7-ehuor00000s +pipeline-kafka-1210-ffzsk 2/2 Running 0 21d 172.19.26.74 vmss-agent-w13v5az2-23prodva7-ehuor00000u +pipeline-kafka-1211-75l4f 2/2 Running 0 3d8h 172.19.30.196 vmss-agent-w13v5az2-23prodva7-ehuor00000i +pipeline-kafka-1212-276xm 2/2 Running 0 21d 172.19.37.73 vmss-agent-w13v5az2-23prodva7-ehuor00000k +pipeline-kafka-1213-jlj6b 2/2 Running 0 21d 172.19.33.4 vmss-agent-w13v5az2-23prodva7-ehuor00000n +pipeline-kafka-1214-fc8lr 2/2 Running 0 21d 172.19.31.40 vmss-agent-w13v5az2-23prodva7-ehuor00000r +pipeline-kafka-1215-5ls95 2/2 Running 0 21d 172.19.53.67 vmss-agent-w13v5az2-23prodva7-ehuor00000t +pipeline-kafka-1216-p4847 2/2 Running 0 21d 172.19.70.35 vmss-agent-w14v5az2-23prodva7-ehuor000005 +pipeline-kafka-1217-lssnb 2/2 Running 0 21d 172.19.75.85 vmss-agent-w14v5az2-23prodva7-ehuor00000b +pipeline-kafka-1218-x5dl4 2/2 Running 0 21d 172.19.69.124 vmss-agent-w14v5az2-23prodva7-ehuor000006 +pipeline-kafka-1219-qx8gl 2/2 Running 0 21d 172.19.27.43 vmss-agent-w13v5az2-23prodva7-ehuor00000q +pipeline-kafka-1220-pk74t 2/2 Running 0 21d 172.19.34.210 vmss-agent-w13v5az2-23prodva7-ehuor00000p +pipeline-kafka-1221-kbp49 2/2 Running 0 21d 172.19.85.96 vmss-agent-w13v5az2-23prodva7-ehuor000013 +pipeline-kafka-1222-c97nh 2/2 Running 0 21d 172.19.86.213 vmss-agent-w13v5az2-23prodva7-ehuor000014 +pipeline-kafka-1223-7k9nw 2/2 Running 0 21d 172.19.87.146 vmss-agent-w13v5az2-23prodva7-ehuor000015 +pipeline-kafka-1224-m25z6 2/2 
Running 0 21d 172.19.88.150 vmss-agent-w13v5az2-23prodva7-ehuor000016 +pipeline-kafka-1225-bkwzv 2/2 Running 0 21d 172.19.89.142 vmss-agent-w13v5az2-23prodva7-ehuor000017 +pipeline-kafka-1226-hsn8r 2/2 Running 0 21d 172.19.90.114 vmss-agent-w13v5az2-23prodva7-ehuor000018 +pipeline-kafka-1227-kwvfc 2/2 Running 0 21d 172.19.91.37 vmss-agent-w13v5az2-23prodva7-ehuor000019 +pipeline-kafka-1228-p9pk8 2/2 Running 0 21d 172.19.92.99 vmss-agent-w13v5az2-23prodva7-ehuor00001a +pipeline-kafka-1229-rm6lk 2/2 Running 0 19d 172.19.129.163 vmss-agent-w13v5az2-23prodva7-ehuor00001g +pipeline-kafka-1230-vt94d 2/2 Running 0 19d 172.19.128.14 vmss-agent-w13v5az2-23prodva7-ehuor00001i +pipeline-kafka-1231-45khb 2/2 Running 0 19d 172.19.132.164 vmss-agent-w13v5az2-23prodva7-ehuor00001h +pipeline-kafka-1232-dm4md 2/2 Running 0 19d 172.19.127.53 vmss-agent-w13v5az2-23prodva7-ehuor00001j +pipeline-kafka-1301-2hb5m 2/2 Running 0 21d 172.19.60.59 vmss-agent-w13v5az3-23prodva7-ehuor000015 +pipeline-kafka-1302-r472g 2/2 Running 0 21d 172.19.44.177 vmss-agent-w13v5az3-23prodva7-ehuor00000q +pipeline-kafka-1303-sbd77 2/2 Running 0 21d 172.19.47.85 vmss-agent-w13v5az3-23prodva7-ehuor00000y +pipeline-kafka-1304-h7k8k 2/2 Running 0 21d 172.19.67.113 vmss-agent-w13v5az3-23prodva7-ehuor000019 +pipeline-kafka-1305-49khj 2/2 Running 0 21d 172.19.51.50 vmss-agent-w13v5az3-23prodva7-ehuor000011 +pipeline-kafka-1306-lf9jh 2/2 Running 0 21d 172.19.45.136 vmss-agent-w13v5az3-23prodva7-ehuor00000r +pipeline-kafka-1307-m6vrq 2/2 Running 0 21d 172.19.50.4 vmss-agent-w13v5az3-23prodva7-ehuor00000s +pipeline-kafka-1308-jrgwx 2/2 Running 0 21d 172.19.52.213 vmss-agent-w13v5az3-23prodva7-ehuor00000z +pipeline-kafka-1309-zwcff 2/2 Running 0 21d 172.19.40.26 vmss-agent-w13v5az3-23prodva7-ehuor00000w +pipeline-kafka-1310-fpvp4 2/2 Running 0 21d 172.19.4.98 vmss-agent-w13v5az3-23prodva7-ehuor00000o +pipeline-kafka-1311-rh4xm 2/2 Running 0 21d 172.19.42.121 vmss-agent-w13v5az3-23prodva7-ehuor000012 
+pipeline-kafka-1312-hxqlq 2/2 Running 0 21d 172.19.41.143 vmss-agent-w13v5az3-23prodva7-ehuor000010 +pipeline-kafka-1313-vszh4 2/2 Running 0 21d 172.19.65.118 vmss-agent-w13v5az3-23prodva7-ehuor000016 +pipeline-kafka-1314-jpgrc 2/2 Running 0 21d 172.19.48.232 vmss-agent-w13v5az3-23prodva7-ehuor00000t +pipeline-kafka-1315-4hl5l 2/2 Running 0 21d 172.19.49.242 vmss-agent-w13v5az3-23prodva7-ehuor00000v +pipeline-kafka-1316-w2mvp 2/2 Running 0 21d 172.19.68.24 vmss-agent-w13v5az3-23prodva7-ehuor00001b +pipeline-kafka-1317-x25xf 2/2 Running 0 21d 172.19.98.190 vmss-agent-w13v5az3-23prodva7-ehuor00001g +pipeline-kafka-1318-9mpmp 2/2 Running 0 21d 172.19.100.85 vmss-agent-w13v5az3-23prodva7-ehuor00001i +pipeline-kafka-1319-j6kqg 2/2 Running 0 21d 172.19.99.243 vmss-agent-w13v5az3-23prodva7-ehuor00001c +pipeline-kafka-1320-7g9mb 2/2 Running 0 21d 172.19.96.21 vmss-agent-w13v5az3-23prodva7-ehuor00001d +pipeline-kafka-1321-f95jl 2/2 Running 0 21d 172.19.97.66 vmss-agent-w13v5az3-23prodva7-ehuor00001f +pipeline-kafka-1322-zvzvq 2/2 Running 0 21d 172.19.95.182 vmss-agent-w13v5az3-23prodva7-ehuor00001e +pipeline-kafka-1323-4xfg5 2/2 Running 0 21d 172.19.94.61 vmss-agent-w13v5az3-23prodva7-ehuor00001h +pipeline-kafka-1324-wfht9 2/2 Running 0 21d 172.19.101.187 vmss-agent-w13v5az3-23prodva7-ehuor00001j +pipeline-kafka-1325-9ldfc 2/2 Running 0 21d 172.19.105.78 vmss-agent-w13v5az3-23prodva7-ehuor00001m +pipeline-kafka-1326-khrlc 2/2 Running 0 21d 172.19.102.83 vmss-agent-w13v5az3-23prodva7-ehuor00001k +pipeline-kafka-1327-smdkz 2/2 Running 0 21d 172.19.103.166 vmss-agent-w13v5az3-23prodva7-ehuor00001n +pipeline-kafka-1328-m26gj 2/2 Running 0 21d 172.19.104.4 vmss-agent-w13v5az3-23prodva7-ehuor00001l +pipeline-kafka-1329-j8pph 2/2 Running 0 19d 172.19.136.30 vmss-agent-w13v5az3-23prodva7-ehuor00001r +pipeline-kafka-1330-7hrkj 2/2 Running 0 3d7h 172.19.134.69 vmss-agent-w13v5az3-23prodva7-ehuor00001p +pipeline-kafka-1331-6nrgw 2/2 Running 0 19d 172.19.133.93 
vmss-agent-w13v5az3-23prodva7-ehuor00001q +pipeline-kafka-1332-fcstz 2/2 Running 0 19d 172.19.135.162 vmss-agent-w13v5az3-23prodva7-ehuor00001o diff --git a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml index e3fd3e25d..99f432da8 100644 --- a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml +++ b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml @@ -23670,6 +23670,14 @@ spec: required: - internalListeners type: object + localDebugEnabled: + default: false + description: |- + localDebugEnabled is used to decide whether to create a separate loadbalancer services for the + Kafka and Cruise Control Pods. These services will expose the internal listener ports of the Kafka + cluster with LoadBalancer type, which can be used for running Koperator on a local machine against + a kafkaCluster instance on a Kind Cluster. + type: boolean monitoringConfig: description: MonitoringConfig defines the config for monitoring Kafka and Cruise Control @@ -23735,6 +23743,13 @@ spec: required: - failureThreshold type: object + scaleOpsEnabled: + default: false + description: |- + Allows ScaleOps to manage Memory and CPU Resource Requests for Kafka Broker Pods. 
+ This Disables CPU and Memory request reconciliation from the desired state defined in + the KafkaCluster to the current state in the Kubernetes Cluster + type: boolean taintedBrokersSelector: description: Selector for broker pods that need to be recycled/reconciled properties: diff --git a/config/samples/simpleZookeeper.yaml b/config/samples/simpleZookeeper.yaml new file mode 100644 index 000000000..6bf70aa9c --- /dev/null +++ b/config/samples/simpleZookeeper.yaml @@ -0,0 +1,10 @@ +apiVersion: zookeeper.pravega.io/v1beta1 +kind: ZookeeperCluster +metadata: + name: zookeeper-server + namespace: zookeeper +spec: + replicas: 3 + persistence: + reclaimPolicy: Delete + diff --git a/config/samples/simplekafkacluster.yaml b/config/samples/simplekafkacluster.yaml index d890f8551..307e37999 100644 --- a/config/samples/simplekafkacluster.yaml +++ b/config/samples/simplekafkacluster.yaml @@ -5,10 +5,11 @@ metadata: controller-tools.k8s.io: "1.0" name: kafka spec: + localDebugEnabled: true kRaft: false monitoringConfig: jmxImage: "ghcr.io/adobe/koperator/jmx-javaagent:1.4.0" - headlessServiceEnabled: true + headlessServiceEnabled: false zkAddresses: - "zookeeper-server-client.zookeeper:2181" propagateLabels: false diff --git a/config/scaleops/CustomOwnerGrouping.yaml b/config/scaleops/CustomOwnerGrouping.yaml new file mode 100644 index 000000000..7e9760d82 --- /dev/null +++ b/config/scaleops/CustomOwnerGrouping.yaml @@ -0,0 +1,22 @@ + +kind: CustomOwnerGrouping +apiVersion: analysis.scaleops.sh/v1alpha1 +metadata: + name: kafkabroker + namespace: scaleops-system +spec: + groupBy: + positiveRegexMatch: false + groupBys: + - labels: + - 'isBrokerNode: true' + positiveRegexMatch: false + topOwnerController: + apiVersion: kafka.banzaicloud.io/v1beta1 + kind: KafkaCluster + displayOptions: + hideGeneratedSuffix: true + fields: + - ownerName + defaultPolicy: kafka-brokers + enabled: true diff --git a/kafkaclusterdev.yaml b/kafkaclusterdev.yaml new file mode 100644 index 
000000000..d06d26609 --- /dev/null +++ b/kafkaclusterdev.yaml @@ -0,0 +1,2573 @@ +apiVersion: kafka.banzaicloud.io/v1beta1 +kind: KafkaCluster +metadata: + annotations: + argocd.argoproj.io/sync-options: 'Prune=false, Delete=false' + helm.sh/chart: kafka-2.1.32 + kubectl.kubernetes.io/last-applied-configuration: > + {"apiVersion":"kafka.banzaicloud.io/v1beta1","kind":"KafkaCluster","metadata":{"annotations":{"argocd.argoproj.io/sync-options":"Prune=false, + Delete=false","helm.sh/chart":"kafka-2.1.32","pipeline_config_version":"dev"},"labels":{"app.kubernetes.io/instance":"pipeline-kafka","app.kubernetes.io/managed-by":"Helm","app.kubernetes.io/name":"kafka","app.kubernetes.io/version":"2.1.32","flex.ethos.corp.adobe.com/instance":"experience-platform--pipeline-kafka-deploy--ethos21-st-2d95c39c","pipeline_cluster":"VA7","pipeline_env":"dev"},"name":"pipeline-kafka","namespace":"ns-team-aep-pipeline-kafka-1-dev"},"spec":{"brokerConfigGroups":{"az1":{"affinity":{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"node.kubernetes.io/pipeline-workload","operator":"In","values":["true"]},{"key":"node.kubernetes.io/ethos-workload.arm64","operator":"In","values":["true"]}]}]}},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]},{"key":"isControllerNode","operator":"In","values":["false"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"brokerAnnotations":{"arc.ethos.adobe.net/ignore":"true","broker_group":"az1","cluster-autoscaler.kubernetes.io/safe-to-evict":"false","io.kubernetes.cri-o.LinkLogs":"logging-volume"},"containers":[{"env":[{"name":"SPLUNK_HOST","value":"splunk-hec.loc.adobe.net"},{"name":"SPLUNK_PORT","value":"8088"},{"name":"SPLUNK_INDEX","value":"plat_app_preprod"},{"name":"SPLUNK_TOKEN","valueFrom":{"secretKeyRef":{"key":"token","name":"splunk-token"}}},{"name":"SPLUNK_SOURCETYPE","value":"log
4j"},{"name":"POD_UID_FLUENT_BIT","valueFrom":{"fieldRef":{"fieldPath":"metadata.uid"}}},{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"POD_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":"POD_IP","valueFrom":{"fieldRef":{"fieldPath":"status.podIP"}}},{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}},{"name":"NODE_IP","valueFrom":{"fieldRef":{"fieldPath":"status.hostIP"}}},{"name":"LOG_PARSER","value":"docker"},{"name":"POD_ENV","value":"dev"},{"name":"POD_CLUSTER","value":"VA7"}],"image":"docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos","name":"fluent-bit","ports":[{"containerPort":2020,"name":"fb-metrics","protocol":"TCP"}],"resources":{"limits":{"cpu":"100m","memory":"256Mi"},"requests":{"cpu":"100m","memory":"256Mi"}},"volumeMounts":[{"mountPath":"/logging-volume","mountPropagation":"HostToContainer","name":"logging-volume"},{"mountPath":"/var/fluent-bit","name":"fluent-data"},{"mountPath":"/fluent-bit/etc","name":"fluent-bit-config"}]}],"initContainers":[{"command":["cp","-r","/pipeline/kafka-libs/.","/opt/kafka/libs/extensions/"],"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10","imagePullPolicy":"IfNotPresent","name":"broker-libs-injector","resources":{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}},"volumeMounts":[{"mountPath":"/opt/kafka/libs/extensions","name":"extensions"}]}],"kafkaHeapOpts":"-XX:InitialRAMPercentage=30 + -XX:MaxRAMPercentage=70 + -XX:MinRAMPercentage=70","kafkaJvmPerfOpts":"-server -XX:+UseG1GC + -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 + -XX:+ExplicitGCInvokesConcurrent -XX:MetaspaceSize=96m + -XX:G1HeapRegionSize=16M -XX:MinMetaspaceFreeRatio=50 + -XX:MaxMetaspaceFreeRatio=80 -Djava.awt.headless=true + -Dsun.net.inetaddr.ttl=60 -Dcom.sun.management.jmxremote.port=1090 + 
-Dcom.sun.management.jmxremote.rmi.port=1090 + -Dcom.sun.management.jmxremote.local.only=false + -Djava.rmi.server.hostname=127.0.0.1 + -Djute.maxbuffer=0x9fffff","log4jConfig":"log4j.rootLogger=INFO, + stdout\n\nlog4j.appender.stdout=org.apache.log4j.ConsoleAppender\nlog4j.appender.stdout.layout=org.apache.log4j.PatternLayout\nlog4j.appender.stdout.layout.ConversionPattern=[%d] + %p %m (%c)%n\n\n# Change the line below to adjust ZK client + logging\nlog4j.logger.org.apache.zookeeper=INFO\n\n# Change the two lines + below to adjust the general broker logging level (output to server.log and + stdout)\nlog4j.logger.kafka=INFO\nlog4j.logger.org.apache.kafka=INFO\n\n# + Change to DEBUG or TRACE to enable request + logging\nlog4j.logger.kafka.request.logger=WARN\n\n# Uncomment the lines + below and change log4j.logger.kafka.network.RequestChannel$ to TRACE for + additional output\n# related to the handling of + requests\n#log4j.logger.kafka.network.Processor=TRACE, + requestAppender\n#log4j.logger.kafka.server.KafkaApis=TRACE, + requestAppender\n#\nlog4j.logger.kafka.network.RequestChannel$=WARN\nlog4j.logger.kafka.controller=DEBUG\nlog4j.logger.kafka.log.LogCleaner=INFO\nlog4j.logger.state.change.logger=INFO\n\n# + Access denials are logged at INFO level, change to DEBUG to also log + allowed accesses\nlog4j.logger.kafka.authorizer.logger=INFO\n\n# + Additional logging to reduce + 
noise\nlog4j.logger.org.apache.kafka.common.network.Selector=WARN","nodeSelector":{"topology.kubernetes.io/zone":"eastus2-1"},"resourceRequirements":{"limits":{"cpu":"4","memory":"4Gi"},"requests":{"cpu":"1","memory":"4Gi"}},"serviceAccountName":"kafka-cluster","storageConfigs":[{"mountPath":"/csi-kafka-logs2","pvcSpec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"512Gi"}},"storageClassName":"premium-lazy-csi-xfs"}}],"tolerations":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"},{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"io-optimized"}],"volumes":[{"emptyDir":{},"name":"logging-volume"},{"emptyDir":{},"name":"fluent-data"},{"configMap":{"name":"pipeline-kafka-fluent-bit"},"name":"fluent-bit-config"}]},"az2":{"affinity":{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"node.kubernetes.io/pipeline-workload","operator":"In","values":["true"]},{"key":"node.kubernetes.io/ethos-workload.arm64","operator":"In","values":["true"]}]}]}},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]},{"key":"isControllerNode","operator":"In","values":["false"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"brokerAnnotations":{"arc.ethos.adobe.net/ignore":"true","broker_group":"az2","cluster-autoscaler.kubernetes.io/safe-to-evict":"false","io.kubernetes.cri-o.LinkLogs":"logging-volume"},"containers":[{"env":[{"name":"SPLUNK_HOST","value":"splunk-hec.loc.adobe.net"},{"name":"SPLUNK_PORT","value":"8088"},{"name":"SPLUNK_INDEX","value":"plat_app_preprod"},{"name":"SPLUNK_TOKEN","valueFrom":{"secretKeyRef":{"key":"token","name":"splunk-token"}}},{"name":"SPLUNK_SOURCETYPE","value":"log4j"},{"nam
e":"POD_UID_FLUENT_BIT","valueFrom":{"fieldRef":{"fieldPath":"metadata.uid"}}},{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"POD_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":"POD_IP","valueFrom":{"fieldRef":{"fieldPath":"status.podIP"}}},{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}},{"name":"NODE_IP","valueFrom":{"fieldRef":{"fieldPath":"status.hostIP"}}},{"name":"LOG_PARSER","value":"docker"},{"name":"POD_ENV","value":"dev"},{"name":"POD_CLUSTER","value":"VA7"}],"image":"docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos","name":"fluent-bit","ports":[{"containerPort":2020,"name":"fb-metrics","protocol":"TCP"}],"resources":{"limits":{"cpu":"100m","memory":"256Mi"},"requests":{"cpu":"100m","memory":"256Mi"}},"volumeMounts":[{"mountPath":"/logging-volume","mountPropagation":"HostToContainer","name":"logging-volume"},{"mountPath":"/var/fluent-bit","name":"fluent-data"},{"mountPath":"/fluent-bit/etc","name":"fluent-bit-config"}]}],"initContainers":[{"command":["cp","-r","/pipeline/kafka-libs/.","/opt/kafka/libs/extensions/"],"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10","imagePullPolicy":"IfNotPresent","name":"broker-libs-injector","resources":{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}},"volumeMounts":[{"mountPath":"/opt/kafka/libs/extensions","name":"extensions"}]}],"kafkaHeapOpts":"-XX:InitialRAMPercentage=30 + -XX:MaxRAMPercentage=70 + -XX:MinRAMPercentage=70","kafkaJvmPerfOpts":"-server -XX:+UseG1GC + -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 + -XX:+ExplicitGCInvokesConcurrent -XX:MetaspaceSize=96m + -XX:G1HeapRegionSize=16M -XX:MinMetaspaceFreeRatio=50 + -XX:MaxMetaspaceFreeRatio=80 -Djava.awt.headless=true + -Dsun.net.inetaddr.ttl=60 -Dcom.sun.management.jmxremote.port=1090 + 
-Dcom.sun.management.jmxremote.rmi.port=1090 + -Dcom.sun.management.jmxremote.local.only=false + -Djava.rmi.server.hostname=127.0.0.1 + -Djute.maxbuffer=0x9fffff","log4jConfig":"log4j.rootLogger=INFO, + stdout\n\nlog4j.appender.stdout=org.apache.log4j.ConsoleAppender\nlog4j.appender.stdout.layout=org.apache.log4j.PatternLayout\nlog4j.appender.stdout.layout.ConversionPattern=[%d] + %p %m (%c)%n\n\n# Change the line below to adjust ZK client + logging\nlog4j.logger.org.apache.zookeeper=INFO\n\n# Change the two lines + below to adjust the general broker logging level (output to server.log and + stdout)\nlog4j.logger.kafka=INFO\nlog4j.logger.org.apache.kafka=INFO\n\n# + Change to DEBUG or TRACE to enable request + logging\nlog4j.logger.kafka.request.logger=WARN\n\n# Uncomment the lines + below and change log4j.logger.kafka.network.RequestChannel$ to TRACE for + additional output\n# related to the handling of + requests\n#log4j.logger.kafka.network.Processor=TRACE, + requestAppender\n#log4j.logger.kafka.server.KafkaApis=TRACE, + requestAppender\n#\nlog4j.logger.kafka.network.RequestChannel$=WARN\nlog4j.logger.kafka.controller=DEBUG\nlog4j.logger.kafka.log.LogCleaner=INFO\nlog4j.logger.state.change.logger=INFO\n\n# + Access denials are logged at INFO level, change to DEBUG to also log + allowed accesses\nlog4j.logger.kafka.authorizer.logger=INFO\n\n# + Additional logging to reduce + 
noise\nlog4j.logger.org.apache.kafka.common.network.Selector=WARN","nodeSelector":{"topology.kubernetes.io/zone":"eastus2-2"},"resourceRequirements":{"limits":{"cpu":"4","memory":"4Gi"},"requests":{"cpu":"1","memory":"4Gi"}},"serviceAccountName":"kafka-cluster","storageConfigs":[{"mountPath":"/csi-kafka-logs2","pvcSpec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"512Gi"}},"storageClassName":"premium-lazy-csi-xfs"}}],"tolerations":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"},{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"io-optimized"}],"volumes":[{"emptyDir":{},"name":"logging-volume"},{"emptyDir":{},"name":"fluent-data"},{"configMap":{"name":"pipeline-kafka-fluent-bit"},"name":"fluent-bit-config"}]},"az3":{"affinity":{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"node.kubernetes.io/pipeline-workload","operator":"In","values":["true"]},{"key":"node.kubernetes.io/ethos-workload.arm64","operator":"In","values":["true"]}]}]}},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]},{"key":"isControllerNode","operator":"In","values":["false"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"brokerAnnotations":{"arc.ethos.adobe.net/ignore":"true","broker_group":"az3","cluster-autoscaler.kubernetes.io/safe-to-evict":"false","io.kubernetes.cri-o.LinkLogs":"logging-volume"},"containers":[{"env":[{"name":"SPLUNK_HOST","value":"splunk-hec.loc.adobe.net"},{"name":"SPLUNK_PORT","value":"8088"},{"name":"SPLUNK_INDEX","value":"plat_app_preprod"},{"name":"SPLUNK_TOKEN","valueFrom":{"secretKeyRef":{"key":"token","name":"splunk-token"}}},{"name":"SPLUNK_SOURCETYPE","value":"log4j"},{"nam
e":"POD_UID_FLUENT_BIT","valueFrom":{"fieldRef":{"fieldPath":"metadata.uid"}}},{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"POD_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":"POD_IP","valueFrom":{"fieldRef":{"fieldPath":"status.podIP"}}},{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}},{"name":"NODE_IP","valueFrom":{"fieldRef":{"fieldPath":"status.hostIP"}}},{"name":"LOG_PARSER","value":"docker"},{"name":"POD_ENV","value":"dev"},{"name":"POD_CLUSTER","value":"VA7"}],"image":"docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos","name":"fluent-bit","ports":[{"containerPort":2020,"name":"fb-metrics","protocol":"TCP"}],"resources":{"limits":{"cpu":"100m","memory":"256Mi"},"requests":{"cpu":"100m","memory":"256Mi"}},"volumeMounts":[{"mountPath":"/logging-volume","mountPropagation":"HostToContainer","name":"logging-volume"},{"mountPath":"/var/fluent-bit","name":"fluent-data"},{"mountPath":"/fluent-bit/etc","name":"fluent-bit-config"}]}],"initContainers":[{"command":["cp","-r","/pipeline/kafka-libs/.","/opt/kafka/libs/extensions/"],"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10","imagePullPolicy":"IfNotPresent","name":"broker-libs-injector","resources":{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}},"volumeMounts":[{"mountPath":"/opt/kafka/libs/extensions","name":"extensions"}]}],"kafkaHeapOpts":"-XX:InitialRAMPercentage=30 + -XX:MaxRAMPercentage=70 + -XX:MinRAMPercentage=70","kafkaJvmPerfOpts":"-server -XX:+UseG1GC + -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 + -XX:+ExplicitGCInvokesConcurrent -XX:MetaspaceSize=96m + -XX:G1HeapRegionSize=16M -XX:MinMetaspaceFreeRatio=50 + -XX:MaxMetaspaceFreeRatio=80 -Djava.awt.headless=true + -Dsun.net.inetaddr.ttl=60 -Dcom.sun.management.jmxremote.port=1090 + 
-Dcom.sun.management.jmxremote.rmi.port=1090 + -Dcom.sun.management.jmxremote.local.only=false + -Djava.rmi.server.hostname=127.0.0.1 + -Djute.maxbuffer=0x9fffff","log4jConfig":"log4j.rootLogger=INFO, + stdout\n\nlog4j.appender.stdout=org.apache.log4j.ConsoleAppender\nlog4j.appender.stdout.layout=org.apache.log4j.PatternLayout\nlog4j.appender.stdout.layout.ConversionPattern=[%d] + %p %m (%c)%n\n\n# Change the line below to adjust ZK client + logging\nlog4j.logger.org.apache.zookeeper=INFO\n\n# Change the two lines + below to adjust the general broker logging level (output to server.log and + stdout)\nlog4j.logger.kafka=INFO\nlog4j.logger.org.apache.kafka=INFO\n\n# + Change to DEBUG or TRACE to enable request + logging\nlog4j.logger.kafka.request.logger=WARN\n\n# Uncomment the lines + below and change log4j.logger.kafka.network.RequestChannel$ to TRACE for + additional output\n# related to the handling of + requests\n#log4j.logger.kafka.network.Processor=TRACE, + requestAppender\n#log4j.logger.kafka.server.KafkaApis=TRACE, + requestAppender\n#\nlog4j.logger.kafka.network.RequestChannel$=WARN\nlog4j.logger.kafka.controller=DEBUG\nlog4j.logger.kafka.log.LogCleaner=INFO\nlog4j.logger.state.change.logger=INFO\n\n# + Access denials are logged at INFO level, change to DEBUG to also log + allowed accesses\nlog4j.logger.kafka.authorizer.logger=INFO\n\n# + Additional logging to reduce + 
noise\nlog4j.logger.org.apache.kafka.common.network.Selector=WARN","nodeSelector":{"topology.kubernetes.io/zone":"eastus2-3"},"resourceRequirements":{"limits":{"cpu":"4","memory":"4Gi"},"requests":{"cpu":"1","memory":"4Gi"}},"serviceAccountName":"kafka-cluster","storageConfigs":[{"mountPath":"/csi-kafka-logs2","pvcSpec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"512Gi"}},"storageClassName":"premium-lazy-csi-xfs"}}],"tolerations":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"},{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"io-optimized"}],"volumes":[{"emptyDir":{},"name":"logging-volume"},{"emptyDir":{},"name":"fluent-data"},{"configMap":{"name":"pipeline-kafka-fluent-bit"},"name":"fluent-bit-config"}]}},"brokers":[{"brokerConfig":{"brokerIngressMapping":["ingress-az1","corpingress-az1","secureingress-az1"],"terminationGracePeriodSeconds":120},"brokerConfigGroup":"az1","id":101,"readOnlyConfig":"broker.rack=eastus2-1\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az1","corpingress-az1","secureingress-az1"],"terminationGracePeriodSeconds":120},"brokerConfigGroup":"az1","id":102,"readOnlyConfig":"broker.rack=eastus2-1\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az2","corpingress-az2","secureingress-az2"],"terminationGracePeriodSeconds":120},"brokerConfigGroup":"az2","id":201,"readOnlyConfig":"broker.rack=eastus2-2\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az2","corpingress-az2","secureingress-az2"],"terminationGracePeriodSeconds":120},"brokerConfigGroup":"az2","id":202,"readOnlyConfig":"broker.rack=eastus2-2\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az3","corpingress-az3","secureingress-az3"],"terminationGracePeriodSeconds":120},"brokerConfigGroup":"az3","id":301,"readOnlyConfig":"broker.rack=eastus
2-3\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az3","corpingress-az3","secureingress-az3"],"terminationGracePeriodSeconds":120},"brokerConfigGroup":"az3","id":302,"readOnlyConfig":"broker.rack=eastus2-3\n"}],"clusterImage":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/koperator/kafka:2.13-3.9.2-jdk21.0.11","clusterMetricsReporterImage":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/cruise-control:3.0.3-adbe-20260423","cruiseControlConfig":{"affinity":{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"node.kubernetes.io/pipeline-workload","operator":"In","values":["true"]},{"key":"node.kubernetes.io/ethos-workload.arm64","operator":"In","values":["true"]}]}]}}},"capacityConfig":"{\n + \"brokerCapacities\":[\n {\n \"brokerId\": \"-1\",\n + \"capacity\": {\n \"DISK\": {\"/csi-kafka-logs2/kafka\": + \"524288\"},\n \"CPU\": {\"num.cores\": \"1\"},\n \"NW_IN\": + \"900000\",\n \"NW_OUT\": \"900000\"\n },\n \"doc\": + \"This is the default capacity. Capacity unit used for disk is in MB, cpu + is in cores, network throughput is in KB.\"\n }\n + ]\n}","clusterConfig":"{\n \"min.insync.replicas\": 2\n}","config":"\n# + Configuration for the metadata client.\n# + =======================================\n# The maximum interval in + milliseconds between two metadata + refreshes.\n#metadata.max.age.ms=300000\n# Client id for the Cruise + Control. 
It is used for the metadata + client.\n#client.id=kafka-cruise-control\n# The size of TCP send buffer + bytes for the metadata client.\n#send.buffer.bytes=131072\n# The size of + TCP receive buffer size for the metadata + client.\n#receive.buffer.bytes=131072\n# The time to wait before + disconnect an idle TCP connection.\n#connections.max.idle.ms=540000\n# The + time to wait before reconnect to a given + host.\n#reconnect.backoff.ms=50\n# The time to wait for a response from a + host after sending a request.\n#request.timeout.ms=30000\n# The time to + wait for broker logdir to respond after sending a + request.\n#logdir.response.timeout.ms=10000\n# Configurations for the load + monitor\n# =======================================\n# The number of metric + fetcher thread to fetch metrics for the Kafka + cluster\nnum.metric.fetchers=1\n# The metric sampler + class\nmetric.sampler.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.CruiseControlMetricsReporterSampler\n# + True if the sampling process allows CPU capacity estimation of brokers + used for CPU utilization + estimation.\nsampling.allow.cpu.capacity.estimation=true\n# Configurations + for + CruiseControlMetricsReporterSampler\nmetric.reporter.topic=__CruiseControlMetrics\n# + The sample store class + name\nsample.store.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.KafkaSampleStore\n# + The config for the Kafka sample store to save the partition metric + samples\npartition.metric.sample.store.topic=__KafkaCruiseControlPartitionMetricSamples\n# + The config for the Kafka sample store to save the model training + samples\nbroker.metric.sample.store.topic=__KafkaCruiseControlModelTrainingSamples\n# + The replication factor of Kafka metric sample store + topic\nsample.store.topic.replication.factor=3\npartition.sample.store.topic.partition.count=15\nbroker.sample.store.topic.partition.count=15\n# + The config for the number of Kafka sample store consumer + threads\nnum.sample.loading.threads=8\n# 
The partition assignor class for + the metric + samplers\nmetric.sampler.partition.assignor.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.DefaultMetricSamplerPartitionAssignor\n# + The metric sampling interval in + milliseconds\nmetric.sampling.interval.ms=60000\n# The partition metrics + window size in milliseconds\npartition.metrics.window.ms=300000\n# The + number of partition metric windows to keep in + memory\nnum.partition.metrics.windows=20\n# The minimum partition metric + samples required for a partition in each + window\nmin.samples.per.partition.metrics.window=1\n# The broker metrics + window size in milliseconds\nbroker.metrics.window.ms=300000\n# The number + of broker metric windows to keep in memory\n# see + https://github.com/linkedin/cruise-control/issues/1149\nnum.broker.metrics.windows=20\n# + The minimum broker metric samples required for a partition in each + window\nmin.samples.per.broker.metrics.window=1\n# The configuration for + the BrokerCapacityConfigFileResolver (supports JBOD, non-JBOD, and + heterogeneous CPU core + capacities)\ncapacity.config.file=config/capacity.json\n# Configurations + for the analyzer\n# =======================================\n# The list of + goals to optimize the Kafka cluster for with pre-computed + proposals\ndefault.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\\\n + 
com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicLeaderReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal\n# + The list of supported + goals\ngoals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.PotentialNwOutGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicLeaderReplicaDistributionGoal,\\\n + 
com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.kafkaassigner.KafkaAssignerDiskUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.kafkaassigner.KafkaAssignerEvenRackAwareGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.PreferredLeaderElectionGoal\n# + The list of supported intra-broker + goals\nintra.broker.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.IntraBrokerDiskCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.IntraBrokerDiskUsageDistributionGoal\n# + The list of supported hard + goals\nhard.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal\n# The + minimum percentage of well monitored partitions out of all the + partitions\nmin.valid.partition.ratio=0.95\n# The balance threshold for + CPU\ncpu.balance.threshold=1.3\n# The balance threshold for + disk\ndisk.balance.threshold=1.1\n# The balance threshold for network + inbound utilization\nnetwork.inbound.balance.threshold=1.3\n# The balance + threshold for network outbound + utilization\nnetwork.outbound.balance.threshold=1.3\n# The balance + threshold for the replica + count\nreplica.count.balance.threshold=1.1\nleader.replica.count.balance.threshold=1.05\ntopic.replica.count.balance.threshold=1.5\ntopic.replica.count.balance.max.gap=20\ntopic.leader.replica.count.balance.threshold=1.10\ntopic.leader.replica.count.balance.min.gap=1\ntopic.leader.replica.count.balance.max.gap=5\n# + The 
capacity threshold for CPU in + percentage\ncpu.capacity.threshold=0.8\n# The capacity threshold for disk + in percentage\ndisk.capacity.threshold=0.8\n# The capacity threshold for + network inbound utilization in + percentage\nnetwork.inbound.capacity.threshold=0.8\n# The capacity + threshold for network outbound utilization in + percentage\nnetwork.outbound.capacity.threshold=0.8\n# The threshold to + define the cluster to be in a low CPU utilization + state\ncpu.low.utilization.threshold=0.2\n# The threshold to define the + cluster to be in a low disk utilization + state\ndisk.low.utilization.threshold=0.2\n# The threshold to define the + cluster to be in a low network inbound utilization + state\nnetwork.inbound.low.utilization.threshold=0.2\n# The threshold to + define the cluster to be in a low disk utilization + state\nnetwork.outbound.low.utilization.threshold=0.2\n# The metric + anomaly percentile upper + threshold\nmetric.anomaly.percentile.upper.threshold=90.0\n# The metric + anomaly percentile lower + threshold\nmetric.anomaly.percentile.lower.threshold=10.0\n# How often + should the cached proposal be expired and recalculated if + necessary\nproposal.expiration.ms=60000\n# The maximum number of replicas + that can reside on a broker at any given + time.\nmax.replicas.per.broker=14000\n# The number of threads to use for + proposal candidate precomputing.\nnum.proposal.precompute.threads=1\n# the + topics that should be excluded from the partition + movement.\n#topics.excluded.from.partition.movement=\n# the topics that + should have even number of leaders distriubted across + brokers\ntopics.with.min.leaders.per.broker=__consumer_offsets\n# enable + dynamic min leaders per topic + computation\nmin.topic.leaders.per.broker=0\n# The impact of having one + level higher goal priority on the relative balancedness + score.\n#goal.balancedness.priority.weight\n# The impact of strictness on + the relative balancedness 
score.\n#goal.balancedness.strictness.weight\n# + The maximum number of replicas that should reside on each broker to + consider a cluster as overprovisioned after balancing its replica + distribution.\noverprovisioned.max.replicas.per.broker=3000\n# + Configurations for the executor\n# + =======================================\n# If true, appropriate zookeeper + Client { .. } entry required in jaas file located at + $base_dir/config/cruise_control_jaas.conf\nzookeeper.security.enabled=false\n# + The max number of partitions to move in/out on a given broker at a given + time.\nnum.concurrent.partition.movements.per.broker=5\n# The max number + of partitions to move between disks within a given broker at a given + time.\nnum.concurrent.intra.broker.partition.movements=2\n# The max number + of leadership movement within the whole cluster at a given + time.\nnum.concurrent.leader.movements=1000\n# Default replica movement + throttle. If not specified, movements unthrottled by default.\n# Set to 50 + MBps (in Bps)\ndefault.replication.throttle=52428800\n# The interval + between two execution progress + checks.\nexecution.progress.check.interval.ms=10000\ndefault.replica.movement.strategies=com.linkedin.kafka.cruisecontrol.executor.strategy.PostponeUrpReplicaMovementStrategy,\\\n + com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeMinIsrWithOfflineReplicasStrategy,\\\n + com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeOneAboveMinIsrWithOfflineReplicasStrategy,\\\n + com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeLargeReplicaMovementStrategy,\\\n + com.linkedin.kafka.cruisecontrol.executor.strategy.BaseReplicaMovementStrategy\n# + Configurations for anomaly detector\n# + =======================================\n# The goal violation notifier + class\nanomaly.notifier.class=com.linkedin.kafka.cruisecontrol.detector.notifier.SelfHealingNotifier\n# + The metric anomaly finder class\nmetric.anomaly.finder.class=\n# The + anomaly detection 
interval\nanomaly.detection.interval.ms=600000\n# The + goal violation to + detect.\nanomaly.detection.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicLeaderReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal\nself.healing.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicLeaderReplicaDistributionGoal,\\\n + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\\\n + 
com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal\n# + The interested metrics for metric anomaly + analyzer.\nmetric.anomaly.analyzer.metrics=BROKER_PRODUCE_LOCAL_TIME_MS_MAX,\\\n + BROKER_PRODUCE_LOCAL_TIME_MS_MEAN,\\\n + BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MAX,\\\n + BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MEAN,\\\n + BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MAX,\\\n + BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MEAN,\\\n + BROKER_LOG_FLUSH_TIME_MS_MAX,\\\n + BROKER_LOG_FLUSH_TIME_MS_MEAN\n# True if recently demoted brokers are + excluded from optimizations during broker failure self healing, false + otherwise\nbroker.failure.exclude.recently.demoted.brokers=true\n# True if + recently removed brokers are excluded from optimizations during broker + failure self healing, false + otherwise\nbroker.failure.exclude.recently.removed.brokers=true\n# True if + recently demoted brokers are excluded from optimizations during goal + violation self healing, false + otherwise\ngoal.violation.exclude.recently.demoted.brokers=true\n# True if + recently removed brokers are excluded from optimizations during goal + violation self healing, false + otherwise\ngoal.violation.exclude.recently.removed.brokers=true\n# The + file path to store the failed broker list.\n# This is to persist the + broker failure time in case Cruise Control failed and restarted when some + brokers are down.\nfailed.brokers.file.path=failedBrokers.txt\n# Topic + config provider + class\ntopic.config.provider.class=com.linkedin.kafka.cruisecontrol.config.KafkaAdminTopicConfigProvider\n# + The cluster configurations for the + TopicConfigProvider\ncluster.configs.file=config/clusterConfigs.json\n# + The maximum time in milliseconds to store the response and access details + of a completed kafka monitoring user + task.\ncompleted.kafka.monitor.user.task.retention.time.ms=86400000\n# The + maximum time in milliseconds to store the response and access details of a + completed cruise control monitoring user + 
task.\ncompleted.cruise.control.monitor.user.task.retention.time.ms=86400000\n# + The maximum time in milliseconds to store the response and access details + of a completed kafka admin user + task.\ncompleted.kafka.admin.user.task.retention.time.ms=604800000\n# The + maximum time in milliseconds to store the response and access details of a + completed cruise control admin user + task.\ncompleted.cruise.control.admin.user.task.retention.time.ms=604800000\n# + The fallback maximum time in milliseconds to store the response and access + details of a completed user + task.\ncompleted.user.task.retention.time.ms=86400000\n# The maximum time + in milliseconds to retain the demotion history of + brokers.\ndemotion.history.retention.time.ms=900000\n# The maximum time in + milliseconds to retain the removal history of + brokers.\nremoval.history.retention.time.ms=900000\n# The maximum number + of completed kafka monitoring user tasks for which the response and access + details will be + cached.\nmax.cached.completed.kafka.monitor.user.tasks=20\n# The maximum + number of completed cruise control monitoring user tasks for which the + response and access details will be + cached.\nmax.cached.completed.cruise.control.monitor.user.tasks=20\n# The + maximum number of completed kafka admin user tasks for which the response + and access details will be + cached.\nmax.cached.completed.kafka.admin.user.tasks=30\n# The maximum + number of completed cruise control admin user tasks for which the response + and access details will be + cached.\nmax.cached.completed.cruise.control.admin.user.tasks=30\n# The + fallback maximum number of completed user tasks of certain type for which + the response and access details will be + cached.\nmax.cached.completed.user.tasks=25\n# The maximum number of user + tasks for concurrently running in async endpoints across all + users.\nmax.active.user.tasks=1000\n# Enable self healing for all anomaly + detectors, unless the particular anomaly detector is 
explicitly + disabled\nself.healing.enabled=true\n# Enable self healing for broker + failure detector\n#self.healing.broker.failure.enabled=true\n# Enable self + healing for goal violation + detector\n#self.healing.goal.violation.enabled=true\n# Enable self healing + for metric anomaly detector\nself.healing.metric.anomaly.enabled=false\n# + Enable self healing for disk failure + detector\n#self.healing.disk.failure.enabled=true\n# Use the Kafka API to + detect broker failures (and not old ZK + interface)\nkafka.broker.failure.detection.enable=true\n# Defines the + threshold to mark a broker as dead. If a non-empty broker leaves the + cluster at time T and did not join\n# the cluster before T + + broker.failure.alert.threshold.ms, the broker is defined as dead broker + since T.\n# An alert will be triggered in this case.\n# Set to 15 + minutes\nbroker.failure.alert.threshold.ms=900000\n# If self-healing is + enabled and a broker is dead at T,\n# self-healing will be triggered at T + + broker.failure.self.healing.threshold.ms.\n# Set to 90 + minutes\nbroker.failure.self.healing.threshold.ms=5400000\n# The + multiplier applied to the threshold of distribution goals used by + goal.violation.detector.\n#goal.violation.distribution.threshold.multiplier=2.50\n# + The flag to indicate whether use of provisioner is + enabled\nprovisioner.enable=false\n# configurations for the webserver\n# + ================================\n# HTTP listen + port\nwebserver.http.port=9090\n# HTTP listen + address\nwebserver.http.address=0.0.0.0\n# Whether CORS support is enabled + for API or not\nwebserver.http.cors.enabled=false\n# Value for + Access-Control-Allow-Origin\nwebserver.http.cors.origin=http://localhost:8080/\n# + Value for + Access-Control-Request-Method\nwebserver.http.cors.allowmethods=OPTIONS,GET,POST\n# + Headers that should be exposed to the Browser (Webapp)\n# This is a + special header that is used by the\n# User Tasks subsystem and should be + explicitly\n# Enabled when 
CORS mode is used as part of the\n# Admin + Interface\nwebserver.http.cors.exposeheaders=User-Task-ID\n# REST API + default prefix\n# (dont forget the ending + *)\nwebserver.api.urlprefix=/kafkacruisecontrol/*\n# Location where the + Cruise Control frontend is + deployed\nwebserver.ui.diskpath=./cruise-control-ui/dist/\n# URL path + prefix for UI\n# (dont forget the ending *)\nwebserver.ui.urlprefix=/*\n# + Time After which request is converted to + Async\nwebserver.request.maxBlockTimeMs=10000\n# Default Session Expiry + Period\nwebserver.session.maxExpiryTimeMs=60000\n# Session cookie + path\nwebserver.session.path=/\n# Server Access + Logs\nwebserver.accesslog.enabled=true\n# Configurations for servlet\n# + ==========================\n# Enable two-step verification for processing + POST requests.\ntwo.step.verification.enabled=false\n# The maximum time in + milliseconds to retain the requests in two-step (verification) + purgatory.\ntwo.step.purgatory.retention.time.ms=1209600000\n# The maximum + number of requests in two-step (verification) + purgatory.\ntwo.step.purgatory.max.requests=25\n\ndefault.replication.throttle=104857600\nmax.replicas.per.broker=20000\nremove.slow.broker=false\nself.healing.slow.broker.removal.enabled=true\ntopics.with.min.leaders.per.broker=__consumer_offsets|aep_pipeline_kafka_monitor","cruiseControlAnnotations":{"arc.ethos.adobe.net/ignore":"true"},"cruiseControlTaskSpec":{"RetryDurationMinutes":2147483647},"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/cruise-control:3.0.3-adbe-20260423","log4jConfig":"rootLogger.level=INFO\nappenders=console\nappender.console.type=Console\nappender.console.name=STDOUT\nappender.console.layout.type=PatternLayout\nappender.console.layout.pattern=[%d] + %p %replace{%msg}{[\\r\\n]}{|} + 
%throwable{separator(|)}(%c{2})%n\nrootLogger.appenderRefs=console\nrootLogger.appenderRef.console.ref=STDOUT","resourceRequirements":{"limits":{"cpu":"4","memory":"2Gi"},"requests":{"cpu":"4","memory":"2Gi"}},"serviceAccountName":"kafka-cluster","tolerations":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"},{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"io-optimized"}],"topicConfig":{"partitions":6,"replicationFactor":3}},"disruptionBudget":{"budget":"0","create":true},"envoyConfig":{"affinity":{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"node.kubernetes.io/pipeline-workload","operator":"In","values":["true"]},{"key":"node.kubernetes.io/ethos-workload.arm64","operator":"In","values":["true"]}]}]}},"podAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"topologyKey":"kubernetes.io/hostname"},"weight":1}]},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"eListenerName","operator":"In","values":["plaintext-ingress-az1","plaintext-ingress-az2","plaintext-ingress-az3","external-ingress-az1","external-ingress-az2","external-ingress-az3"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"annotations":{"arc.ethos.adobe.net/ignore":"true","ops/certVersion":"11"},"disruptionBudget":{"budget":"25%","create":true,"strategy":"maxUnavailable"},"envoyCommandLineArgs":{"concurrency":2},"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/envoyproxy/envoy:v1.36.2","replicas":2,"resourceRequirements":{"limits":{"cpu":"1","memory":"1Gi"},"requests":{"cpu":"1","memory":"1Gi"}},"serviceAccountName":"kafka-cluster","toleration
s":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"},{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"io-optimized"}],"topologySpreadConstraints":[{"labelSelector":{"matchLabels":{"app":"envoyingress"}},"maxSkew":1,"topologyKey":"kubernetes.io/hostname","whenUnsatisfiable":"ScheduleAnyway"}]},"envs":[{"name":"POD_UID","valueFrom":{"fieldRef":{"fieldPath":"metadata.uid"}}},{"name":"KAFKA_LOG4J_LOGGERS","value":"io.aiven.kafka.tieredstorage=DEBUG"},{"name":"AZURE_CLIENT_ID","valueFrom":{"secretKeyRef":{"key":"AZURE_CLIENT_ID","name":"azure-secrets"}}},{"name":"AZURE_CLIENT_SECRET","valueFrom":{"secretKeyRef":{"key":"AZURE_CLIENT_SECRET","name":"azure-secrets"}}},{"name":"AZURE_TENANT_ID","valueFrom":{"secretKeyRef":{"key":"AZURE_TENANT_ID","name":"azure-secrets"}}}],"headlessServiceEnabled":true,"ingressController":"envoy","listenersConfig":{"externalListeners":[{"accessMethod":"LoadBalancer","anyCastPort":443,"config":{"defaultIngressConfig":"","ingressConfig":{"corpingress-az1":{"envoyConfig":{"annotations":{"broker_group":"az1"},"brokerHostnameTemplate":"kafka-1-%id-va7-corp.dev.pipeline.adobedc.net","nodeSelector":{"topology.kubernetes.io/zone":"eastus2-1"},"replicas":1,"resourceRequirements":{"limits":{"cpu":"50m","memory":"100Mi"},"requests":{"cpu":"10m","memory":"100Mi"}}},"hostnameOverride":"kafka-1-az1-va7-corp.dev.pipeline.adobedc.net","serviceType":"ClusterIP"},"corpingress-az2":{"envoyConfig":{"annotations":{"broker_group":"az2"},"brokerHostnameTemplate":"kafka-1-%id-va7-corp.dev.pipeline.adobedc.net","nodeSelector":{"topology.kubernetes.io/zone":"eastus2-2"},"replicas":1,"resourceRequirements":{"limits":{"cpu":"50m","memory":"100Mi"},"requests":{"cpu":"10m","memory":"100Mi"}}},"hostnameOverride":"kafka-1-az2-va7-corp.dev.pipeline.adobedc.net","serviceType
":"ClusterIP"},"corpingress-az3":{"envoyConfig":{"annotations":{"broker_group":"az3"},"brokerHostnameTemplate":"kafka-1-%id-va7-corp.dev.pipeline.adobedc.net","nodeSelector":{"topology.kubernetes.io/zone":"eastus2-3"},"replicas":1,"resourceRequirements":{"limits":{"cpu":"50m","memory":"100Mi"},"requests":{"cpu":"10m","memory":"100Mi"}}},"hostnameOverride":"kafka-1-az3-va7-corp.dev.pipeline.adobedc.net","serviceType":"ClusterIP"}}},"containerPort":29098,"externalStartingPort":-1,"name":"corp","tlsSecretName":"dev-adobedc-net-tls","type":"sasl_plaintext","usedForInnerBrokerCommunication":false},{"accessMethod":"LoadBalancer","anyCastPort":9097,"config":{"defaultIngressConfig":"","ingressConfig":{"secureingress-az1":{"envoyConfig":{"affinity":{"podAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"topologyKey":"kubernetes.io/hostname"},"weight":1}]},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"eListenerName","operator":"In","values":["secure-secureingress-az1","secure-secureingress-az2","secure-secureingress-az3"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"annotations":{"broker_group":"az1"},"brokerHostnameTemplate":"kafka-1-%id-va7.dev.pipeline.adobedc.net","nodeSelector":{"topology.kubernetes.io/zone":"eastus2-1"}},"hostnameOverride":"kafka-1-az1-va7-secure.dev.pipeline.adobedc.net","serviceType":"ClusterIP"},"secureingress-az2":{"envoyConfig":{"affinity":{"podAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"topologyKey":"kubernetes.io/hostname"},"weight":1}]},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"eListenerName","operator":"In","values":["secure-secureingress-az1","secure-secureing
ress-az2","secure-secureingress-az3"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"annotations":{"broker_group":"az2"},"brokerHostnameTemplate":"kafka-1-%id-va7.dev.pipeline.adobedc.net","nodeSelector":{"topology.kubernetes.io/zone":"eastus2-2"}},"hostnameOverride":"kafka-1-az2-va7-secure.dev.pipeline.adobedc.net","serviceType":"ClusterIP"},"secureingress-az3":{"envoyConfig":{"affinity":{"podAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"topologyKey":"kubernetes.io/hostname"},"weight":1}]},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"eListenerName","operator":"In","values":["secure-secureingress-az1","secure-secureingress-az2","secure-secureingress-az3"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"annotations":{"broker_group":"az3"},"brokerHostnameTemplate":"kafka-1-%id-va7.dev.pipeline.adobedc.net","nodeSelector":{"topology.kubernetes.io/zone":"eastus2-3"}},"hostnameOverride":"kafka-1-az3-va7-secure.dev.pipeline.adobedc.net","serviceType":"ClusterIP"}}},"containerPort":29095,"externalStartingPort":-1,"name":"secure","tlsSecretName":"dev-adobedc-net-tls","type":"sasl_plaintext","usedForInnerBrokerCommunication":false}],"internalListeners":[{"containerPort":29092,"internalStartingPort":0,"name":"internal","type":"plaintext","usedForInnerBrokerCommunication":true},{"containerPort":29093,"internalStartingPort":0,"name":"controller","type":"plaintext","usedForControllerCommunication":true,"usedForInnerBrokerCommunication":false},{"containerPort":29096,"internalStartingPort":0,"name":"sasl_plain","type":"sasl_plaintext","usedForInnerBrokerCommunication":false}]},"monitoringConfig":{"jmxImage":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/amuraru/jmx-javaagent:0.19.1-multi","pathToJar":"/jmx_prometheus_javaagent.jar"},"oneBrokerPerNode":true,"propagateLabels":true,"read
OnlyConfig":"__do_no_edit_diskSize=1048576\nauthorizer.class.name=com.adobe.core.pipeline.kafka.security.server.auth.CustomAclAuthorizerWithAccessTrackingMetrics\nauto.create.topics.enable=false\nauto.leader.rebalance.enable=true\nbackground.threads=20\nbroker.id.generation.enable=false\ncruise.control.metrics.reporter.acks=1\ncruise.control.metrics.topic=__CruiseControlMetrics\ncruise.control.metrics.topic.min.insync.replicas=1\ndefault.replication.factor=3\ninter.broker.protocol.version=3.9\nleader.imbalance.per.broker.percentage=0\nlistener.name.corp.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule + required;\nlistener.name.corp.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler\nlistener.name.sasl_plain.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule + required;\nlistener.name.sasl_plain.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler\nlistener.name.secure.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule + 
required;\nlistener.name.secure.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler\nlog.message.timestamp.after.max.ms=86400000\nlog.segment.bytes=536870912\nmax.incremental.fetch.session.cache.slots=1000\nmin.insync.replicas=2\nnum.io.threads=48\nnum.network.threads=40\nnum.partitions=10\nnum.recovery.threads.per.data.dir=8\nnum.replica.fetchers=4\noffsets.commit.required.acks=1\nprincipal.builder.class=com.adobe.core.pipeline.kafka.security.server.auth.PipelinePrincipalBuilder\nqueued.max.requests=1000\nremote.fetch.max.wait.ms=1000\nremote.log.metadata.manager.class.name=org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManager\nremote.log.metadata.manager.listener.name=INTERNAL\nremote.log.storage.manager.class.name=io.aiven.kafka.tieredstorage.RemoteStorageManager\nremote.log.storage.manager.class.path=/opt/kafka/libs/extensions/core/*:/opt/kafka/libs/extensions/azure/*\nremote.log.storage.system.enable=true\nreplica.fetch.max.bytes=5242880\nreplica.lag.time.max.ms=15000\nreplica.socket.receive.buffer.bytes=-1\nrsm.config.chunk.size=4194304\nrsm.config.compression.enabled=true\nrsm.config.compression.heuristic.enabled=true\nrsm.config.custom.metadata.fields.include=REMOTE_SIZE\nrsm.config.encryption.enabled=false\nrsm.config.fetch.chunk.cache.class=io.aiven.kafka.tieredstorage.fetch.cache.DiskChunkCache\nrsm.config.fetch.chunk.cache.path=/root\nrsm.config.fetch.chunk.cache.prefetch.max.size=16777216\nrsm.config.fetch.chunk.cache.retention.ms=600000\nrsm.config.fetch.chunk.cache.size=26843545600\nrsm.config.storage.azure.account.name=va7devsava7\nrsm.config.storage.azure.container.name=kafka\nrsm.config.storage.azure.upload.block.size=67108864\nrsm.config.storage.backend.class=io.aiven.kafka.tieredstorage.storage.azure.AzureBlobStorage\nrsm.config.upload.rate.limit.bytes.per.second=209715200\nsasl.enabled.mechanisms=OAUTHBEARER\nsasl.ims.certificate.location=
static.adobelogin.com/keys/nonprod/\nsasl.ims.url=https://ims-na1-stg1.adobelogin.com/\nsocket.listen.backlog.size=1024\nsocket.receive.buffer.bytes=-1\nsocket.send.buffer.bytes=-1\nsuper.users=Broker:ANONYMOUS\nzookeeper.connection.timeout.ms=18000","rollingUpgradeConfig":{"concurrentBrokerRestartCountPerRack":2,"failureThreshold":2},"taintedBrokersSelector":{"matchExpressions":[{"key":"shredder.ethos.adobe.net/upgrade-status","operator":"In","values":["parked"]}]},"zkAddresses":["pipeline-zookeeper-client:2181"],"zkPath":"/kafka"}} + pipeline_config_version: dev + creationTimestamp: '2026-05-20T16:50:57Z' + finalizers: + - finalizer.kafkaclusters.kafka.banzaicloud.io + - topics.kafkaclusters.kafka.banzaicloud.io + - users.kafkaclusters.kafka.banzaicloud.io + generation: 8 + labels: + app.kubernetes.io/instance: pipeline-kafka + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: kafka + app.kubernetes.io/version: 2.1.32 + flex.ethos.corp.adobe.com/instance: experience-platform--pipeline-kafka-deploy--ethos21-st-2d95c39c + pipeline_cluster: VA7 + pipeline_env: dev + name: pipeline-kafka + namespace: ns-team-aep-pipeline-kafka-1-dev + resourceVersion: '8117227591' + uid: baa65e25-0d73-429e-9fab-73b50ed78a58 +spec: + brokerConfigGroups: + az1: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node.kubernetes.io/pipeline-workload + operator: In + values: + - 'true' + - key: node.kubernetes.io/ethos-workload.arm64 + operator: In + values: + - 'true' + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - kafka + - key: isControllerNode + operator: In + values: + - 'false' + topologyKey: kubernetes.io/hostname + brokerAnnotations: + arc.ethos.adobe.net/ignore: 'true' + broker_group: az1 + cluster-autoscaler.kubernetes.io/safe-to-evict: 'false' + io.kubernetes.cri-o.LinkLogs: logging-volume 
+ containers: + - env: + - name: SPLUNK_HOST + value: splunk-hec.loc.adobe.net + - name: SPLUNK_PORT + value: '8088' + - name: SPLUNK_INDEX + value: plat_app_preprod + - name: SPLUNK_TOKEN + valueFrom: + secretKeyRef: + key: token + name: splunk-token + - name: SPLUNK_SOURCETYPE + value: log4j + - name: POD_UID_FLUENT_BIT + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: LOG_PARSER + value: docker + - name: POD_ENV + value: dev + - name: POD_CLUSTER + value: VA7 + image: >- + docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos + name: fluent-bit + ports: + - containerPort: 2020 + name: fb-metrics + protocol: TCP + resources: + limits: + cpu: 100m + memory: 256Mi + requests: + cpu: 100m + memory: 256Mi + volumeMounts: + - mountPath: /logging-volume + mountPropagation: HostToContainer + name: logging-volume + - mountPath: /var/fluent-bit + name: fluent-data + - mountPath: /fluent-bit/etc + name: fluent-bit-config + initContainers: + - command: + - cp + - '-r' + - /pipeline/kafka-libs/. 
+ - /opt/kafka/libs/extensions/ + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10 + imagePullPolicy: IfNotPresent + name: broker-libs-injector + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - mountPath: /opt/kafka/libs/extensions + name: extensions + kafkaHeapOpts: >- + -XX:InitialRAMPercentage=30 -XX:MaxRAMPercentage=70 + -XX:MinRAMPercentage=70 + kafkaJvmPerfOpts: >- + -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 + -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent + -XX:MetaspaceSize=96m -XX:G1HeapRegionSize=16M + -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 + -Djava.awt.headless=true -Dsun.net.inetaddr.ttl=60 + -Dcom.sun.management.jmxremote.port=1090 + -Dcom.sun.management.jmxremote.rmi.port=1090 + -Dcom.sun.management.jmxremote.local.only=false + -Djava.rmi.server.hostname=127.0.0.1 -Djute.maxbuffer=0x9fffff + log4jConfig: >- + log4j.rootLogger=INFO, stdout + + + log4j.appender.stdout=org.apache.log4j.ConsoleAppender + + log4j.appender.stdout.layout=org.apache.log4j.PatternLayout + + log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + + + # Change the line below to adjust ZK client logging + + log4j.logger.org.apache.zookeeper=INFO + + + # Change the two lines below to adjust the general broker logging level + (output to server.log and stdout) + + log4j.logger.kafka=INFO + + log4j.logger.org.apache.kafka=INFO + + + # Change to DEBUG or TRACE to enable request logging + + log4j.logger.kafka.request.logger=WARN + + + # Uncomment the lines below and change + log4j.logger.kafka.network.RequestChannel$ to TRACE for additional + output + + # related to the handling of requests + + #log4j.logger.kafka.network.Processor=TRACE, requestAppender + + #log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender + + # + + log4j.logger.kafka.network.RequestChannel$=WARN + + log4j.logger.kafka.controller=DEBUG + + 
log4j.logger.kafka.log.LogCleaner=INFO + + log4j.logger.state.change.logger=INFO + + + # Access denials are logged at INFO level, change to DEBUG to also log + allowed accesses + + log4j.logger.kafka.authorizer.logger=INFO + + + # Additional logging to reduce noise + + log4j.logger.org.apache.kafka.common.network.Selector=WARN + nodeSelector: + topology.kubernetes.io/zone: eastus2-1 + resourceRequirements: + limits: + cpu: '4' + memory: 4Gi + requests: + cpu: '1' + memory: 4Gi + serviceAccountName: kafka-cluster + storageConfigs: + - mountPath: /csi-kafka-logs2 + pvcSpec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 512Gi + storageClassName: premium-lazy-csi-xfs + terminationGracePeriodSeconds: 120 + tolerations: + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: 'true' + - effect: NoSchedule + key: ethos.corp.adobe.com/ethos-workload + operator: Equal + value: arm64 + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: io-optimized + volumes: + - emptyDir: {} + name: logging-volume + - emptyDir: {} + name: fluent-data + - configMap: + name: pipeline-kafka-fluent-bit + name: fluent-bit-config + az2: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node.kubernetes.io/pipeline-workload + operator: In + values: + - 'true' + - key: node.kubernetes.io/ethos-workload.arm64 + operator: In + values: + - 'true' + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - kafka + - key: isControllerNode + operator: In + values: + - 'false' + topologyKey: kubernetes.io/hostname + brokerAnnotations: + arc.ethos.adobe.net/ignore: 'true' + broker_group: az2 + cluster-autoscaler.kubernetes.io/safe-to-evict: 'false' + io.kubernetes.cri-o.LinkLogs: logging-volume + containers: + - env: + - name: SPLUNK_HOST + 
value: splunk-hec.loc.adobe.net + - name: SPLUNK_PORT + value: '8088' + - name: SPLUNK_INDEX + value: plat_app_preprod + - name: SPLUNK_TOKEN + valueFrom: + secretKeyRef: + key: token + name: splunk-token + - name: SPLUNK_SOURCETYPE + value: log4j + - name: POD_UID_FLUENT_BIT + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: LOG_PARSER + value: docker + - name: POD_ENV + value: dev + - name: POD_CLUSTER + value: VA7 + image: >- + docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos + name: fluent-bit + ports: + - containerPort: 2020 + name: fb-metrics + protocol: TCP + resources: + limits: + cpu: 100m + memory: 256Mi + requests: + cpu: 100m + memory: 256Mi + volumeMounts: + - mountPath: /logging-volume + mountPropagation: HostToContainer + name: logging-volume + - mountPath: /var/fluent-bit + name: fluent-data + - mountPath: /fluent-bit/etc + name: fluent-bit-config + initContainers: + - command: + - cp + - '-r' + - /pipeline/kafka-libs/. 
+ - /opt/kafka/libs/extensions/ + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10 + imagePullPolicy: IfNotPresent + name: broker-libs-injector + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - mountPath: /opt/kafka/libs/extensions + name: extensions + kafkaHeapOpts: >- + -XX:InitialRAMPercentage=30 -XX:MaxRAMPercentage=70 + -XX:MinRAMPercentage=70 + kafkaJvmPerfOpts: >- + -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 + -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent + -XX:MetaspaceSize=96m -XX:G1HeapRegionSize=16M + -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 + -Djava.awt.headless=true -Dsun.net.inetaddr.ttl=60 + -Dcom.sun.management.jmxremote.port=1090 + -Dcom.sun.management.jmxremote.rmi.port=1090 + -Dcom.sun.management.jmxremote.local.only=false + -Djava.rmi.server.hostname=127.0.0.1 -Djute.maxbuffer=0x9fffff + log4jConfig: >- + log4j.rootLogger=INFO, stdout + + + log4j.appender.stdout=org.apache.log4j.ConsoleAppender + + log4j.appender.stdout.layout=org.apache.log4j.PatternLayout + + log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + + + # Change the line below to adjust ZK client logging + + log4j.logger.org.apache.zookeeper=INFO + + + # Change the two lines below to adjust the general broker logging level + (output to server.log and stdout) + + log4j.logger.kafka=INFO + + log4j.logger.org.apache.kafka=INFO + + + # Change to DEBUG or TRACE to enable request logging + + log4j.logger.kafka.request.logger=WARN + + + # Uncomment the lines below and change + log4j.logger.kafka.network.RequestChannel$ to TRACE for additional + output + + # related to the handling of requests + + #log4j.logger.kafka.network.Processor=TRACE, requestAppender + + #log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender + + # + + log4j.logger.kafka.network.RequestChannel$=WARN + + log4j.logger.kafka.controller=DEBUG + + 
log4j.logger.kafka.log.LogCleaner=INFO + + log4j.logger.state.change.logger=INFO + + + # Access denials are logged at INFO level, change to DEBUG to also log + allowed accesses + + log4j.logger.kafka.authorizer.logger=INFO + + + # Additional logging to reduce noise + + log4j.logger.org.apache.kafka.common.network.Selector=WARN + nodeSelector: + topology.kubernetes.io/zone: eastus2-2 + resourceRequirements: + limits: + cpu: '4' + memory: 4Gi + requests: + cpu: '1' + memory: 4Gi + serviceAccountName: kafka-cluster + storageConfigs: + - mountPath: /csi-kafka-logs2 + pvcSpec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 512Gi + storageClassName: premium-lazy-csi-xfs + terminationGracePeriodSeconds: 120 + tolerations: + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: 'true' + - effect: NoSchedule + key: ethos.corp.adobe.com/ethos-workload + operator: Equal + value: arm64 + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: io-optimized + volumes: + - emptyDir: {} + name: logging-volume + - emptyDir: {} + name: fluent-data + - configMap: + name: pipeline-kafka-fluent-bit + name: fluent-bit-config + az3: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node.kubernetes.io/pipeline-workload + operator: In + values: + - 'true' + - key: node.kubernetes.io/ethos-workload.arm64 + operator: In + values: + - 'true' + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - kafka + - key: isControllerNode + operator: In + values: + - 'false' + topologyKey: kubernetes.io/hostname + brokerAnnotations: + arc.ethos.adobe.net/ignore: 'true' + broker_group: az3 + cluster-autoscaler.kubernetes.io/safe-to-evict: 'false' + io.kubernetes.cri-o.LinkLogs: logging-volume + containers: + - env: + - name: SPLUNK_HOST + 
value: splunk-hec.loc.adobe.net + - name: SPLUNK_PORT + value: '8088' + - name: SPLUNK_INDEX + value: plat_app_preprod + - name: SPLUNK_TOKEN + valueFrom: + secretKeyRef: + key: token + name: splunk-token + - name: SPLUNK_SOURCETYPE + value: log4j + - name: POD_UID_FLUENT_BIT + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NODE_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: LOG_PARSER + value: docker + - name: POD_ENV + value: dev + - name: POD_CLUSTER + value: VA7 + image: >- + docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos + name: fluent-bit + ports: + - containerPort: 2020 + name: fb-metrics + protocol: TCP + resources: + limits: + cpu: 100m + memory: 256Mi + requests: + cpu: 100m + memory: 256Mi + volumeMounts: + - mountPath: /logging-volume + mountPropagation: HostToContainer + name: logging-volume + - mountPath: /var/fluent-bit + name: fluent-data + - mountPath: /fluent-bit/etc + name: fluent-bit-config + initContainers: + - command: + - cp + - '-r' + - /pipeline/kafka-libs/. 
+ - /opt/kafka/libs/extensions/ + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10 + imagePullPolicy: IfNotPresent + name: broker-libs-injector + resources: + limits: + cpu: 100m + memory: 100Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - mountPath: /opt/kafka/libs/extensions + name: extensions + kafkaHeapOpts: >- + -XX:InitialRAMPercentage=30 -XX:MaxRAMPercentage=70 + -XX:MinRAMPercentage=70 + kafkaJvmPerfOpts: >- + -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 + -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent + -XX:MetaspaceSize=96m -XX:G1HeapRegionSize=16M + -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 + -Djava.awt.headless=true -Dsun.net.inetaddr.ttl=60 + -Dcom.sun.management.jmxremote.port=1090 + -Dcom.sun.management.jmxremote.rmi.port=1090 + -Dcom.sun.management.jmxremote.local.only=false + -Djava.rmi.server.hostname=127.0.0.1 -Djute.maxbuffer=0x9fffff + log4jConfig: >- + log4j.rootLogger=INFO, stdout + + + log4j.appender.stdout=org.apache.log4j.ConsoleAppender + + log4j.appender.stdout.layout=org.apache.log4j.PatternLayout + + log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n + + + # Change the line below to adjust ZK client logging + + log4j.logger.org.apache.zookeeper=INFO + + + # Change the two lines below to adjust the general broker logging level + (output to server.log and stdout) + + log4j.logger.kafka=INFO + + log4j.logger.org.apache.kafka=INFO + + + # Change to DEBUG or TRACE to enable request logging + + log4j.logger.kafka.request.logger=WARN + + + # Uncomment the lines below and change + log4j.logger.kafka.network.RequestChannel$ to TRACE for additional + output + + # related to the handling of requests + + #log4j.logger.kafka.network.Processor=TRACE, requestAppender + + #log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender + + # + + log4j.logger.kafka.network.RequestChannel$=WARN + + log4j.logger.kafka.controller=DEBUG + + 
log4j.logger.kafka.log.LogCleaner=INFO + + log4j.logger.state.change.logger=INFO + + + # Access denials are logged at INFO level, change to DEBUG to also log + allowed accesses + + log4j.logger.kafka.authorizer.logger=INFO + + + # Additional logging to reduce noise + + log4j.logger.org.apache.kafka.common.network.Selector=WARN + nodeSelector: + topology.kubernetes.io/zone: eastus2-3 + resourceRequirements: + limits: + cpu: '4' + memory: 4Gi + requests: + cpu: '1' + memory: 4Gi + serviceAccountName: kafka-cluster + storageConfigs: + - mountPath: /csi-kafka-logs2 + pvcSpec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 512Gi + storageClassName: premium-lazy-csi-xfs + terminationGracePeriodSeconds: 120 + tolerations: + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: 'true' + - effect: NoSchedule + key: ethos.corp.adobe.com/ethos-workload + operator: Equal + value: arm64 + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: io-optimized + volumes: + - emptyDir: {} + name: logging-volume + - emptyDir: {} + name: fluent-data + - configMap: + name: pipeline-kafka-fluent-bit + name: fluent-bit-config + brokers: + - brokerConfig: + brokerIngressMapping: + - ingress-az1 + - corpingress-az1 + - secureingress-az1 + terminationGracePeriodSeconds: 120 + brokerConfigGroup: az1 + id: 101 + readOnlyConfig: | + broker.rack=eastus2-1 + - brokerConfig: + brokerIngressMapping: + - ingress-az1 + - corpingress-az1 + - secureingress-az1 + terminationGracePeriodSeconds: 120 + brokerConfigGroup: az1 + id: 102 + readOnlyConfig: | + broker.rack=eastus2-1 + - brokerConfig: + brokerIngressMapping: + - ingress-az2 + - corpingress-az2 + - secureingress-az2 + terminationGracePeriodSeconds: 120 + brokerConfigGroup: az2 + id: 201 + readOnlyConfig: | + broker.rack=eastus2-2 + - brokerConfig: + brokerIngressMapping: + - ingress-az2 + - corpingress-az2 + - secureingress-az2 + 
terminationGracePeriodSeconds: 120 + brokerConfigGroup: az2 + id: 202 + readOnlyConfig: | + broker.rack=eastus2-2 + - brokerConfig: + brokerIngressMapping: + - ingress-az3 + - corpingress-az3 + - secureingress-az3 + terminationGracePeriodSeconds: 120 + brokerConfigGroup: az3 + id: 301 + readOnlyConfig: | + broker.rack=eastus2-3 + - brokerConfig: + brokerIngressMapping: + - ingress-az3 + - corpingress-az3 + - secureingress-az3 + terminationGracePeriodSeconds: 120 + brokerConfigGroup: az3 + id: 302 + readOnlyConfig: | + broker.rack=eastus2-3 + clusterImage: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/koperator/kafka:2.13-3.9.2-jdk21.0.11 + clusterMetricsReporterImage: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/cruise-control:3.0.3-adbe-20260423 + contourIngressConfig: + brokerFQDNTemplate: '' + tlsSecretName: '' + cruiseControlConfig: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node.kubernetes.io/pipeline-workload + operator: In + values: + - 'true' + - key: node.kubernetes.io/ethos-workload.arm64 + operator: In + values: + - 'true' + capacityConfig: |- + { + "brokerCapacities":[ + { + "brokerId": "-1", + "capacity": { + "DISK": {"/csi-kafka-logs2/kafka": "524288"}, + "CPU": {"num.cores": "1"}, + "NW_IN": "900000", + "NW_OUT": "900000" + }, + "doc": "This is the default capacity. Capacity unit used for disk is in MB, cpu is in cores, network throughput is in KB." + } + ] + } + clusterConfig: |- + { + "min.insync.replicas": 2 + } + config: >- + + # Configuration for the metadata client. + + # ======================================= + + # The maximum interval in milliseconds between two metadata refreshes. + + #metadata.max.age.ms=300000 + + # Client id for the Cruise Control. It is used for the metadata client. + + #client.id=kafka-cruise-control + + # The size of TCP send buffer bytes for the metadata client. 
+ + #send.buffer.bytes=131072 + + # The size of TCP receive buffer size for the metadata client. + + #receive.buffer.bytes=131072 + + # The time to wait before disconnect an idle TCP connection. + + #connections.max.idle.ms=540000 + + # The time to wait before reconnect to a given host. + + #reconnect.backoff.ms=50 + + # The time to wait for a response from a host after sending a request. + + #request.timeout.ms=30000 + + # The time to wait for broker logdir to respond after sending a request. + + #logdir.response.timeout.ms=10000 + + # Configurations for the load monitor + + # ======================================= + + # The number of metric fetcher thread to fetch metrics for the Kafka + cluster + + num.metric.fetchers=1 + + # The metric sampler class + + metric.sampler.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.CruiseControlMetricsReporterSampler + + # True if the sampling process allows CPU capacity estimation of brokers + used for CPU utilization estimation. + + sampling.allow.cpu.capacity.estimation=true + + # Configurations for CruiseControlMetricsReporterSampler + + metric.reporter.topic=__CruiseControlMetrics + + # The sample store class name + + sample.store.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.KafkaSampleStore + + # The config for the Kafka sample store to save the partition metric + samples + + partition.metric.sample.store.topic=__KafkaCruiseControlPartitionMetricSamples + + # The config for the Kafka sample store to save the model training samples + + broker.metric.sample.store.topic=__KafkaCruiseControlModelTrainingSamples + + # The replication factor of Kafka metric sample store topic + + sample.store.topic.replication.factor=3 + + partition.sample.store.topic.partition.count=15 + + broker.sample.store.topic.partition.count=15 + + # The config for the number of Kafka sample store consumer threads + + num.sample.loading.threads=8 + + # The partition assignor class for the metric samplers + + 
metric.sampler.partition.assignor.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.DefaultMetricSamplerPartitionAssignor + + # The metric sampling interval in milliseconds + + metric.sampling.interval.ms=60000 + + # The partition metrics window size in milliseconds + + partition.metrics.window.ms=300000 + + # The number of partition metric windows to keep in memory + + num.partition.metrics.windows=20 + + # The minimum partition metric samples required for a partition in each + window + + min.samples.per.partition.metrics.window=1 + + # The broker metrics window size in milliseconds + + broker.metrics.window.ms=300000 + + # The number of broker metric windows to keep in memory + + # see https://github.com/linkedin/cruise-control/issues/1149 + + num.broker.metrics.windows=20 + + # The minimum broker metric samples required for a partition in each + window + + min.samples.per.broker.metrics.window=1 + + # The configuration for the BrokerCapacityConfigFileResolver (supports + JBOD, non-JBOD, and heterogeneous CPU core capacities) + + capacity.config.file=config/capacity.json + + # Configurations for the analyzer + + # ======================================= + + # The list of goals to optimize the Kafka cluster for with pre-computed + proposals + + default.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,\ + 
com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicLeaderReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal + # The list of supported goals + + goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.PotentialNwOutGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicLeaderReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal,\ + 
com.linkedin.kafka.cruisecontrol.analyzer.kafkaassigner.KafkaAssignerDiskUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.kafkaassigner.KafkaAssignerEvenRackAwareGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.PreferredLeaderElectionGoal + # The list of supported intra-broker goals + + intra.broker.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.IntraBrokerDiskCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.IntraBrokerDiskUsageDistributionGoal + # The list of supported hard goals + + hard.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal + # The minimum percentage of well monitored partitions out of all the + partitions + + min.valid.partition.ratio=0.95 + + # The balance threshold for CPU + + cpu.balance.threshold=1.3 + + # The balance threshold for disk + + disk.balance.threshold=1.1 + + # The balance threshold for network inbound utilization + + network.inbound.balance.threshold=1.3 + + # The balance threshold for network outbound utilization + + network.outbound.balance.threshold=1.3 + + # The balance threshold for the replica count + + replica.count.balance.threshold=1.1 + + leader.replica.count.balance.threshold=1.05 + + topic.replica.count.balance.threshold=1.5 + + topic.replica.count.balance.max.gap=20 + + topic.leader.replica.count.balance.threshold=1.10 + + topic.leader.replica.count.balance.min.gap=1 + + topic.leader.replica.count.balance.max.gap=5 + + # The capacity threshold for CPU in percentage + + cpu.capacity.threshold=0.8 + + # The capacity threshold for disk in percentage + + disk.capacity.threshold=0.8 + 
+ # The capacity threshold for network inbound utilization in percentage + + network.inbound.capacity.threshold=0.8 + + # The capacity threshold for network outbound utilization in percentage + + network.outbound.capacity.threshold=0.8 + + # The threshold to define the cluster to be in a low CPU utilization state + + cpu.low.utilization.threshold=0.2 + + # The threshold to define the cluster to be in a low disk utilization + state + + disk.low.utilization.threshold=0.2 + + # The threshold to define the cluster to be in a low network inbound + utilization state + + network.inbound.low.utilization.threshold=0.2 + + # The threshold to define the cluster to be in a low disk utilization + state + + network.outbound.low.utilization.threshold=0.2 + + # The metric anomaly percentile upper threshold + + metric.anomaly.percentile.upper.threshold=90.0 + + # The metric anomaly percentile lower threshold + + metric.anomaly.percentile.lower.threshold=10.0 + + # How often should the cached proposal be expired and recalculated if + necessary + + proposal.expiration.ms=60000 + + # The maximum number of replicas that can reside on a broker at any given + time. + + max.replicas.per.broker=14000 + + # The number of threads to use for proposal candidate precomputing. + + num.proposal.precompute.threads=1 + + # the topics that should be excluded from the partition movement. + + #topics.excluded.from.partition.movement= + + # the topics that should have even number of leaders distriubted across + brokers + + topics.with.min.leaders.per.broker=__consumer_offsets + + # enable dynamic min leaders per topic computation + + min.topic.leaders.per.broker=0 + + # The impact of having one level higher goal priority on the relative + balancedness score. + + #goal.balancedness.priority.weight + + # The impact of strictness on the relative balancedness score. 
+ + #goal.balancedness.strictness.weight + + # The maximum number of replicas that should reside on each broker to + consider a cluster as overprovisioned after balancing its replica + distribution. + + overprovisioned.max.replicas.per.broker=3000 + + # Configurations for the executor + + # ======================================= + + # If true, appropriate zookeeper Client { .. } entry required in jaas file + located at $base_dir/config/cruise_control_jaas.conf + + zookeeper.security.enabled=false + + # The max number of partitions to move in/out on a given broker at a given + time. + + num.concurrent.partition.movements.per.broker=5 + + # The max number of partitions to move between disks within a given broker + at a given time. + + num.concurrent.intra.broker.partition.movements=2 + + # The max number of leadership movement within the whole cluster at a + given time. + + num.concurrent.leader.movements=1000 + + # Default replica movement throttle. If not specified, movements + unthrottled by default. + + # Set to 50 MBps (in Bps) + + default.replication.throttle=52428800 + + # The interval between two execution progress checks. 
+ + execution.progress.check.interval.ms=10000 + + default.replica.movement.strategies=com.linkedin.kafka.cruisecontrol.executor.strategy.PostponeUrpReplicaMovementStrategy,\ + com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeMinIsrWithOfflineReplicasStrategy,\ + com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeOneAboveMinIsrWithOfflineReplicasStrategy,\ + com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeLargeReplicaMovementStrategy,\ + com.linkedin.kafka.cruisecontrol.executor.strategy.BaseReplicaMovementStrategy + # Configurations for anomaly detector + + # ======================================= + + # The goal violation notifier class + + anomaly.notifier.class=com.linkedin.kafka.cruisecontrol.detector.notifier.SelfHealingNotifier + + # The metric anomaly finder class + + metric.anomaly.finder.class= + + # The anomaly detection interval + + anomaly.detection.interval.ms=600000 + + # The goal violation to detect. + + anomaly.detection.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicLeaderReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal + 
self.healing.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicLeaderReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\ + com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal + # The interested metrics for metric anomaly analyzer. 
+ + metric.anomaly.analyzer.metrics=BROKER_PRODUCE_LOCAL_TIME_MS_MAX,\ + BROKER_PRODUCE_LOCAL_TIME_MS_MEAN,\ + BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MAX,\ + BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MEAN,\ + BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MAX,\ + BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MEAN,\ + BROKER_LOG_FLUSH_TIME_MS_MAX,\ + BROKER_LOG_FLUSH_TIME_MS_MEAN + # True if recently demoted brokers are excluded from optimizations during + broker failure self healing, false otherwise + + broker.failure.exclude.recently.demoted.brokers=true + + # True if recently removed brokers are excluded from optimizations during + broker failure self healing, false otherwise + + broker.failure.exclude.recently.removed.brokers=true + + # True if recently demoted brokers are excluded from optimizations during + goal violation self healing, false otherwise + + goal.violation.exclude.recently.demoted.brokers=true + + # True if recently removed brokers are excluded from optimizations during + goal violation self healing, false otherwise + + goal.violation.exclude.recently.removed.brokers=true + + # The file path to store the failed broker list. + + # This is to persist the broker failure time in case Cruise Control failed + and restarted when some brokers are down. + + failed.brokers.file.path=failedBrokers.txt + + # Topic config provider class + + topic.config.provider.class=com.linkedin.kafka.cruisecontrol.config.KafkaAdminTopicConfigProvider + + # The cluster configurations for the TopicConfigProvider + + cluster.configs.file=config/clusterConfigs.json + + # The maximum time in milliseconds to store the response and access + details of a completed kafka monitoring user task. + + completed.kafka.monitor.user.task.retention.time.ms=86400000 + + # The maximum time in milliseconds to store the response and access + details of a completed cruise control monitoring user task. 
+ + completed.cruise.control.monitor.user.task.retention.time.ms=86400000 + + # The maximum time in milliseconds to store the response and access + details of a completed kafka admin user task. + + completed.kafka.admin.user.task.retention.time.ms=604800000 + + # The maximum time in milliseconds to store the response and access + details of a completed cruise control admin user task. + + completed.cruise.control.admin.user.task.retention.time.ms=604800000 + + # The fallback maximum time in milliseconds to store the response and + access details of a completed user task. + + completed.user.task.retention.time.ms=86400000 + + # The maximum time in milliseconds to retain the demotion history of + brokers. + + demotion.history.retention.time.ms=900000 + + # The maximum time in milliseconds to retain the removal history of + brokers. + + removal.history.retention.time.ms=900000 + + # The maximum number of completed kafka monitoring user tasks for which + the response and access details will be cached. + + max.cached.completed.kafka.monitor.user.tasks=20 + + # The maximum number of completed cruise control monitoring user tasks for + which the response and access details will be cached. + + max.cached.completed.cruise.control.monitor.user.tasks=20 + + # The maximum number of completed kafka admin user tasks for which the + response and access details will be cached. + + max.cached.completed.kafka.admin.user.tasks=30 + + # The maximum number of completed cruise control admin user tasks for + which the response and access details will be cached. + + max.cached.completed.cruise.control.admin.user.tasks=30 + + # The fallback maximum number of completed user tasks of certain type for + which the response and access details will be cached. + + max.cached.completed.user.tasks=25 + + # The maximum number of user tasks for concurrently running in async + endpoints across all users. 
+ + max.active.user.tasks=1000 + + # Enable self healing for all anomaly detectors, unless the particular + anomaly detector is explicitly disabled + + self.healing.enabled=true + + # Enable self healing for broker failure detector + + #self.healing.broker.failure.enabled=true + + # Enable self healing for goal violation detector + + #self.healing.goal.violation.enabled=true + + # Enable self healing for metric anomaly detector + + self.healing.metric.anomaly.enabled=false + + # Enable self healing for disk failure detector + + #self.healing.disk.failure.enabled=true + + # Use the Kafka API to detect broker failures (and not old ZK interface) + + kafka.broker.failure.detection.enable=true + + # Defines the threshold to mark a broker as dead. If a non-empty broker + leaves the cluster at time T and did not join + + # the cluster before T + broker.failure.alert.threshold.ms, the broker is + defined as dead broker since T. + + # An alert will be triggered in this case. + + # Set to 15 minutes + + broker.failure.alert.threshold.ms=900000 + + # If self-healing is enabled and a broker is dead at T, + + # self-healing will be triggered at T + + broker.failure.self.healing.threshold.ms. + + # Set to 90 minutes + + broker.failure.self.healing.threshold.ms=5400000 + + # The multiplier applied to the threshold of distribution goals used by + goal.violation.detector. 
+ + #goal.violation.distribution.threshold.multiplier=2.50 + + # The flag to indicate whether use of provisioner is enabled + + provisioner.enable=false + + # configurations for the webserver + + # ================================ + + # HTTP listen port + + webserver.http.port=9090 + + # HTTP listen address + + webserver.http.address=0.0.0.0 + + # Whether CORS support is enabled for API or not + + webserver.http.cors.enabled=false + + # Value for Access-Control-Allow-Origin + + webserver.http.cors.origin=http://localhost:8080/ + + # Value for Access-Control-Request-Method + + webserver.http.cors.allowmethods=OPTIONS,GET,POST + + # Headers that should be exposed to the Browser (Webapp) + + # This is a special header that is used by the + + # User Tasks subsystem and should be explicitly + + # Enabled when CORS mode is used as part of the + + # Admin Interface + + webserver.http.cors.exposeheaders=User-Task-ID + + # REST API default prefix + + # (don't forget the ending *) + + webserver.api.urlprefix=/kafkacruisecontrol/* + + # Location where the Cruise Control frontend is deployed + + webserver.ui.diskpath=./cruise-control-ui/dist/ + + # URL path prefix for UI + + # (don't forget the ending *) + + webserver.ui.urlprefix=/* + + # Time After which request is converted to Async + + webserver.request.maxBlockTimeMs=10000 + + # Default Session Expiry Period + + webserver.session.maxExpiryTimeMs=60000 + + # Session cookie path + + webserver.session.path=/ + + # Server Access Logs + + webserver.accesslog.enabled=true + + # Configurations for servlet + + # ========================== + + # Enable two-step verification for processing POST requests. + + two.step.verification.enabled=false + + # The maximum time in milliseconds to retain the requests in two-step + (verification) purgatory. + + two.step.purgatory.retention.time.ms=1209600000 + + # The maximum number of requests in two-step (verification) purgatory. 
+ + two.step.purgatory.max.requests=25 + + + default.replication.throttle=104857600 + + max.replicas.per.broker=20000 + + remove.slow.broker=false + + self.healing.slow.broker.removal.enabled=true + + topics.with.min.leaders.per.broker=__consumer_offsets|aep_pipeline_kafka_monitor + cruiseControlAnnotations: + arc.ethos.adobe.net/ignore: 'true' + cruiseControlTaskSpec: + RetryDurationMinutes: 2147483647 + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/cruise-control:3.0.3-adbe-20260423 + log4jConfig: >- + rootLogger.level=INFO + + appenders=console + + appender.console.type=Console + + appender.console.name=STDOUT + + appender.console.layout.type=PatternLayout + + appender.console.layout.pattern=[%d] %p %replace{%msg}{[\r\n]}{|} + %throwable{separator(|)}(%c{2})%n + + rootLogger.appenderRefs=console + + rootLogger.appenderRef.console.ref=STDOUT + resourceRequirements: + limits: + cpu: '4' + memory: 2Gi + requests: + cpu: '4' + memory: 2Gi + serviceAccountName: kafka-cluster + tolerations: + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: 'true' + - effect: NoSchedule + key: ethos.corp.adobe.com/ethos-workload + operator: Equal + value: arm64 + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: io-optimized + topicConfig: + partitions: 6 + replicationFactor: 3 + disruptionBudget: + budget: '0' + create: true + envoyConfig: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node.kubernetes.io/pipeline-workload + operator: In + values: + - 'true' + - key: node.kubernetes.io/ethos-workload.arm64 + operator: In + values: + - 'true' + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - kafka + topologyKey: kubernetes.io/hostname + weight: 1 + podAntiAffinity: + 
requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: eListenerName + operator: In + values: + - plaintext-ingress-az1 + - plaintext-ingress-az2 + - plaintext-ingress-az3 + - external-ingress-az1 + - external-ingress-az2 + - external-ingress-az3 + topologyKey: kubernetes.io/hostname + annotations: + arc.ethos.adobe.net/ignore: 'true' + ops/certVersion: '11' + disruptionBudget: + budget: 25% + create: true + strategy: maxUnavailable + envoyCommandLineArgs: + concurrency: 2 + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/envoyproxy/envoy:v1.36.2 + replicas: 2 + resourceRequirements: + limits: + cpu: '1' + memory: 1Gi + requests: + cpu: '1' + memory: 1Gi + serviceAccountName: kafka-cluster + tolerations: + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: 'true' + - effect: NoSchedule + key: ethos.corp.adobe.com/ethos-workload + operator: Equal + value: arm64 + - effect: NoSchedule + key: node.kubernetes.io/pipeline-workload + operator: Equal + value: io-optimized + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: envoyingress + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + envs: + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: KAFKA_LOG4J_LOGGERS + value: io.aiven.kafka.tieredstorage=DEBUG + - name: AZURE_CLIENT_ID + valueFrom: + secretKeyRef: + key: AZURE_CLIENT_ID + name: azure-secrets + - name: AZURE_CLIENT_SECRET + valueFrom: + secretKeyRef: + key: AZURE_CLIENT_SECRET + name: azure-secrets + - name: AZURE_TENANT_ID + valueFrom: + secretKeyRef: + key: AZURE_TENANT_ID + name: azure-secrets + headlessServiceEnabled: true + ingressController: envoy + istioIngressConfig: {} + kRaft: false + listenersConfig: + externalListeners: + - accessMethod: LoadBalancer + anyCastPort: 443 + config: + defaultIngressConfig: '' + ingressConfig: + corpingress-az1: + envoyConfig: + annotations: + 
broker_group: az1 + brokerHostnameTemplate: kafka-1-%id-va7-corp.dev.pipeline.adobedc.net + nodeSelector: + topology.kubernetes.io/zone: eastus2-1 + replicas: 1 + resourceRequirements: + limits: + cpu: 50m + memory: 100Mi + requests: + cpu: 10m + memory: 100Mi + hostnameOverride: kafka-1-az1-va7-corp.dev.pipeline.adobedc.net + serviceType: ClusterIP + corpingress-az2: + envoyConfig: + annotations: + broker_group: az2 + brokerHostnameTemplate: kafka-1-%id-va7-corp.dev.pipeline.adobedc.net + nodeSelector: + topology.kubernetes.io/zone: eastus2-2 + replicas: 1 + resourceRequirements: + limits: + cpu: 50m + memory: 100Mi + requests: + cpu: 10m + memory: 100Mi + hostnameOverride: kafka-1-az2-va7-corp.dev.pipeline.adobedc.net + serviceType: ClusterIP + corpingress-az3: + envoyConfig: + annotations: + broker_group: az3 + brokerHostnameTemplate: kafka-1-%id-va7-corp.dev.pipeline.adobedc.net + nodeSelector: + topology.kubernetes.io/zone: eastus2-3 + replicas: 1 + resourceRequirements: + limits: + cpu: 50m + memory: 100Mi + requests: + cpu: 10m + memory: 100Mi + hostnameOverride: kafka-1-az3-va7-corp.dev.pipeline.adobedc.net + serviceType: ClusterIP + containerPort: 29098 + externalStartingPort: -1 + name: corp + tlsSecretName: dev-adobedc-net-tls + type: sasl_plaintext + usedForInnerBrokerCommunication: false + - accessMethod: LoadBalancer + anyCastPort: 9097 + config: + defaultIngressConfig: '' + ingressConfig: + secureingress-az1: + envoyConfig: + affinity: + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - kafka + topologyKey: kubernetes.io/hostname + weight: 1 + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: eListenerName + operator: In + values: + - secure-secureingress-az1 + - secure-secureingress-az2 + - secure-secureingress-az3 + topologyKey: kubernetes.io/hostname + annotations: + 
broker_group: az1 + brokerHostnameTemplate: kafka-1-%id-va7.dev.pipeline.adobedc.net + nodeSelector: + topology.kubernetes.io/zone: eastus2-1 + hostnameOverride: kafka-1-az1-va7-secure.dev.pipeline.adobedc.net + serviceType: ClusterIP + secureingress-az2: + envoyConfig: + affinity: + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - kafka + topologyKey: kubernetes.io/hostname + weight: 1 + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: eListenerName + operator: In + values: + - secure-secureingress-az1 + - secure-secureingress-az2 + - secure-secureingress-az3 + topologyKey: kubernetes.io/hostname + annotations: + broker_group: az2 + brokerHostnameTemplate: kafka-1-%id-va7.dev.pipeline.adobedc.net + nodeSelector: + topology.kubernetes.io/zone: eastus2-2 + hostnameOverride: kafka-1-az2-va7-secure.dev.pipeline.adobedc.net + serviceType: ClusterIP + secureingress-az3: + envoyConfig: + affinity: + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - kafka + topologyKey: kubernetes.io/hostname + weight: 1 + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: eListenerName + operator: In + values: + - secure-secureingress-az1 + - secure-secureingress-az2 + - secure-secureingress-az3 + topologyKey: kubernetes.io/hostname + annotations: + broker_group: az3 + brokerHostnameTemplate: kafka-1-%id-va7.dev.pipeline.adobedc.net + nodeSelector: + topology.kubernetes.io/zone: eastus2-3 + hostnameOverride: kafka-1-az3-va7-secure.dev.pipeline.adobedc.net + serviceType: ClusterIP + containerPort: 29095 + externalStartingPort: -1 + name: secure + tlsSecretName: dev-adobedc-net-tls + type: sasl_plaintext + 
usedForInnerBrokerCommunication: false + internalListeners: + - containerPort: 29092 + internalStartingPort: 0 + name: internal + type: plaintext + usedForInnerBrokerCommunication: true + - containerPort: 29093 + internalStartingPort: 0 + name: controller + type: plaintext + usedForControllerCommunication: true + usedForInnerBrokerCommunication: false + - containerPort: 29096 + internalStartingPort: 0 + name: sasl_plain + type: sasl_plaintext + usedForInnerBrokerCommunication: false + monitoringConfig: + jmxImage: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/amuraru/jmx-javaagent:0.19.1-multi + pathToJar: /jmx_prometheus_javaagent.jar + oneBrokerPerNode: true + propagateLabels: true + readOnlyConfig: >- + __do_no_edit_diskSize=1048576 + + authorizer.class.name=com.adobe.core.pipeline.kafka.security.server.auth.CustomAclAuthorizerWithAccessTrackingMetrics + + auto.create.topics.enable=false + + auto.leader.rebalance.enable=true + + background.threads=20 + + broker.id.generation.enable=false + + cruise.control.metrics.reporter.acks=1 + + cruise.control.metrics.topic=__CruiseControlMetrics + + cruise.control.metrics.topic.min.insync.replicas=1 + + default.replication.factor=3 + + inter.broker.protocol.version=3.9 + + leader.imbalance.per.broker.percentage=0 + + listener.name.corp.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule + required; + + listener.name.corp.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler + + listener.name.sasl_plain.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule + required; + + listener.name.sasl_plain.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler + + listener.name.secure.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule + required; + 
+ listener.name.secure.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler + + log.message.timestamp.after.max.ms=86400000 + + log.segment.bytes=536870912 + + max.incremental.fetch.session.cache.slots=1000 + + min.insync.replicas=2 + + num.io.threads=48 + + num.network.threads=40 + + num.partitions=10 + + num.recovery.threads.per.data.dir=8 + + num.replica.fetchers=4 + + offsets.commit.required.acks=1 + + principal.builder.class=com.adobe.core.pipeline.kafka.security.server.auth.PipelinePrincipalBuilder + + queued.max.requests=1000 + + remote.fetch.max.wait.ms=1000 + + remote.log.metadata.manager.class.name=org.apache.kafka.server.log.remote.metadata.storage.TopicBasedRemoteLogMetadataManager + + remote.log.metadata.manager.listener.name=INTERNAL + + remote.log.storage.manager.class.name=io.aiven.kafka.tieredstorage.RemoteStorageManager + + remote.log.storage.manager.class.path=/opt/kafka/libs/extensions/core/*:/opt/kafka/libs/extensions/azure/* + + remote.log.storage.system.enable=true + + replica.fetch.max.bytes=5242880 + + replica.lag.time.max.ms=15000 + + replica.socket.receive.buffer.bytes=-1 + + rsm.config.chunk.size=4194304 + + rsm.config.compression.enabled=true + + rsm.config.compression.heuristic.enabled=true + + rsm.config.custom.metadata.fields.include=REMOTE_SIZE + + rsm.config.encryption.enabled=false + + rsm.config.fetch.chunk.cache.class=io.aiven.kafka.tieredstorage.fetch.cache.DiskChunkCache + + rsm.config.fetch.chunk.cache.path=/root + + rsm.config.fetch.chunk.cache.prefetch.max.size=16777216 + + rsm.config.fetch.chunk.cache.retention.ms=600000 + + rsm.config.fetch.chunk.cache.size=26843545600 + + rsm.config.storage.azure.account.name=va7devsava7 + + rsm.config.storage.azure.container.name=kafka + + rsm.config.storage.azure.upload.block.size=67108864 + + rsm.config.storage.backend.class=io.aiven.kafka.tieredstorage.storage.azure.AzureBlobStorage + + 
rsm.config.upload.rate.limit.bytes.per.second=209715200 + + sasl.enabled.mechanisms=OAUTHBEARER + + sasl.ims.certificate.location=static.adobelogin.com/keys/nonprod/ + + sasl.ims.url=https://ims-na1-stg1.adobelogin.com/ + + socket.listen.backlog.size=1024 + + socket.receive.buffer.bytes=-1 + + socket.send.buffer.bytes=-1 + + super.users=Broker:ANONYMOUS + + zookeeper.connection.timeout.ms=18000 + removeUnusedIngressResources: false + rollingUpgradeConfig: + concurrentBrokerRestartCountPerRack: 2 + failureThreshold: 2 + taintedBrokersSelector: + matchExpressions: + - key: shredder.ethos.adobe.net/upgrade-status + operator: In + values: + - parked + zkAddresses: + - 'pipeline-zookeeper-client:2181' + zkPath: /kafka +status: + alertCount: 0 + brokersState: + '101': + configurationBackup: >- + H4sIAAAAAAAA/1TNMY+DMAwF4P/yZjgRxkg33YBuOF2ljm2HNHGRRWtHDgwt4r93gKGM/vz03gxO8K5xFa6mA9mPyo37znTK8AgvhwpGIf3L/bn+4Lfol4U4fFMo41Ta2p0F+xL4ebt/pTcq5S/kzNLDn8Cr1OtAVMt7KRQno0+7VBjJHixhZJXOQqQDGWs6UlRJBd61zbK8AwAA///DeF/e0QAAAA== + configurationState: ConfigInSync + externalListenerConfigNames: + - ingress-az1 + - corpingress-az1 + - secureingress-az1 + gracefulActionState: + cruiseControlState: GracefulUpscaleSucceeded + volumeStates: + /csi-kafka-logs2: + cruiseControlOperationReference: + name: pipeline-kafka-rebalance-55sbz + cruiseControlVolumeState: GracefulDiskRebalanceSucceeded + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/koperator/kafka:2.13-3.9.2-jdk21.0.11 + perBrokerConfigurationState: PerBrokerConfigInSync + rackAwarenessState: '' + version: 3.9.2 + '102': + configurationBackup: >- + H4sIAAAAAAAA/1TNMY+DMAwF4P/yZjgRxkg33YBuOF2ljm2HNHGRRWtHDgwt4r93gKGM/vz03gxO8K5pK1xNB7IflRv3nemU4RFeDhWMQvqX+3P9wW/RLwtx+KZQxqm0tTsL9iXw83b/Sm9Uyl/ImaWHP4FXqdeBqJb3UihORp92qTCSPVjCyCqdhUgHMtZ0pKiSCrxrm2V5BwAA//95cqZm0QAAAA== + configurationState: ConfigInSync + externalListenerConfigNames: + - ingress-az1 + - corpingress-az1 + - secureingress-az1 + 
gracefulActionState: + cruiseControlState: GracefulUpscaleSucceeded + volumeStates: + /csi-kafka-logs2: + cruiseControlOperationReference: + name: pipeline-kafka-rebalance-55sbz + cruiseControlVolumeState: GracefulDiskRebalanceSucceeded + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/koperator/kafka:2.13-3.9.2-jdk21.0.11 + perBrokerConfigurationState: PerBrokerConfigInSync + rackAwarenessState: '' + version: 3.9.2 + '201': + configurationBackup: >- + H4sIAAAAAAAA/1TNMY+DMAwF4P/yZjhBxkg33YBuOF2ljm2HNHGRRWtHDgwt4r93gKGM/vz03gxO8K5pK1xNB7IflRv3nemU4RFeDhWMQvqX+3P9wW/RLwtx+KZQxqm42p0F+xL4ebt/pTcq5S/kzNLDn8Cr1OtAVMt7KRQno0+7VBjJHixhZJXOQqQDGWs6UlRJBb51zbK8AwAA//9KqJuw0QAAAA== + configurationState: ConfigInSync + externalListenerConfigNames: + - ingress-az2 + - corpingress-az2 + - secureingress-az2 + gracefulActionState: + cruiseControlState: GracefulUpscaleSucceeded + volumeStates: + /csi-kafka-logs2: + cruiseControlOperationReference: + name: pipeline-kafka-rebalance-55sbz + cruiseControlVolumeState: GracefulDiskRebalanceSucceeded + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/koperator/kafka:2.13-3.9.2-jdk21.0.11 + perBrokerConfigurationState: PerBrokerConfigInSync + rackAwarenessState: '' + version: 3.9.2 + '202': + configurationBackup: >- + H4sIAAAAAAAA/1TNMY/CMAwF4P/y5vbUyxjpphsqBgQSIzCExFRWwY6cdoCq/52hHejoz0/vTeAE7xpX4Wbak/2r3LlrTccMj/B2qGAU0kEer+UHv0Z/LMT+j0IZxuJqdxFsS+Cn9d5JZ1TKPuTM0sGfwYvUy0BUy1spFEejb7tWGMieLGFgldZCpCMZazpRVEkF/tc18/wJAAD///CiYgjRAAAA + configurationState: ConfigInSync + externalListenerConfigNames: + - ingress-az2 + - corpingress-az2 + - secureingress-az2 + gracefulActionState: + cruiseControlState: GracefulUpscaleSucceeded + volumeStates: + /csi-kafka-logs2: + cruiseControlOperationReference: + name: pipeline-kafka-rebalance-55sbz + cruiseControlVolumeState: GracefulDiskRebalanceSucceeded + image: >- + 
docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/koperator/kafka:2.13-3.9.2-jdk21.0.11 + perBrokerConfigurationState: PerBrokerConfigInSync + rackAwarenessState: '' + version: 3.9.2 + '301': + configurationBackup: >- + H4sIAAAAAAAA/1TNvY7CQAwE4HeZOjnlp1vpqiuiK06HRAkUy66JrIC98iYFRHl3iqQgpT+PZmZwhGurusDVdCD7Ublx35lOCQ7+1aKAkY//cn+uP7gt+mU+DN/k8zjlpmzPgn0J3Lzdv9Ib5fznU2Lp4U7gVcp1IKilvWQKk9GnXQqMZA8WP7JKZz7QgYw1HimoxAxXN9WyvAMAAP//zecnldEAAAA= + configurationState: ConfigInSync + externalListenerConfigNames: + - ingress-az3 + - corpingress-az3 + - secureingress-az3 + gracefulActionState: + cruiseControlState: GracefulUpscaleSucceeded + volumeStates: + /csi-kafka-logs2: + cruiseControlOperationReference: + name: pipeline-kafka-rebalance-55sbz + cruiseControlVolumeState: GracefulDiskRebalanceSucceeded + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/koperator/kafka:2.13-3.9.2-jdk21.0.11 + perBrokerConfigurationState: PerBrokerConfigInSync + rackAwarenessState: '' + version: 3.9.2 + '302': + configurationBackup: >- + H4sIAAAAAAAA/1TNMY+CQBAF4P/yarhw0G1y1RXEwmhiqRbr7kgm6MxmFgol/HcLKKScb17em8ARrqnqAjfTnuxf5c5dazomOPh3gwJGPh7k8Vp+cGv0x3zo/8jnYcx12VwE2xK4ab130hnlvPcpsXRwZ/Ai5TIQ1NJWMoXR6NuuBQayJ4sfWKU1H+hIxhpPFFRihvutq3n+BAAA//937d4t0QAAAA== + configurationState: ConfigInSync + externalListenerConfigNames: + - ingress-az3 + - corpingress-az3 + - secureingress-az3 + gracefulActionState: + cruiseControlState: GracefulUpscaleSucceeded + volumeStates: + /csi-kafka-logs2: + cruiseControlOperationReference: + name: pipeline-kafka-rebalance-55sbz + cruiseControlVolumeState: GracefulDiskRebalanceSucceeded + image: >- + docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/koperator/kafka:2.13-3.9.2-jdk21.0.11 + perBrokerConfigurationState: PerBrokerConfigInSync + rackAwarenessState: '' + version: 3.9.2 + cruiseControlTopicStatus: CruiseControlTopicReady + listenerStatuses: + externalListeners: + corp: + - address: 
'kafka-1-az1-va7-corp.dev.pipeline.adobedc.net:443' + name: any-broker-corpingress-az1 + - address: 'kafka-1-az2-va7-corp.dev.pipeline.adobedc.net:443' + name: any-broker-corpingress-az2 + - address: 'kafka-1-az3-va7-corp.dev.pipeline.adobedc.net:443' + name: any-broker-corpingress-az3 + - address: 'kafka-1-101-va7-corp.dev.pipeline.adobedc.net:443' + name: broker-101 + - address: 'kafka-1-102-va7-corp.dev.pipeline.adobedc.net:443' + name: broker-102 + - address: 'kafka-1-201-va7-corp.dev.pipeline.adobedc.net:443' + name: broker-201 + - address: 'kafka-1-202-va7-corp.dev.pipeline.adobedc.net:443' + name: broker-202 + - address: 'kafka-1-301-va7-corp.dev.pipeline.adobedc.net:443' + name: broker-301 + - address: 'kafka-1-302-va7-corp.dev.pipeline.adobedc.net:443' + name: broker-302 + secure: + - address: 'kafka-1-az1-va7-secure.dev.pipeline.adobedc.net:9097' + name: any-broker-secureingress-az1 + - address: 'kafka-1-az2-va7-secure.dev.pipeline.adobedc.net:9097' + name: any-broker-secureingress-az2 + - address: 'kafka-1-az3-va7-secure.dev.pipeline.adobedc.net:9097' + name: any-broker-secureingress-az3 + - address: 'kafka-1-101-va7.dev.pipeline.adobedc.net:9097' + name: broker-101 + - address: 'kafka-1-102-va7.dev.pipeline.adobedc.net:9097' + name: broker-102 + - address: 'kafka-1-201-va7.dev.pipeline.adobedc.net:9097' + name: broker-201 + - address: 'kafka-1-202-va7.dev.pipeline.adobedc.net:9097' + name: broker-202 + - address: 'kafka-1-301-va7.dev.pipeline.adobedc.net:9097' + name: broker-301 + - address: 'kafka-1-302-va7.dev.pipeline.adobedc.net:9097' + name: broker-302 + internalListeners: + internal: + - address: >- + pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29092 + name: headless + - address: >- + pipeline-kafka-101.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29092 + name: broker-101 + - address: >- + pipeline-kafka-102.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29092 
+ name: broker-102 + - address: >- + pipeline-kafka-201.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29092 + name: broker-201 + - address: >- + pipeline-kafka-202.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29092 + name: broker-202 + - address: >- + pipeline-kafka-301.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29092 + name: broker-301 + - address: >- + pipeline-kafka-302.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29092 + name: broker-302 + sasl_plain: + - address: >- + pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29096 + name: headless + - address: >- + pipeline-kafka-101.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29096 + name: broker-101 + - address: >- + pipeline-kafka-102.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29096 + name: broker-102 + - address: >- + pipeline-kafka-201.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29096 + name: broker-201 + - address: >- + pipeline-kafka-202.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29096 + name: broker-202 + - address: >- + pipeline-kafka-301.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29096 + name: broker-301 + - address: >- + pipeline-kafka-302.pipeline-kafka-headless.ns-team-aep-pipeline-kafka-1-dev.svc.cluster.local:29096 + name: broker-302 + rollingUpgradeStatus: + errorCount: 0 + lastSuccess: '2026-06-11 14:52:22' + state: ClusterRunning diff --git a/pkg/resources/cruisecontrol/service.go b/pkg/resources/cruisecontrol/service.go index d868eacf4..18eb10731 100644 --- a/pkg/resources/cruisecontrol/service.go +++ b/pkg/resources/cruisecontrol/service.go @@ -26,7 +26,7 @@ import ( ) func (r *Reconciler) service() runtime.Object { - return &corev1.Service{ + svc := &corev1.Service{ ObjectMeta: templates.ObjectMeta( 
fmt.Sprintf(serviceNameTemplate, r.KafkaCluster.Name), apiutil.MergeLabels(ccLabelSelector(r.KafkaCluster.Name), r.KafkaCluster.Labels), @@ -34,6 +34,7 @@ func (r *Reconciler) service() runtime.Object { ), Spec: corev1.ServiceSpec{ Selector: ccLabelSelector(r.KafkaCluster.Name), + Type: corev1.ServiceTypeClusterIP, Ports: []corev1.ServicePort{ { Name: "cc", @@ -50,4 +51,10 @@ func (r *Reconciler) service() runtime.Object { }, }, } + + if r.KafkaCluster.Spec.LocalDebugEnabled { + svc.Spec.Type = corev1.ServiceTypeLoadBalancer + } + + return svc } diff --git a/pkg/resources/kafka/allBrokerService.go b/pkg/resources/kafka/allBrokerService.go index ecfdd5b7b..b5fa40239 100644 --- a/pkg/resources/kafka/allBrokerService.go +++ b/pkg/resources/kafka/allBrokerService.go @@ -39,7 +39,7 @@ func (r *Reconciler) allBrokerService() runtime.Object { usedPorts = append(usedPorts, generateServicePortForAdditionalPorts(r.KafkaCluster.Spec.AdditionalPorts)...) - return &corev1.Service{ + svc := &corev1.Service{ ObjectMeta: templates.ObjectMetaWithAnnotations( fmt.Sprintf(kafkautils.AllBrokerServiceTemplate, r.KafkaCluster.GetName()), apiutil.LabelsForKafka(r.KafkaCluster.GetName()), @@ -52,4 +52,10 @@ func (r *Reconciler) allBrokerService() runtime.Object { Ports: usedPorts, }, } + + if r.KafkaCluster.Spec.LocalDebugEnabled { + svc.Spec.Type = corev1.ServiceTypeLoadBalancer + } + + return svc } diff --git a/pkg/resources/kafka/kafka.go b/pkg/resources/kafka/kafka.go index eec273a7a..908c5be40 100644 --- a/pkg/resources/kafka/kafka.go +++ b/pkg/resources/kafka/kafka.go @@ -834,6 +834,7 @@ func (r *Reconciler) reconcileKafkaPod(log logr.Logger, desiredPod *corev1.Pod, return errorfactory.New(errorfactory.APIFailure{}, err, "getting resource failed", "kind", desiredType) } switch { + //initial run - Create Pod case len(podList.Items) == 0: if err := patch.DefaultAnnotator.SetLastAppliedAnnotation(desiredPod); err != nil { return errors.WrapIf(err, "could not apply last state to 
annotation") @@ -957,6 +958,13 @@ func (r *Reconciler) handleRollingUpgrade(log logr.Logger, desiredPod, currentPo } desiredPod.Spec.Tolerations = uniqueTolerations } + // Ignore CPU/memory request diffs — changing requests does not require a pod restart. + if r.KafkaCluster.Spec.ScaleOpsEnabled { + syncResourceRequests(desiredPod, currentPod) + // If current pod had affinities created by ScaleOps, we need to sync them to desiredPod, + // otherwise they will be removed and cause pod restart + syncScaleOpsAffinities(desiredPod, currentPod) + } // Check if the resource actually updated or if labels match TaintedBrokersSelector patchResult, err := patch.DefaultPatchMaker.Calculate(currentPod, desiredPod) switch { diff --git a/pkg/resources/kafka/service.go b/pkg/resources/kafka/service.go index fed334635..84e7e5c79 100644 --- a/pkg/resources/kafka/service.go +++ b/pkg/resources/kafka/service.go @@ -46,7 +46,7 @@ func (r *Reconciler) service(id int32, _ *v1beta1.BrokerConfig) runtime.Object { Protocol: corev1.ProtocolTCP, }) - return &corev1.Service{ + svc := &corev1.Service{ ObjectMeta: templates.ObjectMetaWithAnnotations(fmt.Sprintf("%s-%d", r.KafkaCluster.Name, id), apiutil.MergeLabels( apiutil.LabelsForKafka(r.KafkaCluster.Name), @@ -61,4 +61,8 @@ func (r *Reconciler) service(id int32, _ *v1beta1.BrokerConfig) runtime.Object { Ports: usedPorts, }, } + if r.KafkaCluster.Spec.LocalDebugEnabled { + svc.Spec.Type = corev1.ServiceTypeLoadBalancer + } + return svc } diff --git a/pkg/resources/kafka/service_test.go b/pkg/resources/kafka/service_test.go new file mode 100644 index 000000000..f5a16448a --- /dev/null +++ b/pkg/resources/kafka/service_test.go @@ -0,0 +1,230 @@ +// Copyright © 2023 Cisco Systems, Inc. and/or its affiliates +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kafka + +import ( + "testing" + + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "go.uber.org/mock/gomock" + + apiutil "github.com/banzaicloud/koperator/api/util" + "github.com/banzaicloud/koperator/api/v1beta1" + "github.com/banzaicloud/koperator/pkg/resources" + mocks "github.com/banzaicloud/koperator/pkg/resources/kafka/mocks" + "github.com/banzaicloud/koperator/pkg/util" +) + +func TestService(t *testing.T) { + testCases := []struct { + testName string + r *Reconciler + expectedService *corev1.Service + }{ + { + testName: "Basic Internal And External Service", + r: &Reconciler{ + Reconciler: resources.Reconciler{ + KafkaCluster: &v1beta1.KafkaCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kafka", + Namespace: "kafka", + }, + Spec: v1beta1.KafkaClusterSpec{ + LocalDebugEnabled: false, + KRaftMode: false, + ListenersConfig: v1beta1.ListenersConfig{ + InternalListeners: []v1beta1.InternalListenerConfig{ + { + CommonListenerSpec: v1beta1.CommonListenerSpec{ + Name: "internal", + ContainerPort: 29092, + Type: "plaintext", + UsedForInnerBrokerCommunication: true, + }, + }, + }, + ExternalListeners: []v1beta1.ExternalListenerConfig{ + { + CommonListenerSpec: v1beta1.CommonListenerSpec{ + Name: "plaintext", + ContainerPort: 29094, + Type: "plaintext", + UsedForInnerBrokerCommunication: false, + }, + AccessMethod: corev1.ServiceTypeLoadBalancer, + }, + }, + }, + }, + }, + }, + }, + expectedService: &corev1.Service{ + 
ObjectMeta: metav1.ObjectMeta{ + Name: "kafka-1", + Namespace: "kafka", + Labels: map[string]string{"app": "kafka", "brokerId": "1", "kafka_cr": "kafka"}, + Annotations: map[string]string{}, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "", + Kind: "", + Name: "kafka", + UID: "", + Controller: util.BoolPointer(true), + BlockOwnerDeletion: util.BoolPointer(true), + }, + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + SessionAffinity: corev1.ServiceAffinityNone, + Selector: apiutil.MergeLabels(apiutil.LabelsForKafka("kafka"), map[string]string{v1beta1.BrokerIdLabelKey: "1"}), + Ports: []corev1.ServicePort{ + { + Name: "tcp-internal", + Protocol: "TCP", + Port: 29092, + TargetPort: intstr.FromInt(29092), + NodePort: 0, + }, + { + Name: "tcp-plaintext", + Protocol: "TCP", + Port: 29094, + TargetPort: intstr.FromInt(29094), + NodePort: 0, + }, + { + Name: "metrics", + Protocol: "TCP", + Port: 9020, + TargetPort: intstr.FromInt(9020), + NodePort: 0, + }, + }, + ClusterIP: "", + PublishNotReadyAddresses: false, + }, + }, + }, + { + testName: "Basic Internal And External Service", + r: &Reconciler{ + Reconciler: resources.Reconciler{ + KafkaCluster: &v1beta1.KafkaCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kafka", + Namespace: "kafka", + }, + Spec: v1beta1.KafkaClusterSpec{ + LocalDebugEnabled: true, + KRaftMode: false, + ListenersConfig: v1beta1.ListenersConfig{ + InternalListeners: []v1beta1.InternalListenerConfig{ + { + CommonListenerSpec: v1beta1.CommonListenerSpec{ + Name: "internal", + ContainerPort: 29092, + Type: "plaintext", + UsedForInnerBrokerCommunication: true, + }, + }, + }, + ExternalListeners: []v1beta1.ExternalListenerConfig{ + { + CommonListenerSpec: v1beta1.CommonListenerSpec{ + Name: "plaintext", + ContainerPort: 29094, + Type: "plaintext", + UsedForInnerBrokerCommunication: false, + }, + AccessMethod: corev1.ServiceTypeLoadBalancer, + }, + }, + }, + }, + }, + }, + }, + expectedService: &corev1.Service{ 
+ ObjectMeta: metav1.ObjectMeta{ + Name: "kafka-1", + Namespace: "kafka", + Labels: map[string]string{"app": "kafka", "brokerId": "1", "kafka_cr": "kafka"}, + Annotations: map[string]string{}, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "", + Kind: "", + Name: "kafka", + UID: "", + Controller: util.BoolPointer(true), + BlockOwnerDeletion: util.BoolPointer(true), + }, + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + SessionAffinity: corev1.ServiceAffinityNone, + Selector: apiutil.MergeLabels(apiutil.LabelsForKafka("kafka"), map[string]string{v1beta1.BrokerIdLabelKey: "1"}), + Ports: []corev1.ServicePort{ + { + Name: "tcp-internal", + Protocol: "TCP", + Port: 29092, + TargetPort: intstr.FromInt(29092), + NodePort: 0, + }, + { + Name: "tcp-plaintext", + Protocol: "TCP", + Port: 29094, + TargetPort: intstr.FromInt(29094), + NodePort: 0, + }, + { + Name: "metrics", + Protocol: "TCP", + Port: 9020, + TargetPort: intstr.FromInt(9020), + NodePort: 0, + }, + }, + ClusterIP: "", + PublishNotReadyAddresses: false, + }, + }, + }, + } + mockCtrl := gomock.NewController(t) + + for _, test := range testCases { + t.Run(test.testName, func(t *testing.T) { + mockClient := mocks.NewMockClient(mockCtrl) + mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes() + r := test.r + + actualService := r.service(1, nil) + + require.Equal(t, test.expectedService, actualService) + }) + } +} diff --git a/pkg/resources/kafka/util.go b/pkg/resources/kafka/util.go index cfafbae14..d3c9c8c67 100644 --- a/pkg/resources/kafka/util.go +++ b/pkg/resources/kafka/util.go @@ -18,9 +18,11 @@ package kafka import ( "encoding/base64" "fmt" + "reflect" "sort" "github.com/google/uuid" + corev1 "k8s.io/api/core/v1" "github.com/banzaicloud/koperator/api/v1beta1" ) @@ -73,3 +75,189 @@ func generateRandomClusterID() string { randomUUID := uuid.New() return base64.URLEncoding.EncodeToString(randomUUID[:]) } + +// syncResourceRequests 
overwrites CPU and memory requests in desiredPod's containers +// with the values from currentPod so that request-only changes do not trigger a pod restart. +func syncResourceRequests(desiredPod, currentPod *corev1.Pod) { + syncContainerResourceRequests(desiredPod.Spec.Containers, currentPod.Spec.Containers) + syncContainerResourceRequests(desiredPod.Spec.InitContainers, currentPod.Spec.InitContainers) +} + +func syncContainerResourceRequests(desired, current []corev1.Container) { + index := make(map[string]corev1.ResourceList, len(current)) + for _, c := range current { + index[c.Name] = c.Resources.Requests + } + for i := range desired { + c := &desired[i] + reqs, ok := index[c.Name] + if !ok { + continue + } + if c.Resources.Requests == nil { + c.Resources.Requests = make(corev1.ResourceList) + } + for _, res := range []corev1.ResourceName{corev1.ResourceCPU, corev1.ResourceMemory} { + if val, exists := reqs[res]; exists { + c.Resources.Requests[res] = val + } else { + delete(c.Resources.Requests, res) + } + } + } +} + +// syncScaleOpsAffinities syncs all scale ops related affinities from the current pod to the desired pod. +// This includes pod affinities with "scaleops.sh/managed-unevictable" label selector +// and node affinities with "scaleops.sh/node-packing=true" selector. +func syncScaleOpsAffinities(desiredPod, currentPod *corev1.Pod) { + syncScaleOpsPodAffinities(desiredPod, currentPod) + syncScaleOpsNodeAffinities(desiredPod, currentPod) +} + +// syncScaleOpsPodAffinities syncs preferred pod affinities with "scaleops.sh/managed-unevictable" +// label selector from current pod to desired pod. 
+func syncScaleOpsPodAffinities(desiredPod, currentPod *corev1.Pod) { + if currentPod.Spec.Affinity == nil || currentPod.Spec.Affinity.PodAffinity == nil { + return + } + + currentPodAffinity := currentPod.Spec.Affinity.PodAffinity + + // Filter preferred pod affinities with "scaleops.sh/managed-unevictable" label selector + var scaleOpsPreferredAffinities []corev1.WeightedPodAffinityTerm + if currentPodAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil { + for _, term := range currentPodAffinity.PreferredDuringSchedulingIgnoredDuringExecution { + if term.PodAffinityTerm.LabelSelector != nil { + hasScaleOpsLabel := false + + // Check MatchExpressions + for _, requirement := range term.PodAffinityTerm.LabelSelector.MatchExpressions { + if requirement.Key == "scaleops.sh/managed-unevictable" { + hasScaleOpsLabel = true + break + } + } + + // Check MatchLabels if not found in MatchExpressions + if !hasScaleOpsLabel { + if _, exists := term.PodAffinityTerm.LabelSelector.MatchLabels["scaleops.sh/managed-unevictable"]; exists { + hasScaleOpsLabel = true + } + } + + if hasScaleOpsLabel { + scaleOpsPreferredAffinities = append(scaleOpsPreferredAffinities, term) + } + } + } + } + + // If we found any scale ops preferred affinities, add them to the desired pod + if len(scaleOpsPreferredAffinities) > 0 { + if desiredPod.Spec.Affinity == nil { + desiredPod.Spec.Affinity = &corev1.Affinity{} + } + if desiredPod.Spec.Affinity.PodAffinity == nil { + desiredPod.Spec.Affinity.PodAffinity = &corev1.PodAffinity{} + } + + // Merge scale ops preferred affinities, avoiding duplicates + existingTerms := desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution + for _, newTerm := range scaleOpsPreferredAffinities { + // Check if this term already exists + found := false + for _, existing := range existingTerms { + if reflect.DeepEqual(existing.PodAffinityTerm, newTerm.PodAffinityTerm) && existing.Weight == newTerm.Weight { + found = true + break + 
} + } + if !found { + existingTerms = append(existingTerms, newTerm) + } + } + desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution = existingTerms + } +} + +// syncScaleOpsNodeAffinities syncs preferred node affinities with "scaleops.sh/node-packing=true" +// selector from current pod to desired pod. +func syncScaleOpsNodeAffinities(desiredPod, currentPod *corev1.Pod) { + if currentPod.Spec.Affinity == nil || currentPod.Spec.Affinity.NodeAffinity == nil { + return + } + + currentNodeAffinity := currentPod.Spec.Affinity.NodeAffinity + + // Filter preferred node affinities with "scaleops.sh/node-packing=true" selector + var scaleOpsPreferredTerms []corev1.PreferredSchedulingTerm + if currentNodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil { + for _, term := range currentNodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution { + hasScaleOpsNodePacking := false + + // Check MatchExpressions + for _, requirement := range term.Preference.MatchExpressions { + if requirement.Key == "scaleops.sh/node-packing" { + for _, val := range requirement.Values { + if val == "true" { + hasScaleOpsNodePacking = true + break + } + } + if hasScaleOpsNodePacking { + break + } + } + } + + // Check MatchFields if not found in MatchExpressions + if !hasScaleOpsNodePacking { + for _, requirement := range term.Preference.MatchFields { + if requirement.Key == "scaleops.sh/node-packing" { + for _, val := range requirement.Values { + if val == "true" { + hasScaleOpsNodePacking = true + break + } + } + if hasScaleOpsNodePacking { + break + } + } + } + } + + if hasScaleOpsNodePacking { + scaleOpsPreferredTerms = append(scaleOpsPreferredTerms, term) + } + } + } + + // If we found any scale ops node affinities, add them to the desired pod + if len(scaleOpsPreferredTerms) > 0 { + if desiredPod.Spec.Affinity == nil { + desiredPod.Spec.Affinity = &corev1.Affinity{} + } + if desiredPod.Spec.Affinity.NodeAffinity == nil { + 
desiredPod.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{} + } + + // Merge scale ops node affinities, avoiding duplicates + existingTerms := desiredPod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution + for _, newTerm := range scaleOpsPreferredTerms { + // Check if this term already exists + found := false + for _, existing := range existingTerms { + if reflect.DeepEqual(existing.Preference, newTerm.Preference) && existing.Weight == newTerm.Weight { + found = true + break + } + } + if !found { + existingTerms = append(existingTerms, newTerm) + } + } + desiredPod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = existingTerms + } +} diff --git a/pkg/resources/kafka/util_test.go b/pkg/resources/kafka/util_test.go index d4c04045e..96f9db5eb 100644 --- a/pkg/resources/kafka/util_test.go +++ b/pkg/resources/kafka/util_test.go @@ -20,6 +20,9 @@ import ( "reflect" "testing" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/banzaicloud/koperator/api/v1beta1" ) @@ -402,3 +405,563 @@ func TestGenerateQuorumVoters(t *testing.T) { }) } } + +func TestSyncScaleOpsPodAffinities(t *testing.T) { + tests := []struct { + name string + currentPod *corev1.Pod + desiredPod *corev1.Pod + expectedPodAffinity bool + expectedTermCount int + }{ + { + name: "no affinity in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: false, + expectedTermCount: 0, + }, + { + name: "no pod affinity in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{}, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: false, + 
expectedTermCount: 0, + }, + { + name: "pod affinity with scaleops managed-unevictable in MatchLabels", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scaleops.sh/managed-unevictable": "true", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: true, + expectedTermCount: 1, + }, + { + name: "pod affinity with scaleops managed-unevictable in MatchExpressions", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 50, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "scaleops.sh/managed-unevictable", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: true, + expectedTermCount: 1, + }, + { + name: "pod affinity with mixed terms, only scaleops managed-unevictable should be synced", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: 
[]corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "other", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + { + Weight: 50, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scaleops.sh/managed-unevictable": "true", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: true, + expectedTermCount: 1, + }, + { + name: "desired pod already has pod affinity, scaleops affinity should be merged", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scaleops.sh/managed-unevictable": "true", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 80, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "myapp", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + expectedPodAffinity: true, + expectedTermCount: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + syncScaleOpsPodAffinities(tt.desiredPod, tt.currentPod) + + if 
!tt.expectedPodAffinity { + if tt.desiredPod.Spec.Affinity != nil && tt.desiredPod.Spec.Affinity.PodAffinity != nil { + t.Errorf("expected no pod affinity, but got one") + } + return + } + + if tt.desiredPod.Spec.Affinity == nil || tt.desiredPod.Spec.Affinity.PodAffinity == nil { + t.Errorf("expected pod affinity to be set") + return + } + + gotTermCount := len(tt.desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution) + if gotTermCount != tt.expectedTermCount { + t.Errorf("expected %d pod affinity terms, got %d", tt.expectedTermCount, gotTermCount) + } + + // Verify all synced terms have the scaleops label + for _, term := range tt.desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution { + if term.PodAffinityTerm.LabelSelector != nil { + hasScaleOpsLabel := false + for _, req := range term.PodAffinityTerm.LabelSelector.MatchExpressions { + if req.Key == "scaleops.sh/managed-unevictable" { + hasScaleOpsLabel = true + break + } + } + if !hasScaleOpsLabel { + if _, exists := term.PodAffinityTerm.LabelSelector.MatchLabels["scaleops.sh/managed-unevictable"]; !exists { + // This term should have been filtered out if it doesn't have scaleops label + // unless it came from the original desired pod + } + } + } + } + }) + } +} + +func TestSyncScaleOpsNodeAffinities(t *testing.T) { + tests := []struct { + name string + currentPod *corev1.Pod + desiredPod *corev1.Pod + expectedNodeAffinity bool + expectedTermCount int + }{ + { + name: "no affinity in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: false, + expectedTermCount: 0, + }, + { + name: "no node affinity in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{}, + 
}, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: false, + expectedTermCount: 0, + }, + { + name: "node affinity with scaleops node-packing in MatchExpressions", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 100, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: true, + expectedTermCount: 1, + }, + { + name: "node affinity with scaleops node-packing in MatchFields", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 50, + Preference: corev1.NodeSelectorTerm{ + MatchFields: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: true, + expectedTermCount: 1, + }, + { + name: "node affinity with mixed terms, only scaleops node-packing should be synced", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: 
[]corev1.PreferredSchedulingTerm{ + { + Weight: 100, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "disktype", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"ssd"}, + }, + }, + }, + }, + { + Weight: 50, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: true, + expectedTermCount: 1, + }, + { + name: "desired pod already has node affinity, scaleops affinity should be merged", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 100, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 80, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "disktype", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"ssd"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + expectedNodeAffinity: true, + expectedTermCount: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + syncScaleOpsNodeAffinities(tt.desiredPod, tt.currentPod) + + if 
!tt.expectedNodeAffinity { + if tt.desiredPod.Spec.Affinity != nil && tt.desiredPod.Spec.Affinity.NodeAffinity != nil { + t.Errorf("expected no node affinity, but got one") + } + return + } + + if tt.desiredPod.Spec.Affinity == nil || tt.desiredPod.Spec.Affinity.NodeAffinity == nil { + t.Errorf("expected node affinity to be set") + return + } + + gotTermCount := len(tt.desiredPod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution) + if gotTermCount != tt.expectedTermCount { + t.Errorf("expected %d node affinity terms, got %d", tt.expectedTermCount, gotTermCount) + } + }) + } +} + +func TestSyncScaleOpsAffinities(t *testing.T) { + tests := []struct { + name string + currentPod *corev1.Pod + desiredPod *corev1.Pod + expectPodAffinity bool + expectNodeAffinity bool + }{ + { + name: "no affinities in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectPodAffinity: false, + expectNodeAffinity: false, + }, + { + name: "both pod and node affinities with scaleops labels", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scaleops.sh/managed-unevictable": "true", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 50, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: 
corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectPodAffinity: true, + expectNodeAffinity: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + syncScaleOpsAffinities(tt.desiredPod, tt.currentPod) + + if tt.expectPodAffinity { + if tt.desiredPod.Spec.Affinity == nil || tt.desiredPod.Spec.Affinity.PodAffinity == nil { + t.Errorf("expected pod affinity to be set") + } + } else { + if tt.desiredPod.Spec.Affinity != nil && tt.desiredPod.Spec.Affinity.PodAffinity != nil { + if len(tt.desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution) > 0 { + t.Errorf("expected no pod affinity") + } + } + } + + if tt.expectNodeAffinity { + if tt.desiredPod.Spec.Affinity == nil || tt.desiredPod.Spec.Affinity.NodeAffinity == nil { + t.Errorf("expected node affinity to be set") + } + } else { + if tt.desiredPod.Spec.Affinity != nil && tt.desiredPod.Spec.Affinity.NodeAffinity != nil { + if len(tt.desiredPod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution) > 0 { + t.Errorf("expected no node affinity") + } + } + } + }) + } +} diff --git a/run-local.sh b/run-local.sh new file mode 100755 index 000000000..80a53ce42 --- /dev/null +++ b/run-local.sh @@ -0,0 +1,199 @@ +#!/bin/bash + +## PREREQUISITES +# 1. Install Kind: https://kind.sigs.k8s.io/docs/user/quick-start/ +# 2. Start Docker Daemon and ensure it's running +# 3. If using SCALEOPS, set SCALEOPS_TOKEN env variable with your ScaleOps API token +# 4. Install and Start cloud-provider-kind to enable LoadBalancer services on Kind (Required for Local Debugging). https://github.com/kubernetes-sigs/cloud-provider-kind + +## USAGE +# ./run-local.sh [--local] [--scaleops] [--cleanup] +# +# --local Run koperator as a local process instead of as a container on Kind. 
+# Starts cloud-provider-kind and runs `make install && make run`. +# --scaleops Install the ScaleOps helm chart. Requires SCALEOPS_TOKEN to be set. +# --cleanup Delete the Kind cluster and stop cloud-provider-kind process. + + +## IMPORTANT NOTES (for running koperator locally with --local flag) +# +# Make sure to set `localDebugEnabled: true` in your KafkaCluster spec. This will +# create LoadBalancer services for the Kafka and Cruise Control pods, allowing +# your local koperator to access services running on the Kind cluster. +# +# Cloud Provider KIND is required to enable LoadBalancer services on Kind. +# If you don't want to run it, you can port-forward the services instead. If you are running in local +# mode and notice that your kafka services don't have an external IP, it's because cloud-provider-kind +# either isn't running or has some issue. Local koperator won't be able to communicate +# with kafka pods without these. +# +# Finally, you'll need to update your /etc/hosts file to direct requests from +# Koperator to the LoadBalancer IPs. You can find the LoadBalancer IPs by running: +# kubectl get svc -n kafka +# +# Your /etc/hosts entries should look something like this: +# 172.18.0.7 kafka-0.kafka.svc.cluster.local +# 172.18.0.9 kafka-1.kafka.svc.cluster.local +# 172.18.0.10 kafka-2.kafka.svc.cluster.local +# 172.18.0.11 kafka-all-broker.kafka.svc.cluster.local +# 172.18.0.8 kafka-cruisecontrol-svc.kafka.svc.cluster.local + + +## ATTACHING A DEBUGGER TO LOCAL KOPERATOR +# If you need to debug your local koperator, you can find the logs in /tmp/koperator.log. +# Additionally, you can attach a debugger to the koperator process using VSCode. Instead of running `make run`, +# start koperator as a Go application with debug enabled from VSCode, and set breakpoints as needed. +# This can be done by simply opening main.go in VSCode, going to the DEBUG Tab, and clicking Run and Debug.
+ +LOCAL=false +SCALEOPS=false +CLEANUP=false + +KOPERATOR_IMAGE=docker.io/library/koperator_e2e_test +CERT_DIR="/etc/webhook/certs" + +while [[ $# -gt 0 ]]; do + case $1 in + --local) LOCAL=true; shift ;; + --scaleops) SCALEOPS=true; shift ;; + --cleanup) CLEANUP=true; shift ;; + *) echo "Unknown flag: $1"; exit 1 ;; + esac +done + +if $SCALEOPS && [[ -n "${SCALEOPS_TOKEN}" ]]; then + echo "Error: --scaleops requires SCALEOPS_TOKEN to be set" + exit 1 +fi + +## Handle cleanup option +if $CLEANUP; then + echo "Cleaning up Kind cluster and cloud-provider-kind..." + + ## Delete Kind cluster + echo "Deleting Kind cluster 'kind-kafka'..." + kind delete cluster --name=kind-kafka || true + + ## Stop cloud-provider-kind + echo "Stopping cloud-provider-kind..." + if pgrep -f cloud-provider-kind &>/dev/null; then + sudo pkill -f cloud-provider-kind + echo "cloud-provider-kind stopped" + else + echo "cloud-provider-kind is not running" + fi + + echo "Cleanup completed" + exit 0 +fi + +## Check if Docker daemon is running +if ! docker ps &>/dev/null; then + echo "Error: Docker daemon is not running. Please start Docker and try again." + exit 1 +fi + +## Create kind cluster +kind delete clusters kind-kafka +kind create cluster --config=./tests/e2e/platforms/kind/kind_config.yaml --name=kind-kafka + +## Validate kubectl context is set to kind +CURRENT_CONTEXT=$(kubectl config current-context) +if [[ ! "$CURRENT_CONTEXT" =~ kind ]]; then + echo "Error: kubectl context is not set to a kind cluster. Current context: $CURRENT_CONTEXT" + exit 1 +fi + +## Build/Load images (Kafka 3.7.0) +kind load docker-image docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.7.0 --name kind-kafka + +if ! $LOCAL; then + docker build . 
-t $KOPERATOR_IMAGE + kind load docker-image koperator_e2e_test:latest --name kind-kafka +fi + +## Install Helm Charts and CRDs +### project contour +helm repo add contour https://projectcontour.github.io/helm-charts/ --force-update +helm upgrade --install contour contour/contour --namespace projectcontour --create-namespace + +### cert-manager +helm repo add jetstack https://charts.jetstack.io --force-update +helm upgrade --install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --version v1.16.2 --set crds.enabled=true + +### zookeeper-operator +helm repo add pravega https://charts.pravega.io --force-update +helm upgrade --install zookeeper-operator pravega/zookeeper-operator --version 0.2.15 --namespace zookeeper --create-namespace --set crd.create=true + +### prometheus +helm repo add prometheus https://prometheus-community.github.io/helm-charts --force-update +helm upgrade --install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --namespace prometheus --create-namespace + +### scaleops +if $SCALEOPS; then + helm upgrade --install --create-namespace -n scaleops-system \ + --repo https://registry.scaleops.com/charts/ \ + --username scaleops --password "${SCALEOPS_TOKEN}" \ + --set scaleopsToken="${SCALEOPS_TOKEN}" \ + --set clusterName="$(kubectl config current-context)" \ + scaleops scaleops + kubectl apply -f config/scaleops/CustomOwnerGrouping.yaml +fi + +## Run Koperator +if $LOCAL; then + ## Start cloud-provider-kind in the background if not already running + if pgrep -f cloud-provider-kind &>/dev/null; then + echo "cloud-provider-kind is already running" + else + echo "Starting cloud-provider-kind in the background..." + sudo -b sh -c "KUBECONFIG=$HOME/.kube/config cloud-provider-kind >> /tmp/cloudproviderkind.log 2>&1" + sleep 2 + + ## Check if cloud-provider-kind started successfully + if ! pgrep -f cloud-provider-kind &>/dev/null; then + echo "Warning: cloud-provider-kind failed to start. 
LoadBalancer services may not work properly." + echo "Check /tmp/cloudproviderkind.log for details." + else + echo "cloud-provider-kind started successfully" + fi + fi + + kubectl get namespace kafka &>/dev/null || kubectl create namespace kafka + kubectl config set-context --current --namespace=kafka + make install + +else + helm upgrade --install kafka-operator charts/kafka-operator \ + --set operator.image.repository=$KOPERATOR_IMAGE \ + --set operator.image.tag=latest \ + --set prometheusMetrics.enabled=false \ + --namespace kafka --create-namespace +fi + +## Initialize Zookeeper and Kafka Cluster +kubectl apply -f config/samples/simplezookeeper.yaml -n zookeeper + +if ! $LOCAL; then + kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=kafka-operator -n kafka --timeout=120s + sleep 5 +fi + +kubectl apply -f config/samples/simplekafkacluster.yaml -n kafka + +## Start Local Koperator +if $LOCAL; then + if [[ ! -f "$CERT_DIR/tls.crt" || ! -f "$CERT_DIR/tls.key" ]]; then + echo "Webhook certs not found, generating self-signed certs..." + mkdir -p "$CERT_DIR" + openssl req -x509 -newkey rsa:4096 \ + -keyout "$CERT_DIR/tls.key" \ + -out "$CERT_DIR/tls.crt" \ + -days 365 -nodes \ + -subj '/CN=localhost' + else + echo "Webhook certs already exist, skipping generation." + fi + + make run +fi diff --git a/tests/e2e/platforms/kind/kind_config.yaml b/tests/e2e/platforms/kind/kind_config.yaml index 65d601b47..15a139f3f 100644 --- a/tests/e2e/platforms/kind/kind_config.yaml +++ b/tests/e2e/platforms/kind/kind_config.yaml @@ -3,6 +3,7 @@ # topology.kubernetes.io/zone (e.g. config/samples/simplekafkacluster_affinity.yaml). 
kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 +name: kind-kafka nodes: - role: control-plane kubeadmConfigPatches: @@ -32,9 +33,11 @@ nodes: nodeRegistration: kubeletExtraArgs: node-labels: "topology.kubernetes.io/zone=zone-c" -containerdConfigPatches: -- |- - [plugins."io.containerd.grpc.v1.cri".containerd] - snapshotter = "overlayfs" - [plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:5000"] - endpoint = ["http://localhost:5000"] + extraPortMappings: + - containerPort: 80 + hostPort: 80 + listenAddress: "0.0.0.0" + - containerPort: 443 + hostPort: 443 + listenAddress: "0.0.0.0" + \ No newline at end of file