From f6b72d6671a9abf5a3e1417b77afd4236db95649 Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Tue, 10 Mar 2026 14:09:48 -0400 Subject: [PATCH 01/25] Run Koperator on Kind Cluster Locally --- run-local.sh | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100755 run-local.sh diff --git a/run-local.sh b/run-local.sh new file mode 100755 index 000000000..33fd0758d --- /dev/null +++ b/run-local.sh @@ -0,0 +1,33 @@ +#!/bin/bash +## Create kind cluster +kind delete clusters e2e-kind +kind create cluster --config=/Users/dvaseeka/Documents/adobe/pipeline-services/koperator/tests/e2e/platforms/kind/kind_config.yaml --name=e2e-kind + +## Build/Load images +kind load docker-image docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.7.0 --name e2e-kind +docker build . -t koperator_e2e_test +kind load docker-image koperator_e2e_test:latest --name e2e-kind + +## Install Helm Charts and CRDs +### project contour +helm repo add contour https://projectcontour.github.io/helm-charts/ +helm install contour contour/contour --namespace projectcontour --create-namespace + +### cert-manager +helm repo add jetstack https://charts.jetstack.io --force-update +helm install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --version v1.16.2 --set crds.enabled=true + +### zookeeper-operator +helm repo add pravega https://charts.pravega.io +helm install zookeeper-operator pravega/zookeeper-operator --version 0.2.15 --namespace zookeeper --create-namespace --set crd.create=true + +### prometheus +helm repo add prometheus https://prometheus-community.github.io/helm-charts +helm install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --namespace prometheus --create-namespace + +### koperator +helm install kafka-operator charts/kafka-operator --set operator.image.repository=koperator_e2e_test --set operator.image.tag=latest --namespace kafka --create-namespace +kubectl create -f 
charts/kafka-operator/crds/ + +### Initialize Kafka Cluster +k apply -f config/samples/kraft/simplekafkacluster_kraft.yaml -n kafka From 67c8acd00eff236987a9e021ace3ea52b214cb4d Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Wed, 8 Apr 2026 13:54:38 -0400 Subject: [PATCH 02/25] [CORE-149726] - Local Debug support --- .gitignore | 2 +- api/v1beta1/kafkacluster_types.go | 13 +++++++++--- charts/kafka-operator/crds/kafkaclusters.yaml | 8 ++++++++ .../kafka.banzaicloud.io_kafkaclusters.yaml | 8 ++++++++ config/samples/simplekafkacluster.yaml | 3 ++- pkg/resources/cruisecontrol/service.go | 9 ++++++++- pkg/resources/kafka/allBrokerService.go | 8 +++++++- pkg/resources/kafka/service.go | 7 ++++++- run-local.sh | 20 ++++++++++++------- tests/e2e/platforms/kind/kind_config.yaml | 14 +++++++------ 10 files changed, 71 insertions(+), 21 deletions(-) diff --git a/.gitignore b/.gitignore index 009e10da1..480a9b5b4 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,7 @@ bin charts/**/charts charts/koperator/requirements.lock - +charts/kafka-operator/ingress # Test binary, build with `go test -c` *.test diff --git a/api/v1beta1/kafkacluster_types.go b/api/v1beta1/kafkacluster_types.go index 3cf2da28c..c96cddce8 100644 --- a/api/v1beta1/kafkacluster_types.go +++ b/api/v1beta1/kafkacluster_types.go @@ -157,9 +157,16 @@ type KafkaClusterSpec struct { // This is default to be true; if set to false, the Kafka cluster is in ZooKeeper mode. // +kubebuilder:default=false // +optional - KRaftMode bool `json:"kRaft"` - HeadlessServiceEnabled bool `json:"headlessServiceEnabled"` - ListenersConfig ListenersConfig `json:"listenersConfig"` + KRaftMode bool `json:"kRaft"` + HeadlessServiceEnabled bool `json:"headlessServiceEnabled"` + // DebugEnabled is used to decide whether to create a separate loadbalancer services for the + // Kafka and Cruise Control Pods. 
These services will expose the internal listener ports of the Kafka + // cluster with LoadBalancer type, which can be used for running Koperator on a local machine against + // a kafkaCluster instance on a Kind Cluster. + // +kubebuilder:default=false + // +optional + DebugEnabled bool `json:"debugEnabled"` + ListenersConfig ListenersConfig `json:"listenersConfig"` // Custom ports to expose in the container. Example use case: a custom kafka distribution, that includes an integrated metrics api endpoint AdditionalPorts []corev1.ContainerPort `json:"additionalPorts,omitempty"` // ZKAddresses specifies the ZooKeeper connection string diff --git a/charts/kafka-operator/crds/kafkaclusters.yaml b/charts/kafka-operator/crds/kafkaclusters.yaml index 402f282b5..4df25a979 100644 --- a/charts/kafka-operator/crds/kafkaclusters.yaml +++ b/charts/kafka-operator/crds/kafkaclusters.yaml @@ -19231,6 +19231,14 @@ spec: type: object type: array type: object + debugEnabled: + default: false + description: |- + DebugEnabled is used to decide whether to create a separate loadbalancer services for the + Kafka and Cruise Control Pods. These services will expose the internal listener ports of the Kafka + cluster with LoadBalancer type, which can be used for running Koperator on a local machine against + a kafkaCluster instance on a Kind Cluster. 
+ type: boolean disruptionBudget: description: DisruptionBudget defines the configuration for PodDisruptionBudget where the workload is managed by the kafka-operator diff --git a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml index 402f282b5..4df25a979 100644 --- a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml +++ b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml @@ -19231,6 +19231,14 @@ spec: type: object type: array type: object + debugEnabled: + default: false + description: |- + DebugEnabled is used to decide whether to create a separate loadbalancer services for the + Kafka and Cruise Control Pods. These services will expose the internal listener ports of the Kafka + cluster with LoadBalancer type, which can be used for running Koperator on a local machine against + a kafkaCluster instance on a Kind Cluster. + type: boolean disruptionBudget: description: DisruptionBudget defines the configuration for PodDisruptionBudget where the workload is managed by the kafka-operator diff --git a/config/samples/simplekafkacluster.yaml b/config/samples/simplekafkacluster.yaml index d890f8551..cf08d8980 100644 --- a/config/samples/simplekafkacluster.yaml +++ b/config/samples/simplekafkacluster.yaml @@ -5,10 +5,11 @@ metadata: controller-tools.k8s.io: "1.0" name: kafka spec: + debugEnabled: true kRaft: false monitoringConfig: jmxImage: "ghcr.io/adobe/koperator/jmx-javaagent:1.4.0" - headlessServiceEnabled: true + headlessServiceEnabled: false zkAddresses: - "zookeeper-server-client.zookeeper:2181" propagateLabels: false diff --git a/pkg/resources/cruisecontrol/service.go b/pkg/resources/cruisecontrol/service.go index d868eacf4..2c1c64439 100644 --- a/pkg/resources/cruisecontrol/service.go +++ b/pkg/resources/cruisecontrol/service.go @@ -26,7 +26,7 @@ import ( ) func (r *Reconciler) service() runtime.Object { - return &corev1.Service{ + svc := &corev1.Service{ ObjectMeta: 
templates.ObjectMeta( fmt.Sprintf(serviceNameTemplate, r.KafkaCluster.Name), apiutil.MergeLabels(ccLabelSelector(r.KafkaCluster.Name), r.KafkaCluster.Labels), @@ -34,6 +34,7 @@ func (r *Reconciler) service() runtime.Object { ), Spec: corev1.ServiceSpec{ Selector: ccLabelSelector(r.KafkaCluster.Name), + Type: corev1.ServiceTypeClusterIP, Ports: []corev1.ServicePort{ { Name: "cc", @@ -50,4 +51,10 @@ func (r *Reconciler) service() runtime.Object { }, }, } + + if r.KafkaCluster.Spec.DebugEnabled { + svc.Spec.Type = corev1.ServiceTypeLoadBalancer + } + + return svc } diff --git a/pkg/resources/kafka/allBrokerService.go b/pkg/resources/kafka/allBrokerService.go index ecfdd5b7b..ed0eed60c 100644 --- a/pkg/resources/kafka/allBrokerService.go +++ b/pkg/resources/kafka/allBrokerService.go @@ -39,7 +39,7 @@ func (r *Reconciler) allBrokerService() runtime.Object { usedPorts = append(usedPorts, generateServicePortForAdditionalPorts(r.KafkaCluster.Spec.AdditionalPorts)...) - return &corev1.Service{ + svc := &corev1.Service{ ObjectMeta: templates.ObjectMetaWithAnnotations( fmt.Sprintf(kafkautils.AllBrokerServiceTemplate, r.KafkaCluster.GetName()), apiutil.LabelsForKafka(r.KafkaCluster.GetName()), @@ -52,4 +52,10 @@ func (r *Reconciler) allBrokerService() runtime.Object { Ports: usedPorts, }, } + + if r.KafkaCluster.Spec.DebugEnabled { + svc.Spec.Type = corev1.ServiceTypeLoadBalancer + } + + return svc } diff --git a/pkg/resources/kafka/service.go b/pkg/resources/kafka/service.go index fd53e5dc1..dd663d894 100644 --- a/pkg/resources/kafka/service.go +++ b/pkg/resources/kafka/service.go @@ -46,7 +46,7 @@ func (r *Reconciler) service(id int32, _ *v1beta1.BrokerConfig) runtime.Object { Protocol: corev1.ProtocolTCP, }) - return &corev1.Service{ + svc := &corev1.Service{ ObjectMeta: templates.ObjectMetaWithAnnotations(fmt.Sprintf("%s-%d", r.KafkaCluster.Name, id), apiutil.MergeLabels( apiutil.LabelsForKafka(r.KafkaCluster.Name), @@ -61,4 +61,9 @@ func (r *Reconciler) service(id int32, 
_ *v1beta1.BrokerConfig) runtime.Object { Ports: usedPorts, }, } + if r.KafkaCluster.Spec.DebugEnabled { + svc.Spec.Type = corev1.ServiceTypeLoadBalancer + } + return svc + } diff --git a/run-local.sh b/run-local.sh index 33fd0758d..b590c815e 100755 --- a/run-local.sh +++ b/run-local.sh @@ -1,12 +1,12 @@ #!/bin/bash ## Create kind cluster -kind delete clusters e2e-kind -kind create cluster --config=/Users/dvaseeka/Documents/adobe/pipeline-services/koperator/tests/e2e/platforms/kind/kind_config.yaml --name=e2e-kind +kind delete clusters kind-kafka +kind create cluster --config=./tests/e2e/platforms/kind/kind_config.yaml --name=kind-kafka ## Build/Load images -kind load docker-image docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.7.0 --name e2e-kind +kind load docker-image docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.7.0 --name kind-kafka docker build . -t koperator_e2e_test -kind load docker-image koperator_e2e_test:latest --name e2e-kind +kind load docker-image koperator_e2e_test:latest --name kind-kafka ## Install Helm Charts and CRDs ### project contour @@ -25,9 +25,15 @@ helm install zookeeper-operator pravega/zookeeper-operator --version 0.2.15 --na helm repo add prometheus https://prometheus-community.github.io/helm-charts helm install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --namespace prometheus --create-namespace -### koperator +### koperator - Run as container on Kind helm install kafka-operator charts/kafka-operator --set operator.image.repository=koperator_e2e_test --set operator.image.tag=latest --namespace kafka --create-namespace -kubectl create -f charts/kafka-operator/crds/ + +### Local koperator from koperator root directory: +make install +make run ### Initialize Kafka Cluster -k apply -f config/samples/kraft/simplekafkacluster_kraft.yaml -n kafka +k apply -f charts/kafka-operator/ingress/zookeeper.yaml -n kafka +k apply -f config/samples/simplekafkacluster.yaml -n kafka + + 
diff --git a/tests/e2e/platforms/kind/kind_config.yaml b/tests/e2e/platforms/kind/kind_config.yaml index 65d601b47..6515c31f4 100644 --- a/tests/e2e/platforms/kind/kind_config.yaml +++ b/tests/e2e/platforms/kind/kind_config.yaml @@ -3,6 +3,7 @@ # topology.kubernetes.io/zone (e.g. config/samples/simplekafkacluster_affinity.yaml). kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 +name: kind-kafka nodes: - role: control-plane kubeadmConfigPatches: @@ -32,9 +33,10 @@ nodes: nodeRegistration: kubeletExtraArgs: node-labels: "topology.kubernetes.io/zone=zone-c" -containerdConfigPatches: -- |- - [plugins."io.containerd.grpc.v1.cri".containerd] - snapshotter = "overlayfs" - [plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:5000"] - endpoint = ["http://localhost:5000"] + extraPortMappings: + - containerPort: 80 + hostPort: 80 + listenAddress: "0.0.0.0" + - containerPort: 443 + hostPort: 443 + listenAddress: "0.0.0.0" \ No newline at end of file From 7e7daea505cc7037070c7397c0c0912cfce55207 Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Wed, 8 Apr 2026 14:34:46 -0400 Subject: [PATCH 03/25] [CORE-149726] - Local Debug support --- config/samples/simplezookeeper.yaml | 9 +++++++ run-local.sh | 40 ++++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 config/samples/simplezookeeper.yaml diff --git a/config/samples/simplezookeeper.yaml b/config/samples/simplezookeeper.yaml new file mode 100644 index 000000000..82123498c --- /dev/null +++ b/config/samples/simplezookeeper.yaml @@ -0,0 +1,9 @@ +apiVersion: zookeeper.pravega.io/v1beta1 +kind: ZookeeperCluster +metadata: + name: zookeeper-server + namespace: zookeeper +spec: + replicas: 3 + persistence: + reclaimPolicy: Delete \ No newline at end of file diff --git a/run-local.sh b/run-local.sh index b590c815e..bd22311d6 100755 --- a/run-local.sh +++ b/run-local.sh @@ -3,8 +3,9 @@ kind delete clusters kind-kafka kind create cluster
--config=./tests/e2e/platforms/kind/kind_config.yaml --name=kind-kafka -## Build/Load images +## Build/Load images (Kafka 3.7.0) kind load docker-image docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.7.0 --name kind-kafka +### Skip if you want to run koperator locally docker build . -t koperator_e2e_test kind load docker-image koperator_e2e_test:latest --name kind-kafka @@ -25,15 +26,42 @@ helm install zookeeper-operator pravega/zookeeper-operator --version 0.2.15 --na helm repo add prometheus https://prometheus-community.github.io/helm-charts helm install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --namespace prometheus --create-namespace -### koperator - Run as container on Kind +## Run Koperator on Kind +### koperator - Run as container on Kind (Skip if you want to run koperator locally) helm install kafka-operator charts/kafka-operator --set operator.image.repository=koperator_e2e_test --set operator.image.tag=latest --namespace kafka --create-namespace -### Local koperator from koperator root directory: +## Run Koperator Locally +### Start Cloud Provider Kind in the background to enable LoadBalancer services for local koperator +sudo ~/go/bin/cloud-provider-kind & + +### Start Local Koperator instance: make install make run -### Initialize Kafka Cluster -k apply -f charts/kafka-operator/ingress/zookeeper.yaml -n kafka +## Initialize Zookeeper and Kafka Cluster +k apply -f config/samples/simplezookeeper.yaml -n zookeeper +k create namespace kafka +k ens kafka k apply -f config/samples/simplekafkacluster.yaml -n kafka - +# NOTES for running koperator locally: +# +# If you want to run koperator locally, make sure to set `debugEnabled: true` +# in your KafkaCluster spec. This will create LoadBalancer services for the +# Kafka and Cruise Control pods, allowing your local koperator to access +# services running on the Kind cluster. +# +# Cloud Provider KIND is required to enable LoadBalancer services on Kind. 
+# This is necessary for local koperator access. If you don't want to run it, +# you can port-forward the services instead. +# +# Finally, you'll need to update your /etc/hosts file to direct request from +# Koperator to the LoadBalancer IPs. You can find the LoadBalancer IPs by running: +# kubectl get svc -n kafka +# +# Your /etc/hosts entries should look something like this: +# 172.18.0.7 kafka-0.kafka.svc.cluster.local +# 172.18.0.9 kafka-1.kafka.svc.cluster.local +# 172.18.0.10 kafka-2.kafka.svc.cluster.local +# 172.18.0.11 kafka-all-broker.kafka.svc.cluster.local +# 172.18.0.8 kafka-cruisecontrol-svc.kafka.svc.cluster.local \ No newline at end of file From 3f9066b6902bde500bf6de4e60d00ede3618bccd Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Wed, 8 Apr 2026 14:44:21 -0400 Subject: [PATCH 04/25] [CORE-149726] - Local Debug support --- api/v1beta1/kafkacluster_types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/v1beta1/kafkacluster_types.go b/api/v1beta1/kafkacluster_types.go index 5fa3d22e9..d71ef1c05 100644 --- a/api/v1beta1/kafkacluster_types.go +++ b/api/v1beta1/kafkacluster_types.go @@ -165,7 +165,7 @@ type KafkaClusterSpec struct { // a kafkaCluster instance on a Kind Cluster. // +kubebuilder:default=false // +optional - DebugEnabled bool `json:"debugEnabled"` + DebugEnabled bool `json:"debugEnabled,omitempty"` ListenersConfig ListenersConfig `json:"listenersConfig"` // Custom ports to expose in the container. 
Example use case: a custom kafka distribution, that includes an integrated metrics api endpoint AdditionalPorts []corev1.ContainerPort `json:"additionalPorts,omitempty"` From c7ba004a0d7192b18e4b01b5f37970e203fca0a9 Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Wed, 8 Apr 2026 14:49:31 -0400 Subject: [PATCH 05/25] Clean up Lint --- api/v1beta1/kafkacluster_types.go | 2 +- pkg/resources/kafka/service.go | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/api/v1beta1/kafkacluster_types.go b/api/v1beta1/kafkacluster_types.go index d71ef1c05..5fa3d22e9 100644 --- a/api/v1beta1/kafkacluster_types.go +++ b/api/v1beta1/kafkacluster_types.go @@ -165,7 +165,7 @@ type KafkaClusterSpec struct { // a kafkaCluster instance on a Kind Cluster. // +kubebuilder:default=false // +optional - DebugEnabled bool `json:"debugEnabled,omitempty"` + DebugEnabled bool `json:"debugEnabled"` ListenersConfig ListenersConfig `json:"listenersConfig"` // Custom ports to expose in the container. Example use case: a custom kafka distribution, that includes an integrated metrics api endpoint AdditionalPorts []corev1.ContainerPort `json:"additionalPorts,omitempty"` diff --git a/pkg/resources/kafka/service.go b/pkg/resources/kafka/service.go index dd663d894..5ed75b1e7 100644 --- a/pkg/resources/kafka/service.go +++ b/pkg/resources/kafka/service.go @@ -65,5 +65,4 @@ func (r *Reconciler) service(id int32, _ *v1beta1.BrokerConfig) runtime.Object { svc.Spec.Type = corev1.ServiceTypeLoadBalancer } return svc - } From d3813cd6aaeacef03c626c39bf17d535f311552e Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Fri, 24 Apr 2026 10:12:46 -0400 Subject: [PATCH 06/25] Add Scaleops to Local Env --- config/scaleops/CustomOwnerGrouping.yaml | 22 ++++++++++++++++++++++ run-local.sh | 10 +++++++--- 2 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 config/scaleops/CustomOwnerGrouping.yaml diff --git a/config/scaleops/CustomOwnerGrouping.yaml 
b/config/scaleops/CustomOwnerGrouping.yaml new file mode 100644 index 000000000..7e9760d82 --- /dev/null +++ b/config/scaleops/CustomOwnerGrouping.yaml @@ -0,0 +1,22 @@ + +kind: CustomOwnerGrouping +apiVersion: analysis.scaleops.sh/v1alpha1 +metadata: + name: kafkabroker + namespace: scaleops-system +spec: + groupBy: + positiveRegexMatch: false + groupBys: + - labels: + - 'isBrokerNode: true' + positiveRegexMatch: false + topOwnerController: + apiVersion: kafka.banzaicloud.io/v1beta1 + kind: KafkaCluster + displayOptions: + hideGeneratedSuffix: true + fields: + - ownerName + defaultPolicy: kafka-brokers + enabled: true diff --git a/run-local.sh b/run-local.sh index bd22311d6..4c241f245 100755 --- a/run-local.sh +++ b/run-local.sh @@ -26,6 +26,10 @@ helm install zookeeper-operator pravega/zookeeper-operator --version 0.2.15 --na helm repo add prometheus https://prometheus-community.github.io/helm-charts helm install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --namespace prometheus --create-namespace +### scaleops +helm install --create-namespace -n scaleops-system --repo https://registry.scaleops.com/charts/ --username scaleops --password ${SCALEOPS_TOKEN} --set scaleopsToken=${SCALEOPS_TOKEN} --set clusterName=$(kubectl config current-context) scaleops scaleops +k apply -f config/scaleops/CustomOwnerGrouping.yaml + ## Run Koperator on Kind ### koperator - Run as container on Kind (Skip if you want to run koperator locally) helm install kafka-operator charts/kafka-operator --set operator.image.repository=koperator_e2e_test --set operator.image.tag=latest --namespace kafka --create-namespace @@ -35,13 +39,13 @@ helm install kafka-operator charts/kafka-operator --set operator.image.repositor sudo ~/go/bin/cloud-provider-kind & ### Start Local Koperator instance: +kubectl create namespace kafka +kubectl ens kafka make install make run ## Initialize Zookeeper and Kafka Cluster -k apply -f config/samples/simplezookeeper.yaml -n zookeeper -k create 
namespace kafka -k ens kafka +kubectl apply -f config/samples/simplezookeeper.yaml -n zookeeper k apply -f config/samples/simplekafkacluster.yaml -n kafka # NOTES for running koperator locally: From e6a9e84f3ce7b5fd59cf5bef646f724dc704ba6e Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Fri, 24 Apr 2026 14:17:35 -0400 Subject: [PATCH 07/25] [CORE-149726] - Scaleops Update - Ignore Resource Request Differences between current and desired pods --- pkg/resources/kafka/kafka.go | 34 ++++++++++++++++++++++++++++++++++ run-local.sh | 2 ++ 2 files changed, 36 insertions(+) diff --git a/pkg/resources/kafka/kafka.go b/pkg/resources/kafka/kafka.go index 9aa91808f..9e9a2c00e 100644 --- a/pkg/resources/kafka/kafka.go +++ b/pkg/resources/kafka/kafka.go @@ -828,6 +828,7 @@ func (r *Reconciler) reconcileKafkaPod(log logr.Logger, desiredPod *corev1.Pod, return errorfactory.New(errorfactory.APIFailure{}, err, "getting resource failed", "kind", desiredType) } switch { + //initial run - Create Pod case len(podList.Items) == 0: if err := patch.DefaultAnnotator.SetLastAppliedAnnotation(desiredPod); err != nil { return errors.WrapIf(err, "could not apply last state to annotation") @@ -935,6 +936,37 @@ func (r *Reconciler) updateStatusWithDockerImageAndVersion(brokerId int32, broke return nil } +// syncResourceRequests overwrites CPU and memory requests in desiredPod's containers +// with the values from currentPod so that request-only changes do not trigger a pod restart. 
+func syncResourceRequests(desiredPod, currentPod *corev1.Pod) { + syncContainerResourceRequests(desiredPod.Spec.Containers, currentPod.Spec.Containers) + syncContainerResourceRequests(desiredPod.Spec.InitContainers, currentPod.Spec.InitContainers) +} + +func syncContainerResourceRequests(desired, current []corev1.Container) { + index := make(map[string]corev1.ResourceList, len(current)) + for _, c := range current { + index[c.Name] = c.Resources.Requests + } + for i := range desired { + c := &desired[i] + reqs, ok := index[c.Name] + if !ok { + continue + } + if c.Resources.Requests == nil { + c.Resources.Requests = make(corev1.ResourceList) + } + for _, res := range []corev1.ResourceName{corev1.ResourceCPU, corev1.ResourceMemory} { + if val, exists := reqs[res]; exists { + c.Resources.Requests[res] = val + } else { + delete(c.Resources.Requests, res) + } + } + } +} + //gocyclo:ignore func (r *Reconciler) handleRollingUpgrade(log logr.Logger, desiredPod, currentPod *corev1.Pod, desiredType reflect.Type) error { // Since toleration does not support patchStrategy:"merge,retainKeys", @@ -951,6 +983,8 @@ func (r *Reconciler) handleRollingUpgrade(log logr.Logger, desiredPod, currentPo } desiredPod.Spec.Tolerations = uniqueTolerations } + // Ignore CPU/memory request diffs — changing requests does not require a pod restart. 
+ syncResourceRequests(desiredPod, currentPod) // Check if the resource actually updated or if labels match TaintedBrokersSelector patchResult, err := patch.DefaultPatchMaker.Calculate(currentPod, desiredPod) switch { diff --git a/run-local.sh b/run-local.sh index 4c241f245..9ed3b35b4 100755 --- a/run-local.sh +++ b/run-local.sh @@ -29,6 +29,8 @@ helm install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --name ### scaleops helm install --create-namespace -n scaleops-system --repo https://registry.scaleops.com/charts/ --username scaleops --password ${SCALEOPS_TOKEN} --set scaleopsToken=${SCALEOPS_TOKEN} --set clusterName=$(kubectl config current-context) scaleops scaleops k apply -f config/scaleops/CustomOwnerGrouping.yaml +#### Scaleops Dashboard Port Forward +k port-forward scaleops-dashboard-pod-xxxx 8080 ## Run Koperator on Kind ### koperator - Run as container on Kind (Skip if you want to run koperator locally) From cc65cdb2772781f5d8e436ca6053077194374c42 Mon Sep 17 00:00:00 2001 From: Cameron Wright Date: Tue, 5 May 2026 17:00:36 -0400 Subject: [PATCH 08/25] updating run-local script to use kubectl instead of k --- run-local.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/run-local.sh b/run-local.sh index 9ed3b35b4..40af15c53 100755 --- a/run-local.sh +++ b/run-local.sh @@ -28,9 +28,9 @@ helm install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --name ### scaleops helm install --create-namespace -n scaleops-system --repo https://registry.scaleops.com/charts/ --username scaleops --password ${SCALEOPS_TOKEN} --set scaleopsToken=${SCALEOPS_TOKEN} --set clusterName=$(kubectl config current-context) scaleops scaleops -k apply -f config/scaleops/CustomOwnerGrouping.yaml +kubectl apply -f config/scaleops/CustomOwnerGrouping.yaml #### Scaleops Dashboard Port Forward -k port-forward scaleops-dashboard-pod-xxxx 8080 +kubectl port-forward scaleops-dashboard-pod-xxxx 8080 ## Run Koperator on Kind ### koperator 
- Run as container on Kind (Skip if you want to run koperator locally) @@ -48,7 +48,7 @@ make run ## Initialize Zookeeper and Kafka Cluster kubectl apply -f config/samples/simplezookeeper.yaml -n zookeeper -k apply -f config/samples/simplekafkacluster.yaml -n kafka +kubectl apply -f config/samples/simplekafkacluster.yaml -n kafka # NOTES for running koperator locally: # @@ -70,4 +70,4 @@ k apply -f config/samples/simplekafkacluster.yaml -n kafka # 172.18.0.9 kafka-1.kafka.svc.cluster.local # 172.18.0.10 kafka-2.kafka.svc.cluster.local # 172.18.0.11 kafka-all-broker.kafka.svc.cluster.local -# 172.18.0.8 kafka-cruisecontrol-svc.kafka.svc.cluster.local \ No newline at end of file +# 172.18.0.8 kafka-cruisecontrol-svc.kafka.svc.cluster.local From 25792bde8377aee954456f72e4ab5ad12822593e Mon Sep 17 00:00:00 2001 From: Cameron Wright Date: Tue, 5 May 2026 17:30:08 -0400 Subject: [PATCH 09/25] adding checks to make run more resilient while also commenting out manual steps --- run-local.sh | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/run-local.sh b/run-local.sh index 40af15c53..674d282c5 100755 --- a/run-local.sh +++ b/run-local.sh @@ -1,4 +1,12 @@ #!/bin/bash +set -e + +## Prerequisite checks +if [ -z "${SCALEOPS_TOKEN}" ]; then + echo "Error: SCALEOPS_TOKEN environment variable is not set" + exit 1 +fi + ## Create kind cluster kind delete clusters kind-kafka kind create cluster --config=./tests/e2e/platforms/kind/kind_config.yaml --name=kind-kafka @@ -11,40 +19,40 @@ kind load docker-image koperator_e2e_test:latest --name kind-kafka ## Install Helm Charts and CRDs ### project contour -helm repo add contour https://projectcontour.github.io/helm-charts/ -helm install contour contour/contour --namespace projectcontour --create-namespace +helm repo add contour https://projectcontour.github.io/helm-charts/ || true +helm upgrade --install contour contour/contour --namespace projectcontour --create-namespace 
### cert-manager -helm repo add jetstack https://charts.jetstack.io --force-update -helm install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --version v1.16.2 --set crds.enabled=true +helm repo add jetstack https://charts.jetstack.io --force-update || true +helm upgrade --install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --version v1.16.2 --set crds.enabled=true ### zookeeper-operator -helm repo add pravega https://charts.pravega.io -helm install zookeeper-operator pravega/zookeeper-operator --version 0.2.15 --namespace zookeeper --create-namespace --set crd.create=true +helm repo add pravega https://charts.pravega.io || true +helm upgrade --install zookeeper-operator pravega/zookeeper-operator --version 0.2.15 --namespace zookeeper --create-namespace --set crd.create=true ### prometheus -helm repo add prometheus https://prometheus-community.github.io/helm-charts -helm install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --namespace prometheus --create-namespace +helm repo add prometheus https://prometheus-community.github.io/helm-charts || true +helm upgrade --install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --namespace prometheus --create-namespace ### scaleops -helm install --create-namespace -n scaleops-system --repo https://registry.scaleops.com/charts/ --username scaleops --password ${SCALEOPS_TOKEN} --set scaleopsToken=${SCALEOPS_TOKEN} --set clusterName=$(kubectl config current-context) scaleops scaleops +helm upgrade --install --create-namespace -n scaleops-system --repo https://registry.scaleops.com/charts/ --username scaleops --password ${SCALEOPS_TOKEN} --set scaleopsToken=${SCALEOPS_TOKEN} --set clusterName=$(kubectl config current-context) scaleops scaleops kubectl apply -f config/scaleops/CustomOwnerGrouping.yaml +kubectl apply -f config/scaleops/KafkaBrokersPolicy.yaml #### Scaleops Dashboard Port Forward -kubectl port-forward 
scaleops-dashboard-pod-xxxx 8080 - -## Run Koperator on Kind -### koperator - Run as container on Kind (Skip if you want to run koperator locally) -helm install kafka-operator charts/kafka-operator --set operator.image.repository=koperator_e2e_test --set operator.image.tag=latest --namespace kafka --create-namespace +# kubectl port-forward 8080 -n scaleops-system +# (find pod name with: kubectl get pods -n scaleops-system) ## Run Koperator Locally ### Start Cloud Provider Kind in the background to enable LoadBalancer services for local koperator -sudo ~/go/bin/cloud-provider-kind & +# sudo ~/go/bin/cloud-provider-kind +# (run this manually in a separate terminal before starting koperator) ### Start Local Koperator instance: -kubectl create namespace kafka +kubectl create namespace kafka || true kubectl ens kafka make install -make run +# Run koperator locally in a separate terminal: +# go run ./main.go --metrics-addr=:8090 --disable-webhooks ## Initialize Zookeeper and Kafka Cluster kubectl apply -f config/samples/simplezookeeper.yaml -n zookeeper @@ -61,7 +69,7 @@ kubectl apply -f config/samples/simplekafkacluster.yaml -n kafka # This is necessary for local koperator access. If you don't want to run it, # you can port-forward the services instead. # -# Finally, you'll need to update your /etc/hosts file to direct request from +# Finally, you'll need to update your /etc/hosts file to direct request from # Koperator to the LoadBalancer IPs. 
You can find the LoadBalancer IPs by running: # kubectl get svc -n kafka # From 221a175fef545479f7ceeb5332f1b45ff8404ff4 Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Wed, 27 May 2026 13:41:13 -0400 Subject: [PATCH 10/25] Adding Pod Affinity Sync --- api/v1beta1/kafkacluster_types.go | 8 +++++++- charts/kafka-operator/crds/kafkaclusters.yaml | 7 +++++++ config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml | 7 +++++++ pkg/resources/kafka/kafka.go | 9 ++++++++- pkg/resources/kafka/kafka_test.go | 3 +++ 5 files changed, 32 insertions(+), 2 deletions(-) diff --git a/api/v1beta1/kafkacluster_types.go b/api/v1beta1/kafkacluster_types.go index 5fa3d22e9..39c0e1871 100644 --- a/api/v1beta1/kafkacluster_types.go +++ b/api/v1beta1/kafkacluster_types.go @@ -165,7 +165,13 @@ type KafkaClusterSpec struct { // a kafkaCluster instance on a Kind Cluster. // +kubebuilder:default=false // +optional - DebugEnabled bool `json:"debugEnabled"` + DebugEnabled bool `json:"debugEnabled"` + // Allows ScaleOps to manage Memory and CPU Resource Requests for Kafka Broker Pods. + // This Disables CPU and Memory request reconciliation from the desired state defined in + // the KafkaCluster to the current state in the Kubernetes Cluster + // +kubebuilder:default=false + // +optional + ScaleOpsEnabled bool `json:"scaleOpsEnabled"` ListenersConfig ListenersConfig `json:"listenersConfig"` // Custom ports to expose in the container. 
Example use case: a custom kafka distribution, that includes an integrated metrics api endpoint AdditionalPorts []corev1.ContainerPort `json:"additionalPorts,omitempty"` diff --git a/charts/kafka-operator/crds/kafkaclusters.yaml b/charts/kafka-operator/crds/kafkaclusters.yaml index 3e14a374d..e46947c03 100644 --- a/charts/kafka-operator/crds/kafkaclusters.yaml +++ b/charts/kafka-operator/crds/kafkaclusters.yaml @@ -23743,6 +23743,13 @@ spec: required: - failureThreshold type: object + scaleOpsEnabled: + default: false + description: |- + Allows ScaleOps to manage Memory and CPU Resource Requests for Kafka Broker Pods. + This Disables CPU and Memory request reconciliation from the desired state defined in + the KafkaCluster to the current state in the Kubernetes Cluster + type: boolean taintedBrokersSelector: description: Selector for broker pods that need to be recycled/reconciled properties: diff --git a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml index 3e14a374d..e46947c03 100644 --- a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml +++ b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml @@ -23743,6 +23743,13 @@ spec: required: - failureThreshold type: object + scaleOpsEnabled: + default: false + description: |- + Allows ScaleOps to manage Memory and CPU Resource Requests for Kafka Broker Pods. 
+ This Disables CPU and Memory request reconciliation from the desired state defined in + the KafkaCluster to the current state in the Kubernetes Cluster + type: boolean taintedBrokersSelector: description: Selector for broker pods that need to be recycled/reconciled properties: diff --git a/pkg/resources/kafka/kafka.go b/pkg/resources/kafka/kafka.go index 9e9a2c00e..20d0b89b2 100644 --- a/pkg/resources/kafka/kafka.go +++ b/pkg/resources/kafka/kafka.go @@ -941,6 +941,11 @@ func (r *Reconciler) updateStatusWithDockerImageAndVersion(brokerId int32, broke func syncResourceRequests(desiredPod, currentPod *corev1.Pod) { syncContainerResourceRequests(desiredPod.Spec.Containers, currentPod.Spec.Containers) syncContainerResourceRequests(desiredPod.Spec.InitContainers, currentPod.Spec.InitContainers) + syncPodAffinities(desiredPod, currentPod) +} + +func syncPodAffinities(desiredPod, currentPod *corev1.Pod) { + panic("unimplemented") } func syncContainerResourceRequests(desired, current []corev1.Container) { @@ -984,7 +989,9 @@ func (r *Reconciler) handleRollingUpgrade(log logr.Logger, desiredPod, currentPo desiredPod.Spec.Tolerations = uniqueTolerations } // Ignore CPU/memory request diffs — changing requests does not require a pod restart. - syncResourceRequests(desiredPod, currentPod) + if r.KafkaCluster.Spec.ScaleOpsEnabled { + syncResourceRequests(desiredPod, currentPod) + } // Check if the resource actually updated or if labels match TaintedBrokersSelector patchResult, err := patch.DefaultPatchMaker.Calculate(currentPod, desiredPod) switch { diff --git a/pkg/resources/kafka/kafka_test.go b/pkg/resources/kafka/kafka_test.go index 4ca517e6f..3b9e09120 100644 --- a/pkg/resources/kafka/kafka_test.go +++ b/pkg/resources/kafka/kafka_test.go @@ -1986,3 +1986,6 @@ func TestGetBrokerAzMap(t *testing.T) { }) } } + +func TestScaleOps(t. 
*testing.T) { + \ No newline at end of file From 390e0dfc906a560290a6a8d8fe5d46196089cafa Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Mon, 1 Jun 2026 13:43:34 -0400 Subject: [PATCH 11/25] Local Run Improvements --- run-local.sh | 145 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 99 insertions(+), 46 deletions(-) diff --git a/run-local.sh b/run-local.sh index 4c241f245..43dac7de2 100755 --- a/run-local.sh +++ b/run-local.sh @@ -1,71 +1,124 @@ #!/bin/bash +set -m # enable job control so fg works + +## PREREQUISITES: +### 1. Install Kind: https://kind.sigs.k8s.io/docs/user/quick-start/ +### 2. Start Docker Daemon and ensure it's running +### 3. If using SCALEOPS, set SCALEOPS_TOKEN env variable with your ScaleOps API token +### 4. Cloud Provider KIND is required to enable LoadBalancer services on Kind (For Local Koperator Degugging). + +## Usage: +## ./run-local.sh [--local] [--scaleops] +## +## --local Run koperator as a local process instead of as a container on Kind. +## Starts cloud-provider-kind and runs `make install && make run`. +## --scaleops Install the ScaleOps helm chart. Requires SCALEOPS_TOKEN to be set. + + +# NOTES for running koperator locally (--local flag): +# +# Make sure to set `debugEnabled: true` in your KafkaCluster spec. This will +# create LoadBalancer services for the Kafka and Cruise Control pods, allowing +# your local koperator to access services running on the Kind cluster. +# +# Cloud Provider KIND is required to enable LoadBalancer services on Kind. +# If you don't want to run it, you can port-forward the services instead. +# The script does this for you if you use the --local flag. +# +# Finally, you'll need to update your /etc/hosts file to direct requests from +# Koperator to the LoadBalancer IPs. 
You can find the LoadBalancer IPs by running: +# kubectl get svc -n kafka +# +# Your /etc/hosts entries should look something like this: +# 172.18.0.7 kafka-0.kafka.svc.cluster.local +# 172.18.0.9 kafka-1.kafka.svc.cluster.local +# 172.18.0.10 kafka-2.kafka.svc.cluster.local +# 172.18.0.11 kafka-all-broker.kafka.svc.cluster.local +# 172.18.0.8 kafka-cruisecontrol-svc.kafka.svc.cluster.local +# +# DEBUGGING Koperator Locally +# If you need to debug your local koperator, you can find the logs in /tmp/koperator.log. +# Additionally, you can attach a debugger to the koperator process using VSCODE. Instead of running `make run`, +# start koperator as a Go application with debug enabled from VSCode, and set breakpoints as needed. +# This can be done by opening main.go in VSCode, going to the DEBUG Tab and cliking Run and Debug. + +LOCAL=false +SCALEOPS=false + +while [[ $# -gt 0 ]]; do + case $1 in + --local) LOCAL=true; shift ;; + --scaleops) SCALEOPS=true; shift ;; + *) echo "Unknown flag: $1"; exit 1 ;; + esac +done + +if $SCALEOPS && [[ -z "${SCALEOPS_TOKEN}" ]]; then + echo "Error: --scaleops requires SCALEOPS_TOKEN to be set" + exit 1 +fi + ## Create kind cluster kind delete clusters kind-kafka kind create cluster --config=./tests/e2e/platforms/kind/kind_config.yaml --name=kind-kafka ## Build/Load images (Kafka 3.7.0) kind load docker-image docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.7.0 --name kind-kafka -### Skip if you want to run koperator locally -docker build . -t koperator_e2e_test -kind load docker-image koperator_e2e_test:latest --name kind-kafka + +if ! $LOCAL; then + docker build . 
-t koperator_e2e_test + kind load docker-image koperator_e2e_test:latest --name kind-kafka +fi ## Install Helm Charts and CRDs ### project contour -helm repo add contour https://projectcontour.github.io/helm-charts/ -helm install contour contour/contour --namespace projectcontour --create-namespace +helm repo add contour https://projectcontour.github.io/helm-charts/ --force-update +helm upgrade --install contour contour/contour --namespace projectcontour --create-namespace ### cert-manager helm repo add jetstack https://charts.jetstack.io --force-update -helm install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --version v1.16.2 --set crds.enabled=true +helm upgrade --install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace --version v1.16.2 --set crds.enabled=true ### zookeeper-operator -helm repo add pravega https://charts.pravega.io -helm install zookeeper-operator pravega/zookeeper-operator --version 0.2.15 --namespace zookeeper --create-namespace --set crd.create=true +helm repo add pravega https://charts.pravega.io --force-update +helm upgrade --install zookeeper-operator pravega/zookeeper-operator --version 0.2.15 --namespace zookeeper --create-namespace --set crd.create=true ### prometheus -helm repo add prometheus https://prometheus-community.github.io/helm-charts -helm install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --namespace prometheus --create-namespace +helm repo add prometheus https://prometheus-community.github.io/helm-charts --force-update +helm upgrade --install prometheus prometheus/kube-prometheus-stack --version 54.1.0 --namespace prometheus --create-namespace ### scaleops -helm install --create-namespace -n scaleops-system --repo https://registry.scaleops.com/charts/ --username scaleops --password ${SCALEOPS_TOKEN} --set scaleopsToken=${SCALEOPS_TOKEN} --set clusterName=$(kubectl config current-context) scaleops scaleops -k apply -f 
config/scaleops/CustomOwnerGrouping.yaml - -## Run Koperator on Kind -### koperator - Run as container on Kind (Skip if you want to run koperator locally) -helm install kafka-operator charts/kafka-operator --set operator.image.repository=koperator_e2e_test --set operator.image.tag=latest --namespace kafka --create-namespace +if $SCALEOPS; then + helm upgrade --install --create-namespace -n scaleops-system \ + --repo https://registry.scaleops.com/charts/ \ + --username scaleops --password "${SCALEOPS_TOKEN}" \ + --set scaleopsToken="${SCALEOPS_TOKEN}" \ + --set clusterName="$(kubectl config current-context)" \ + scaleops scaleops + kubectl apply -f config/scaleops/CustomOwnerGrouping.yaml +fi -## Run Koperator Locally -### Start Cloud Provider Kind in the background to enable LoadBalancer services for local koperator -sudo ~/go/bin/cloud-provider-kind & +## Run Koperator +if $LOCAL; then + ## Start Cloud Provider Kind in the background to enable LoadBalancer services + pgrep -f cloud-provider-kind &>/dev/null || sudo ~/go/bin/cloud-provider-kind > /tmp/cloudproviderkind.log 2>&1 & -### Start Local Koperator instance: -kubectl create namespace kafka -kubectl ens kafka -make install -make run + kubectl get namespace kafka &>/dev/null || kubectl create namespace kafka + kubectl config set-context --current --namespace=kafka + make install + make run > /tmp/koperator.log 2>&1 & +else + helm upgrade --install kafka-operator charts/kafka-operator \ + --set operator.image.repository=koperator_e2e_test \ + --set operator.image.tag=latest \ + --set prometheusMetrics.enabled=false \ + --namespace kafka --create-namespace +fi ## Initialize Zookeeper and Kafka Cluster kubectl apply -f config/samples/simplezookeeper.yaml -n zookeeper -k apply -f config/samples/simplekafkacluster.yaml -n kafka -# NOTES for running koperator locally: -# -# If you want to run koperator locally, make sure to set `debugEnabled: true` -# in your KafkaCluster spec. 
This will create LoadBalancer services for the -# Kafka and Cruise Control pods, allowing your local koperator to access -# services running on the Kind cluster. -# -# Cloud Provider KIND is required to enable LoadBalancer services on Kind. -# This is necessary for local koperator access. If you don't want to run it, -# you can port-forward the services instead. -# -# Finally, you'll need to update your /etc/hosts file to direct request from -# Koperator to the LoadBalancer IPs. You can find the LoadBalancer IPs by running: -# kubectl get svc -n kafka -# -# Your /etc/hosts entries should look something like this: -# 172.18.0.7 kafka-0.kafka.svc.cluster.local -# 172.18.0.9 kafka-1.kafka.svc.cluster.local -# 172.18.0.10 kafka-2.kafka.svc.cluster.local -# 172.18.0.11 kafka-all-broker.kafka.svc.cluster.local -# 172.18.0.8 kafka-cruisecontrol-svc.kafka.svc.cluster.local \ No newline at end of file +kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=kafka-operator -n kafka --timeout=120s +sleep 5 + +kubectl apply -f config/samples/simplekafkacluster.yaml -n kafka From d9681100ccb24440eaf9fccaf81a199d027dd43a Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Mon, 1 Jun 2026 14:07:46 -0400 Subject: [PATCH 12/25] Local Run Improvements --- run-local.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/run-local.sh b/run-local.sh index 43dac7de2..e81192b9c 100755 --- a/run-local.sh +++ b/run-local.sh @@ -101,12 +101,12 @@ fi ## Run Koperator if $LOCAL; then ## Start Cloud Provider Kind in the background to enable LoadBalancer services - pgrep -f cloud-provider-kind &>/dev/null || sudo ~/go/bin/cloud-provider-kind > /tmp/cloudproviderkind.log 2>&1 & + pgrep -f cloud-provider-kind &>/dev/null || cloud-provider-kind > /tmp/cloudproviderkind.log 2>&1 & kubectl get namespace kafka &>/dev/null || kubectl create namespace kafka kubectl config set-context --current --namespace=kafka make install - make run > /tmp/koperator.log 2>&1 & + 
else helm upgrade --install kafka-operator charts/kafka-operator \ --set operator.image.repository=koperator_e2e_test \ @@ -122,3 +122,8 @@ kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=kafka-operator sleep 5 kubectl apply -f config/samples/simplekafkacluster.yaml -n kafka + +## Start Local Koperator +if $LOCAL; then + make run +fi \ No newline at end of file From 1174455d3260c533225763255255eb61aaa2f07a Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Tue, 2 Jun 2026 15:34:07 -0400 Subject: [PATCH 13/25] Added test case for LoadBalancer Service --- pkg/resources/kafka/service_test.go | 232 ++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 pkg/resources/kafka/service_test.go diff --git a/pkg/resources/kafka/service_test.go b/pkg/resources/kafka/service_test.go new file mode 100644 index 000000000..d88381e13 --- /dev/null +++ b/pkg/resources/kafka/service_test.go @@ -0,0 +1,232 @@ +// Copyright © 2023 Cisco Systems, Inc. and/or its affiliates +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package kafka + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + + "go.uber.org/mock/gomock" + + apiutil "github.com/banzaicloud/koperator/api/util" + "github.com/banzaicloud/koperator/api/v1beta1" + banzaiv1beta1 "github.com/banzaicloud/koperator/api/v1beta1" + "github.com/banzaicloud/koperator/pkg/resources" + mocks "github.com/banzaicloud/koperator/pkg/resources/kafka/mocks" + "github.com/banzaicloud/koperator/pkg/util" +) + +func TestService(t *testing.T) { + testCases := []struct { + testName string + r *Reconciler + expectedService *corev1.Service + }{ + { + testName: "Basic Internal And External Service", + r: &Reconciler{ + Reconciler: resources.Reconciler{ + KafkaCluster: &v1beta1.KafkaCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kafka", + Namespace: "kafka", + }, + Spec: v1beta1.KafkaClusterSpec{ + DebugEnabled: false, + KRaftMode: false, + ListenersConfig: v1beta1.ListenersConfig{ + InternalListeners: []banzaiv1beta1.InternalListenerConfig{ + { + CommonListenerSpec: v1beta1.CommonListenerSpec{ + Name: "internal", + ContainerPort: 29092, + Type: "plaintext", + UsedForInnerBrokerCommunication: true, + }, + }, + }, + ExternalListeners: []banzaiv1beta1.ExternalListenerConfig{ + { + CommonListenerSpec: v1beta1.CommonListenerSpec{ + Name: "plaintext", + ContainerPort: 29094, + Type: "plaintext", + UsedForInnerBrokerCommunication: false, + }, + AccessMethod: corev1.ServiceTypeLoadBalancer, + }, + }, + }, + }, + }, + }, + }, + expectedService: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kafka-1", + Namespace: "kafka", + Labels: map[string]string{"app": "kafka", "brokerId": "1", "kafka_cr": "kafka"}, + Annotations: map[string]string{}, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "", + Kind: "", + Name: "kafka", + UID: "", + Controller: util.BoolPointer(true), + BlockOwnerDeletion: 
util.BoolPointer(true), + }, + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + SessionAffinity: corev1.ServiceAffinityNone, + Selector: apiutil.MergeLabels(apiutil.LabelsForKafka("kafka"), map[string]string{v1beta1.BrokerIdLabelKey: fmt.Sprintf("1")}), + Ports: []corev1.ServicePort{ + { + Name: "tcp-internal", + Protocol: "TCP", + Port: 29092, + TargetPort: intstr.FromInt(29092), + NodePort: 0, + }, + { + Name: "tcp-plaintext", + Protocol: "TCP", + Port: 29094, + TargetPort: intstr.FromInt(29094), + NodePort: 0, + }, + { + Name: "metrics", + Protocol: "TCP", + Port: 9020, + TargetPort: intstr.FromInt(9020), + NodePort: 0, + }, + }, + ClusterIP: "", + PublishNotReadyAddresses: false, + }, + }, + }, + { + testName: "Basic Internal And External Service", + r: &Reconciler{ + Reconciler: resources.Reconciler{ + KafkaCluster: &v1beta1.KafkaCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kafka", + Namespace: "kafka", + }, + Spec: v1beta1.KafkaClusterSpec{ + DebugEnabled: true, + KRaftMode: false, + ListenersConfig: v1beta1.ListenersConfig{ + InternalListeners: []banzaiv1beta1.InternalListenerConfig{ + { + CommonListenerSpec: v1beta1.CommonListenerSpec{ + Name: "internal", + ContainerPort: 29092, + Type: "plaintext", + UsedForInnerBrokerCommunication: true, + }, + }, + }, + ExternalListeners: []banzaiv1beta1.ExternalListenerConfig{ + { + CommonListenerSpec: v1beta1.CommonListenerSpec{ + Name: "plaintext", + ContainerPort: 29094, + Type: "plaintext", + UsedForInnerBrokerCommunication: false, + }, + AccessMethod: corev1.ServiceTypeLoadBalancer, + }, + }, + }, + }, + }, + }, + }, + expectedService: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: "kafka-1", + Namespace: "kafka", + Labels: map[string]string{"app": "kafka", "brokerId": "1", "kafka_cr": "kafka"}, + Annotations: map[string]string{}, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: "", + Kind: "", + Name: "kafka", + UID: "", + Controller: util.BoolPointer(true), 
+ BlockOwnerDeletion: util.BoolPointer(true), + }, + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + SessionAffinity: corev1.ServiceAffinityNone, + Selector: apiutil.MergeLabels(apiutil.LabelsForKafka("kafka"), map[string]string{v1beta1.BrokerIdLabelKey: fmt.Sprintf("1")}), + Ports: []corev1.ServicePort{ + { + Name: "tcp-internal", + Protocol: "TCP", + Port: 29092, + TargetPort: intstr.FromInt(29092), + NodePort: 0, + }, + { + Name: "tcp-plaintext", + Protocol: "TCP", + Port: 29094, + TargetPort: intstr.FromInt(29094), + NodePort: 0, + }, + { + Name: "metrics", + Protocol: "TCP", + Port: 9020, + TargetPort: intstr.FromInt(9020), + NodePort: 0, + }, + }, + ClusterIP: "", + PublishNotReadyAddresses: false, + }, + }, + }, + } + mockCtrl := gomock.NewController(t) + + for _, test := range testCases { + t.Run(test.testName, func(t *testing.T) { + mockClient := mocks.NewMockClient(mockCtrl) + mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).AnyTimes() + r := test.r + + actualService := r.service(1, nil) + + require.Equal(t, test.expectedService, actualService) + }) + } +} From 5b932a0ce286da3aabf63abea5df5129772e120e Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Tue, 2 Jun 2026 15:41:00 -0400 Subject: [PATCH 14/25] NIT: NEW LINES --- config/samples/simpleZookeeper.yaml | 3 ++- run-local.sh | 2 +- tests/e2e/platforms/kind/kind_config.yaml | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/config/samples/simpleZookeeper.yaml b/config/samples/simpleZookeeper.yaml index 82123498c..6bf70aa9c 100644 --- a/config/samples/simpleZookeeper.yaml +++ b/config/samples/simpleZookeeper.yaml @@ -6,4 +6,5 @@ metadata: spec: replicas: 3 persistence: - reclaimPolicy: Delete \ No newline at end of file + reclaimPolicy: Delete + diff --git a/run-local.sh b/run-local.sh index e81192b9c..15b99638e 100755 --- a/run-local.sh +++ b/run-local.sh @@ -126,4 +126,4 @@ kubectl apply -f 
config/samples/simplekafkacluster.yaml -n kafka ## Start Local Koperator if $LOCAL; then make run -fi \ No newline at end of file +fi diff --git a/tests/e2e/platforms/kind/kind_config.yaml b/tests/e2e/platforms/kind/kind_config.yaml index 6515c31f4..15a139f3f 100644 --- a/tests/e2e/platforms/kind/kind_config.yaml +++ b/tests/e2e/platforms/kind/kind_config.yaml @@ -39,4 +39,5 @@ nodes: listenAddress: "0.0.0.0" - containerPort: 443 hostPort: 443 - listenAddress: "0.0.0.0" \ No newline at end of file + listenAddress: "0.0.0.0" + \ No newline at end of file From 34ef3dd4c4ef0646e7a9c286d04d57f87b9a7c2e Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Wed, 3 Jun 2026 12:36:18 -0400 Subject: [PATCH 15/25] Add kube-context and cloud-provider-kind checks --- run-local.sh | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/run-local.sh b/run-local.sh index 15b99638e..171ce2c57 100755 --- a/run-local.sh +++ b/run-local.sh @@ -1,11 +1,10 @@ #!/bin/bash -set -m # enable job control so fg works ## PREREQUISITES: ### 1. Install Kind: https://kind.sigs.k8s.io/docs/user/quick-start/ ### 2. Start Docker Daemon and ensure it's running ### 3. If using SCALEOPS, set SCALEOPS_TOKEN env variable with your ScaleOps API token -### 4. Cloud Provider KIND is required to enable LoadBalancer services on Kind (For Local Koperator Degugging). +### 4. Install and Start cloud-provider-kind to enable LoadBalancer services on Kind (Required for Local Debugging). https://github.com/kubernetes-sigs/cloud-provider-kind ## Usage: ## ./run-local.sh [--local] [--scaleops] @@ -15,7 +14,7 @@ set -m # enable job control so fg works ## --scaleops Install the ScaleOps helm chart. Requires SCALEOPS_TOKEN to be set. -# NOTES for running koperator locally (--local flag): +# IMPORTANT NOTES for running koperator locally (--local flag): # # Make sure to set `debugEnabled: true` in your KafkaCluster spec. 
This will # create LoadBalancer services for the Kafka and Cruise Control pods, allowing @@ -58,10 +57,23 @@ if $SCALEOPS && [[ -z "${SCALEOPS_TOKEN}" ]]; then exit 1 fi +## Check if Docker daemon is running +if ! docker ps &>/dev/null; then + echo "Error: Docker daemon is not running. Please start Docker and try again." + exit 1 +fi + ## Create kind cluster kind delete clusters kind-kafka kind create cluster --config=./tests/e2e/platforms/kind/kind_config.yaml --name=kind-kafka +## Validate kubectl context is set to kind +CURRENT_CONTEXT=$(kubectl config current-context) +if [[ ! "$CURRENT_CONTEXT" =~ kind ]]; then + echo "Error: kubectl context is not set to a kind cluster. Current context: $CURRENT_CONTEXT" + exit 1 +fi + ## Build/Load images (Kafka 3.7.0) kind load docker-image docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.7.0 --name kind-kafka @@ -100,8 +112,11 @@ fi ## Run Koperator if $LOCAL; then - ## Start Cloud Provider Kind in the background to enable LoadBalancer services - pgrep -f cloud-provider-kind &>/dev/null || cloud-provider-kind > /tmp/cloudproviderkind.log 2>&1 & + ## Check if cloud-provider-kind started successfully + if ! pgrep -f cloud-provider-kind &>/dev/null; then + echo "Warning: cloud-provider-kind failed to start. LoadBalancer services may not work properly." + echo "Check /tmp/cloudproviderkind.log for details." + fi kubectl get namespace kafka &>/dev/null || kubectl create namespace kafka kubectl config set-context --current --namespace=kafka @@ -118,8 +133,10 @@ fi ## Initialize Zookeeper and Kafka Cluster kubectl apply -f config/samples/simplezookeeper.yaml -n zookeeper -kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=kafka-operator -n kafka --timeout=120s -sleep 5 +if ! 
$LOCAL; then + kubectl wait --for=condition=Ready pod -l app.kubernetes.io/name=kafka-operator -n kafka --timeout=120s + sleep 5 +fi kubectl apply -f config/samples/simplekafkacluster.yaml -n kafka From 46f8a455ee74c725435ebfee4a2fb34319d1a52c Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Wed, 3 Jun 2026 14:45:56 -0400 Subject: [PATCH 16/25] Clean up imports --- pkg/resources/kafka/service_test.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pkg/resources/kafka/service_test.go b/pkg/resources/kafka/service_test.go index d88381e13..cdb4e6650 100644 --- a/pkg/resources/kafka/service_test.go +++ b/pkg/resources/kafka/service_test.go @@ -27,7 +27,6 @@ import ( apiutil "github.com/banzaicloud/koperator/api/util" "github.com/banzaicloud/koperator/api/v1beta1" - banzaiv1beta1 "github.com/banzaicloud/koperator/api/v1beta1" "github.com/banzaicloud/koperator/pkg/resources" mocks "github.com/banzaicloud/koperator/pkg/resources/kafka/mocks" "github.com/banzaicloud/koperator/pkg/util" @@ -52,7 +51,7 @@ func TestService(t *testing.T) { DebugEnabled: false, KRaftMode: false, ListenersConfig: v1beta1.ListenersConfig{ - InternalListeners: []banzaiv1beta1.InternalListenerConfig{ + InternalListeners: []v1beta1.InternalListenerConfig{ { CommonListenerSpec: v1beta1.CommonListenerSpec{ Name: "internal", @@ -62,7 +61,7 @@ func TestService(t *testing.T) { }, }, }, - ExternalListeners: []banzaiv1beta1.ExternalListenerConfig{ + ExternalListeners: []v1beta1.ExternalListenerConfig{ { CommonListenerSpec: v1beta1.CommonListenerSpec{ Name: "plaintext", @@ -140,7 +139,7 @@ func TestService(t *testing.T) { DebugEnabled: true, KRaftMode: false, ListenersConfig: v1beta1.ListenersConfig{ - InternalListeners: []banzaiv1beta1.InternalListenerConfig{ + InternalListeners: []v1beta1.InternalListenerConfig{ { CommonListenerSpec: v1beta1.CommonListenerSpec{ Name: "internal", @@ -150,7 +149,7 @@ func TestService(t *testing.T) { }, }, }, - ExternalListeners: 
[]banzaiv1beta1.ExternalListenerConfig{ + ExternalListeners: []v1beta1.ExternalListenerConfig{ { CommonListenerSpec: v1beta1.CommonListenerSpec{ Name: "plaintext", From 3556b78e9cdf51f763dd3303e6a8312e7573b9cb Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Fri, 5 Jun 2026 12:12:35 -0400 Subject: [PATCH 17/25] Clean up documentation --- run-local.sh | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/run-local.sh b/run-local.sh index 171ce2c57..a9e5d7cb8 100755 --- a/run-local.sh +++ b/run-local.sh @@ -1,28 +1,30 @@ #!/bin/bash -## PREREQUISITES: -### 1. Install Kind: https://kind.sigs.k8s.io/docs/user/quick-start/ -### 2. Start Docker Daemon and ensure it's running -### 3. If using SCALEOPS, set SCALEOPS_TOKEN env variable with your ScaleOps API token -### 4. Install and Start cloud-provider-kind to enable LoadBalancer services on Kind (Required for Local Debugging). https://github.com/kubernetes-sigs/cloud-provider-kind - -## Usage: -## ./run-local.sh [--local] [--scaleops] -## -## --local Run koperator as a local process instead of as a container on Kind. -## Starts cloud-provider-kind and runs `make install && make run`. -## --scaleops Install the ScaleOps helm chart. Requires SCALEOPS_TOKEN to be set. +## PREREQUISITES +# 1. Install Kind: https://kind.sigs.k8s.io/docs/user/quick-start/ +# 2. Start Docker Daemon and ensure it's running +# 3. If using SCALEOPS, set SCALEOPS_TOKEN env variable with your ScaleOps API token +# 4. Install and Start cloud-provider-kind to enable LoadBalancer services on Kind (Required for Local Debugging). https://github.com/kubernetes-sigs/cloud-provider-kind + +## USAGE +# ./run-local.sh [--local] [--scaleops] +# +# --local Run koperator as a local process instead of as a container on Kind. +# Starts cloud-provider-kind and runs `make install && make run`. +# --scaleops Install the ScaleOps helm chart. Requires SCALEOPS_TOKEN to be set. 
-# IMPORTANT NOTES for running koperator locally (--local flag): +## IMPORTANT NOTES (for running koperator locally with --local flag) # # Make sure to set `debugEnabled: true` in your KafkaCluster spec. This will # create LoadBalancer services for the Kafka and Cruise Control pods, allowing # your local koperator to access services running on the Kind cluster. # # Cloud Provider KIND is required to enable LoadBalancer services on Kind. -# If you don't want to run it, you can port-forward the services instead. -# The script does this for you if you use the --local flag. +# If you don't want to run it, you can port-forward the services instead. If you are running in local +# mode and notice that your kafka services don't have an external IP, it's because cloud-provider-kind +# either isn't running or has some issue. Local koperator won't be able to communicate +# with kafka pods without these. # # Finally, you'll need to update your /etc/hosts file to direct requests from # Koperator to the LoadBalancer IPs. You can find the LoadBalancer IPs by running: @@ -34,12 +36,13 @@ # 172.18.0.10 kafka-2.kafka.svc.cluster.local # 172.18.0.11 kafka-all-broker.kafka.svc.cluster.local # 172.18.0.8 kafka-cruisecontrol-svc.kafka.svc.cluster.local -# -# DEBUGGING Koperator Locally + + +## ATTACHING A DEBUGGER TO LOCAL KOPERATOR # If you need to debug your local koperator, you can find the logs in /tmp/koperator.log. -# Additionally, you can attach a debugger to the koperator process using VSCODE. Instead of running `make run`, +# Additionally, you can attach a debugger to the koperator process using VSCODE. Instead of running `make run`, # start koperator as a Go application with debug enabled from VSCode, and set breakpoints as needed. -# This can be done by opening main.go in VSCode, going to the DEBUG Tab and cliking Run and Debug. +# This can be done by simply opening main.go in VSCode, going to the DEBUG Tab, and clicking Run and Debug. 
LOCAL=false SCALEOPS=false @@ -52,7 +55,7 @@ while [[ $# -gt 0 ]]; do esac done -if $SCALEOPS && [[ -z "${SCALEOPS_TOKEN}" ]]; then +if $SCALEOPS && [[ -n "${SCALEOPS_TOKEN}" ]]; then echo "Error: --scaleops requires SCALEOPS_TOKEN to be set" exit 1 fi From 9bb9017016b091ee22265cc9dd51e71676b1f518 Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Mon, 8 Jun 2026 14:53:19 -0400 Subject: [PATCH 18/25] Clean up Test Case --- pkg/resources/kafka/service_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/resources/kafka/service_test.go b/pkg/resources/kafka/service_test.go index cdb4e6650..f00b7e178 100644 --- a/pkg/resources/kafka/service_test.go +++ b/pkg/resources/kafka/service_test.go @@ -15,7 +15,6 @@ package kafka import ( - "fmt" "testing" "github.com/stretchr/testify/require" @@ -97,7 +96,7 @@ func TestService(t *testing.T) { Spec: corev1.ServiceSpec{ Type: corev1.ServiceTypeClusterIP, SessionAffinity: corev1.ServiceAffinityNone, - Selector: apiutil.MergeLabels(apiutil.LabelsForKafka("kafka"), map[string]string{v1beta1.BrokerIdLabelKey: fmt.Sprintf("1")}), + Selector: apiutil.MergeLabels(apiutil.LabelsForKafka("kafka"), map[string]string{v1beta1.BrokerIdLabelKey: "1"}), Ports: []corev1.ServicePort{ { Name: "tcp-internal", @@ -185,7 +184,7 @@ func TestService(t *testing.T) { Spec: corev1.ServiceSpec{ Type: corev1.ServiceTypeLoadBalancer, SessionAffinity: corev1.ServiceAffinityNone, - Selector: apiutil.MergeLabels(apiutil.LabelsForKafka("kafka"), map[string]string{v1beta1.BrokerIdLabelKey: fmt.Sprintf("1")}), + Selector: apiutil.MergeLabels(apiutil.LabelsForKafka("kafka"), map[string]string{v1beta1.BrokerIdLabelKey: "1"}), Ports: []corev1.ServicePort{ { Name: "tcp-internal", From 3b66b385ae1343a2c69794ea957017c5d3b27435 Mon Sep 17 00:00:00 2001 From: Ha Van Date: Tue, 9 Jun 2026 11:57:02 -0500 Subject: [PATCH 19/25] Update to run-local script --- run-local.sh | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 
deletions(-) diff --git a/run-local.sh b/run-local.sh index a9e5d7cb8..264989e67 100755 --- a/run-local.sh +++ b/run-local.sh @@ -47,6 +47,9 @@ LOCAL=false SCALEOPS=false +KOPERATOR_IMAGE=docker.io/library/koperator_e2e_test +CERT_DIR="/etc/webhook/certs" + while [[ $# -gt 0 ]]; do case $1 in --local) LOCAL=true; shift ;; @@ -81,7 +84,7 @@ fi kind load docker-image docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.7.0 --name kind-kafka if ! $LOCAL; then - docker build . -t koperator_e2e_test + docker build . -t $KOPERATOR_IMAGE kind load docker-image koperator_e2e_test:latest --name kind-kafka fi @@ -127,7 +130,7 @@ if $LOCAL; then else helm upgrade --install kafka-operator charts/kafka-operator \ - --set operator.image.repository=koperator_e2e_test \ + --set operator.image.repository=$KOPERATOR_IMAGE \ --set operator.image.tag=latest \ --set prometheusMetrics.enabled=false \ --namespace kafka --create-namespace @@ -145,5 +148,20 @@ kubectl apply -f config/samples/simplekafkacluster.yaml -n kafka ## Start Local Koperator if $LOCAL; then + if [[ ! -f "$CERT_DIR/tls.crt" || ! -f "$CERT_DIR/tls.key" ]]; then + echo "Webhook certs not found, generating self-signed certs..." + mkdir -p "$CERT_DIR" + openssl req -x509 -newkey rsa:4096 \ + -keyout "$CERT_DIR/tls.key" \ + -out "$CERT_DIR/tls.crt" \ + -days 365 -nodes \ + -subj '/CN=localhost' + else + echo "Webhook certs already exist, skipping generation." 
+ fi + + ## TODO: run cloud-provider-kind in the background + ## TODO: print command to modify /etc/hosts for svc + make run fi From f60328eaee18d45fb1732d81dd86039a049b8ad9 Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Fri, 12 Jun 2026 13:12:32 -0400 Subject: [PATCH 20/25] Add Affinity Syncing with Scaleops --- pkg/resources/kafka/kafka.go | 8 +- pkg/resources/kafka/kafka_test.go | 3 - pkg/resources/kafka/util.go | 157 +++++++++ pkg/resources/kafka/util_test.go | 563 ++++++++++++++++++++++++++++++ 4 files changed, 726 insertions(+), 5 deletions(-) diff --git a/pkg/resources/kafka/kafka.go b/pkg/resources/kafka/kafka.go index 8fc5e042c..f4deb7795 100644 --- a/pkg/resources/kafka/kafka.go +++ b/pkg/resources/kafka/kafka.go @@ -947,11 +947,12 @@ func (r *Reconciler) updateStatusWithDockerImageAndVersion(brokerId int32, broke func syncResourceRequests(desiredPod, currentPod *corev1.Pod) { syncContainerResourceRequests(desiredPod.Spec.Containers, currentPod.Spec.Containers) syncContainerResourceRequests(desiredPod.Spec.InitContainers, currentPod.Spec.InitContainers) - syncPodAffinities(desiredPod, currentPod) } +// syncPodAffinities syncs ScaleOps-related pod affinities from the current pod to the desired pod. +// This preserves affinities created by ScaleOps to prevent unnecessary pod restarts. func syncPodAffinities(desiredPod, currentPod *corev1.Pod) { - panic("unimplemented") + syncScaleOpsAffinities(desiredPod, currentPod) } func syncContainerResourceRequests(desired, current []corev1.Container) { @@ -997,6 +998,9 @@ func (r *Reconciler) handleRollingUpgrade(log logr.Logger, desiredPod, currentPo // Ignore CPU/memory request diffs — changing requests does not require a pod restart. 
if r.KafkaCluster.Spec.ScaleOpsEnabled { syncResourceRequests(desiredPod, currentPod) + // If current pod had affinities created by ScaleOps, we need to sync them to desiredPod, + // otherwise they will be removed and cause pod restart + syncPodAffinities(desiredPod, currentPod) } // Check if the resource actually updated or if labels match TaintedBrokersSelector patchResult, err := patch.DefaultPatchMaker.Calculate(currentPod, desiredPod) diff --git a/pkg/resources/kafka/kafka_test.go b/pkg/resources/kafka/kafka_test.go index f636715e5..ad9e6db4b 100644 --- a/pkg/resources/kafka/kafka_test.go +++ b/pkg/resources/kafka/kafka_test.go @@ -1986,6 +1986,3 @@ func TestGetBrokerAzMap(t *testing.T) { }) } } - -func TestScaleOps(t. *testing.T) { - \ No newline at end of file diff --git a/pkg/resources/kafka/util.go b/pkg/resources/kafka/util.go index cfafbae14..7f1d6c077 100644 --- a/pkg/resources/kafka/util.go +++ b/pkg/resources/kafka/util.go @@ -18,9 +18,11 @@ package kafka import ( "encoding/base64" "fmt" + "reflect" "sort" "github.com/google/uuid" + corev1 "k8s.io/api/core/v1" "github.com/banzaicloud/koperator/api/v1beta1" ) @@ -73,3 +75,158 @@ func generateRandomClusterID() string { randomUUID := uuid.New() return base64.URLEncoding.EncodeToString(randomUUID[:]) } + +// syncScaleOpsAffinities syncs all scale ops related affinities from the current pod to the desired pod. +// This includes pod affinities with "scaleops.sh/managed-unevictable" label selector +// and node affinities with "scaleops.sh/node-packing=true" selector. +func syncScaleOpsAffinities(desiredPod, currentPod *corev1.Pod) { + syncScaleOpsPodAffinities(desiredPod, currentPod) + syncScaleOpsNodeAffinities(desiredPod, currentPod) +} + +// syncScaleOpsPodAffinities syncs preferred pod affinities with "scaleops.sh/managed-unevictable" +// label selector from current pod to desired pod. 
+func syncScaleOpsPodAffinities(desiredPod, currentPod *corev1.Pod) { + if currentPod.Spec.Affinity == nil || currentPod.Spec.Affinity.PodAffinity == nil { + return + } + + currentPodAffinity := currentPod.Spec.Affinity.PodAffinity + + // Filter preferred pod affinities with "scaleops.sh/managed-unevictable" label selector + var scaleOpsPreferredAffinities []corev1.WeightedPodAffinityTerm + if currentPodAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil { + for _, term := range currentPodAffinity.PreferredDuringSchedulingIgnoredDuringExecution { + if term.PodAffinityTerm.LabelSelector != nil { + hasScaleOpsLabel := false + + // Check MatchExpressions + for _, requirement := range term.PodAffinityTerm.LabelSelector.MatchExpressions { + if requirement.Key == "scaleops.sh/managed-unevictable" { + hasScaleOpsLabel = true + break + } + } + + // Check MatchLabels if not found in MatchExpressions + if !hasScaleOpsLabel { + if _, exists := term.PodAffinityTerm.LabelSelector.MatchLabels["scaleops.sh/managed-unevictable"]; exists { + hasScaleOpsLabel = true + } + } + + if hasScaleOpsLabel { + scaleOpsPreferredAffinities = append(scaleOpsPreferredAffinities, term) + } + } + } + } + + // If we found any scale ops preferred affinities, add them to the desired pod + if len(scaleOpsPreferredAffinities) > 0 { + if desiredPod.Spec.Affinity == nil { + desiredPod.Spec.Affinity = &corev1.Affinity{} + } + if desiredPod.Spec.Affinity.PodAffinity == nil { + desiredPod.Spec.Affinity.PodAffinity = &corev1.PodAffinity{} + } + + // Merge scale ops preferred affinities, avoiding duplicates + existingTerms := desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution + for _, newTerm := range scaleOpsPreferredAffinities { + // Check if this term already exists + found := false + for _, existing := range existingTerms { + if reflect.DeepEqual(existing.PodAffinityTerm, newTerm.PodAffinityTerm) && existing.Weight == newTerm.Weight { + found = true + break + 
} + } + if !found { + existingTerms = append(existingTerms, newTerm) + } + } + desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution = existingTerms + } +} + +// syncScaleOpsNodeAffinities syncs preferred node affinities with "scaleops.sh/node-packing=true" +// selector from current pod to desired pod. +func syncScaleOpsNodeAffinities(desiredPod, currentPod *corev1.Pod) { + if currentPod.Spec.Affinity == nil || currentPod.Spec.Affinity.NodeAffinity == nil { + return + } + + currentNodeAffinity := currentPod.Spec.Affinity.NodeAffinity + + // Filter preferred node affinities with "scaleops.sh/node-packing=true" selector + var scaleOpsPreferredTerms []corev1.PreferredSchedulingTerm + if currentNodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil { + for _, term := range currentNodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution { + hasScaleOpsNodePacking := false + + // Check MatchExpressions + for _, requirement := range term.Preference.MatchExpressions { + if requirement.Key == "scaleops.sh/node-packing" { + for _, val := range requirement.Values { + if val == "true" { + hasScaleOpsNodePacking = true + break + } + } + if hasScaleOpsNodePacking { + break + } + } + } + + // Check MatchFields if not found in MatchExpressions + if !hasScaleOpsNodePacking { + for _, requirement := range term.Preference.MatchFields { + if requirement.Key == "scaleops.sh/node-packing" { + for _, val := range requirement.Values { + if val == "true" { + hasScaleOpsNodePacking = true + break + } + } + if hasScaleOpsNodePacking { + break + } + } + } + } + + if hasScaleOpsNodePacking { + scaleOpsPreferredTerms = append(scaleOpsPreferredTerms, term) + } + } + } + + // If we found any scale ops node affinities, add them to the desired pod + if len(scaleOpsPreferredTerms) > 0 { + if desiredPod.Spec.Affinity == nil { + desiredPod.Spec.Affinity = &corev1.Affinity{} + } + if desiredPod.Spec.Affinity.NodeAffinity == nil { + 
desiredPod.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{} + } + + // Merge scale ops node affinities, avoiding duplicates + existingTerms := desiredPod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution + for _, newTerm := range scaleOpsPreferredTerms { + // Check if this term already exists + found := false + for _, existing := range existingTerms { + if reflect.DeepEqual(existing.Preference, newTerm.Preference) && existing.Weight == newTerm.Weight { + found = true + break + } + } + if !found { + existingTerms = append(existingTerms, newTerm) + } + } + desiredPod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = existingTerms + } +} diff --git a/pkg/resources/kafka/util_test.go b/pkg/resources/kafka/util_test.go index d4c04045e..96f9db5eb 100644 --- a/pkg/resources/kafka/util_test.go +++ b/pkg/resources/kafka/util_test.go @@ -20,6 +20,9 @@ import ( "reflect" "testing" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/banzaicloud/koperator/api/v1beta1" ) @@ -402,3 +405,563 @@ func TestGenerateQuorumVoters(t *testing.T) { }) } } + +func TestSyncScaleOpsPodAffinities(t *testing.T) { + tests := []struct { + name string + currentPod *corev1.Pod + desiredPod *corev1.Pod + expectedPodAffinity bool + expectedTermCount int + }{ + { + name: "no affinity in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: false, + expectedTermCount: 0, + }, + { + name: "no pod affinity in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{}, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: false, + 
expectedTermCount: 0, + }, + { + name: "pod affinity with scaleops managed-unevictable in MatchLabels", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scaleops.sh/managed-unevictable": "true", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: true, + expectedTermCount: 1, + }, + { + name: "pod affinity with scaleops managed-unevictable in MatchExpressions", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 50, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "scaleops.sh/managed-unevictable", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: true, + expectedTermCount: 1, + }, + { + name: "pod affinity with mixed terms, only scaleops managed-unevictable should be synced", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: 
[]corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "other", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + { + Weight: 50, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scaleops.sh/managed-unevictable": "true", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedPodAffinity: true, + expectedTermCount: 1, + }, + { + name: "desired pod already has pod affinity, scaleops affinity should be merged", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scaleops.sh/managed-unevictable": "true", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 80, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "myapp", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + }, + }, + }, + expectedPodAffinity: true, + expectedTermCount: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + syncScaleOpsPodAffinities(tt.desiredPod, tt.currentPod) + + if 
!tt.expectedPodAffinity { + if tt.desiredPod.Spec.Affinity != nil && tt.desiredPod.Spec.Affinity.PodAffinity != nil { + t.Errorf("expected no pod affinity, but got one") + } + return + } + + if tt.desiredPod.Spec.Affinity == nil || tt.desiredPod.Spec.Affinity.PodAffinity == nil { + t.Errorf("expected pod affinity to be set") + return + } + + gotTermCount := len(tt.desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution) + if gotTermCount != tt.expectedTermCount { + t.Errorf("expected %d pod affinity terms, got %d", tt.expectedTermCount, gotTermCount) + } + + // Verify all synced terms have the scaleops label + for _, term := range tt.desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution { + if term.PodAffinityTerm.LabelSelector != nil { + hasScaleOpsLabel := false + for _, req := range term.PodAffinityTerm.LabelSelector.MatchExpressions { + if req.Key == "scaleops.sh/managed-unevictable" { + hasScaleOpsLabel = true + break + } + } + if !hasScaleOpsLabel { + if _, exists := term.PodAffinityTerm.LabelSelector.MatchLabels["scaleops.sh/managed-unevictable"]; !exists { + // This term should have been filtered out if it doesn't have scaleops label + // unless it came from the original desired pod + } + } + } + } + }) + } +} + +func TestSyncScaleOpsNodeAffinities(t *testing.T) { + tests := []struct { + name string + currentPod *corev1.Pod + desiredPod *corev1.Pod + expectedNodeAffinity bool + expectedTermCount int + }{ + { + name: "no affinity in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: false, + expectedTermCount: 0, + }, + { + name: "no node affinity in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{}, + 
}, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: false, + expectedTermCount: 0, + }, + { + name: "node affinity with scaleops node-packing in MatchExpressions", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 100, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: true, + expectedTermCount: 1, + }, + { + name: "node affinity with scaleops node-packing in MatchFields", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 50, + Preference: corev1.NodeSelectorTerm{ + MatchFields: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: true, + expectedTermCount: 1, + }, + { + name: "node affinity with mixed terms, only scaleops node-packing should be synced", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: 
[]corev1.PreferredSchedulingTerm{ + { + Weight: 100, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "disktype", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"ssd"}, + }, + }, + }, + }, + { + Weight: 50, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectedNodeAffinity: true, + expectedTermCount: 1, + }, + { + name: "desired pod already has node affinity, scaleops affinity should be merged", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 100, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 80, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "disktype", + Operator: corev1.NodeSelectorOpIn, + Values: []string{"ssd"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + expectedNodeAffinity: true, + expectedTermCount: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + syncScaleOpsNodeAffinities(tt.desiredPod, tt.currentPod) + + if 
!tt.expectedNodeAffinity { + if tt.desiredPod.Spec.Affinity != nil && tt.desiredPod.Spec.Affinity.NodeAffinity != nil { + t.Errorf("expected no node affinity, but got one") + } + return + } + + if tt.desiredPod.Spec.Affinity == nil || tt.desiredPod.Spec.Affinity.NodeAffinity == nil { + t.Errorf("expected node affinity to be set") + return + } + + gotTermCount := len(tt.desiredPod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution) + if gotTermCount != tt.expectedTermCount { + t.Errorf("expected %d node affinity terms, got %d", tt.expectedTermCount, gotTermCount) + } + }) + } +} + +func TestSyncScaleOpsAffinities(t *testing.T) { + tests := []struct { + name string + currentPod *corev1.Pod + desiredPod *corev1.Pod + expectPodAffinity bool + expectNodeAffinity bool + }{ + { + name: "no affinities in current pod", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectPodAffinity: false, + expectNodeAffinity: false, + }, + { + name: "both pod and node affinities with scaleops labels", + currentPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{ + Affinity: &corev1.Affinity{ + PodAffinity: &corev1.PodAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ + { + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "scaleops.sh/managed-unevictable": "true", + }, + }, + TopologyKey: "kubernetes.io/hostname", + }, + }, + }, + }, + NodeAffinity: &corev1.NodeAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{ + { + Weight: 50, + Preference: corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: "scaleops.sh/node-packing", + Operator: 
corev1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }, + }, + }, + }, + }, + }, + }, + desiredPod: &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "test-pod"}, + Spec: corev1.PodSpec{}, + }, + expectPodAffinity: true, + expectNodeAffinity: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + syncScaleOpsAffinities(tt.desiredPod, tt.currentPod) + + if tt.expectPodAffinity { + if tt.desiredPod.Spec.Affinity == nil || tt.desiredPod.Spec.Affinity.PodAffinity == nil { + t.Errorf("expected pod affinity to be set") + } + } else { + if tt.desiredPod.Spec.Affinity != nil && tt.desiredPod.Spec.Affinity.PodAffinity != nil { + if len(tt.desiredPod.Spec.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution) > 0 { + t.Errorf("expected no pod affinity") + } + } + } + + if tt.expectNodeAffinity { + if tt.desiredPod.Spec.Affinity == nil || tt.desiredPod.Spec.Affinity.NodeAffinity == nil { + t.Errorf("expected node affinity to be set") + } + } else { + if tt.desiredPod.Spec.Affinity != nil && tt.desiredPod.Spec.Affinity.NodeAffinity != nil { + if len(tt.desiredPod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution) > 0 { + t.Errorf("expected no node affinity") + } + } + } + }) + } +} From e813891cc7772ee3fd91e598037fc0fc28017cc2 Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Fri, 12 Jun 2026 13:36:44 -0400 Subject: [PATCH 21/25] Address review comments: add cleanup, run cpk in background --- run-local.sh | 48 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/run-local.sh b/run-local.sh index 264989e67..e6c4e738b 100755 --- a/run-local.sh +++ b/run-local.sh @@ -7,11 +7,12 @@ # 4. Install and Start cloud-provider-kind to enable LoadBalancer services on Kind (Required for Local Debugging). 
https://github.com/kubernetes-sigs/cloud-provider-kind ## USAGE -# ./run-local.sh [--local] [--scaleops] +# ./run-local.sh [--local] [--scaleops] [--cleanup] # # --local Run koperator as a local process instead of as a container on Kind. # Starts cloud-provider-kind and runs `make install && make run`. # --scaleops Install the ScaleOps helm chart. Requires SCALEOPS_TOKEN to be set. +# --cleanup Delete the Kind cluster and stop cloud-provider-kind process. ## IMPORTANT NOTES (for running koperator locally with --local flag) @@ -46,6 +47,7 @@ LOCAL=false SCALEOPS=false +CLEANUP=false KOPERATOR_IMAGE=docker.io/library/koperator_e2e_test CERT_DIR="/etc/webhook/certs" @@ -54,6 +56,7 @@ while [[ $# -gt 0 ]]; do case $1 in --local) LOCAL=true; shift ;; --scaleops) SCALEOPS=true; shift ;; + --cleanup) CLEANUP=true; shift ;; *) echo "Unknown flag: $1"; exit 1 ;; esac done @@ -63,6 +66,27 @@ if $SCALEOPS && [[ -n "${SCALEOPS_TOKEN}" ]]; then exit 1 fi +## Handle cleanup option +if $CLEANUP; then + echo "Cleaning up Kind cluster and cloud-provider-kind..." + + ## Delete Kind cluster + echo "Deleting Kind cluster 'kind-kafka'..." + kind delete cluster --name=kind-kafka || true + + ## Stop cloud-provider-kind + echo "Stopping cloud-provider-kind..." + if pgrep -f cloud-provider-kind &>/dev/null; then + sudo pkill -f cloud-provider-kind + echo "cloud-provider-kind stopped" + else + echo "cloud-provider-kind is not running" + fi + + echo "Cleanup completed" + exit 0 +fi + ## Check if Docker daemon is running if ! docker ps &>/dev/null; then echo "Error: Docker daemon is not running. Please start Docker and try again." @@ -118,10 +142,21 @@ fi ## Run Koperator if $LOCAL; then - ## Check if cloud-provider-kind started successfully - if ! pgrep -f cloud-provider-kind &>/dev/null; then - echo "Warning: cloud-provider-kind failed to start. LoadBalancer services may not work properly." - echo "Check /tmp/cloudproviderkind.log for details." 
+ ## Start cloud-provider-kind in the background if not already running + if pgrep -f cloud-provider-kind &>/dev/null; then + echo "cloud-provider-kind is already running" + else + echo "Starting cloud-provider-kind in the background..." + sudo -b sh -c 'cloud-provider-kind 2>&1 | tee /tmp/cloudproviderkind.log' & + sleep 2 + + ## Check if cloud-provider-kind started successfully + if ! pgrep -f cloud-provider-kind &>/dev/null; then + echo "Warning: cloud-provider-kind failed to start. LoadBalancer services may not work properly." + echo "Check /tmp/cloudproviderkind.log for details." + else + echo "cloud-provider-kind started successfully" + fi fi kubectl get namespace kafka &>/dev/null || kubectl create namespace kafka @@ -160,8 +195,5 @@ if $LOCAL; then echo "Webhook certs already exist, skipping generation." fi - ## TODO: run cloud-provider-kind in the background - ## TODO: print command to modify /etc/hosts for svc - make run fi From 1961403b6992b68df199344fd3b55d0831edd571 Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Fri, 12 Jun 2026 13:48:49 -0400 Subject: [PATCH 22/25] Address review comments: rename debugEnabled to localDebugEnabled --- api/v1beta1/kafkacluster_types.go | 6 +++--- charts/kafka-operator/crds/kafkaclusters.yaml | 16 ++++++++-------- .../crds/kafka.banzaicloud.io_kafkaclusters.yaml | 16 ++++++++-------- config/samples/simplekafkacluster.yaml | 2 +- pkg/resources/cruisecontrol/service.go | 2 +- pkg/resources/kafka/allBrokerService.go | 2 +- pkg/resources/kafka/service.go | 2 +- pkg/resources/kafka/service_test.go | 8 ++++---- run-local.sh | 2 +- 9 files changed, 28 insertions(+), 28 deletions(-) diff --git a/api/v1beta1/kafkacluster_types.go b/api/v1beta1/kafkacluster_types.go index d4e979ffa..1b8deda44 100644 --- a/api/v1beta1/kafkacluster_types.go +++ b/api/v1beta1/kafkacluster_types.go @@ -159,14 +159,14 @@ type KafkaClusterSpec struct { // +optional KRaftMode bool `json:"kRaft"` HeadlessServiceEnabled bool 
`json:"headlessServiceEnabled"` - // DebugEnabled is used to decide whether to create a separate loadbalancer services for the + // localDebugEnabled is used to decide whether to create a separate loadbalancer services for the // Kafka and Cruise Control Pods. These services will expose the internal listener ports of the Kafka // cluster with LoadBalancer type, which can be used for running Koperator on a local machine against // a kafkaCluster instance on a Kind Cluster. // +kubebuilder:default=false // +optional - DebugEnabled bool `json:"debugEnabled"` - ListenersConfig ListenersConfig `json:"listenersConfig"` + LocalDebugEnabled bool `json:"localDebugEnabled,omitempty"` + ListenersConfig ListenersConfig `json:"listenersConfig"` // Custom ports to expose in the container. Example use case: a custom kafka distribution, that includes an integrated metrics api endpoint AdditionalPorts []corev1.ContainerPort `json:"additionalPorts,omitempty"` // ZKAddresses specifies the ZooKeeper connection string diff --git a/charts/kafka-operator/crds/kafkaclusters.yaml b/charts/kafka-operator/crds/kafkaclusters.yaml index 2bd304cea..8a029e57d 100644 --- a/charts/kafka-operator/crds/kafkaclusters.yaml +++ b/charts/kafka-operator/crds/kafkaclusters.yaml @@ -19231,14 +19231,6 @@ spec: type: object type: array type: object - debugEnabled: - default: false - description: |- - DebugEnabled is used to decide whether to create a separate loadbalancer services for the - Kafka and Cruise Control Pods. These services will expose the internal listener ports of the Kafka - cluster with LoadBalancer type, which can be used for running Koperator on a local machine against - a kafkaCluster instance on a Kind Cluster. 
- type: boolean disruptionBudget: description: DisruptionBudget defines the configuration for PodDisruptionBudget where the workload is managed by the kafka-operator @@ -23678,6 +23670,14 @@ spec: required: - internalListeners type: object + localDebugEnabled: + default: false + description: |- + localDebugEnabled is used to decide whether to create a separate loadbalancer services for the + Kafka and Cruise Control Pods. These services will expose the internal listener ports of the Kafka + cluster with LoadBalancer type, which can be used for running Koperator on a local machine against + a kafkaCluster instance on a Kind Cluster. + type: boolean monitoringConfig: description: MonitoringConfig defines the config for monitoring Kafka and Cruise Control diff --git a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml index 2bd304cea..8a029e57d 100644 --- a/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml +++ b/config/base/crds/kafka.banzaicloud.io_kafkaclusters.yaml @@ -19231,14 +19231,6 @@ spec: type: object type: array type: object - debugEnabled: - default: false - description: |- - DebugEnabled is used to decide whether to create a separate loadbalancer services for the - Kafka and Cruise Control Pods. These services will expose the internal listener ports of the Kafka - cluster with LoadBalancer type, which can be used for running Koperator on a local machine against - a kafkaCluster instance on a Kind Cluster. - type: boolean disruptionBudget: description: DisruptionBudget defines the configuration for PodDisruptionBudget where the workload is managed by the kafka-operator @@ -23678,6 +23670,14 @@ spec: required: - internalListeners type: object + localDebugEnabled: + default: false + description: |- + localDebugEnabled is used to decide whether to create a separate loadbalancer services for the + Kafka and Cruise Control Pods. 
These services will expose the internal listener ports of the Kafka + cluster with LoadBalancer type, which can be used for running Koperator on a local machine against + a kafkaCluster instance on a Kind Cluster. + type: boolean monitoringConfig: description: MonitoringConfig defines the config for monitoring Kafka and Cruise Control diff --git a/config/samples/simplekafkacluster.yaml b/config/samples/simplekafkacluster.yaml index cf08d8980..307e37999 100644 --- a/config/samples/simplekafkacluster.yaml +++ b/config/samples/simplekafkacluster.yaml @@ -5,7 +5,7 @@ metadata: controller-tools.k8s.io: "1.0" name: kafka spec: - debugEnabled: true + localDebugEnabled: true kRaft: false monitoringConfig: jmxImage: "ghcr.io/adobe/koperator/jmx-javaagent:1.4.0" diff --git a/pkg/resources/cruisecontrol/service.go b/pkg/resources/cruisecontrol/service.go index 2c1c64439..18eb10731 100644 --- a/pkg/resources/cruisecontrol/service.go +++ b/pkg/resources/cruisecontrol/service.go @@ -52,7 +52,7 @@ func (r *Reconciler) service() runtime.Object { }, } - if r.KafkaCluster.Spec.DebugEnabled { + if r.KafkaCluster.Spec.LocalDebugEnabled { svc.Spec.Type = corev1.ServiceTypeLoadBalancer } diff --git a/pkg/resources/kafka/allBrokerService.go b/pkg/resources/kafka/allBrokerService.go index ed0eed60c..b5fa40239 100644 --- a/pkg/resources/kafka/allBrokerService.go +++ b/pkg/resources/kafka/allBrokerService.go @@ -53,7 +53,7 @@ func (r *Reconciler) allBrokerService() runtime.Object { }, } - if r.KafkaCluster.Spec.DebugEnabled { + if r.KafkaCluster.Spec.LocalDebugEnabled { svc.Spec.Type = corev1.ServiceTypeLoadBalancer } diff --git a/pkg/resources/kafka/service.go b/pkg/resources/kafka/service.go index fa9dca5cf..84e7e5c79 100644 --- a/pkg/resources/kafka/service.go +++ b/pkg/resources/kafka/service.go @@ -61,7 +61,7 @@ func (r *Reconciler) service(id int32, _ *v1beta1.BrokerConfig) runtime.Object { Ports: usedPorts, }, } - if r.KafkaCluster.Spec.DebugEnabled { + if 
r.KafkaCluster.Spec.LocalDebugEnabled { svc.Spec.Type = corev1.ServiceTypeLoadBalancer } return svc diff --git a/pkg/resources/kafka/service_test.go b/pkg/resources/kafka/service_test.go index f00b7e178..f5a16448a 100644 --- a/pkg/resources/kafka/service_test.go +++ b/pkg/resources/kafka/service_test.go @@ -47,8 +47,8 @@ func TestService(t *testing.T) { Namespace: "kafka", }, Spec: v1beta1.KafkaClusterSpec{ - DebugEnabled: false, - KRaftMode: false, + LocalDebugEnabled: false, + KRaftMode: false, ListenersConfig: v1beta1.ListenersConfig{ InternalListeners: []v1beta1.InternalListenerConfig{ { @@ -135,8 +135,8 @@ func TestService(t *testing.T) { Namespace: "kafka", }, Spec: v1beta1.KafkaClusterSpec{ - DebugEnabled: true, - KRaftMode: false, + LocalDebugEnabled: true, + KRaftMode: false, ListenersConfig: v1beta1.ListenersConfig{ InternalListeners: []v1beta1.InternalListenerConfig{ { diff --git a/run-local.sh b/run-local.sh index e6c4e738b..76b30402c 100755 --- a/run-local.sh +++ b/run-local.sh @@ -17,7 +17,7 @@ ## IMPORTANT NOTES (for running koperator locally with --local flag) # -# Make sure to set `debugEnabled: true` in your KafkaCluster spec. This will +# Make sure to set `localDebugEnabled: true` in your KafkaCluster spec. This will # create LoadBalancer services for the Kafka and Cruise Control pods, allowing # your local koperator to access services running on the Kind cluster. # From 68b680b9c488f225c6cde90ff7586eb94290648a Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Fri, 12 Jun 2026 14:41:09 -0400 Subject: [PATCH 23/25] Address review comments: Cleanup cloud-provider-kind --- run-local.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run-local.sh b/run-local.sh index 76b30402c..80a53ce42 100755 --- a/run-local.sh +++ b/run-local.sh @@ -147,7 +147,7 @@ if $LOCAL; then echo "cloud-provider-kind is already running" else echo "Starting cloud-provider-kind in the background..."
- sudo -b sh -c 'cloud-provider-kind 2>&1 | tee /tmp/cloudproviderkind.log' & + sudo -b sh -c "KUBECONFIG=$HOME/.kube/config cloud-provider-kind >> /tmp/cloudproviderkind.log 2>&1" sleep 2 ## Check if cloud-provider-kind started successfully From d911f813990143352d294d1f2a648a3f5c833f5f Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Tue, 16 Jun 2026 12:06:53 -0400 Subject: [PATCH 24/25] Cleanup --- tmp/basic-kafka-pod.yaml | 444 ----------- tmp/exampleKafkacluster.yaml | 1430 ---------------------------------- 2 files changed, 1874 deletions(-) delete mode 100644 tmp/basic-kafka-pod.yaml delete mode 100644 tmp/exampleKafkacluster.yaml diff --git a/tmp/basic-kafka-pod.yaml b/tmp/basic-kafka-pod.yaml deleted file mode 100644 index f38ffab6a..000000000 --- a/tmp/basic-kafka-pod.yaml +++ /dev/null @@ -1,444 +0,0 @@ -apiVersion: v1 -kind: Pod -metadata: - annotations: - banzaicloud.com/last-applied: UEsDBBQACAAIAAAAAAAAAAAAAAAAAAAAAAAIAAAAb3JpZ2luYWzMWX1v2ziT/yoDbg9t75Fs2Wn7bF0EOK+Tpr4mcS5Oui3qwKCpkc1GIrUkFcfX9Qe6r3Gf7DCk7Pgl6XaB++MJ0FSiZoa/eeFvSOY7K9DxlDvOOt8ZV0o77qRWll5Lowt0M6xsQ+pmqY1jHfY2aScs2vlmheElsg5zpkK2jNgUFRru8JwXNHzLs1seJzGLWM4nmHvzvCxXn1jEJkbfoumnrMPIvrS/+YFzna7t0mhPK2d0nq+/ZDy39MnbGQuzNrmMmOIF2pIL3JhHzxWaS8zQoBJoWecrIZGf0Fip1UqwMeHqv7kUua5ScvCuNUHHW4Qz1+J2QEaOMEfndQhdxMQa2mrkViry5yNZ7OWVdWhYQLUBqJIk86bd+uckQx6nfJLEr968TuJf/4lZ3HrzRrxO+evWqxZny5tlxGyJwocvy6SSbuEzpdOucrK7OWTIR4PpUWWkmg7FDNMql2ranyq9Hj6+R1EFJ74GM7WJKzQFmfHpGmKOwmlDAwV3Ynb6RBL3k7CMmNOlzvV08REXNF5N0Ch06Atnpq3z8VhGbI5yOnOs00qS5Q0pUkC5VGhCloQuCk4B/com3M5YxGLBIvbLSP0CPV0uDKnD//4PtJN2G37zCYQeZXCkvNCpFKgsplCpFA24GUK35GKGqy8R1GUA7UYCL0hgxOpvI/byHRlZ6AoKvgClHVQWwc2khUzmCHgvsHQgFQhdlLnkSiDMpZv5mWozDbLxpbahJ+QfcBC6XIDONgWBuxo2/cycKzvN5nw+b3APuaHNtJkHUds87feOz4fHcbuR1ErXKkdrweAflTSYwmQBvCxzKfgkR8j5HLQBPjWIKThNoOdGOqmmEViduTk3SGZSaZ2Rk8ptRW0FUdotAa2AKxix7hD6wxGD37rD/jAiM7/3rz4Mrq/g9+7lZff8qn88hMEl9AbnR/2r/uB8CIP30D3/Ah/750cRoHQzNID3pSEftAFJ8cTUB2+IuAUi0wEUrQuZSQE5V9OKTxGm+g6NkmoKJZpCWsqsBa6oHiCXhay5
bt+zho+izODrV4jJpWfH558GX8bD/tFxr3s5Hl51r67JxZubd6SnRgqgN7g+vzpM6PGs+zm8Pfve6sQHyZIGPxx3T68+fAm6vcHR8eGItZNkxOjjfEYlRLTxDlJNIyuDz15QIOCZf4N/QOtl+DrsHT57ISqTQ2wh1tBM8a6pqjyHeA4j9m/fqWbGQqe4HLFVAeVa8JwWXaf1OkmSpkGeLmqDKGYaRmzOfR34uKK60wsojb5fUJUIXSBU5Yi9Cxo2RyyhVb+FcI3Ys2FvxODwkB73fB4x+PNP+rIK0Vp09bYZUvqZGOS34SWT9H+qFY4UPTtdiRk077hpmko1CXkz1bHSLsZ76eIFupGiZHchwzkoFGgtNwuwDktLgOcI3CCV/8dLnjkodIrbif/eO70eXh1fjvtHyx10v0A/LFnrtKGCkxZ47kNK0Su4o1XxAhvTBoT2Bgat48bZl5FXDH2xVm/YGcxlnoO9leXKwCoTbsbdap4wt5tVlvALrsDyDPMFmEpt4Qk2oCZOMDjlJvW8ILeBz7iFCaLagK0NURxNVdfF+wdAIVYrZU9xIrQ36B/BTsjIRFOXrumdbU6kau65XQON49pMLNNHYh/H0veu+AFlLKAptMrktBkiHIe3UZ2gbpoSYE2RL/QdPT+0aVA6RUuFTbHwXXrtx1xXeQrOyOkUzXqU9MhGVU4NTxGsBp7nXj3YkiEBK3mqLU5cuiJ3hfN8ATxNMW16RJhuALKNgPr3GarQWGa8LFHZKKB7bsEvYPij0qYqwDruMDSfgHeCoCuXcgqNldR/pKOqJEEpiPo2XLVo7sg3rp470vSViWmdT62yXAq3HTFyMNPwghocN6vWkWlfC5iS+z0f/zNevoyCM1ZTlaq60cxl6TF6HMGZBy+oCDNtaig0tS/SgNOGNNF0lH3q0nyysrSXVGmhKmOnYwpGcGlOS8gHYw9rQPriXDvsAM/dTFfTGcFONQXnVun5esnUaAhhXJW+B1ZIrlnB6+JopnquVinfhRbVOaENxIybIqty0g7V4DX2s1uH5bH5S6OJ1YIHMEHBw65kLekpxWAsDJK9xyfgGZWrdMEuxQ5sJchwVuV56BBbrHg6OBkf9S+Hu5wI0H8/PHwePQfiQYgNh9PByRBGVZIciM3fu2ZqiteGFCg79ffh1/+4WY7YQ1sE+K/rweX1me/Cx+P3/VPqpF52WZPMeFyvwPHqeBMnzeB17L1eTRe8gjjzs+3Z9d5tt6ONTnm5opRNyyGemdEFjEZPGB2N2AMA+jHFjyCsJEP7WzXA8E7/RupxfvX5j31GiWSfIEpT/FUHZTcRQ3XnN+H14aV32h0OL7pXH1jE7nhe0dgGiFxObBPvHSq/52r+O1tGa93HtlIrM++N9seOTGKeXmK2fr7gbsY669NqY+Oo+vW5lSkKbhrSOqn9edRxV9nnN2y53Jj4Y/f9x+74w3H3Yjy4uBpuQI8/F/ftE4g/F7Z9wvZU/vPT2fji+PL94PKse9473tOuV1r8+XPnH9cWT1onPf9yxu9Pehe0IM9knkt72E78eF9JJzkR3Afk5UCIquRKLC7QCFTu8OB1MHV8Txt26U56fXWnb5EOv6IyBpWD+Ogbv+MNPneNGXLf1A9p9wjxka1UQ6FrSEXBSk3DufzwTbLv1q4bZJFPUbmOT+W34j7Gezr2o6GX8caZ/xs3h2+TdtJpohPbkqGuGgte5Gx5EzFZ8CmZn86EodzwVE+weatLNNxpEyqm0260DuKDxtsGnbFzmaFYiBzrg+zQ6ZIe8T4cfZ84Cf7tHfsPd+T/z/vrn90db5PpLZF3bOHq+PIMnr0oZaozoDx5m5hb3Jb72D893ZcjkljJ/shiJtkNrZjdKwrK7OoMXh/JL/xtUPtt8ra9FneijKVyaBTPwxWR00LnrMOuehe+/B5RP9hS37hD+RkDVIJr/QKdkcI+ongTMYNWV8Zf+Hxn/uznn0RZsQ6jXBUsYgUW2ixYhx2cSLYk
pT8qtFuSm1JtklpG7E7nVYFnulJ1lAp6rBmrXg8P9z5b9Oud2hLfpOIHJSJjaiz78k+S7qbyeuxR9a3l+6BGw9+4if3t4J5i6DG5ntqdK604tNt9jT2m2FUMvzdF2PLGU4iSrvfkZZBveTUL4D2LmCghvgtnDmEqaXFVVruvk0rmaTPFElXKlYt9BLdl4rqqYoMBElEf/KDV/YQ622DFwIbbSp2DRtI4iHk6wbidtF8nvyavHmK1M8HKMPurGt8p8VaSnD1V5I+L/mWl/81SvAkLep1HUbKI7TSa8bon+cBFjxTsfmdiP9F1QnGv+l2r8aqRbFf+ugT/NeL6U2v0pmY62vFd6FwKmuUc77wXtEmRArtCkO36Yj7FjFc5EY1DU0jld1Qnhgu8QCN1OkShVWpZp0VEG2CuO0E4O5F/tZkzfyX/aoOTw5IOsp4TnmZBLEq3OJKGdb4vH+e9JyS2yO1RmT0me4yvIlaisdI6VO6Td7SXc+l3o4Iedv6UUd9exEk8Ld62C7as29NPB+UxtvsJEf8nAL+9JReX/xcAAP//UEsHCHeQthCECgAAwRkAAFBLAQIUABQACAAIAAAAAAB3kLYQhAoAAMEZAAAIAAAAAAAAAAAAAAAAAAAAAABvcmlnaW5hbFBLBQYAAAAAAQABADYAAAC6CgAAAAA= - prometheus.io/port: "9020" - prometheus.io/scrape: "true" - creationTimestamp: "2026-06-05T15:43:19Z" - generateName: kafka-0- - generation: 1 - labels: - app: kafka - brokerId: "0" - isBrokerNode: "true" - isControllerNode: "false" - kafka_cr: kafka - topology.kubernetes.io/zone: zone-b - name: kafka-0-vj6c5 - namespace: kafka - ownerReferences: - - apiVersion: kafka.banzaicloud.io/v1beta1 - blockOwnerDeletion: true - controller: true - kind: KafkaCluster - name: kafka - uid: 6217bfea-dab0-4650-87ef-166c5da5141a - resourceVersion: "2505" - uid: ef0421df-954f-46e7-88f7-032993188aba -spec: - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchLabels: - app: kafka - kafka_cr: kafka - topologyKey: kubernetes.io/hostname - weight: 100 - containers: - - command: - - bash - - -c - - | - # - # Copyright © 2022 Banzai Cloud - # - # Licensed under the Apache License, Version 2.0 (the "License"); - # you may not use this file except in compliance with the License. 
- # You may obtain a copy of the License at - # - # http://www.apache.org/licenses/LICENSE-2.0 - # - # Unless required by applicable law or agreed to in writing, software - # distributed under the License is distributed on an "AS IS" BASIS, - # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - # See the License for the specific language governing permissions and - # limitations under the License. - # - if [[ -n "$ENVOY_SIDECAR_STATUS" ]]; then - COUNT=0 - MAXCOUNT=${1:-30} - HEALTHYSTATUSCODE="200" - while true; do - COUNT=$(expr $COUNT + 1) - SC=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:15000/ready) - echo "waiting for envoy proxy to come up"; - sleep 1; - if [[ "$SC" == "$HEALTHYSTATUSCODE" || "$MAXCOUNT" == "$COUNT" ]]; then - break - fi - done - fi - touch /var/run/wait/do-not-exit-yet - - # A few necessary steps if we are in KRaft mode - if [[ -n "${CLUSTER_ID}" ]]; then - # If the storage is already formatted (e.g. broker restarts), the kafka-storage.sh will skip formatting for that storage - # thus we can safely run the storage format command regardless if the storage has been formatted or not - echo "Formatting KRaft storage with cluster ID ${CLUSTER_ID}" - /opt/kafka/bin/kafka-storage.sh format --cluster-id "${CLUSTER_ID}" --ignore-formatted -c /config/broker-config - - # Adding or removing controller nodes to the Kafka cluster would trigger cluster rolling upgrade so all the nodes in the cluster are aware of the newly added/removed controllers. 
- # When this happens, Kafka's local quorum state file would be outdated since it is static and the Kafka server can't be started with conflicting controllers info (compared to info stored in ConfigMap), - # so we need to wipe out the local state files before starting the server so the information about the controller nodes is up-to-date with what is stored in ConfigMap - # (Note: although we don't know if the server start-up is due to scaling up/down of the controller nodes, it is not harmful to remove the quorum state file before the server start-up process - # because the server will re-create the quorum state file after it starts up successfully) - if [[ -n "${LOG_DIRS}" ]]; then - IFS=',' read -ra LOGS <<< "${LOG_DIRS}" - for LOG in "${LOGS[@]}"; do - QUORUM_STATE_FILE="${LOG}/kafka/__cluster_metadata-0/quorum-state" - if [ -f "${QUORUM_STATE_FILE}" ]; then - echo "Removing quorum-state file from \"${QUORUM_STATE_FILE}\"" - rm -f "${QUORUM_STATE_FILE}" - fi - done - fi - fi - - /opt/kafka/bin/kafka-server-start.sh /config/broker-config - rm /var/run/wait/do-not-exit-yet - env: - - name: CLASSPATH - value: /opt/kafka/libs/extensions/* - - name: ENVOY_SIDECAR_STATUS - valueFrom: - fieldRef: - apiVersion: v1 - fieldPath: metadata.annotations['sidecar.istio.io/status'] - - name: KAFKA_HEAP_OPTS - value: -Xmx2G -Xms2G - - name: KAFKA_JVM_PERFORMANCE_OPTS - value: -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 - -XX:+ExplicitGCInvokesConcurrent -Djava.awt.headless=true -Dsun.net.inetaddr.ttl=60 - - name: KAFKA_OPTS - value: -javaagent:/opt/jmx-exporter/jmx_prometheus.jar=9020:/etc/jmx-exporter/config.yaml - image: ghcr.io/adobe/koperator/kafka:2.13-3.9.1 - imagePullPolicy: IfNotPresent - lifecycle: - preStop: - exec: - command: - - bash - - -c - - |2- - - if [[ -n "$ENVOY_SIDECAR_STATUS" ]]; then - HEALTHYSTATUSCODE="200" - SC=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:15000/ready) - if [[ "$SC" == 
"$HEALTHYSTATUSCODE" ]]; then - kill -s TERM $(pidof java) - else - kill -s KILL $(pidof java) - fi - else - kill -s TERM $(pidof java) - fi - name: kafka - ports: - - containerPort: 29092 - name: tcp-internal - protocol: TCP - - containerPort: 29093 - name: tcp-controller - protocol: TCP - - containerPort: 9020 - name: metrics - protocol: TCP - resources: - limits: - cpu: 1500m - memory: 3Gi - requests: - cpu: "1" - memory: 2Gi - terminationMessagePath: /dev/termination-log - terminationMessagePolicy: File - volumeMounts: - - mountPath: /config - name: broker-config - - mountPath: /var/run/wait - name: exitfile - - mountPath: /opt/kafka/libs/extensions - name: extensions - - mountPath: /opt/jmx-exporter/ - name: jmx-jar-data - - mountPath: /kafka-logs - name: kafka-data-0 - - mountPath: /etc/jmx-exporter/ - name: kafka-kafka-jmx-exporter - - mountPath: /var/run/secrets/kubernetes.io/serviceaccount - name: kube-api-access-6chcq - readOnly: true - dnsPolicy: ClusterFirst - enableServiceLinks: true - initContainers: - - command: - - /bin/sh - - -cex - - cp -v /opt/cruise-control/cruise-control/build/dependant-libs/cruise-control-metrics-reporter.jar - /opt/kafka/libs/extensions/cruise-control-metrics-reporter.jar - image: adobe/cruise-control:3.0.3-adbe-20250804 - imagePullPolicy: IfNotPresent - name: cruise-control-reporter - resources: - limits: - cpu: 100m - memory: 100Mi - requests: - cpu: 100m - memory: 100Mi - terminationMessagePath: /dev/termination-log - terminationMessagePolicy: File - volumeMounts: - - mountPath: /opt/kafka/libs/extensions - name: extensions - - mountPath: /var/run/secrets/kubernetes.io/serviceaccount - name: kube-api-access-6chcq - readOnly: true - - command: - - cp - - /jmx_prometheus_javaagent.jar - - /opt/jmx-exporter/jmx_prometheus.jar - image: ghcr.io/adobe/koperator/jmx-javaagent:1.4.0 - imagePullPolicy: IfNotPresent - name: jmx-exporter - resources: - limits: - cpu: 100m - memory: 100Mi - requests: - cpu: 100m - memory: 100Mi - 
terminationMessagePath: /dev/termination-log - terminationMessagePolicy: File - volumeMounts: - - mountPath: /opt/jmx-exporter/ - name: jmx-jar-data - - mountPath: /var/run/secrets/kubernetes.io/serviceaccount - name: kube-api-access-6chcq - readOnly: true - nodeName: kind-kafka-worker2 - preemptionPolicy: PreemptLowerPriority - priority: 0 - restartPolicy: Never - schedulerName: default-scheduler - securityContext: {} - serviceAccount: default - serviceAccountName: default - terminationGracePeriodSeconds: 120 - tolerations: - - effect: NoExecute - key: node.kubernetes.io/not-ready - operator: Exists - tolerationSeconds: 300 - - effect: NoExecute - key: node.kubernetes.io/unreachable - operator: Exists - tolerationSeconds: 300 - volumes: - - configMap: - defaultMode: 420 - name: kafka-config-0 - name: broker-config - - emptyDir: {} - name: exitfile - - emptyDir: {} - name: extensions - - emptyDir: {} - name: jmx-jar-data - - name: kafka-data-0 - persistentVolumeClaim: - claimName: kafka-0-storage-0-gm92m - - configMap: - defaultMode: 420 - name: kafka-kafka-jmx-exporter - name: kafka-kafka-jmx-exporter - - name: kube-api-access-6chcq - projected: - defaultMode: 420 - sources: - - serviceAccountToken: - expirationSeconds: 3607 - path: token - - configMap: - items: - - key: ca.crt - path: ca.crt - name: kube-root-ca.crt - - downwardAPI: - items: - - fieldRef: - apiVersion: v1 - fieldPath: metadata.namespace - path: namespace -status: - conditions: - - lastProbeTime: null - lastTransitionTime: "2026-06-05T15:43:37Z" - observedGeneration: 1 - status: "True" - type: PodReadyToStartContainers - - lastProbeTime: null - lastTransitionTime: "2026-06-05T15:43:39Z" - observedGeneration: 1 - status: "True" - type: Initialized - - lastProbeTime: null - lastTransitionTime: "2026-06-05T15:43:53Z" - observedGeneration: 1 - status: "True" - type: Ready - - lastProbeTime: null - lastTransitionTime: "2026-06-05T15:43:53Z" - observedGeneration: 1 - status: "True" - type: 
ContainersReady - - lastProbeTime: null - lastTransitionTime: "2026-06-05T15:43:23Z" - observedGeneration: 1 - status: "True" - type: PodScheduled - containerStatuses: - - allocatedResources: - cpu: "1" - memory: 2Gi - containerID: containerd://6b55fc7526c9a12f8d261a2b0e31a267f3224d69049b753371ba68bcc5c0d46f - image: ghcr.io/adobe/koperator/kafka:2.13-3.9.1 - imageID: ghcr.io/adobe/koperator/kafka@sha256:279358e7bc789aba1e3457421e251278ec993a57d0ad2b9691274f1f3ddae134 - lastState: {} - name: kafka - ready: true - resources: - limits: - cpu: 1500m - memory: 3Gi - requests: - cpu: "1" - memory: 2Gi - restartCount: 0 - started: true - state: - running: - startedAt: "2026-06-05T15:43:53Z" - user: - linux: - gid: 0 - supplementalGroups: - - 0 - uid: 0 - volumeMounts: - - mountPath: /config - name: broker-config - - mountPath: /var/run/wait - name: exitfile - - mountPath: /opt/kafka/libs/extensions - name: extensions - - mountPath: /opt/jmx-exporter/ - name: jmx-jar-data - - mountPath: /kafka-logs - name: kafka-data-0 - - mountPath: /etc/jmx-exporter/ - name: kafka-kafka-jmx-exporter - - mountPath: /var/run/secrets/kubernetes.io/serviceaccount - name: kube-api-access-6chcq - readOnly: true - recursiveReadOnly: Disabled - hostIP: 172.18.0.2 - hostIPs: - - ip: 172.18.0.2 - initContainerStatuses: - - allocatedResources: - cpu: 100m - memory: 100Mi - containerID: containerd://b46efeaa3a728981ea8fde946a252c5b744a3001d1f31ab93184e34113c02cf0 - image: docker.io/adobe/cruise-control:3.0.3-adbe-20250804 - imageID: docker.io/adobe/cruise-control@sha256:a30f376d9cdd611d531bcb0d17301daf7295471b9cd5d0070532bca6f12c6513 - lastState: {} - name: cruise-control-reporter - ready: true - resources: - limits: - cpu: 100m - memory: 100Mi - requests: - cpu: 100m - memory: 100Mi - restartCount: 0 - started: false - state: - terminated: - containerID: containerd://b46efeaa3a728981ea8fde946a252c5b744a3001d1f31ab93184e34113c02cf0 - exitCode: 0 - finishedAt: "2026-06-05T15:43:37Z" - reason: 
Completed - startedAt: "2026-06-05T15:43:37Z" - user: - linux: - gid: 0 - supplementalGroups: - - 0 - uid: 0 - volumeMounts: - - mountPath: /opt/kafka/libs/extensions - name: extensions - - mountPath: /var/run/secrets/kubernetes.io/serviceaccount - name: kube-api-access-6chcq - readOnly: true - recursiveReadOnly: Disabled - - allocatedResources: - cpu: 100m - memory: 100Mi - containerID: containerd://9c0848495b1a18f663da46cc1534ca8413c6d37348836f76139859213738d81c - image: ghcr.io/adobe/koperator/jmx-javaagent:1.4.0 - imageID: ghcr.io/adobe/koperator/jmx-javaagent@sha256:72f4b4d48a66d423da09d72c2313a98b047d9ae3c3febeb4776d046879c7a987 - lastState: {} - name: jmx-exporter - ready: true - resources: - limits: - cpu: 100m - memory: 100Mi - requests: - cpu: 100m - memory: 100Mi - restartCount: 0 - started: false - state: - terminated: - containerID: containerd://9c0848495b1a18f663da46cc1534ca8413c6d37348836f76139859213738d81c - exitCode: 0 - finishedAt: "2026-06-05T15:43:39Z" - reason: Completed - startedAt: "2026-06-05T15:43:39Z" - user: - linux: - gid: 0 - supplementalGroups: - - 0 - - 1 - - 2 - - 3 - - 4 - - 6 - - 10 - - 11 - - 20 - - 26 - - 27 - uid: 0 - volumeMounts: - - mountPath: /opt/jmx-exporter/ - name: jmx-jar-data - - mountPath: /var/run/secrets/kubernetes.io/serviceaccount - name: kube-api-access-6chcq - readOnly: true - recursiveReadOnly: Disabled - observedGeneration: 1 - phase: Running - podIP: 10.244.3.11 - podIPs: - - ip: 10.244.3.11 - qosClass: Burstable - startTime: "2026-06-05T15:43:23Z" diff --git a/tmp/exampleKafkacluster.yaml b/tmp/exampleKafkacluster.yaml deleted file mode 100644 index 8f2142fbc..000000000 --- a/tmp/exampleKafkacluster.yaml +++ /dev/null @@ -1,1430 +0,0 @@ -apiVersion: v1 -items: -- apiVersion: kafka.banzaicloud.io/v1beta1 - kind: KafkaCluster - metadata: - annotations: - argocd.argoproj.io/sync-options: Prune=false, Delete=false - artifact.spinnaker.io/location: ns-team-aep-pipeline-kafka-1 - artifact.spinnaker.io/name: 
pipeline-kafka - artifact.spinnaker.io/type: kubernetes/KafkaCluster.kafka.banzaicloud.io - artifact.spinnaker.io/version: "" - helm.sh/chart: kafka-2.1.27 - kubectl.kubernetes.io/last-applied-configuration: | - {"apiVersion":"kafka.banzaicloud.io/v1beta1","kind":"KafkaCluster","metadata":{"annotations":{"argocd.argoproj.io/sync-options":"Prune=false, Delete=false","helm.sh/chart":"kafka-2.1.27","pipeline_config_version":"dev"},"labels":{"app.kubernetes.io/instance":"pipeline-kafka","app.kubernetes.io/managed-by":"Helm","app.kubernetes.io/name":"kafka","app.kubernetes.io/version":"3.6.1","flex.ethos.corp.adobe.com/instance":"experience-platform--pipeline-kafka-deploy--ethos12-pr-ad6d62b6","pipeline_cluster":"OR1","pipeline_env":"prod"},"name":"pipeline-kafka","namespace":"ns-team-aep-pipeline-kafka-1"},"spec":{"brokerConfigGroups":{"az1":{"affinity":{"nodeAffinity":{},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"namespaces":["ns-team-aep-pipeline-kafka-1","ns-team-aep-pipeline-kafka-2","ns-team-aep-pipeline-kafka-3","ns-team-aep-pipeline-kafka-a1","ns-team-aep-pipeline-kafka-a2","ns-team-aep-pipeline-kafka-a3","ns-team-aep-pipeline-kafka-a5"],"topologyKey":"kubernetes.io/hostname"}]}},"brokerAnnotations":{"arc.ethos.adobe.net/ignore":"true","broker_group":"az1","cluster-autoscaler.kubernetes.io/safe-to-evict":"false","io.kubernetes.cri-o.LinkLogs":"logging-volume"},"containers":[{"env":[{"name":"SPLUNK_HOST","value":"splunk-hec-relay.loc.adobe.net"},{"name":"SPLUNK_PORT","value":"8088"},{"name":"SPLUNK_INDEX","value":"plat_app_prod"},{"name":"SPLUNK_TOKEN","valueFrom":{"secretKeyRef":{"key":"token","name":"splunk-token"}}},{"name":"SPLUNK_SOURCETYPE","value":"log4j"},{"name":"POD_UID_FLUENT_BIT","valueFrom":{"fieldRef":{"fieldPath":"metadata.uid"}}},{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"POD_NAMESPACE","valueF
rom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":"POD_IP","valueFrom":{"fieldRef":{"fieldPath":"status.podIP"}}},{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}},{"name":"NODE_IP","valueFrom":{"fieldRef":{"fieldPath":"status.hostIP"}}},{"name":"LOG_PARSER","value":"docker"},{"name":"POD_ENV","value":"prod"},{"name":"POD_CLUSTER","value":"OR1"}],"image":"docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos","name":"fluent-bit","ports":[{"containerPort":2020,"name":"fb-metrics","protocol":"TCP"}],"resources":{"limits":{"cpu":"100m","memory":"256Mi"},"requests":{"cpu":"100m","memory":"256Mi"}},"volumeMounts":[{"mountPath":"/logging-volume","mountPropagation":"HostToContainer","name":"logging-volume"},{"mountPath":"/var/fluent-bit","name":"fluent-data"},{"mountPath":"/fluent-bit/etc","name":"fluent-bit-config"}]}],"initContainers":[{"command":["cp","-r","/pipeline/kafka-libs/.","/opt/kafka/libs/extensions/"],"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10","imagePullPolicy":"IfNotPresent","name":"broker-libs-injector","resources":{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}},"volumeMounts":[{"mountPath":"/opt/kafka/libs/extensions","name":"extensions"}]}],"kafkaHeapOpts":"-XX:InitialRAMPercentage=30 -XX:MaxRAMPercentage=70 -XX:MinRAMPercentage=70","kafkaJvmPerfOpts":"-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -XX:MetaspaceSize=96m -XX:G1HeapRegionSize=16M -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 -Djava.awt.headless=true -Dsun.net.inetaddr.ttl=60 -Dcom.sun.management.jmxremote.port=1090 -Dcom.sun.management.jmxremote.rmi.port=1090 -Dcom.sun.management.jmxremote.local.only=false -Djava.rmi.server.hostname=127.0.0.1 -Djute.maxbuffer=0x9fffff","log4jConfig":"log4j.rootLogger=INFO, 
stdout\n\nlog4j.appender.stdout=org.apache.log4j.ConsoleAppender\nlog4j.appender.stdout.layout=org.apache.log4j.PatternLayout\nlog4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n\n\n# Change the line below to adjust ZK client logging\nlog4j.logger.org.apache.zookeeper=INFO\n\n# Change the two lines below to adjust the general broker logging level (output to server.log and stdout)\nlog4j.logger.kafka=INFO\nlog4j.logger.org.apache.kafka=INFO\n\n# Change to DEBUG or TRACE to enable request logging\nlog4j.logger.kafka.request.logger=WARN\n\n# Uncomment the lines below and change log4j.logger.kafka.network.RequestChannel$ to TRACE for additional output\n# related to the handling of requests\n#log4j.logger.kafka.network.Processor=TRACE, requestAppender\n#log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender\n#\nlog4j.logger.kafka.network.RequestChannel$=WARN\nlog4j.logger.kafka.controller=DEBUG\nlog4j.logger.kafka.log.LogCleaner=INFO\nlog4j.logger.state.change.logger=INFO\n\n# Access denials are logged at INFO level, change to DEBUG to also log allowed accesses\nlog4j.logger.kafka.authorizer.logger=INFO\n\n# Additional logging to reduce 
noise\nlog4j.logger.org.apache.kafka.common.network.Selector=WARN","resourceRequirements":{"limits":{"cpu":"18","memory":"36Gi"},"requests":{"cpu":"18","memory":"36Gi"}},"serviceAccountName":"kafka-cluster","storageConfigs":[{"mountPath":"/kafka-logs1","pvcSpec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"4Ti"}},"storageClassName":"vsphere-block-12prodor1-s1-1"}}],"tolerations":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"}],"volumes":[{"emptyDir":{},"name":"logging-volume"},{"emptyDir":{},"name":"fluent-data"},{"configMap":{"name":"pipeline-kafka-fluent-bit"},"name":"fluent-bit-config"}]},"az2":{"affinity":{"nodeAffinity":{},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"namespaces":["ns-team-aep-pipeline-kafka-1","ns-team-aep-pipeline-kafka-2","ns-team-aep-pipeline-kafka-3","ns-team-aep-pipeline-kafka-a1","ns-team-aep-pipeline-kafka-a2","ns-team-aep-pipeline-kafka-a3","ns-team-aep-pipeline-kafka-a5"],"topologyKey":"kubernetes.io/hostname"}]}},"brokerAnnotations":{"arc.ethos.adobe.net/ignore":"true","broker_group":"az2","cluster-autoscaler.kubernetes.io/safe-to-evict":"false","io.kubernetes.cri-o.LinkLogs":"logging-volume"},"containers":[{"env":[{"name":"SPLUNK_HOST","value":"splunk-hec-relay.loc.adobe.net"},{"name":"SPLUNK_PORT","value":"8088"},{"name":"SPLUNK_INDEX","value":"plat_app_prod"},{"name":"SPLUNK_TOKEN","valueFrom":{"secretKeyRef":{"key":"token","name":"splunk-token"}}},{"name":"SPLUNK_SOURCETYPE","value":"log4j"},{"name":"POD_UID_FLUENT_BIT","valueFrom":{"fieldRef":{"fieldPath":"metadata.uid"}}},{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"POD_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":
"POD_IP","valueFrom":{"fieldRef":{"fieldPath":"status.podIP"}}},{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}},{"name":"NODE_IP","valueFrom":{"fieldRef":{"fieldPath":"status.hostIP"}}},{"name":"LOG_PARSER","value":"docker"},{"name":"POD_ENV","value":"prod"},{"name":"POD_CLUSTER","value":"OR1"}],"image":"docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos","name":"fluent-bit","ports":[{"containerPort":2020,"name":"fb-metrics","protocol":"TCP"}],"resources":{"limits":{"cpu":"100m","memory":"256Mi"},"requests":{"cpu":"100m","memory":"256Mi"}},"volumeMounts":[{"mountPath":"/logging-volume","mountPropagation":"HostToContainer","name":"logging-volume"},{"mountPath":"/var/fluent-bit","name":"fluent-data"},{"mountPath":"/fluent-bit/etc","name":"fluent-bit-config"}]}],"initContainers":[{"command":["cp","-r","/pipeline/kafka-libs/.","/opt/kafka/libs/extensions/"],"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10","imagePullPolicy":"IfNotPresent","name":"broker-libs-injector","resources":{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}},"volumeMounts":[{"mountPath":"/opt/kafka/libs/extensions","name":"extensions"}]}],"kafkaHeapOpts":"-XX:InitialRAMPercentage=30 -XX:MaxRAMPercentage=70 -XX:MinRAMPercentage=70","kafkaJvmPerfOpts":"-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -XX:MetaspaceSize=96m -XX:G1HeapRegionSize=16M -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 -Djava.awt.headless=true -Dsun.net.inetaddr.ttl=60 -Dcom.sun.management.jmxremote.port=1090 -Dcom.sun.management.jmxremote.rmi.port=1090 -Dcom.sun.management.jmxremote.local.only=false -Djava.rmi.server.hostname=127.0.0.1 -Djute.maxbuffer=0x9fffff","log4jConfig":"log4j.rootLogger=INFO, 
stdout\n\nlog4j.appender.stdout=org.apache.log4j.ConsoleAppender\nlog4j.appender.stdout.layout=org.apache.log4j.PatternLayout\nlog4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n\n\n# Change the line below to adjust ZK client logging\nlog4j.logger.org.apache.zookeeper=INFO\n\n# Change the two lines below to adjust the general broker logging level (output to server.log and stdout)\nlog4j.logger.kafka=INFO\nlog4j.logger.org.apache.kafka=INFO\n\n# Change to DEBUG or TRACE to enable request logging\nlog4j.logger.kafka.request.logger=WARN\n\n# Uncomment the lines below and change log4j.logger.kafka.network.RequestChannel$ to TRACE for additional output\n# related to the handling of requests\n#log4j.logger.kafka.network.Processor=TRACE, requestAppender\n#log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender\n#\nlog4j.logger.kafka.network.RequestChannel$=WARN\nlog4j.logger.kafka.controller=DEBUG\nlog4j.logger.kafka.log.LogCleaner=INFO\nlog4j.logger.state.change.logger=INFO\n\n# Access denials are logged at INFO level, change to DEBUG to also log allowed accesses\nlog4j.logger.kafka.authorizer.logger=INFO\n\n# Additional logging to reduce 
noise\nlog4j.logger.org.apache.kafka.common.network.Selector=WARN","resourceRequirements":{"limits":{"cpu":"18","memory":"36Gi"},"requests":{"cpu":"18","memory":"36Gi"}},"serviceAccountName":"kafka-cluster","storageConfigs":[{"mountPath":"/kafka-logs1","pvcSpec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"4Ti"}},"storageClassName":"vsphere-block-12prodor1-s1-2"}}],"tolerations":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"}],"volumes":[{"emptyDir":{},"name":"logging-volume"},{"emptyDir":{},"name":"fluent-data"},{"configMap":{"name":"pipeline-kafka-fluent-bit"},"name":"fluent-bit-config"}]},"az3":{"affinity":{"nodeAffinity":{},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"namespaces":["ns-team-aep-pipeline-kafka-1","ns-team-aep-pipeline-kafka-2","ns-team-aep-pipeline-kafka-3","ns-team-aep-pipeline-kafka-a1","ns-team-aep-pipeline-kafka-a2","ns-team-aep-pipeline-kafka-a3","ns-team-aep-pipeline-kafka-a5"],"topologyKey":"kubernetes.io/hostname"}]}},"brokerAnnotations":{"arc.ethos.adobe.net/ignore":"true","broker_group":"az3","cluster-autoscaler.kubernetes.io/safe-to-evict":"false","io.kubernetes.cri-o.LinkLogs":"logging-volume"},"containers":[{"env":[{"name":"SPLUNK_HOST","value":"splunk-hec-relay.loc.adobe.net"},{"name":"SPLUNK_PORT","value":"8088"},{"name":"SPLUNK_INDEX","value":"plat_app_prod"},{"name":"SPLUNK_TOKEN","valueFrom":{"secretKeyRef":{"key":"token","name":"splunk-token"}}},{"name":"SPLUNK_SOURCETYPE","value":"log4j"},{"name":"POD_UID_FLUENT_BIT","valueFrom":{"fieldRef":{"fieldPath":"metadata.uid"}}},{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"POD_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":
"POD_IP","valueFrom":{"fieldRef":{"fieldPath":"status.podIP"}}},{"name":"NODE_NAME","valueFrom":{"fieldRef":{"fieldPath":"spec.nodeName"}}},{"name":"NODE_IP","valueFrom":{"fieldRef":{"fieldPath":"status.hostIP"}}},{"name":"LOG_PARSER","value":"docker"},{"name":"POD_ENV","value":"prod"},{"name":"POD_CLUSTER","value":"OR1"}],"image":"docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos","name":"fluent-bit","ports":[{"containerPort":2020,"name":"fb-metrics","protocol":"TCP"}],"resources":{"limits":{"cpu":"100m","memory":"256Mi"},"requests":{"cpu":"100m","memory":"256Mi"}},"volumeMounts":[{"mountPath":"/logging-volume","mountPropagation":"HostToContainer","name":"logging-volume"},{"mountPath":"/var/fluent-bit","name":"fluent-data"},{"mountPath":"/fluent-bit/etc","name":"fluent-bit-config"}]}],"initContainers":[{"command":["cp","-r","/pipeline/kafka-libs/.","/opt/kafka/libs/extensions/"],"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10","imagePullPolicy":"IfNotPresent","name":"broker-libs-injector","resources":{"limits":{"cpu":"100m","memory":"100Mi"},"requests":{"cpu":"100m","memory":"100Mi"}},"volumeMounts":[{"mountPath":"/opt/kafka/libs/extensions","name":"extensions"}]}],"kafkaHeapOpts":"-XX:InitialRAMPercentage=30 -XX:MaxRAMPercentage=70 -XX:MinRAMPercentage=70","kafkaJvmPerfOpts":"-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -XX:MetaspaceSize=96m -XX:G1HeapRegionSize=16M -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 -Djava.awt.headless=true -Dsun.net.inetaddr.ttl=60 -Dcom.sun.management.jmxremote.port=1090 -Dcom.sun.management.jmxremote.rmi.port=1090 -Dcom.sun.management.jmxremote.local.only=false -Djava.rmi.server.hostname=127.0.0.1 -Djute.maxbuffer=0x9fffff","log4jConfig":"log4j.rootLogger=INFO, 
stdout\n\nlog4j.appender.stdout=org.apache.log4j.ConsoleAppender\nlog4j.appender.stdout.layout=org.apache.log4j.PatternLayout\nlog4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n\n\n# Change the line below to adjust ZK client logging\nlog4j.logger.org.apache.zookeeper=INFO\n\n# Change the two lines below to adjust the general broker logging level (output to server.log and stdout)\nlog4j.logger.kafka=INFO\nlog4j.logger.org.apache.kafka=INFO\n\n# Change to DEBUG or TRACE to enable request logging\nlog4j.logger.kafka.request.logger=WARN\n\n# Uncomment the lines below and change log4j.logger.kafka.network.RequestChannel$ to TRACE for additional output\n# related to the handling of requests\n#log4j.logger.kafka.network.Processor=TRACE, requestAppender\n#log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender\n#\nlog4j.logger.kafka.network.RequestChannel$=WARN\nlog4j.logger.kafka.controller=DEBUG\nlog4j.logger.kafka.log.LogCleaner=INFO\nlog4j.logger.state.change.logger=INFO\n\n# Access denials are logged at INFO level, change to DEBUG to also log allowed accesses\nlog4j.logger.kafka.authorizer.logger=INFO\n\n# Additional logging to reduce 
noise\nlog4j.logger.org.apache.kafka.common.network.Selector=WARN","resourceRequirements":{"limits":{"cpu":"18","memory":"36Gi"},"requests":{"cpu":"18","memory":"36Gi"}},"serviceAccountName":"kafka-cluster","storageConfigs":[{"mountPath":"/kafka-logs1","pvcSpec":{"accessModes":["ReadWriteOnce"],"resources":{"requests":{"storage":"4Ti"}},"storageClassName":"vsphere-block-12prodor1-s1-3"}}],"tolerations":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"}],"volumes":[{"emptyDir":{},"name":"logging-volume"},{"emptyDir":{},"name":"fluent-data"},{"configMap":{"name":"pipeline-kafka-fluent-bit"},"name":"fluent-bit-config"}]}},"brokers":[{"brokerConfig":{"brokerIngressMapping":["ingress-az1","secureingress-az1"]},"brokerConfigGroup":"az1","id":1101,"readOnlyConfig":"broker.rack=az1\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az1","secureingress-az1"]},"brokerConfigGroup":"az1","id":1102,"readOnlyConfig":"broker.rack=az1\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az1","secureingress-az1"]},"brokerConfigGroup":"az1","id":1103,"readOnlyConfig":"broker.rack=az1\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az1","secureingress-az1"]},"brokerConfigGroup":"az1","id":1104,"readOnlyConfig":"broker.rack=az1\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az1","secureingress-az1"]},"brokerConfigGroup":"az1","id":1105,"readOnlyConfig":"broker.rack=az1\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az1","secureingress-az1"]},"brokerConfigGroup":"az1","id":1106,"readOnlyConfig":"broker.rack=az1\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az2","secureingress-az2"]},"brokerConfigGroup":"az2","id":1201,"readOnlyConfig":"broker.rack=az2\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az2","secureingress-az2"]},"brokerConfigGroup":"az2","id":1202,"readOnlyConfig":"broker.rack=az2\n"
},{"brokerConfig":{"brokerIngressMapping":["ingress-az2","secureingress-az2"]},"brokerConfigGroup":"az2","id":1203,"readOnlyConfig":"broker.rack=az2\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az2","secureingress-az2"]},"brokerConfigGroup":"az2","id":1204,"readOnlyConfig":"broker.rack=az2\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az2","secureingress-az2"]},"brokerConfigGroup":"az2","id":1205,"readOnlyConfig":"broker.rack=az2\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az2","secureingress-az2"]},"brokerConfigGroup":"az2","id":1206,"readOnlyConfig":"broker.rack=az2\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az3","secureingress-az3"]},"brokerConfigGroup":"az3","id":1301,"readOnlyConfig":"broker.rack=az3\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az3","secureingress-az3"]},"brokerConfigGroup":"az3","id":1302,"readOnlyConfig":"broker.rack=az3\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az3","secureingress-az3"]},"brokerConfigGroup":"az3","id":1303,"readOnlyConfig":"broker.rack=az3\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az3","secureingress-az3"]},"brokerConfigGroup":"az3","id":1304,"readOnlyConfig":"broker.rack=az3\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az3","secureingress-az3"]},"brokerConfigGroup":"az3","id":1305,"readOnlyConfig":"broker.rack=az3\n"},{"brokerConfig":{"brokerIngressMapping":["ingress-az3","secureingress-az3"]},"brokerConfigGroup":"az3","id":1306,"readOnlyConfig":"broker.rack=az3\n"}],"clusterImage":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.6.1-1","clusterMetricsReporterImage":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/cruise-control:2.5.133-adbe-20240313","cruiseControlConfig":{"affinity":{"nodeAffinity":{}},"capacityConfig":"{\n \"brokerCapacities\":[\n {\n \"brokerId\": \"-1\",\n \"capacity\": {\n \"DISK\": {\"/kafka-logs1/kafka\": \"4194304\"},\n \"CPU\": {\"num.cores\": \"18\"},\n \"NW_IN\": \"900000\",\n 
\"NW_OUT\": \"900000\"\n },\n \"doc\": \"This is the default capacity. Capacity unit used for disk is in MB, cpu is in cores, network throughput is in KB.\"\n }\n ]\n}","clusterConfig":"{\n \"min.insync.replicas\": 2\n}","config":"\n# Configuration for the metadata client.\n# =======================================\n# The maximum interval in milliseconds between two metadata refreshes.\n#metadata.max.age.ms=300000\n# Client id for the Cruise Control. It is used for the metadata client.\n#client.id=kafka-cruise-control\n# The size of TCP send buffer bytes for the metadata client.\n#send.buffer.bytes=131072\n# The size of TCP receive buffer size for the metadata client.\n#receive.buffer.bytes=131072\n# The time to wait before disconnect an idle TCP connection.\n#connections.max.idle.ms=540000\n# The time to wait before reconnect to a given host.\n#reconnect.backoff.ms=50\n# The time to wait for a response from a host after sending a request.\n#request.timeout.ms=30000\n# The time to wait for broker logdir to respond after sending a request.\n#logdir.response.timeout.ms=10000\n# Configurations for the load monitor\n# =======================================\n# The number of metric fetcher thread to fetch metrics for the Kafka cluster\nnum.metric.fetchers=1\n# The metric sampler class\nmetric.sampler.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.CruiseControlMetricsReporterSampler\n# True if the sampling process allows CPU capacity estimation of brokers used for CPU utilization estimation.\nsampling.allow.cpu.capacity.estimation=true\n# Configurations for CruiseControlMetricsReporterSampler\nmetric.reporter.topic=__CruiseControlMetrics\n# The sample store class name\nsample.store.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.KafkaSampleStore\n# The config for the Kafka sample store to save the partition metric samples\npartition.metric.sample.store.topic=__KafkaCruiseControlPartitionMetricSamples\n# The config for the Kafka sample store to save the 
model training samples\nbroker.metric.sample.store.topic=__KafkaCruiseControlModelTrainingSamples\n# The replication factor of Kafka metric sample store topic\nsample.store.topic.replication.factor=3\npartition.sample.store.topic.partition.count=15\nbroker.sample.store.topic.partition.count=15\n# The config for the number of Kafka sample store consumer threads\nnum.sample.loading.threads=8\n# The partition assignor class for the metric samplers\nmetric.sampler.partition.assignor.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.DefaultMetricSamplerPartitionAssignor\n# The metric sampling interval in milliseconds\nmetric.sampling.interval.ms=60000\n# The partition metrics window size in milliseconds\npartition.metrics.window.ms=300000\n# The number of partition metric windows to keep in memory\nnum.partition.metrics.windows=20\n# The minimum partition metric samples required for a partition in each window\nmin.samples.per.partition.metrics.window=1\n# The broker metrics window size in milliseconds\nbroker.metrics.window.ms=300000\n# The number of broker metric windows to keep in memory\n# see https://github.com/linkedin/cruise-control/issues/1149\nnum.broker.metrics.windows=20\n# The minimum broker metric samples required for a partition in each window\nmin.samples.per.broker.metrics.window=1\n# The configuration for the BrokerCapacityConfigFileResolver (supports JBOD, non-JBOD, and heterogeneous CPU core capacities)\ncapacity.config.file=config/capacity.json\n# Configurations for the analyzer\n# =======================================\n# The list of goals to optimize the Kafka cluster for with pre-computed proposals\ndefault.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n 
com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal\n# The list of supported goals\ngoals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.PotentialNwOutGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,\\\n 
com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.kafkaassigner.KafkaAssignerDiskUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.kafkaassigner.KafkaAssignerEvenRackAwareGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.PreferredLeaderElectionGoal\n# The list of supported intra-broker goals\nintra.broker.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.IntraBrokerDiskCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.IntraBrokerDiskUsageDistributionGoal\n# The list of supported hard goals\nhard.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal\n# The minimum percentage of well monitored partitions out of all the partitions\nmin.valid.partition.ratio=0.95\n# The balance threshold for CPU\ncpu.balance.threshold=1.3\n# The balance threshold for disk\ndisk.balance.threshold=1.1\n# The balance threshold for network inbound utilization\nnetwork.inbound.balance.threshold=1.3\n# The balance threshold for network outbound utilization\nnetwork.outbound.balance.threshold=1.3\n# The balance threshold for the replica count\nreplica.count.balance.threshold=1.2\n# The capacity threshold for CPU in percentage\ncpu.capacity.threshold=0.8\n# The capacity threshold for disk in percentage\ndisk.capacity.threshold=0.8\n# The capacity threshold for network inbound utilization in 
percentage\nnetwork.inbound.capacity.threshold=0.8\n# The capacity threshold for network outbound utilization in percentage\nnetwork.outbound.capacity.threshold=0.8\n# The threshold to define the cluster to be in a low CPU utilization state\ncpu.low.utilization.threshold=0.2\n# The threshold to define the cluster to be in a low disk utilization state\ndisk.low.utilization.threshold=0.2\n# The threshold to define the cluster to be in a low network inbound utilization state\nnetwork.inbound.low.utilization.threshold=0.2\n# The threshold to define the cluster to be in a low disk utilization state\nnetwork.outbound.low.utilization.threshold=0.2\n# The metric anomaly percentile upper threshold\nmetric.anomaly.percentile.upper.threshold=90.0\n# The metric anomaly percentile lower threshold\nmetric.anomaly.percentile.lower.threshold=10.0\n# How often should the cached proposal be expired and recalculated if necessary\nproposal.expiration.ms=60000\n# The maximum number of replicas that can reside on a broker at any given time.\nmax.replicas.per.broker=14000\n# The number of threads to use for proposal candidate precomputing.\nnum.proposal.precompute.threads=1\n# the topics that should be excluded from the partition movement.\n#topics.excluded.from.partition.movement=\n# the topics that should have even number of leaders distriubted across brokers\ntopics.with.min.leaders.per.broker=__consumer_offsets\n# enable dynamic min leaders per topic computation\nmin.topic.leaders.per.broker=0\n# The impact of having one level higher goal priority on the relative balancedness score.\n#goal.balancedness.priority.weight\n# The impact of strictness on the relative balancedness score.\n#goal.balancedness.strictness.weight\n# The maximum number of replicas that should reside on each broker to consider a cluster as overprovisioned after balancing its replica distribution.\noverprovisioned.max.replicas.per.broker=3000\n# Configurations for the executor\n# 
=======================================\n# If true, appropriate zookeeper Client { .. } entry required in jaas file located at $base_dir/config/cruise_control_jaas.conf\nzookeeper.security.enabled=false\n# The max number of partitions to move in/out on a given broker at a given time.\nnum.concurrent.partition.movements.per.broker=5\n# The max number of partitions to move between disks within a given broker at a given time.\nnum.concurrent.intra.broker.partition.movements=2\n# The max number of leadership movement within the whole cluster at a given time.\nnum.concurrent.leader.movements=1000\n# Default replica movement throttle. If not specified, movements unthrottled by default.\n# Set to 50 MBps (in Bps)\ndefault.replication.throttle=52428800\n# The interval between two execution progress checks.\nexecution.progress.check.interval.ms=10000\ndefault.replica.movement.strategies=com.linkedin.kafka.cruisecontrol.executor.strategy.PostponeUrpReplicaMovementStrategy,\\\n com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeMinIsrWithOfflineReplicasStrategy,\\\n com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeOneAboveMinIsrWithOfflineReplicasStrategy,\\\n com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeLargeReplicaMovementStrategy,\\\n com.linkedin.kafka.cruisecontrol.executor.strategy.BaseReplicaMovementStrategy\n# Configurations for anomaly detector\n# =======================================\n# The goal violation notifier class\nanomaly.notifier.class=com.linkedin.kafka.cruisecontrol.detector.notifier.SelfHealingNotifier\n# The metric anomaly finder class\nmetric.anomaly.finder.class=\n# The anomaly detection interval\nanomaly.detection.interval.ms=600000\n# The goal violation to detect.\nanomaly.detection.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n 
com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal\nself.healing.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\\\n com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal\n# The interested metrics for metric anomaly analyzer.\nmetric.anomaly.analyzer.metrics=BROKER_PRODUCE_LOCAL_TIME_MS_MAX,\\\n BROKER_PRODUCE_LOCAL_TIME_MS_MEAN,\\\n BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MAX,\\\n BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MEAN,\\\n BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MAX,\\\n BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MEAN,\\\n BROKER_LOG_FLUSH_TIME_MS_MAX,\\\n BROKER_LOG_FLUSH_TIME_MS_MEAN\n# True if recently demoted brokers are excluded from optimizations during broker failure self healing, false otherwise\nbroker.failure.exclude.recently.demoted.brokers=true\n# True if recently 
removed brokers are excluded from optimizations during broker failure self healing, false otherwise\nbroker.failure.exclude.recently.removed.brokers=true\n# True if recently demoted brokers are excluded from optimizations during goal violation self healing, false otherwise\ngoal.violation.exclude.recently.demoted.brokers=true\n# True if recently removed brokers are excluded from optimizations during goal violation self healing, false otherwise\ngoal.violation.exclude.recently.removed.brokers=true\n# The file path to store the failed broker list.\n# This is to persist the broker failure time in case Cruise Control failed and restarted when some brokers are down.\nfailed.brokers.file.path=failedBrokers.txt\n# Topic config provider class\ntopic.config.provider.class=com.linkedin.kafka.cruisecontrol.config.KafkaAdminTopicConfigProvider\n# The cluster configurations for the TopicConfigProvider\ncluster.configs.file=config/clusterConfigs.json\n# The maximum time in milliseconds to store the response and access details of a completed kafka monitoring user task.\ncompleted.kafka.monitor.user.task.retention.time.ms=86400000\n# The maximum time in milliseconds to store the response and access details of a completed cruise control monitoring user task.\ncompleted.cruise.control.monitor.user.task.retention.time.ms=86400000\n# The maximum time in milliseconds to store the response and access details of a completed kafka admin user task.\ncompleted.kafka.admin.user.task.retention.time.ms=604800000\n# The maximum time in milliseconds to store the response and access details of a completed cruise control admin user task.\ncompleted.cruise.control.admin.user.task.retention.time.ms=604800000\n# The fallback maximum time in milliseconds to store the response and access details of a completed user task.\ncompleted.user.task.retention.time.ms=86400000\n# The maximum time in milliseconds to retain the demotion history of brokers.\ndemotion.history.retention.time.ms=900000\n# The maximum 
time in milliseconds to retain the removal history of brokers.\nremoval.history.retention.time.ms=900000\n# The maximum number of completed kafka monitoring user tasks for which the response and access details will be cached.\nmax.cached.completed.kafka.monitor.user.tasks=20\n# The maximum number of completed cruise control monitoring user tasks for which the response and access details will be cached.\nmax.cached.completed.cruise.control.monitor.user.tasks=20\n# The maximum number of completed kafka admin user tasks for which the response and access details will be cached.\nmax.cached.completed.kafka.admin.user.tasks=30\n# The maximum number of completed cruise control admin user tasks for which the response and access details will be cached.\nmax.cached.completed.cruise.control.admin.user.tasks=30\n# The fallback maximum number of completed user tasks of certain type for which the response and access details will be cached.\nmax.cached.completed.user.tasks=25\n# The maximum number of user tasks for concurrently running in async endpoints across all users.\nmax.active.user.tasks=1000\n# Enable self healing for all anomaly detectors, unless the particular anomaly detector is explicitly disabled\nself.healing.enabled=true\n# Enable self healing for broker failure detector\n#self.healing.broker.failure.enabled=true\n# Enable self healing for goal violation detector\n#self.healing.goal.violation.enabled=true\n# Enable self healing for metric anomaly detector\nself.healing.metric.anomaly.enabled=false\n# Enable self healing for disk failure detector\n#self.healing.disk.failure.enabled=true\n# Use the Kafka API to detect broker failures (and not old ZK interface)\nkafka.broker.failure.detection.enable=true\n# Defines the threshold to mark a broker as dead. 
If a non-empty broker leaves the cluster at time T and did not join\n# the cluster before T + broker.failure.alert.threshold.ms, the broker is defined as dead broker since T.\n# An alert will be triggered in this case.\n# Set to 15 minutes\nbroker.failure.alert.threshold.ms=900000\n# If self-healing is enabled and a broker is dead at T,\n# self-healing will be triggered at T + broker.failure.self.healing.threshold.ms.\n# Set to 90 minutes\nbroker.failure.self.healing.threshold.ms=5400000\n# The multiplier applied to the threshold of distribution goals used by goal.violation.detector.\n#goal.violation.distribution.threshold.multiplier=2.50\n# The flag to indicate whether use of provisioner is enabled\nprovisioner.enable=false\n# configurations for the webserver\n# ================================\n# HTTP listen port\nwebserver.http.port=9090\n# HTTP listen address\nwebserver.http.address=0.0.0.0\n# Whether CORS support is enabled for API or not\nwebserver.http.cors.enabled=false\n# Value for Access-Control-Allow-Origin\nwebserver.http.cors.origin=http://localhost:8080/\n# Value for Access-Control-Request-Method\nwebserver.http.cors.allowmethods=OPTIONS,GET,POST\n# Headers that should be exposed to the Browser (Webapp)\n# This is a special header that is used by the\n# User Tasks subsystem and should be explicitly\n# Enabled when CORS mode is used as part of the\n# Admin Interface\nwebserver.http.cors.exposeheaders=User-Task-ID\n# REST API default prefix\n# (dont forget the ending *)\nwebserver.api.urlprefix=/kafkacruisecontrol/*\n# Location where the Cruise Control frontend is deployed\nwebserver.ui.diskpath=./cruise-control-ui/dist/\n# URL path prefix for UI\n# (dont forget the ending *)\nwebserver.ui.urlprefix=/*\n# Time After which request is converted to Async\nwebserver.request.maxBlockTimeMs=10000\n# Default Session Expiry Period\nwebserver.session.maxExpiryTimeMs=60000\n# Session cookie path\nwebserver.session.path=/\n# Server Access 
Logs\nwebserver.accesslog.enabled=true\n# Configurations for servlet\n# ==========================\n# Enable two-step verification for processing POST requests.\ntwo.step.verification.enabled=false\n# The maximum time in milliseconds to retain the requests in two-step (verification) purgatory.\ntwo.step.purgatory.retention.time.ms=1209600000\n# The maximum number of requests in two-step (verification) purgatory.\ntwo.step.purgatory.max.requests=25\n\ndefault.replication.throttle=20971520\ndisk.balance.threshold=1.05\nmax.replicas.per.broker=20000\nnum.concurrent.partition.movements.per.broker=5\ntopics.with.min.leaders.per.broker=__consumer_offsets|mccs_push_notifications_feedback|triggers|aliases|aliases-realtime|profiles","cruiseControlAnnotations":{"arc.ethos.adobe.net/ignore":"true"},"cruiseControlTaskSpec":{"RetryDurationMinutes":2147483647},"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/cruise-control:2.5.133-adbe-20240313","log4jConfig":"rootLogger.level=INFO\nappenders=console\nappender.console.type=Console\nappender.console.name=STDOUT\nappender.console.layout.type=PatternLayout\nappender.console.layout.pattern=[%d] %p %replace{%msg}{[\\r\\n]}{|} 
%throwable{separator(|)}(%c{2})%n\nrootLogger.appenderRefs=console\nrootLogger.appenderRef.console.ref=STDOUT","resourceRequirements":{"limits":{"cpu":"4","memory":"2Gi"},"requests":{"cpu":"4","memory":"2Gi"}},"serviceAccountName":"kafka-cluster","tolerations":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"}],"topicConfig":{"partitions":6,"replicationFactor":3}},"disruptionBudget":{"budget":"1","create":true},"envoyConfig":{"affinity":{"nodeAffinity":{},"podAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"topologyKey":"kubernetes.io/hostname"},"weight":1}]},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"eListenerName","operator":"In","values":["plaintext-ingress-az1","plaintext-ingress-az2","plaintext-ingress-az3","external-ingress-az1","external-ingress-az2","external-ingress-az3"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"annotations":{"arc.ethos.adobe.net/ignore":"true","ops/certVersion":"12"},"disruptionBudget":{"budget":"25%","create":true,"strategy":"maxUnavailable"},"envoyCommandLineArgs":{"concurrency":2},"image":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/envoyproxy/envoy:v1.36.2","replicas":6,"resourceRequirements":{"limits":{"cpu":"1","memory":"1Gi"},"requests":{"cpu":"1","memory":"1Gi"}},"serviceAccountName":"kafka-cluster","tolerations":[{"effect":"NoSchedule","key":"node.kubernetes.io/pipeline-workload","operator":"Equal","value":"true"},{"effect":"NoSchedule","key":"ethos.corp.adobe.com/ethos-workload","operator":"Equal","value":"arm64"}],"topologySpreadConstraints":[{"labelSelector":{"matchLabels":{"app":"envoyingress"}},"maxSkew":1,"topologyKey":"kubernetes.io/hostname","whenUnsatisfiable":"Sch
eduleAnyway"}]},"envs":[{"name":"POD_UID","valueFrom":{"fieldRef":{"fieldPath":"metadata.uid"}}}],"headlessServiceEnabled":true,"ingressController":"envoy","listenersConfig":{"externalListeners":[{"accessMethod":"LoadBalancer","anyCastPort":9096,"config":{"defaultIngressConfig":"","ingressConfig":{"ingress-az1":{"envoyConfig":{"annotations":{"broker_group":"az1"}},"hostnameOverride":"kafka-1-az1-or1.prd.pipeline.adobedc.net","serviceType":"ClusterIP"},"ingress-az2":{"envoyConfig":{"annotations":{"broker_group":"az2"}},"hostnameOverride":"kafka-1-az2-or1.prd.pipeline.adobedc.net","serviceType":"ClusterIP"},"ingress-az3":{"envoyConfig":{"annotations":{"broker_group":"az3"}},"hostnameOverride":"kafka-1-az3-or1.prd.pipeline.adobedc.net","serviceType":"ClusterIP"}}},"containerPort":29094,"externalStartingPort":8000,"name":"plaintext","type":"plaintext","usedForInnerBrokerCommunication":false},{"accessMethod":"LoadBalancer","anyCastPort":9097,"config":{"defaultIngressConfig":"","ingressConfig":{"secureingress-az1":{"envoyConfig":{"affinity":{"podAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"topologyKey":"kubernetes.io/hostname"},"weight":1}]},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"eListenerName","operator":"In","values":["secure-secureingress-az1","secure-secureingress-az2","secure-secureingress-az3"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"annotations":{"broker_group":"az1"},"brokerHostnameTemplate":"kafka-1-%id-or1.prd.pipeline.adobedc.net"},"hostnameOverride":"kafka-1-az1-or1-secure.prd.pipeline.adobedc.net","serviceType":"ClusterIP"},"secureingress-az2":{"envoyConfig":{"affinity":{"podAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"topologyKey
":"kubernetes.io/hostname"},"weight":1}]},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"eListenerName","operator":"In","values":["secure-secureingress-az1","secure-secureingress-az2","secure-secureingress-az3"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"annotations":{"broker_group":"az2"},"brokerHostnameTemplate":"kafka-1-%id-or1.prd.pipeline.adobedc.net"},"hostnameOverride":"kafka-1-az2-or1-secure.prd.pipeline.adobedc.net","serviceType":"ClusterIP"},"secureingress-az3":{"envoyConfig":{"affinity":{"podAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchExpressions":[{"key":"app","operator":"In","values":["kafka"]}]},"topologyKey":"kubernetes.io/hostname"},"weight":1}]},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"labelSelector":{"matchExpressions":[{"key":"eListenerName","operator":"In","values":["secure-secureingress-az1","secure-secureingress-az2","secure-secureingress-az3"]}]},"topologyKey":"kubernetes.io/hostname"}]}},"annotations":{"broker_group":"az3"},"brokerHostnameTemplate":"kafka-1-%id-or1.prd.pipeline.adobedc.net"},"hostnameOverride":"kafka-1-az3-or1-secure.prd.pipeline.adobedc.net","serviceType":"ClusterIP"}}},"containerPort":29095,"externalStartingPort":-1,"name":"secure","tlsSecretName":"prod-adobedc-net-tls","type":"sasl_plaintext","usedForInnerBrokerCommunication":false}],"internalListeners":[{"containerPort":29092,"internalStartingPort":0,"name":"internal","type":"plaintext","usedForInnerBrokerCommunication":true},{"containerPort":29093,"internalStartingPort":0,"name":"controller","type":"plaintext","usedForControllerCommunication":true,"usedForInnerBrokerCommunication":false},{"containerPort":29096,"internalStartingPort":0,"name":"sasl_plain","type":"sasl_plaintext","usedForInnerBrokerCommunication":false}]},"monitoringConfig":{"jmxImage":"docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/amuraru/
jmx-javaagent:0.19.1-multi","pathToJar":"/jmx_prometheus_javaagent.jar"},"oneBrokerPerNode":true,"propagateLabels":true,"readOnlyConfig":"__do_no_edit_diskSize=4194304\nauthorizer.class.name=com.adobe.core.pipeline.kafka.security.server.auth.CustomAclAuthorizerWithAccessTrackingMetrics\nauto.create.topics.enable=false\nauto.leader.rebalance.enable=true\nbackground.threads=20\nbroker.id.generation.enable=false\ncruise.control.metrics.reporter.acks=1\ncruise.control.metrics.topic=__CruiseControlMetrics\ncruise.control.metrics.topic.min.insync.replicas=1\ndefault.replication.factor=3\ninter.broker.protocol.version=3.6\nlistener.name.sasl_plain.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required;\nlistener.name.sasl_plain.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler\nlistener.name.secure.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule 
required;\nlistener.name.secure.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler\nlog.message.format.version=0.11.0\nlog.message.timestamp.after.max.ms=86400000\nlog.segment.bytes=536870912\nmax.incremental.fetch.session.cache.slots=1000\nmin.insync.replicas=2\nnum.io.threads=144\nnum.network.threads=120\nnum.partitions=10\nnum.recovery.threads.per.data.dir=8\nnum.replica.fetchers=4\noffsets.commit.required.acks=1\nprincipal.builder.class=com.adobe.core.pipeline.kafka.security.server.auth.PipelinePrincipalBuilder\nqueued.max.requests=1000\nreplica.fetch.max.bytes=5242880\nreplica.lag.time.max.ms=15000\nreplica.socket.receive.buffer.bytes=-1\nsasl.enabled.mechanisms=OAUTHBEARER\nsasl.ims.certificate.location=static.adobelogin.com/keys/prod/\nsasl.ims.url=https://ims-na1.adobelogin.com/\nsocket.listen.backlog.size=1024\nsocket.receive.buffer.bytes=-1\nsocket.send.buffer.bytes=-1\nsuper.users=Broker:ANONYMOUS\nzookeeper.connection.timeout.ms=18000","rollingUpgradeConfig":{"concurrentBrokerRestartCountPerRack":2,"failureThreshold":2},"taintedBrokersSelector":{"matchExpressions":[{"key":"shredder.ethos.adobe.net/upgrade-status","operator":"In","values":["parked"]}]},"zkAddresses":["pipeline-zookeeper-client:2181"],"zkPath":"/kafka"}} - moniker.spinnaker.io/application: pipelinefab - moniker.spinnaker.io/cluster: KafkaCluster.kafka.banzaicloud.io pipeline-kafka - pipeline_config_version: dev - strategy.spinnaker.io/replace: "true" - creationTimestamp: "2021-08-25T10:02:37Z" - finalizers: - - finalizer.kafkaclusters.kafka.banzaicloud.io - - topics.kafkaclusters.kafka.banzaicloud.io - - users.kafkaclusters.kafka.banzaicloud.io - generation: 109 - labels: - app.kubernetes.io/instance: pipeline-kafka - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: kafka - app.kubernetes.io/version: 3.6.1 - flex.ethos.corp.adobe.com/instance: 
experience-platform--pipeline-kafka-deploy--ethos12-pr-ad6d62b6 - pipeline_cluster: OR1 - pipeline_env: prod - name: pipeline-kafka - namespace: ns-team-aep-pipeline-kafka-1 - resourceVersion: "13499091137" - uid: 16c37145-142f-4579-9116-fce1a1cf62d2 - spec: - brokerConfigGroups: - az1: - affinity: - nodeAffinity: {} - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - kafka - namespaces: - - ns-team-aep-pipeline-kafka-1 - - ns-team-aep-pipeline-kafka-2 - - ns-team-aep-pipeline-kafka-3 - - ns-team-aep-pipeline-kafka-a1 - - ns-team-aep-pipeline-kafka-a2 - - ns-team-aep-pipeline-kafka-a3 - - ns-team-aep-pipeline-kafka-a5 - topologyKey: kubernetes.io/hostname - brokerAnnotations: - arc.ethos.adobe.net/ignore: "true" - broker_group: az1 - cluster-autoscaler.kubernetes.io/safe-to-evict: "false" - io.kubernetes.cri-o.LinkLogs: logging-volume - containers: - - env: - - name: SPLUNK_HOST - value: splunk-hec-relay.loc.adobe.net - - name: SPLUNK_PORT - value: "8088" - - name: SPLUNK_INDEX - value: plat_app_prod - - name: SPLUNK_TOKEN - valueFrom: - secretKeyRef: - key: token - name: splunk-token - - name: SPLUNK_SOURCETYPE - value: log4j - - name: POD_UID_FLUENT_BIT - valueFrom: - fieldRef: - fieldPath: metadata.uid - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: NODE_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: LOG_PARSER - value: docker - - name: POD_ENV - value: prod - - name: POD_CLUSTER - value: OR1 - image: docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos - name: fluent-bit - ports: - - containerPort: 2020 - name: fb-metrics - protocol: TCP - resources: - 
limits: - cpu: 100m - memory: 256Mi - requests: - cpu: 100m - memory: 256Mi - volumeMounts: - - mountPath: /logging-volume - mountPropagation: HostToContainer - name: logging-volume - - mountPath: /var/fluent-bit - name: fluent-data - - mountPath: /fluent-bit/etc - name: fluent-bit-config - initContainers: - - command: - - cp - - -r - - /pipeline/kafka-libs/. - - /opt/kafka/libs/extensions/ - image: docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10 - imagePullPolicy: IfNotPresent - name: broker-libs-injector - resources: - limits: - cpu: 100m - memory: 100Mi - requests: - cpu: 100m - memory: 100Mi - volumeMounts: - - mountPath: /opt/kafka/libs/extensions - name: extensions - kafkaHeapOpts: -XX:InitialRAMPercentage=30 -XX:MaxRAMPercentage=70 -XX:MinRAMPercentage=70 - kafkaJvmPerfOpts: -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 - -XX:+ExplicitGCInvokesConcurrent -XX:MetaspaceSize=96m -XX:G1HeapRegionSize=16M - -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 -Djava.awt.headless=true - -Dsun.net.inetaddr.ttl=60 -Dcom.sun.management.jmxremote.port=1090 -Dcom.sun.management.jmxremote.rmi.port=1090 - -Dcom.sun.management.jmxremote.local.only=false -Djava.rmi.server.hostname=127.0.0.1 - -Djute.maxbuffer=0x9fffff - log4jConfig: |- - log4j.rootLogger=INFO, stdout - - log4j.appender.stdout=org.apache.log4j.ConsoleAppender - log4j.appender.stdout.layout=org.apache.log4j.PatternLayout - log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - - # Change the line below to adjust ZK client logging - log4j.logger.org.apache.zookeeper=INFO - - # Change the two lines below to adjust the general broker logging level (output to server.log and stdout) - log4j.logger.kafka=INFO - log4j.logger.org.apache.kafka=INFO - - # Change to DEBUG or TRACE to enable request logging - log4j.logger.kafka.request.logger=WARN - - # Uncomment the lines below and change log4j.logger.kafka.network.RequestChannel$ to 
TRACE for additional output - # related to the handling of requests - #log4j.logger.kafka.network.Processor=TRACE, requestAppender - #log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender - # - log4j.logger.kafka.network.RequestChannel$=WARN - log4j.logger.kafka.controller=DEBUG - log4j.logger.kafka.log.LogCleaner=INFO - log4j.logger.state.change.logger=INFO - - # Access denials are logged at INFO level, change to DEBUG to also log allowed accesses - log4j.logger.kafka.authorizer.logger=INFO - - # Additional logging to reduce noise - log4j.logger.org.apache.kafka.common.network.Selector=WARN - resourceRequirements: - limits: - cpu: "18" - memory: 36Gi - requests: - cpu: "18" - memory: 36Gi - serviceAccountName: kafka-cluster - storageConfigs: - - mountPath: /kafka-logs1 - pvcSpec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 4Ti - storageClassName: vsphere-block-12prodor1-s1-1 - terminationGracePeriodSeconds: 120 - tolerations: - - effect: NoSchedule - key: node.kubernetes.io/pipeline-workload - operator: Equal - value: "true" - - effect: NoSchedule - key: ethos.corp.adobe.com/ethos-workload - operator: Equal - value: arm64 - volumes: - - emptyDir: {} - name: logging-volume - - emptyDir: {} - name: fluent-data - - configMap: - name: pipeline-kafka-fluent-bit - name: fluent-bit-config - az2: - affinity: - nodeAffinity: {} - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - kafka - namespaces: - - ns-team-aep-pipeline-kafka-1 - - ns-team-aep-pipeline-kafka-2 - - ns-team-aep-pipeline-kafka-3 - - ns-team-aep-pipeline-kafka-a1 - - ns-team-aep-pipeline-kafka-a2 - - ns-team-aep-pipeline-kafka-a3 - - ns-team-aep-pipeline-kafka-a5 - topologyKey: kubernetes.io/hostname - brokerAnnotations: - arc.ethos.adobe.net/ignore: "true" - broker_group: az2 - cluster-autoscaler.kubernetes.io/safe-to-evict: "false" - io.kubernetes.cri-o.LinkLogs: logging-volume - 
containers: - - env: - - name: SPLUNK_HOST - value: splunk-hec-relay.loc.adobe.net - - name: SPLUNK_PORT - value: "8088" - - name: SPLUNK_INDEX - value: plat_app_prod - - name: SPLUNK_TOKEN - valueFrom: - secretKeyRef: - key: token - name: splunk-token - - name: SPLUNK_SOURCETYPE - value: log4j - - name: POD_UID_FLUENT_BIT - valueFrom: - fieldRef: - fieldPath: metadata.uid - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: NODE_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: LOG_PARSER - value: docker - - name: POD_ENV - value: prod - - name: POD_CLUSTER - value: OR1 - image: docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos - name: fluent-bit - ports: - - containerPort: 2020 - name: fb-metrics - protocol: TCP - resources: - limits: - cpu: 100m - memory: 256Mi - requests: - cpu: 100m - memory: 256Mi - volumeMounts: - - mountPath: /logging-volume - mountPropagation: HostToContainer - name: logging-volume - - mountPath: /var/fluent-bit - name: fluent-data - - mountPath: /fluent-bit/etc - name: fluent-bit-config - initContainers: - - command: - - cp - - -r - - /pipeline/kafka-libs/. 
- - /opt/kafka/libs/extensions/ - image: docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10 - imagePullPolicy: IfNotPresent - name: broker-libs-injector - resources: - limits: - cpu: 100m - memory: 100Mi - requests: - cpu: 100m - memory: 100Mi - volumeMounts: - - mountPath: /opt/kafka/libs/extensions - name: extensions - kafkaHeapOpts: -XX:InitialRAMPercentage=30 -XX:MaxRAMPercentage=70 -XX:MinRAMPercentage=70 - kafkaJvmPerfOpts: -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 - -XX:+ExplicitGCInvokesConcurrent -XX:MetaspaceSize=96m -XX:G1HeapRegionSize=16M - -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 -Djava.awt.headless=true - -Dsun.net.inetaddr.ttl=60 -Dcom.sun.management.jmxremote.port=1090 -Dcom.sun.management.jmxremote.rmi.port=1090 - -Dcom.sun.management.jmxremote.local.only=false -Djava.rmi.server.hostname=127.0.0.1 - -Djute.maxbuffer=0x9fffff - log4jConfig: |- - log4j.rootLogger=INFO, stdout - - log4j.appender.stdout=org.apache.log4j.ConsoleAppender - log4j.appender.stdout.layout=org.apache.log4j.PatternLayout - log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - - # Change the line below to adjust ZK client logging - log4j.logger.org.apache.zookeeper=INFO - - # Change the two lines below to adjust the general broker logging level (output to server.log and stdout) - log4j.logger.kafka=INFO - log4j.logger.org.apache.kafka=INFO - - # Change to DEBUG or TRACE to enable request logging - log4j.logger.kafka.request.logger=WARN - - # Uncomment the lines below and change log4j.logger.kafka.network.RequestChannel$ to TRACE for additional output - # related to the handling of requests - #log4j.logger.kafka.network.Processor=TRACE, requestAppender - #log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender - # - log4j.logger.kafka.network.RequestChannel$=WARN - log4j.logger.kafka.controller=DEBUG - log4j.logger.kafka.log.LogCleaner=INFO - 
log4j.logger.state.change.logger=INFO - - # Access denials are logged at INFO level, change to DEBUG to also log allowed accesses - log4j.logger.kafka.authorizer.logger=INFO - - # Additional logging to reduce noise - log4j.logger.org.apache.kafka.common.network.Selector=WARN - resourceRequirements: - limits: - cpu: "18" - memory: 36Gi - requests: - cpu: "18" - memory: 36Gi - serviceAccountName: kafka-cluster - storageConfigs: - - mountPath: /kafka-logs1 - pvcSpec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 4Ti - storageClassName: vsphere-block-12prodor1-s1-2 - terminationGracePeriodSeconds: 120 - tolerations: - - effect: NoSchedule - key: node.kubernetes.io/pipeline-workload - operator: Equal - value: "true" - - effect: NoSchedule - key: ethos.corp.adobe.com/ethos-workload - operator: Equal - value: arm64 - volumes: - - emptyDir: {} - name: logging-volume - - emptyDir: {} - name: fluent-data - - configMap: - name: pipeline-kafka-fluent-bit - name: fluent-bit-config - az3: - affinity: - nodeAffinity: {} - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - kafka - namespaces: - - ns-team-aep-pipeline-kafka-1 - - ns-team-aep-pipeline-kafka-2 - - ns-team-aep-pipeline-kafka-3 - - ns-team-aep-pipeline-kafka-a1 - - ns-team-aep-pipeline-kafka-a2 - - ns-team-aep-pipeline-kafka-a3 - - ns-team-aep-pipeline-kafka-a5 - topologyKey: kubernetes.io/hostname - brokerAnnotations: - arc.ethos.adobe.net/ignore: "true" - broker_group: az3 - cluster-autoscaler.kubernetes.io/safe-to-evict: "false" - io.kubernetes.cri-o.LinkLogs: logging-volume - containers: - - env: - - name: SPLUNK_HOST - value: splunk-hec-relay.loc.adobe.net - - name: SPLUNK_PORT - value: "8088" - - name: SPLUNK_INDEX - value: plat_app_prod - - name: SPLUNK_TOKEN - valueFrom: - secretKeyRef: - key: token - name: splunk-token - - name: SPLUNK_SOURCETYPE - value: log4j - - name: POD_UID_FLUENT_BIT - 
valueFrom: - fieldRef: - fieldPath: metadata.uid - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: NODE_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: LOG_PARSER - value: docker - - name: POD_ENV - value: prod - - name: POD_CLUSTER - value: OR1 - image: docker-k8s-infrastructure-public-release.dr-uw2.adobeitc.com/ethos/ethos-fluent-bit:3.2.1.1-ethos - name: fluent-bit - ports: - - containerPort: 2020 - name: fb-metrics - protocol: TCP - resources: - limits: - cpu: 100m - memory: 256Mi - requests: - cpu: 100m - memory: 256Mi - volumeMounts: - - mountPath: /logging-volume - mountPropagation: HostToContainer - name: logging-volume - - mountPath: /var/fluent-bit - name: fluent-data - - mountPath: /fluent-bit/etc - name: fluent-bit-config - initContainers: - - command: - - cp - - -r - - /pipeline/kafka-libs/. 
- - /opt/kafka/libs/extensions/ - image: docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/broker-libs-injector:0.1.10 - imagePullPolicy: IfNotPresent - name: broker-libs-injector - resources: - limits: - cpu: 100m - memory: 100Mi - requests: - cpu: 100m - memory: 100Mi - volumeMounts: - - mountPath: /opt/kafka/libs/extensions - name: extensions - kafkaHeapOpts: -XX:InitialRAMPercentage=30 -XX:MaxRAMPercentage=70 -XX:MinRAMPercentage=70 - kafkaJvmPerfOpts: -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 - -XX:+ExplicitGCInvokesConcurrent -XX:MetaspaceSize=96m -XX:G1HeapRegionSize=16M - -XX:MinMetaspaceFreeRatio=50 -XX:MaxMetaspaceFreeRatio=80 -Djava.awt.headless=true - -Dsun.net.inetaddr.ttl=60 -Dcom.sun.management.jmxremote.port=1090 -Dcom.sun.management.jmxremote.rmi.port=1090 - -Dcom.sun.management.jmxremote.local.only=false -Djava.rmi.server.hostname=127.0.0.1 - -Djute.maxbuffer=0x9fffff - log4jConfig: |- - log4j.rootLogger=INFO, stdout - - log4j.appender.stdout=org.apache.log4j.ConsoleAppender - log4j.appender.stdout.layout=org.apache.log4j.PatternLayout - log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n - - # Change the line below to adjust ZK client logging - log4j.logger.org.apache.zookeeper=INFO - - # Change the two lines below to adjust the general broker logging level (output to server.log and stdout) - log4j.logger.kafka=INFO - log4j.logger.org.apache.kafka=INFO - - # Change to DEBUG or TRACE to enable request logging - log4j.logger.kafka.request.logger=WARN - - # Uncomment the lines below and change log4j.logger.kafka.network.RequestChannel$ to TRACE for additional output - # related to the handling of requests - #log4j.logger.kafka.network.Processor=TRACE, requestAppender - #log4j.logger.kafka.server.KafkaApis=TRACE, requestAppender - # - log4j.logger.kafka.network.RequestChannel$=WARN - log4j.logger.kafka.controller=DEBUG - log4j.logger.kafka.log.LogCleaner=INFO - 
log4j.logger.state.change.logger=INFO - - # Access denials are logged at INFO level, change to DEBUG to also log allowed accesses - log4j.logger.kafka.authorizer.logger=INFO - - # Additional logging to reduce noise - log4j.logger.org.apache.kafka.common.network.Selector=WARN - resourceRequirements: - limits: - cpu: "18" - memory: 36Gi - requests: - cpu: "18" - memory: 36Gi - serviceAccountName: kafka-cluster - storageConfigs: - - mountPath: /kafka-logs1 - pvcSpec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 4Ti - storageClassName: vsphere-block-12prodor1-s1-3 - terminationGracePeriodSeconds: 120 - tolerations: - - effect: NoSchedule - key: node.kubernetes.io/pipeline-workload - operator: Equal - value: "true" - - effect: NoSchedule - key: ethos.corp.adobe.com/ethos-workload - operator: Equal - value: arm64 - volumes: - - emptyDir: {} - name: logging-volume - - emptyDir: {} - name: fluent-data - - configMap: - name: pipeline-kafka-fluent-bit - name: fluent-bit-config - brokers: - - brokerConfig: - brokerIngressMapping: - - ingress-az1 - - secureingress-az1 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az1 - id: 1101 - readOnlyConfig: | - broker.rack=az1 - - brokerConfig: - brokerIngressMapping: - - ingress-az1 - - secureingress-az1 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az1 - id: 1102 - readOnlyConfig: | - broker.rack=az1 - - brokerConfig: - brokerIngressMapping: - - ingress-az1 - - secureingress-az1 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az1 - id: 1103 - readOnlyConfig: | - broker.rack=az1 - - brokerConfig: - brokerIngressMapping: - - ingress-az1 - - secureingress-az1 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az1 - id: 1104 - readOnlyConfig: | - broker.rack=az1 - - brokerConfig: - brokerIngressMapping: - - ingress-az1 - - secureingress-az1 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az1 - id: 1105 - readOnlyConfig: | - broker.rack=az1 - - brokerConfig: - 
brokerIngressMapping: - - ingress-az1 - - secureingress-az1 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az1 - id: 1106 - readOnlyConfig: | - broker.rack=az1 - - brokerConfig: - brokerIngressMapping: - - ingress-az2 - - secureingress-az2 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az2 - id: 1201 - readOnlyConfig: | - broker.rack=az2 - - brokerConfig: - brokerIngressMapping: - - ingress-az2 - - secureingress-az2 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az2 - id: 1202 - readOnlyConfig: | - broker.rack=az2 - - brokerConfig: - brokerIngressMapping: - - ingress-az2 - - secureingress-az2 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az2 - id: 1203 - readOnlyConfig: | - broker.rack=az2 - - brokerConfig: - brokerIngressMapping: - - ingress-az2 - - secureingress-az2 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az2 - id: 1204 - readOnlyConfig: | - broker.rack=az2 - - brokerConfig: - brokerIngressMapping: - - ingress-az2 - - secureingress-az2 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az2 - id: 1205 - readOnlyConfig: | - broker.rack=az2 - - brokerConfig: - brokerIngressMapping: - - ingress-az2 - - secureingress-az2 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az2 - id: 1206 - readOnlyConfig: | - broker.rack=az2 - - brokerConfig: - brokerIngressMapping: - - ingress-az3 - - secureingress-az3 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az3 - id: 1301 - readOnlyConfig: | - broker.rack=az3 - - brokerConfig: - brokerIngressMapping: - - ingress-az3 - - secureingress-az3 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az3 - id: 1302 - readOnlyConfig: | - broker.rack=az3 - - brokerConfig: - brokerIngressMapping: - - ingress-az3 - - secureingress-az3 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az3 - id: 1303 - readOnlyConfig: | - broker.rack=az3 - - brokerConfig: - brokerIngressMapping: - - ingress-az3 - - secureingress-az3 - 
terminationGracePeriodSeconds: 120 - brokerConfigGroup: az3 - id: 1304 - readOnlyConfig: | - broker.rack=az3 - - brokerConfig: - brokerIngressMapping: - - ingress-az3 - - secureingress-az3 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az3 - id: 1305 - readOnlyConfig: | - broker.rack=az3 - - brokerConfig: - brokerIngressMapping: - - ingress-az3 - - secureingress-az3 - terminationGracePeriodSeconds: 120 - brokerConfigGroup: az3 - id: 1306 - readOnlyConfig: | - broker.rack=az3 - clusterImage: docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/kafka:2.13-3.6.1-1 - clusterMetricsReporterImage: docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/cruise-control:2.5.133-adbe-20240313 - contourIngressConfig: - brokerFQDNTemplate: "" - tlsSecretName: "" - cruiseControlConfig: - affinity: - nodeAffinity: {} - capacityConfig: |- - { - "brokerCapacities":[ - { - "brokerId": "-1", - "capacity": { - "DISK": {"/kafka-logs1/kafka": "4194304"}, - "CPU": {"num.cores": "18"}, - "NW_IN": "900000", - "NW_OUT": "900000" - }, - "doc": "This is the default capacity. Capacity unit used for disk is in MB, cpu is in cores, network throughput is in KB." - } - ] - } - clusterConfig: |- - { - "min.insync.replicas": 2 - } - config: |2- - - # Configuration for the metadata client. - # ======================================= - # The maximum interval in milliseconds between two metadata refreshes. - #metadata.max.age.ms=300000 - # Client id for the Cruise Control. It is used for the metadata client. - #client.id=kafka-cruise-control - # The size of TCP send buffer bytes for the metadata client. - #send.buffer.bytes=131072 - # The size of TCP receive buffer size for the metadata client. - #receive.buffer.bytes=131072 - # The time to wait before disconnect an idle TCP connection. - #connections.max.idle.ms=540000 - # The time to wait before reconnect to a given host. - #reconnect.backoff.ms=50 - # The time to wait for a response from a host after sending a request. 
- #request.timeout.ms=30000 - # The time to wait for broker logdir to respond after sending a request. - #logdir.response.timeout.ms=10000 - # Configurations for the load monitor - # ======================================= - # The number of metric fetcher threads to fetch metrics for the Kafka cluster - num.metric.fetchers=1 - # The metric sampler class - metric.sampler.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.CruiseControlMetricsReporterSampler - # True if the sampling process allows CPU capacity estimation of brokers used for CPU utilization estimation. - sampling.allow.cpu.capacity.estimation=true - # Configurations for CruiseControlMetricsReporterSampler - metric.reporter.topic=__CruiseControlMetrics - # The sample store class name - sample.store.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.KafkaSampleStore - # The config for the Kafka sample store to save the partition metric samples - partition.metric.sample.store.topic=__KafkaCruiseControlPartitionMetricSamples - # The config for the Kafka sample store to save the model training samples - broker.metric.sample.store.topic=__KafkaCruiseControlModelTrainingSamples - # The replication factor of Kafka metric sample store topic - sample.store.topic.replication.factor=3 - partition.sample.store.topic.partition.count=15 - broker.sample.store.topic.partition.count=15 - # The config for the number of Kafka sample store consumer threads - num.sample.loading.threads=8 - # The partition assignor class for the metric samplers - metric.sampler.partition.assignor.class=com.linkedin.kafka.cruisecontrol.monitor.sampling.DefaultMetricSamplerPartitionAssignor - # The metric sampling interval in milliseconds - metric.sampling.interval.ms=60000 - # The partition metrics window size in milliseconds - partition.metrics.window.ms=300000 - # The number of partition metric windows to keep in memory - num.partition.metrics.windows=20 - # The minimum partition metric samples required for a partition in each 
window - min.samples.per.partition.metrics.window=1 - # The broker metrics window size in milliseconds - broker.metrics.window.ms=300000 - # The number of broker metric windows to keep in memory - # see https://github.com/linkedin/cruise-control/issues/1149 - num.broker.metrics.windows=20 - # The minimum broker metric samples required for a broker in each window - min.samples.per.broker.metrics.window=1 - # The configuration for the BrokerCapacityConfigFileResolver (supports JBOD, non-JBOD, and heterogeneous CPU core capacities) - capacity.config.file=config/capacity.json - # Configurations for the analyzer - # ======================================= - # The list of goals to optimize the Kafka cluster for with pre-computed proposals - default.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal - # The list of supported goals - 
goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.PotentialNwOutGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.TopicReplicaDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderBytesInDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.kafkaassigner.KafkaAssignerDiskUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.kafkaassigner.KafkaAssignerEvenRackAwareGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.PreferredLeaderElectionGoal - # The list of supported intra-broker goals - intra.broker.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.IntraBrokerDiskCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.IntraBrokerDiskUsageDistributionGoal - # The list of supported hard goals - hard.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ - 
com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal - # The minimum percentage of well monitored partitions out of all the partitions - min.valid.partition.ratio=0.95 - # The balance threshold for CPU - cpu.balance.threshold=1.3 - # The balance threshold for disk - disk.balance.threshold=1.1 - # The balance threshold for network inbound utilization - network.inbound.balance.threshold=1.3 - # The balance threshold for network outbound utilization - network.outbound.balance.threshold=1.3 - # The balance threshold for the replica count - replica.count.balance.threshold=1.2 - # The capacity threshold for CPU in percentage - cpu.capacity.threshold=0.8 - # The capacity threshold for disk in percentage - disk.capacity.threshold=0.8 - # The capacity threshold for network inbound utilization in percentage - network.inbound.capacity.threshold=0.8 - # The capacity threshold for network outbound utilization in percentage - network.outbound.capacity.threshold=0.8 - # The threshold to define the cluster to be in a low CPU utilization state - cpu.low.utilization.threshold=0.2 - # The threshold to define the cluster to be in a low disk utilization state - disk.low.utilization.threshold=0.2 - # The threshold to define the cluster to be in a low network inbound utilization state - network.inbound.low.utilization.threshold=0.2 - # The threshold to define the cluster to be in a low network outbound utilization state - network.outbound.low.utilization.threshold=0.2 - # The metric anomaly percentile upper threshold - metric.anomaly.percentile.upper.threshold=90.0 - # The metric anomaly percentile lower threshold - metric.anomaly.percentile.lower.threshold=10.0 - # How often should the cached proposal be expired and recalculated if necessary - 
proposal.expiration.ms=60000 - # The maximum number of replicas that can reside on a broker at any given time. - max.replicas.per.broker=14000 - # The number of threads to use for proposal candidate precomputing. - num.proposal.precompute.threads=1 - # the topics that should be excluded from the partition movement. - #topics.excluded.from.partition.movement= - # the topics that should have an even number of leaders distributed across brokers - topics.with.min.leaders.per.broker=__consumer_offsets - # enable dynamic min leaders per topic computation - min.topic.leaders.per.broker=0 - # The impact of having one level higher goal priority on the relative balancedness score. - #goal.balancedness.priority.weight - # The impact of strictness on the relative balancedness score. - #goal.balancedness.strictness.weight - # The maximum number of replicas that should reside on each broker to consider a cluster as overprovisioned after balancing its replica distribution. - overprovisioned.max.replicas.per.broker=3000 - # Configurations for the executor - # ======================================= - # If true, appropriate zookeeper Client { .. } entry required in jaas file located at $base_dir/config/cruise_control_jaas.conf - zookeeper.security.enabled=false - # The max number of partitions to move in/out on a given broker at a given time. - num.concurrent.partition.movements.per.broker=5 - # The max number of partitions to move between disks within a given broker at a given time. - num.concurrent.intra.broker.partition.movements=2 - # The max number of leadership movements within the whole cluster at a given time. - num.concurrent.leader.movements=1000 - # Default replica movement throttle. If not specified, movements are unthrottled by default. - # Set to 50 MBps (in Bps) - default.replication.throttle=52428800 - # The interval between two execution progress checks. - 
- execution.progress.check.interval.ms=10000 - default.replica.movement.strategies=com.linkedin.kafka.cruisecontrol.executor.strategy.PostponeUrpReplicaMovementStrategy,\ - com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeMinIsrWithOfflineReplicasStrategy,\ - com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeOneAboveMinIsrWithOfflineReplicasStrategy,\ - com.linkedin.kafka.cruisecontrol.executor.strategy.PrioritizeLargeReplicaMovementStrategy,\ - com.linkedin.kafka.cruisecontrol.executor.strategy.BaseReplicaMovementStrategy - # Configurations for anomaly detector - # ======================================= - # The goal violation notifier class - anomaly.notifier.class=com.linkedin.kafka.cruisecontrol.detector.notifier.SelfHealingNotifier - # The metric anomaly finder class - metric.anomaly.finder.class= - # The anomaly detection interval - anomaly.detection.interval.ms=600000 - # The goal violation to detect. - anomaly.detection.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal - self.healing.goals=com.linkedin.kafka.cruisecontrol.analyzer.goals.MinTopicLeadersPerBrokerGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.RackAwareDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaCapacityGoal,\ - 
com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkInboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.NetworkOutboundCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.CpuCapacityGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.DiskUsageDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.LeaderReplicaDistributionGoal,\ - com.linkedin.kafka.cruisecontrol.analyzer.goals.ReplicaDistributionGoal - # The interested metrics for metric anomaly analyzer. - metric.anomaly.analyzer.metrics=BROKER_PRODUCE_LOCAL_TIME_MS_MAX,\ - BROKER_PRODUCE_LOCAL_TIME_MS_MEAN,\ - BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MAX,\ - BROKER_CONSUMER_FETCH_LOCAL_TIME_MS_MEAN,\ - BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MAX,\ - BROKER_FOLLOWER_FETCH_LOCAL_TIME_MS_MEAN,\ - BROKER_LOG_FLUSH_TIME_MS_MAX,\ - BROKER_LOG_FLUSH_TIME_MS_MEAN - # True if recently demoted brokers are excluded from optimizations during broker failure self healing, false otherwise - broker.failure.exclude.recently.demoted.brokers=true - # True if recently removed brokers are excluded from optimizations during broker failure self healing, false otherwise - broker.failure.exclude.recently.removed.brokers=true - # True if recently demoted brokers are excluded from optimizations during goal violation self healing, false otherwise - goal.violation.exclude.recently.demoted.brokers=true - # True if recently removed brokers are excluded from optimizations during goal violation self healing, false otherwise - goal.violation.exclude.recently.removed.brokers=true - # The file path to store the failed broker list. - # This is to persist the broker failure time in case Cruise Control failed and restarted when some brokers are down. 
- failed.brokers.file.path=failedBrokers.txt - # Topic config provider class - topic.config.provider.class=com.linkedin.kafka.cruisecontrol.config.KafkaAdminTopicConfigProvider - # The cluster configurations for the TopicConfigProvider - cluster.configs.file=config/clusterConfigs.json - # The maximum time in milliseconds to store the response and access details of a completed kafka monitoring user task. - completed.kafka.monitor.user.task.retention.time.ms=86400000 - # The maximum time in milliseconds to store the response and access details of a completed cruise control monitoring user task. - completed.cruise.control.monitor.user.task.retention.time.ms=86400000 - # The maximum time in milliseconds to store the response and access details of a completed kafka admin user task. - completed.kafka.admin.user.task.retention.time.ms=604800000 - # The maximum time in milliseconds to store the response and access details of a completed cruise control admin user task. - completed.cruise.control.admin.user.task.retention.time.ms=604800000 - # The fallback maximum time in milliseconds to store the response and access details of a completed user task. - completed.user.task.retention.time.ms=86400000 - # The maximum time in milliseconds to retain the demotion history of brokers. - demotion.history.retention.time.ms=900000 - # The maximum time in milliseconds to retain the removal history of brokers. - removal.history.retention.time.ms=900000 - # The maximum number of completed kafka monitoring user tasks for which the response and access details will be cached. - max.cached.completed.kafka.monitor.user.tasks=20 - # The maximum number of completed cruise control monitoring user tasks for which the response and access details will be cached. - max.cached.completed.cruise.control.monitor.user.tasks=20 - # The maximum number of completed kafka admin user tasks for which the response and access details will be cached. 
- max.cached.completed.kafka.admin.user.tasks=30 - # The maximum number of completed cruise control admin user tasks for which the response and access details will be cached. - max.cached.completed.cruise.control.admin.user.tasks=30 - # The fallback maximum number of completed user tasks of certain type for which the response and access details will be cached. - max.cached.completed.user.tasks=25 - # The maximum number of user tasks for concurrently running in async endpoints across all users. - max.active.user.tasks=1000 - # Enable self healing for all anomaly detectors, unless the particular anomaly detector is explicitly disabled - self.healing.enabled=true - # Enable self healing for broker failure detector - #self.healing.broker.failure.enabled=true - # Enable self healing for goal violation detector - #self.healing.goal.violation.enabled=true - # Enable self healing for metric anomaly detector - self.healing.metric.anomaly.enabled=false - # Enable self healing for disk failure detector - #self.healing.disk.failure.enabled=true - # Use the Kafka API to detect broker failures (and not old ZK interface) - kafka.broker.failure.detection.enable=true - # Defines the threshold to mark a broker as dead. If a non-empty broker leaves the cluster at time T and did not join - # the cluster before T + broker.failure.alert.threshold.ms, the broker is defined as dead broker since T. - # An alert will be triggered in this case. - # Set to 15 minutes - broker.failure.alert.threshold.ms=900000 - # If self-healing is enabled and a broker is dead at T, - # self-healing will be triggered at T + broker.failure.self.healing.threshold.ms. - # Set to 90 minutes - broker.failure.self.healing.threshold.ms=5400000 - # The multiplier applied to the threshold of distribution goals used by goal.violation.detector. 
- #goal.violation.distribution.threshold.multiplier=2.50 - # The flag to indicate whether use of provisioner is enabled - provisioner.enable=false - # configurations for the webserver - # ================================ - # HTTP listen port - webserver.http.port=9090 - # HTTP listen address - webserver.http.address=0.0.0.0 - # Whether CORS support is enabled for API or not - webserver.http.cors.enabled=false - # Value for Access-Control-Allow-Origin - webserver.http.cors.origin=http://localhost:8080/ - # Value for Access-Control-Request-Method - webserver.http.cors.allowmethods=OPTIONS,GET,POST - # Headers that should be exposed to the Browser (Webapp) - # This is a special header that is used by the - # User Tasks subsystem and should be explicitly - # Enabled when CORS mode is used as part of the - # Admin Interface - webserver.http.cors.exposeheaders=User-Task-ID - # REST API default prefix - # (dont forget the ending *) - webserver.api.urlprefix=/kafkacruisecontrol/* - # Location where the Cruise Control frontend is deployed - webserver.ui.diskpath=./cruise-control-ui/dist/ - # URL path prefix for UI - # (dont forget the ending *) - webserver.ui.urlprefix=/* - # Time After which request is converted to Async - webserver.request.maxBlockTimeMs=10000 - # Default Session Expiry Period - webserver.session.maxExpiryTimeMs=60000 - # Session cookie path - webserver.session.path=/ - # Server Access Logs - webserver.accesslog.enabled=true - # Configurations for servlet - # ========================== - # Enable two-step verification for processing POST requests. - two.step.verification.enabled=false - # The maximum time in milliseconds to retain the requests in two-step (verification) purgatory. - two.step.purgatory.retention.time.ms=1209600000 - # The maximum number of requests in two-step (verification) purgatory. 
- two.step.purgatory.max.requests=25 - - default.replication.throttle=20971520 - disk.balance.threshold=1.05 - max.replicas.per.broker=20000 - num.concurrent.partition.movements.per.broker=5 - topics.with.min.leaders.per.broker=__consumer_offsets|mccs_push_notifications_feedback|triggers|aliases|aliases-realtime|profiles - cruiseControlAnnotations: - arc.ethos.adobe.net/ignore: "true" - cruiseControlTaskSpec: - RetryDurationMinutes: 2147483647 - image: docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/adobe/cruise-control:2.5.133-adbe-20240313 - log4jConfig: |- - rootLogger.level=INFO - appenders=console - appender.console.type=Console - appender.console.name=STDOUT - appender.console.layout.type=PatternLayout - appender.console.layout.pattern=[%d] %p %replace{%msg}{[\r\n]}{|} %throwable{separator(|)}(%c{2})%n - rootLogger.appenderRefs=console - rootLogger.appenderRef.console.ref=STDOUT - resourceRequirements: - limits: - cpu: "4" - memory: 2Gi - requests: - cpu: "4" - memory: 2Gi - serviceAccountName: kafka-cluster - tolerations: - - effect: NoSchedule - key: node.kubernetes.io/pipeline-workload - operator: Equal - value: "true" - - effect: NoSchedule - key: ethos.corp.adobe.com/ethos-workload - operator: Equal - value: arm64 - topicConfig: - partitions: 6 - replicationFactor: 3 - disruptionBudget: - budget: "1" - create: true - envoyConfig: - affinity: - nodeAffinity: {} - podAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - kafka - topologyKey: kubernetes.io/hostname - weight: 1 - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: eListenerName - operator: In - values: - - plaintext-ingress-az1 - - plaintext-ingress-az2 - - plaintext-ingress-az3 - - external-ingress-az1 - - external-ingress-az2 - - external-ingress-az3 - topologyKey: kubernetes.io/hostname - annotations: - 
arc.ethos.adobe.net/ignore: "true" - ops/certVersion: "12" - disruptionBudget: - budget: 25% - create: true - strategy: maxUnavailable - envoyCommandLineArgs: - concurrency: 2 - image: docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/envoyproxy/envoy:v1.36.2 - replicas: 6 - resourceRequirements: - limits: - cpu: "1" - memory: 1Gi - requests: - cpu: "1" - memory: 1Gi - serviceAccountName: kafka-cluster - tolerations: - - effect: NoSchedule - key: node.kubernetes.io/pipeline-workload - operator: Equal - value: "true" - - effect: NoSchedule - key: ethos.corp.adobe.com/ethos-workload - operator: Equal - value: arm64 - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: envoyingress - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - envs: - - name: POD_UID - valueFrom: - fieldRef: - fieldPath: metadata.uid - headlessServiceEnabled: true - ingressController: envoy - istioIngressConfig: {} - kRaft: false - listenersConfig: - externalListeners: - - accessMethod: LoadBalancer - anyCastPort: 9096 - config: - defaultIngressConfig: "" - ingressConfig: - ingress-az1: - envoyConfig: - annotations: - broker_group: az1 - hostnameOverride: kafka-1-az1-or1.prd.pipeline.adobedc.net - serviceType: ClusterIP - ingress-az2: - envoyConfig: - annotations: - broker_group: az2 - hostnameOverride: kafka-1-az2-or1.prd.pipeline.adobedc.net - serviceType: ClusterIP - ingress-az3: - envoyConfig: - annotations: - broker_group: az3 - hostnameOverride: kafka-1-az3-or1.prd.pipeline.adobedc.net - serviceType: ClusterIP - containerPort: 29094 - externalStartingPort: 8000 - name: plaintext - type: plaintext - usedForInnerBrokerCommunication: false - - accessMethod: LoadBalancer - anyCastPort: 9097 - config: - defaultIngressConfig: "" - ingressConfig: - secureingress-az1: - envoyConfig: - affinity: - podAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In 
- values: - - kafka - topologyKey: kubernetes.io/hostname - weight: 1 - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: eListenerName - operator: In - values: - - secure-secureingress-az1 - - secure-secureingress-az2 - - secure-secureingress-az3 - topologyKey: kubernetes.io/hostname - annotations: - broker_group: az1 - brokerHostnameTemplate: kafka-1-%id-or1.prd.pipeline.adobedc.net - hostnameOverride: kafka-1-az1-or1-secure.prd.pipeline.adobedc.net - serviceType: ClusterIP - secureingress-az2: - envoyConfig: - affinity: - podAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - kafka - topologyKey: kubernetes.io/hostname - weight: 1 - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: eListenerName - operator: In - values: - - secure-secureingress-az1 - - secure-secureingress-az2 - - secure-secureingress-az3 - topologyKey: kubernetes.io/hostname - annotations: - broker_group: az2 - brokerHostnameTemplate: kafka-1-%id-or1.prd.pipeline.adobedc.net - hostnameOverride: kafka-1-az2-or1-secure.prd.pipeline.adobedc.net - serviceType: ClusterIP - secureingress-az3: - envoyConfig: - affinity: - podAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - kafka - topologyKey: kubernetes.io/hostname - weight: 1 - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: eListenerName - operator: In - values: - - secure-secureingress-az1 - - secure-secureingress-az2 - - secure-secureingress-az3 - topologyKey: kubernetes.io/hostname - annotations: - broker_group: az3 - brokerHostnameTemplate: kafka-1-%id-or1.prd.pipeline.adobedc.net - hostnameOverride: 
kafka-1-az3-or1-secure.prd.pipeline.adobedc.net - serviceType: ClusterIP - containerPort: 29095 - externalStartingPort: -1 - name: secure - tlsSecretName: prod-adobedc-net-tls - type: sasl_plaintext - usedForInnerBrokerCommunication: false - internalListeners: - - containerPort: 29092 - internalStartingPort: 0 - name: internal - type: plaintext - usedForInnerBrokerCommunication: true - - containerPort: 29093 - internalStartingPort: 0 - name: controller - type: plaintext - usedForControllerCommunication: true - usedForInnerBrokerCommunication: false - - containerPort: 29096 - internalStartingPort: 0 - name: sasl_plain - type: sasl_plaintext - usedForInnerBrokerCommunication: false - monitoringConfig: - jmxImage: docker-pipeline-upstream-mirror.dr-uw2.adobeitc.com/amuraru/jmx-javaagent:0.19.1-multi - pathToJar: /jmx_prometheus_javaagent.jar - oneBrokerPerNode: true - propagateLabels: true - readOnlyConfig: |- - __do_no_edit_diskSize=4194304 - authorizer.class.name=com.adobe.core.pipeline.kafka.security.server.auth.CustomAclAuthorizerWithAccessTrackingMetrics - auto.create.topics.enable=false - auto.leader.rebalance.enable=true - background.threads=20 - broker.id.generation.enable=false - cruise.control.metrics.reporter.acks=1 - cruise.control.metrics.topic=__CruiseControlMetrics - cruise.control.metrics.topic.min.insync.replicas=1 - default.replication.factor=3 - inter.broker.protocol.version=3.6 - listener.name.sasl_plain.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required; - listener.name.sasl_plain.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler - listener.name.secure.oauthbearer.sasl.jaas.config=org.apache.kafka.common.security.oauthbearer.OAuthBearerLoginModule required; - listener.name.secure.oauthbearer.sasl.server.callback.handler.class=com.adobe.core.pipeline.kafka.security.server.auth.ImsValidatingCallbackHandler - 
log.message.format.version=0.11.0 - log.message.timestamp.after.max.ms=86400000 - log.segment.bytes=536870912 - max.incremental.fetch.session.cache.slots=1000 - min.insync.replicas=2 - num.io.threads=144 - num.network.threads=120 - num.partitions=10 - num.recovery.threads.per.data.dir=8 - num.replica.fetchers=4 - offsets.commit.required.acks=1 - principal.builder.class=com.adobe.core.pipeline.kafka.security.server.auth.PipelinePrincipalBuilder - queued.max.requests=1000 - replica.fetch.max.bytes=5242880 - replica.lag.time.max.ms=15000 - replica.socket.receive.buffer.bytes=-1 - sasl.enabled.mechanisms=OAUTHBEARER - sasl.ims.certificate.location=static.adobelogin.com/keys/prod/ - sasl.ims.url=https://ims-na1.adobelogin.com/ - socket.listen.backlog.size=1024 - socket.receive.buffer.bytes=-1 - socket.send.buffer.bytes=-1 - super.users=Broker:ANONYMOUS - zookeeper.connection.timeout.ms=18000 - removeUnusedIngressResources: false - rollingUpgradeConfig: - concurrentBrokerRestartCountPerRack: 2 - failureThreshold: 2 - taintedBrokersSelector: - matchExpressions: - - key: shredder.ethos.adobe.net/upgrade-status - operator: In - values: - - parked - zkAddresses: - - pipeline-zookeeper-client:2181 - zkPath: /kafka - -kind: List -metadata: - resourceVersion: "" From f7452fc857c6ff4e5857019f7d8f1d5d7a4ba9d8 Mon Sep 17 00:00:00 2001 From: Daniel Vaseekaran Date: Tue, 16 Jun 2026 14:08:13 -0400 Subject: [PATCH 25/25] Move to Util --- pkg/resources/kafka/kafka.go | 39 +----------------------------------- pkg/resources/kafka/util.go | 31 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 38 deletions(-) diff --git a/pkg/resources/kafka/kafka.go b/pkg/resources/kafka/kafka.go index f4deb7795..908c5be40 100644 --- a/pkg/resources/kafka/kafka.go +++ b/pkg/resources/kafka/kafka.go @@ -942,43 +942,6 @@ func (r *Reconciler) updateStatusWithDockerImageAndVersion(brokerId int32, broke return nil } -// syncResourceRequests overwrites CPU and memory requests in desiredPod's 
containers -// with the values from currentPod so that request-only changes do not trigger a pod restart. -func syncResourceRequests(desiredPod, currentPod *corev1.Pod) { - syncContainerResourceRequests(desiredPod.Spec.Containers, currentPod.Spec.Containers) - syncContainerResourceRequests(desiredPod.Spec.InitContainers, currentPod.Spec.InitContainers) -} - -// syncPodAffinities syncs ScaleOps-related pod affinities from the current pod to the desired pod. -// This preserves affinities created by ScaleOps to prevent unnecessary pod restarts. -func syncPodAffinities(desiredPod, currentPod *corev1.Pod) { - syncScaleOpsAffinities(desiredPod, currentPod) -} - -func syncContainerResourceRequests(desired, current []corev1.Container) { - index := make(map[string]corev1.ResourceList, len(current)) - for _, c := range current { - index[c.Name] = c.Resources.Requests - } - for i := range desired { - c := &desired[i] - reqs, ok := index[c.Name] - if !ok { - continue - } - if c.Resources.Requests == nil { - c.Resources.Requests = make(corev1.ResourceList) - } - for _, res := range []corev1.ResourceName{corev1.ResourceCPU, corev1.ResourceMemory} { - if val, exists := reqs[res]; exists { - c.Resources.Requests[res] = val - } else { - delete(c.Resources.Requests, res) - } - } - } -} - //gocyclo:ignore func (r *Reconciler) handleRollingUpgrade(log logr.Logger, desiredPod, currentPod *corev1.Pod, desiredType reflect.Type) error { // Since toleration does not support patchStrategy:"merge,retainKeys", @@ -1000,7 +963,7 @@ func (r *Reconciler) handleRollingUpgrade(log logr.Logger, desiredPod, currentPo syncResourceRequests(desiredPod, currentPod) // If current pod had affinities created by ScaleOps, we need to sync them to desiredPod, // otherwise they will be removed and cause pod restart - syncPodAffinities(desiredPod, currentPod) + syncScaleOpsAffinities(desiredPod, currentPod) } // Check if the resource actually updated or if labels match TaintedBrokersSelector patchResult, err := 
patch.DefaultPatchMaker.Calculate(currentPod, desiredPod) diff --git a/pkg/resources/kafka/util.go b/pkg/resources/kafka/util.go index 7f1d6c077..d3c9c8c67 100644 --- a/pkg/resources/kafka/util.go +++ b/pkg/resources/kafka/util.go @@ -76,6 +76,37 @@ func generateRandomClusterID() string { return base64.URLEncoding.EncodeToString(randomUUID[:]) } +// syncResourceRequests overwrites CPU and memory requests in desiredPod's containers +// with the values from currentPod so that request-only changes do not trigger a pod restart. +func syncResourceRequests(desiredPod, currentPod *corev1.Pod) { + syncContainerResourceRequests(desiredPod.Spec.Containers, currentPod.Spec.Containers) + syncContainerResourceRequests(desiredPod.Spec.InitContainers, currentPod.Spec.InitContainers) +} + +func syncContainerResourceRequests(desired, current []corev1.Container) { + index := make(map[string]corev1.ResourceList, len(current)) + for _, c := range current { + index[c.Name] = c.Resources.Requests + } + for i := range desired { + c := &desired[i] + reqs, ok := index[c.Name] + if !ok { + continue + } + if c.Resources.Requests == nil { + c.Resources.Requests = make(corev1.ResourceList) + } + for _, res := range []corev1.ResourceName{corev1.ResourceCPU, corev1.ResourceMemory} { + if val, exists := reqs[res]; exists { + c.Resources.Requests[res] = val + } else { + delete(c.Resources.Requests, res) + } + } + } +} + // syncScaleOpsAffinities syncs all scale ops related affinities from the current pod to the desired pod. // This includes pod affinities with "scaleops.sh/managed-unevictable" label selector // and node affinities with "scaleops.sh/node-packing=true" selector.