From 93424f095f95d4a2b08743451257722c8e7bade1 Mon Sep 17 00:00:00 2001 From: Paurush Garg Date: Wed, 25 Feb 2026 09:12:43 -0800 Subject: [PATCH 1/2] Add metric to track ingested histogram bucket count per user Signed-off-by: Paurush Garg --- pkg/cortexpb/histograms.go | 16 ++++++++++++++++ pkg/ingester/ingester.go | 4 ++++ pkg/ingester/metrics.go | 6 ++++++ 3 files changed, 26 insertions(+) diff --git a/pkg/cortexpb/histograms.go b/pkg/cortexpb/histograms.go index 4e06d2254ec..c4ea2a63715 100644 --- a/pkg/cortexpb/histograms.go +++ b/pkg/cortexpb/histograms.go @@ -160,3 +160,19 @@ func spansPromProtoToSpansProto(s []prompb.BucketSpan) []BucketSpan { return spans } + +func (h Histogram) BucketCount() int { + count := 0 + if h.IsFloatHistogram() { + count = len(h.PositiveCounts) + len(h.NegativeCounts) + if h.GetZeroCountFloat() > 0 { + count++ + } + } else { + count = len(h.PositiveDeltas) + len(h.NegativeDeltas) + if h.GetZeroCountInt() > 0 { + count++ + } + } + return count +} diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index d53dc35a573..ad8a572bc75 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -1342,6 +1342,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte succeededExemplarsCount = 0 failedExemplarsCount = 0 startAppend = time.Now() + succeededHistogramBucketsCount = 0 sampleOutOfBoundsCount = 0 sampleOutOfOrderCount = 0 sampleTooOldCount = 0 @@ -1507,6 +1508,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte if ref != 0 { if _, err = app.AppendHistogram(ref, copiedLabels, hp.TimestampMs, h, fh); err == nil { succeededHistogramsCount++ + succeededHistogramBucketsCount += hp.BucketCount() continue } } else { @@ -1518,6 +1520,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte newSeries = append(newSeries, copiedLabels) } succeededHistogramsCount++ + succeededHistogramBucketsCount += hp.BucketCount() continue } } 
@@ -1612,6 +1615,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte i.metrics.ingestedSamplesFail.Add(float64(failedSamplesCount)) i.metrics.ingestedHistograms.Add(float64(succeededHistogramsCount)) i.metrics.ingestedHistogramsFail.Add(float64(failedHistogramsCount)) + i.metrics.ingestedHistogramBuckets.WithLabelValues(userID).Add(float64(succeededHistogramBucketsCount)) i.metrics.ingestedExemplars.Add(float64(succeededExemplarsCount)) i.metrics.ingestedExemplarsFail.Add(float64(failedExemplarsCount)) diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index b0b39241a15..6dbfe9d0f0c 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -38,6 +38,7 @@ type ingesterMetrics struct { ingestedHistogramsFail prometheus.Counter ingestedExemplarsFail prometheus.Counter ingestedMetadataFail prometheus.Counter + ingestedHistogramBuckets *prometheus.CounterVec oooLabelsTotal *prometheus.CounterVec queries prometheus.Counter queriedSamples prometheus.Histogram @@ -130,6 +131,10 @@ func newIngesterMetrics(r prometheus.Registerer, Name: "cortex_ingester_ingested_metadata_failures_total", Help: "The total number of metadata that errored on ingestion.", }), + ingestedHistogramBuckets: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_ingester_ingested_histogram_buckets_total", + Help: "The total number of histogram buckets ingested per user.", + }, []string{"user"}), oooLabelsTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_ingester_out_of_order_labels_total", Help: "The total number of out of order label found per user.", @@ -359,6 +364,7 @@ func (m *ingesterMetrics) deletePerUserMetrics(userID string) { m.usagePerLabelSet.DeletePartialMatch(prometheus.Labels{"user": userID}) m.limitsPerLabelSet.DeletePartialMatch(prometheus.Labels{"user": userID}) m.pushErrorsTotal.DeletePartialMatch(prometheus.Labels{"user": userID}) + 
m.ingestedHistogramBuckets.DeleteLabelValues(userID) if m.memSeriesCreatedTotal != nil { m.memSeriesCreatedTotal.DeleteLabelValues(userID) From 2650e245c759b32acfe4d447a0079137e53768b7 Mon Sep 17 00:00:00 2001 From: Paurush Garg Date: Wed, 25 Feb 2026 14:31:39 -0800 Subject: [PATCH 2/2] Change tracking metric type from counter to histogram Signed-off-by: Paurush Garg --- CHANGELOG.md | 1 + pkg/ingester/ingester.go | 6 ++-- pkg/ingester/metrics.go | 56 +++++++++++++++++++----------------- pkg/ingester/metrics_test.go | 2 ++ 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7c526927d8..3a23e9b389d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ * [ENHANCEMENT] Distributor: Validate metric name before removing empty labels. #7253 * [ENHANCEMENT] Make cortex_ingester_tsdb_sample_ooo_delta metric per-tenant #7278 * [ENHANCEMENT] Distributor: Add dimension `nhcb` to keep track of nhcb samples in `cortex_distributor_received_samples_total` and `cortex_distributor_samples_in_total` metrics. +* [ENHANCEMENT] Ingester: Add `cortex_ingester_ingested_histogram_buckets` metric to track the number of native histogram buckets ingested per user. #7297 * [BUGFIX] Distributor: If remote write v2 is disabled, explicitly return HTTP 415 (Unsupported Media Type) for Remote Write V2 requests instead of attempting to parse them as V1. #7238 * [BUGFIX] Ring: Change DynamoDB KV to retry indefinitely for WatchKey. #7088 * [BUGFIX] Ruler: Add XFunctions validation support. 
#7111 diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index ad8a572bc75..ff37b5a335b 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -1342,7 +1342,6 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte succeededExemplarsCount = 0 failedExemplarsCount = 0 startAppend = time.Now() - succeededHistogramBucketsCount = 0 sampleOutOfBoundsCount = 0 sampleOutOfOrderCount = 0 sampleTooOldCount = 0 @@ -1508,7 +1507,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte if ref != 0 { if _, err = app.AppendHistogram(ref, copiedLabels, hp.TimestampMs, h, fh); err == nil { succeededHistogramsCount++ - succeededHistogramBucketsCount += hp.BucketCount() + i.metrics.ingestedHistogramBuckets.WithLabelValues(userID).Observe(float64(hp.BucketCount())) continue } } else { @@ -1520,7 +1519,7 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte newSeries = append(newSeries, copiedLabels) } succeededHistogramsCount++ - succeededHistogramBucketsCount += hp.BucketCount() + i.metrics.ingestedHistogramBuckets.WithLabelValues(userID).Observe(float64(hp.BucketCount())) continue } } @@ -1615,7 +1614,6 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte i.metrics.ingestedSamplesFail.Add(float64(failedSamplesCount)) i.metrics.ingestedHistograms.Add(float64(succeededHistogramsCount)) i.metrics.ingestedHistogramsFail.Add(float64(failedHistogramsCount)) - i.metrics.ingestedHistogramBuckets.WithLabelValues(userID).Add(float64(succeededHistogramBucketsCount)) i.metrics.ingestedExemplars.Add(float64(succeededExemplarsCount)) i.metrics.ingestedExemplarsFail.Add(float64(failedExemplarsCount)) diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index 6dbfe9d0f0c..773646cb12b 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -30,29 +30,29 @@ const ( ) type ingesterMetrics struct { - ingestedSamples 
prometheus.Counter - ingestedHistograms prometheus.Counter - ingestedExemplars prometheus.Counter - ingestedMetadata prometheus.Counter - ingestedSamplesFail prometheus.Counter - ingestedHistogramsFail prometheus.Counter - ingestedExemplarsFail prometheus.Counter - ingestedMetadataFail prometheus.Counter - ingestedHistogramBuckets *prometheus.CounterVec - oooLabelsTotal *prometheus.CounterVec - queries prometheus.Counter - queriedSamples prometheus.Histogram - queriedExemplars prometheus.Histogram - queriedSeries prometheus.Histogram - queriedChunks prometheus.Histogram - memSeries prometheus.Gauge - memMetadata prometheus.Gauge - memUsers prometheus.Gauge - memSeriesCreatedTotal *prometheus.CounterVec - memMetadataCreatedTotal *prometheus.CounterVec - memSeriesRemovedTotal *prometheus.CounterVec - memMetadataRemovedTotal *prometheus.CounterVec - pushErrorsTotal *prometheus.CounterVec + ingestedSamples prometheus.Counter + ingestedHistograms prometheus.Counter + ingestedExemplars prometheus.Counter + ingestedMetadata prometheus.Counter + ingestedSamplesFail prometheus.Counter + ingestedHistogramsFail prometheus.Counter + ingestedExemplarsFail prometheus.Counter + ingestedMetadataFail prometheus.Counter + ingestedHistogramBuckets *prometheus.HistogramVec + oooLabelsTotal *prometheus.CounterVec + queries prometheus.Counter + queriedSamples prometheus.Histogram + queriedExemplars prometheus.Histogram + queriedSeries prometheus.Histogram + queriedChunks prometheus.Histogram + memSeries prometheus.Gauge + memMetadata prometheus.Gauge + memUsers prometheus.Gauge + memSeriesCreatedTotal *prometheus.CounterVec + memMetadataCreatedTotal *prometheus.CounterVec + memSeriesRemovedTotal *prometheus.CounterVec + memMetadataRemovedTotal *prometheus.CounterVec + pushErrorsTotal *prometheus.CounterVec activeSeriesPerUser *prometheus.GaugeVec activeNHSeriesPerUser *prometheus.GaugeVec @@ -131,9 +131,13 @@ func newIngesterMetrics(r prometheus.Registerer, Name: 
"cortex_ingester_ingested_metadata_failures_total", Help: "The total number of metadata that errored on ingestion.", }), - ingestedHistogramBuckets: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_ingester_ingested_histogram_buckets_total", - Help: "The total number of histogram buckets ingested per user.", + ingestedHistogramBuckets: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ + Name: "cortex_ingester_ingested_histogram_buckets", + Help: "The number of ingested native histogram buckets per user.", + NativeHistogramBucketFactor: 1.1, + NativeHistogramMaxBucketNumber: 100, + NativeHistogramMinResetDuration: 1, + Buckets: prometheus.ExponentialBuckets(1, 2, 10), // 1 to 512 buckets }, []string{"user"}), oooLabelsTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_ingester_out_of_order_labels_total", diff --git a/pkg/ingester/metrics_test.go b/pkg/ingester/metrics_test.go index dc23bb3b136..011e530c896 100644 --- a/pkg/ingester/metrics_test.go +++ b/pkg/ingester/metrics_test.go @@ -157,6 +157,8 @@ func TestIngesterMetrics(t *testing.T) { # HELP cortex_ingester_ingested_native_histograms_failures_total The total number of native histograms that errored on ingestion. # TYPE cortex_ingester_ingested_native_histograms_failures_total counter cortex_ingester_ingested_native_histograms_failures_total 0 + # HELP cortex_ingester_ingested_histogram_buckets The number of ingested native histogram buckets per user. + # TYPE cortex_ingester_ingested_histogram_buckets histogram # HELP cortex_ingester_ingestion_rate_samples_per_second Current ingestion rate in samples/sec that ingester is using to limit access. # TYPE cortex_ingester_ingestion_rate_samples_per_second gauge cortex_ingester_ingestion_rate_samples_per_second 0