From f37a5954c7cd4719375cb069e0e44f1d1fb4a1a2 Mon Sep 17 00:00:00 2001 From: Essam Eldaly Date: Tue, 5 May 2026 12:11:56 -0700 Subject: [PATCH 1/3] Add reason to query stats. Add query too expensive to qfe reasons Signed-off-by: Essam Eldaly --- pkg/frontend/transport/handler.go | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/pkg/frontend/transport/handler.go b/pkg/frontend/transport/handler.go index 47ec69876b6..08f08582bba 100644 --- a/pkg/frontend/transport/handler.go +++ b/pkg/frontend/transport/handler.go @@ -65,6 +65,7 @@ const ( reasonChunksLimitStoreGateway = "store_gateway_chunks_limit" reasonBytesLimitStoreGateway = "store_gateway_bytes_limit" reasonUnOptimizedRegexMatcher = `unoptimized_regex_matcher` + reasonQueryTooExpensive = "query_too_expensive" limitTooManySamples = `query processing would load too many samples into memory` limitTimeRangeExceeded = `the query time range exceeds the limit` @@ -74,6 +75,7 @@ const ( limitChunkBytesFetched = `the query hit the aggregated chunks size limit` limitDataBytesFetched = `the query hit the aggregated data size limit` limitUnOptimizedRegexMatcher = `unoptimized regex matcher` + limitQueryTooExpensive = `query spent too long in evaluation` // Store gateway limits. limitSeriesStoreGateway = `exceeded series limit` @@ -562,16 +564,6 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query } } - shouldLog := source == requestmeta.SourceAPI || (f.cfg.EnabledRulerQueryStatsLog && source == requestmeta.SourceRuler) - if shouldLog { - logMessage = append(logMessage, formatQueryString(queryString)...) - if error != nil { - level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...) - } else { - level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...) - } - } - var reason string if statusCode == http.StatusTooManyRequests { reason = reasonTooManyRequests @@ -602,6 +594,8 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query reason = reasonBytesLimitStoreGateway } else if strings.Contains(errMsg, limitUnOptimizedRegexMatcher) { reason = reasonUnOptimizedRegexMatcher + } else if strings.Contains(errMsg, limitQueryTooExpensive) { + reason = reasonQueryTooExpensive } } else if statusCode == http.StatusServiceUnavailable && error != nil { errMsg := error.Error() @@ -610,9 +604,20 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query } } if len(reason) > 0 { + logMessage = append(logMessage, "reason", reason) f.rejectedQueries.WithLabelValues(reason, source, userID).Inc() stats.LimitHit = reason } + + shouldLog := source == requestmeta.SourceAPI || (f.cfg.EnabledRulerQueryStatsLog && source == requestmeta.SourceRuler) + if shouldLog { + logMessage = append(logMessage, formatQueryString(queryString)...) + if error != nil { + level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...) + } else { + level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...) + } + } } func (f *Handler) parseRequestQueryString(r *http.Request, bodyBuf bytes.Buffer) url.Values { From 56fde5ff621df11efc0c6b25bff9673583f10473 Mon Sep 17 00:00:00 2001 From: Essam Eldaly Date: Tue, 5 May 2026 12:17:21 -0700 Subject: [PATCH 2/3] update changelog Signed-off-by: Essam Eldaly --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a7640e63d8..29fe55f95a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ * [ENHANCEMENT] Compactor: Prevent partition compaction to compact any blocks marked for deletion. #7391 * [ENHANCEMENT] Distributor: Optimize memory allocations by reusing the existing capacity of these pooled slices in the Prometheus Remote Write 2.0 path. #7392 * [ENHANCEMENT] Upgrade gRPC from v1.71.2 to v1.79.3 to address CVE-2026-33186. #7460 +* [ENHANCEMENT] Query Frontend: Add `query_too_expensive` reason to QFE and `reason` field to query stats. #7479 * [BUGFIX] Querier: Fix queryWithRetry and labelsWithRetry returning (nil, nil) on cancelled context by propagating ctx.Err(). #7370 * [BUGFIX] Metrics Helper: Fix non-deterministic bucket order in merged histograms by sorting buckets after map iteration, matching Prometheus client library behavior. #7380 * [BUGFIX] Distributor: Return HTTP 401 Unauthorized when tenant ID resolution fails in the Prometheus Remote Write 2.0 path. #7389 From 9bd23d5d5389e9b308b9afccfd0f7ed3541896de Mon Sep 17 00:00:00 2001 From: Essam Eldaly Date: Tue, 5 May 2026 13:54:38 -0700 Subject: [PATCH 3/3] Add tests Signed-off-by: Essam Eldaly --- pkg/frontend/transport/handler_test.go | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/pkg/frontend/transport/handler_test.go b/pkg/frontend/transport/handler_test.go index 6cd90789993..c04976b0aa3 100644 --- a/pkg/frontend/transport/handler_test.go +++ b/pkg/frontend/transport/handler_test.go @@ -400,6 +400,22 @@ func TestHandler_ServeHTTP(t *testing.T) { }, expectedStatusCode: http.StatusServiceUnavailable, }, + { + name: "test handler with reasonQueryTooExpensive", + cfg: HandlerConfig{QueryStatsEnabled: true}, + expectedMetrics: 6, + roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusUnprocessableEntity, + Body: io.NopCloser(strings.NewReader("query timed out: query spent too long in evaluation - consider simplifying your query")), + }, nil + }), + additionalMetricsCheckFunc: func(h *Handler) { + v := promtest.ToFloat64(h.rejectedQueries.WithLabelValues(reasonQueryTooExpensive, requestmeta.SourceAPI, userID)) + assert.Equal(t, float64(1), v) + }, + expectedStatusCode: http.StatusUnprocessableEntity, + }, { name: "test cortex_slow_queries_total", cfg: HandlerConfig{QueryStatsEnabled: true, LogQueriesLongerThan: time.Second * 2}, @@ -584,6 +600,27 @@ func TestReportQueryStatsFormat(t *testing.T) { } } +func TestReportQueryStatsRejectionReason(t *testing.T) { + outputBuf := bytes.NewBuffer(nil) + logger := log.NewSyncLogger(log.NewLogfmtLogger(outputBuf)) + userID := "fake" + req, _ := http.NewRequest(http.MethodGet, "http://localhost:8080/prometheus/api/v1/query", nil) + resp := &http.Response{ContentLength: 0} + responseTime := time.Second + + handler := NewHandler(HandlerConfig{QueryStatsEnabled: true}, tenantfederation.Config{}, http.DefaultTransport, logger, nil) + req = req.WithContext(requestmeta.ContextWithRequestSource(context.Background(), requestmeta.SourceAPI)) + + queryErr := httpgrpc.Errorf(http.StatusUnprocessableEntity, "%s", `query timed out: query spent too long in evaluation - consider simplifying your query`) + handler.reportQueryStats(req, requestmeta.SourceAPI, userID, nil, responseTime, &querier_stats.QueryStats{}, queryErr, http.StatusUnprocessableEntity, resp) + + data, err := io.ReadAll(outputBuf) + require.NoError(t, err) + logLine := string(data) + assert.Contains(t, logLine, "reason=query_too_expensive") + assert.Contains(t, logLine, "status_code=422") +} + func Test_ExtractTenantIDs(t *testing.T) { roundTripper := roundTripperFunc(func(req *http.Request) (*http.Response, error) { return &http.Response{