@@ -65,7 +65,7 @@ def collect(self):
6565 Metric: Prometheus Metric objects that are not excluded.
6666 """
6767 for metric in self .base_registry .collect ():
68- if not any (name .startswith (metric . name ) for name in self .exclude_names ):
68+ if not any (metric . name .startswith (name ) for name in self .exclude_names ):
6969 yield metric
7070
7171
@@ -83,11 +83,15 @@ def get_filtered_metrics() -> str:
8383 multiprocess .MultiProcessCollector (base_registry )
8484
8585 filtered_registry = CollectorRegistry ()
86- # 注册一个新的colletor,过滤gauge指标
87- filtered_registry .register (SimpleCollector (base_registry , EXCLUDE_LABELS ))
86+ # 动态获取需要排除的 gauge 指标列表
87+ exclude_labels = main_process_metrics .get_excluded_metrics ()
88+ # 注册一个新的collector,过滤gauge指标
89+ filtered_registry .register (SimpleCollector (base_registry , exclude_labels ))
8890
8991 # 将gauge指标重新注册到filtered_registry中,从内存中读取
9092 main_process_metrics .re_register_gauge (filtered_registry )
93+ # 将speculative中的gauge指标也重新注册
94+ main_process_metrics .re_register_speculative_gauge (filtered_registry )
9195
9296 return generate_latest (filtered_registry ).decode ("utf-8" )
9397
@@ -195,7 +199,7 @@ class MetricsManager:
195199 "type" : Gauge ,
196200 "name" : "fastdeploy:num_requests_running" ,
197201 "description" : "Number of requests currently running" ,
198- "kwargs" : {"multiprocess_mode" : "sum" },
202+ "kwargs" : {},
199203 },
200204 "num_requests_waiting" : {
201205 "type" : Gauge ,
@@ -625,19 +629,22 @@ def __init__(self):
625629 # 在模块加载,指标注册先设置Prometheus环境变量
626630 setup_multiprocess_prometheus ()
627631
628- # 动态创建所有指标
632+ # 动态创建所有非 gauge 型指标
629633 for metric_name , config in self .METRICS .items ():
630634 setattr (
631635 self ,
632636 metric_name ,
633637 config ["type" ](config ["name" ], config ["description" ], ** config ["kwargs" ]),
634638 )
635- # 动态创建所有指标
639+ # 动态创建所有 gauge 型指标,统一配置 multiprocess_mode 为 livesum
636640 for metric_name , config in self .GAUGE_METRICS .items ():
641+ kwargs = config ["kwargs" ].copy ()
642+ if "multiprocess_mode" not in kwargs :
643+ kwargs ["multiprocess_mode" ] = "livesum"
637644 setattr (
638645 self ,
639646 metric_name ,
640- config ["type" ](config ["name" ], config ["description" ], ** config [ " kwargs" ] ),
647+ config ["type" ](config ["name" ], config ["description" ], ** kwargs ),
641648 )
642649 # 动态创建server metrics
643650 for metric_name , config in self .SERVER_METRICS .items ():
@@ -695,17 +702,22 @@ def _init_speculative_metrics(self, speculative_method, num_speculative_tokens):
695702 Gauge (
696703 f"{ config ['name' ]} _{ i } " ,
697704 f"{ config ['description' ]} (head { i } )" ,
705+ multiprocess_mode = "livesum" ,
698706 )
699707 )
700708 setattr (self , metric_name , gauges )
701709 else :
710+ # For Gauge metrics, automatically add multiprocess_mode="livesum"
711+ kwargs = config ["kwargs" ].copy ()
712+ if config ["type" ] == Gauge and "multiprocess_mode" not in kwargs :
713+ kwargs ["multiprocess_mode" ] = "livesum"
702714 setattr (
703715 self ,
704716 metric_name ,
705717 config ["type" ](
706718 config ["name" ],
707719 config ["description" ],
708- ** config [ " kwargs" ] ,
720+ ** kwargs ,
709721 ),
710722 )
711723
@@ -766,6 +778,19 @@ def register_speculative_metrics(self, registry: CollectorRegistry):
766778 else :
767779 registry .register (getattr (self , metric_name ))
768780
def re_register_speculative_gauge(self, registry: CollectorRegistry):
    """Register the gauge entries of SPECULATIVE_METRICS on ``registry``.

    Nothing is registered when this instance has no
    ``spec_decode_draft_acceptance_rate`` attribute, which is used here as
    the marker that the speculative metrics were created on this instance.

    Args:
        registry: The registry that receives the gauge objects.
    """
    if not hasattr(self, "spec_decode_draft_acceptance_rate"):
        return

    for attr_name, spec in self.SPECULATIVE_METRICS.items():
        if attr_name == "spec_decode_draft_single_head_acceptance_rate":
            # This attribute holds a collection of gauges rather than a
            # single metric, so each element is registered individually.
            pending = getattr(self, attr_name)
        elif spec["type"] == Gauge:
            pending = (getattr(self, attr_name),)
        else:
            # Entries that are not gauges are not handled by this method.
            continue

        for gauge_obj in pending:
            registry.register(gauge_obj)
793+
769794 def re_register_gauge (self , registry : CollectorRegistry ):
770795 """Re-register gauge to the specified registry"""
771796 for metric_name in self .GAUGE_METRICS :
@@ -789,16 +814,19 @@ def register_all(self, registry: CollectorRegistry):
789814 if hasattr (main_process_metrics , "spec_decode_draft_acceptance_rate" ):
790815 self .register_speculative_metrics (registry )
791816
792- @classmethod
793- def get_excluded_metrics (cls ) -> Set [str ]:
def get_excluded_metrics(self) -> Set[str]:
    """Return the set of metric names that should be excluded.

    The result always contains the ``name`` of every GAUGE_METRICS entry.
    If the instance exposes SPECULATIVE_METRICS, the ``name`` of each entry
    whose ``type`` is ``Gauge`` or ``list[Gauge]`` is included as well.

    Returns:
        A newly built set of metric name strings.
    """
    names = set()
    for gauge_cfg in self.GAUGE_METRICS.values():
        names.add(gauge_cfg["name"])

    # Without a SPECULATIVE_METRICS attribute there is nothing more to add.
    if not hasattr(self, "SPECULATIVE_METRICS"):
        return names

    names.update(
        spec_cfg["name"]
        for spec_cfg in self.SPECULATIVE_METRICS.values()
        if spec_cfg["type"] == Gauge or spec_cfg["type"] == list[Gauge]
    )
    return names
796826
797827
798828main_process_metrics = MetricsManager ()
799829
800830# Recording ZMQ metrics is relatively time-consuming, so it is disabled by default and enabled through the DEBUG setting (envs.FD_DEBUG)
801831if envs .FD_DEBUG :
802832 main_process_metrics .init_zmq_metrics ()
803-
804- EXCLUDE_LABELS = MetricsManager .get_excluded_metrics ()
0 commit comments