@@ -18,10 +18,11 @@ class GrpcServerMetricInterceptor(BaseGrpcServerInterceptor):
1818 """A gRPC server interceptor for collecting and reporting metrics using Prometheus.
1919
2020 This interceptor measures the response time of gRPC methods and records it in a Prometheus histogram.
21+ It also tracks the number of active requests using a Prometheus gauge.
2122 It also captures errors and logs them for monitoring purposes.
2223 """
2324
24- from prometheus_client import Histogram
25+ from prometheus_client import Gauge , Histogram
2526
2627 "Buckets for measuring response times between 0 and 1 second."
2728 ZERO_TO_ONE_SECONDS_BUCKETS : ClassVar [list [float ]] = [i / 1000 for i in range (0 , 1000 , 5 )]
@@ -39,20 +40,27 @@ class GrpcServerMetricInterceptor(BaseGrpcServerInterceptor):
3940
4041 "Prometheus histogram for tracking response times of gRPC methods."
4142 RESPONSE_TIME_SECONDS = Histogram (
42- "response_time_seconds " ,
43- "Time spent processing request" ,
43+ "grpc_response_time_seconds " ,
44+ "Time spent processing gRPC request" ,
4445 labelnames = ("package" , "service" , "method" , "status_code" ),
4546 buckets = TOTAL_BUCKETS ,
4647 )
4748
49+ "Prometheus gauge for tracking active gRPC requests."
50+ ACTIVE_REQUESTS = Gauge (
51+ "grpc_active_requests" ,
52+ "Number of active gRPC requests" ,
53+ labelnames = ("package" , "service" , "method" ),
54+ )
55+
def intercept(
    self,
    method: Callable,
    request: object,
    context: grpc.ServicerContext,
    method_name_model: MethodName,
) -> object:
    """Intercept a gRPC server call to record its latency and track in-flight requests.

    When Prometheus is disabled in the global config the call is passed straight
    through. Otherwise the active-requests gauge is incremented for the duration of
    the call and the elapsed time is observed in the response-time histogram,
    labelled with the status code of the call (also when the call fails).

    Args:
        method (Callable): The gRPC method being intercepted.
        request (object): The request object passed to the method.
        context (grpc.ServicerContext): The context of the gRPC call.
        method_name_model (MethodName): Parsed method name providing the
            ``package``, ``service`` and ``method`` label values.

    Returns:
        object: Whatever the intercepted method returns.

    Raises:
        Exception: Any exception raised by the method is passed to
            ``BaseUtils.capture_exception`` and then re-raised unchanged.
    """
    # Metrics disabled: call straight through without touching Prometheus.
    if not BaseConfig.global_config().PROMETHEUS.IS_ENABLED:
        return method(request, context)

    package = method_name_model.package
    service = method_name_model.service
    method_name = method_name_model.method

    def context_status(default: str) -> str:
        """Return the name of the status code set on the context, or *default*."""
        # Not every context object exposes a callable ``code`` accessor.
        code_getter = getattr(context, "code", None)
        if not callable(code_getter):
            return default
        code_name = getattr(code_getter(), "name", None)
        return default if code_name is None else code_name

    # Resolve the labelled gauge child once so inc() and dec() hit the same series.
    in_flight = self.ACTIVE_REQUESTS.labels(package=package, service=service, method=method_name)
    in_flight.inc()

    # Monotonic clock: the measured duration is unaffected by wall-clock adjustments.
    start_time = time.perf_counter()
    status_code = "OK"

    try:
        result = method(request, context)
        status_code = context_status("OK")
    except Exception as exception:
        # Label the failed call with the status set on the context (e.g. through
        # abort()); fall back to UNKNOWN when the handler raised without setting one.
        # NOTE(review): UNKNOWN is assumed to match what gRPC reports for an
        # unhandled servicer exception - confirm against the gRPC version in use.
        status_code = context_status("UNKNOWN")
        if status_code == "OK":
            # A call that raised must never be recorded as successful.
            status_code = "UNKNOWN"
        BaseUtils.capture_exception(exception)
        raise
    else:
        return result
    finally:
        duration = time.perf_counter() - start_time
        # Decrement first so an error while recording the histogram cannot leave
        # the in-flight gauge permanently incremented.
        in_flight.dec()
        self.RESPONSE_TIME_SECONDS.labels(
            package=package,
            service=service,
            method=method_name,
            status_code=status_code,
        ).observe(duration)
100113
101114
102115class AsyncGrpcServerMetricInterceptor (BaseAsyncGrpcServerInterceptor ):
103116 """An async gRPC server interceptor for collecting and reporting metrics using Prometheus.
104117
105118 This interceptor measures the response time of async gRPC methods and records it in a Prometheus histogram.
119+ It also tracks the number of active requests using a Prometheus gauge.
106120 It also captures errors and logs them for monitoring purposes.
107121 """
108122
109- from prometheus_client import Histogram
123+ from prometheus_client import Gauge , Histogram
110124
111125 "Buckets for measuring response times between 0 and 1 second."
112126 ZERO_TO_ONE_SECONDS_BUCKETS : ClassVar [list [float ]] = [i / 1000 for i in range (0 , 1000 , 5 )]
@@ -124,20 +138,27 @@ class AsyncGrpcServerMetricInterceptor(BaseAsyncGrpcServerInterceptor):
124138
125139 "Prometheus histogram for tracking response times of async gRPC methods."
126140 RESPONSE_TIME_SECONDS = Histogram (
127- "grpc_async_server_response_time_seconds " ,
141+ "grpc_async_response_time_seconds " ,
128142 "Time spent processing async gRPC request" ,
129143 labelnames = ("package" , "service" , "method" , "status_code" ),
130144 buckets = TOTAL_BUCKETS ,
131145 )
132146
147+ "Prometheus gauge for tracking active async gRPC requests."
148+ ACTIVE_REQUESTS = Gauge (
149+ "grpc_async_active_requests" ,
150+ "Number of active async gRPC requests" ,
151+ labelnames = ("package" , "service" , "method" ),
152+ )
153+
133154 async def intercept (
134155 self ,
135156 method : Callable ,
136157 request : object ,
137158 context : grpc .aio .ServicerContext ,
138159 method_name_model : MethodName ,
139160 ) -> object :
140- """Intercepts an async gRPC server call to measure response time and capture errors .
161+ """Intercepts an async gRPC server call to measure response time and track active requests .
141162
142163 Args:
143164 method (Callable): The async gRPC method being intercepted.
@@ -151,24 +172,25 @@ async def intercept(
151172 Raises:
152173 Exception: If an exception occurs during the method execution, it is captured and logged.
153174 """
154- try :
155- # Skip metric collection if Prometheus is disabled
156- if not BaseConfig .global_config ().PROMETHEUS .IS_ENABLED :
157- return await method (request , context )
175+ if not BaseConfig .global_config ().PROMETHEUS .IS_ENABLED :
176+ return await method (request , context )
177+
178+ package = method_name_model .package
179+ service = method_name_model .service
180+ method_name = method_name_model .method
158181
159- # Measure the start time using asyncio event loop time for better precision
160- start_time = asyncio .get_event_loop ().time ()
161- status_code = "OK"
182+ self .ACTIVE_REQUESTS .labels (package = package , service = service , method = method_name ).inc ()
162183
184+ start_time = asyncio .get_event_loop ().time ()
185+ status_code = "OK"
186+
187+ try :
163188 try :
164- # Execute the async gRPC method
165189 result = await method (request , context )
166190
167- # Get the actual status code from context
168191 if hasattr (context , "code" ) and context .code ():
169192 status_code = context .code ().name
170193 except Exception as e :
171- # Determine error status code
172194 if isinstance (e , grpc .aio .AioRpcError ):
173195 code_obj = e .code ()
174196 if code_obj is not None :
@@ -188,14 +210,14 @@ async def intercept(
188210 else :
189211 return result
190212 finally :
191- # Record the response time in the Prometheus histogram
192213 duration = asyncio .get_event_loop ().time () - start_time
193214 self .RESPONSE_TIME_SECONDS .labels (
194- package = method_name_model . package ,
195- service = method_name_model . service ,
196- method = method_name_model . method ,
215+ package = package ,
216+ service = service ,
217+ method = method_name ,
197218 status_code = status_code ,
198219 ).observe (duration )
220+ self .ACTIVE_REQUESTS .labels (package = package , service = service , method = method_name ).dec ()
199221
200222 except Exception as exception :
201223 BaseUtils .capture_exception (exception )
0 commit comments