77from typing import Any , Dict , Iterable , List , Optional , Set , Tuple
88
99import opentelemetry .metrics as metrics_api
10- from opentelemetry .metrics import Counter , Meter
10+ from opentelemetry .metrics import Counter , Histogram , Meter
1111
1212from ..telemetry import metrics_constants as constants
1313from ..types .content import Message
@@ -121,22 +121,34 @@ class ToolMetrics:
121121 error_count : int = 0
122122 total_time : float = 0.0
123123
def add_call(
    self,
    tool: "ToolUse",
    duration: float,
    success: bool,
    metrics_client: Optional["MetricsClient"] = None,
    attributes: Optional[Dict[str, Any]] = None,
) -> None:
    """Record a new tool call with its outcome.

    Updates the in-memory tallies kept on this object and, when a metrics
    client is supplied, forwards the same observation to its instruments.

    Args:
        tool: The tool that was called.
        duration: How long the call took in seconds.
        success: Whether the call was successful.
        metrics_client: Client whose ``tool_call_count``, ``tool_duration``,
            ``tool_success_count`` and ``tool_error_count`` instruments
            receive the observation. Optional so that callers using the
            earlier ``add_call(tool, duration, success)`` form keep working;
            when omitted, only the local tallies are updated.
        attributes: Key/value attributes attached to every data point
            emitted for this call.
    """
    self.tool = tool  # Update with latest tool state
    self.call_count += 1
    self.total_time += duration
    if success:
        self.success_count += 1
    else:
        self.error_count += 1

    # No client means there is nowhere to emit to; the local tallies above
    # are still complete.
    if metrics_client is None:
        return

    metrics_client.tool_call_count.add(1, attributes=attributes)
    metrics_client.tool_duration.record(duration, attributes=attributes)
    outcome_counter = metrics_client.tool_success_count if success else metrics_client.tool_error_count
    outcome_counter.add(1, attributes=attributes)
140152
141153
142154@dataclass
@@ -159,32 +171,53 @@ class EventLoopMetrics:
159171 accumulated_usage : Usage = field (default_factory = lambda : Usage (inputTokens = 0 , outputTokens = 0 , totalTokens = 0 ))
160172 accumulated_metrics : Metrics = field (default_factory = lambda : Metrics (latencyMs = 0 ))
161173
162- def start_cycle (self ) -> Tuple [float , Trace ]:
@property
def _metrics_client(self) -> "MetricsClient":
    """Return the shared ``MetricsClient`` used to emit metrics.

    ``MetricsClient`` is a singleton, so constructing it here hands back the
    one shared instance rather than a fresh client.
    """
    shared_client = MetricsClient()
    return shared_client
178+
def start_cycle(
    self,
    attributes: Optional[Dict[str, Any]] = None,
) -> Tuple[float, "Trace"]:
    """Begin a new event loop cycle and open a trace for it.

    Increments the cycle-count and start-cycle counters on the metrics
    client, bumps ``cycle_count``, and appends a new ``Trace`` named after
    the cycle number to ``traces``.

    Args:
        attributes: Key/value attributes attached to the counter increments.

    Returns:
        A tuple of the cycle's start timestamp (from ``time.time()``) and the
        newly created cycle trace.
    """
    self._metrics_client.event_loop_cycle_count.add(1, attributes=attributes)
    self._metrics_client.event_loop_start_cycle.add(1, attributes=attributes)

    self.cycle_count += 1
    started_at = time.time()
    trace_for_cycle = Trace(f"Cycle {self.cycle_count}", start_time=started_at)
    self.traces.append(trace_for_cycle)
    return started_at, trace_for_cycle
173198
def end_cycle(
    self,
    start_time: float,
    cycle_trace: "Trace",
    attributes: Optional[Dict[str, Any]] = None,
) -> None:
    """End the current event loop cycle and record its duration.

    Increments the end-cycle counter, measures the elapsed time since
    ``start_time``, records it on the cycle-duration histogram, appends it to
    ``cycle_durations`` and closes ``cycle_trace`` at the same end timestamp.

    Args:
        start_time: The timestamp when the cycle started.
        cycle_trace: The trace object for this cycle.
        attributes: Key/value attributes attached to the emitted data points.
    """
    client = self._metrics_client
    # Pass attributes by keyword, matching every other instrument call in
    # this module and avoiding reliance on positional parameter order.
    client.event_loop_end_cycle.add(1, attributes=attributes)

    end_time = time.time()
    duration = end_time - start_time
    client.event_loop_cycle_duration.record(duration, attributes=attributes)

    self.cycle_durations.append(duration)
    cycle_trace.end(end_time)
185213
186214 def add_tool_usage (
187- self , tool : ToolUse , duration : float , tool_trace : Trace , success : bool , message : Message
215+ self ,
216+ tool : ToolUse ,
217+ duration : float ,
218+ tool_trace : Trace ,
219+ success : bool ,
220+ message : Message ,
188221 ) -> None :
189222 """Record metrics for a tool invocation.
190223
@@ -207,8 +240,16 @@ def add_tool_usage(
207240 tool_trace .raw_name = f"{ tool_name } - { tool_use_id } "
208241 tool_trace .add_message (message )
209242
210- self .tool_metrics .setdefault (tool_name , ToolMetrics (tool )).add_call (tool , duration , success )
211-
243+ self .tool_metrics .setdefault (tool_name , ToolMetrics (tool )).add_call (
244+ tool ,
245+ duration ,
246+ success ,
247+ self ._metrics_client ,
248+ attributes = {
249+ "tool_name" : tool_name ,
250+ "tool_use_id" : tool_use_id ,
251+ },
252+ )
212253 tool_trace .end ()
213254
214255 def update_usage (self , usage : Usage ) -> None :
@@ -217,6 +258,8 @@ def update_usage(self, usage: Usage) -> None:
217258 Args:
218259 usage: The usage data to add to the accumulated totals.
219260 """
261+ self ._metrics_client .event_loop_input_tokens .record (usage ["inputTokens" ])
262+ self ._metrics_client .event_loop_output_tokens .record (usage ["outputTokens" ])
220263 self .accumulated_usage ["inputTokens" ] += usage ["inputTokens" ]
221264 self .accumulated_usage ["outputTokens" ] += usage ["outputTokens" ]
222265 self .accumulated_usage ["totalTokens" ] += usage ["totalTokens" ]
@@ -227,6 +270,7 @@ def update_metrics(self, metrics: Metrics) -> None:
227270 Args:
228271 metrics: The metrics data to add to the accumulated totals.
229272 """
273+ self ._metrics_client .event_loop_latency .record (metrics ["latencyMs" ])
230274 self .accumulated_metrics ["latencyMs" ] += metrics ["latencyMs" ]
231275
232276 def get_summary (self ) -> Dict [str , Any ]:
@@ -370,7 +414,18 @@ class MetricsClient:
370414
371415 _instance : Optional ["MetricsClient" ] = None
372416 meter : Meter
373- strands_agent_invocation_count : Counter
417+ event_loop_cycle_count : Counter
418+ event_loop_start_cycle : Counter
419+ event_loop_end_cycle : Counter
420+ event_loop_cycle_duration : Histogram
421+ event_loop_latency : Histogram
422+ event_loop_input_tokens : Histogram
423+ event_loop_output_tokens : Histogram
424+
425+ tool_call_count : Counter
426+ tool_success_count : Counter
427+ tool_error_count : Counter
428+ tool_duration : Histogram
374429
375430 def __new__ (cls ) -> "MetricsClient" :
376431 """Create or return the singleton instance of MetricsClient.
@@ -398,6 +453,24 @@ def __init__(self) -> None:
398453
def create_instruments(self) -> None:
    """Create every OpenTelemetry instrument exposed by this client.

    Each instrument is created on ``self.meter`` under the name held in the
    matching ``metrics_constants`` entry and stored as an attribute of the
    client. Counters use the unit ``"Count"``; histograms use ``"s"``,
    ``"ms"`` or ``"token"`` depending on what they measure.
    """
    new_counter = self.meter.create_counter
    new_histogram = self.meter.create_histogram

    # Event loop cycle counters.
    self.event_loop_cycle_count = new_counter(name=constants.STRANDS_EVENT_LOOP_CYCLE_COUNT, unit="Count")
    self.event_loop_start_cycle = new_counter(name=constants.STRANDS_EVENT_LOOP_START_CYCLE, unit="Count")
    self.event_loop_end_cycle = new_counter(name=constants.STRANDS_EVENT_LOOP_END_CYCLE, unit="Count")

    # Event loop timing.
    self.event_loop_cycle_duration = new_histogram(name=constants.STRANDS_EVENT_LOOP_CYCLE_DURATION, unit="s")
    self.event_loop_latency = new_histogram(name=constants.STRANDS_EVENT_LOOP_LATENCY, unit="ms")

    # Tool invocation outcomes and timing.
    self.tool_call_count = new_counter(name=constants.STRANDS_TOOL_CALL_COUNT, unit="Count")
    self.tool_success_count = new_counter(name=constants.STRANDS_TOOL_SUCCESS_COUNT, unit="Count")
    self.tool_error_count = new_counter(name=constants.STRANDS_TOOL_ERROR_COUNT, unit="Count")
    self.tool_duration = new_histogram(name=constants.STRANDS_TOOL_DURATION, unit="s")

    # Token usage per event loop update.
    self.event_loop_input_tokens = new_histogram(name=constants.STRANDS_EVENT_LOOP_INPUT_TOKENS, unit="token")
    self.event_loop_output_tokens = new_histogram(name=constants.STRANDS_EVENT_LOOP_OUTPUT_TOKENS, unit="token")
0 commit comments