From b3a23293ba23445de3b2650bc64f128a35367a93 Mon Sep 17 00:00:00 2001 From: Chirag Date: Fri, 27 Mar 2026 16:11:19 +0530 Subject: [PATCH] Add simulation analytics and metrics endpoints with documentation --- src/lib/api-navigation.ts | 20 ++ src/lib/navigation.ts | 8 + .../api/simulation-analytics/analytics.mdx | 239 +++++++++++++++ .../docs/api/simulation-analytics/metrics.mdx | 245 +++++++++++++++ .../docs/api/simulation-analytics/runs.mdx | 284 ++++++++++++++++++ 5 files changed, 796 insertions(+) create mode 100644 src/pages/docs/api/simulation-analytics/analytics.mdx create mode 100644 src/pages/docs/api/simulation-analytics/metrics.mdx create mode 100644 src/pages/docs/api/simulation-analytics/runs.mdx diff --git a/src/lib/api-navigation.ts b/src/lib/api-navigation.ts index 8c240fc9..eb9e8f7f 100644 --- a/src/lib/api-navigation.ts +++ b/src/lib/api-navigation.ts @@ -144,6 +144,26 @@ export const apiNavigation: ApiNavGroup[] = [ } ] }, + { + "title": "Simulation Analytics", + "items": [ + { + "title": "Get Simulation Metrics", + "href": "/docs/api/simulation-analytics/metrics", + "method": "GET" + }, + { + "title": "Get Simulation Runs", + "href": "/docs/api/simulation-analytics/runs", + "method": "GET" + }, + { + "title": "Get Simulation Analytics", + "href": "/docs/api/simulation-analytics/analytics", + "method": "GET" + } + ] + }, { "title": "Annotation Scores", "items": [ diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts index 868091cd..cc465cda 100644 --- a/src/lib/navigation.ts +++ b/src/lib/navigation.ts @@ -743,6 +743,14 @@ export const tabNavigation: NavTab[] = [ { title: 'Execute Run Test', href: '/docs/api/run-tests/executeruntest' }, ] }, + { + title: 'Simulation Analytics', + items: [ + { title: 'Get Simulation Metrics', href: '/docs/api/simulation-analytics/metrics' }, + { title: 'Get Simulation Runs', href: '/docs/api/simulation-analytics/runs' }, + { title: 'Get Simulation Analytics', href: '/docs/api/simulation-analytics/analytics' 
}, + ] + }, { title: 'Annotation Scores', items: [ diff --git a/src/pages/docs/api/simulation-analytics/analytics.mdx b/src/pages/docs/api/simulation-analytics/analytics.mdx new file mode 100644 index 00000000..003ee356 --- /dev/null +++ b/src/pages/docs/api/simulation-analytics/analytics.mdx @@ -0,0 +1,239 @@ +--- +title: "Get Simulation Analytics" +description: "Retrieve aggregated analytics — eval scores, eval averages, system summary, and FMA suggestions — for a simulation run." +--- + +# Get Simulation Analytics + +Returns the aggregated analytics view for a simulation run. This corresponds to the **Analytics tab** in the FutureAGI UI — eval scores (radar chart data), per-metric averages, system summary, and critical issues with Fix My Agent suggestions. + + + +## Authentication + +This endpoint uses API key authentication. Include both headers in every request: + +```bash +X-Api-Key: YOUR_API_KEY +X-Secret-Key: YOUR_SECRET_KEY +``` + +## Query Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `run_test_name` | string | One of these is required | Name of the run test. Returns analytics for the latest completed execution. | +| `execution_id` | UUID | | UUID of a test execution. Returns analytics for that execution. | +| `eval_name` | string | No | Comma-separated eval names to filter. Only matching evals are included. | +| `summary` | boolean | No | Include FMA explanation summary and critical issues. Default: `true`. | + +## Responses + +### 200 — Analytics for an execution + +Returns eval scores, averages, system summary, and optionally FMA suggestions. 
+ +```json +{ + "status": true, + "result": { + "execution_id": "d2fa3f2c-...", + "run_test_name": "My Agent Test", + "status": "completed", + "eval_results": [ + { + "name": "conversation_coherence", + "id": "...", + "output_type": "Pass/Fail", + "total_pass_rate": 85.0, + "result": [ + { + "name": "coherence_check", + "id": "...", + "total_cells": 48, + "output": { + "pass": 85.0, + "fail": 15.0, + "pass_count": 41, + "fail_count": 7 + } + } + ] + }, + { + "name": "conversation_resolution", + "id": "...", + "output_type": "Pass/Fail", + "total_pass_rate": 92.0, + "result": [...] + } + ], + "eval_averages": { + "avg_conversation_coherence": 85.0, + "avg_conversation_resolution": 92.0, + "avg_bias_detection": 100.0 + }, + "system_summary": { + "total_calls": 50, + "completed_calls": 48, + "failed_calls": 2, + "avg_score": 82.5, + "avg_response_time_ms": 290.0, + "total_duration_seconds": 6000 + }, + "eval_explanation_summary": { + "coherence_check": [ + { + "cluster_name": "Pricing contradictions", + "call_execution_ids": ["uuid1", "uuid2"], + "description": "Agent gives different prices when asked about the same product." + } + ] + }, + "eval_explanation_summary_status": "completed" + } +} +``` + +### 200 — By `run_test_name` with no completed executions + +```json +{ + "status": true, + "result": { + "run_test_name": "My Agent Test", + "message": "No completed executions found.", + "eval_results": [], + "eval_averages": {}, + "system_summary": {} + } +} +``` + +### 200 — With `summary=false` + +Same response but without `eval_explanation_summary` and `eval_explanation_summary_status` fields. + +### 400 + +Missing or invalid parameters. + +### 404 + +The specified run test or execution was not found. + +### 500 + +Internal server error. + +## Response Fields + +### `eval_results` + +Detailed eval scores broken down by eval template and config. Each entry includes pass/fail counts, rates, or score percentiles depending on the eval type. 
+ +### `eval_averages` + +Flat key-value map of averaged eval scores across all calls. Keys follow the pattern `avg_{eval_name}`. Useful for quick comparisons and threshold checks. + +### `system_summary` + +Aggregated system-level metrics: call counts, average score, response time, and total duration. + +### `eval_explanation_summary` + +LLM-generated analysis that clusters failure reasons and provides actionable improvement suggestions. This is the same data shown in the **Critical Issues** panel in the UI. + +## Code Examples + +### cURL + +```bash +# Get full analytics for latest execution of a run test +curl "https://api.futureagi.com/sdk/api/v1/simulation/analytics/?run_test_name=My%20Agent%20Test" \ + -H "X-Api-Key: YOUR_API_KEY" \ + -H "X-Secret-Key: YOUR_SECRET_KEY" + +# Get analytics for a specific execution, no FMA +curl "https://api.futureagi.com/sdk/api/v1/simulation/analytics/?execution_id=YOUR_EXECUTION_ID&summary=false" \ + -H "X-Api-Key: YOUR_API_KEY" \ + -H "X-Secret-Key: YOUR_SECRET_KEY" + +# Filter to specific evals only +curl "https://api.futureagi.com/sdk/api/v1/simulation/analytics/?execution_id=YOUR_EXECUTION_ID&eval_name=Coherence,Resolution" \ + -H "X-Api-Key: YOUR_API_KEY" \ + -H "X-Secret-Key: YOUR_SECRET_KEY" +``` + +### Python — Automated promotion gate + +```python +import requests + +url = "https://api.futureagi.com/sdk/api/v1/simulation/analytics/" +headers = { + "X-Api-Key": "YOUR_API_KEY", + "X-Secret-Key": "YOUR_SECRET_KEY", +} + +response = requests.get(url, headers=headers, params={ + "run_test_name": "My Agent Test", +}) +data = response.json()["result"] + +# Check if agent meets promotion criteria +eval_averages = data["eval_averages"] +min_threshold = 80.0 + +all_passing = all( + score >= min_threshold + for key, score in eval_averages.items() + if key.startswith("avg_") +) + +if all_passing: + print("Agent meets quality bar — promoting to production.") +else: + # Feed critical issues into your LLM for improvement 
suggestions + issues = data.get("eval_explanation_summary", {}) + for eval_name, clusters in issues.items(): + for cluster in clusters: + print(f"[{eval_name}] {cluster['cluster_name']}: {cluster['description']}") +``` + +### JavaScript — Dashboard integration + +```javascript +const response = await fetch( + "https://api.futureagi.com/sdk/api/v1/simulation/analytics/?run_test_name=My%20Agent%20Test", + { + headers: { + "X-Api-Key": "YOUR_API_KEY", + "X-Secret-Key": "YOUR_SECRET_KEY", + }, + } +); + +const { result } = await response.json(); + +// Build radar chart data from eval_results +// ("eval" is a reserved binding name in strict mode / ES modules, so use a different identifier) +const radarData = result.eval_results.map((evalResult) => ({ + label: evalResult.name, + value: evalResult.total_pass_rate ?? evalResult.total_avg ?? 0, +})); + +// Display system summary +console.log(`Calls: ${result.system_summary.total_calls}`); +console.log(`Avg Score: ${result.system_summary.avg_score}`); +console.log(`Avg Response Time: ${result.system_summary.avg_response_time_ms}ms`); +``` diff --git a/src/pages/docs/api/simulation-analytics/metrics.mdx b/src/pages/docs/api/simulation-analytics/metrics.mdx new file mode 100644 index 00000000..1b419277 --- /dev/null +++ b/src/pages/docs/api/simulation-analytics/metrics.mdx @@ -0,0 +1,245 @@ +--- +title: "Get Simulation Metrics" +description: "Retrieve aggregated system metrics — latency, cost, and conversation analytics — for a simulation run." +--- + +# Get Simulation Metrics + +Returns system-level performance metrics for simulation executions. Supports three query modes based on the level of detail needed. + + + +## Authentication + +This endpoint uses API key authentication. Include both headers in every request: + +```bash +X-Api-Key: YOUR_API_KEY +X-Secret-Key: YOUR_SECRET_KEY +``` + +## Query Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `run_test_name` | string | One of these is required | Name of the run test. Returns paginated list of executions with aggregated metrics. 
| +| `execution_id` | UUID | | UUID of a test execution. Returns aggregated metrics for that execution. | +| `call_execution_id` | UUID | | UUID of a call execution. Returns raw per-call metrics. | +| `page` | integer | No | Page number for paginated results. Default: `1`. | +| `limit` | integer | No | Number of results per page. Default: `10`. | + +## Responses + +### 200 — By `call_execution_id` + +Returns raw metrics for a single call. + +```json +{ + "status": true, + "result": { + "call_execution_id": "5af9e484-...", + "execution_id": "2b19f6e6-...", + "status": "completed", + "duration_seconds": 88, + "latency": { + "avg_agent_latency_ms": 1234, + "response_time_ms": null, + "customer_latency_metrics": { + "bot_wpm": 233.69, + "user_wpm": 214.37, + "talk_ratio": 0.217, + "ai_interruption_rate": 0.67, + "avg_agent_latency_ms": 1234 + } + }, + "cost": { + "total_cost_cents": 24, + "stt_cost_cents": 0, + "llm_cost_cents": 0, + "tts_cost_cents": 0, + "customer_cost_breakdown": {} + }, + "conversation": { + "user_wpm": 214.37, + "bot_wpm": 233.69, + "talk_ratio": 0.217, + "user_interruption_count": 0, + "user_interruption_rate": 0.0, + "ai_interruption_count": 1, + "ai_interruption_rate": 0.67, + "avg_stop_time_after_interruption_ms": null + }, + "chat_metrics": { + "input_tokens": 12685, + "total_tokens": 12885, + "output_tokens": 200, + "message_count": 15, + "turn_count": 10 + } + } +} +``` + +### 200 — By `execution_id` + +Returns aggregated metrics across all calls in the execution. 
+ +```json +{ + "status": true, + "result": { + "execution_id": "5819e158-...", + "status": "completed", + "started_at": "2025-11-30T06:57:38.592Z", + "completed_at": "2025-11-30T07:17:57.583Z", + "total_calls": 30, + "completed_calls": 27, + "failed_calls": 0, + "metrics": { + "latency": { + "avg_agent_latency_ms": 2887.0, + "avg_response_time_ms": 3123.0, + "percentiles": { + "p50": 3199.5, + "p95": 3445.8, + "p99": 3465.2 + } + }, + "cost": { + "total_duration_seconds": 69 + }, + "conversation": { + "avg_user_wpm": 147.0, + "avg_bot_wpm": 253.0, + "avg_talk_ratio": 6.73, + "avg_user_interruption_rate": 1.52, + "avg_ai_interruption_rate": 0.0, + "avg_stop_time_after_interruption_ms": 4770.0 + }, + "chat": { + "avg_total_tokens": 0.0, + "avg_input_tokens": 0.0, + "avg_output_tokens": 0.0, + "avg_chat_latency_ms": 0.0, + "avg_turn_count": 0.0, + "avg_csat_score": 0.0 + }, + "calls": { + "total": 30, + "completed": 27, + "failed": 0, + "pending": 0 + } + } + } +} +``` + +### 200 — By `run_test_name` + +Returns a paginated list of executions, each with aggregated metrics. + +```json +{ + "status": true, + "result": { + "total_pages": 5, + "current_page": 1, + "count": 50, + "results": [ + { + "execution_id": "...", + "status": "completed", + "started_at": "...", + "completed_at": "...", + "total_calls": 30, + "completed_calls": 27, + "failed_calls": 0, + "metrics": { ... } + } + ] + } +} +``` + +### 400 + +Missing or invalid parameters. + +### 404 + +The specified run test, execution, or call execution was not found. + +### 500 + +Internal server error. 
+ +## Code Examples + +### cURL + +```bash +# Get metrics for a specific execution +curl "https://api.futureagi.com/sdk/api/v1/simulation/metrics/?execution_id=YOUR_EXECUTION_ID" \ + -H "X-Api-Key: YOUR_API_KEY" \ + -H "X-Secret-Key: YOUR_SECRET_KEY" + +# Get metrics for all executions of a run test +curl "https://api.futureagi.com/sdk/api/v1/simulation/metrics/?run_test_name=My%20Agent%20Test&limit=5" \ + -H "X-Api-Key: YOUR_API_KEY" \ + -H "X-Secret-Key: YOUR_SECRET_KEY" + +# Get raw metrics for a single call +curl "https://api.futureagi.com/sdk/api/v1/simulation/metrics/?call_execution_id=YOUR_CALL_ID" \ + -H "X-Api-Key: YOUR_API_KEY" \ + -H "X-Secret-Key: YOUR_SECRET_KEY" +``` + +### Python + +```python +import requests + +url = "https://api.futureagi.com/sdk/api/v1/simulation/metrics/" +headers = { + "X-Api-Key": "YOUR_API_KEY", + "X-Secret-Key": "YOUR_SECRET_KEY", +} + +# By execution ID +response = requests.get(url, headers=headers, params={ + "execution_id": "YOUR_EXECUTION_ID" +}) +data = response.json() +metrics = data["result"]["metrics"] +print(f"P95 Latency: {metrics['latency']['percentiles']['p95']}ms") +``` + +### JavaScript + +```javascript +const response = await fetch( + "https://api.futureagi.com/sdk/api/v1/simulation/metrics/?execution_id=YOUR_EXECUTION_ID", + { + headers: { + "X-Api-Key": "YOUR_API_KEY", + "X-Secret-Key": "YOUR_SECRET_KEY", + }, + } +); + +const data = await response.json(); +console.log(data.result.metrics.latency.percentiles); +``` diff --git a/src/pages/docs/api/simulation-analytics/runs.mdx b/src/pages/docs/api/simulation-analytics/runs.mdx new file mode 100644 index 00000000..41cb392c --- /dev/null +++ b/src/pages/docs/api/simulation-analytics/runs.mdx @@ -0,0 +1,284 @@ +--- +title: "Get Simulation Runs" +description: "Retrieve run-level records with eval scores, scenario metadata, and per-call breakdowns." +--- + +# Get Simulation Runs + +Returns run records with evaluation scores, scenario metadata, and call details. 
Use this to inspect what happened in each execution and why calls passed or failed. + + + +## Authentication + +This endpoint uses API key authentication. Include both headers in every request: + +```bash +X-Api-Key: YOUR_API_KEY +X-Secret-Key: YOUR_SECRET_KEY +``` + +## Query Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `run_test_name` | string | One of these is required | Name of the run test. Returns paginated list of executions with eval scores. | +| `execution_id` | UUID | | UUID of a test execution. Returns one execution with paginated call results. | +| `call_execution_id` | UUID | | UUID of a call execution. Returns full detail for that call. | +| `eval_name` | string | No | Comma-separated eval names to filter. Only matching evals are returned. Example: `Coherence,Tone` | +| `summary` | boolean | No | Include the FMA (Fix My Agent) explanation summary. Default: `false`. | +| `page` | integer | No | Page number for paginated results. Default: `1`. | +| `limit` | integer | No | Number of results per page. Default: `10`. | + +## Responses + +### 200 — By `call_execution_id` + +Returns full detail for a single call including eval outputs, latency, and cost. + +```json +{ + "status": true, + "result": { + "call_execution_id": "5af9e484-...", + "execution_id": "2b19f6e6-...", + "scenario_id": "cc3c8111-...", + "scenario_name": "Billing Inquiry", + "status": "completed", + "started_at": "2026-03-23T20:01:04.450Z", + "completed_at": "2026-03-23T20:02:32.123Z", + "duration_seconds": 88, + "ended_reason": "customer-ended-call", + "call_summary": "Customer called about a billing discrepancy...", + "eval_outputs": { + "eval-config-1": { + "name": "Coherence", + "output": "Passed", + "output_type": "Pass/Fail", + "reason": "Agent maintained context throughout the conversation." 
+ }, + "eval-config-2": { + "name": "Resolution", + "output": false, + "output_type": "Pass/Fail", + "reason": "Customer hung up without resolution." + } + }, + "latency": { + "avg_agent_latency_ms": 1234, + "response_time_ms": null + }, + "cost": { + "total_cost_cents": 24, + "stt_cost_cents": 0, + "llm_cost_cents": 0, + "tts_cost_cents": 0 + } + } +} +``` + +### 200 — By `execution_id` + +Returns one execution with eval summary and paginated per-call breakdown. + +```json +{ + "status": true, + "result": { + "execution_id": "aabfa5b5-...", + "status": "completed", + "started_at": "2026-01-19T07:42:26.006Z", + "completed_at": "2026-01-19T08:15:00.000Z", + "total_calls": 30, + "completed_calls": 26, + "failed_calls": 4, + "eval_results": [ + { + "name": "is_helpful", + "id": "e283f838-...", + "output_type": "Pass/Fail", + "total_pass_rate": 80.77, + "result": [ + { + "name": "helpful_or_no", + "id": "bcff05d0-...", + "total_cells": 26, + "output": { + "pass": 80.77, + "fail": 19.23, + "pass_count": 21, + "fail_count": 5 + } + } + ] + } + ], + "call_results": { + "total_pages": 3, + "current_page": 1, + "count": 30, + "results": [ + { + "call_execution_id": "839b6662-...", + "scenario_id": "d6607d90-...", + "scenario_name": "Billing Inquiry", + "status": "completed", + "duration_seconds": 120, + "eval_outputs": { + "eval-config-1": { + "name": "is_helpful", + "output": "Passed", + "output_type": "Pass/Fail" + } + } + } + ] + } + } +} +``` + +### 200 — By `execution_id` with `summary=true` + +Same as above, with additional FMA explanation fields. + +```json +{ + "status": true, + "result": { + "execution_id": "...", + "eval_results": [...], + "call_results": {...}, + "eval_explanation_summary": { + "is_helpful": [ + { + "cluster_name": "Pricing contradictions", + "call_execution_ids": ["uuid1", "uuid2"], + "description": "Agent gives different prices for the same product." 
+ } + ] + }, + "eval_explanation_summary_status": "completed" + } +} +``` + +### 200 — By `run_test_name` + +Returns a paginated list of all executions for the run test, each with eval scores. + +```json +{ + "status": true, + "result": { + "total_pages": 12, + "current_page": 1, + "count": 12, + "results": [ + { + "execution_id": "75f6a314-...", + "status": "completed", + "started_at": "2026-03-05T10:12:32.790Z", + "completed_at": "2026-03-05T10:45:00.000Z", + "total_calls": 30, + "completed_calls": 28, + "failed_calls": 2, + "eval_results": [...] + } + ] + } +} +``` + +### 400 + +Missing or invalid parameters. + +### 404 + +The specified run test, execution, or call execution was not found. + +### 500 + +Internal server error. + +## Code Examples + +### cURL + +```bash +# Get all executions for a run test +curl "https://api.futureagi.com/sdk/api/v1/simulation/runs/?run_test_name=My%20Agent%20Test" \ + -H "X-Api-Key: YOUR_API_KEY" \ + -H "X-Secret-Key: YOUR_SECRET_KEY" + +# Get one execution with FMA summary +curl "https://api.futureagi.com/sdk/api/v1/simulation/runs/?execution_id=YOUR_EXECUTION_ID&summary=true" \ + -H "X-Api-Key: YOUR_API_KEY" \ + -H "X-Secret-Key: YOUR_SECRET_KEY" + +# Get one call, filtered to specific evals +curl "https://api.futureagi.com/sdk/api/v1/simulation/runs/?call_execution_id=YOUR_CALL_ID&eval_name=Coherence,Tone" \ + -H "X-Api-Key: YOUR_API_KEY" \ + -H "X-Secret-Key: YOUR_SECRET_KEY" +``` + +### Python + +```python +import requests + +url = "https://api.futureagi.com/sdk/api/v1/simulation/runs/" +headers = { + "X-Api-Key": "YOUR_API_KEY", + "X-Secret-Key": "YOUR_SECRET_KEY", +} + +# Get execution with call breakdown and failure reasons +response = requests.get(url, headers=headers, params={ + "execution_id": "YOUR_EXECUTION_ID", + "summary": "true", +}) +data = response.json() + +# Extract failure reasons for your LLM pipeline +for call in data["result"]["call_results"]["results"]: + for eval_id, eval_data in 
call["eval_outputs"].items(): + if eval_data.get("output") in [False, "Failed"]: + print(f"Failed: {eval_data['name']} — {eval_data.get('reason')}") +``` + +### JavaScript + +```javascript +const response = await fetch( + "https://api.futureagi.com/sdk/api/v1/simulation/runs/?execution_id=YOUR_EXECUTION_ID&summary=true", + { + headers: { + "X-Api-Key": "YOUR_API_KEY", + "X-Secret-Key": "YOUR_SECRET_KEY", + }, + } +); + +const data = await response.json(); +const { eval_results, eval_explanation_summary } = data.result; + +// Check if agent is ready to promote +const allPassing = eval_results.every(e => e.total_pass_rate > 90); +console.log(`Agent ${allPassing ? "ready" : "needs work"}`); +```