From dab185a9e6eb6e90e3d2b239214f4264777483a9 Mon Sep 17 00:00:00 2001 From: Jason Dai Date: Fri, 20 Mar 2026 14:03:52 -0700 Subject: [PATCH] feat: GenAI Client(evals) - BREAKING_CHANGE: The agent engine resource name is now passed as a separate `agent` parameter to `create_evaluation_run` methods, rather than being part of the `AgentInfo` object. This parameter is now required if `agent_info` is provided PiperOrigin-RevId: 886965034 --- .../replays/test_create_evaluation_run.py | 21 +++++++++++++----- tests/unit/vertexai/genai/test_evals.py | 5 ----- vertexai/_genai/_evals_common.py | 10 ++++----- vertexai/_genai/evals.py | 22 ++++++++++++------- vertexai/_genai/types/evals.py | 20 ++--------------- 5 files changed, 37 insertions(+), 41 deletions(-) diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py index e39953eb39..3fb251d16d 100644 --- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py +++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py @@ -19,6 +19,8 @@ from google.genai import types as genai_types import pandas as pd import pytest +from unittest import mock +import uuid GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output" GENERAL_QUALITY_METRIC = types.EvaluationRunMetric( @@ -79,8 +81,8 @@ ) ] ) +AGENT_RESOURCE_NAME = "projects/123/locations/us-central1/reasoningEngines/456" AGENT_INFO = types.evals.AgentInfo( - agent_resource_name=("projects/123/locations/us-central1/reasoningEngines/456"), name="agent-1", agents={ "agent-1": types.evals.AgentConfig( @@ -124,6 +126,7 @@ def test_create_eval_run_data_source_evaluation_set(client): BLEU_COMPUTATION_BASED_METRIC, ], agent_info=AGENT_INFO, + agent=AGENT_RESOURCE_NAME, labels={"label1": "value1"}, ) assert isinstance(evaluation_run, types.EvaluationRun) @@ -148,7 +151,7 @@ def test_create_eval_run_data_source_evaluation_set(client): ] == types.EvaluationRunInferenceConfig( agent_configs=AGENT_INFO.agents, agent_run_config=types.AgentRunConfig( - agent_engine=AGENT_INFO.agent_resource_name, + agent_engine=AGENT_RESOURCE_NAME, user_simulator_config={"max_turn": 5}, ), ) @@ -219,6 +222,7 @@ def test_create_eval_run_with_user_simulator_config(client): dest=GCS_DEST, metrics=[GENERAL_QUALITY_METRIC], agent_info=AGENT_INFO, + agent=AGENT_RESOURCE_NAME, user_simulator_config=types.evals.UserSimulatorConfig( max_turn=5, ), @@ -243,7 +247,7 @@ def test_create_eval_run_with_user_simulator_config(client): ] == types.EvaluationRunInferenceConfig( agent_configs=AGENT_INFO.agents, agent_run_config=types.AgentRunConfig( - agent_engine=AGENT_INFO.agent_resource_name, + agent_engine=AGENT_RESOURCE_NAME, user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5), ), ) @@ -290,8 +294,14 @@ def test_create_eval_run_with_inference_configs(client): assert evaluation_run.error is None -def test_create_eval_run_with_metric_resource_name(client): +@mock.patch("uuid.uuid4") +def test_create_eval_run_with_metric_resource_name(mock_uuid4, client): """Tests create_evaluation_run with metric_resource_name.""" + mock_uuid4.side_effect = [ + uuid.UUID("d392c573-9e81-4a30-b984-8a6aa4656369"), + uuid.UUID("49128576-accd-459e-aace-41391e163b3c"), + uuid.UUID("9bcc726e-d2cf-448c-967b-f49480d8c1c2"), + ] client._api_client._http_options.api_version = "v1beta1" client._api_client._http_options.base_url = ( "https://us-central1-staging-aiplatform.sandbox.googleapis.com/" @@ -733,6 +743,7 @@ async def test_create_eval_run_async_with_user_simulator_config(client): dest=GCS_DEST, metrics=[GENERAL_QUALITY_METRIC], agent_info=AGENT_INFO, + agent=AGENT_RESOURCE_NAME, user_simulator_config=types.evals.UserSimulatorConfig( max_turn=5, ), @@ -757,7 +768,7 @@ async def test_create_eval_run_async_with_user_simulator_config(client): ] == types.EvaluationRunInferenceConfig( agent_configs=AGENT_INFO.agents, agent_run_config=types.AgentRunConfig( - agent_engine=AGENT_INFO.agent_resource_name, + agent_engine=AGENT_RESOURCE_NAME, user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5), ), ) diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 9ad33e62ac..fca15b18e6 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -3503,16 +3503,11 @@ def my_search_tool(query: str) -> str: agent_info = vertexai_genai_types.evals.AgentInfo.load_from_agent( agent=mock_agent, - agent_resource_name="projects/123/locations/abc/reasoningEngines/456", ) assert agent_info.name == "mock_agent" assert agent_info.agents["mock_agent"].instruction == "mock instruction" assert agent_info.agents["mock_agent"].description == "mock description" - assert ( - agent_info.agent_resource_name - == "projects/123/locations/abc/reasoningEngines/456" - ) assert len(agent_info.agents["mock_agent"].tools) == 1 assert isinstance(agent_info.agents["mock_agent"].tools[0], genai_types.Tool) assert agent_info.agents["mock_agent"].tools[0].function_declarations == [ diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py index f2ca33648c..2c12de0193 100644 --- a/vertexai/_genai/_evals_common.py +++ b/vertexai/_genai/_evals_common.py @@ -451,14 +451,14 @@ def _resolve_inference_configs( def _add_evaluation_run_labels( labels: Optional[dict[str, str]] = None, - parsed_agent_info: Optional[types.evals.AgentInfo] = None, + agent: Optional[str] = None, ) -> Optional[dict[str, str]]: """Adds labels to the evaluation run.""" - if parsed_agent_info and parsed_agent_info.agent_resource_name: + if agent: labels = labels or {} - labels["vertex-ai-evaluation-agent-engine-id"] = ( - parsed_agent_info.agent_resource_name.split("reasoningEngines/")[-1] - ) + labels["vertex-ai-evaluation-agent-engine-id"] = agent.split( + "reasoningEngines/" + )[-1] return labels diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index 1d2deb22a4..5d5b128867 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -2101,6 +2101,7 @@ def create_evaluation_run( name: Optional[str] = None, display_name: Optional[str] = None, agent_info: Optional[evals_types.AgentInfoOrDict] = None, + agent: Optional[str] = None, user_simulator_config: Optional[evals_types.UserSimulatorConfigOrDict] = None, inference_configs: Optional[ dict[str, types.EvaluationRunInferenceConfigOrDict] @@ -2118,6 +2119,10 @@ def create_evaluation_run( display_name: The display name of the evaluation run. agent_info: The agent info to evaluate. Mutually exclusive with `inference_configs`. + agent: The agent engine resource name in str type, with format + `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`. + If provided, runs inference with the deployed agent to get agent responses + for evaluation. This is required if `agent_info` is provided. user_simulator_config: The user simulator configuration for agent evaluation. If `agent_info` is provided without `inference_configs`, this config is used to automatically construct the inference configuration. If not specified, @@ -2158,7 +2163,7 @@ def create_evaluation_run( candidate_name: types.EvaluationRunInferenceConfig( agent_configs=parsed_agent_info.agents, agent_run_config=types.AgentRunConfig( - agent_engine=parsed_agent_info.agent_resource_name, + agent_engine=agent, user_simulator_config=parsed_user_simulator_config, ), ) @@ -2181,9 +2186,7 @@ def create_evaluation_run( resolved_inference_configs = _evals_common._resolve_inference_configs( self._api_client, resolved_dataset, inference_configs, parsed_agent_info ) - resolved_labels = _evals_common._add_evaluation_run_labels( - labels, parsed_agent_info - ) + resolved_labels = _evals_common._add_evaluation_run_labels(labels, agent) resolved_name = name or f"evaluation_run_{uuid.uuid4()}" return self._create_evaluation_run( name=resolved_name, @@ -3307,6 +3310,7 @@ async def create_evaluation_run( name: Optional[str] = None, display_name: Optional[str] = None, agent_info: Optional[evals_types.AgentInfo] = None, + agent: Optional[str] = None, user_simulator_config: Optional[evals_types.UserSimulatorConfigOrDict] = None, inference_configs: Optional[ dict[str, types.EvaluationRunInferenceConfigOrDict] @@ -3324,6 +3328,10 @@ async def create_evaluation_run( display_name: The display name of the evaluation run. agent_info: The agent info to evaluate. Mutually exclusive with `inference_configs`. + agent: The agent engine resource name in str type, with format + `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`. + If provided, runs inference with the deployed agent to get agent responses + for evaluation. This is required if `agent_info` is provided. user_simulator_config: The user simulator configuration for agent evaluation. If `agent_info` is provided without `inference_configs`, this config is used to automatically construct the inference configuration. If not specified, @@ -3364,7 +3372,7 @@ async def create_evaluation_run( candidate_name: types.EvaluationRunInferenceConfig( agent_configs=parsed_agent_info.agents, agent_run_config=types.AgentRunConfig( - agent_engine=parsed_agent_info.agent_resource_name, + agent_engine=agent, user_simulator_config=parsed_user_simulator_config, ), ) @@ -3387,9 +3395,7 @@ async def create_evaluation_run( resolved_inference_configs = _evals_common._resolve_inference_configs( self._api_client, resolved_dataset, inference_configs, parsed_agent_info ) - resolved_labels = _evals_common._add_evaluation_run_labels( - labels, parsed_agent_info - ) + resolved_labels = _evals_common._add_evaluation_run_labels(labels, agent) resolved_name = name or f"evaluation_run_{uuid.uuid4()}" result = await self._create_evaluation_run( diff --git a/vertexai/_genai/types/evals.py b/vertexai/_genai/types/evals.py index 3419584a67..57c85951e0 100644 --- a/vertexai/_genai/types/evals.py +++ b/vertexai/_genai/types/evals.py @@ -388,11 +388,6 @@ class AgentDataDict(TypedDict, total=False): class AgentInfo(_common.BaseModel): """The agent info of an agent system, used for agent evaluation.""" - agent_resource_name: Optional[str] = Field( - default=None, - description="""The agent engine used to run agent. Agent engine resource name in str type, with format - `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""", - ) name: Optional[str] = Field( default=None, description="""Agent candidate name, used as an identifier.""" ) @@ -407,14 +402,11 @@ class AgentInfo(_common.BaseModel): ) @classmethod - def load_from_agent( - cls, agent: Any, agent_resource_name: Optional[str] = None - ) -> "AgentInfo": + def load_from_agent(cls, agent: Any) -> "AgentInfo": """Loads agent info from an ADK agent. Args: agent: The root agent to get the agent info from, data type is google.adk.agents.LLMAgent type. - agent_resource_name: Optional. The agent engine resource name for the deployed agent. Returns: The agent info of the agent system. @@ -423,10 +415,7 @@ def load_from_agent( ``` from vertexai._genai import types - agent_info = types.evals.AgentInfo.load_from_agent( - agent=my_agent, - agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456" - ) + agent_info = types.evals.AgentInfo.load_from_agent(agent=my_agent) ``` """ agent_name = getattr(agent, "name", None) @@ -434,7 +423,6 @@ def load_from_agent( raise ValueError(f"Agent {agent} must have a name.") return cls( # pytype: disable=missing-parameter name=agent_name, - agent_resource_name=agent_resource_name, agents=AgentData.get_agents_map(agent), root_agent_id=agent_name, ) @@ -443,10 +431,6 @@ def load_from_agent( class AgentInfoDict(TypedDict, total=False): """The agent info of an agent system, used for agent evaluation.""" - agent_resource_name: Optional[str] - """The agent engine used to run agent. Agent engine resource name in str type, with format - `projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""" - name: Optional[str] """Agent candidate name, used as an identifier."""