From a4ffb673911fb4e0e25174c1b92054eb4a443d34 Mon Sep 17 00:00:00 2001 From: A Vertex SDK engineer Date: Tue, 12 May 2026 14:07:54 -0700 Subject: [PATCH] fix: Inject agents map from agent_info into agent_data for create_evaluation_run PiperOrigin-RevId: 914468929 --- tests/unit/vertexai/genai/test_evals.py | 127 ++++++++++++++++++++++++ vertexai/_genai/_evals_common.py | 20 +++- 2 files changed, 144 insertions(+), 3 deletions(-) diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index 10d2b9144d..8745516937 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -8869,6 +8869,133 @@ def test_create_evaluation_set_with_agent_data( assert candidate_response["candidate"] == "test-candidate" assert candidate_response["agent_data"] == agent_data + @mock.patch.object(_evals_common, "evals") + @mock.patch.object(_evals_common, "_gcs_utils") + def test_create_evaluation_set_injects_agents_map_from_agent_info( + self, mock_gcs_utils, mock_evals_module + ): + """Tests that agents map is injected from agent_info when agent_data has no agents.""" + agent_data = { + "turns": [ + { + "turn_index": 0, + "turn_id": "turn_0", + "events": [ + { + "author": "my_agent", + "content": { + "parts": [{"text": "hello"}], + "role": "model", + }, + } + ], + } + ] + } + eval_df = pd.DataFrame([{"prompt": "test prompt", "agent_data": agent_data}]) + + agent_info = vertexai_genai_types.evals.AgentInfo( + name="my_agent", + agents={ + "my_agent": vertexai_genai_types.evals.AgentConfig( + agent_id="my_agent", + instruction="You are a helpful agent.", + ) + }, + root_agent_id="my_agent", + ) + + mock_gcs_instance = mock_gcs_utils.GcsUtils.return_value + mock_gcs_instance.upload_json_to_prefix.return_value = ( + "gs://bucket/path/request.json" + ) + + mock_evals_instance = mock_evals_module.Evals.return_value + mock_eval_item = mock.Mock() + mock_eval_item.name = "eval_item_1" + 
mock_evals_instance.create_evaluation_item.return_value = mock_eval_item + + mock_eval_set = mock.Mock() + mock_evals_instance.create_evaluation_set.return_value = mock_eval_set + + _evals_common._create_evaluation_set_from_dataframe( + api_client=self.mock_api_client, + gcs_dest_prefix="gs://bucket/prefix", + eval_df=eval_df, + candidate_name="test-candidate", + parsed_agent_info=agent_info, + ) + + call_args = mock_gcs_instance.upload_json_to_prefix.call_args + uploaded_data = call_args.kwargs["data"] + + candidate_response = uploaded_data["candidate_responses"][0] + uploaded_agent_data = candidate_response["agent_data"] + assert "agents" in uploaded_agent_data + assert "my_agent" in uploaded_agent_data["agents"] + assert ( + uploaded_agent_data["agents"]["my_agent"]["instruction"] + == "You are a helpful agent." + ) + + @mock.patch.object(_evals_common, "evals") + @mock.patch.object(_evals_common, "_gcs_utils") + def test_create_evaluation_set_preserves_existing_agents_map( + self, mock_gcs_utils, mock_evals_module + ): + """Tests that an existing agents map in agent_data is not overwritten.""" + agent_data = { + "turns": [{"turn_id": "turn1", "events": []}], + "agents": { + "original_agent": { + "agent_id": "original_agent", + "instruction": "original instruction", + } + }, + } + eval_df = pd.DataFrame([{"prompt": "test prompt", "agent_data": agent_data}]) + + agent_info = vertexai_genai_types.evals.AgentInfo( + name="different_agent", + agents={ + "different_agent": vertexai_genai_types.evals.AgentConfig( + agent_id="different_agent", + instruction="different instruction", + ) + }, + root_agent_id="different_agent", + ) + + mock_gcs_instance = mock_gcs_utils.GcsUtils.return_value + mock_gcs_instance.upload_json_to_prefix.return_value = ( + "gs://bucket/path/request.json" + ) + + mock_evals_instance = mock_evals_module.Evals.return_value + mock_eval_item = mock.Mock() + mock_eval_item.name = "eval_item_1" +
mock_evals_instance.create_evaluation_item.return_value = mock_eval_item + + mock_eval_set = mock.Mock() + mock_evals_instance.create_evaluation_set.return_value = mock_eval_set + + _evals_common._create_evaluation_set_from_dataframe( + api_client=self.mock_api_client, + gcs_dest_prefix="gs://bucket/prefix", + eval_df=eval_df, + candidate_name="test-candidate", + parsed_agent_info=agent_info, + ) + + call_args = mock_gcs_instance.upload_json_to_prefix.call_args + uploaded_data = call_args.kwargs["data"] + + candidate_response = uploaded_data["candidate_responses"][0] + uploaded_agent_data = candidate_response["agent_data"] + # Original agents map should be preserved, not overwritten + assert "original_agent" in uploaded_agent_data["agents"] + assert "different_agent" not in uploaded_agent_data["agents"] + @mock.patch.object(_evals_common, "evals") @mock.patch.object(_evals_common, "_gcs_utils") def test_create_evaluation_set_with_history_column( diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py index 196e7c70b0..73e3775f1f 100644 --- a/vertexai/_genai/_evals_common.py +++ b/vertexai/_genai/_evals_common.py @@ -438,6 +438,7 @@ def _resolve_dataset( dest, eval_df, candidate_name, + parsed_agent_info=parsed_agent_info, ) dataset = types.EvaluationRunDataSource(evaluation_set=eval_set.name) return dataset @@ -2845,6 +2846,7 @@ def _create_evaluation_set_from_dataframe( gcs_dest_prefix: str, eval_df: pd.DataFrame, candidate_name: Optional[str] = None, + parsed_agent_info: Optional[types.evals.AgentInfo] = None, ) -> Union[types.EvaluationSet, Any]: """Converts a dataframe to an EvaluationSet.""" eval_item_requests = [] @@ -2877,6 +2879,18 @@ def _create_evaluation_set_from_dataframe( elif isinstance(agent_data_val, types.evals.AgentData): agent_data_obj = agent_data_val + + # When agent_data exists but has no agents map (e.g.
from remote + # agent_engine inference), inject the agents map from agent_info so + # the server-side autorater can access tool definitions and + # instructions. + if ( + agent_data_obj + and not agent_data_obj.agents + and parsed_agent_info + and parsed_agent_info.agents + ): + agent_data_obj.agents = parsed_agent_info.agents + candidate_responses = [] if _evals_constant.RESPONSE in row or agent_data_obj or intermediate_events: # Resolve the oneof conflict: prioritize agent_data over flat text @@ -2884,9 +2898,9 @@ def _create_evaluation_set_from_dataframe( if agent_data_obj and response_text: logger.info( - "Both 'response' and 'agent_data' columns found in the evaluation dataset. " - "Prioritizing 'agent_data' and omitting 'response' text to satisfy " - "CandidateResponse protobuf oneof constraints." + "Both 'response' and 'agent_data' columns found in the evaluation" + " dataset. Prioritizing 'agent_data' and omitting 'response' text" + " to satisfy CandidateResponse protobuf oneof constraints." ) response_text = None