diff --git a/tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py b/tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py index afa54271f8..955ea690cc 100644 --- a/tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py +++ b/tests/unit/vertexai/genai/replays/test_generate_user_scenarios.py @@ -22,24 +22,27 @@ def test_gen_user_scenarios(client): """Tests that generate_user_scenarios() correctly calls the API and parses the response.""" eval_dataset = client.evals.generate_user_scenarios( - agents={ - "booking-agent": types.evals.AgentConfig( - agent_id="booking-agent", - agent_type="service_agent", - description="An agent capable of booking flights and hotels.", - instruction="You are a helpful travel assistant. Use tools to find flights.", - tools=[ - { - "function_declarations": [ - { - "name": "search_flights", - "description": "Search for available flights.", - } - ] - } - ], - ) - }, + agent_info=types.evals.AgentInfo( + agents={ + "booking-agent": types.evals.AgentConfig( + agent_id="booking-agent", + agent_type="service_agent", + description="An agent capable of booking flights and hotels.", + instruction="You are a helpful travel assistant. Use tools to find flights.", + tools=[ + { + "function_declarations": [ + { + "name": "search_flights", + "description": "Search for available flights.", + } + ] + } + ], + ) + }, + root_agent_id="booking-agent", + ), user_scenario_generation_config=types.evals.UserScenarioGenerationConfig( user_scenario_count=2, simulation_instruction=( @@ -49,18 +52,11 @@ def test_gen_user_scenarios(client): environment_data="Today is Monday. Flights to Paris are available.", model_name="gemini-2.5-flash", ), - root_agent_id="booking-agent", ) assert isinstance(eval_dataset, types.EvaluationDataset) assert len(eval_dataset.eval_cases) == 2 - assert ( - eval_dataset.eval_cases[0].user_scenario.starting_prompt - == "I want to find a flight from New York to London." - ) - assert ( - eval_dataset.eval_cases[0].user_scenario.conversation_plan - == "Actually, I meant Paris, not London. Please search for flights to Paris." - ) + assert eval_dataset.eval_cases[0].user_scenario.starting_prompt + assert eval_dataset.eval_cases[0].user_scenario.conversation_plan pytest_plugins = ("pytest_asyncio",) @@ -70,24 +66,27 @@ def test_gen_user_scenarios(client): async def test_gen_user_scenarios_async(client): """Tests that generate_user_scenarios() async correctly calls the API and parses the response.""" eval_dataset = await client.aio.evals.generate_user_scenarios( - agents={ - "booking-agent": types.evals.AgentConfig( - agent_id="booking-agent", - agent_type="service_agent", - description="An agent capable of booking flights and hotels.", - instruction="You are a helpful travel assistant. Use tools to find flights.", - tools=[ - { - "function_declarations": [ - { - "name": "search_flights", - "description": "Search for available flights.", - } - ] - } - ], - ) - }, + agent_info=types.evals.AgentInfo( + agents={ + "booking-agent": types.evals.AgentConfig( + agent_id="booking-agent", + agent_type="service_agent", + description="An agent capable of booking flights and hotels.", + instruction="You are a helpful travel assistant. Use tools to find flights.", + tools=[ + { + "function_declarations": [ + { + "name": "search_flights", + "description": "Search for available flights.", + } + ] + } + ], + ) + }, + root_agent_id="booking-agent", + ), user_scenario_generation_config=types.evals.UserScenarioGenerationConfig( user_scenario_count=2, simulation_instruction=( @@ -97,18 +96,11 @@ async def test_gen_user_scenarios_async(client): environment_data="Today is Monday. Flights to Paris are available.", model_name="gemini-2.5-flash", ), - root_agent_id="booking-agent", ) assert isinstance(eval_dataset, types.EvaluationDataset) assert len(eval_dataset.eval_cases) == 2 - assert ( - eval_dataset.eval_cases[1].user_scenario.starting_prompt - == "Find me a flight from Boston to Rome for next month." - ) - assert ( - eval_dataset.eval_cases[1].user_scenario.conversation_plan - == "Wait, change of plans. I need to go to Milan instead, and it needs to be a round trip, returning two weeks after departure." - ) + assert eval_dataset.eval_cases[1].user_scenario.starting_prompt + assert eval_dataset.eval_cases[1].user_scenario.conversation_plan pytestmark = pytest_helper.setup( diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py index b0d914d18e..c9fd9ed635 100644 --- a/tests/unit/vertexai/genai/test_evals.py +++ b/tests/unit/vertexai/genai/test_evals.py @@ -6213,9 +6213,8 @@ def test_generate_user_scenarios(self): evals_module = evals.Evals(api_client_=self.mock_api_client) eval_dataset = evals_module.generate_user_scenarios( - agents={"agent_1": {}}, + agent_info={"agents": {"agent_1": {}}, "root_agent_id": "agent_1"}, user_scenario_generation_config={"user_scenario_count": 2}, - root_agent_id="agent_1", ) assert isinstance(eval_dataset, vertexai_genai_types.EvaluationDataset) assert len(eval_dataset.eval_cases) == 2 @@ -6243,9 +6242,8 @@ async def test_async_generate_user_scenarios(self): async_evals_module = evals.AsyncEvals(api_client_=self.mock_api_client) eval_dataset = await async_evals_module.generate_user_scenarios( - agents={"agent_1": {}}, + agent_info={"agents": {"agent_1": {}}, "root_agent_id": "agent_1"}, user_scenario_generation_config={"user_scenario_count": 2}, - root_agent_id="agent_1", ) assert isinstance(eval_dataset, vertexai_genai_types.EvaluationDataset) assert len(eval_dataset.eval_cases) == 2 diff --git a/vertexai/_genai/evals.py b/vertexai/_genai/evals.py index 05b37bd369..0881bd571e 100644 --- a/vertexai/_genai/evals.py +++ b/vertexai/_genai/evals.py @@ -2129,7 +2129,7 @@ def create_evaluation_run( raise ValueError( "At most one of agent_info or inference_configs can be provided." ) - agent_info_pydantic = ( + parsed_agent_info = ( evals_types.AgentInfo.model_validate(agent_info) if isinstance(agent_info, dict) else (agent_info or evals_types.AgentInfo()) @@ -2137,7 +2137,7 @@ def create_evaluation_run( if isinstance(dataset, types.EvaluationDataset): _evals_utils._validate_dataset_agent_data(dataset, inference_configs) resolved_dataset = _evals_common._resolve_dataset( - self._api_client, dataset, dest, agent_info_pydantic + self._api_client, dataset, dest, parsed_agent_info ) output_config = genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest) @@ -2149,10 +2149,10 @@ def create_evaluation_run( output_config=output_config, metrics=resolved_metrics ) resolved_inference_configs = _evals_common._resolve_inference_configs( - self._api_client, resolved_dataset, inference_configs, agent_info_pydantic + self._api_client, resolved_dataset, inference_configs, parsed_agent_info ) resolved_labels = _evals_common._add_evaluation_run_labels( - labels, agent_info_pydantic + labels, parsed_agent_info ) resolved_name = name or f"evaluation_run_{uuid.uuid4()}" return self._create_evaluation_run( @@ -2306,26 +2306,29 @@ def create_evaluation_set( def generate_user_scenarios( self, *, - agents: dict[str, evals_types.AgentConfigOrDict], + agent_info: evals_types.AgentInfoOrDict, user_scenario_generation_config: evals_types.UserScenarioGenerationConfigOrDict, - root_agent_id: str, ) -> types.EvaluationDataset: """Generates an evaluation dataset with user scenarios, which helps to generate conversations between a simulated user and the agent under test. Args: - agents: A map of agent ID to AgentConfig. + agent_info: The agent info to generate user scenarios for. user_scenario_generation_config: Configuration for generating user scenarios. - root_agent_id: The ID of the root agent. Returns: An EvaluationDataset containing the generated user scenarios. """ + parsed_agent_info = ( + evals_types.AgentInfo.model_validate(agent_info) + if isinstance(agent_info, dict) + else agent_info + ) response = self._generate_user_scenarios( - agents=agents, + agents=parsed_agent_info.agents, + root_agent_id=parsed_agent_info.root_agent_id, user_scenario_generation_config=user_scenario_generation_config, - root_agent_id=root_agent_id, ) return _evals_utils._postprocess_user_scenarios_response(response) @@ -3304,7 +3307,7 @@ async def create_evaluation_run( raise ValueError( "At most one of agent_info or inference_configs can be provided." ) - agent_info_pydantic = ( + parsed_agent_info = ( evals_types.AgentInfo.model_validate(agent_info) if isinstance(agent_info, dict) else (agent_info or evals_types.AgentInfo()) @@ -3312,7 +3315,7 @@ async def create_evaluation_run( if isinstance(dataset, types.EvaluationDataset): _evals_utils._validate_dataset_agent_data(dataset, inference_configs) resolved_dataset = _evals_common._resolve_dataset( - self._api_client, dataset, dest, agent_info_pydantic + self._api_client, dataset, dest, parsed_agent_info ) output_config = genai_types.OutputConfig( gcs_destination=genai_types.GcsDestination(output_uri_prefix=dest) @@ -3324,10 +3327,10 @@ async def create_evaluation_run( output_config=output_config, metrics=resolved_metrics ) resolved_inference_configs = _evals_common._resolve_inference_configs( - self._api_client, resolved_dataset, inference_configs, agent_info_pydantic + self._api_client, resolved_dataset, inference_configs, parsed_agent_info ) resolved_labels = _evals_common._add_evaluation_run_labels( - labels, agent_info_pydantic + labels, parsed_agent_info ) resolved_name = name or f"evaluation_run_{uuid.uuid4()}" @@ -3488,26 +3491,29 @@ async def create_evaluation_set( async def generate_user_scenarios( self, *, - agents: dict[str, evals_types.AgentConfigOrDict], + agent_info: evals_types.AgentInfoOrDict, user_scenario_generation_config: evals_types.UserScenarioGenerationConfigOrDict, - root_agent_id: str, ) -> types.EvaluationDataset: """Generates an evaluation dataset with user scenarios, which helps to generate conversations between a simulated user and the agent under test. Args: - agents: A map of agent ID to AgentConfig. + agent_info: The agent info to generate user scenarios for. user_scenario_generation_config: Configuration for generating user scenarios. - root_agent_id: The ID of the root agent. Returns: An EvaluationDataset containing the generated user scenarios. """ + parsed_agent_info = ( + evals_types.AgentInfo.model_validate(agent_info) + if isinstance(agent_info, dict) + else agent_info + ) response = await self._generate_user_scenarios( - agents=agents, + agents=parsed_agent_info.agents, + root_agent_id=parsed_agent_info.root_agent_id, user_scenario_generation_config=user_scenario_generation_config, - root_agent_id=root_agent_id, ) return _evals_utils._postprocess_user_scenarios_response(response)