|
17 | 17 | from tests.unit.vertexai.genai.replays import pytest_helper |
18 | 18 | from vertexai import types |
19 | 19 | from google.genai import types as genai_types |
20 | | -import pytest |
21 | 20 | import pandas as pd |
| 21 | +import pytest |
22 | 22 |
23 | 23 | GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output" |
24 | 24 | GENERAL_QUALITY_METRIC = types.EvaluationRunMetric( |
|
42 | 42 | metric_config=types.UnifiedMetric( |
43 | 43 | llm_based_metric_spec=types.LLMBasedMetricSpec( |
44 | 44 | metric_prompt_template=( |
45 | | - "\nEvaluate the fluency of the response. Provide a score from 1-5." |
| 45 | + "\nEvaluate the fluency of the response. Provide a score from" " 1-5." |
46 | 46 | ) |
47 | 47 | ) |
48 | 48 | ), |
|
65 | 65 | ), |
66 | 66 | ) |
67 | 67 | INFERENCE_CONFIG = types.EvaluationRunInferenceConfig( |
68 | | - model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" |
| 68 | + model="projects/977012026409/locations/us-central1/publishers/google/models/gemini-2.5-flash" |
69 | 69 | ) |
70 | 70 | TOOL = genai_types.Tool( |
71 | 71 | function_declarations=[ |
|
80 | 80 | ] |
81 | 81 | ) |
82 | 82 | AGENT_INFO = types.evals.AgentInfo( |
83 | | - agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456", |
| 83 | + agent_resource_name=("projects/123/locations/us-central1/reasoningEngines/456"), |
84 | 84 | name="agent-1", |
85 | | - instruction="agent-1 instruction", |
86 | | - tool_declarations=[TOOL], |
| 85 | + agents={ |
| 86 | + "agent-1": types.evals.AgentConfig( |
| 87 | + agent_id="agent-1", |
| 88 | + instruction="agent-1 instruction", |
| 89 | + tools=[TOOL], |
| 90 | + ) |
| 91 | + }, |
| 92 | + root_agent_id="agent-1", |
87 | 93 | ) |
88 | 94 | DEFAULT_PROMPT_TEMPLATE = "{prompt}" |
89 | 95 | INPUT_DF_WITH_CONTEXT_AND_HISTORY = pd.DataFrame( |
|
96 | 102 | } |
97 | 103 | ) |
98 | 104 | CANDIDATE_NAME = "candidate_1" |
99 | | -MODEL_NAME = "projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash" |
| 105 | +MODEL_NAME = "projects/977012026409/locations/us-central1/publishers/google/models/gemini-2.5-flash" |
100 | 106 | EVAL_SET_NAME = ( |
101 | | - "projects/503583131166/locations/us-central1/evaluationSets/6619939608513740800" |
| 107 | + "projects/977012026409/locations/us-central1/evaluationSets/6619939608513740800" |
102 | 108 | ) |
103 | 109 |
104 | 110 |
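The substantive change above is to `AGENT_INFO`: the flat `instruction`/`tool_declarations` fields are replaced by an `agents` map of `types.evals.AgentConfig` entries keyed by agent ID, with `root_agent_id` naming the entry-point agent. A minimal sketch of the new shape, using only field names that appear in this diff (the empty `Tool` is a placeholder assumption; the tests above use a real function declaration):

```python
from google.genai import types as genai_types
from vertexai import types

# Placeholder tool; any genai_types.Tool works here.
tool = genai_types.Tool(function_declarations=[])

# New nested AgentInfo shape: per-agent configs keyed by agent ID,
# plus root_agent_id selecting the entry-point agent.
agent_info = types.evals.AgentInfo(
    agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456",
    name="agent-1",
    agents={
        "agent-1": types.evals.AgentConfig(
            agent_id="agent-1",
            instruction="agent-1 instruction",
            tools=[tool],
        )
    },
    root_agent_id="agent-1",
)
```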
@@ -140,12 +146,11 @@ def test_create_eval_run_data_source_evaluation_set(client): |
140 | 146 | assert evaluation_run.inference_configs[ |
141 | 147 | AGENT_INFO.name |
142 | 148 | ] == types.EvaluationRunInferenceConfig( |
143 | | - agent_config=types.EvaluationRunAgentConfig( |
144 | | - developer_instruction=genai_types.Content( |
145 | | - parts=[genai_types.Part(text="agent-1 instruction")] |
146 | | - ), |
147 | | - tools=[TOOL], |
148 | | - ) |
| 149 | + agent_configs=AGENT_INFO.agents, |
| 150 | + agent_run_config=types.AgentRunConfig( |
| 151 | + agent_engine=AGENT_INFO.agent_resource_name, |
| 152 | + user_simulator_config={"max_turn": 5}, |
| 153 | + ), |
149 | 154 | ) |
150 | 155 | assert evaluation_run.labels == { |
151 | 156 | "vertex-ai-evaluation-agent-engine-id": "456", |
@@ -202,6 +207,53 @@ def test_create_eval_run_data_source_bigquery_request_set(client): |
202 | 207 | assert evaluation_run.error is None |
203 | 208 |
204 | 209 |
| 210 | +def test_create_eval_run_with_user_simulator_config(client): |
| 211 | + """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with user_simulator_config.""" |
| 212 | + client._api_client._http_options.api_version = "v1beta1" |
| 213 | + evaluation_run = client.evals.create_evaluation_run( |
| 214 | + name="test_user_simulator_config", |
| 215 | + display_name="test_user_simulator_config", |
| 216 | + dataset=types.EvaluationRunDataSource( |
| 217 | + evaluation_set="projects/977012026409/locations/us-central1/evaluationSets/3885168317211607040" |
| 218 | + ), |
| 219 | + dest=GCS_DEST, |
| 220 | + metrics=[GENERAL_QUALITY_METRIC], |
| 221 | + agent_info=AGENT_INFO, |
| 222 | + user_simulator_config=types.evals.UserSimulatorConfig( |
| 223 | + max_turn=5, |
| 224 | + ), |
| 225 | + labels={"label1": "value1"}, |
| 226 | + ) |
| 227 | + assert isinstance(evaluation_run, types.EvaluationRun) |
| 228 | + assert evaluation_run.display_name == "test_user_simulator_config" |
| 229 | + assert evaluation_run.state == types.EvaluationRunState.PENDING |
| 230 | + assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) |
| 231 | + assert ( |
| 232 | + evaluation_run.data_source.evaluation_set |
| 233 | + == "projects/977012026409/locations/us-central1/evaluationSets/3885168317211607040" |
| 234 | + ) |
| 235 | + assert evaluation_run.evaluation_config == types.EvaluationRunConfig( |
| 236 | + output_config=genai_types.OutputConfig( |
| 237 | + gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) |
| 238 | + ), |
| 239 | + metrics=[GENERAL_QUALITY_METRIC], |
| 240 | + ) |
| 241 | + assert evaluation_run.inference_configs[ |
| 242 | + AGENT_INFO.name |
| 243 | + ] == types.EvaluationRunInferenceConfig( |
| 244 | + agent_configs=AGENT_INFO.agents, |
| 245 | + agent_run_config=types.AgentRunConfig( |
| 246 | + agent_engine=AGENT_INFO.agent_resource_name, |
| 247 | + user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5), |
| 248 | + ), |
| 249 | + ) |
| 250 | + assert evaluation_run.labels == { |
| 251 | + "vertex-ai-evaluation-agent-engine-id": "456", |
| 252 | + "label1": "value1", |
| 253 | + } |
| 254 | + assert evaluation_run.error is None |
| 255 | + |
| 256 | + |
205 | 257 | def test_create_eval_run_with_inference_configs(client): |
206 | 258 | """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs.""" |
207 | 259 | client._api_client._http_options.api_version = "v1beta1" |
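One detail worth noting: the two hunks spell the expected `user_simulator_config` differently, once as a plain dict (`{"max_turn": 5}`) and once as `types.evals.UserSimulatorConfig(max_turn=5)`. Assuming these types are pydantic models, as is common in this SDK, both spellings coerce to the same value; a hedged sketch:

```python
from vertexai import types

# Assuming AgentRunConfig is a pydantic model, a plain dict is coerced
# into the typed user_simulator_config field, so both forms compare equal.
a = types.AgentRunConfig(
    agent_engine="projects/123/locations/us-central1/reasoningEngines/456",
    user_simulator_config={"max_turn": 5},
)
b = types.AgentRunConfig(
    agent_engine="projects/123/locations/us-central1/reasoningEngines/456",
    user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5),
)
assert a == b
```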
@@ -668,6 +720,54 @@ async def test_create_eval_run_async(client): |
668 | 720 | assert evaluation_run.error is None |
669 | 721 |
670 | 722 |
| 723 | +@pytest.mark.asyncio |
| 724 | +async def test_create_eval_run_async_with_user_simulator_config(client): |
| 725 | + """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with user_simulator_config asynchronously.""" |
| 726 | + client._api_client._http_options.api_version = "v1beta1" |
| 727 | + evaluation_run = await client.aio.evals.create_evaluation_run( |
| 728 | + name="test_user_simulator_config_async", |
| 729 | + display_name="test_user_simulator_config_async", |
| 730 | + dataset=types.EvaluationRunDataSource( |
| 731 | + evaluation_set="projects/977012026409/locations/us-central1/evaluationSets/3885168317211607040" |
| 732 | + ), |
| 733 | + dest=GCS_DEST, |
| 734 | + metrics=[GENERAL_QUALITY_METRIC], |
| 735 | + agent_info=AGENT_INFO, |
| 736 | + user_simulator_config=types.evals.UserSimulatorConfig( |
| 737 | + max_turn=5, |
| 738 | + ), |
| 739 | + labels={"label1": "value1"}, |
| 740 | + ) |
| 741 | + assert isinstance(evaluation_run, types.EvaluationRun) |
| 742 | + assert evaluation_run.display_name == "test_user_simulator_config_async" |
| 743 | + assert evaluation_run.state == types.EvaluationRunState.PENDING |
| 744 | + assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource) |
| 745 | + assert ( |
| 746 | + evaluation_run.data_source.evaluation_set |
| 747 | + == "projects/977012026409/locations/us-central1/evaluationSets/3885168317211607040" |
| 748 | + ) |
| 749 | + assert evaluation_run.evaluation_config == types.EvaluationRunConfig( |
| 750 | + output_config=genai_types.OutputConfig( |
| 751 | + gcs_destination=genai_types.GcsDestination(output_uri_prefix=GCS_DEST) |
| 752 | + ), |
| 753 | + metrics=[GENERAL_QUALITY_METRIC], |
| 754 | + ) |
| 755 | + assert evaluation_run.inference_configs[ |
| 756 | + AGENT_INFO.name |
| 757 | + ] == types.EvaluationRunInferenceConfig( |
| 758 | + agent_configs=AGENT_INFO.agents, |
| 759 | + agent_run_config=types.AgentRunConfig( |
| 760 | + agent_engine=AGENT_INFO.agent_resource_name, |
| 761 | + user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5), |
| 762 | + ), |
| 763 | + ) |
| 764 | + assert evaluation_run.labels == { |
| 765 | + "label1": "value1", |
| 766 | + "vertex-ai-evaluation-agent-engine-id": "456", |
| 767 | + } |
| 768 | + assert evaluation_run.error is None |
| 769 | + |
| 770 | + |
671 | 771 | @pytest.mark.asyncio |
672 | 772 | async def test_create_eval_run_async_with_inference_configs(client): |
673 | 773 | """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously.""" |
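Both new tests also assert that the caller's labels are merged with a `vertex-ai-evaluation-agent-engine-id` label whose value is the trailing segment of `agent_resource_name` (`.../reasoningEngines/456` becomes `"456"`). A hypothetical helper illustrating that derivation (the SDK's actual implementation may differ):

```python
def engine_id_label(agent_resource_name: str) -> dict[str, str]:
    # "projects/123/locations/us-central1/reasoningEngines/456" -> "456"
    engine_id = agent_resource_name.rsplit("/", 1)[-1]
    return {"vertex-ai-evaluation-agent-engine-id": engine_id}
```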