diff --git a/sdk/ai/azure-ai-projects/samples/datasets/sample_dataset_generation_job_simpleqna_for_finetuning.py b/sdk/ai/azure-ai-projects/samples/datasets/sample_dataset_generation_job_simpleqna_for_finetuning.py index 8a545c3dd6b3..5aaca24250c7 100644 --- a/sdk/ai/azure-ai-projects/samples/datasets/sample_dataset_generation_job_simpleqna_for_finetuning.py +++ b/sdk/ai/azure-ai-projects/samples/datasets/sample_dataset_generation_job_simpleqna_for_finetuning.py @@ -83,32 +83,226 @@ f"Lower DATASET_NAME (currently `{dataset_name}`) so that `-` fits within 50 characters." ) -# Reference document the sample uploads as an Azure OpenAI file. The service -# requires the file to contain at least 1 KB of content to generate QnA from. -SEED_REFERENCE_DOCUMENT = """# Widgets and Gizmos Reference - -## Products -- Widget: blue, manufactured at Factory 7 in Acme, carbon-fiber, rated to 80 C, sold in packs of 4, 250 g each. -- Gizmo: red, manufactured at Factory 12 in Bedrock, carbon-fiber, rated to 80 C, sold individually, 1.2 kg each. -- Sprocket: green, manufactured at Factory 3 in Acme, stainless steel, rated to 200 C, sold individually, 500 g each. - -## Operations -- Factory operates weekdays 0700-1900 local time. -- Closed on public holidays, except for the annual maintenance run on December 27. -- ISO 9001 certified; audited annually by an independent third party. -- Quality control samples every 100th unit and runs full destructive testing on every 5000th unit. - -## Customer support -- Warranty claims: email support@example.com with the serial number printed on the underside of the product. -- Returns: accepted within 30 days if unopened; opened items are eligible for repair only. -- Bulk orders (50+ units): contact sales@example.com for volume pricing and an extended 90-day return window. -- Replacement parts: orderable directly from the support portal using the original order number. 
- -## Pricing and SLAs -- Widget pack: USD 24.99 per 4-pack; free shipping on orders over USD 75. -- Gizmo unit: USD 49.99; free shipping on orders over USD 75. -- Sprocket unit: USD 14.99; ships from regional warehouses in 1-2 business days. -- Standard support response: within one business day. Priority support response: within four hours. +# Reference document the sample uploads as an Azure OpenAI file. SUPERVISED_FINETUNING +# QnA generation requires a substantially richer corpus than the eval scenario does; +# a 1-2 KB summary is not enough and the service will reject it with +# "File content lacks sufficient context to generate quality questions." Keep this +# block at roughly 8-12 KB of varied prose so the service has enough material to +# synthesize diverse question/answer pairs. +SEED_REFERENCE_DOCUMENT = """# Widgets, Gizmos, and Sprockets: Complete Product and Operations Reference + +## 1. Product Catalog + +### 1.1 Widget (model WDG-100) +The Widget is a structural carbon-fiber component manufactured at Factory 7 in Acme. It is +finished in matte blue (Pantone 2935 C) using a UV-stable powder coating. Each unit measures +120 mm x 40 mm x 18 mm and weighs 250 g (+/- 5 g). Widgets are rated for continuous service +up to 80 degrees Celsius and a transient peak of 95 degrees Celsius for up to 60 seconds. +Widgets ship in packs of 4, packaged in recyclable cardboard with biodegradable foam inserts. +The serial number is laser-etched on the underside in the format WDG-100-YYWW-NNNNN, where +YY is the two-digit year, WW is the ISO week, and NNNNN is the per-week sequence number. + +Compatible mounting hardware: M5 stainless steel bolts, torqued to 6.0 Nm. Substituting +non-stainless bolts voids the corrosion portion of the warranty. + +### 1.2 Gizmo (model GZM-200) +The Gizmo is a precision carbon-fiber assembly manufactured at Factory 12 in Bedrock. It is +finished in gloss red (Pantone 186 C). 
Each Gizmo measures 220 mm x 110 mm x 60 mm and +weighs 1.2 kg (+/- 20 g). Gizmos are sold individually and are rated to 80 degrees Celsius. +They include an integrated thermal cutoff that disables the unit at 88 degrees Celsius and +re-enables it after a five minute cool-down. The serial number is engraved on the side and +follows the format GZM-200-YYWW-NNNNN. + +Compatible mounting hardware: M8 stainless steel bolts, torqued to 18 Nm. Gizmos should be +installed on a flat surface with no more than 0.5 mm of warp across the 220 mm dimension. + +### 1.3 Sprocket (model SPR-300) +The Sprocket is a stainless-steel rotating component manufactured at Factory 3 in Acme. +It is finished in anodised green and weighs 500 g. The Sprocket is rated for continuous +service up to 200 degrees Celsius. The teeth count is 24, the pitch diameter is 60 mm, +and the bore is 12 mm with a standard 4 mm keyway. Sprockets ship individually with a +laser-etched serial number on the hub in the format SPR-300-YYWW-NNNNN. + +Compatible mounting hardware: M12 stainless steel set screws, torqued to 22 Nm. + +### 1.4 Compatibility matrix +* Widget + Gizmo: fully compatible, no adapter required. +* Widget + Sprocket: requires the WDG-SPR adapter plate (part WDG-SPR-A01). +* Gizmo + Sprocket: requires the GZM-SPR adapter plate (part GZM-SPR-A02) and a 4 mm shim. +* Widget + Gizmo + Sprocket (three-way stack): requires both adapter plates and the + triple-stack bracket WGS-T01. Torque all bolts to spec in the sequence Widget, + Gizmo, Sprocket. + +## 2. Manufacturing and Operations + +### 2.1 Factory schedule +All three factories operate weekdays from 0700 to 1900 local time. Factories are closed +on national public holidays except for the annual maintenance run on December 27, when +each factory performs cleaning, lubrication, and recalibration of CNC equipment and +finishing lines. The maintenance run runs from 0600 to 1400 local time and does not +produce shippable inventory. 
+ +### 2.2 Quality control +Every factory is ISO 9001:2015 certified and is audited annually by an independent +third party. Quality control samples every 100th unit for visual and dimensional +inspection. Every 5000th unit undergoes full destructive testing including tensile, +compressive, and thermal cycling. Destructive test results are archived for seven +years and are available to enterprise customers on request. + +### 2.3 Lot traceability +The model code that begins every serial number (for example WDG-100) identifies the +model, the four-digit YYWW field gives the year and ISO week, and the last five digits give +the per-week sequence number. Given any serial number, customer support can identify +the production line, the shift, the operator, and the raw material lot that produced +the unit. Lot traceability records are retained for the life of the product plus three +years. + +### 2.4 Environmental +All three factories are powered by a mix of on-site solar and grid-tied wind generation. +Total Scope 1 and Scope 2 emissions for FY2025 were 12,400 tonnes CO2e, a 14 percent +reduction from FY2024. Packaging is fully recyclable; the cardboard boxes are made from +80 percent post-consumer recycled fibre and the biodegradable foam is corn-starch based. + +## 3. Pricing and Ordering + +### 3.1 Standard list prices +* Widget 4-pack (WDG-100-PK4): USD 24.99 +* Gizmo single (GZM-200): USD 49.99 +* Sprocket single (SPR-300): USD 14.99 +* WDG-SPR adapter plate: USD 6.50 +* GZM-SPR adapter plate: USD 7.50 +* Triple-stack bracket WGS-T01: USD 18.00 + +### 3.2 Shipping +Free standard shipping is provided on orders over USD 75 within the United States and +Canada. International orders incur shipping based on weight and destination, computed +at checkout. Standard transit time within North America is 3 to 5 business days. Express +overnight shipping is available for an additional USD 18 per shipment. 
+ +### 3.3 Bulk orders +Bulk orders of 50 or more units of any single product receive a 12 percent discount +on the list price plus a 90 day return window. Bulk orders of 250 or more units +receive an 18 percent discount and the option of a dedicated account manager. Contact +sales@example.com for bulk orders. + +### 3.4 Payment terms +Standard payment is due at checkout via credit card or PayPal. Enterprise customers +with an approved purchase order may pay net 30 days from invoice date. Late payments +incur a 1.5 percent monthly service charge. + +## 4. Warranty and Returns + +### 4.1 Standard warranty +All products carry a two year limited warranty against defects in materials and +workmanship from the date of purchase. The warranty does not cover damage from +incorrect installation, exposure beyond the rated temperature range, modification, +or normal wear. Warranty service is provided by repair, replacement, or refund at +the manufacturer's discretion. + +### 4.2 Filing a warranty claim +Warranty claims are filed by emailing support@example.com with the product serial +number, a description of the issue, and photographs of the failure mode. Acme will +respond within one business day with either a Return Merchandise Authorisation (RMA) +number or a request for additional information. RMAs are valid for 30 days and must +be referenced on the outside of any returned package. + +### 4.3 Returns +Unopened products can be returned within 30 days of receipt for a full refund. +Opened products are eligible for repair only, except where required by local law. +Bulk orders (50+ units) are eligible for return within 90 days under the same +unopened/opened rules. Custom-finished products are non-returnable. + +### 4.4 Repair turnaround +The target turnaround for in-warranty repair is 10 business days from receipt at the +service centre. Out-of-warranty repair is offered at a fixed rate of USD 35 per +Widget, USD 60 per Gizmo, or USD 20 per Sprocket, plus return shipping. 
+ +## 5. Installation and Use + +### 5.1 Pre-installation checks +Before installing any product, inspect for transit damage. If the box shows signs of +crushing or moisture, photograph the damage before opening and report it to +support@example.com within 48 hours. Confirm that the serial number on the unit +matches the packing slip. + +### 5.2 Widget installation +Mount Widgets on a flat surface with M5 stainless steel bolts torqued to 6.0 Nm in +a star pattern. Apply a thin film of anti-seize compound to the bolt threads. Allow +the assembly to cure for 30 minutes before applying load. + +### 5.3 Gizmo installation +Mount Gizmos on a flat surface with M8 stainless steel bolts torqued to 18 Nm. +Do not exceed 22 Nm; over-torquing can crack the carbon-fiber housing. The thermal +cutoff cable must be routed away from heat sources and secured with the supplied +P-clips at intervals of no more than 200 mm. + +### 5.4 Sprocket installation +Press the Sprocket onto a 12 mm shaft using an arbor press. Hand pressure or +percussive installation will damage the bore tolerance. Once seated, install the +M12 set screw in the keyway and torque to 22 Nm. + +### 5.5 Periodic maintenance +Inspect mounting hardware every 6 months. Re-torque to spec if any fastener has +loosened. Replace any fastener that shows corrosion or thread damage. Clean exterior +surfaces with isopropyl alcohol and a microfiber cloth; do not use abrasive cleaners. + +## 6. Customer Support + +### 6.1 Contact channels +* Email: support@example.com (response within one business day) +* Priority email: priority@example.com (response within four hours for enterprise + customers with a current support agreement) +* Phone: 1-800-555-0100, Monday to Friday, 0800 to 1800 Eastern Time +* Self-service portal: https://support.example.com + +### 6.2 Service level agreements +The standard SLA is a one business day first response for general inquiries and a +four hour first response for priority inquiries. 
Critical production-down issues for +enterprise customers receive a one hour first response and a continuous-effort +resolution target until the issue is resolved. + +### 6.3 Replacement parts +Replacement parts including bolts, adapter plates, P-clips, and thermal cutoff +cables can be ordered directly from the support portal using the original order +number. Common parts ship the same business day if ordered before 1500 Eastern Time. + +## 7. Frequently Asked Questions + +Q. What is the maximum operating temperature of a Widget? +A. 80 degrees Celsius continuous, with a transient peak of 95 degrees Celsius for +up to 60 seconds. + +Q. Can I install a Gizmo with non-stainless bolts? +A. No. Using non-stainless bolts voids the corrosion portion of the warranty. + +Q. Does the Sprocket fit a 12 mm shaft? +A. Yes. The Sprocket bore is 12 mm with a standard 4 mm keyway. + +Q. What is the lead time for bulk orders of 250 units? +A. Standard lead time is 10 to 15 business days, plus shipping. + +Q. How do I know when my Gizmo's thermal cutoff has tripped? +A. The unit will go silent and the status LED will blink red twice per second. +After five minutes the unit will automatically re-enable and resume normal operation. + +Q. Where do I find the serial number? +A. Widget: laser-etched on the underside. Gizmo: engraved on the side. Sprocket: +laser-etched on the hub. + +Q. Are your products RoHS compliant? +A. Yes. All three products comply with EU RoHS 2 (Directive 2011/65/EU) and RoHS 3 +(Directive 2015/863). + +Q. Do you offer custom colours? +A. Custom finishes are available for orders of 500 or more units. Contact +sales@example.com for a custom-finish quote. Custom-finished products are +non-returnable. + +Q. What torque should I use for the M8 bolts on a Gizmo? +A. 18 Nm. Do not exceed 22 Nm. + +Q. How long is the warranty? +A. Two years from date of purchase against defects in materials and workmanship. 
""" TERMINAL_STATUSES = {JobStatus.SUCCEEDED, JobStatus.FAILED, JobStatus.CANCELLED} diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_evaluation.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_evaluation.py index 5a930105fb95..113bc8348e79 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_evaluation.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_evaluation.py @@ -132,7 +132,7 @@ time.sleep(5) if agent_eval_run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {agent_eval_run.result_counts}") output_items = list( @@ -143,7 +143,7 @@ pprint(output_items) print(f"{'-'*60}") else: - print("\n✗ Evaluation run failed.") + print("\n[FAIL] Evaluation run failed.") openai_client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation.py index 2e60f43b3430..4bab6dfb8623 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation.py @@ -113,7 +113,7 @@ time.sleep(5) if response_eval_run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {response_eval_run.result_counts}") output_items = list( @@ -126,7 +126,7 @@ pprint(output_items) print(f"{'-'*60}") else: - print("\n✗ Evaluation run failed.") + print("\n[FAIL] Evaluation run failed.") openai_client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation_with_function_tool.py 
b/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation_with_function_tool.py index 03d33aa6dd40..2e1eff7feb8b 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation_with_function_tool.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_response_evaluation_with_function_tool.py @@ -167,7 +167,7 @@ def get_horoscope(sign: str) -> str: time.sleep(5) if response_eval_run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {response_eval_run.result_counts}") output_items = list( @@ -181,7 +181,7 @@ def get_horoscope(sign: str) -> str: print(f"{'-'*60}") else: print(f"Eval Run Report URL: {response_eval_run.report_url}") - print("\n✗ Evaluation run failed.") + print("\n[FAIL] Evaluation run failed.") openai_client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_trace_evaluation_smart_filter.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_trace_evaluation_smart_filter.py index bb1e44c4898a..9286b214e5f0 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_trace_evaluation_smart_filter.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_agent_trace_evaluation_smart_filter.py @@ -159,7 +159,7 @@ time.sleep(5) if run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {run.result_counts}") output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) @@ -170,7 +170,7 @@ print(f"\nEval Run Report URL: {run.report_url}") else: - print(f"\n✗ Evaluation run failed: {run.error}") + print(f"\n[FAIL] Evaluation run failed: {run.error}") client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") \ No newline at end of file diff 
--git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluation_cluster_insight.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluation_cluster_insight.py index a125dd62ed04..d4613060054f 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluation_cluster_insight.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluation_cluster_insight.py @@ -119,7 +119,7 @@ # If the eval run completed successfully, generate cluster insights if eval_run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Evaluation run result counts: {eval_run.result_counts}") clusterInsight = project_client.beta.insights.generate( @@ -141,13 +141,13 @@ time.sleep(5) if clusterInsight.state == OperationState.SUCCEEDED: - print("\n✓ Cluster insights generated successfully!") + print("\n[OK] Cluster insights generated successfully!") pprint(clusterInsight) else: - print("\n✗ Cluster insight generation failed.") + print("\n[FAIL] Cluster insight generation failed.") else: - print("\n✗ Evaluation run failed. Cannot generate cluster insights.") + print("\n[FAIL] Evaluation run failed. 
Cannot generate cluster insights.") openai_client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluation_compare_insight.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluation_compare_insight.py index 0b48752f4a90..d7bc4ab6e1d0 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluation_compare_insight.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_evaluation_compare_insight.py @@ -131,7 +131,7 @@ failed_runs = [run for run in completed_runs.values() if run.status == "failed"] if not failed_runs: - print("\n✓ Both evaluation runs completed successfully!") + print("\n[OK] Both evaluation runs completed successfully!") # Generate comparison insights compareInsight = project_client.beta.insights.generate( @@ -150,11 +150,11 @@ time.sleep(5) if compareInsight.state == OperationState.SUCCEEDED: - print("\n✓ Evaluation comparison generated successfully!") + print("\n[OK] Evaluation comparison generated successfully!") pprint(compareInsight) else: - print("\n✗ One or more eval runs failed. Cannot generate comparison insight.") + print("\n[FAIL] One or more eval runs failed. 
Cannot generate comparison insight.") openai_client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_model_evaluation.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_model_evaluation.py index 82b18c6f045d..e03dfd32a381 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_model_evaluation.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_model_evaluation.py @@ -112,7 +112,7 @@ time.sleep(5) if agent_eval_run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {agent_eval_run.result_counts}") output_items = list( @@ -123,7 +123,7 @@ pprint(output_items) print(f"{'-'*60}") else: - print("\n✗ Evaluation run failed.") + print("\n[FAIL] Evaluation run failed.") openai_client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_model_evaluation_instant_model.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_model_evaluation_instant_model.py index 1517b05eab62..10280bc82927 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_model_evaluation_instant_model.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_model_evaluation_instant_model.py @@ -112,7 +112,7 @@ time.sleep(5) if agent_eval_run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {agent_eval_run.result_counts}") output_items = list( @@ -123,7 +123,7 @@ pprint(output_items) print(f"{'-'*60}") else: - print("\n✗ Evaluation run failed.") + print("\n[FAIL] Evaluation run failed.") openai_client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_conversation_evaluation.py 
b/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_conversation_evaluation.py index 375478f59768..4dc36168008c 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_conversation_evaluation.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_conversation_evaluation.py @@ -159,7 +159,7 @@ time.sleep(5) if run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {run.result_counts}") output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) @@ -170,7 +170,7 @@ print(f"\nEval Run Report URL: {run.report_url}") else: - print(f"\n✗ Evaluation run failed: {run.error}") + print(f"\n[FAIL] Evaluation run failed: {run.error}") client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_conversation_simulation.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_conversation_simulation.py index 6dd7db32cd9f..76f7a7a8f01b 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_conversation_simulation.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_conversation_simulation.py @@ -20,7 +20,7 @@ Key concepts: - data_source type is "azure_ai_target_completions" with item_generation_params.type = "conversation_gen_preview" - - num_conversations is per seed scenario (e.g., 2 conversations × 3 scenarios = 6 total) + - num_conversations is per seed scenario (e.g., 2 conversations per scenario) - max_turns controls the maximum exchanges per conversation - The seed scenarios source is at the data_source root level @@ -40,6 +40,8 @@ import os import time +import uuid +from datetime import datetime, timezone from pprint import pprint from dotenv import load_dotenv from openai.types.eval_create_params import DataSourceConfigCustom @@ -58,6 +60,10 @@ 
data_folder = os.environ.get("DATA_FOLDER", os.path.join(script_dir, "data_folder")) scenarios_file = os.path.join(data_folder, "sample_data_simulation_scenarios.jsonl") +# Tag every run with a unique id so each invocation uploads a fresh dataset rather +# than silently re-using a stale cached version on the service. +run_id = f"{datetime.now(tz=timezone.utc).strftime('%y%m%d%H%M%S')}-{uuid.uuid4().hex[:4]}" + with ( DefaultAzureCredential() as credential, AIProjectClient(endpoint=endpoint, credential=credential) as project_client, @@ -127,20 +133,18 @@ ) print(f"Evaluation created (id: {eval_object.id})") - # Upload the simulation scenarios dataset - try: - dataset = project_client.datasets.upload_file( - name="simulation-scenarios", - version="1", - file_path=scenarios_file, - ) - assert dataset.id is not None, "Dataset upload returned no ID" - scenarios_id: str = dataset.id - print(f"Scenarios dataset uploaded (id: {scenarios_id})") - except Exception: - # Dataset already exists — use the existing URI - scenarios_id = f"azureai://accounts/{endpoint.split('/')[2].split('.')[0]}/projects/{endpoint.rstrip('/').split('/')[-1]}/data/simulation-scenarios/versions/1" - print(f"Using existing scenarios dataset (id: {scenarios_id})") + # Upload the simulation scenarios dataset. The name is suffixed with `run_id` so + # every invocation creates a fresh dataset on the service; without this the + # service would reject a re-upload of a same-named dataset and the sample would + # silently fall back to whatever stale version was last cached. 
+ dataset = project_client.datasets.upload_file( + name=f"simulation-scenarios-{run_id}", + version="1", + file_path=scenarios_file, + ) + assert dataset.id is not None, "Dataset upload returned no ID" + scenarios_id: str = dataset.id + print(f"Scenarios dataset uploaded (id: {scenarios_id})") # Create a simulation run # - source: the seed scenarios dataset (each row is a test case) @@ -193,10 +197,12 @@ time.sleep(10) if run.status == "completed": - print("\n✓ Simulation run completed successfully!") + print("\n[OK] Simulation run completed successfully!") print(f"Result Counts: {run.result_counts}") - # With 3 seed scenarios and num_conversations=2, expect 6 total conversations - print(f"Expected: {3 * 2} conversations (3 scenarios × 2 per scenario)") + # Total conversations = (rows in scenarios_file) * num_conversations + with open(scenarios_file, encoding="utf-8") as f: + num_scenarios = sum(1 for line in f if line.strip()) + print(f"Expected: {num_scenarios * 2} conversations ({num_scenarios} scenarios x 2 per scenario)") output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) print(f"\nOUTPUT ITEMS (Total: {len(output_items)})") @@ -206,7 +212,7 @@ print(f"\nEval Run Report URL: {run.report_url}") else: - print(f"\n✗ Simulation run failed: {run.error}") + print(f"\n[FAIL] Simulation run failed: {run.error}") client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_trace_evaluation_agent_filter.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_trace_evaluation_agent_filter.py index 7a65c9f6b4ba..d3a00ac1d34f 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_trace_evaluation_agent_filter.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_trace_evaluation_agent_filter.py @@ -162,7 +162,7 @@ def main() -> None: time.sleep(5) if run.status == "completed": - print("\n✓ 
Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {run.result_counts}") output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) @@ -173,7 +173,7 @@ def main() -> None: print(f"\nEval Run Report URL: {run.report_url}") else: - print(f"\n✗ Evaluation run failed: {run.error}") + print(f"\n[FAIL] Evaluation run failed: {run.error}") client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_trace_evaluation_by_id.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_trace_evaluation_by_id.py index f8117be7ae3b..e50bf5efde07 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_trace_evaluation_by_id.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_multiturn_trace_evaluation_by_id.py @@ -149,7 +149,7 @@ time.sleep(5) if run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {run.result_counts}") output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id)) @@ -160,7 +160,7 @@ print(f"\nEval Run Report URL: {run.report_url}") else: - print(f"\n✗ Evaluation run failed: {run.error}") + print(f"\n[FAIL] Evaluation run failed: {run.error}") client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_scheduled_agent_traces_evaluation_smart_filter.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_scheduled_agent_traces_evaluation_smart_filter.py index 4e39ea2eb539..a41a69c78174 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_scheduled_agent_traces_evaluation_smart_filter.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_scheduled_agent_traces_evaluation_smart_filter.py @@ -176,7 +176,7 @@ 
def assign_rbac(): # pylint: disable=too-many-statements raise elif "RoleAssignmentExists" in error_message: - print("\n✅ ROLE ASSIGNMENT ALREADY EXISTS:") + print("\n[OK] ROLE ASSIGNMENT ALREADY EXISTS:") print("The 'Foundry User' role is already assigned to the project's managed identity.") print("No action needed - the required permissions are already in place.") @@ -194,7 +194,7 @@ def assign_rbac(): # pylint: disable=too-many-statements print("This usually indicates a service availability issue.") else: - print("\n❌ UNEXPECTED ERROR:") + print("\n[FAIL] UNEXPECTED ERROR:") print("An unexpected error occurred. Please check the error details above.") raise diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_scheduled_evaluations.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_scheduled_evaluations.py index 7155d540b640..6bd9a6d02505 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_scheduled_evaluations.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_scheduled_evaluations.py @@ -192,7 +192,7 @@ def assign_rbac(): # pylint: disable=too-many-statements raise elif "RoleAssignmentExists" in error_message: - print("\n✅ ROLE ASSIGNMENT ALREADY EXISTS:") + print("\n[OK] ROLE ASSIGNMENT ALREADY EXISTS:") print("The 'Azure AI User' role is already assigned to the project's managed identity.") print("No action needed - the required permissions are already in place.") @@ -210,7 +210,7 @@ def assign_rbac(): # pylint: disable=too-many-statements print("This usually indicates a service availability issue.") else: - print("\n❌ UNEXPECTED ERROR:") + print("\n[FAIL] UNEXPECTED ERROR:") print("An unexpected error occurred. 
Please check the error details above.") raise diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_synthetic_data_agent_evaluation.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_synthetic_data_agent_evaluation.py index dada3b8e2418..d390ea8b5b90 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_synthetic_data_agent_evaluation.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_synthetic_data_agent_evaluation.py @@ -147,7 +147,7 @@ time.sleep(5) if eval_run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {eval_run.result_counts}") output_items = list(client.evals.runs.output_items.list(run_id=eval_run.id, eval_id=eval_object.id)) @@ -164,7 +164,7 @@ if output_dataset_id: print(f"Output Dataset ID (for reuse): {output_dataset_id}") else: - print("\n✗ Evaluation run failed.") + print("\n[FAIL] Evaluation run failed.") client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted") diff --git a/sdk/ai/azure-ai-projects/samples/evaluations/sample_synthetic_data_model_evaluation.py b/sdk/ai/azure-ai-projects/samples/evaluations/sample_synthetic_data_model_evaluation.py index f155654b6a86..18dec83017ac 100644 --- a/sdk/ai/azure-ai-projects/samples/evaluations/sample_synthetic_data_model_evaluation.py +++ b/sdk/ai/azure-ai-projects/samples/evaluations/sample_synthetic_data_model_evaluation.py @@ -151,7 +151,7 @@ time.sleep(5) if eval_run.status == "completed": - print("\n✓ Evaluation run completed successfully!") + print("\n[OK] Evaluation run completed successfully!") print(f"Result Counts: {eval_run.result_counts}") output_items = list(client.evals.runs.output_items.list(run_id=eval_run.id, eval_id=eval_object.id)) @@ -168,7 +168,7 @@ if output_dataset_id: print(f"Output Dataset ID (for reuse): {output_dataset_id}") else: - print("\n✗ Evaluation run failed.") + print("\n[FAIL] Evaluation run failed.") 
client.evals.delete(eval_id=eval_object.id) print("Evaluation deleted")