Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@
time.sleep(5)

if agent_eval_run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {agent_eval_run.result_counts}")

output_items = list(
Expand All @@ -143,7 +143,7 @@
pprint(output_items)
print(f"{'-'*60}")
else:
print("\n Evaluation run failed.")
print("\n[FAIL] Evaluation run failed.")

openai_client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
time.sleep(5)

if response_eval_run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {response_eval_run.result_counts}")

output_items = list(
Expand All @@ -126,7 +126,7 @@
pprint(output_items)
print(f"{'-'*60}")
else:
print("\n Evaluation run failed.")
print("\n[FAIL] Evaluation run failed.")

openai_client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def get_horoscope(sign: str) -> str:
time.sleep(5)

if response_eval_run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {response_eval_run.result_counts}")

output_items = list(
Expand All @@ -181,7 +181,7 @@ def get_horoscope(sign: str) -> str:
print(f"{'-'*60}")
else:
print(f"Eval Run Report URL: {response_eval_run.report_url}")
print("\n Evaluation run failed.")
print("\n[FAIL] Evaluation run failed.")

openai_client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@
time.sleep(5)

if run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {run.result_counts}")

output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id))
Expand All @@ -170,7 +170,7 @@

print(f"\nEval Run Report URL: {run.report_url}")
else:
print(f"\n Evaluation run failed: {run.error}")
print(f"\n[FAIL] Evaluation run failed: {run.error}")

client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@

# If the eval run completed successfully, generate cluster insights
if eval_run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Evaluation run result counts: {eval_run.result_counts}")

clusterInsight = project_client.beta.insights.generate(
Expand All @@ -141,13 +141,13 @@
time.sleep(5)

if clusterInsight.state == OperationState.SUCCEEDED:
print("\n Cluster insights generated successfully!")
print("\n[OK] Cluster insights generated successfully!")
pprint(clusterInsight)
else:
print("\n Cluster insight generation failed.")
print("\n[FAIL] Cluster insight generation failed.")

else:
print("\n Evaluation run failed. Cannot generate cluster insights.")
print("\n[FAIL] Evaluation run failed. Cannot generate cluster insights.")

openai_client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@
failed_runs = [run for run in completed_runs.values() if run.status == "failed"]

if not failed_runs:
print("\n Both evaluation runs completed successfully!")
print("\n[OK] Both evaluation runs completed successfully!")

# Generate comparison insights
compareInsight = project_client.beta.insights.generate(
Expand All @@ -150,11 +150,11 @@
time.sleep(5)

if compareInsight.state == OperationState.SUCCEEDED:
print("\n Evaluation comparison generated successfully!")
print("\n[OK] Evaluation comparison generated successfully!")
pprint(compareInsight)

else:
print("\n One or more eval runs failed. Cannot generate comparison insight.")
print("\n[FAIL] One or more eval runs failed. Cannot generate comparison insight.")

openai_client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
time.sleep(5)

if agent_eval_run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {agent_eval_run.result_counts}")

output_items = list(
Expand All @@ -123,7 +123,7 @@
pprint(output_items)
print(f"{'-'*60}")
else:
print("\n Evaluation run failed.")
print("\n[FAIL] Evaluation run failed.")

openai_client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
time.sleep(5)

if agent_eval_run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {agent_eval_run.result_counts}")

output_items = list(
Expand All @@ -123,7 +123,7 @@
pprint(output_items)
print(f"{'-'*60}")
else:
print("\n Evaluation run failed.")
print("\n[FAIL] Evaluation run failed.")

openai_client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@
time.sleep(5)

if run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {run.result_counts}")

output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id))
Expand All @@ -170,7 +170,7 @@

print(f"\nEval Run Report URL: {run.report_url}")
else:
print(f"\n Evaluation run failed: {run.error}")
print(f"\n[FAIL] Evaluation run failed: {run.error}")

client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@
time.sleep(10)

if run.status == "completed":
print("\n Simulation run completed successfully!")
print("\n[OK] Simulation run completed successfully!")
print(f"Result Counts: {run.result_counts}")
# With 3 seed scenarios and num_conversations=2, expect 6 total conversations
print(f"Expected: {3 * 2} conversations (3 scenarios × 2 per scenario)")
Expand All @@ -206,7 +206,7 @@

print(f"\nEval Run Report URL: {run.report_url}")
else:
print(f"\n Simulation run failed: {run.error}")
print(f"\n[FAIL] Simulation run failed: {run.error}")

client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def main() -> None:
time.sleep(5)

if run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {run.result_counts}")

output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id))
Expand All @@ -173,7 +173,7 @@ def main() -> None:

print(f"\nEval Run Report URL: {run.report_url}")
else:
print(f"\n Evaluation run failed: {run.error}")
print(f"\n[FAIL] Evaluation run failed: {run.error}")

client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@
time.sleep(5)

if run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {run.result_counts}")

output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id))
Expand All @@ -160,7 +160,7 @@

print(f"\nEval Run Report URL: {run.report_url}")
else:
print(f"\n Evaluation run failed: {run.error}")
print(f"\n[FAIL] Evaluation run failed: {run.error}")

client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def assign_rbac(): # pylint: disable=too-many-statements
raise

elif "RoleAssignmentExists" in error_message:
print("\n ROLE ASSIGNMENT ALREADY EXISTS:")
print("\n[OK] ROLE ASSIGNMENT ALREADY EXISTS:")
print("The 'Foundry User' role is already assigned to the project's managed identity.")
print("No action needed - the required permissions are already in place.")

Expand All @@ -194,7 +194,7 @@ def assign_rbac(): # pylint: disable=too-many-statements
print("This usually indicates a service availability issue.")

else:
print("\n UNEXPECTED ERROR:")
print("\n[FAIL] UNEXPECTED ERROR:")
print("An unexpected error occurred. Please check the error details above.")
raise

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def assign_rbac(): # pylint: disable=too-many-statements
raise

elif "RoleAssignmentExists" in error_message:
print("\n ROLE ASSIGNMENT ALREADY EXISTS:")
print("\n[OK] ROLE ASSIGNMENT ALREADY EXISTS:")
print("The 'Azure AI User' role is already assigned to the project's managed identity.")
print("No action needed - the required permissions are already in place.")

Expand All @@ -210,7 +210,7 @@ def assign_rbac(): # pylint: disable=too-many-statements
print("This usually indicates a service availability issue.")

else:
print("\n UNEXPECTED ERROR:")
print("\n[FAIL] UNEXPECTED ERROR:")
print("An unexpected error occurred. Please check the error details above.")
raise

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@
time.sleep(5)

if eval_run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {eval_run.result_counts}")

output_items = list(client.evals.runs.output_items.list(run_id=eval_run.id, eval_id=eval_object.id))
Expand All @@ -164,7 +164,7 @@
if output_dataset_id:
print(f"Output Dataset ID (for reuse): {output_dataset_id}")
else:
print("\n Evaluation run failed.")
print("\n[FAIL] Evaluation run failed.")

client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@
time.sleep(5)

if eval_run.status == "completed":
print("\n Evaluation run completed successfully!")
print("\n[OK] Evaluation run completed successfully!")
print(f"Result Counts: {eval_run.result_counts}")

output_items = list(client.evals.runs.output_items.list(run_id=eval_run.id, eval_id=eval_object.id))
Expand All @@ -168,7 +168,7 @@
if output_dataset_id:
print(f"Output Dataset ID (for reuse): {output_dataset_id}")
else:
print("\n Evaluation run failed.")
print("\n[FAIL] Evaluation run failed.")

client.evals.delete(eval_id=eval_object.id)
print("Evaluation deleted")
Loading