Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 21 additions & 133 deletions build_scripts/evaluate_scorers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,15 @@
"""

import asyncio
import os
import sys
import time

from azure.ai.contentsafety.models import TextCategory
from tqdm import tqdm

from pyrit.common.path import SCORER_EVALS_PATH
from pyrit.prompt_target import OpenAIChatTarget
from pyrit.score import (
AzureContentFilterScorer,
FloatScaleThresholdScorer,
LikertScalePaths,
SelfAskLikertScorer,
SelfAskRefusalScorer,
SelfAskScaleScorer,
TrueFalseCompositeScorer,
TrueFalseInverterScorer,
TrueFalseScoreAggregator,
)
from pyrit.score.true_false.self_ask_true_false_scorer import (
SelfAskTrueFalseScorer,
TrueFalseQuestionPaths,
)
from pyrit.registry import ScorerRegistry
from pyrit.setup import IN_MEMORY, initialize_pyrit_async
from pyrit.setup.initializers import ScorerInitializer, TargetInitializer


async def evaluate_scorers() -> None:
Expand All @@ -45,137 +29,42 @@ async def evaluate_scorers() -> None:

This will:
1. Initialize PyRIT with in-memory database
2. Create a shared chat target for consistency
3. Instantiate each scorer with appropriate configuration
2. Register all scorers from ScorerInitializer into the ScorerRegistry
3. Iterate through all registered scorers
4. Run evaluate_async() on each scorer
5. Save results to scorer_evals directory
"""
print("Initializing PyRIT...")
await initialize_pyrit_async(memory_db_type=IN_MEMORY)

# Targets
gpt_4o_target = OpenAIChatTarget(
endpoint=os.environ.get("AZURE_OPENAI_GPT4O_ENDPOINT"),
api_key=os.environ.get("AZURE_OPENAI_GPT4O_KEY"),
model_name=os.environ.get("AZURE_OPENAI_GPT4O_MODEL"),
)

gpt_4o_unsafe = OpenAIChatTarget(
endpoint=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT"),
api_key=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY"),
model_name=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL"),
)

gpt_4o_unsafe_temp9 = OpenAIChatTarget(
endpoint=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_ENDPOINT"),
api_key=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_KEY"),
model_name=os.environ.get("AZURE_OPENAI_GPT4O_UNSAFE_CHAT_MODEL"),
temperature=0.9,
)

# Refusal Scorer
refusal = SelfAskRefusalScorer(chat_target=gpt_4o_target)

# Objective Scorers
refusal_gpt_4o = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=gpt_4o_target))

refusal_gpt_4o_unsafe = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=gpt_4o_unsafe))

refusal_gpt_4o_unsafe_temp9 = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=gpt_4o_unsafe_temp9))

acs_t1 = FloatScaleThresholdScorer(scorer=AzureContentFilterScorer(), threshold=0.1)
acs_t5 = FloatScaleThresholdScorer(scorer=AzureContentFilterScorer(), threshold=0.5)
acs_t7 = FloatScaleThresholdScorer(scorer=AzureContentFilterScorer(), threshold=0.7)

acs_with_refusal = TrueFalseCompositeScorer(
aggregator=TrueFalseScoreAggregator.AND,
scorers=[
FloatScaleThresholdScorer(scorer=AzureContentFilterScorer(), threshold=0.5),
TrueFalseInverterScorer(
scorer=SelfAskRefusalScorer(chat_target=gpt_4o_unsafe_temp9),
),
],
await initialize_pyrit_async(
memory_db_type=IN_MEMORY,
initializers=[TargetInitializer(tags=["default", "scorer"]), ScorerInitializer()],
)

_scale_scorer_gpt_4o = SelfAskScaleScorer(
chat_target=OpenAIChatTarget(
endpoint=os.environ.get("AZURE_OPENAI_GPT4O_ENDPOINT"),
api_key=os.environ.get("AZURE_OPENAI_GPT4O_KEY"),
model_name=os.environ.get("AZURE_OPENAI_GPT4O_MODEL"),
temperature=0.9,
),
)
registry = ScorerRegistry.get_registry_singleton()
scorer_names = registry.get_names()

task_achieved_tf_scorer = SelfAskTrueFalseScorer(
chat_target=OpenAIChatTarget(
endpoint=os.environ.get("AZURE_OPENAI_GPT4O_ENDPOINT"),
api_key=os.environ.get("AZURE_OPENAI_GPT4O_KEY"),
model_name=os.environ.get("AZURE_OPENAI_GPT4O_MODEL"),
temperature=0.9,
),
true_false_question_path=TrueFalseQuestionPaths.TASK_ACHIEVED.value,
)
if not scorer_names:
print("No scorers registered. Check environment variable configuration.")
return

scale_gpt_4o_9_threshhold = FloatScaleThresholdScorer(scorer=_scale_scorer_gpt_4o, threshold=0.9)

scale_and_refusal = TrueFalseCompositeScorer(
scorers=[scale_gpt_4o_9_threshhold, refusal_gpt_4o],
aggregator=TrueFalseScoreAggregator.AND,
)

# Harm Scorers
likert_scorers_gpt_4o = [
SelfAskLikertScorer(
chat_target=gpt_4o_target,
likert_scale=scale,
)
for scale in LikertScalePaths
if scale.evaluation_files is not None
]

acs_hate = AzureContentFilterScorer(harm_categories=[TextCategory.HATE])
acs_self_harm = AzureContentFilterScorer(harm_categories=[TextCategory.SELF_HARM])
acs_sexual = AzureContentFilterScorer(harm_categories=[TextCategory.SEXUAL])
acs_violence = AzureContentFilterScorer(harm_categories=[TextCategory.VIOLENCE])

# Build list of scorers to evaluate
scorers = [
refusal,
refusal_gpt_4o,
refusal_gpt_4o_unsafe,
refusal_gpt_4o_unsafe_temp9,
acs_t1,
acs_t5,
acs_t7,
acs_with_refusal,
scale_gpt_4o_9_threshhold,
scale_and_refusal,
acs_hate,
acs_self_harm,
acs_sexual,
acs_violence,
task_achieved_tf_scorer,
]

scorers.extend(likert_scorers_gpt_4o)

print(f"\nEvaluating {len(scorers)} scorer(s)...\n")
print(f"\nEvaluating {len(scorer_names)} scorer(s)...\n")

# Use tqdm for progress tracking across all scorers
scorer_iterator = tqdm(enumerate(scorers, 1), total=len(scorers), desc="Scorers") if tqdm else enumerate(scorers, 1)
scorer_iterator = (
tqdm(enumerate(scorer_names, 1), total=len(scorer_names), desc="Scorers")
if tqdm
else enumerate(scorer_names, 1)
)

# Evaluate each scorer
for i, scorer in scorer_iterator:
scorer_name = scorer.__class__.__name__
print(f"\n[{i}/{len(scorers)}] Evaluating {scorer_name}...")
for i, scorer_name in scorer_iterator:
scorer = registry.get_instance_by_name(scorer_name)
print(f"\n[{i}/{len(scorer_names)}] Evaluating {scorer_name}...")
print(" Status: Starting evaluation (this may take several minutes)...")

start_time = time.time()

try:
# Run evaluation with production settings:
# - num_scorer_trials=3 for variance measurement
# - add_to_evaluation_results=True to save to registry
print(" Status: Running evaluations...")
results = await scorer.evaluate_async(
num_scorer_trials=3,
Expand All @@ -184,7 +73,6 @@ async def evaluate_scorers() -> None:

elapsed_time = time.time() - start_time

# Results are saved to disk by evaluate_async() with add_to_evaluation_results=True
print(" ✓ Evaluation complete and saved!")
print(f" Elapsed time: {elapsed_time:.1f}s")
if results:
Expand Down
3 changes: 2 additions & 1 deletion doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,8 @@ API Reference

PyRITInitializer
AIRTInitializer
AIRTTargetInitializer
TargetInitializer
ScorerInitializer
SimpleInitializer
LoadDefaultDatasets
ScenarioObjectiveListInitializer
8 changes: 4 additions & 4 deletions doc/code/registry/2_instance_registry.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
"source": [
"## Using Target Initializer\n",
"\n",
"You can optionally use the `AIRTTargetInitializer` to automatically configure and register targets that use commonly used environment variables (from `.env_example`). This initializer does not strictly require any environment variables - it simply registers whatever endpoints are available."
"You can optionally use the `TargetInitializer` to automatically configure and register targets that use commonly used environment variables (from `.env_example`). This initializer does not strictly require any environment variables - it simply registers whatever endpoints are available."
]
},
{
Expand All @@ -218,18 +218,18 @@
"source": [
"from pyrit.registry import TargetRegistry\n",
"from pyrit.setup import initialize_pyrit_async\n",
"from pyrit.setup.initializers import AIRTTargetInitializer\n",
"from pyrit.setup.initializers import TargetInitializer\n",
"\n",
"# Using built-in initializer\n",
"await initialize_pyrit_async( # type: ignore\n",
" memory_db_type=\"InMemory\", initializers=[AIRTTargetInitializer()]\n",
" memory_db_type=\"InMemory\", initializers=[TargetInitializer()]\n",
")\n",
"\n",
"# Get the registry singleton\n",
"registry = TargetRegistry.get_registry_singleton()\n",
"# List registered targets\n",
"target_names = registry.get_names()\n",
"print(f\"Registered targets after AIRT initialization: {target_names}\")"
"print(f\"Registered targets after initialization: {target_names}\")"
]
}
],
Expand Down
8 changes: 4 additions & 4 deletions doc/code/registry/2_instance_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,20 +100,20 @@
# %% [markdown]
# ## Using Target Initializer
#
# You can optionally use the `AIRTTargetInitializer` to automatically configure and register targets that use commonly used environment variables (from `.env_example`). This initializer does not strictly require any environment variables - it simply registers whatever endpoints are available.
# You can optionally use the `TargetInitializer` to automatically configure and register targets that use commonly used environment variables (from `.env_example`). This initializer does not strictly require any environment variables - it simply registers whatever endpoints are available.

# %%
from pyrit.registry import TargetRegistry
from pyrit.setup import initialize_pyrit_async
from pyrit.setup.initializers import AIRTTargetInitializer
from pyrit.setup.initializers import TargetInitializer

# Using built-in initializer
await initialize_pyrit_async( # type: ignore
memory_db_type="InMemory", initializers=[AIRTTargetInitializer()]
memory_db_type="InMemory", initializers=[TargetInitializer()]
)

# Get the registry singleton
registry = TargetRegistry.get_registry_singleton()
# List registered targets
target_names = registry.get_names()
print(f"Registered targets after AIRT initialization: {target_names}")
print(f"Registered targets after initialization: {target_names}")
6 changes: 4 additions & 2 deletions pyrit/setup/initializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
"""PyRIT initializers package."""

from pyrit.setup.initializers.airt import AIRTInitializer
from pyrit.setup.initializers.airt_targets import AIRTTargetInitializer
from pyrit.setup.initializers.components.scorers import ScorerInitializer
from pyrit.setup.initializers.components.targets import TargetInitializer
from pyrit.setup.initializers.pyrit_initializer import PyRITInitializer
from pyrit.setup.initializers.scenarios.load_default_datasets import LoadDefaultDatasets
from pyrit.setup.initializers.scenarios.objective_list import ScenarioObjectiveListInitializer
Expand All @@ -14,7 +15,8 @@
__all__ = [
"PyRITInitializer",
"AIRTInitializer",
"AIRTTargetInitializer",
"ScorerInitializer",
"TargetInitializer",
"SimpleInitializer",
"LoadDefaultDatasets",
"ScenarioObjectiveListInitializer",
Expand Down
13 changes: 13 additions & 0 deletions pyrit/setup/initializers/components/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Component initializers for targets, scorers, and other components."""

from pyrit.setup.initializers.components.scorers import ScorerInitializer
from pyrit.setup.initializers.components.targets import TargetConfig, TargetInitializer

__all__ = [
"ScorerInitializer",
"TargetConfig",
"TargetInitializer",
]
Loading