Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions apps/cli/src/commands/import/promptfoo.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ tests:
expect(suite.tests[0]).toMatchObject({
id: 'capital',
criteria: 'Capital answer stays deterministic',
input: 'Answer clearly: What is the capital of France?',
input: 'Answer clearly: {{question}}',
vars: { question: 'What is the capital of France?' },
assertions: [{ type: 'equals', value: 'Paris' }],
metadata: {
promptfoo: {
Expand Down Expand Up @@ -95,7 +96,8 @@ tests: file://./tests.jsonl
const yaml = await convertPromptfooToAgentvYaml(configPath);
expect(yaml).toContain('# Converted from promptfoo config:');
expect(yaml).toContain('id: math');
expect(yaml).toContain('input: "Please answer: What is 2 + 2?"');
expect(yaml).toContain('input: "Please answer: {{question}}"');
expect(yaml).toContain('vars:');
expect(yaml).toContain('type: equals');
});

Expand Down Expand Up @@ -129,7 +131,10 @@ tests: file://./tests.csv
expect(suite.tests[0]).toMatchObject({
id: 'capital-question',
criteria: 'Capital question',
input: 'Question: What is the capital of France?',
input: 'Question: {{question}}',
vars: {
question: 'What is the capital of France?',
},
assertions: [
{ type: 'equals', value: 'Paris' },
{ type: 'contains', value: 'Paris' },
Expand Down
39 changes: 10 additions & 29 deletions apps/cli/src/commands/import/promptfoo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ interface AgentvAssertion {
/** Shape of one converted test case assembled by `buildAgentvTests`. */
interface AgentvTest {
/** Test identifier; built from the explicit or base id plus an optional per-prompt suffix. */
readonly id: string;
/** Prompt input, kept as a template: a single string or a list of role/content messages. */
readonly input: AgentvInput;
/** Per-test template variables; only set when var expansion is enabled and at least one var exists. */
readonly vars?: Record<string, JsonValue>;
/** Case-level assertions; omitted when the converted list is empty. */
readonly assertions?: readonly AgentvAssertion[];
/** Open-ended extra keys; `buildAgentvTests` adds `criteria`, `metadata` and `execution` through this. */
readonly [key: string]: unknown;
}
Expand Down Expand Up @@ -825,7 +826,8 @@ async function buildAgentvTests(options: {
}

for (const prompt of promptSelection) {
const renderedInput = renderPrompt(prompt, effectiveVars, testOptions);
const importedVars = testOptions.disableVarExpansion ? undefined : effectiveVars;
const templatedInput = buildPromptTemplate(prompt, testOptions);
const promptSuffix =
promptSelection.length > 1 ? `--${sanitizeName(prompt.key || prompt.label)}` : '';
const metadata = buildPromptfooMetadata(rawTest, effectiveVars, prompt, effectiveTargets);
Expand All @@ -838,7 +840,8 @@ async function buildAgentvTests(options: {
const test: AgentvTest = {
id: `${explicitId ?? baseId}${promptSuffix}`,
...(typeof rawTest.description === 'string' ? { criteria: rawTest.description } : {}),
input: renderedInput,
input: templatedInput,
...(importedVars && Object.keys(importedVars).length > 0 ? { vars: importedVars } : {}),
...(convertedCaseAssertions.length > 0 ? { assertions: convertedCaseAssertions } : {}),
...(metadata ? { metadata } : {}),
...(execution ? { execution } : {}),
Expand Down Expand Up @@ -970,52 +973,30 @@ function filterProviders(
return matched.map((provider) => provider.targetName);
}

function renderPrompt(
function buildPromptTemplate(
prompt: PromptfooPrompt,
vars: Record<string, JsonValue>,
testOptions: PromptfooTestOptions,
): AgentvInput {
const prefix = testOptions.prefix ?? '';
const suffix = testOptions.suffix ?? '';

if (typeof prompt.content === 'string') {
return `${prefix}${renderTemplate(prompt.content, vars)}${suffix}`;
return `${prefix}${preserveTemplate(prompt.content)}${suffix}`;
}

return prompt.content.map((message, index, allMessages) => ({
role: message.role,
content: `${index === 0 ? prefix : ''}${renderTemplate(message.content, vars)}${index === allMessages.length - 1 ? suffix : ''}`,
content: `${index === 0 ? prefix : ''}${preserveTemplate(message.content)}${index === allMessages.length - 1 ? suffix : ''}`,
}));
}

function renderTemplate(template: string, vars: Record<string, JsonValue>) {
function preserveTemplate(template: string) {
if (template.includes('{%') || template.includes('{#') || /\{\{[^}]*\|/.test(template)) {
throw new Error(
`Unsupported Nunjucks syntax in prompt '${template.slice(0, 80)}'. Use simple {{var}} templates or migrate manually`,
);
}

return template.replace(/\{\{\s*([^}]+?)\s*\}\}/g, (_match, expression: string) => {
const value = lookupPath(vars, expression.trim());
if (value === undefined) {
return '';
}
if (typeof value === 'string') return value;
return JSON.stringify(value);
});
}

function lookupPath(
value: JsonValue | Record<string, JsonValue>,
expression: string,
): JsonValue | undefined {
if (!expression) return undefined;
return expression.split('.').reduce<JsonValue | undefined>((current, part) => {
if (!current || typeof current !== 'object' || Array.isArray(current)) {
return undefined;
}
return (current as Record<string, JsonValue>)[part];
}, value as JsonValue);
return template;
}

function buildPromptfooMetadata(
Expand Down
28 changes: 28 additions & 0 deletions apps/web/src/content/docs/docs/evaluation/eval-files.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,34 @@ For local sources, omit `checkout.resolve`. If you need to pin the local clone t
MY_REPO_LOCAL_PATH=/home/dev/repos/my-repo
```

## Per-Test Template Variables

Eval YAML also supports per-test `vars` for data-driven prompt templates. Use `{{name}}` placeholders in test-facing text fields, and AgentV resolves them when the suite loads.

```yaml
input: "Answer clearly: {{question}}"

tests:
  - id: capital
    vars:
      question: What is the capital of France?
      expected_answer: Paris
    criteria: "Answers {{question}} correctly"
    input:
      - role: user
        content: "Question: {{question}}"
    expected_output: "{{expected_answer}}"
```

### Behavior

- `vars` is defined per test as an object
- `{{name}}` and dotted paths like `{{ user.name }}` are supported
- Substitution applies to suite-level `input`, test `input`, `input_files`, `criteria`, `expected_output`, and conversation turn `input` / `expected_output`
- When the whole string is a single placeholder, the original JSON value is preserved
- Missing variables are left unchanged, so unrelated template syntax is not silently blanked out
- `vars` interpolation is separate from environment interpolation: `{{question}}` uses test data, `${{ PROJECT_NAME }}` uses environment variables

## JSONL Format

For large-scale evaluations, AgentV supports JSONL (JSON Lines) format. Each line is a single test:
Expand Down
1 change: 1 addition & 0 deletions apps/web/src/content/docs/docs/tools/import.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Default output: `EVAL.yaml` beside the promptfoo config file.
- inline tests and external YAML / JSON / JSONL / CSV test files
- `defaultTest.assert` promoted to suite-level `assertions`
- per-test `vars`, `description`, `threshold`, `metadata`, prompt filters, and provider filters
- simple prompt templates are preserved as AgentV `{{var}}` input templates instead of being eagerly flattened
- deterministic assertions that map directly to AgentV: `equals`, `contains`, `icontains`, `regex`, `starts-with`, `ends-with`, `contains-any`, `contains-all`, `icontains-any`, `icontains-all`, `is-json`, `latency`, `cost`
- rubric-style assertions mapped to `llm-grader`: `llm-rubric`, `g-eval`, `factuality`, `context-faithfulness`, `context-recall`

Expand Down
75 changes: 75 additions & 0 deletions packages/core/src/evaluation/interpolation.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import type { EnvLookup } from './providers/types.js';

// Matches every `${{ NAME }}` environment reference (global); capture group 1 is the variable name.
const ENV_VAR_PATTERN = /\$\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
// Matches every `{{ name }}` or dotted `{{ a.b }}` reference (global); capture group 1 is the dotted path.
// NOTE(review): this also matches the `{{ NAME }}` tail of a `${{ NAME }}` env reference —
// confirm env interpolation always runs before template-var interpolation.
const TEMPLATE_VAR_PATTERN = /\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}/g;
// Anchored, non-global variant: matches only when the whole string is one `{{ name }}` reference.
const WHOLE_TEMPLATE_VAR_PATTERN = /^\{\{\s*([A-Za-z_][A-Za-z0-9_.]*)\s*\}\}$/;

/**
* Regex that matches a string consisting of exactly one `${{ VAR }}` reference
Expand Down Expand Up @@ -29,6 +31,42 @@ function coercePrimitive(value: string): unknown {
return value;
}

/**
 * Type guard for "object-like record" values: accepts anything whose `typeof`
 * is 'object' except `null` and arrays. No prototype check is performed.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}

/**
 * Deep-copies a template variable value so the interpolated result never
 * shares arrays or objects with the `vars` it was resolved from.
 *
 * Arrays and objects accepted by `isPlainObject` are copied recursively;
 * every other value is returned as-is.
 *
 * @param value - The resolved variable value.
 * @returns A structurally equal copy of `value`.
 */
function cloneTemplateValue(value: unknown): unknown {
  if (Array.isArray(value)) {
    return value.map((item) => cloneTemplateValue(item));
  }
  if (isPlainObject(value)) {
    // Object.fromEntries defines every key as an own data property. Plain
    // assignment (`result[key] = ...`) would treat an own "__proto__" key as a
    // prototype change and silently drop it from the copy.
    return Object.fromEntries(
      Object.entries(value).map(([key, nested]) => [key, cloneTemplateValue(nested)]),
    );
  }
  return value;
}

/**
 * Converts a resolved template variable into the text spliced into a larger string.
 *
 * Strings are used verbatim; everything else is JSON-serialized.
 *
 * @param value - The resolved variable value.
 * @returns The text to insert; always a string.
 */
function stringifyTemplateValue(value: unknown): string {
  if (typeof value === 'string') return value;
  // JSON.stringify yields `undefined` (not a string) for `undefined`, functions
  // and symbols, despite its declared return type. Fall back to String() so the
  // declared `string` contract actually holds at runtime.
  const serialized: string | undefined = JSON.stringify(value);
  return serialized ?? String(value);
}

/**
 * Resolves a dotted expression such as `user.name` against the per-test vars.
 *
 * Only own properties are consulted at each step, so inherited members such as
 * `constructor`, `toString` or `__proto__` never count as variables.
 *
 * @param vars - Per-test variables.
 * @param expression - Dot-separated path; an empty expression resolves to nothing.
 * @returns The value at the path, or `undefined` when any segment is missing or
 *   an intermediate value is not an object.
 */
function lookupTemplateVar(
  vars: Readonly<Record<string, unknown>>,
  expression: string,
): unknown | undefined {
  if (!expression) return undefined;

  let current: unknown = vars;
  for (const segment of expression.split('.')) {
    // hasOwnProperty.call (rather than `segment in current`) keeps the walk off
    // the prototype chain and also works for objects without a prototype.
    if (!isPlainObject(current) || !Object.prototype.hasOwnProperty.call(current, segment)) {
      return undefined;
    }
    current = current[segment];
  }
  return current;
}

/**
* Recursively interpolate `${{ VAR }}` references in all string values.
* Missing variables resolve to empty string.
Expand Down Expand Up @@ -71,3 +109,40 @@ export function interpolateEnv(value: unknown, env: EnvLookup): unknown {
}
return value;
}

/**
 * Recursively interpolate `{{ var }}` references in string values using per-test vars.
 *
 * - A string that is exactly one reference resolves to a deep copy of the
 *   variable's original value (object, array, number, ...), not its string form.
 * - References embedded in longer text are replaced by the variable's string
 *   value, or by its JSON serialization when it is not a string.
 * - Missing variables are left unchanged so unrelated template syntaxes remain intact.
 * - Arrays and objects are rebuilt with every nested value interpolated; object
 *   keys are not interpolated. Any other value is returned as-is.
 *
 * @param value - Value to interpolate; it is not mutated.
 * @param vars - Per-test variables; dotted expressions walk nested objects.
 * @returns The interpolated copy of `value`.
 */
export function interpolateTemplateVars(
  value: unknown,
  vars: Readonly<Record<string, unknown>>,
): unknown {
  if (typeof value === 'string') {
    const wholeMatch = WHOLE_TEMPLATE_VAR_PATTERN.exec(value);
    if (wholeMatch) {
      const resolved = lookupTemplateVar(vars, wholeMatch[1] as string);
      return resolved === undefined ? value : cloneTemplateValue(resolved);
    }

    return value.replace(TEMPLATE_VAR_PATTERN, (match, expression: string) => {
      const resolved = lookupTemplateVar(vars, expression);
      return resolved === undefined ? match : stringifyTemplateValue(resolved);
    });
  }

  if (Array.isArray(value)) {
    return value.map((item) => interpolateTemplateVars(item, vars));
  }

  if (isPlainObject(value)) {
    // Object.fromEntries defines every key as an own data property. Plain
    // assignment (`result[key] = ...`) would treat an own "__proto__" key as a
    // prototype change and silently drop it from the result.
    return Object.fromEntries(
      Object.entries(value).map(([key, nested]) => [key, interpolateTemplateVars(nested, vars)]),
    );
  }

  return value;
}
5 changes: 4 additions & 1 deletion packages/core/src/evaluation/validation/eval-file.schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@ const MessageSchema = z.object({
content: MessageContentSchema,
});

const JsonObjectSchema = z.object({}).catchall(z.unknown());

/** Input: string shorthand or message array */
const InputSchema = z.union([z.string(), z.array(MessageSchema)]);

/** Expected output: string, object, or message array */
const ExpectedOutputSchema = z.union([z.string(), z.record(z.unknown()), z.array(MessageSchema)]);
const ExpectedOutputSchema = z.union([z.string(), JsonObjectSchema, z.array(MessageSchema)]);

// ---------------------------------------------------------------------------
// Grader schemas (YAML input format)
Expand Down Expand Up @@ -389,6 +391,7 @@ const ConversationTurnSchema = z.object({

const EvalTestSchema = z.object({
id: z.string().min(1),
vars: JsonObjectSchema.optional(),
criteria: z.string().optional(),
input: InputSchema.optional(),
input_files: z.array(z.string()).optional(),
Expand Down
Loading
Loading