Skip to content

Commit 3f5ee99

Browse files
MrFlounderclaude
and authored
fix(crab-pf): replace broken verify with smoke+session test, add session handling (#32)
## Summary

- **Fix false-positive verification**: The old verify tool ran `promptfoo eval` against a config with a `redteam` section but no `tests` array — zero tests ran, zero failures, reported success. Now replaced with a 3-step process: direct provider smoke test → session test → eval with 2 real test cases.
- **Add session handling context**: System prompt now teaches the agent about the `callApi(prompt, context, options)` signature and the `sessionId` contract needed for multi-turn redteam attacks (Crescendo, GOAT).
- **Fix GPT-5/o1/o3 compatibility**: Use `max_completion_tokens` instead of `max_tokens`, and omit `temperature` for reasoning models.
- **Fix Node.js module caching**: `await import(url)` returns the cached module when provider.js is rewritten. Added a `?t=timestamp` cache buster.

## Changed files

| File | What changed |
|------|--------------|
| `generator/config.ts` | Replace `redteam` section with `prompts` + `tests` + `defaultTest.assert` |
| `agent/loop.ts` | Rewrite `verify` tool: smoke test + session test + eval with proper parsing |
| `agent/tools.ts` | Update verify description, remove unused `numTests` param |
| `agent/system-prompt.ts` | Add session handling section, update `callApi` signature in example |
| `agent/providers.ts` | GPT-5/o1/o3 compat (`max_completion_tokens`, no `temperature`) |

## Test plan

- [ ] Run `crab pf` against a simple HTTP target — verify eval shows `2 passed, 0 failed`
- [ ] Run against a session-based target — verify the provider gets the correct `callApi` signature and returns `sessionId`
- [ ] Run with `--provider openai:gpt-5` — verify no API errors
- [ ] Break provider.js intentionally — verify the smoke test catches it (no false positive)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent b013517 commit 3f5ee99

5 files changed

Lines changed: 128 additions & 61 deletions

File tree

plugins/promptfoo/src/agent/loop.ts

Lines changed: 87 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ import { toOpenAITools, toAnthropicTools } from './tools.js';
1313
import type { LLMProvider, Message, ToolCall, ChatResponse } from './providers.js';
1414
import type { DiscoveryResult } from '../types.js';
1515
import * as fs from 'node:fs';
16+
import * as path from 'node:path';
1617
import { execSync } from 'node:child_process';
18+
import { pathToFileURL } from 'node:url';
1719

1820
export interface AgentOptions {
1921
context: string; // Raw artifact or description
@@ -76,7 +78,7 @@ Steps:
7678
2. Send a probe to verify connectivity
7779
3. Identify the prompt field and response field
7880
4. Generate the config (and provider file if needed)
79-
5. Verify it works with a mini redteam test
81+
5. Verify it works
8082
6. Call done() when complete`,
8183
},
8284
];
@@ -152,11 +154,14 @@ Steps:
152154
toolCalls: response.toolCalls,
153155
});
154156

155-
// 5. Add tool results
157+
// 5. Add tool results — include error in content so LLM can reason about failures
156158
for (const result of toolResults) {
159+
const content = result.error
160+
? JSON.stringify({ error: result.error, result: result.result })
161+
: JSON.stringify(result.result);
157162
messages.push({
158163
role: 'tool',
159-
content: JSON.stringify(result.result),
164+
content,
160165
toolCallId: result.toolCallId,
161166
});
162167
}
@@ -268,60 +273,113 @@ async function executeTool(
268273
}
269274

270275
case 'verify': {
271-
const { configFile, numTests } = args as {
276+
const { configFile } = args as {
272277
configFile?: string;
273-
numTests?: number;
274278
};
275279

276280
const configPath = configFile || state.configFile || 'promptfooconfig.yaml';
281+
const steps: string[] = [];
282+
283+
// Step 1: Direct provider smoke + session test
284+
const providerPath = path.join(outputDir, 'provider.js');
285+
if (fs.existsSync(providerPath)) {
286+
// Install dependencies first if package.json exists
287+
const packageJsonPath = path.join(outputDir, 'package.json');
288+
if (fs.existsSync(packageJsonPath)) {
289+
try {
290+
execSync(`cd "${outputDir}" && npm install --silent 2>&1`, {
291+
timeout: 60000,
292+
encoding: 'utf-8',
293+
});
294+
} catch {
295+
// Ignore install errors, will surface in import
296+
}
297+
}
277298

278-
// Install dependencies if package.json exists
279-
const packageJsonPath = `${outputDir}/package.json`;
280-
if (fs.existsSync(packageJsonPath)) {
281-
try {
282-
execSync(`cd "${outputDir}" && npm install --silent 2>&1`, {
283-
timeout: 60000,
284-
encoding: 'utf-8',
285-
});
286-
} catch {
287-
// Ignore install errors, will fail in eval if deps missing
299+
const providerUrl = pathToFileURL(path.resolve(providerPath)).href + `?t=${Date.now()}`;
300+
const mod = await import(providerUrl);
301+
const ProviderClass = mod.default;
302+
const instance = new ProviderClass({ config: {} });
303+
304+
// Smoke test
305+
const r1 = await instance.callApi('Hello, this is a test message', { vars: {} }, {});
306+
if (!r1 || !r1.output || r1.error) {
307+
const diag = JSON.stringify(r1, null, 2)?.slice(0, 500) || 'null response';
308+
steps.push(`Smoke test FAILED. Provider returned: ${diag}`);
309+
state.verified = false;
310+
result = { success: false, error: `Provider smoke test failed`, providerResponse: r1, steps };
311+
break;
312+
}
313+
steps.push(`Smoke test PASSED: got ${r1.output.length} chars`);
314+
315+
// Session test — second call, passing sessionId from first response (mimics promptfoo strategy flow)
316+
const sessionContext = r1.sessionId
317+
? { vars: { sessionId: r1.sessionId } }
318+
: { vars: {} };
319+
const r2 = await instance.callApi('Follow up question', sessionContext, {});
320+
if (!r2 || !r2.output || r2.error) {
321+
const diag = JSON.stringify(r2, null, 2)?.slice(0, 500) || 'null response';
322+
steps.push(`Session test FAILED. Provider returned: ${diag}`);
323+
state.verified = false;
324+
result = { success: false, error: `Provider session test failed`, providerResponse: r2, steps };
325+
break;
288326
}
327+
steps.push(`Session test PASSED: got ${r2.output.length} chars${r1.sessionId ? `, sessionId: ${r1.sessionId}` : ''}`);
289328
}
290329

291-
// Try to run promptfoo eval
330+
// Step 2: Run promptfoo eval
292331
try {
293332
const output = execSync(
294333
`cd "${outputDir}" && npx promptfoo eval -c "${configPath}" --no-progress-bar 2>&1`,
295334
{ timeout: 120000, encoding: 'utf-8' }
296335
);
297336

298-
// Check for actual failures, ignoring version warnings
299-
const hasTestFailure = output.includes('[FAIL]') || output.includes('Test failed');
337+
const passMatch = output.match(/(\d+) passed/);
338+
const failMatch = output.match(/(\d+) failed/);
339+
const errorMatch = output.match(/(\d+) error/);
340+
const passed = passMatch ? parseInt(passMatch[1]) : 0;
341+
const failed = failMatch ? parseInt(failMatch[1]) : 0;
342+
const errors = errorMatch ? parseInt(errorMatch[1]) : 0;
343+
300344
const hasConfigError = output.includes('Error loading config') || output.includes('Invalid config');
301-
const hasProviderError = output.includes('Provider error') || output.includes('Connection refused');
302345

303-
state.verified = !hasTestFailure && !hasConfigError && !hasProviderError;
346+
if (passed === 0 && failed === 0) {
347+
steps.push('Eval FAILED: zero tests ran');
348+
state.verified = false;
349+
} else if (failed > 0 || errors > 0 || hasConfigError) {
350+
steps.push(`Eval FAILED: ${passed} passed, ${failed} failed, ${errors} errors`);
351+
state.verified = false;
352+
} else {
353+
steps.push(`Eval PASSED: ${passed} passed, ${failed} failed`);
354+
state.verified = true;
355+
}
304356

305357
result = {
306358
success: state.verified,
307359
output: output.slice(0, 1000),
360+
steps,
308361
};
309362
} catch (error) {
310363
const err = error as { message: string; stdout?: string; stderr?: string };
311-
// If promptfoo ran but returned non-zero, check if tests actually passed
312364
const stdout = err.stdout || '';
313-
const hasPassingOutput = stdout.includes('[PASS]') || stdout.includes('Evaluation complete');
314365

315-
result = {
316-
success: hasPassingOutput,
317-
error: hasPassingOutput ? undefined : err.message,
318-
stdout: stdout.slice(0, 1000),
319-
stderr: err.stderr?.slice(0, 500),
320-
};
366+
const passMatch = stdout.match(/(\d+) passed/);
367+
const passed = passMatch ? parseInt(passMatch[1]) : 0;
321368

322-
if (hasPassingOutput) {
369+
if (passed > 0 && !stdout.includes('failed')) {
370+
steps.push(`Eval PASSED (non-zero exit): ${passed} passed`);
323371
state.verified = true;
372+
} else {
373+
steps.push(`Eval FAILED: ${err.message.slice(0, 200)}`);
374+
state.verified = false;
324375
}
376+
377+
result = {
378+
success: state.verified,
379+
error: state.verified ? undefined : err.message,
380+
stdout: stdout.slice(0, 1000),
381+
steps,
382+
};
325383
}
326384
break;
327385
}

plugins/promptfoo/src/agent/providers.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,12 @@ export class OpenAIProvider implements LLMProvider {
6363
model: this.model,
6464
messages: options.messages.map((m) => this.toOpenAIMessage(m)),
6565
tools: options.tools,
66-
max_tokens: options.maxTokens || 4096,
67-
temperature: options.temperature ?? 0.7,
66+
...(this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3')
67+
? { max_completion_tokens: options.maxTokens || 4096 }
68+
: { max_tokens: options.maxTokens || 4096 }),
69+
...(this.model.startsWith('gpt-5') || this.model.startsWith('o1') || this.model.startsWith('o3')
70+
? {}
71+
: { temperature: options.temperature ?? 0.7 }),
6872
}),
6973
});
7074

plugins/promptfoo/src/agent/system-prompt.ts

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,15 @@ export const DISCOVERY_SYSTEM_PROMPT = `You are a target discovery agent for pro
1010
1111
1. Probe the target to understand how it communicates
1212
2. Generate a working promptfoo config (YAML + custom provider if needed)
13-
3. Verify it works with a mini redteam test
13+
3. Verify it works
1414
1515
## Tools
1616
1717
- **probe(url, method?, body?, headers?)** - Send HTTP request, see response
1818
- **probe_ws(url, message, headers?, timeout?)** - Test WebSocket endpoint
1919
- **write_config(description, providerType, providerConfig)** - Write promptfooconfig.yaml
2020
- **write_provider(code, filename, language)** - Write custom provider.js/py
21-
- **verify()** - Run promptfoo eval to test the config
21+
- **verify()** - Test provider directly (smoke + session), then run promptfoo eval
2222
- **done(summary, configFile, verified)** - Signal completion
2323
2424
## Promptfoo Config Format
@@ -56,26 +56,46 @@ export default class Provider {
5656
return 'my-provider';
5757
}
5858
59-
async callApi(prompt) {
59+
async callApi(prompt, context, options) {
60+
// context.vars.sessionId is set on subsequent turns if you returned sessionId previously
6061
// Your logic here...
61-
return { output: "the response string" }; // MUST return { output: string }
62+
return {
63+
output: "the response string",
64+
sessionId: "optional-session-id", // Return if target uses sessions
65+
};
6266
}
6367
}
6468
\`\`\`
6569
6670
**Key requirements:**
6771
- Must be a class with \`export default\`
68-
- Must have \`callApi(prompt)\` method
69-
- \`callApi\` must return \`{ output: string }\`, not just a string
72+
- Must have \`callApi(prompt, context, options)\` method — all 3 params
73+
- \`callApi\` must return \`{ output: string, sessionId?: string }\`
7074
- Use native fetch (Node 18+), import 'ws' for WebSocket
7175
76+
## Session Handling
77+
78+
Promptfoo uses sessions for multi-turn conversations (e.g. redteam attack strategies like Crescendo and GOAT). The flow works like this:
79+
80+
1. Strategy calls \`callApi(prompt, context)\` on turn 1
81+
2. Provider talks to the target, gets a response and a session/conversation ID
82+
3. Provider returns \`{ output: "...", sessionId: "abc123" }\`
83+
4. Promptfoo stores the sessionId and passes it back on turn 2+ via \`context.vars.sessionId\`
84+
5. Provider reads \`context.vars.sessionId\` and reuses the existing conversation
85+
86+
**If the target is stateful (uses sessions, conversation IDs, etc.), the provider MUST support this flow.** Otherwise multi-turn attacks will start a new conversation on every turn and fail.
87+
88+
For **custom providers**: Accept the \`context\` parameter, check \`context.vars.sessionId\` to reuse an existing session, and return \`sessionId\` in the response.
89+
90+
For **HTTP providers**: Use \`sessionParser\` in the config to extract the session ID from the response (e.g. \`sessionParser: json.session_id\`). Promptfoo handles the rest automatically.
91+
7292
## Workflow
7393
7494
1. Read the target spec to understand the API
7595
2. Probe to verify connectivity and response format
7696
3. Decide: HTTP provider (simple) or custom provider (complex)
7797
4. Write config (and provider.js if needed)
78-
5. Verify with promptfoo eval
98+
5. Verify — runs provider smoke test + session test, then promptfoo eval with 2 test cases
7999
6. Call done() with results
80100
81101
Be intelligent. Figure out the target's protocol, auth, request/response format from probing. Generate configs that work.`;

plugins/promptfoo/src/agent/tools.ts

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ export const toolDefinitions: ToolDefinition[] = [
133133
},
134134
{
135135
name: 'verify',
136-
description: 'Run a mini redteam test to verify the configuration works. This sends a few test prompts through the target.',
136+
description: 'Verify the configuration works. Tests the provider directly (smoke + session test), then runs promptfoo eval with 2 simple test cases.',
137137
parameters: {
138138
type: 'object',
139139
properties: {
@@ -142,11 +142,6 @@ export const toolDefinitions: ToolDefinition[] = [
142142
description: 'Path to the config file to verify (default: promptfooconfig.yaml)',
143143
default: 'promptfooconfig.yaml',
144144
},
145-
numTests: {
146-
type: 'number',
147-
description: 'Number of test prompts to send (default: 3)',
148-
default: 3,
149-
},
150145
},
151146
},
152147
},

plugins/promptfoo/src/generator/config.ts

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -62,26 +62,16 @@ export function generateConfig(options: GenerateConfigOptions): GeneratedConfig
6262
const config = {
6363
description,
6464
providers: [provider],
65-
// Default test with the prompt variable
65+
prompts: ['{{prompt}}'],
6666
defaultTest: {
67-
vars: {
68-
prompt: '{{prompt}}',
69-
},
70-
},
71-
// Mini redteam for verification
72-
redteam: {
73-
plugins: ['harmful:hate'],
74-
strategies: [
75-
{ id: 'jailbreak' },
76-
{
77-
id: 'jailbreak:composite',
78-
config: {
79-
maxTurns: 3,
80-
},
81-
},
67+
assert: [
68+
{ type: 'javascript', value: 'output && output.length > 0' },
8269
],
83-
numTests: 1,
8470
},
71+
tests: [
72+
{ vars: { prompt: 'Hello, how are you?' } },
73+
{ vars: { prompt: 'What can you help me with?' } },
74+
],
8575
};
8676

8777
// Generate YAML

0 commit comments

Comments (0)