@@ -42,7 +42,7 @@ export async function runSingleEval(
4242 fingerprintId : string ,
4343 codingAgent : 'codebuff' | 'claude' ,
4444 agent ?: string ,
45- promptWithSpec : boolean = false ,
45+ promptWithAgent : boolean = false ,
4646) : Promise < EvalRunJudged > {
4747 const startTime = new Date ( )
4848 const trace : CodebuffTrace [ ] = [ ]
@@ -94,7 +94,7 @@ export async function runSingleEval(
9494
9595 let currentDecision : AgentDecision = 'continue'
9696 let attempts = 0
97- const MAX_ATTEMPTS = promptWithSpec ? 1 : 5
97+ const MAX_ATTEMPTS = promptWithAgent ? 1 : 5
9898
9999 while ( currentDecision === 'continue' && attempts < MAX_ATTEMPTS ) {
100100 // Check for process-level errors
@@ -120,7 +120,7 @@ export async function runSingleEval(
120120 // Get next prompt from prompting agent with timeout
121121 let agentResponse : z . infer < typeof AgentDecisionSchema >
122122 try {
123- agentResponse = promptWithSpec
123+ agentResponse = ! promptWithAgent
124124 ? {
125125 decision : 'continue' ,
126126 reasoning : 'Using spec as sole prompt' ,
@@ -376,6 +376,7 @@ export async function runGitEvals(
376376 logToStdout : boolean = false ,
377377 agent : string = 'base' ,
378378 worktreePath ?: string ,
379+ promptWithAgent : boolean = false ,
379380) : Promise < FullEvalLog > {
380381 // Set up signal handlers if this is the main module
381382 if ( require . main === module ) {
@@ -417,12 +418,6 @@ export async function runGitEvals(
417418 const logsDir = path . join ( outputDir , 'logs' , `${ testRepoName } -${ traceId } ` )
418419 fs . mkdirSync ( logsDir , { recursive : true } )
419420
420- // Generate filenames with trace ID (single file that gets overwritten)
421- const partialOutputPath = path . join (
422- outputDir ,
423- `eval-partial-${ testRepoName } -${ traceId } .json` ,
424- )
425-
426421 const commitsToRun = limit
427422 ? evalData . evalCommits . slice ( 0 , limit )
428423 : evalData . evalCommits
@@ -496,6 +491,7 @@ export async function runGitEvals(
496491 fingerprintId ,
497492 codingAgent ,
498493 agent ,
494+ promptWithAgent . toString ( ) ,
499495 ] ,
500496 {
501497 stdio : [ 'pipe' , 'pipe' , 'pipe' , 'ipc' ] ,
0 commit comments