takt/src/agents/ai-judge.ts
nrslib b25e9a78ab fix: callAiJudgeをプロバイダーシステム経由に変更(Codex対応)
callAiJudgeがinfra/claude/にハードコードされており、Codexプロバイダー使用時に
judge評価が動作しなかった。agents/ai-judge.tsに移動し、runAgent経由で
プロバイダーを正しく解決するように修正。
2026-02-10 19:32:42 +09:00

68 lines
2.0 KiB
TypeScript

/**
* AI judge - provider-aware rule condition evaluator
*
* Evaluates agent output against ai() conditions using the configured provider.
* Uses runAgent (which resolves provider from config) instead of hardcoded Claude.
*/
import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js';
import { loadTemplate } from '../shared/prompts/index.js';
import { createLogger } from '../shared/utils/index.js';
import { runAgent } from './runner.js';
// Module-level logger created under the name 'ai-judge'; used to report failed judge calls.
const log = createLogger('ai-judge');
/**
 * Extract the judge verdict from a `[JUDGE:N]` tag embedded in text.
 *
 * Only the first tag found is considered, and the tag name is matched
 * case-insensitively. `N` is 1-based in the tag; the result is 0-based.
 *
 * @param content - Text to scan for a judge tag.
 * @returns The 0-based rule index, or -1 when no tag is present or `N` is 0.
 */
export function detectJudgeIndex(content: string): number {
  const digits = /\[JUDGE:(\d+)\]/i.exec(content)?.[1];
  if (!digits) {
    return -1;
  }

  // Convert the 1-based tag number to a 0-based index; "0" falls below range.
  const zeroBased = Number.parseInt(digits, 10) - 1;
  if (zeroBased >= 0) {
    return zeroBased;
  }
  return -1;
}
/**
 * Render the prompt sent to the AI judge.
 *
 * Every condition becomes one table row of the form `| N | text |`, where N
 * is the condition's index plus one. The rows are joined with newlines and
 * handed, together with the agent output, to the 'perform_judge_message'
 * template (loaded with the 'en' language argument).
 *
 * @param agentOutput - Agent output the judge should evaluate.
 * @param aiConditions - ai() conditions to list in the prompt.
 * @returns The prompt string returned by loadTemplate.
 */
export function buildJudgePrompt(
  agentOutput: string,
  aiConditions: AiJudgeCondition[],
): string {
  // Conditions are numbered from 1 in the prompt, hence the +1.
  const toRow = ({ index, text }: AiJudgeCondition): string =>
    `| ${index + 1} | ${text} |`;

  const conditionList = aiConditions.map(toRow).join('\n');

  return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
}
/**
 * Ask the AI judge which ai() condition the agent output satisfies.
 *
 * The judge prompt is sent through runAgent (the provider system) rather than
 * a hardcoded backend. The call passes `undefined` as runAgent's first
 * argument, limits the run to a single turn, and supplies an empty
 * allowed-tools list.
 *
 * @param agentOutput - Agent output to evaluate.
 * @param conditions - ai() conditions the judge chooses between.
 * @param options - Call options; `cwd` is forwarded to runAgent.
 * @returns The 0-based index parsed from the judge's `[JUDGE:N]` tag, or -1
 *   when the response status is not 'done' or no usable tag is found.
 */
export const callAiJudge: AiJudgeCaller = async (
  agentOutput: string,
  conditions: AiJudgeCondition[],
  options: { cwd: string },
): Promise<number> => {
  const judgePrompt = buildJudgePrompt(agentOutput, conditions);
  const runOptions = { cwd: options.cwd, maxTurns: 1, allowedTools: [] };

  const result = await runAgent(undefined, judgePrompt, runOptions);

  if (result.status === 'done') {
    return detectJudgeIndex(result.content);
  }

  // Any non-'done' status is logged and reported to the caller as "no match".
  log.error('AI judge call failed', { error: result.error });
  return -1;
};