fix: callAiJudgeをプロバイダーシステム経由に変更(Codex対応)

callAiJudgeがinfra/claude/にハードコードされており、Codexプロバイダー使用時に
judge評価が動作しなかった。agents/ai-judge.tsに移動し、runAgent経由で
プロバイダーを正しく解決するように修正。
This commit is contained in:
nrslib 2026-02-10 16:31:03 +09:00
parent 9c4408909d
commit b25e9a78ab
18 changed files with 110 additions and 103 deletions

View File

@ -1,6 +1,6 @@
{
"name": "takt",
"version": "0.11.0",
"version": "0.11.1",
"description": "TAKT: Task Agent Koordination Tool - AI Agent Piece Orchestration",
"main": "dist/index.js",
"types": "dist/index.d.ts",

View File

@ -3,7 +3,7 @@
*/
import { describe, it, expect } from 'vitest';
import { detectJudgeIndex, buildJudgePrompt } from '../infra/claude/client.js';
import { detectJudgeIndex, buildJudgePrompt } from '../agents/ai-judge.js';
describe('detectJudgeIndex', () => {
it('should detect [JUDGE:1] as index 0', () => {

View File

@ -14,12 +14,13 @@ import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
import { detectRuleIndex } from '../infra/claude/index.js';
import { callAiJudge } from '../agents/ai-judge.js';
// --- Mocks ---
vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockResolvedValue(-1),

View File

@ -105,11 +105,14 @@ vi.mock('../core/piece/index.js', () => ({
}));
vi.mock('../infra/claude/index.js', () => ({
callAiJudge: vi.fn(),
detectRuleIndex: vi.fn(),
interruptAllQueries: mockInterruptAllQueries,
}));
vi.mock('../agents/ai-judge.js', () => ({
callAiJudge: vi.fn(),
}));
vi.mock('../infra/config/index.js', () => ({
loadPersonaSessions: vi.fn().mockReturnValue({}),
updatePersonaSession: vi.fn(),

View File

@ -15,15 +15,16 @@ import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
import { detectRuleIndex } from '../infra/claude/index.js';
import { callAiJudge } from '../agents/ai-judge.js';
// --- Mocks (minimal — only infrastructure, not core logic) ---
// Safety net: prevent callAiJudge from calling real Claude CLI.
// Safety net: prevent callAiJudge from calling real agent.
// Tag-based detection should always match in these tests; if it doesn't,
// this mock surfaces the failure immediately instead of timing out.
vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockResolvedValue(-1),

View File

@ -13,12 +13,13 @@ import { mkdtempSync, mkdirSync, rmSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
import { detectRuleIndex } from '../infra/claude/index.js';
import { callAiJudge } from '../agents/ai-judge.js';
// --- Mocks ---
vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {

View File

@ -31,8 +31,8 @@ const {
mockPushBranch: vi.fn(),
}));
vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {

View File

@ -16,9 +16,9 @@ import { setMockScenario, resetScenario } from '../infra/mock/index.js';
// --- Mocks ---
// Safety net: prevent callAiJudge from calling real Claude CLI.
vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
// Safety net: prevent callAiJudge from calling real agent.
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {

View File

@ -71,11 +71,14 @@ vi.mock('../core/piece/index.js', () => ({
}));
vi.mock('../infra/claude/index.js', () => ({
callAiJudge: vi.fn(),
detectRuleIndex: vi.fn(),
interruptAllQueries: mockInterruptAllQueries,
}));
vi.mock('../agents/ai-judge.js', () => ({
callAiJudge: vi.fn(),
}));
vi.mock('../infra/config/index.js', () => ({
loadPersonaSessions: vi.fn().mockReturnValue({}),
updatePersonaSession: vi.fn(),

View File

@ -15,12 +15,13 @@ import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
import { detectRuleIndex } from '../infra/claude/index.js';
import { callAiJudge } from '../agents/ai-judge.js';
// --- Mocks ---
vi.mock('../infra/claude/client.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
return {
...original,
callAiJudge: vi.fn().mockResolvedValue(-1),

View File

@ -78,11 +78,14 @@ vi.mock('../core/piece/index.js', () => ({
}));
vi.mock('../infra/claude/index.js', () => ({
callAiJudge: vi.fn(),
detectRuleIndex: vi.fn(),
interruptAllQueries: vi.fn(),
}));
vi.mock('../agents/ai-judge.js', () => ({
callAiJudge: vi.fn(),
}));
vi.mock('../infra/config/index.js', () => ({
loadPersonaSessions: vi.fn().mockReturnValue({}),
updatePersonaSession: vi.fn(),

View File

@ -98,10 +98,13 @@ vi.mock('../infra/github/index.js', () => ({
vi.mock('../infra/claude/index.js', () => ({
interruptAllQueries: vi.fn(),
callAiJudge: vi.fn(),
detectRuleIndex: vi.fn(),
}));
vi.mock('../agents/ai-judge.js', () => ({
callAiJudge: vi.fn(),
}));
vi.mock('../shared/exitCodes.js', () => ({
EXIT_SIGINT: 130,
}));

67
src/agents/ai-judge.ts Normal file
View File

@ -0,0 +1,67 @@
/**
* AI judge - provider-aware rule condition evaluator
*
* Evaluates agent output against ai() conditions using the configured provider.
* Uses runAgent (which resolves provider from config) instead of hardcoded Claude.
*/
import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js';
import { loadTemplate } from '../shared/prompts/index.js';
import { createLogger } from '../shared/utils/index.js';
import { runAgent } from './runner.js';
const log = createLogger('ai-judge');
/**
 * Locate the first `[JUDGE:N]` tag (case-insensitive) in the judge's reply.
 *
 * Tags are 1-based; the result is the corresponding 0-based rule index.
 *
 * @param content - Raw text returned by the judge.
 * @returns The 0-based rule index, or -1 when no tag is present or the tag
 *          number does not map to a non-negative index (e.g. `[JUDGE:0]`).
 */
export function detectJudgeIndex(content: string): number {
  const tagNumber = /\[JUDGE:(\d+)\]/i.exec(content)?.[1];
  if (!tagNumber) {
    return -1;
  }

  const zeroBased = Number.parseInt(tagNumber, 10) - 1;
  return zeroBased < 0 ? -1 : zeroBased;
}
/**
 * Render the judge prompt for a set of ai() conditions.
 *
 * Each condition becomes one table row of the form `| N | text |`, where N is
 * the condition's index shifted to 1-based. The rows and the agent output are
 * substituted into the `perform_judge_message` template (English variant).
 *
 * @param agentOutput - The agent output the judge should evaluate.
 * @param aiConditions - Conditions to list in the prompt.
 * @returns The rendered prompt text.
 */
export function buildJudgePrompt(
  agentOutput: string,
  aiConditions: AiJudgeCondition[],
): string {
  const rows: string[] = [];
  for (const condition of aiConditions) {
    rows.push(`| ${condition.index + 1} | ${condition.text} |`);
  }
  const conditionList = rows.join('\n');

  return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
}
/**
 * Ask the AI judge which ai() condition the agent output satisfies.
 *
 * The call goes through `runAgent` rather than a specific vendor client, so the
 * provider is whatever `runAgent` resolves. The judge is limited to a single
 * turn and is given an empty tool allow-list.
 *
 * @param agentOutput - The agent output to evaluate.
 * @param conditions - Candidate ai() conditions.
 * @param options - Execution options; `cwd` is forwarded to `runAgent`.
 * @returns The 0-based index parsed from the judge's `[JUDGE:N]` tag, or -1 when
 *          the call did not finish with status `done` or no tag was found.
 */
export const callAiJudge: AiJudgeCaller = async (
  agentOutput: string,
  conditions: AiJudgeCondition[],
  options: { cwd: string },
): Promise<number> => {
  const judgePrompt = buildJudgePrompt(agentOutput, conditions);
  const judgeResponse = await runAgent(undefined, judgePrompt, {
    cwd: options.cwd,
    maxTurns: 1,
    allowedTools: [],
  });

  if (judgeResponse.status === 'done') {
    return detectJudgeIndex(judgeResponse.content);
  }

  // Failure is logged and reported with the same sentinel as "no match".
  log.error('AI judge call failed', { error: judgeResponse.error });
  return -1;
};

View File

@ -3,4 +3,5 @@
*/
export { AgentRunner, runAgent } from './runner.js';
export { callAiJudge, detectJudgeIndex, buildJudgePrompt } from './ai-judge.js';
export type { RunAgentOptions, StreamCallback } from './types.js';

View File

@ -6,7 +6,8 @@ import { readFileSync } from 'node:fs';
import { PieceEngine, type IterationLimitRequest, type UserInputRequest } from '../../../core/piece/index.js';
import type { PieceConfig } from '../../../core/models/index.js';
import type { PieceExecutionResult, PieceExecutionOptions } from './types.js';
import { callAiJudge, detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js';
import { detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js';
import { callAiJudge } from '../../../agents/ai-judge.js';
export type { PieceExecutionResult, PieceExecutionOptions };

View File

@ -65,10 +65,7 @@ export {
callClaudeCustom,
callClaudeAgent,
callClaudeSkill,
callAiJudge,
detectRuleIndex,
detectJudgeIndex,
buildJudgePrompt,
isRegexSafe,
} from './infra/claude/index.js';
export type {

View File

@ -154,60 +154,6 @@ export class ClaudeClient {
};
}
/**
* Detect judge rule index from [JUDGE:N] tag pattern.
* Returns 0-based rule index, or -1 if no match.
*
* Matching is case-insensitive and only the first tag in `content` is used.
* A tag numbered 0 would map to index -1 and is therefore reported as no match.
*/
static detectJudgeIndex(content: string): number {
const regex = /\[JUDGE:(\d+)\]/i;
const match = content.match(regex);
if (match?.[1]) {
// Tag numbers are 1-based; shift to a 0-based rule index.
const index = Number.parseInt(match[1], 10) - 1;
return index >= 0 ? index : -1;
}
return -1;
}
/**
* Build the prompt for the AI judge that evaluates agent output against ai() conditions.
*
* Each condition is rendered as a `| N | text |` row (N is the condition index
* shifted to 1-based); rows are joined with newlines and passed, together with
* the agent output, to the 'perform_judge_message' template ('en').
*/
static buildJudgePrompt(
agentOutput: string,
aiConditions: { index: number; text: string }[],
): string {
const conditionList = aiConditions
.map((c) => `| ${c.index + 1} | ${c.text} |`)
.join('\n');
return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
}
/**
* Call AI judge to evaluate agent output against ai() conditions.
* Uses a lightweight model (haiku) for cost efficiency.
* Returns 0-based index of the matched ai() condition, or -1 if no match.
*
* An unsuccessful CLI result is logged and also yields -1, so the return value
* alone does not distinguish "no condition matched" from "judge call failed".
*/
async callAiJudge(
agentOutput: string,
aiConditions: { index: number; text: string }[],
options: { cwd: string },
): Promise<number> {
const prompt = ClaudeClient.buildJudgePrompt(agentOutput, aiConditions);
// The model is fixed here, independent of any provider configuration.
const spawnOptions: ClaudeSpawnOptions = {
cwd: options.cwd,
model: 'haiku',
maxTurns: 1,
};
const result = await executeClaudeCli(prompt, spawnOptions);
if (!result.success) {
log.error('AI judge call failed', { error: result.error });
return -1;
}
return ClaudeClient.detectJudgeIndex(result.content);
}
}
// ---- Module-level functions ----
@ -247,21 +193,3 @@ export async function callClaudeSkill(
return defaultClient.callSkill(skillName, prompt, options);
}
/** Module-level wrapper that forwards to the static `ClaudeClient.detectJudgeIndex`. */
export function detectJudgeIndex(content: string): number {
return ClaudeClient.detectJudgeIndex(content);
}
/** Module-level wrapper that forwards to the static `ClaudeClient.buildJudgePrompt`. */
export function buildJudgePrompt(
agentOutput: string,
aiConditions: { index: number; text: string }[],
): string {
return ClaudeClient.buildJudgePrompt(agentOutput, aiConditions);
}
/** Module-level wrapper that delegates to `defaultClient.callAiJudge` with the same arguments. */
export async function callAiJudge(
agentOutput: string,
aiConditions: { index: number; text: string }[],
options: { cwd: string },
): Promise<number> {
return defaultClient.callAiJudge(agentOutput, aiConditions, options);
}

View File

@ -67,9 +67,6 @@ export {
callClaudeCustom,
callClaudeAgent,
callClaudeSkill,
callAiJudge,
detectRuleIndex,
detectJudgeIndex,
buildJudgePrompt,
isRegexSafe,
} from './client.js';