fix: callAiJudgeをプロバイダーシステム経由に変更(Codex対応)
callAiJudgeがinfra/claude/にハードコードされており、Codexプロバイダー使用時にjudge評価が動作しなかった。agents/ai-judge.tsに移動し、runAgent経由でプロバイダーを正しく解決するように修正。
This commit is contained in:
parent
9c4408909d
commit
b25e9a78ab
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "takt",
|
||||
"version": "0.11.0",
|
||||
"version": "0.11.1",
|
||||
"description": "TAKT: Task Agent Koordination Tool - AI Agent Piece Orchestration",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { detectJudgeIndex, buildJudgePrompt } from '../infra/claude/client.js';
|
||||
import { detectJudgeIndex, buildJudgePrompt } from '../agents/ai-judge.js';
|
||||
|
||||
describe('detectJudgeIndex', () => {
|
||||
it('should detect [JUDGE:1] as index 0', () => {
|
||||
|
||||
@ -14,12 +14,13 @@ import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
|
||||
import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
|
||||
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
|
||||
import { detectRuleIndex } from '../infra/claude/index.js';
|
||||
import { callAiJudge } from '../agents/ai-judge.js';
|
||||
|
||||
// --- Mocks ---
|
||||
|
||||
vi.mock('../infra/claude/client.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
|
||||
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
|
||||
return {
|
||||
...original,
|
||||
callAiJudge: vi.fn().mockResolvedValue(-1),
|
||||
|
||||
@ -105,11 +105,14 @@ vi.mock('../core/piece/index.js', () => ({
|
||||
}));
|
||||
|
||||
vi.mock('../infra/claude/index.js', () => ({
|
||||
callAiJudge: vi.fn(),
|
||||
detectRuleIndex: vi.fn(),
|
||||
interruptAllQueries: mockInterruptAllQueries,
|
||||
}));
|
||||
|
||||
vi.mock('../agents/ai-judge.js', () => ({
|
||||
callAiJudge: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../infra/config/index.js', () => ({
|
||||
loadPersonaSessions: vi.fn().mockReturnValue({}),
|
||||
updatePersonaSession: vi.fn(),
|
||||
|
||||
@ -15,15 +15,16 @@ import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
|
||||
import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
|
||||
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
|
||||
import { detectRuleIndex } from '../infra/claude/index.js';
|
||||
import { callAiJudge } from '../agents/ai-judge.js';
|
||||
|
||||
// --- Mocks (minimal — only infrastructure, not core logic) ---
|
||||
|
||||
// Safety net: prevent callAiJudge from calling real Claude CLI.
|
||||
// Safety net: prevent callAiJudge from calling real agent.
|
||||
// Tag-based detection should always match in these tests; if it doesn't,
|
||||
// this mock surfaces the failure immediately instead of timing out.
|
||||
vi.mock('../infra/claude/client.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
|
||||
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
|
||||
return {
|
||||
...original,
|
||||
callAiJudge: vi.fn().mockResolvedValue(-1),
|
||||
|
||||
@ -13,12 +13,13 @@ import { mkdtempSync, mkdirSync, rmSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
|
||||
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
|
||||
import { detectRuleIndex } from '../infra/claude/index.js';
|
||||
import { callAiJudge } from '../agents/ai-judge.js';
|
||||
|
||||
// --- Mocks ---
|
||||
|
||||
vi.mock('../infra/claude/client.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
|
||||
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
|
||||
return {
|
||||
...original,
|
||||
callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {
|
||||
|
||||
@ -31,8 +31,8 @@ const {
|
||||
mockPushBranch: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../infra/claude/client.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
|
||||
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
|
||||
return {
|
||||
...original,
|
||||
callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {
|
||||
|
||||
@ -16,9 +16,9 @@ import { setMockScenario, resetScenario } from '../infra/mock/index.js';
|
||||
|
||||
// --- Mocks ---
|
||||
|
||||
// Safety net: prevent callAiJudge from calling real Claude CLI.
|
||||
vi.mock('../infra/claude/client.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
|
||||
// Safety net: prevent callAiJudge from calling real agent.
|
||||
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
|
||||
return {
|
||||
...original,
|
||||
callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {
|
||||
|
||||
@ -71,11 +71,14 @@ vi.mock('../core/piece/index.js', () => ({
|
||||
}));
|
||||
|
||||
vi.mock('../infra/claude/index.js', () => ({
|
||||
callAiJudge: vi.fn(),
|
||||
detectRuleIndex: vi.fn(),
|
||||
interruptAllQueries: mockInterruptAllQueries,
|
||||
}));
|
||||
|
||||
vi.mock('../agents/ai-judge.js', () => ({
|
||||
callAiJudge: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../infra/config/index.js', () => ({
|
||||
loadPersonaSessions: vi.fn().mockReturnValue({}),
|
||||
updatePersonaSession: vi.fn(),
|
||||
|
||||
@ -15,12 +15,13 @@ import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { setMockScenario, resetScenario } from '../infra/mock/index.js';
|
||||
import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
|
||||
import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
|
||||
import { detectRuleIndex } from '../infra/claude/index.js';
|
||||
import { callAiJudge } from '../agents/ai-judge.js';
|
||||
|
||||
// --- Mocks ---
|
||||
|
||||
vi.mock('../infra/claude/client.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../infra/claude/client.js')>();
|
||||
vi.mock('../agents/ai-judge.js', async (importOriginal) => {
|
||||
const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
|
||||
return {
|
||||
...original,
|
||||
callAiJudge: vi.fn().mockResolvedValue(-1),
|
||||
|
||||
@ -78,11 +78,14 @@ vi.mock('../core/piece/index.js', () => ({
|
||||
}));
|
||||
|
||||
vi.mock('../infra/claude/index.js', () => ({
|
||||
callAiJudge: vi.fn(),
|
||||
detectRuleIndex: vi.fn(),
|
||||
interruptAllQueries: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../agents/ai-judge.js', () => ({
|
||||
callAiJudge: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../infra/config/index.js', () => ({
|
||||
loadPersonaSessions: vi.fn().mockReturnValue({}),
|
||||
updatePersonaSession: vi.fn(),
|
||||
|
||||
@ -98,10 +98,13 @@ vi.mock('../infra/github/index.js', () => ({
|
||||
|
||||
vi.mock('../infra/claude/index.js', () => ({
|
||||
interruptAllQueries: vi.fn(),
|
||||
callAiJudge: vi.fn(),
|
||||
detectRuleIndex: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../agents/ai-judge.js', () => ({
|
||||
callAiJudge: vi.fn(),
|
||||
}));
|
||||
|
||||
vi.mock('../shared/exitCodes.js', () => ({
|
||||
EXIT_SIGINT: 130,
|
||||
}));
|
||||
|
||||
67
src/agents/ai-judge.ts
Normal file
67
src/agents/ai-judge.ts
Normal file
@ -0,0 +1,67 @@
|
||||
/**
|
||||
* AI judge - provider-aware rule condition evaluator
|
||||
*
|
||||
* Evaluates agent output against ai() conditions using the configured provider.
|
||||
* Uses runAgent (which resolves provider from config) instead of hardcoded Claude.
|
||||
*/
|
||||
|
||||
import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js';
|
||||
import { loadTemplate } from '../shared/prompts/index.js';
|
||||
import { createLogger } from '../shared/utils/index.js';
|
||||
import { runAgent } from './runner.js';
|
||||
|
||||
const log = createLogger('ai-judge');
|
||||
|
||||
/**
 * Extract the judge verdict from a `[JUDGE:N]` tag in the given text.
 *
 * The tag is matched case-insensitively and only the first occurrence is used.
 * `N` is 1-based in the text; the returned value is the 0-based equivalent.
 *
 * @param content - Text to scan for the tag.
 * @returns The 0-based rule index, or -1 when no tag is present, when `N` is 0,
 *   or when `N` is too large to be represented as a safe integer.
 */
export function detectJudgeIndex(content: string): number {
  const tag = /\[JUDGE:(\d+)\]/i.exec(content);
  const digits = tag?.[1];
  if (!digits) {
    return -1;
  }

  const index = Number.parseInt(digits, 10) - 1;
  // A very long digit run parses to Infinity (or an imprecise float), which
  // would pass a plain `>= 0` check; treat anything that is not a safe
  // integer as "no match" so callers never receive a non-finite index.
  return Number.isSafeInteger(index) && index >= 0 ? index : -1;
}
|
||||
|
||||
/**
 * Render the prompt sent to the AI judge.
 *
 * Each condition becomes one `| number | text |` row, numbered from its
 * `index + 1`, and the rows are joined with newlines. The rows and the agent
 * output are then substituted into the `perform_judge_message` template
 * (loaded with the `'en'` language argument).
 *
 * @param agentOutput - Output of the agent that is being judged.
 * @param aiConditions - The ai() conditions to present to the judge.
 * @returns The rendered prompt text.
 */
export function buildJudgePrompt(
  agentOutput: string,
  aiConditions: AiJudgeCondition[],
): string {
  const rows: string[] = [];
  for (const condition of aiConditions) {
    rows.push(`| ${condition.index + 1} | ${condition.text} |`);
  }
  const conditionList = rows.join('\n');

  return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
}
|
||||
|
||||
/**
 * Ask the AI judge which ai() condition the agent output satisfies.
 *
 * The prompt is built with `buildJudgePrompt` and sent through `runAgent`
 * (rather than a hardcoded Claude call) with `maxTurns: 1` and an empty
 * `allowedTools` list. The reply is parsed with `detectJudgeIndex`.
 *
 * @param agentOutput - Output of the agent that is being judged.
 * @param conditions - The ai() conditions to evaluate against.
 * @param options - `cwd` is forwarded to `runAgent` unchanged.
 * @returns The 0-based index of the matched condition, or -1 when the agent
 *   call does not finish with status `'done'` or the reply carries no usable
 *   `[JUDGE:N]` tag.
 */
export const callAiJudge: AiJudgeCaller = async (
  agentOutput: string,
  conditions: AiJudgeCondition[],
  options: { cwd: string },
): Promise<number> => {
  const judgePrompt = buildJudgePrompt(agentOutput, conditions);
  const { cwd } = options;

  const judgeResponse = await runAgent(undefined, judgePrompt, {
    cwd,
    maxTurns: 1,
    allowedTools: [],
  });

  if (judgeResponse.status === 'done') {
    return detectJudgeIndex(judgeResponse.content);
  }

  // Any non-'done' status is logged and reported as "no match".
  log.error('AI judge call failed', { error: judgeResponse.error });
  return -1;
};
|
||||
@ -3,4 +3,5 @@
|
||||
*/
|
||||
|
||||
export { AgentRunner, runAgent } from './runner.js';
|
||||
export { callAiJudge, detectJudgeIndex, buildJudgePrompt } from './ai-judge.js';
|
||||
export type { RunAgentOptions, StreamCallback } from './types.js';
|
||||
|
||||
@ -6,7 +6,8 @@ import { readFileSync } from 'node:fs';
|
||||
import { PieceEngine, type IterationLimitRequest, type UserInputRequest } from '../../../core/piece/index.js';
|
||||
import type { PieceConfig } from '../../../core/models/index.js';
|
||||
import type { PieceExecutionResult, PieceExecutionOptions } from './types.js';
|
||||
import { callAiJudge, detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js';
|
||||
import { detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js';
|
||||
import { callAiJudge } from '../../../agents/ai-judge.js';
|
||||
|
||||
export type { PieceExecutionResult, PieceExecutionOptions };
|
||||
|
||||
|
||||
@ -65,10 +65,7 @@ export {
|
||||
callClaudeCustom,
|
||||
callClaudeAgent,
|
||||
callClaudeSkill,
|
||||
callAiJudge,
|
||||
detectRuleIndex,
|
||||
detectJudgeIndex,
|
||||
buildJudgePrompt,
|
||||
isRegexSafe,
|
||||
} from './infra/claude/index.js';
|
||||
export type {
|
||||
|
||||
@ -154,60 +154,6 @@ export class ClaudeClient {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect judge rule index from [JUDGE:N] tag pattern.
|
||||
* Returns 0-based rule index, or -1 if no match.
|
||||
*/
|
||||
static detectJudgeIndex(content: string): number {
|
||||
const regex = /\[JUDGE:(\d+)\]/i;
|
||||
const match = content.match(regex);
|
||||
if (match?.[1]) {
|
||||
const index = Number.parseInt(match[1], 10) - 1;
|
||||
return index >= 0 ? index : -1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the prompt for the AI judge that evaluates agent output against ai() conditions.
|
||||
*/
|
||||
static buildJudgePrompt(
|
||||
agentOutput: string,
|
||||
aiConditions: { index: number; text: string }[],
|
||||
): string {
|
||||
const conditionList = aiConditions
|
||||
.map((c) => `| ${c.index + 1} | ${c.text} |`)
|
||||
.join('\n');
|
||||
|
||||
return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
|
||||
}
|
||||
|
||||
/**
|
||||
* Call AI judge to evaluate agent output against ai() conditions.
|
||||
* Uses a lightweight model (haiku) for cost efficiency.
|
||||
* Returns 0-based index of the matched ai() condition, or -1 if no match.
|
||||
*/
|
||||
async callAiJudge(
|
||||
agentOutput: string,
|
||||
aiConditions: { index: number; text: string }[],
|
||||
options: { cwd: string },
|
||||
): Promise<number> {
|
||||
const prompt = ClaudeClient.buildJudgePrompt(agentOutput, aiConditions);
|
||||
|
||||
const spawnOptions: ClaudeSpawnOptions = {
|
||||
cwd: options.cwd,
|
||||
model: 'haiku',
|
||||
maxTurns: 1,
|
||||
};
|
||||
|
||||
const result = await executeClaudeCli(prompt, spawnOptions);
|
||||
if (!result.success) {
|
||||
log.error('AI judge call failed', { error: result.error });
|
||||
return -1;
|
||||
}
|
||||
|
||||
return ClaudeClient.detectJudgeIndex(result.content);
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Module-level functions ----
|
||||
@ -247,21 +193,3 @@ export async function callClaudeSkill(
|
||||
return defaultClient.callSkill(skillName, prompt, options);
|
||||
}
|
||||
|
||||
export function detectJudgeIndex(content: string): number {
|
||||
return ClaudeClient.detectJudgeIndex(content);
|
||||
}
|
||||
|
||||
export function buildJudgePrompt(
|
||||
agentOutput: string,
|
||||
aiConditions: { index: number; text: string }[],
|
||||
): string {
|
||||
return ClaudeClient.buildJudgePrompt(agentOutput, aiConditions);
|
||||
}
|
||||
|
||||
export async function callAiJudge(
|
||||
agentOutput: string,
|
||||
aiConditions: { index: number; text: string }[],
|
||||
options: { cwd: string },
|
||||
): Promise<number> {
|
||||
return defaultClient.callAiJudge(agentOutput, aiConditions, options);
|
||||
}
|
||||
|
||||
@ -67,9 +67,6 @@ export {
|
||||
callClaudeCustom,
|
||||
callClaudeAgent,
|
||||
callClaudeSkill,
|
||||
callAiJudge,
|
||||
detectRuleIndex,
|
||||
detectJudgeIndex,
|
||||
buildJudgePrompt,
|
||||
isRegexSafe,
|
||||
} from './client.js';
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user