fix: callAiJudgeをプロバイダーシステム経由に変更（Codex対応）

callAiJudgeがinfra/claude/にハードコードされており、Codexプロバイダー使用時にjudge評価が動作しなかった。agents/ai-judge.tsに移動し、runAgent経由でプロバイダーを正しく解決するように修正。
2026-02-10 16:31:03 +09:00 · 2026-02-10 16:31:03 +09:00 · b25e9a78ab
commit b25e9a78ab
parent 9c4408909d
18 changed files with 110 additions and 103 deletions
--- a/package.json
+++ b/package.json
@ -1,6 +1,6 @@
 {
  "name": "takt",
-  "version": "0.11.0",
+  "version": "0.11.1",
  "description": "TAKT: Task Agent Koordination Tool - AI Agent Piece Orchestration",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
--- a/src/tests/ai-judge.test.ts
+++ b/src/tests/ai-judge.test.ts
@ -3,7 +3,7 @@
 */

 import { describe, it, expect } from 'vitest';
-import { detectJudgeIndex, buildJudgePrompt } from '../infra/claude/client.js';
+import { detectJudgeIndex, buildJudgePrompt } from '../agents/ai-judge.js';

 describe('detectJudgeIndex', () => {
  it('should detect [JUDGE:1] as index 0', () => {
--- a/src/tests/it-error-recovery.test.ts
+++ b/src/tests/it-error-recovery.test.ts
@ -14,12 +14,13 @@ import { join } from 'node:path';
 import { tmpdir } from 'node:os';
 import { setMockScenario, resetScenario } from '../infra/mock/index.js';
 import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
-import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
+import { detectRuleIndex } from '../infra/claude/index.js';
+import { callAiJudge } from '../agents/ai-judge.js';

 // --- Mocks ---

-vi.mock('../infra/claude/client.js', async (importOriginal) => {
-  const original = await importOriginal<typeof import('../infra/claude/client.js')>();
+vi.mock('../agents/ai-judge.js', async (importOriginal) => {
+  const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
  return {
    ...original,
    callAiJudge: vi.fn().mockResolvedValue(-1),
--- a/src/tests/it-notification-sound.test.ts
+++ b/src/tests/it-notification-sound.test.ts
@ -105,11 +105,14 @@ vi.mock('../core/piece/index.js', () => ({
 }));

 vi.mock('../infra/claude/index.js', () => ({
-  callAiJudge: vi.fn(),
  detectRuleIndex: vi.fn(),
  interruptAllQueries: mockInterruptAllQueries,
 }));

+vi.mock('../agents/ai-judge.js', () => ({
+  callAiJudge: vi.fn(),
+}));
+
 vi.mock('../infra/config/index.js', () => ({
  loadPersonaSessions: vi.fn().mockReturnValue({}),
  updatePersonaSession: vi.fn(),
--- a/src/tests/it-piece-execution.test.ts
+++ b/src/tests/it-piece-execution.test.ts
@ -15,15 +15,16 @@ import { join } from 'node:path';
 import { tmpdir } from 'node:os';
 import { setMockScenario, resetScenario } from '../infra/mock/index.js';
 import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
-import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
+import { detectRuleIndex } from '../infra/claude/index.js';
+import { callAiJudge } from '../agents/ai-judge.js';

 // --- Mocks (minimal — only infrastructure, not core logic) ---

-// Safety net: prevent callAiJudge from calling real Claude CLI.
+// Safety net: prevent callAiJudge from calling real agent.
 // Tag-based detection should always match in these tests; if it doesn't,
 // this mock surfaces the failure immediately instead of timing out.
-vi.mock('../infra/claude/client.js', async (importOriginal) => {
-  const original = await importOriginal<typeof import('../infra/claude/client.js')>();
+vi.mock('../agents/ai-judge.js', async (importOriginal) => {
+  const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
  return {
    ...original,
    callAiJudge: vi.fn().mockResolvedValue(-1),
--- a/src/tests/it-piece-patterns.test.ts
+++ b/src/tests/it-piece-patterns.test.ts
@ -13,12 +13,13 @@ import { mkdtempSync, mkdirSync, rmSync } from 'node:fs';
 import { join } from 'node:path';
 import { tmpdir } from 'node:os';
 import { setMockScenario, resetScenario } from '../infra/mock/index.js';
-import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
+import { detectRuleIndex } from '../infra/claude/index.js';
+import { callAiJudge } from '../agents/ai-judge.js';

 // --- Mocks ---

-vi.mock('../infra/claude/client.js', async (importOriginal) => {
-  const original = await importOriginal<typeof import('../infra/claude/client.js')>();
+vi.mock('../agents/ai-judge.js', async (importOriginal) => {
+  const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
  return {
    ...original,
    callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {
--- a/src/tests/it-pipeline-modes.test.ts
+++ b/src/tests/it-pipeline-modes.test.ts
@ -31,8 +31,8 @@ const {
  mockPushBranch: vi.fn(),
 }));

-vi.mock('../infra/claude/client.js', async (importOriginal) => {
-  const original = await importOriginal<typeof import('../infra/claude/client.js')>();
+vi.mock('../agents/ai-judge.js', async (importOriginal) => {
+  const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
  return {
    ...original,
    callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {
--- a/src/tests/it-pipeline.test.ts
+++ b/src/tests/it-pipeline.test.ts
@ -16,9 +16,9 @@ import { setMockScenario, resetScenario } from '../infra/mock/index.js';

 // --- Mocks ---

-// Safety net: prevent callAiJudge from calling real Claude CLI.
-vi.mock('../infra/claude/client.js', async (importOriginal) => {
-  const original = await importOriginal<typeof import('../infra/claude/client.js')>();
+// Safety net: prevent callAiJudge from calling real agent.
+vi.mock('../agents/ai-judge.js', async (importOriginal) => {
+  const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
  return {
    ...original,
    callAiJudge: vi.fn().mockImplementation(async (content: string, conditions: { index: number; text: string }[]) => {
--- a/src/tests/it-sigint-interrupt.test.ts
+++ b/src/tests/it-sigint-interrupt.test.ts
@ -71,11 +71,14 @@ vi.mock('../core/piece/index.js', () => ({
 }));

 vi.mock('../infra/claude/index.js', () => ({
-  callAiJudge: vi.fn(),
  detectRuleIndex: vi.fn(),
  interruptAllQueries: mockInterruptAllQueries,
 }));

+vi.mock('../agents/ai-judge.js', () => ({
+  callAiJudge: vi.fn(),
+}));
+
 vi.mock('../infra/config/index.js', () => ({
  loadPersonaSessions: vi.fn().mockReturnValue({}),
  updatePersonaSession: vi.fn(),
--- a/src/tests/it-three-phase-execution.test.ts
+++ b/src/tests/it-three-phase-execution.test.ts
@ -15,12 +15,13 @@ import { join } from 'node:path';
 import { tmpdir } from 'node:os';
 import { setMockScenario, resetScenario } from '../infra/mock/index.js';
 import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js';
-import { callAiJudge, detectRuleIndex } from '../infra/claude/index.js';
+import { detectRuleIndex } from '../infra/claude/index.js';
+import { callAiJudge } from '../agents/ai-judge.js';

 // --- Mocks ---

-vi.mock('../infra/claude/client.js', async (importOriginal) => {
-  const original = await importOriginal<typeof import('../infra/claude/client.js')>();
+vi.mock('../agents/ai-judge.js', async (importOriginal) => {
+  const original = await importOriginal<typeof import('../agents/ai-judge.js')>();
  return {
    ...original,
    callAiJudge: vi.fn().mockResolvedValue(-1),
--- a/src/tests/pieceExecution-debug-prompts.test.ts
+++ b/src/tests/pieceExecution-debug-prompts.test.ts
@ -78,11 +78,14 @@ vi.mock('../core/piece/index.js', () => ({
 }));

 vi.mock('../infra/claude/index.js', () => ({
-  callAiJudge: vi.fn(),
  detectRuleIndex: vi.fn(),
  interruptAllQueries: vi.fn(),
 }));

+vi.mock('../agents/ai-judge.js', () => ({
+  callAiJudge: vi.fn(),
+}));
+
 vi.mock('../infra/config/index.js', () => ({
  loadPersonaSessions: vi.fn().mockReturnValue({}),
  updatePersonaSession: vi.fn(),
--- a/src/tests/runAllTasks-concurrency.test.ts
+++ b/src/tests/runAllTasks-concurrency.test.ts
@ -98,10 +98,13 @@ vi.mock('../infra/github/index.js', () => ({

 vi.mock('../infra/claude/index.js', () => ({
  interruptAllQueries: vi.fn(),
-  callAiJudge: vi.fn(),
  detectRuleIndex: vi.fn(),
 }));

+vi.mock('../agents/ai-judge.js', () => ({
+  callAiJudge: vi.fn(),
+}));
+
 vi.mock('../shared/exitCodes.js', () => ({
  EXIT_SIGINT: 130,
 }));
--- a/src/agents/ai-judge.ts
+++ b/src/agents/ai-judge.ts
@ -0,0 +1,67 @@
+/**
+ * AI judge - provider-aware rule condition evaluator
+ *
+ * Evaluates agent output against ai() conditions using the configured provider.
+ * Uses runAgent (which resolves provider from config) instead of hardcoded Claude.
+ */
+
+import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js';
+import { loadTemplate } from '../shared/prompts/index.js';
+import { createLogger } from '../shared/utils/index.js';
+import { runAgent } from './runner.js';
+
+const log = createLogger('ai-judge');
+
+/**
+ * Detect judge rule index from [JUDGE:N] tag pattern.
+ * Returns 0-based rule index, or -1 if no match.
+ */
+export function detectJudgeIndex(content: string): number {
+  const regex = /\[JUDGE:(\d+)\]/i;
+  const match = content.match(regex);
+  if (match?.[1]) {
+    const index = Number.parseInt(match[1], 10) - 1;
+    return index >= 0 ? index : -1;
+  }
+  return -1;
+}
+
+/**
+ * Build the prompt for the AI judge that evaluates agent output against ai() conditions.
+ */
+export function buildJudgePrompt(
+  agentOutput: string,
+  aiConditions: AiJudgeCondition[],
+): string {
+  const conditionList = aiConditions
+    .map((c) => `| ${c.index + 1} | ${c.text} |`)
+    .join('\n');
+
+  return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
+}
+
+/**
+ * Call AI judge to evaluate agent output against ai() conditions.
+ * Uses the provider system (via runAgent) for correct provider resolution.
+ * Returns 0-based index of the matched ai() condition, or -1 if no match.
+ */
+export const callAiJudge: AiJudgeCaller = async (
+  agentOutput: string,
+  conditions: AiJudgeCondition[],
+  options: { cwd: string },
+): Promise<number> => {
+  const prompt = buildJudgePrompt(agentOutput, conditions);
+
+  const response = await runAgent(undefined, prompt, {
+    cwd: options.cwd,
+    maxTurns: 1,
+    allowedTools: [],
+  });
+
+  if (response.status !== 'done') {
+    log.error('AI judge call failed', { error: response.error });
+    return -1;
+  }
+
+  return detectJudgeIndex(response.content);
+};
--- a/src/agents/index.ts
+++ b/src/agents/index.ts
@ -3,4 +3,5 @@
 */

 export { AgentRunner, runAgent } from './runner.js';
+export { callAiJudge, detectJudgeIndex, buildJudgePrompt } from './ai-judge.js';
 export type { RunAgentOptions, StreamCallback } from './types.js';
--- a/src/features/tasks/execute/pieceExecution.ts
+++ b/src/features/tasks/execute/pieceExecution.ts
@ -6,7 +6,8 @@ import { readFileSync } from 'node:fs';
 import { PieceEngine, type IterationLimitRequest, type UserInputRequest } from '../../../core/piece/index.js';
 import type { PieceConfig } from '../../../core/models/index.js';
 import type { PieceExecutionResult, PieceExecutionOptions } from './types.js';
-import { callAiJudge, detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js';
+import { detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js';
+import { callAiJudge } from '../../../agents/ai-judge.js';

 export type { PieceExecutionResult, PieceExecutionOptions };

--- a/src/index.ts
+++ b/src/index.ts
@ -65,10 +65,7 @@ export {
  callClaudeCustom,
  callClaudeAgent,
  callClaudeSkill,
-  callAiJudge,
  detectRuleIndex,
-  detectJudgeIndex,
-  buildJudgePrompt,
  isRegexSafe,
 } from './infra/claude/index.js';
 export type {
--- a/src/infra/claude/client.ts
+++ b/src/infra/claude/client.ts
@ -154,60 +154,6 @@ export class ClaudeClient {
    };
  }

-  /**
-   * Detect judge rule index from [JUDGE:N] tag pattern.
-   * Returns 0-based rule index, or -1 if no match.
-   */
-  static detectJudgeIndex(content: string): number {
-    const regex = /\[JUDGE:(\d+)\]/i;
-    const match = content.match(regex);
-    if (match?.[1]) {
-      const index = Number.parseInt(match[1], 10) - 1;
-      return index >= 0 ? index : -1;
-    }
-    return -1;
-  }
-
-  /**
-   * Build the prompt for the AI judge that evaluates agent output against ai() conditions.
-   */
-  static buildJudgePrompt(
-    agentOutput: string,
-    aiConditions: { index: number; text: string }[],
-  ): string {
-    const conditionList = aiConditions
-      .map((c) => `| ${c.index + 1} | ${c.text} |`)
-      .join('\n');
-
-    return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList });
-  }
-
-  /**
-   * Call AI judge to evaluate agent output against ai() conditions.
-   * Uses a lightweight model (haiku) for cost efficiency.
-   * Returns 0-based index of the matched ai() condition, or -1 if no match.
-   */
-  async callAiJudge(
-    agentOutput: string,
-    aiConditions: { index: number; text: string }[],
-    options: { cwd: string },
-  ): Promise<number> {
-    const prompt = ClaudeClient.buildJudgePrompt(agentOutput, aiConditions);
-
-    const spawnOptions: ClaudeSpawnOptions = {
-      cwd: options.cwd,
-      model: 'haiku',
-      maxTurns: 1,
-    };
-
-    const result = await executeClaudeCli(prompt, spawnOptions);
-    if (!result.success) {
-      log.error('AI judge call failed', { error: result.error });
-      return -1;
-    }
-
-    return ClaudeClient.detectJudgeIndex(result.content);
-  }
 }

 // ---- Module-level functions ----
@ -247,21 +193,3 @@ export async function callClaudeSkill(
  return defaultClient.callSkill(skillName, prompt, options);
 }

-export function detectJudgeIndex(content: string): number {
-  return ClaudeClient.detectJudgeIndex(content);
-}
-
-export function buildJudgePrompt(
-  agentOutput: string,
-  aiConditions: { index: number; text: string }[],
-): string {
-  return ClaudeClient.buildJudgePrompt(agentOutput, aiConditions);
-}
-
-export async function callAiJudge(
-  agentOutput: string,
-  aiConditions: { index: number; text: string }[],
-  options: { cwd: string },
-): Promise<number> {
-  return defaultClient.callAiJudge(agentOutput, aiConditions, options);
-}
--- a/src/infra/claude/index.ts
+++ b/src/infra/claude/index.ts
@ -67,9 +67,6 @@ export {
  callClaudeCustom,
  callClaudeAgent,
  callClaudeSkill,
-  callAiJudge,
  detectRuleIndex,
-  detectJudgeIndex,
-  buildJudgePrompt,
  isRegexSafe,
 } from './client.js';