diff --git a/builtins/schemas/decomposition.json b/builtins/schemas/decomposition.json new file mode 100644 index 0000000..e9116c7 --- /dev/null +++ b/builtins/schemas/decomposition.json @@ -0,0 +1,33 @@ +{ + "type": "object", + "properties": { + "parts": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique part identifier" + }, + "title": { + "type": "string", + "description": "Human-readable part title" + }, + "instruction": { + "type": "string", + "description": "Instruction for the part agent" + }, + "timeout_ms": { + "type": ["integer", "null"], + "description": "Optional timeout in ms" + } + }, + "required": ["id", "title", "instruction", "timeout_ms"], + "additionalProperties": false + } + } + }, + "required": ["parts"], + "additionalProperties": false +} diff --git a/builtins/schemas/evaluation.json b/builtins/schemas/evaluation.json new file mode 100644 index 0000000..1526206 --- /dev/null +++ b/builtins/schemas/evaluation.json @@ -0,0 +1,15 @@ +{ + "type": "object", + "properties": { + "matched_index": { + "type": "integer", + "description": "Matched condition number (1-based)" + }, + "reason": { + "type": "string", + "description": "Why this condition was matched" + } + }, + "required": ["matched_index", "reason"], + "additionalProperties": false +} diff --git a/builtins/schemas/judgment.json b/builtins/schemas/judgment.json new file mode 100644 index 0000000..a8d6aed --- /dev/null +++ b/builtins/schemas/judgment.json @@ -0,0 +1,15 @@ +{ + "type": "object", + "properties": { + "step": { + "type": "integer", + "description": "Matched rule number (1-based)" + }, + "reason": { + "type": "string", + "description": "Brief justification for the decision" + } + }, + "required": ["step", "reason"], + "additionalProperties": false +} diff --git a/docs/implements/structured-output.ja.md b/docs/implements/structured-output.ja.md new file mode 100644 index 0000000..660fbe3 --- /dev/null +++ 
b/docs/implements/structured-output.ja.md @@ -0,0 +1,127 @@ +# Structured Output — Phase 3 ステータス判定 + +## 概要 + +Phase 3(ステータス判定)において、エージェントの出力を structured output(JSON スキーマ)で取得し、ルールマッチングの精度と信頼性を向上させる。 + +## プロバイダ別の挙動 + +| プロバイダ | メソッド | 仕組み | +|-----------|---------|--------| +| Claude | `structured_output` | SDK が `StructuredOutput` ツールを自動追加。エージェントがツール経由で `{ step, reason }` を返す | +| Codex | `structured_output` | `TurnOptions.outputSchema` で API レベルの JSON 制約。テキストが JSON になる | +| OpenCode | `structured_output` | プロンプト末尾に JSON スキーマ付き出力指示を注入。テキストレスポンスから `parseStructuredOutput()` で JSON を抽出 | + +## フォールバックチェーン + +`judgeStatus()` は3段階の独立した LLM 呼び出しでルールをマッチする。 + +``` +Stage 1: structured_output — outputSchema 付き LLM 呼び出し → structuredOutput.step(1-based integer) +Stage 2: phase3_tag — outputSchema なし LLM 呼び出し → content 内の [MOVEMENT:N] タグ検出 +Stage 3: ai_judge — evaluateCondition() による AI 条件評価 +``` + +各ステージは専用のインストラクションで LLM に問い合わせる。Stage 1 は「ルール番号を JSON で返せ」、Stage 2 は「タグを1行で出力せよ」と聞き方が異なる。 + +セッションログには `toJudgmentMatchMethod()` で変換された値が記録される。 + +| 内部メソッド | セッションログ | +|-------------|--------------| +| `structured_output` | `structured_output` | +| `phase3_tag` / `phase1_tag` | `tag_fallback` | +| `ai_judge` / `ai_judge_fallback` | `ai_judge` | + +## インストラクション分岐 + +Phase 3 テンプレート(`perform_phase3_message`)は `structuredOutput` フラグで2つのモードを持つ。 + +### Structured Output モード(`structuredOutput: true`) + +主要指示: ルール番号(1-based)と理由を返せ。 +フォールバック指示: structured output が使えない場合はタグを出力せよ。 + +### タグモード(`structuredOutput: false`) + +従来の指示: 対応するタグを1行で出力せよ。 + +現在、Phase 3 は常に `structuredOutput: true` で実行される。 + +## アーキテクチャ + +``` +StatusJudgmentBuilder + └─ structuredOutput: true + ├─ criteriaTable: ルール条件テーブル(常に含む) + ├─ outputList: タグ一覧(フォールバック用に含む) + └─ テンプレート: "ルール番号と理由を返せ + タグはフォールバック" + +runStatusJudgmentPhase() + └─ judgeStatus() → JudgeStatusResult { ruleIndex, method } + └─ StatusJudgmentPhaseResult { tag, ruleIndex, method } + +MovementExecutor + ├─ Phase 3 あり → judgeStatus の結果を直接使用(method 伝搬) + 
└─ Phase 3 なし → detectMatchedRule() で Phase 1 コンテンツから検出 +``` + +## JSON スキーマ + +### judgment.json(judgeStatus 用) + +```json +{ + "type": "object", + "properties": { + "step": { "type": "integer", "description": "Matched rule number (1-based)" }, + "reason": { "type": "string", "description": "Brief justification" } + }, + "required": ["step", "reason"], + "additionalProperties": false +} +``` + +### evaluation.json(evaluateCondition 用) + +```json +{ + "type": "object", + "properties": { + "matched_index": { "type": "integer" }, + "reason": { "type": "string" } + }, + "required": ["matched_index", "reason"], + "additionalProperties": false +} +``` + +## parseStructuredOutput() — JSON 抽出 + +Codex と OpenCode はテキストレスポンスから JSON を抽出する。3段階のフォールバック戦略を持つ。 + +``` +1. Direct parse — テキスト全体が `{` で始まる JSON オブジェクト +2. Code block — ```json ... ``` または ``` ... ``` 内の JSON +3. Brace extraction — テキスト内の最初の `{` から最後の `}` までを切り出し +``` + +## OpenCode 固有の仕組み + +OpenCode SDK は `outputFormat` を型定義でサポートしていない。代わりにプロンプト末尾に JSON 出力指示を注入する。 + +```` +--- +IMPORTANT: You MUST respond with ONLY a valid JSON object matching this schema. No other text, no markdown code blocks, no explanation. +```json +{ "type": "object", ...
} +``` +```` + +エージェントが返すテキストを `parseStructuredOutput()` でパースし、`AgentResponse.structuredOutput` に格納する。 + +## 注意事項 + +- OpenAI API(Codex)は `required` に全プロパティを含めないとエラーになる(`additionalProperties: false` 時) +- Codex SDK の `TurnCompletedEvent` には `finalResponse` フィールドがない。structured output は `AgentMessageItem.text` の JSON テキストから `parseStructuredOutput()` でパースする +- Claude SDK は `StructuredOutput` ツール方式のため、インストラクションでタグ出力を強調しすぎるとエージェントがツールを呼ばずタグを出力してしまう +- OpenCode のプロンプト注入方式はモデルの指示従順性に依存する。JSON 以外のテキストが混在する場合は `parseStructuredOutput()` の code block / brace extraction で回収する diff --git a/docs/testing/e2e.md b/docs/testing/e2e.md index cc996b3..b536ab0 100644 --- a/docs/testing/e2e.md +++ b/docs/testing/e2e.md @@ -17,7 +17,8 @@ E2Eテストを追加・変更した場合は、このドキュメントも更 ## E2E用config.yaml - E2Eのグローバル設定は `e2e/fixtures/config.e2e.yaml` を基準に生成する。 - `createIsolatedEnv()` は毎回一時ディレクトリ配下(`$TAKT_CONFIG_DIR/config.yaml`)にこの基準設定を書き出す。 -- 通知音は `notification_sound_events` でタイミング別に制御し、E2E既定では道中(`iteration_limit` / `piece_complete` / `piece_abort`)をOFF、全体終了時(`run_complete` / `run_abort`)のみONにする。 +- E2E実行中の `takt` 内通知音は `notification_sound: false` で無効化する。 +- `npm run test:e2e` は成否にかかわらず最後に1回ベルを鳴らし、終了コードはテスト結果を維持する。 - 各スペックで `provider` や `concurrency` を変更する場合は、`updateIsolatedConfig()` を使って差分のみ上書きする。 - `~/.takt/config.yaml` はE2Eでは参照されないため、通常実行の設定には影響しない。 diff --git a/e2e/fixtures/config.e2e.yaml b/e2e/fixtures/config.e2e.yaml index 6eea1b8..fca15ce 100644 --- a/e2e/fixtures/config.e2e.yaml +++ b/e2e/fixtures/config.e2e.yaml @@ -2,10 +2,10 @@ provider: claude language: en log_level: info default_piece: default -notification_sound: true +notification_sound: false notification_sound_events: iteration_limit: false piece_complete: false piece_abort: false run_complete: true - run_abort: true + run_abort: false diff --git a/e2e/fixtures/pieces/structured-output.yaml b/e2e/fixtures/pieces/structured-output.yaml new file mode 100644 index 0000000..fcb7280 --- /dev/null +++
b/e2e/fixtures/pieces/structured-output.yaml @@ -0,0 +1,18 @@ +name: e2e-structured-output +description: E2E piece to verify structured output rule matching + +max_movements: 5 + +movements: + - name: execute + edit: false + persona: ../agents/test-coder.md + permission_mode: readonly + instruction_template: | + Reply with exactly: "Task completed successfully." + Do not do anything else. + rules: + - condition: Task completed + next: COMPLETE + - condition: Task failed + next: ABORT diff --git a/e2e/fixtures/scenarios/cycle-detect-abort.json b/e2e/fixtures/scenarios/cycle-detect-abort.json index 8eb40f9..c4d6b1a 100644 --- a/e2e/fixtures/scenarios/cycle-detect-abort.json +++ b/e2e/fixtures/scenarios/cycle-detect-abort.json @@ -1,12 +1,13 @@ [ {"persona": "agents/test-reviewer-a", "status": "done", "content": "[REVIEW:2]\n\nNeeds fix."}, {"persona": "conductor", "status": "done", "content": "[REVIEW:2]"}, + {"persona": "conductor", "status": "done", "content": "[REVIEW:2]"}, {"persona": "agents/test-coder", "status": "done", "content": "[FIX:1]\n\nFixed."}, - {"persona": "conductor", "status": "done", "content": "[FIX:1]"}, {"persona": "agents/test-reviewer-a", "status": "done", "content": "[REVIEW:2]\n\nStill needs fix."}, {"persona": "conductor", "status": "done", "content": "[REVIEW:2]"}, + {"persona": "conductor", "status": "done", "content": "[REVIEW:2]"}, {"persona": "agents/test-coder", "status": "done", "content": "[FIX:1]\n\nFixed again."}, - {"persona": "conductor", "status": "done", "content": "[FIX:1]"}, {"persona": "agents/test-reviewer-b", "status": "done", "content": "[_LOOP_JUDGE_REVIEW_FIX:2]\n\nAbort this loop."}, + {"persona": "conductor", "status": "done", "content": "[_LOOP_JUDGE_REVIEW_FIX:2]"}, {"persona": "conductor", "status": "done", "content": "[_LOOP_JUDGE_REVIEW_FIX:2]"} ] diff --git a/e2e/fixtures/scenarios/cycle-detect-pass.json b/e2e/fixtures/scenarios/cycle-detect-pass.json index e0ccf07..999ece9 100644 --- 
a/e2e/fixtures/scenarios/cycle-detect-pass.json +++ b/e2e/fixtures/scenarios/cycle-detect-pass.json @@ -1,8 +1,9 @@ [ {"persona": "agents/test-reviewer-a", "status": "done", "content": "[REVIEW:2]\n\nNeeds fix."}, {"persona": "conductor", "status": "done", "content": "[REVIEW:2]"}, + {"persona": "conductor", "status": "done", "content": "[REVIEW:2]"}, {"persona": "agents/test-coder", "status": "done", "content": "[FIX:1]\n\nFixed."}, - {"persona": "conductor", "status": "done", "content": "[FIX:1]"}, {"persona": "agents/test-reviewer-a", "status": "done", "content": "[REVIEW:1]\n\nApproved."}, + {"persona": "conductor", "status": "done", "content": "[REVIEW:1]"}, {"persona": "conductor", "status": "done", "content": "[REVIEW:1]"} ] diff --git a/e2e/fixtures/scenarios/multi-step-all-approved.json b/e2e/fixtures/scenarios/multi-step-all-approved.json index 5392a8b..bb38ddc 100644 --- a/e2e/fixtures/scenarios/multi-step-all-approved.json +++ b/e2e/fixtures/scenarios/multi-step-all-approved.json @@ -1,7 +1,9 @@ [ - { "persona": "test-coder", "status": "done", "content": "Plan created." }, - { "persona": "test-reviewer-a", "status": "done", "content": "Architecture approved." }, - { "persona": "test-reviewer-b", "status": "done", "content": "Security approved." }, + { "persona": "agents/test-coder", "status": "done", "content": "Plan created." }, + { "persona": "agents/test-reviewer-a", "status": "done", "content": "Architecture approved." }, + { "persona": "agents/test-reviewer-b", "status": "done", "content": "Security approved." 
}, + { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:1]" }, + { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:1]" }, { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:1]" }, { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:1]" } ] diff --git a/e2e/fixtures/scenarios/multi-step-needs-fix.json b/e2e/fixtures/scenarios/multi-step-needs-fix.json index 52b595d..fda74e3 100644 --- a/e2e/fixtures/scenarios/multi-step-needs-fix.json +++ b/e2e/fixtures/scenarios/multi-step-needs-fix.json @@ -1,15 +1,19 @@ [ - { "persona": "test-coder", "status": "done", "content": "Plan created." }, + { "persona": "agents/test-coder", "status": "done", "content": "Plan created." }, - { "persona": "test-reviewer-a", "status": "done", "content": "Architecture looks good." }, - { "persona": "test-reviewer-b", "status": "done", "content": "Security issues found." }, + { "persona": "agents/test-reviewer-a", "status": "done", "content": "Architecture looks good." }, + { "persona": "agents/test-reviewer-b", "status": "done", "content": "Security issues found." }, + { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:2]" }, + { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:2]" }, { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:2]" }, { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:2]" }, - { "persona": "test-coder", "status": "done", "content": "Fix applied." }, + { "persona": "agents/test-coder", "status": "done", "content": "Fix applied." }, - { "persona": "test-reviewer-a", "status": "done", "content": "Architecture still approved." }, - { "persona": "test-reviewer-b", "status": "done", "content": "Security now approved." 
}, + { "persona": "agents/test-reviewer-a", "status": "done", "content": "Architecture still approved." }, + { "persona": "agents/test-reviewer-b", "status": "done", "content": "Security now approved." }, + { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:1]" }, + { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:1]" }, { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:1]" }, { "persona": "conductor", "status": "done", "content": "[ARCH-REVIEW:1] [SECURITY-REVIEW:1]" } ] diff --git a/e2e/fixtures/scenarios/report-judge.json b/e2e/fixtures/scenarios/report-judge.json index aacb7d4..57d7c66 100644 --- a/e2e/fixtures/scenarios/report-judge.json +++ b/e2e/fixtures/scenarios/report-judge.json @@ -9,6 +9,11 @@ "status": "done", "content": "Report summary: OK" }, + { + "persona": "conductor", + "status": "done", + "content": "[EXECUTE:1]" + }, { "persona": "conductor", "status": "done", diff --git a/e2e/helpers/test-repo.ts b/e2e/helpers/test-repo.ts index 04c1c90..35cd4f1 100644 --- a/e2e/helpers/test-repo.ts +++ b/e2e/helpers/test-repo.ts @@ -54,6 +54,66 @@ function getGitHubUser(): string { return user; } +function canUseGitHubRepo(): boolean { + try { + const user = getGitHubUser(); + const repoName = `${user}/takt-testing`; + execFileSync('gh', ['repo', 'view', repoName], { + encoding: 'utf-8', + stdio: 'pipe', + }); + return true; + } catch { + return false; + } +} + +export function isGitHubE2EAvailable(): boolean { + return canUseGitHubRepo(); +} + +function createOfflineTestRepo(options?: CreateTestRepoOptions): TestRepo { + const sandboxPath = mkdtempSync(join(tmpdir(), 'takt-e2e-repo-')); + const originPath = join(sandboxPath, 'origin.git'); + const repoPath = join(sandboxPath, 'work'); + + execFileSync('git', ['init', '--bare', originPath], { stdio: 'pipe' }); + execFileSync('git', ['clone', originPath, repoPath], { stdio: 'pipe' }); + 
execFileSync('git', ['config', 'user.email', 'test@example.com'], { cwd: repoPath, stdio: 'pipe' }); + execFileSync('git', ['config', 'user.name', 'Test'], { cwd: repoPath, stdio: 'pipe' }); + writeFileSync(join(repoPath, 'README.md'), '# test\n'); + execFileSync('git', ['add', '.'], { cwd: repoPath, stdio: 'pipe' }); + execFileSync('git', ['commit', '-m', 'init'], { cwd: repoPath, stdio: 'pipe' }); + execFileSync('git', ['push', '-u', 'origin', 'HEAD'], { cwd: repoPath, stdio: 'pipe' }); + + const testBranch = options?.skipBranch ? undefined : `e2e-test-${Date.now()}`; + if (testBranch) { + execFileSync('git', ['checkout', '-b', testBranch], { + cwd: repoPath, + stdio: 'pipe', + }); + } + + const currentBranch = testBranch + ?? execFileSync('git', ['branch', '--show-current'], { + cwd: repoPath, + encoding: 'utf-8', + }).trim(); + + return { + path: repoPath, + repoName: 'local/takt-testing', + branch: currentBranch, + cleanup: () => { + try { + rmSync(sandboxPath, { recursive: true, force: true }); + } catch { + // Best-effort cleanup + } + }, + }; +} + /** * Clone the takt-testing repository and create a test branch. * @@ -63,6 +123,10 @@ function getGitHubUser(): string { * 3. 
Delete local directory */ export function createTestRepo(options?: CreateTestRepoOptions): TestRepo { + if (!canUseGitHubRepo()) { + return createOfflineTestRepo(options); + } + const user = getGitHubUser(); const repoName = `${user}/takt-testing`; diff --git a/e2e/specs/add.e2e.ts b/e2e/specs/add.e2e.ts index bc7979c..f16cdce 100644 --- a/e2e/specs/add.e2e.ts +++ b/e2e/specs/add.e2e.ts @@ -9,11 +9,12 @@ import { updateIsolatedConfig, type IsolatedEnv, } from '../helpers/isolated-env'; -import { createTestRepo, type TestRepo } from '../helpers/test-repo'; +import { createTestRepo, isGitHubE2EAvailable, type TestRepo } from '../helpers/test-repo'; import { runTakt } from '../helpers/takt-runner'; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); +const requiresGitHub = isGitHubE2EAvailable(); // E2E更新時は docs/testing/e2e.md も更新すること describe('E2E: Add task from GitHub issue (takt add)', () => { @@ -67,7 +68,7 @@ describe('E2E: Add task from GitHub issue (takt add)', () => { } }); - it('should create a task file from issue reference', () => { + it.skipIf(!requiresGitHub)('should create a task file from issue reference', () => { const scenarioPath = resolve(__dirname, '../fixtures/scenarios/add-task.json'); const result = runTakt({ diff --git a/e2e/specs/structured-output.e2e.ts b/e2e/specs/structured-output.e2e.ts new file mode 100644 index 0000000..1742e63 --- /dev/null +++ b/e2e/specs/structured-output.e2e.ts @@ -0,0 +1,96 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { createIsolatedEnv, type IsolatedEnv } from '../helpers/isolated-env'; +import { createLocalRepo, type LocalRepo } from '../helpers/test-repo'; +import { runTakt } from '../helpers/takt-runner'; +import { readSessionRecords } from '../helpers/session-log'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = 
dirname(__filename); + +/** + * E2E: Structured output for status judgment (Phase 3). + * + * Verifies that real providers (Claude, Codex, OpenCode) can execute a piece + * where the status judgment phase uses structured output (`outputSchema`) + * internally via `judgeStatus()`. + * + * The piece has 2 rules per step, so `judgeStatus` cannot auto-select + * and must actually call the provider with an outputSchema to determine + * which rule matched. + * + * If structured output works correctly, `judgeStatus` extracts the step + * number from `response.structuredOutput.step` (recorded as `structured_output`). + * If the agent happens to output `[STEP:N]` tags, the RuleEvaluator detects + * them as `phase3_tag`/`phase1_tag` (recorded as `tag_fallback` in session log). + * The session log matchMethod is transformed by `toJudgmentMatchMethod()`. + * + * Run with: + * TAKT_E2E_PROVIDER=claude vitest run --config vitest.config.e2e.structured-output.ts + * TAKT_E2E_PROVIDER=codex vitest run --config vitest.config.e2e.structured-output.ts + * TAKT_E2E_PROVIDER=opencode TAKT_E2E_MODEL=openai/gpt-4 vitest run --config vitest.config.e2e.structured-output.ts + */ +describe('E2E: Structured output rule matching', () => { + let isolatedEnv: IsolatedEnv; + let repo: LocalRepo; + + beforeEach(() => { + isolatedEnv = createIsolatedEnv(); + repo = createLocalRepo(); + }); + + afterEach(() => { + try { repo.cleanup(); } catch { /* best-effort */ } + try { isolatedEnv.cleanup(); } catch { /* best-effort */ } + }); + + it('should complete piece via Phase 3 status judgment with 2-rule step', () => { + const piecePath = resolve(__dirname, '../fixtures/pieces/structured-output.yaml'); + + const result = runTakt({ + args: [ + '--task', 'Say hello', + '--piece', piecePath, + '--create-worktree', 'no', + ], + cwd: repo.path, + env: isolatedEnv.env, + timeout: 240_000, + }); + + if (result.exitCode !== 0) { + console.log('=== STDOUT ===\n', result.stdout); + console.log('=== STDERR ===\n', 
result.stderr); + } + + // Always log the matchMethod for diagnostic purposes + const allRecords = readSessionRecords(repo.path); + const sc = allRecords.find((r) => r.type === 'step_complete'); + console.log(`=== matchMethod: ${sc?.matchMethod ?? '(none)'} ===`); + + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain('Piece completed'); + + // Verify session log has proper step_complete with matchMethod + const records = readSessionRecords(repo.path); + + const pieceComplete = records.find((r) => r.type === 'piece_complete'); + expect(pieceComplete).toBeDefined(); + + const stepComplete = records.find((r) => r.type === 'step_complete'); + expect(stepComplete).toBeDefined(); + + // matchMethod should be present — the 2-rule step required actual judgment + // (auto_select is only used for single-rule steps) + const matchMethod = stepComplete?.matchMethod as string | undefined; + expect(matchMethod).toBeDefined(); + + // Session log records transformed matchMethod via toJudgmentMatchMethod(): + // structured_output → structured_output (judgeStatus extracted from structuredOutput.step) + // phase3_tag / phase1_tag → tag_fallback (agent output [STEP:N] tag, detected by RuleEvaluator) + // ai_judge / ai_judge_fallback → ai_judge (AI evaluated conditions as fallback) + const validMethods = ['structured_output', 'tag_fallback', 'ai_judge']; + expect(validMethods).toContain(matchMethod); + }, 240_000); +}); diff --git a/package.json b/package.json index 92b5859..945badd 100644 --- a/package.json +++ b/package.json @@ -14,8 +14,8 @@ "watch": "tsc --watch", "test": "vitest run", "test:watch": "vitest", - "test:e2e": "npm run test:e2e:all", - "test:e2e:mock": "vitest run --config vitest.config.e2e.mock.ts --reporter=verbose", + "test:e2e": "npm run test:e2e:mock; code=$?; if [ \"$code\" -eq 0 ]; then msg='test:e2e passed'; else msg=\"test:e2e failed (exit=$code)\"; fi; if command -v osascript >/dev/null 2>&1; then osascript -e \"display notification 
\\\"$msg\\\" with title \\\"takt\\\" subtitle \\\"E2E\\\"\" >/dev/null 2>&1 || true; fi; echo \"[takt] $msg\"; exit $code", + "test:e2e:mock": "TAKT_E2E_PROVIDER=mock vitest run --config vitest.config.e2e.mock.ts --reporter=verbose", "test:e2e:all": "npm run test:e2e:mock && npm run test:e2e:provider", "test:e2e:provider": "npm run test:e2e:provider:claude && npm run test:e2e:provider:codex", "test:e2e:provider:claude": "TAKT_E2E_PROVIDER=claude vitest run --config vitest.config.e2e.provider.ts --reporter=verbose", diff --git a/src/__tests__/agent-usecases.test.ts b/src/__tests__/agent-usecases.test.ts new file mode 100644 index 0000000..91d0f40 --- /dev/null +++ b/src/__tests__/agent-usecases.test.ts @@ -0,0 +1,232 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { runAgent } from '../agents/runner.js'; +import { parseParts } from '../core/piece/engine/task-decomposer.js'; +import { detectJudgeIndex } from '../agents/judge-utils.js'; +import { + executeAgent, + generateReport, + executePart, + evaluateCondition, + judgeStatus, + decomposeTask, +} from '../core/piece/agent-usecases.js'; + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../core/piece/schema-loader.js', () => ({ + loadJudgmentSchema: vi.fn(() => ({ type: 'judgment' })), + loadEvaluationSchema: vi.fn(() => ({ type: 'evaluation' })), + loadDecompositionSchema: vi.fn((maxParts: number) => ({ type: 'decomposition', maxParts })), +})); + +vi.mock('../core/piece/engine/task-decomposer.js', () => ({ + parseParts: vi.fn(), +})); + +vi.mock('../agents/judge-utils.js', () => ({ + buildJudgePrompt: vi.fn(() => 'judge prompt'), + detectJudgeIndex: vi.fn(() => -1), +})); + +function doneResponse(content: string, structuredOutput?: Record) { + return { + persona: 'tester', + status: 'done' as const, + content, + timestamp: new Date('2026-02-12T00:00:00Z'), + structuredOutput, + }; +} + +const judgeOptions = { cwd: '/repo', movementName: 'review' }; + 
+describe('agent-usecases', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('executeAgent/generateReport/executePart は runAgent に委譲する', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('ok')); + + await executeAgent('coder', 'do work', { cwd: '/tmp' }); + await generateReport('coder', 'write report', { cwd: '/tmp' }); + await executePart('coder', 'part work', { cwd: '/tmp' }); + + expect(runAgent).toHaveBeenCalledTimes(3); + expect(runAgent).toHaveBeenNthCalledWith(1, 'coder', 'do work', { cwd: '/tmp' }); + expect(runAgent).toHaveBeenNthCalledWith(2, 'coder', 'write report', { cwd: '/tmp' }); + expect(runAgent).toHaveBeenNthCalledWith(3, 'coder', 'part work', { cwd: '/tmp' }); + }); + + it('evaluateCondition は構造化出力の matched_index を優先する', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('ignored', { matched_index: 2 })); + + const result = await evaluateCondition('agent output', [ + { index: 0, text: 'first' }, + { index: 1, text: 'second' }, + ], { cwd: '/repo' }); + + expect(result).toBe(1); + expect(runAgent).toHaveBeenCalledWith(undefined, 'judge prompt', expect.objectContaining({ + cwd: '/repo', + outputSchema: { type: 'evaluation' }, + })); + }); + + it('evaluateCondition は構造化出力が使えない場合にタグ検出へフォールバックする', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('[JUDGE:2]')); + vi.mocked(detectJudgeIndex).mockReturnValue(1); + + const result = await evaluateCondition('agent output', [ + { index: 0, text: 'first' }, + { index: 1, text: 'second' }, + ], { cwd: '/repo' }); + + expect(result).toBe(1); + expect(detectJudgeIndex).toHaveBeenCalledWith('[JUDGE:2]'); + }); + + it('evaluateCondition は runAgent が done 以外なら -1 を返す', async () => { + vi.mocked(runAgent).mockResolvedValue({ + persona: 'tester', + status: 'error', + content: 'failed', + timestamp: new Date('2026-02-12T00:00:00Z'), + }); + + const result = await evaluateCondition('agent output', [ + { index: 0, text: 'first' }, + ], { cwd: '/repo' }); 
+ + expect(result).toBe(-1); + expect(detectJudgeIndex).not.toHaveBeenCalled(); + }); + + // --- judgeStatus: 3-stage fallback --- + + it('judgeStatus は単一ルール時に auto_select を返す', async () => { + const result = await judgeStatus('structured', 'tag', [{ condition: 'always', next: 'done' }], judgeOptions); + + expect(result).toEqual({ ruleIndex: 0, method: 'auto_select' }); + expect(runAgent).not.toHaveBeenCalled(); + }); + + it('judgeStatus はルールが空ならエラー', async () => { + await expect(judgeStatus('structured', 'tag', [], judgeOptions)) + .rejects.toThrow('judgeStatus requires at least one rule'); + }); + + it('judgeStatus は Stage 1 で構造化出力 step を採用する', async () => { + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('x', { step: 2 })); + + const result = await judgeStatus('structured', 'tag', [ + { condition: 'a', next: 'one' }, + { condition: 'b', next: 'two' }, + ], judgeOptions); + + expect(result).toEqual({ ruleIndex: 1, method: 'structured_output' }); + expect(runAgent).toHaveBeenCalledTimes(1); + expect(runAgent).toHaveBeenCalledWith('conductor', 'structured', expect.objectContaining({ + outputSchema: { type: 'judgment' }, + })); + }); + + it('judgeStatus は Stage 2 でタグ検出を使う', async () => { + // Stage 1: structured output fails (no structuredOutput) + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match')); + // Stage 2: tag detection succeeds + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('[REVIEW:2]')); + + const result = await judgeStatus('structured', 'tag', [ + { condition: 'a', next: 'one' }, + { condition: 'b', next: 'two' }, + ], judgeOptions); + + expect(result).toEqual({ ruleIndex: 1, method: 'phase3_tag' }); + expect(runAgent).toHaveBeenCalledTimes(2); + expect(runAgent).toHaveBeenNthCalledWith(1, 'conductor', 'structured', expect.objectContaining({ + outputSchema: { type: 'judgment' }, + })); + expect(runAgent).toHaveBeenNthCalledWith(2, 'conductor', 'tag', expect.not.objectContaining({ + outputSchema: expect.anything(), + 
})); + }); + + it('judgeStatus は Stage 3 で AI Judge を使う', async () => { + // Stage 1: structured output fails + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match')); + // Stage 2: tag detection fails + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no tag')); + // Stage 3: evaluateCondition succeeds + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('ignored', { matched_index: 2 })); + + const result = await judgeStatus('structured', 'tag', [ + { condition: 'a', next: 'one' }, + { condition: 'b', next: 'two' }, + ], judgeOptions); + + expect(result).toEqual({ ruleIndex: 1, method: 'ai_judge' }); + expect(runAgent).toHaveBeenCalledTimes(3); + }); + + it('judgeStatus は全ての判定に失敗したらエラー', async () => { + // Stage 1: structured output fails + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match')); + // Stage 2: tag detection fails + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no tag')); + // Stage 3: evaluateCondition fails + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('still no match')); + vi.mocked(detectJudgeIndex).mockReturnValue(-1); + + await expect(judgeStatus('structured', 'tag', [ + { condition: 'a', next: 'one' }, + { condition: 'b', next: 'two' }, + ], judgeOptions)).rejects.toThrow('Status not found for movement "review"'); + }); + + // --- decomposeTask --- + + it('decomposeTask は構造化出力 parts を返す', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('x', { + parts: [ + { id: 'p1', title: 'Part 1', instruction: 'Do 1', timeout_ms: 1000 }, + ], + })); + + const result = await decomposeTask('instruction', 3, { cwd: '/repo', persona: 'team-leader' }); + + expect(result).toEqual([ + { id: 'p1', title: 'Part 1', instruction: 'Do 1', timeoutMs: 1000 }, + ]); + expect(parseParts).not.toHaveBeenCalled(); + }); + + it('decomposeTask は構造化出力がない場合 parseParts にフォールバックする', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('```json [] ```')); + 
vi.mocked(parseParts).mockReturnValue([ + { id: 'p1', title: 'Part 1', instruction: 'fallback', timeoutMs: undefined }, + ]); + + const result = await decomposeTask('instruction', 2, { cwd: '/repo' }); + + expect(parseParts).toHaveBeenCalledWith('```json [] ```', 2); + expect(result).toEqual([ + { id: 'p1', title: 'Part 1', instruction: 'fallback', timeoutMs: undefined }, + ]); + }); + + it('decomposeTask は done 以外をエラーにする', async () => { + vi.mocked(runAgent).mockResolvedValue({ + persona: 'team-leader', + status: 'error', + content: 'failure', + error: 'bad output', + timestamp: new Date('2026-02-12T00:00:00Z'), + }); + + await expect(decomposeTask('instruction', 2, { cwd: '/repo' })) + .rejects.toThrow('Team leader failed: bad output'); + }); +}); diff --git a/src/__tests__/claude-executor-structured-output.test.ts b/src/__tests__/claude-executor-structured-output.test.ts new file mode 100644 index 0000000..4bbe16e --- /dev/null +++ b/src/__tests__/claude-executor-structured-output.test.ts @@ -0,0 +1,164 @@ +/** + * Claude SDK layer structured output tests. + * + * Tests two internal components: + * 1. SdkOptionsBuilder — outputSchema → outputFormat conversion + * 2. 
QueryExecutor — structured_output extraction from SDK result messages + */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// ===== SdkOptionsBuilder tests (no mock needed) ===== + +import { buildSdkOptions } from '../infra/claude/options-builder.js'; + +describe('SdkOptionsBuilder — outputFormat 変換', () => { + it('outputSchema が outputFormat に変換される', () => { + const schema = { type: 'object', properties: { step: { type: 'integer' } } }; + const sdkOptions = buildSdkOptions({ cwd: '/tmp', outputSchema: schema }); + + expect((sdkOptions as Record).outputFormat).toEqual({ + type: 'json_schema', + schema, + }); + }); + + it('outputSchema 未設定なら outputFormat は含まれない', () => { + const sdkOptions = buildSdkOptions({ cwd: '/tmp' }); + expect(sdkOptions).not.toHaveProperty('outputFormat'); + }); +}); + +// ===== QueryExecutor tests (mock @anthropic-ai/claude-agent-sdk) ===== + +const { mockQuery } = vi.hoisted(() => ({ + mockQuery: vi.fn(), +})); + +vi.mock('@anthropic-ai/claude-agent-sdk', () => ({ + query: mockQuery, + AbortError: class AbortError extends Error { + constructor(message?: string) { + super(message); + this.name = 'AbortError'; + } + }, +})); + +// QueryExecutor は executor.ts 内で query() を使うため、mock 後にインポート +const { QueryExecutor } = await import('../infra/claude/executor.js'); + +/** + * query() が返す Query オブジェクト(async iterable + interrupt)のモック + */ +function createMockQuery(messages: Array>) { + return { + [Symbol.asyncIterator]: async function* () { + for (const msg of messages) { + yield msg; + } + }, + interrupt: vi.fn(), + }; +} + +describe('QueryExecutor — structuredOutput 抽出', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('result メッセージの structured_output (snake_case) を抽出する', async () => { + mockQuery.mockReturnValue(createMockQuery([ + { type: 'result', subtype: 'success', result: 'done', structured_output: { step: 2 } }, + ])); + + const executor = new QueryExecutor(); + const result = await 
executor.execute('test', { cwd: '/tmp' }); + + expect(result.success).toBe(true); + expect(result.structuredOutput).toEqual({ step: 2 }); + }); + + it('result メッセージの structuredOutput (camelCase) を抽出する', async () => { + mockQuery.mockReturnValue(createMockQuery([ + { type: 'result', subtype: 'success', result: 'done', structuredOutput: { step: 3 } }, + ])); + + const executor = new QueryExecutor(); + const result = await executor.execute('test', { cwd: '/tmp' }); + + expect(result.structuredOutput).toEqual({ step: 3 }); + }); + + it('structured_output が snake_case 優先 (snake_case と camelCase 両方ある場合)', async () => { + mockQuery.mockReturnValue(createMockQuery([ + { + type: 'result', + subtype: 'success', + result: 'done', + structured_output: { step: 1 }, + structuredOutput: { step: 9 }, + }, + ])); + + const executor = new QueryExecutor(); + const result = await executor.execute('test', { cwd: '/tmp' }); + + expect(result.structuredOutput).toEqual({ step: 1 }); + }); + + it('structuredOutput がない場合は undefined', async () => { + mockQuery.mockReturnValue(createMockQuery([ + { type: 'result', subtype: 'success', result: 'plain text' }, + ])); + + const executor = new QueryExecutor(); + const result = await executor.execute('test', { cwd: '/tmp' }); + + expect(result.structuredOutput).toBeUndefined(); + }); + + it('structured_output が配列の場合は無視する', async () => { + mockQuery.mockReturnValue(createMockQuery([ + { type: 'result', subtype: 'success', result: 'done', structured_output: [1, 2, 3] }, + ])); + + const executor = new QueryExecutor(); + const result = await executor.execute('test', { cwd: '/tmp' }); + + expect(result.structuredOutput).toBeUndefined(); + }); + + it('structured_output が null の場合は無視する', async () => { + mockQuery.mockReturnValue(createMockQuery([ + { type: 'result', subtype: 'success', result: 'done', structured_output: null }, + ])); + + const executor = new QueryExecutor(); + const result = await executor.execute('test', { cwd: '/tmp' }); + + 
expect(result.structuredOutput).toBeUndefined(); + }); + + it('assistant テキストと structured_output を同時に取得する', async () => { + mockQuery.mockReturnValue(createMockQuery([ + { + type: 'assistant', + message: { content: [{ type: 'text', text: 'thinking...' }] }, + }, + { + type: 'result', + subtype: 'success', + result: 'final text', + structured_output: { step: 1, reason: 'approved' }, + }, + ])); + + const executor = new QueryExecutor(); + const result = await executor.execute('test', { cwd: '/tmp' }); + + expect(result.success).toBe(true); + expect(result.content).toBe('final text'); + expect(result.structuredOutput).toEqual({ step: 1, reason: 'approved' }); + }); +}); diff --git a/src/__tests__/claude-provider-abort-signal.test.ts b/src/__tests__/claude-provider-abort-signal.test.ts index 16fcb73..b3f4a8a 100644 --- a/src/__tests__/claude-provider-abort-signal.test.ts +++ b/src/__tests__/claude-provider-abort-signal.test.ts @@ -9,7 +9,7 @@ const { mockResolveAnthropicApiKey: vi.fn(), })); -vi.mock('../infra/claude/index.js', () => ({ +vi.mock('../infra/claude/client.js', () => ({ callClaude: mockCallClaude, callClaudeCustom: vi.fn(), callClaudeAgent: vi.fn(), diff --git a/src/__tests__/client.test.ts b/src/__tests__/client.test.ts index fdb0652..a44cfa4 100644 --- a/src/__tests__/client.test.ts +++ b/src/__tests__/client.test.ts @@ -3,10 +3,8 @@ */ import { describe, it, expect } from 'vitest'; -import { - detectRuleIndex, - isRegexSafe, -} from '../infra/claude/client.js'; +import { isRegexSafe } from '../infra/claude/utils.js'; +import { detectRuleIndex } from '../shared/utils/ruleIndex.js'; describe('isRegexSafe', () => { it('should accept simple patterns', () => { diff --git a/src/__tests__/codex-structured-output.test.ts b/src/__tests__/codex-structured-output.test.ts new file mode 100644 index 0000000..a262b14 --- /dev/null +++ b/src/__tests__/codex-structured-output.test.ts @@ -0,0 +1,152 @@ +/** + * Codex SDK layer structured output tests. 
+ * + * Tests CodexClient's extraction of structuredOutput by parsing + * JSON text from agent_message items when outputSchema is provided. + * + * Codex SDK returns structured output as JSON text in agent_message + * items (not via turn.completed.finalResponse which doesn't exist + * on TurnCompletedEvent). + */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// ===== Codex SDK mock ===== + +let mockEvents: Array> = []; + +vi.mock('@openai/codex-sdk', () => { + return { + Codex: class MockCodex { + async startThread() { + return { + id: 'thread-mock', + runStreamed: async () => ({ + events: (async function* () { + for (const event of mockEvents) { + yield event; + } + })(), + }), + }; + } + async resumeThread() { + return this.startThread(); + } + }, + }; +}); + +// CodexClient は @openai/codex-sdk をインポートするため、mock 後にインポート +const { CodexClient } = await import('../infra/codex/client.js'); + +describe('CodexClient — structuredOutput 抽出', () => { + beforeEach(() => { + vi.clearAllMocks(); + mockEvents = []; + }); + + it('outputSchema 指定時に agent_message の JSON テキストを structuredOutput として返す', async () => { + const schema = { type: 'object', properties: { step: { type: 'integer' } } }; + mockEvents = [ + { type: 'thread.started', thread_id: 'thread-1' }, + { + type: 'item.completed', + item: { id: 'msg-1', type: 'agent_message', text: '{"step": 2, "reason": "approved"}' }, + }, + { type: 'turn.completed', usage: { input_tokens: 0, cached_input_tokens: 0, output_tokens: 0 } }, + ]; + + const client = new CodexClient(); + const result = await client.call('coder', 'prompt', { cwd: '/tmp', outputSchema: schema }); + + expect(result.status).toBe('done'); + expect(result.structuredOutput).toEqual({ step: 2, reason: 'approved' }); + }); + + it('outputSchema なしの場合はテキストを JSON パースしない', async () => { + mockEvents = [ + { type: 'thread.started', thread_id: 'thread-1' }, + { + type: 'item.completed', + item: { id: 'msg-1', type: 'agent_message', text: '{"step": 
2}' }, + }, + { type: 'turn.completed', usage: { input_tokens: 0, cached_input_tokens: 0, output_tokens: 0 } }, + ]; + + const client = new CodexClient(); + const result = await client.call('coder', 'prompt', { cwd: '/tmp' }); + + expect(result.status).toBe('done'); + expect(result.structuredOutput).toBeUndefined(); + }); + + it('agent_message が JSON でない場合は undefined', async () => { + const schema = { type: 'object', properties: { step: { type: 'integer' } } }; + mockEvents = [ + { type: 'thread.started', thread_id: 'thread-1' }, + { + type: 'item.completed', + item: { id: 'msg-1', type: 'agent_message', text: 'plain text response' }, + }, + { type: 'turn.completed', usage: { input_tokens: 0, cached_input_tokens: 0, output_tokens: 0 } }, + ]; + + const client = new CodexClient(); + const result = await client.call('coder', 'prompt', { cwd: '/tmp', outputSchema: schema }); + + expect(result.status).toBe('done'); + expect(result.structuredOutput).toBeUndefined(); + }); + + it('JSON が配列の場合は無視する', async () => { + const schema = { type: 'object', properties: { step: { type: 'integer' } } }; + mockEvents = [ + { type: 'thread.started', thread_id: 'thread-1' }, + { + type: 'item.completed', + item: { id: 'msg-1', type: 'agent_message', text: '[1, 2, 3]' }, + }, + { type: 'turn.completed', usage: { input_tokens: 0, cached_input_tokens: 0, output_tokens: 0 } }, + ]; + + const client = new CodexClient(); + const result = await client.call('coder', 'prompt', { cwd: '/tmp', outputSchema: schema }); + + expect(result.structuredOutput).toBeUndefined(); + }); + + it('agent_message がない場合は structuredOutput なし', async () => { + const schema = { type: 'object', properties: { step: { type: 'integer' } } }; + mockEvents = [ + { type: 'thread.started', thread_id: 'thread-1' }, + { type: 'turn.completed', usage: { input_tokens: 0, cached_input_tokens: 0, output_tokens: 0 } }, + ]; + + const client = new CodexClient(); + const result = await client.call('coder', 'prompt', { cwd: '/tmp', 
outputSchema: schema }); + + expect(result.status).toBe('done'); + expect(result.structuredOutput).toBeUndefined(); + }); + + it('outputSchema 付きで呼び出して structuredOutput が返る', async () => { + const schema = { type: 'object', properties: { step: { type: 'integer' } } }; + mockEvents = [ + { type: 'thread.started', thread_id: 'thread-1' }, + { + type: 'item.completed', + item: { id: 'msg-1', type: 'agent_message', text: '{"step": 1}' }, + }, + { type: 'turn.completed', usage: { input_tokens: 0, cached_input_tokens: 0, output_tokens: 0 } }, + ]; + + const client = new CodexClient(); + const result = await client.call('coder', 'prompt', { + cwd: '/tmp', + outputSchema: schema, + }); + + expect(result.structuredOutput).toEqual({ step: 1 }); + }); +}); diff --git a/src/__tests__/e2e-helpers.test.ts b/src/__tests__/e2e-helpers.test.ts index 63b395d..f7b25d6 100644 --- a/src/__tests__/e2e-helpers.test.ts +++ b/src/__tests__/e2e-helpers.test.ts @@ -76,7 +76,7 @@ describe('createIsolatedEnv', () => { expect(isolated.env.GIT_CONFIG_GLOBAL).toContain('takt-e2e-'); }); - it('should create config.yaml from E2E fixture with notification_sound timing controls', () => { + it('should create config.yaml from E2E fixture with notification_sound disabled', () => { const isolated = createIsolatedEnv(); cleanups.push(isolated.cleanup); @@ -86,13 +86,13 @@ describe('createIsolatedEnv', () => { expect(config.language).toBe('en'); expect(config.log_level).toBe('info'); expect(config.default_piece).toBe('default'); - expect(config.notification_sound).toBe(true); + expect(config.notification_sound).toBe(false); expect(config.notification_sound_events).toEqual({ iteration_limit: false, piece_complete: false, piece_abort: false, run_complete: true, - run_abort: true, + run_abort: false, }); }); @@ -120,13 +120,13 @@ describe('createIsolatedEnv', () => { expect(config.provider).toBe('mock'); expect(config.concurrency).toBe(2); - expect(config.notification_sound).toBe(true); + 
expect(config.notification_sound).toBe(false); expect(config.notification_sound_events).toEqual({ iteration_limit: false, piece_complete: false, piece_abort: false, run_complete: true, - run_abort: true, + run_abort: false, }); expect(config.language).toBe('en'); }); @@ -149,7 +149,7 @@ describe('createIsolatedEnv', () => { piece_complete: false, piece_abort: false, run_complete: false, - run_abort: true, + run_abort: false, }); }); diff --git a/src/__tests__/engine-abort.test.ts b/src/__tests__/engine-abort.test.ts index dae845c..0d9fdff 100644 --- a/src/__tests__/engine-abort.test.ts +++ b/src/__tests__/engine-abort.test.ts @@ -25,7 +25,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ diff --git a/src/__tests__/engine-arpeggio.test.ts b/src/__tests__/engine-arpeggio.test.ts index 3523c60..35f55b2 100644 --- a/src/__tests__/engine-arpeggio.test.ts +++ b/src/__tests__/engine-arpeggio.test.ts @@ -21,7 +21,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async () => { diff --git a/src/__tests__/engine-blocked.test.ts b/src/__tests__/engine-blocked.test.ts index 8cf10e6..02abc20 100644 --- a/src/__tests__/engine-blocked.test.ts +++ b/src/__tests__/engine-blocked.test.ts @@ -23,7 +23,7 @@ 
vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ diff --git a/src/__tests__/engine-error.test.ts b/src/__tests__/engine-error.test.ts index bcc9ca2..553ec2f 100644 --- a/src/__tests__/engine-error.test.ts +++ b/src/__tests__/engine-error.test.ts @@ -24,7 +24,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ @@ -36,7 +36,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({ import { PieceEngine } from '../core/piece/index.js'; import { runAgent } from '../agents/runner.js'; -import { detectMatchedRule } from '../core/piece/index.js'; +import { detectMatchedRule } from '../core/piece/evaluation/index.js'; import { makeResponse, makeMovement, diff --git a/src/__tests__/engine-happy-path.test.ts b/src/__tests__/engine-happy-path.test.ts index d067fa4..c42e613 100644 --- a/src/__tests__/engine-happy-path.test.ts +++ b/src/__tests__/engine-happy-path.test.ts @@ -28,7 +28,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: 
vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ diff --git a/src/__tests__/engine-loop-monitors.test.ts b/src/__tests__/engine-loop-monitors.test.ts index e363264..31aff5d 100644 --- a/src/__tests__/engine-loop-monitors.test.ts +++ b/src/__tests__/engine-loop-monitors.test.ts @@ -27,7 +27,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ diff --git a/src/__tests__/engine-parallel-failure.test.ts b/src/__tests__/engine-parallel-failure.test.ts index a48d6c1..2ead682 100644 --- a/src/__tests__/engine-parallel-failure.test.ts +++ b/src/__tests__/engine-parallel-failure.test.ts @@ -23,7 +23,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ @@ -35,7 +35,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({ import { PieceEngine } from '../core/piece/index.js'; import { runAgent } from '../agents/runner.js'; -import { detectMatchedRule } from '../core/piece/index.js'; +import { detectMatchedRule } from '../core/piece/evaluation/index.js'; import { makeResponse, makeMovement, diff --git a/src/__tests__/engine-parallel.test.ts b/src/__tests__/engine-parallel.test.ts index 
bb5cf77..f86f1bf 100644 --- a/src/__tests__/engine-parallel.test.ts +++ b/src/__tests__/engine-parallel.test.ts @@ -24,7 +24,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ diff --git a/src/__tests__/engine-team-leader.test.ts b/src/__tests__/engine-team-leader.test.ts index cd35a8d..3d0de7e 100644 --- a/src/__tests__/engine-team-leader.test.ts +++ b/src/__tests__/engine-team-leader.test.ts @@ -17,7 +17,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ diff --git a/src/__tests__/engine-test-helpers.ts b/src/__tests__/engine-test-helpers.ts index 5ac943b..f17dc03 100644 --- a/src/__tests__/engine-test-helpers.ts +++ b/src/__tests__/engine-test-helpers.ts @@ -16,9 +16,9 @@ import { makeRule } from './test-helpers.js'; // --- Mock imports (consumers must call vi.mock before importing this) --- import { runAgent } from '../agents/runner.js'; -import { detectMatchedRule } from '../core/piece/index.js'; -import type { RuleMatch } from '../core/piece/index.js'; -import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../core/piece/index.js'; +import { detectMatchedRule } from '../core/piece/evaluation/index.js'; +import type { RuleMatch } from 
'../core/piece/evaluation/index.js'; +import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../core/piece/phase-runner.js'; import { generateReportDir } from '../shared/utils/index.js'; // --- Factory functions --- @@ -173,7 +173,7 @@ export function createTestTmpDir(): string { export function applyDefaultMocks(): void { vi.mocked(needsStatusJudgmentPhase).mockReturnValue(false); vi.mocked(runReportPhase).mockResolvedValue(undefined); - vi.mocked(runStatusJudgmentPhase).mockResolvedValue(''); + vi.mocked(runStatusJudgmentPhase).mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }); vi.mocked(generateReportDir).mockReturnValue('test-report-dir'); } diff --git a/src/__tests__/engine-worktree-report.test.ts b/src/__tests__/engine-worktree-report.test.ts index 1021c0a..f90084f 100644 --- a/src/__tests__/engine-worktree-report.test.ts +++ b/src/__tests__/engine-worktree-report.test.ts @@ -24,7 +24,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ @@ -35,7 +35,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({ // --- Imports (after mocks) --- import { PieceEngine } from '../core/piece/index.js'; -import { runReportPhase } from '../core/piece/index.js'; +import { runReportPhase } from '../core/piece/phase-runner.js'; import { makeResponse, makeMovement, diff --git a/src/__tests__/it-error-recovery.test.ts b/src/__tests__/it-error-recovery.test.ts index 5df4423..75199ba 100644 --- a/src/__tests__/it-error-recovery.test.ts +++ b/src/__tests__/it-error-recovery.test.ts @@ -14,7 +14,7 @@ import { join } from 'node:path'; 
import { tmpdir } from 'node:os'; import { setMockScenario, resetScenario } from '../infra/mock/index.js'; import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js'; -import { detectRuleIndex } from '../infra/claude/index.js'; +import { detectRuleIndex } from '../shared/utils/ruleIndex.js'; import { makeRule } from './test-helpers.js'; import { callAiJudge } from '../agents/ai-judge.js'; @@ -31,7 +31,7 @@ vi.mock('../agents/ai-judge.js', async (importOriginal) => { vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ diff --git a/src/__tests__/it-notification-sound.test.ts b/src/__tests__/it-notification-sound.test.ts index ce54a0f..5f4d4a0 100644 --- a/src/__tests__/it-notification-sound.test.ts +++ b/src/__tests__/it-notification-sound.test.ts @@ -104,8 +104,7 @@ vi.mock('../core/piece/index.js', () => ({ PieceEngine: MockPieceEngine, })); -vi.mock('../infra/claude/index.js', () => ({ - detectRuleIndex: vi.fn(), +vi.mock('../infra/claude/query-manager.js', () => ({ interruptAllQueries: mockInterruptAllQueries, })); diff --git a/src/__tests__/it-piece-execution.test.ts b/src/__tests__/it-piece-execution.test.ts index dd40656..912fa8b 100644 --- a/src/__tests__/it-piece-execution.test.ts +++ b/src/__tests__/it-piece-execution.test.ts @@ -15,7 +15,7 @@ import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { setMockScenario, resetScenario } from '../infra/mock/index.js'; import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js'; -import { detectRuleIndex } from '../infra/claude/index.js'; +import { detectRuleIndex } from '../shared/utils/ruleIndex.js'; import { 
makeRule } from './test-helpers.js'; import { callAiJudge } from '../agents/ai-judge.js'; @@ -35,7 +35,7 @@ vi.mock('../agents/ai-judge.js', async (importOriginal) => { vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ diff --git a/src/__tests__/it-piece-patterns.test.ts b/src/__tests__/it-piece-patterns.test.ts index 4ea6d59..bd99736 100644 --- a/src/__tests__/it-piece-patterns.test.ts +++ b/src/__tests__/it-piece-patterns.test.ts @@ -13,7 +13,7 @@ import { mkdtempSync, mkdirSync, rmSync } from 'node:fs'; import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { setMockScenario, resetScenario } from '../infra/mock/index.js'; -import { detectRuleIndex } from '../infra/claude/index.js'; +import { detectRuleIndex } from '../shared/utils/ruleIndex.js'; import { callAiJudge } from '../agents/ai-judge.js'; // --- Mocks --- @@ -37,7 +37,7 @@ vi.mock('../agents/ai-judge.js', async (importOriginal) => { vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ diff --git a/src/__tests__/it-pipeline-modes.test.ts b/src/__tests__/it-pipeline-modes.test.ts index 0916bef..a381483 100644 --- a/src/__tests__/it-pipeline-modes.test.ts +++ b/src/__tests__/it-pipeline-modes.test.ts @@ -144,7 +144,7 @@ vi.mock('../shared/prompt/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ 
needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); // --- Imports (after mocks) --- diff --git a/src/__tests__/it-pipeline.test.ts b/src/__tests__/it-pipeline.test.ts index ad02723..5743f29 100644 --- a/src/__tests__/it-pipeline.test.ts +++ b/src/__tests__/it-pipeline.test.ts @@ -125,7 +125,7 @@ vi.mock('../shared/prompt/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); // --- Imports (after mocks) --- diff --git a/src/__tests__/it-rule-evaluation.test.ts b/src/__tests__/it-rule-evaluation.test.ts index 0facf7d..57ec9cc 100644 --- a/src/__tests__/it-rule-evaluation.test.ts +++ b/src/__tests__/it-rule-evaluation.test.ts @@ -34,8 +34,9 @@ vi.mock('../infra/config/project/projectConfig.js', () => ({ // --- Imports (after mocks) --- -import { detectMatchedRule, evaluateAggregateConditions } from '../core/piece/index.js'; -import { detectRuleIndex } from '../infra/claude/index.js'; +import { evaluateAggregateConditions } from '../core/piece/index.js'; +import { detectMatchedRule } from '../core/piece/evaluation/index.js'; +import { detectRuleIndex } from '../shared/utils/ruleIndex.js'; import type { RuleMatch, RuleEvaluatorContext } from '../core/piece/index.js'; // --- Test helpers --- diff --git a/src/__tests__/it-sigint-interrupt.test.ts b/src/__tests__/it-sigint-interrupt.test.ts index 28abafe..e15226b 100644 --- a/src/__tests__/it-sigint-interrupt.test.ts +++ b/src/__tests__/it-sigint-interrupt.test.ts @@ -74,8 +74,8 @@ 
vi.mock('../core/piece/index.js', () => ({ PieceEngine: MockPieceEngine, })); -vi.mock('../infra/claude/index.js', () => ({ - detectRuleIndex: vi.fn(), +vi.mock('../infra/claude/query-manager.js', async (importOriginal) => ({ + ...(await importOriginal>()), interruptAllQueries: mockInterruptAllQueries, })); diff --git a/src/__tests__/it-three-phase-execution.test.ts b/src/__tests__/it-three-phase-execution.test.ts index 9a9bf98..d5b173e 100644 --- a/src/__tests__/it-three-phase-execution.test.ts +++ b/src/__tests__/it-three-phase-execution.test.ts @@ -15,7 +15,7 @@ import { join } from 'node:path'; import { tmpdir } from 'node:os'; import { setMockScenario, resetScenario } from '../infra/mock/index.js'; import type { PieceConfig, PieceMovement, PieceRule } from '../core/models/index.js'; -import { detectRuleIndex } from '../infra/claude/index.js'; +import { detectRuleIndex } from '../shared/utils/ruleIndex.js'; import { makeRule } from './test-helpers.js'; import { callAiJudge } from '../agents/ai-judge.js'; @@ -114,7 +114,7 @@ describe('Three-Phase Execution IT: phase1 only (no report, no tag rules)', () = // No tag rules needed → Phase 3 not needed mockNeedsStatusJudgmentPhase.mockReturnValue(false); mockRunReportPhase.mockResolvedValue(undefined); - mockRunStatusJudgmentPhase.mockResolvedValue(''); + mockRunStatusJudgmentPhase.mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }); }); afterEach(() => { @@ -166,7 +166,7 @@ describe('Three-Phase Execution IT: phase1 + phase2 (report defined)', () => { mockNeedsStatusJudgmentPhase.mockReturnValue(false); mockRunReportPhase.mockResolvedValue(undefined); - mockRunStatusJudgmentPhase.mockResolvedValue(''); + mockRunStatusJudgmentPhase.mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }); }); afterEach(() => { @@ -246,7 +246,7 @@ describe('Three-Phase Execution IT: phase1 + phase3 (tag rules defined)', () => mockNeedsStatusJudgmentPhase.mockReturnValue(true); 
mockRunReportPhase.mockResolvedValue(undefined); // Phase 3 returns content with a tag - mockRunStatusJudgmentPhase.mockResolvedValue('[STEP:1]'); + mockRunStatusJudgmentPhase.mockResolvedValue({ tag: '[STEP:1]', ruleIndex: 0, method: 'structured_output' }); }); afterEach(() => { @@ -298,7 +298,7 @@ describe('Three-Phase Execution IT: all three phases', () => { mockNeedsStatusJudgmentPhase.mockReturnValue(true); mockRunReportPhase.mockResolvedValue(undefined); - mockRunStatusJudgmentPhase.mockResolvedValue('[STEP:1]'); + mockRunStatusJudgmentPhase.mockResolvedValue({ tag: '[STEP:1]', ruleIndex: 0, method: 'structured_output' }); }); afterEach(() => { @@ -369,7 +369,7 @@ describe('Three-Phase Execution IT: phase3 tag → rule match', () => { ]); // Phase 3 returns rule 2 (ABORT) - mockRunStatusJudgmentPhase.mockResolvedValue('[STEP1:2]'); + mockRunStatusJudgmentPhase.mockResolvedValue({ tag: '[STEP1:2]', ruleIndex: 1, method: 'structured_output' }); const config: PieceConfig = { name: 'it-phase3-tag', diff --git a/src/__tests__/judgment-detector.test.ts b/src/__tests__/judgment-detector.test.ts deleted file mode 100644 index 1bd198c..0000000 --- a/src/__tests__/judgment-detector.test.ts +++ /dev/null @@ -1,70 +0,0 @@ -/** - * Test for JudgmentDetector - */ - -import { describe, it, expect } from 'vitest'; -import { JudgmentDetector } from '../core/piece/judgment/JudgmentDetector.js'; - -describe('JudgmentDetector', () => { - describe('detect', () => { - it('should detect tag in simple response', () => { - const result = JudgmentDetector.detect('[ARCH-REVIEW:1]'); - expect(result.success).toBe(true); - expect(result.tag).toBe('[ARCH-REVIEW:1]'); - }); - - it('should detect tag with surrounding text', () => { - const result = JudgmentDetector.detect('Based on the review, I choose [MOVEMENT:2] because...'); - expect(result.success).toBe(true); - expect(result.tag).toBe('[MOVEMENT:2]'); - }); - - it('should detect tag with hyphenated movement name', () => { - const result 
= JudgmentDetector.detect('[AI-ANTIPATTERN-REVIEW:1]'); - expect(result.success).toBe(true); - expect(result.tag).toBe('[AI-ANTIPATTERN-REVIEW:1]'); - }); - - it('should detect tag with underscored movement name', () => { - const result = JudgmentDetector.detect('[AI_REVIEW:1]'); - expect(result.success).toBe(true); - expect(result.tag).toBe('[AI_REVIEW:1]'); - }); - - it('should detect "判断できない" (Japanese)', () => { - const result = JudgmentDetector.detect('判断できない:情報が不足しています'); - expect(result.success).toBe(false); - expect(result.reason).toBe('Conductor explicitly stated it cannot judge'); - }); - - it('should detect "Cannot determine" (English)', () => { - const result = JudgmentDetector.detect('Cannot determine: Insufficient information'); - expect(result.success).toBe(false); - expect(result.reason).toBe('Conductor explicitly stated it cannot judge'); - }); - - it('should detect "unable to judge"', () => { - const result = JudgmentDetector.detect('I am unable to judge based on the provided information.'); - expect(result.success).toBe(false); - expect(result.reason).toBe('Conductor explicitly stated it cannot judge'); - }); - - it('should fail when no tag and no explicit "cannot judge"', () => { - const result = JudgmentDetector.detect('This is a response without a tag or explicit statement.'); - expect(result.success).toBe(false); - expect(result.reason).toBe('No tag found and no explicit "cannot judge" statement'); - }); - - it('should fail on empty response', () => { - const result = JudgmentDetector.detect(''); - expect(result.success).toBe(false); - expect(result.reason).toBe('No tag found and no explicit "cannot judge" statement'); - }); - - it('should detect first tag when multiple tags exist', () => { - const result = JudgmentDetector.detect('[MOVEMENT:1] or [MOVEMENT:2]'); - expect(result.success).toBe(true); - expect(result.tag).toBe('[MOVEMENT:1]'); - }); - }); -}); diff --git a/src/__tests__/judgment-fallback.test.ts 
b/src/__tests__/judgment-fallback.test.ts deleted file mode 100644 index 0d7d560..0000000 --- a/src/__tests__/judgment-fallback.test.ts +++ /dev/null @@ -1,183 +0,0 @@ -/** - * Test for Fallback Strategies - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; -import { join } from 'node:path'; -import { tmpdir } from 'node:os'; -import type { PieceMovement } from '../core/models/types.js'; -import type { JudgmentContext } from '../core/piece/judgment/FallbackStrategy.js'; -import { runAgent } from '../agents/runner.js'; -import { - AutoSelectStrategy, - ReportBasedStrategy, - ResponseBasedStrategy, - AgentConsultStrategy, - JudgmentStrategyFactory, -} from '../core/piece/judgment/FallbackStrategy.js'; - -// Mock runAgent -vi.mock('../agents/runner.js', () => ({ - runAgent: vi.fn(), -})); - -describe('JudgmentStrategies', () => { - const mockStep: PieceMovement = { - name: 'test-movement', - persona: 'test-agent', - rules: [ - { description: 'Rule 1', condition: 'approved' }, - { description: 'Rule 2', condition: 'rejected' }, - ], - }; - - const mockContext: JudgmentContext = { - step: mockStep, - cwd: '/test/cwd', - language: 'en', - reportDir: '/test/reports', - lastResponse: 'Last response content', - sessionId: 'session-123', - }; - - beforeEach(() => { - vi.clearAllMocks(); - }); - - describe('AutoSelectStrategy', () => { - it('should apply when step has only one rule', () => { - const singleRuleStep: PieceMovement = { - name: 'single-rule', - rules: [{ description: 'Only rule', condition: 'always' }], - }; - const strategy = new AutoSelectStrategy(); - expect(strategy.canApply({ ...mockContext, step: singleRuleStep })).toBe(true); - }); - - it('should not apply when step has multiple rules', () => { - const strategy = new AutoSelectStrategy(); - expect(strategy.canApply(mockContext)).toBe(false); - }); - - it('should return auto-selected tag', async () => { - const 
singleRuleStep: PieceMovement = { - name: 'single-rule', - rules: [{ description: 'Only rule', condition: 'always' }], - }; - const strategy = new AutoSelectStrategy(); - const result = await strategy.execute({ ...mockContext, step: singleRuleStep }); - expect(result.success).toBe(true); - expect(result.tag).toBe('[SINGLE-RULE:1]'); - }); - }); - - describe('ReportBasedStrategy', () => { - it('should apply when reportDir and output contracts are configured', () => { - const strategy = new ReportBasedStrategy(); - const stepWithOutputContracts: PieceMovement = { - ...mockStep, - outputContracts: [{ label: 'review', path: 'review-report.md' }], - }; - expect(strategy.canApply({ ...mockContext, step: stepWithOutputContracts })).toBe(true); - }); - - it('should not apply when reportDir is missing', () => { - const strategy = new ReportBasedStrategy(); - expect(strategy.canApply({ ...mockContext, reportDir: undefined })).toBe(false); - }); - - it('should not apply when step has no output contracts configured', () => { - const strategy = new ReportBasedStrategy(); - // mockStep has no outputContracts field → getReportFiles returns [] - expect(strategy.canApply(mockContext)).toBe(false); - }); - - it('should use only latest report file from reports directory', async () => { - const tmpRoot = mkdtempSync(join(tmpdir(), 'takt-judgment-report-')); - try { - const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports'); - const historyDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'logs', 'reports-history'); - mkdirSync(reportDir, { recursive: true }); - mkdirSync(historyDir, { recursive: true }); - - const latestFile = '05-architect-review.md'; - writeFileSync(join(reportDir, latestFile), 'LATEST-ONLY-CONTENT'); - writeFileSync(join(historyDir, '05-architect-review.20260210T061143Z.md'), 'OLD-HISTORY-CONTENT'); - - const stepWithOutputContracts: PieceMovement = { - ...mockStep, - outputContracts: [{ name: latestFile }], - }; - - const runAgentMock = 
vi.mocked(runAgent); - runAgentMock.mockResolvedValue({ - persona: 'conductor', - status: 'done', - content: '[TEST-MOVEMENT:1]', - timestamp: new Date('2026-02-10T07:11:43Z'), - }); - - const strategy = new ReportBasedStrategy(); - const result = await strategy.execute({ - ...mockContext, - step: stepWithOutputContracts, - reportDir, - }); - - expect(result.success).toBe(true); - expect(runAgentMock).toHaveBeenCalledTimes(1); - const instruction = runAgentMock.mock.calls[0]?.[1]; - expect(instruction).toContain('LATEST-ONLY-CONTENT'); - expect(instruction).not.toContain('OLD-HISTORY-CONTENT'); - } finally { - rmSync(tmpRoot, { recursive: true, force: true }); - } - }); - }); - - describe('ResponseBasedStrategy', () => { - it('should apply when lastResponse is provided', () => { - const strategy = new ResponseBasedStrategy(); - expect(strategy.canApply(mockContext)).toBe(true); - }); - - it('should not apply when lastResponse is missing', () => { - const strategy = new ResponseBasedStrategy(); - expect(strategy.canApply({ ...mockContext, lastResponse: undefined })).toBe(false); - }); - - it('should not apply when lastResponse is empty', () => { - const strategy = new ResponseBasedStrategy(); - expect(strategy.canApply({ ...mockContext, lastResponse: '' })).toBe(false); - }); - }); - - describe('AgentConsultStrategy', () => { - it('should apply when sessionId is provided', () => { - const strategy = new AgentConsultStrategy(); - expect(strategy.canApply(mockContext)).toBe(true); - }); - - it('should not apply when sessionId is missing', () => { - const strategy = new AgentConsultStrategy(); - expect(strategy.canApply({ ...mockContext, sessionId: undefined })).toBe(false); - }); - - it('should not apply when sessionId is empty', () => { - const strategy = new AgentConsultStrategy(); - expect(strategy.canApply({ ...mockContext, sessionId: '' })).toBe(false); - }); - }); - - describe('JudgmentStrategyFactory', () => { - it('should create strategies in correct order', 
() => { - const strategies = JudgmentStrategyFactory.createStrategies(); - expect(strategies).toHaveLength(4); - expect(strategies[0]).toBeInstanceOf(AutoSelectStrategy); - expect(strategies[1]).toBeInstanceOf(ReportBasedStrategy); - expect(strategies[2]).toBeInstanceOf(ResponseBasedStrategy); - expect(strategies[3]).toBeInstanceOf(AgentConsultStrategy); - }); - }); -}); diff --git a/src/__tests__/judgment-strategies.test.ts b/src/__tests__/judgment-strategies.test.ts deleted file mode 100644 index 18d72b9..0000000 --- a/src/__tests__/judgment-strategies.test.ts +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Unit tests for FallbackStrategy judgment strategies - * - * Tests AutoSelectStrategy and canApply logic for all strategies. - * Strategies requiring external agent calls (ReportBased, ResponseBased, - * AgentConsult) are tested for canApply and input validation only. - */ - -import { describe, it, expect } from 'vitest'; -import { - AutoSelectStrategy, - ReportBasedStrategy, - ResponseBasedStrategy, - AgentConsultStrategy, - JudgmentStrategyFactory, - type JudgmentContext, -} from '../core/piece/judgment/FallbackStrategy.js'; -import { makeMovement } from './test-helpers.js'; - -function makeContext(overrides: Partial = {}): JudgmentContext { - return { - step: makeMovement(), - cwd: '/tmp/test', - ...overrides, - }; -} - -describe('AutoSelectStrategy', () => { - const strategy = new AutoSelectStrategy(); - - it('should have name "AutoSelect"', () => { - expect(strategy.name).toBe('AutoSelect'); - }); - - describe('canApply', () => { - it('should return true when movement has exactly one rule', () => { - const ctx = makeContext({ - step: makeMovement({ - rules: [{ condition: 'done', next: 'COMPLETE' }], - }), - }); - expect(strategy.canApply(ctx)).toBe(true); - }); - - it('should return false when movement has multiple rules', () => { - const ctx = makeContext({ - step: makeMovement({ - rules: [ - { condition: 'approved', next: 'implement' }, - { condition: 
'rejected', next: 'review' }, - ], - }), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when movement has no rules', () => { - const ctx = makeContext({ - step: makeMovement({ rules: undefined }), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - }); - - describe('execute', () => { - it('should return auto-selected tag for single-branch movement', async () => { - const ctx = makeContext({ - step: makeMovement({ - name: 'review', - rules: [{ condition: 'done', next: 'COMPLETE' }], - }), - }); - - const result = await strategy.execute(ctx); - expect(result.success).toBe(true); - expect(result.tag).toBe('[REVIEW:1]'); - }); - }); -}); - -describe('ReportBasedStrategy', () => { - const strategy = new ReportBasedStrategy(); - - it('should have name "ReportBased"', () => { - expect(strategy.name).toBe('ReportBased'); - }); - - describe('canApply', () => { - it('should return true when reportDir and outputContracts are present', () => { - const ctx = makeContext({ - reportDir: '/tmp/reports', - step: makeMovement({ - outputContracts: [{ name: 'report.md' }], - }), - }); - expect(strategy.canApply(ctx)).toBe(true); - }); - - it('should return false when reportDir is missing', () => { - const ctx = makeContext({ - step: makeMovement({ - outputContracts: [{ name: 'report.md' }], - }), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when outputContracts is empty', () => { - const ctx = makeContext({ - reportDir: '/tmp/reports', - step: makeMovement({ outputContracts: [] }), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when outputContracts is undefined', () => { - const ctx = makeContext({ - reportDir: '/tmp/reports', - step: makeMovement(), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - }); -}); - -describe('ResponseBasedStrategy', () => { - const strategy = new ResponseBasedStrategy(); - - it('should have name "ResponseBased"', () => { 
- expect(strategy.name).toBe('ResponseBased'); - }); - - describe('canApply', () => { - it('should return true when lastResponse is non-empty', () => { - const ctx = makeContext({ lastResponse: 'some response' }); - expect(strategy.canApply(ctx)).toBe(true); - }); - - it('should return false when lastResponse is undefined', () => { - const ctx = makeContext({ lastResponse: undefined }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when lastResponse is empty string', () => { - const ctx = makeContext({ lastResponse: '' }); - expect(strategy.canApply(ctx)).toBe(false); - }); - }); -}); - -describe('AgentConsultStrategy', () => { - const strategy = new AgentConsultStrategy(); - - it('should have name "AgentConsult"', () => { - expect(strategy.name).toBe('AgentConsult'); - }); - - describe('canApply', () => { - it('should return true when sessionId is non-empty', () => { - const ctx = makeContext({ sessionId: 'session-123' }); - expect(strategy.canApply(ctx)).toBe(true); - }); - - it('should return false when sessionId is undefined', () => { - const ctx = makeContext({ sessionId: undefined }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when sessionId is empty string', () => { - const ctx = makeContext({ sessionId: '' }); - expect(strategy.canApply(ctx)).toBe(false); - }); - }); - - describe('execute', () => { - it('should return failure when sessionId is not provided', async () => { - const ctx = makeContext({ sessionId: undefined }); - const result = await strategy.execute(ctx); - expect(result.success).toBe(false); - expect(result.reason).toBe('Session ID not provided'); - }); - }); -}); - -describe('JudgmentStrategyFactory', () => { - it('should create strategies in correct priority order', () => { - const strategies = JudgmentStrategyFactory.createStrategies(); - expect(strategies).toHaveLength(4); - expect(strategies[0]!.name).toBe('AutoSelect'); - 
expect(strategies[1]!.name).toBe('ReportBased'); - expect(strategies[2]!.name).toBe('ResponseBased'); - expect(strategies[3]!.name).toBe('AgentConsult'); - }); -}); diff --git a/src/__tests__/parseStructuredOutput.test.ts b/src/__tests__/parseStructuredOutput.test.ts new file mode 100644 index 0000000..7f247e7 --- /dev/null +++ b/src/__tests__/parseStructuredOutput.test.ts @@ -0,0 +1,86 @@ +import { describe, it, expect } from 'vitest'; +import { parseStructuredOutput } from '../shared/utils/structuredOutput.js'; + +describe('parseStructuredOutput', () => { + it('should return undefined when hasOutputSchema is false', () => { + expect(parseStructuredOutput('{"step":1}', false)).toBeUndefined(); + }); + + it('should return undefined for empty text', () => { + expect(parseStructuredOutput('', true)).toBeUndefined(); + }); + + // Strategy 1: Direct JSON parse + describe('direct JSON parse', () => { + it('should parse pure JSON object', () => { + expect(parseStructuredOutput('{"step":1,"reason":"done"}', true)) + .toEqual({ step: 1, reason: 'done' }); + }); + + it('should parse JSON with whitespace', () => { + expect(parseStructuredOutput(' { "step": 2, "reason": "ok" } ', true)) + .toEqual({ step: 2, reason: 'ok' }); + }); + + it('should ignore arrays', () => { + expect(parseStructuredOutput('[1,2,3]', true)).toBeUndefined(); + }); + + it('should ignore primitive JSON', () => { + expect(parseStructuredOutput('"hello"', true)).toBeUndefined(); + }); + }); + + // Strategy 2: Code block extraction + describe('code block extraction', () => { + it('should extract JSON from ```json code block', () => { + const text = 'Here is the result:\n```json\n{"step":1,"reason":"matched"}\n```'; + expect(parseStructuredOutput(text, true)) + .toEqual({ step: 1, reason: 'matched' }); + }); + + it('should extract JSON from ``` code block (no language)', () => { + const text = 'Result:\n```\n{"step":2,"reason":"fallback"}\n```'; + expect(parseStructuredOutput(text, true)) + .toEqual({ 
step: 2, reason: 'fallback' }); + }); + }); + + // Strategy 3: Brace extraction + describe('brace extraction', () => { + it('should extract JSON with preamble text', () => { + const text = 'The matched rule is: {"step":1,"reason":"condition met"}'; + expect(parseStructuredOutput(text, true)) + .toEqual({ step: 1, reason: 'condition met' }); + }); + + it('should extract JSON with postamble text', () => { + const text = '{"step":3,"reason":"done"}\nEnd of response.'; + expect(parseStructuredOutput(text, true)) + .toEqual({ step: 3, reason: 'done' }); + }); + + it('should extract JSON with both preamble and postamble', () => { + const text = 'Based on my analysis:\n{"matched_index":2,"reason":"test"}\nThat is my judgment.'; + expect(parseStructuredOutput(text, true)) + .toEqual({ matched_index: 2, reason: 'test' }); + }); + }); + + // Edge cases + describe('edge cases', () => { + it('should return undefined for text without JSON', () => { + expect(parseStructuredOutput('No JSON here at all.', true)).toBeUndefined(); + }); + + it('should return undefined for invalid JSON', () => { + expect(parseStructuredOutput('{invalid json}', true)).toBeUndefined(); + }); + + it('should handle nested objects', () => { + const text = '{"step":1,"reason":"ok","meta":{"detail":"extra"}}'; + expect(parseStructuredOutput(text, true)) + .toEqual({ step: 1, reason: 'ok', meta: { detail: 'extra' } }); + }); + }); +}); diff --git a/src/__tests__/pieceExecution-debug-prompts.test.ts b/src/__tests__/pieceExecution-debug-prompts.test.ts index 75aa4e5..5fb8402 100644 --- a/src/__tests__/pieceExecution-debug-prompts.test.ts +++ b/src/__tests__/pieceExecution-debug-prompts.test.ts @@ -77,8 +77,7 @@ vi.mock('../core/piece/index.js', () => ({ PieceEngine: MockPieceEngine, })); -vi.mock('../infra/claude/index.js', () => ({ - detectRuleIndex: vi.fn(), +vi.mock('../infra/claude/query-manager.js', () => ({ interruptAllQueries: vi.fn(), })); diff --git 
a/src/__tests__/pieceExecution-session-loading.test.ts b/src/__tests__/pieceExecution-session-loading.test.ts index 92ff51e..e6402da 100644 --- a/src/__tests__/pieceExecution-session-loading.test.ts +++ b/src/__tests__/pieceExecution-session-loading.test.ts @@ -46,8 +46,7 @@ vi.mock('../core/piece/index.js', () => ({ PieceEngine: MockPieceEngine, })); -vi.mock('../infra/claude/index.js', () => ({ - detectRuleIndex: vi.fn(), +vi.mock('../infra/claude/query-manager.js', () => ({ interruptAllQueries: vi.fn(), })); diff --git a/src/__tests__/provider-structured-output.test.ts b/src/__tests__/provider-structured-output.test.ts new file mode 100644 index 0000000..3f2206e --- /dev/null +++ b/src/__tests__/provider-structured-output.test.ts @@ -0,0 +1,244 @@ +/** + * Provider layer structured output tests. + * + * Verifies that each provider (Claude, Codex, OpenCode) correctly passes + * `outputSchema` through to its underlying client function and returns + * `structuredOutput` in the AgentResponse. 
+ */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +// ===== Claude ===== +const { + mockCallClaude, + mockCallClaudeCustom, +} = vi.hoisted(() => ({ + mockCallClaude: vi.fn(), + mockCallClaudeCustom: vi.fn(), +})); + +vi.mock('../infra/claude/client.js', () => ({ + callClaude: mockCallClaude, + callClaudeCustom: mockCallClaudeCustom, + callClaudeAgent: vi.fn(), + callClaudeSkill: vi.fn(), +})); + +// ===== Codex ===== +const { + mockCallCodex, + mockCallCodexCustom, +} = vi.hoisted(() => ({ + mockCallCodex: vi.fn(), + mockCallCodexCustom: vi.fn(), +})); + +vi.mock('../infra/codex/index.js', () => ({ + callCodex: mockCallCodex, + callCodexCustom: mockCallCodexCustom, +})); + +// ===== OpenCode ===== +const { + mockCallOpenCode, + mockCallOpenCodeCustom, +} = vi.hoisted(() => ({ + mockCallOpenCode: vi.fn(), + mockCallOpenCodeCustom: vi.fn(), +})); + +vi.mock('../infra/opencode/index.js', () => ({ + callOpenCode: mockCallOpenCode, + callOpenCodeCustom: mockCallOpenCodeCustom, +})); + +// ===== Config (API key resolvers) ===== +vi.mock('../infra/config/index.js', () => ({ + resolveAnthropicApiKey: vi.fn(() => undefined), + resolveOpenaiApiKey: vi.fn(() => undefined), + resolveOpencodeApiKey: vi.fn(() => undefined), +})); + +// Codex の isInsideGitRepo をバイパス +vi.mock('node:child_process', () => ({ + execFileSync: vi.fn(() => 'true'), +})); + +import { ClaudeProvider } from '../infra/providers/claude.js'; +import { CodexProvider } from '../infra/providers/codex.js'; +import { OpenCodeProvider } from '../infra/providers/opencode.js'; + +const SCHEMA = { + type: 'object', + properties: { step: { type: 'integer' } }, + required: ['step'], +}; + +function doneResponse(persona: string, structuredOutput?: Record) { + return { + persona, + status: 'done' as const, + content: 'ok', + timestamp: new Date(), + structuredOutput, + }; +} + +// ---------- Claude ---------- + +describe('ClaudeProvider — structured output', () => { + beforeEach(() => { + 
vi.clearAllMocks(); + }); + + it('outputSchema を callClaude に渡し structuredOutput を返す', async () => { + mockCallClaude.mockResolvedValue(doneResponse('coder', { step: 2 })); + + const agent = new ClaudeProvider().setup({ name: 'coder' }); + const result = await agent.call('prompt', { cwd: '/tmp', outputSchema: SCHEMA }); + + const opts = mockCallClaude.mock.calls[0]?.[2]; + expect(opts).toHaveProperty('outputSchema', SCHEMA); + expect(result.structuredOutput).toEqual({ step: 2 }); + }); + + it('systemPrompt 指定時も outputSchema が callClaudeCustom に渡される', async () => { + mockCallClaudeCustom.mockResolvedValue(doneResponse('judge', { step: 1 })); + + const agent = new ClaudeProvider().setup({ name: 'judge', systemPrompt: 'You are a judge.' }); + const result = await agent.call('prompt', { cwd: '/tmp', outputSchema: SCHEMA }); + + const opts = mockCallClaudeCustom.mock.calls[0]?.[3]; + expect(opts).toHaveProperty('outputSchema', SCHEMA); + expect(result.structuredOutput).toEqual({ step: 1 }); + }); + + it('structuredOutput がない場合は undefined', async () => { + mockCallClaude.mockResolvedValue(doneResponse('coder')); + + const agent = new ClaudeProvider().setup({ name: 'coder' }); + const result = await agent.call('prompt', { cwd: '/tmp', outputSchema: SCHEMA }); + + expect(result.structuredOutput).toBeUndefined(); + }); + + it('outputSchema 未指定時は undefined が渡される', async () => { + mockCallClaude.mockResolvedValue(doneResponse('coder')); + + const agent = new ClaudeProvider().setup({ name: 'coder' }); + await agent.call('prompt', { cwd: '/tmp' }); + + const opts = mockCallClaude.mock.calls[0]?.[2]; + expect(opts.outputSchema).toBeUndefined(); + }); +}); + +// ---------- Codex ---------- + +describe('CodexProvider — structured output', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('outputSchema を callCodex に渡し structuredOutput を返す', async () => { + mockCallCodex.mockResolvedValue(doneResponse('coder', { step: 2 })); + + const agent = new 
CodexProvider().setup({ name: 'coder' }); + const result = await agent.call('prompt', { cwd: '/tmp', outputSchema: SCHEMA }); + + const opts = mockCallCodex.mock.calls[0]?.[2]; + expect(opts).toHaveProperty('outputSchema', SCHEMA); + expect(result.structuredOutput).toEqual({ step: 2 }); + }); + + it('systemPrompt 指定時も outputSchema が callCodexCustom に渡される', async () => { + mockCallCodexCustom.mockResolvedValue(doneResponse('judge', { step: 1 })); + + const agent = new CodexProvider().setup({ name: 'judge', systemPrompt: 'sys' }); + const result = await agent.call('prompt', { cwd: '/tmp', outputSchema: SCHEMA }); + + const opts = mockCallCodexCustom.mock.calls[0]?.[3]; + expect(opts).toHaveProperty('outputSchema', SCHEMA); + expect(result.structuredOutput).toEqual({ step: 1 }); + }); + + it('structuredOutput がない場合は undefined', async () => { + mockCallCodex.mockResolvedValue(doneResponse('coder')); + + const agent = new CodexProvider().setup({ name: 'coder' }); + const result = await agent.call('prompt', { cwd: '/tmp', outputSchema: SCHEMA }); + + expect(result.structuredOutput).toBeUndefined(); + }); + + it('outputSchema 未指定時は undefined が渡される', async () => { + mockCallCodex.mockResolvedValue(doneResponse('coder')); + + const agent = new CodexProvider().setup({ name: 'coder' }); + await agent.call('prompt', { cwd: '/tmp' }); + + const opts = mockCallCodex.mock.calls[0]?.[2]; + expect(opts.outputSchema).toBeUndefined(); + }); +}); + +// ---------- OpenCode ---------- + +describe('OpenCodeProvider — structured output', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('outputSchema を callOpenCode に渡し structuredOutput を返す', async () => { + mockCallOpenCode.mockResolvedValue(doneResponse('coder', { step: 2 })); + + const agent = new OpenCodeProvider().setup({ name: 'coder' }); + const result = await agent.call('prompt', { + cwd: '/tmp', + model: 'openai/gpt-4', + outputSchema: SCHEMA, + }); + + const opts = mockCallOpenCode.mock.calls[0]?.[2]; + 
expect(opts).toHaveProperty('outputSchema', SCHEMA); + expect(result.structuredOutput).toEqual({ step: 2 }); + }); + + it('systemPrompt 指定時も outputSchema が callOpenCodeCustom に渡される', async () => { + mockCallOpenCodeCustom.mockResolvedValue(doneResponse('judge', { step: 1 })); + + const agent = new OpenCodeProvider().setup({ name: 'judge', systemPrompt: 'sys' }); + const result = await agent.call('prompt', { + cwd: '/tmp', + model: 'openai/gpt-4', + outputSchema: SCHEMA, + }); + + const opts = mockCallOpenCodeCustom.mock.calls[0]?.[3]; + expect(opts).toHaveProperty('outputSchema', SCHEMA); + expect(result.structuredOutput).toEqual({ step: 1 }); + }); + + it('structuredOutput がない場合は undefined', async () => { + mockCallOpenCode.mockResolvedValue(doneResponse('coder')); + + const agent = new OpenCodeProvider().setup({ name: 'coder' }); + const result = await agent.call('prompt', { + cwd: '/tmp', + model: 'openai/gpt-4', + outputSchema: SCHEMA, + }); + + expect(result.structuredOutput).toBeUndefined(); + }); + + it('outputSchema 未指定時は undefined が渡される', async () => { + mockCallOpenCode.mockResolvedValue(doneResponse('coder')); + + const agent = new OpenCodeProvider().setup({ name: 'coder' }); + await agent.call('prompt', { cwd: '/tmp', model: 'openai/gpt-4' }); + + const opts = mockCallOpenCode.mock.calls[0]?.[2]; + expect(opts.outputSchema).toBeUndefined(); + }); +}); diff --git a/src/__tests__/public-api-exports.test.ts b/src/__tests__/public-api-exports.test.ts new file mode 100644 index 0000000..3ec0b57 --- /dev/null +++ b/src/__tests__/public-api-exports.test.ts @@ -0,0 +1,83 @@ +import { describe, expect, it } from 'vitest'; + +describe('public API exports', () => { + it('should expose piece usecases, engine, and piece loader APIs', async () => { + // Given: パッケージの公開API + const api = await import('../index.js'); + + // When: 主要なユースケース関数とエンジン公開API・piece読み込みAPIを参照する + // Then: 必要な公開シンボルが利用できる + expect(typeof api.executeAgent).toBe('function'); + expect(typeof 
api.generateReport).toBe('function'); + expect(typeof api.executePart).toBe('function'); + expect(typeof api.judgeStatus).toBe('function'); + expect(typeof api.evaluateCondition).toBe('function'); + expect(typeof api.decomposeTask).toBe('function'); + + expect(typeof api.PieceEngine).toBe('function'); + + expect(typeof api.loadPiece).toBe('function'); + expect(typeof api.loadPieceByIdentifier).toBe('function'); + expect(typeof api.listPieces).toBe('function'); + }); + + it('should not expose internal engine implementation details', async () => { + // Given: パッケージの公開API + const api = await import('../index.js'); + + // When: 非公開にすべき内部シンボルの有無を確認する + // Then: 内部実装詳細は公開されていない + expect('AgentRunner' in api).toBe(false); + expect('RuleEvaluator' in api).toBe(false); + expect('AggregateEvaluator' in api).toBe(false); + expect('evaluateAggregateConditions' in api).toBe(false); + expect('needsStatusJudgmentPhase' in api).toBe(false); + expect('StatusJudgmentBuilder' in api).toBe(false); + expect('buildEditRule' in api).toBe(false); + expect('detectRuleIndex' in api).toBe(false); + expect('ParallelLogger' in api).toBe(false); + expect('InstructionBuilder' in api).toBe(false); + expect('ReportInstructionBuilder' in api).toBe(false); + expect('COMPLETE_MOVEMENT' in api).toBe(false); + expect('ABORT_MOVEMENT' in api).toBe(false); + expect('ERROR_MESSAGES' in api).toBe(false); + expect('determineNextMovementByRules' in api).toBe(false); + expect('extractBlockedPrompt' in api).toBe(false); + expect('LoopDetector' in api).toBe(false); + expect('createInitialState' in api).toBe(false); + expect('addUserInput' in api).toBe(false); + expect('getPreviousOutput' in api).toBe(false); + expect('handleBlocked' in api).toBe(false); + }); + + it('should not expose infrastructure implementations and internal shared utilities', async () => { + // Given: パッケージの公開API + const api = await import('../index.js'); + + // When: 非公開にすべきインフラ実装と内部ユーティリティの有無を確認する + // Then: 直接利用させない実装詳細は公開されていない + 
expect('ClaudeClient' in api).toBe(false); + expect('executeClaudeCli' in api).toBe(false); + expect('CodexClient' in api).toBe(false); + expect('mapToCodexSandboxMode' in api).toBe(false); + expect('getResourcesDir' in api).toBe(false); + expect('DEFAULT_PIECE_NAME' in api).toBe(false); + expect('buildPrompt' in api).toBe(false); + expect('writeFileAtomic' in api).toBe(false); + expect('getInputHistoryPath' in api).toBe(false); + expect('MAX_INPUT_HISTORY' in api).toBe(false); + expect('loadInputHistory' in api).toBe(false); + expect('saveInputHistory' in api).toBe(false); + expect('addToInputHistory' in api).toBe(false); + expect('getPersonaSessionsPath' in api).toBe(false); + expect('loadPersonaSessions' in api).toBe(false); + expect('savePersonaSessions' in api).toBe(false); + expect('updatePersonaSession' in api).toBe(false); + expect('clearPersonaSessions' in api).toBe(false); + expect('getWorktreeSessionsDir' in api).toBe(false); + expect('encodeWorktreePath' in api).toBe(false); + expect('getWorktreeSessionPath' in api).toBe(false); + expect('loadWorktreeSessions' in api).toBe(false); + expect('updateWorktreeSession' in api).toBe(false); + }); +}); diff --git a/src/__tests__/report-phase-blocked.test.ts b/src/__tests__/report-phase-blocked.test.ts index 3afad14..0241784 100644 --- a/src/__tests__/report-phase-blocked.test.ts +++ b/src/__tests__/report-phase-blocked.test.ts @@ -23,7 +23,7 @@ vi.mock('../core/piece/evaluation/index.js', () => ({ vi.mock('../core/piece/phase-runner.js', () => ({ needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), runReportPhase: vi.fn().mockResolvedValue(undefined), - runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), + runStatusJudgmentPhase: vi.fn().mockResolvedValue({ tag: '', ruleIndex: 0, method: 'auto_select' }), })); vi.mock('../shared/utils/index.js', async (importOriginal) => ({ @@ -34,7 +34,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({ // --- Imports (after mocks) --- import { 
PieceEngine } from '../core/piece/index.js'; -import { runReportPhase } from '../core/piece/index.js'; +import { runReportPhase } from '../core/piece/phase-runner.js'; import { makeResponse, makeMovement, diff --git a/src/__tests__/runAllTasks-concurrency.test.ts b/src/__tests__/runAllTasks-concurrency.test.ts index 9bab686..5e0b1ce 100644 --- a/src/__tests__/runAllTasks-concurrency.test.ts +++ b/src/__tests__/runAllTasks-concurrency.test.ts @@ -115,9 +115,8 @@ vi.mock('../infra/github/index.js', () => ({ pushBranch: vi.fn(), })); -vi.mock('../infra/claude/index.js', () => ({ +vi.mock('../infra/claude/query-manager.js', () => ({ interruptAllQueries: vi.fn(), - detectRuleIndex: vi.fn(), })); vi.mock('../agents/ai-judge.js', () => ({ diff --git a/src/__tests__/schema-loader.test.ts b/src/__tests__/schema-loader.test.ts new file mode 100644 index 0000000..a44341c --- /dev/null +++ b/src/__tests__/schema-loader.test.ts @@ -0,0 +1,76 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const readFileSyncMock = vi.fn((path: string) => { + if (path.endsWith('judgment.json')) { + return JSON.stringify({ type: 'object', properties: { step: { type: 'integer' } } }); + } + if (path.endsWith('evaluation.json')) { + return JSON.stringify({ type: 'object', properties: { matched_index: { type: 'integer' } } }); + } + if (path.endsWith('decomposition.json')) { + return JSON.stringify({ + type: 'object', + properties: { + parts: { + type: 'array', + items: { + type: 'object', + properties: { + id: { type: 'string' }, + title: { type: 'string' }, + instruction: { type: 'string' }, + }, + }, + }, + }, + }); + } + throw new Error(`Unexpected schema path: ${path}`); +}); + +vi.mock('node:fs', () => ({ + readFileSync: readFileSyncMock, +})); + +vi.mock('../infra/resources/index.js', () => ({ + getResourcesDir: vi.fn(() => '/mock/resources'), +})); + +describe('schema-loader', () => { + beforeEach(() => { + vi.resetModules(); + readFileSyncMock.mockClear(); + }); + + 
it('同じスキーマを複数回ロードしても readFileSync は1回だけ', async () => { + const { loadJudgmentSchema } = await import('../core/piece/schema-loader.js'); + + const first = loadJudgmentSchema(); + const second = loadJudgmentSchema(); + + expect(first).toEqual(second); + expect(readFileSyncMock).toHaveBeenCalledTimes(1); + expect(readFileSyncMock).toHaveBeenCalledWith('/mock/resources/schemas/judgment.json', 'utf-8'); + }); + + it('loadDecompositionSchema は maxItems を注入し、呼び出しごとに独立したオブジェクトを返す', async () => { + const { loadDecompositionSchema } = await import('../core/piece/schema-loader.js'); + + const first = loadDecompositionSchema(2); + const second = loadDecompositionSchema(5); + + const firstParts = (first.properties as Record).parts as Record; + const secondParts = (second.properties as Record).parts as Record; + + expect(firstParts.maxItems).toBe(2); + expect(secondParts.maxItems).toBe(5); + expect(readFileSyncMock).toHaveBeenCalledTimes(1); + }); + + it('loadDecompositionSchema は不正な maxParts を拒否する', async () => { + const { loadDecompositionSchema } = await import('../core/piece/schema-loader.js'); + + expect(() => loadDecompositionSchema(0)).toThrow('maxParts must be a positive integer: 0'); + expect(() => loadDecompositionSchema(-1)).toThrow('maxParts must be a positive integer: -1'); + }); +}); diff --git a/src/agents/ai-judge.ts b/src/agents/ai-judge.ts index 178d072..004b3d9 100644 --- a/src/agents/ai-judge.ts +++ b/src/agents/ai-judge.ts @@ -6,39 +6,12 @@ */ import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js'; -import { loadTemplate } from '../shared/prompts/index.js'; import { createLogger } from '../shared/utils/index.js'; -import { runAgent } from './runner.js'; +import { evaluateCondition } from '../core/piece/agent-usecases.js'; const log = createLogger('ai-judge'); -/** - * Detect judge rule index from [JUDGE:N] tag pattern. - * Returns 0-based rule index, or -1 if no match. 
- */ -export function detectJudgeIndex(content: string): number { - const regex = /\[JUDGE:(\d+)\]/i; - const match = content.match(regex); - if (match?.[1]) { - const index = Number.parseInt(match[1], 10) - 1; - return index >= 0 ? index : -1; - } - return -1; -} - -/** - * Build the prompt for the AI judge that evaluates agent output against ai() conditions. - */ -export function buildJudgePrompt( - agentOutput: string, - aiConditions: AiJudgeCondition[], -): string { - const conditionList = aiConditions - .map((c) => `| ${c.index + 1} | ${c.text} |`) - .join('\n'); - - return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList }); -} +export { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js'; /** * Call AI judge to evaluate agent output against ai() conditions. @@ -50,18 +23,9 @@ export const callAiJudge: AiJudgeCaller = async ( conditions: AiJudgeCondition[], options: { cwd: string }, ): Promise => { - const prompt = buildJudgePrompt(agentOutput, conditions); - - const response = await runAgent(undefined, prompt, { - cwd: options.cwd, - maxTurns: 1, - permissionMode: 'readonly', - }); - - if (response.status !== 'done') { - log.error('AI judge call failed', { error: response.error }); - return -1; + const result = await evaluateCondition(agentOutput, conditions, options); + if (result < 0) { + log.error('AI judge call failed to match a condition'); } - - return detectJudgeIndex(response.content); + return result; }; diff --git a/src/agents/index.ts b/src/agents/index.ts index 6adc5d9..2355cce 100644 --- a/src/agents/index.ts +++ b/src/agents/index.ts @@ -2,6 +2,5 @@ * Agents module - exports agent execution utilities */ -export { AgentRunner, runAgent } from './runner.js'; -export { callAiJudge, detectJudgeIndex, buildJudgePrompt } from './ai-judge.js'; +export { AgentRunner } from './runner.js'; export type { RunAgentOptions, StreamCallback } from './types.js'; diff --git a/src/agents/judge-utils.ts b/src/agents/judge-utils.ts 
new file mode 100644 index 0000000..0fb4795 --- /dev/null +++ b/src/agents/judge-utils.ts @@ -0,0 +1,22 @@ +import { loadTemplate } from '../shared/prompts/index.js'; + +export function detectJudgeIndex(content: string): number { + const regex = /\[JUDGE:(\d+)\]/i; + const match = content.match(regex); + if (match?.[1]) { + const index = Number.parseInt(match[1], 10) - 1; + return index >= 0 ? index : -1; + } + return -1; +} + +export function buildJudgePrompt( + agentOutput: string, + aiConditions: Array<{ index: number; text: string }>, +): string { + const conditionList = aiConditions + .map((c) => `| ${c.index + 1} | ${c.text} |`) + .join('\n'); + + return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList }); +} diff --git a/src/agents/runner.ts b/src/agents/runner.ts index cb54c93..5b126d0 100644 --- a/src/agents/runner.ts +++ b/src/agents/runner.ts @@ -111,6 +111,7 @@ export class AgentRunner { onPermissionRequest: options.onPermissionRequest, onAskUserQuestion: options.onAskUserQuestion, bypassPermissions: options.bypassPermissions, + outputSchema: options.outputSchema, }; } diff --git a/src/agents/types.ts b/src/agents/types.ts index d27882a..dad5d17 100644 --- a/src/agents/types.ts +++ b/src/agents/types.ts @@ -2,7 +2,7 @@ * Type definitions for agent execution */ -import type { StreamCallback, PermissionHandler, AskUserQuestionHandler } from '../infra/claude/index.js'; +import type { StreamCallback, PermissionHandler, AskUserQuestionHandler } from '../infra/claude/types.js'; import type { PermissionMode, Language, McpServerConfig } from '../core/models/index.js'; export type { StreamCallback }; @@ -39,4 +39,6 @@ export interface RunAgentOptions { movementsList: ReadonlyArray<{ name: string; description?: string }>; currentPosition: string; }; + /** JSON Schema for structured output */ + outputSchema?: Record; } diff --git a/src/core/models/response.ts b/src/core/models/response.ts index b687e8f..532584c 100644 --- 
a/src/core/models/response.ts +++ b/src/core/models/response.ts @@ -17,5 +17,6 @@ export interface AgentResponse { matchedRuleIndex?: number; /** How the rule match was detected */ matchedRuleMethod?: RuleMatchMethod; + /** Structured output returned by provider SDK (JSON Schema mode) */ + structuredOutput?: Record; } - diff --git a/src/core/models/status.ts b/src/core/models/status.ts index fb77af9..0dde9f7 100644 --- a/src/core/models/status.ts +++ b/src/core/models/status.ts @@ -21,6 +21,8 @@ export type Status = /** How a rule match was detected */ export type RuleMatchMethod = | 'aggregate' + | 'auto_select' + | 'structured_output' | 'phase3_tag' | 'phase1_tag' | 'ai_judge' diff --git a/src/core/piece/agent-usecases.ts b/src/core/piece/agent-usecases.ts new file mode 100644 index 0000000..2c20ee2 --- /dev/null +++ b/src/core/piece/agent-usecases.ts @@ -0,0 +1,170 @@ +import type { AgentResponse, PartDefinition, PieceRule, RuleMatchMethod, Language } from '../models/types.js'; +import { runAgent, type RunAgentOptions } from '../../agents/runner.js'; +import { detectJudgeIndex, buildJudgePrompt } from '../../agents/judge-utils.js'; +import { parseParts } from './engine/task-decomposer.js'; +import { loadJudgmentSchema, loadEvaluationSchema, loadDecompositionSchema } from './schema-loader.js'; +import { detectRuleIndex } from '../../shared/utils/ruleIndex.js'; +import { ensureUniquePartIds, parsePartDefinitionEntry } from './part-definition-validator.js'; + +export interface JudgeStatusOptions { + cwd: string; + movementName: string; + language?: Language; +} + +export interface JudgeStatusResult { + ruleIndex: number; + method: RuleMatchMethod; +} + +export interface EvaluateConditionOptions { + cwd: string; +} + +export interface DecomposeTaskOptions { + cwd: string; + persona?: string; + language?: Language; + model?: string; + provider?: 'claude' | 'codex' | 'opencode' | 'mock'; +} + +function toPartDefinitions(raw: unknown, maxParts: number): 
PartDefinition[] { + if (!Array.isArray(raw)) { + throw new Error('Structured output "parts" must be an array'); + } + if (raw.length === 0) { + throw new Error('Structured output "parts" must not be empty'); + } + if (raw.length > maxParts) { + throw new Error(`Structured output produced too many parts: ${raw.length} > ${maxParts}`); + } + + const parts: PartDefinition[] = raw.map((entry, index) => parsePartDefinitionEntry(entry, index)); + ensureUniquePartIds(parts); + + return parts; +} + +export async function executeAgent( + persona: string | undefined, + instruction: string, + options: RunAgentOptions, +): Promise { + return runAgent(persona, instruction, options); +} +export const generateReport = executeAgent; +export const executePart = executeAgent; + +export async function evaluateCondition( + agentOutput: string, + conditions: Array<{ index: number; text: string }>, + options: EvaluateConditionOptions, +): Promise { + const prompt = buildJudgePrompt(agentOutput, conditions); + const response = await runAgent(undefined, prompt, { + cwd: options.cwd, + maxTurns: 1, + permissionMode: 'readonly', + outputSchema: loadEvaluationSchema(), + }); + + if (response.status !== 'done') { + return -1; + } + + const matchedIndex = response.structuredOutput?.matched_index; + if (typeof matchedIndex === 'number' && Number.isInteger(matchedIndex)) { + const zeroBased = matchedIndex - 1; + if (zeroBased >= 0 && zeroBased < conditions.length) { + return zeroBased; + } + } + + return detectJudgeIndex(response.content); +} + +export async function judgeStatus( + structuredInstruction: string, + tagInstruction: string, + rules: PieceRule[], + options: JudgeStatusOptions, +): Promise { + if (rules.length === 0) { + throw new Error('judgeStatus requires at least one rule'); + } + + if (rules.length === 1) { + return { ruleIndex: 0, method: 'auto_select' }; + } + + const agentOptions = { + cwd: options.cwd, + maxTurns: 3, + permissionMode: 'readonly' as const, + language: 
options.language, + }; + + // Stage 1: Structured output + const structuredResponse = await runAgent('conductor', structuredInstruction, { + ...agentOptions, + outputSchema: loadJudgmentSchema(), + }); + + if (structuredResponse.status === 'done') { + const stepNumber = structuredResponse.structuredOutput?.step; + if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) { + const ruleIndex = stepNumber - 1; + if (ruleIndex >= 0 && ruleIndex < rules.length) { + return { ruleIndex, method: 'structured_output' }; + } + } + } + + // Stage 2: Tag detection (dedicated call, no outputSchema) + const tagResponse = await runAgent('conductor', tagInstruction, agentOptions); + + if (tagResponse.status === 'done') { + const tagRuleIndex = detectRuleIndex(tagResponse.content, options.movementName); + if (tagRuleIndex >= 0 && tagRuleIndex < rules.length) { + return { ruleIndex: tagRuleIndex, method: 'phase3_tag' }; + } + } + + // Stage 3: AI judge + const conditions = rules.map((rule, index) => ({ index, text: rule.condition })); + const fallbackIndex = await evaluateCondition(structuredInstruction, conditions, { cwd: options.cwd }); + if (fallbackIndex >= 0 && fallbackIndex < rules.length) { + return { ruleIndex: fallbackIndex, method: 'ai_judge' }; + } + + throw new Error(`Status not found for movement "${options.movementName}"`); +} + +export async function decomposeTask( + instruction: string, + maxParts: number, + options: DecomposeTaskOptions, +): Promise { + const response = await runAgent(options.persona, instruction, { + cwd: options.cwd, + language: options.language, + model: options.model, + provider: options.provider, + permissionMode: 'readonly', + maxTurns: 3, + outputSchema: loadDecompositionSchema(maxParts), + }); + + if (response.status !== 'done') { + const detail = response.error ?? 
response.content; + throw new Error(`Team leader failed: ${detail}`); + } + + const parts = response.structuredOutput?.parts; + if (parts != null) { + return toPartDefinitions(parts, maxParts); + } + + return parseParts(response.content, maxParts); +} diff --git a/src/core/piece/engine/ArpeggioRunner.ts b/src/core/piece/engine/ArpeggioRunner.ts index 017c247..24adc45 100644 --- a/src/core/piece/engine/ArpeggioRunner.ts +++ b/src/core/piece/engine/ArpeggioRunner.ts @@ -15,7 +15,8 @@ import type { ArpeggioMovementConfig, BatchResult, DataBatch } from '../arpeggio import { createDataSource } from '../arpeggio/data-source-factory.js'; import { loadTemplate, expandTemplate } from '../arpeggio/template.js'; import { buildMergeFn, writeMergedOutput } from '../arpeggio/merge.js'; -import { runAgent, type RunAgentOptions } from '../../../agents/runner.js'; +import type { RunAgentOptions } from '../../../agents/runner.js'; +import { executeAgent } from '../agent-usecases.js'; import { detectMatchedRule } from '../evaluation/index.js'; import { incrementMovementIteration } from './state-manager.js'; import { createLogger } from '../../../shared/utils/index.js'; @@ -84,7 +85,7 @@ async function executeBatchWithRetry( for (let attempt = 0; attempt <= maxRetries; attempt++) { try { - const response = await runAgent(persona, prompt, agentOptions); + const response = await executeAgent(persona, prompt, agentOptions); if (response.status === 'error') { lastError = response.error ?? response.content ?? 
'Agent returned error status'; log.info('Batch execution failed, retrying', { diff --git a/src/core/piece/engine/MovementExecutor.ts b/src/core/piece/engine/MovementExecutor.ts index 9f0d994..32a2b8a 100644 --- a/src/core/piece/engine/MovementExecutor.ts +++ b/src/core/piece/engine/MovementExecutor.ts @@ -15,7 +15,7 @@ import type { Language, } from '../../models/types.js'; import type { PhaseName } from '../types.js'; -import { runAgent } from '../../../agents/runner.js'; +import { executeAgent } from '../agent-usecases.js'; import { InstructionBuilder, isOutputContractItem } from '../instruction/InstructionBuilder.js'; import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js'; import { detectMatchedRule } from '../evaluation/index.js'; @@ -202,7 +202,7 @@ export class MovementExecutor { // Phase 1: main execution (Write excluded if movement has report) this.deps.onPhaseStart?.(step, 1, 'execute', instruction); const agentOptions = this.deps.optionsBuilder.buildAgentOptions(step); - let response = await runAgent(step.persona, instruction, agentOptions); + let response = await executeAgent(step.persona, instruction, agentOptions); updatePersonaSession(sessionKey, response.sessionId); this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error); @@ -220,22 +220,28 @@ export class MovementExecutor { } } - // Phase 3: status judgment (resume session, no tools, output status tag) - let tagContent = ''; - if (needsStatusJudgmentPhase(step)) { - tagContent = await runStatusJudgmentPhase(step, phaseCtx); - } + // Phase 3: status judgment (new session, no tools, determines matched rule) + const phase3Result = needsStatusJudgmentPhase(step) + ? 
await runStatusJudgmentPhase(step, phaseCtx) + : undefined; - const match = await detectMatchedRule(step, response.content, tagContent, { - state, - cwd: this.deps.getCwd(), - interactive: this.deps.getInteractive(), - detectRuleIndex: this.deps.detectRuleIndex, - callAiJudge: this.deps.callAiJudge, - }); - if (match) { - log.debug('Rule matched', { movement: step.name, ruleIndex: match.index, method: match.method }); - response = { ...response, matchedRuleIndex: match.index, matchedRuleMethod: match.method }; + if (phase3Result) { + // Phase 3 already determined the matched rule — use its result directly + log.debug('Rule matched (Phase 3)', { movement: step.name, ruleIndex: phase3Result.ruleIndex, method: phase3Result.method }); + response = { ...response, matchedRuleIndex: phase3Result.ruleIndex, matchedRuleMethod: phase3Result.method }; + } else { + // No Phase 3 — use rule evaluator with Phase 1 content + const match = await detectMatchedRule(step, response.content, '', { + state, + cwd: this.deps.getCwd(), + interactive: this.deps.getInteractive(), + detectRuleIndex: this.deps.detectRuleIndex, + callAiJudge: this.deps.callAiJudge, + }); + if (match) { + log.debug('Rule matched', { movement: step.name, ruleIndex: match.index, method: match.method }); + response = { ...response, matchedRuleIndex: match.index, matchedRuleMethod: match.method }; + } } state.movementOutputs.set(step.name, response); diff --git a/src/core/piece/engine/OptionsBuilder.ts b/src/core/piece/engine/OptionsBuilder.ts index b0711bd..1b11ddc 100644 --- a/src/core/piece/engine/OptionsBuilder.ts +++ b/src/core/piece/engine/OptionsBuilder.ts @@ -1,10 +1,3 @@ -/** - * Builds RunAgentOptions for different execution phases. - * - * Centralizes the option construction logic that was previously - * scattered across PieceEngine methods. 
- */ - import { join } from 'node:path'; import type { PieceMovement, PieceState, Language } from '../../models/types.js'; import type { RunAgentOptions } from '../../../agents/runner.js'; diff --git a/src/core/piece/engine/ParallelRunner.ts b/src/core/piece/engine/ParallelRunner.ts index a72a04e..1a5d865 100644 --- a/src/core/piece/engine/ParallelRunner.ts +++ b/src/core/piece/engine/ParallelRunner.ts @@ -10,7 +10,7 @@ import type { PieceState, AgentResponse, } from '../../models/types.js'; -import { runAgent } from '../../../agents/runner.js'; +import { executeAgent } from '../agent-usecases.js'; import { ParallelLogger } from './parallel-logger.js'; import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js'; import { detectMatchedRule } from '../evaluation/index.js'; @@ -101,7 +101,7 @@ export class ParallelRunner { : baseOptions; this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction); - const subResponse = await runAgent(subMovement.persona, subInstruction, agentOptions); + const subResponse = await executeAgent(subMovement.persona, subInstruction, agentOptions); updatePersonaSession(subSessionKey, subResponse.sessionId); this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error); @@ -114,15 +114,19 @@ export class ParallelRunner { } // Phase 3: status judgment for sub-movement - let subTagContent = ''; - if (needsStatusJudgmentPhase(subMovement)) { - subTagContent = await runStatusJudgmentPhase(subMovement, phaseCtx); - } + const subPhase3 = needsStatusJudgmentPhase(subMovement) + ? await runStatusJudgmentPhase(subMovement, phaseCtx) + : undefined; - const match = await detectMatchedRule(subMovement, subResponse.content, subTagContent, ruleCtx); - const finalResponse = match - ? 
{ ...subResponse, matchedRuleIndex: match.index, matchedRuleMethod: match.method } - : subResponse; + let finalResponse: AgentResponse; + if (subPhase3) { + finalResponse = { ...subResponse, matchedRuleIndex: subPhase3.ruleIndex, matchedRuleMethod: subPhase3.method }; + } else { + const match = await detectMatchedRule(subMovement, subResponse.content, '', ruleCtx); + finalResponse = match + ? { ...subResponse, matchedRuleIndex: match.index, matchedRuleMethod: match.method } + : subResponse; + } state.movementOutputs.set(subMovement.name, finalResponse); this.deps.movementExecutor.emitMovementReports(subMovement); diff --git a/src/core/piece/engine/TeamLeaderRunner.ts b/src/core/piece/engine/TeamLeaderRunner.ts index d97db90..9b92eaf 100644 --- a/src/core/piece/engine/TeamLeaderRunner.ts +++ b/src/core/piece/engine/TeamLeaderRunner.ts @@ -1,4 +1,3 @@ -import { runAgent } from '../../../agents/runner.js'; import type { PieceMovement, PieceState, @@ -6,11 +5,11 @@ import type { PartDefinition, PartResult, } from '../../models/types.js'; +import { decomposeTask, executeAgent } from '../agent-usecases.js'; import { detectMatchedRule } from '../evaluation/index.js'; import { buildSessionKey } from '../session-key.js'; import { ParallelLogger } from './parallel-logger.js'; import { incrementMovementIteration } from './state-manager.js'; -import { parseParts } from './task-decomposer.js'; import { buildAbortSignal } from './abort-signal.js'; import { createLogger, getErrorMessage } from '../../../shared/utils/index.js'; import type { OptionsBuilder } from './OptionsBuilder.js'; @@ -99,26 +98,19 @@ export class TeamLeaderRunner { ); this.deps.onPhaseStart?.(leaderStep, 1, 'execute', instruction); - const leaderResponse = await runAgent( - leaderStep.persona, - instruction, - this.deps.optionsBuilder.buildAgentOptions(leaderStep), - ); - updatePersonaSession(buildSessionKey(leaderStep), leaderResponse.sessionId); - this.deps.onPhaseComplete?.( - leaderStep, - 1, - 'execute', 
- leaderResponse.content, - leaderResponse.status, - leaderResponse.error, - ); - if (leaderResponse.status === 'error') { - const detail = leaderResponse.error ?? leaderResponse.content; - throw new Error(`Team leader failed: ${detail}`); - } - - const parts = parseParts(leaderResponse.content, teamLeaderConfig.maxParts); + const parts = await decomposeTask(instruction, teamLeaderConfig.maxParts, { + cwd: this.deps.getCwd(), + persona: leaderStep.persona, + model: leaderStep.model, + provider: leaderStep.provider, + }); + const leaderResponse: AgentResponse = { + persona: leaderStep.persona ?? leaderStep.name, + status: 'done', + content: JSON.stringify({ parts }, null, 2), + timestamp: new Date(), + }; + this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error); log.debug('Team leader decomposed parts', { movement: step.name, partCount: parts.length, @@ -240,7 +232,7 @@ export class TeamLeaderRunner { : { ...baseOptions, abortSignal: signal }; try { - const response = await runAgent(partMovement.persona, part.instruction, options); + const response = await executeAgent(partMovement.persona, part.instruction, options); updatePersonaSession(buildSessionKey(partMovement), response.sessionId); return { part, diff --git a/src/core/piece/engine/task-decomposer.ts b/src/core/piece/engine/task-decomposer.ts index 5754fbe..34ffc6f 100644 --- a/src/core/piece/engine/task-decomposer.ts +++ b/src/core/piece/engine/task-decomposer.ts @@ -1,4 +1,5 @@ import type { PartDefinition } from '../../models/part.js'; +import { ensureUniquePartIds, parsePartDefinitionEntry } from '../part-definition-validator.js'; const JSON_CODE_BLOCK_REGEX = /```json\s*([\s\S]*?)```/g; @@ -24,36 +25,6 @@ function parseJsonBlock(content: string): unknown { } } -function assertString(value: unknown, fieldName: string, index: number): string { - if (typeof value !== 'string' || value.trim().length === 0) { - throw new Error(`Part[${index}] 
"${fieldName}" must be a non-empty string`); - } - return value; -} - -function parsePartEntry(entry: unknown, index: number): PartDefinition { - if (typeof entry !== 'object' || entry == null || Array.isArray(entry)) { - throw new Error(`Part[${index}] must be an object`); - } - - const raw = entry as Record; - const id = assertString(raw.id, 'id', index); - const title = assertString(raw.title, 'title', index); - const instruction = assertString(raw.instruction, 'instruction', index); - - const timeoutMs = raw.timeout_ms; - if (timeoutMs != null && (typeof timeoutMs !== 'number' || !Number.isInteger(timeoutMs) || timeoutMs <= 0)) { - throw new Error(`Part[${index}] "timeout_ms" must be a positive integer`); - } - - return { - id, - title, - instruction, - timeoutMs: timeoutMs as number | undefined, - }; -} - export function parseParts(content: string, maxParts: number): PartDefinition[] { const parsed = parseJsonBlock(content); if (!Array.isArray(parsed)) { @@ -66,14 +37,8 @@ export function parseParts(content: string, maxParts: number): PartDefinition[] throw new Error(`Team leader produced too many parts: ${parsed.length} > ${maxParts}`); } - const parts = parsed.map((entry, index) => parsePartEntry(entry, index)); - const ids = new Set(); - for (const part of parts) { - if (ids.has(part.id)) { - throw new Error(`Duplicate part id: ${part.id}`); - } - ids.add(part.id); - } + const parts = parsed.map((entry, index) => parsePartDefinitionEntry(entry, index)); + ensureUniquePartIds(parts); return parts; } diff --git a/src/core/piece/index.ts b/src/core/piece/index.ts index 776c810..3684ea4 100644 --- a/src/core/piece/index.ts +++ b/src/core/piece/index.ts @@ -60,8 +60,19 @@ export { buildEditRule, type InstructionContext } from './instruction/instructio export { generateStatusRulesComponents, type StatusRulesComponents } from './instruction/status-rules.js'; // Rule evaluation -export { RuleEvaluator, type RuleMatch, type RuleEvaluatorContext, detectMatchedRule, 
evaluateAggregateConditions } from './evaluation/index.js'; +export { RuleEvaluator, type RuleMatch, type RuleEvaluatorContext, evaluateAggregateConditions } from './evaluation/index.js'; export { AggregateEvaluator } from './evaluation/AggregateEvaluator.js'; // Phase runner -export { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase, type ReportPhaseBlockedResult } from './phase-runner.js'; +export { needsStatusJudgmentPhase, type ReportPhaseBlockedResult } from './phase-runner.js'; + +// Agent usecases +export { + executeAgent, + generateReport, + executePart, + judgeStatus, + evaluateCondition, + decomposeTask, + type JudgeStatusResult, +} from './agent-usecases.js'; diff --git a/src/core/piece/instruction/StatusJudgmentBuilder.ts b/src/core/piece/instruction/StatusJudgmentBuilder.ts index 85a9354..9529050 100644 --- a/src/core/piece/instruction/StatusJudgmentBuilder.ts +++ b/src/core/piece/instruction/StatusJudgmentBuilder.ts @@ -27,6 +27,8 @@ export interface StatusJudgmentContext { lastResponse?: string; /** Input source type for fallback strategies */ inputSource?: 'report' | 'response'; + /** When true, omit tag output instructions (structured output schema handles format) */ + structuredOutput?: boolean; } /** @@ -64,12 +66,17 @@ export class StatusJudgmentBuilder { contentToJudge = this.buildFromResponse(); } + const isStructured = this.context.structuredOutput ?? false; + return loadTemplate('perform_phase3_message', language, { reportContent: contentToJudge, criteriaTable: components.criteriaTable, - outputList: components.outputList, - hasAppendix: components.hasAppendix, - appendixContent: components.appendixContent, + structuredOutput: isStructured, + ...(isStructured ? 
{} : { + outputList: components.outputList, + hasAppendix: components.hasAppendix, + appendixContent: components.appendixContent, + }), }); } diff --git a/src/core/piece/judgment/FallbackStrategy.ts b/src/core/piece/judgment/FallbackStrategy.ts deleted file mode 100644 index f3007c8..0000000 --- a/src/core/piece/judgment/FallbackStrategy.ts +++ /dev/null @@ -1,255 +0,0 @@ -/** - * Fallback strategies for Phase 3 judgment. - * - * Implements Chain of Responsibility pattern to try multiple judgment methods - * when conductor cannot determine the status from report alone. - */ - -import { readFileSync } from 'node:fs'; -import { resolve } from 'node:path'; -import type { PieceMovement, Language } from '../../models/types.js'; -import { runAgent } from '../../../agents/runner.js'; -import { StatusJudgmentBuilder } from '../instruction/StatusJudgmentBuilder.js'; -import { JudgmentDetector, type JudgmentResult } from './JudgmentDetector.js'; -import { hasOnlyOneBranch, getAutoSelectedTag, getReportFiles } from '../evaluation/rule-utils.js'; -import { createLogger } from '../../../shared/utils/index.js'; - -const log = createLogger('fallback-strategy'); - -export interface JudgmentContext { - step: PieceMovement; - cwd: string; - language?: Language; - reportDir?: string; - lastResponse?: string; // Phase 1の最終応答 - sessionId?: string; -} - -export interface JudgmentStrategy { - readonly name: string; - canApply(context: JudgmentContext): boolean; - execute(context: JudgmentContext): Promise; -} - -/** - * Base class for judgment strategies using Template Method Pattern. - */ -abstract class JudgmentStrategyBase implements JudgmentStrategy { - abstract readonly name: string; - - abstract canApply(context: JudgmentContext): boolean; - - async execute(context: JudgmentContext): Promise { - try { - // 1. 情報収集(サブクラスで実装) - const input = await this.gatherInput(context); - - // 2. 指示生成(サブクラスで実装) - const instruction = this.buildInstruction(input, context); - - // 3. 
conductor実行(共通) - const response = await this.runConductor(instruction, context); - - // 4. 結果検出(共通) - return JudgmentDetector.detect(response); - } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); - log.debug(`Strategy ${this.name} threw error`, { error: errorMsg }); - return { - success: false, - reason: `Strategy failed with error: ${errorMsg}`, - }; - } - } - - protected abstract gatherInput(context: JudgmentContext): Promise; - - protected abstract buildInstruction(input: string, context: JudgmentContext): string; - - protected async runConductor(instruction: string, context: JudgmentContext): Promise { - const response = await runAgent('conductor', instruction, { - cwd: context.cwd, - maxTurns: 3, - permissionMode: 'readonly', - language: context.language, - }); - - if (response.status !== 'done') { - throw new Error(`Conductor failed: ${response.error || response.content || 'Unknown error'}`); - } - - return response.content; - } -} - -/** - * Strategy 1: Auto-select when there's only one branch. - * This strategy doesn't use conductor - just returns the single tag. - */ -export class AutoSelectStrategy implements JudgmentStrategy { - readonly name = 'AutoSelect'; - - canApply(context: JudgmentContext): boolean { - return hasOnlyOneBranch(context.step); - } - - async execute(context: JudgmentContext): Promise { - const tag = getAutoSelectedTag(context.step); - log.debug('Auto-selected tag (single branch)', { tag }); - return { - success: true, - tag, - }; - } -} - -/** - * Strategy 2: Report-based judgment. - * Read report files and ask conductor to judge. 
- */ -export class ReportBasedStrategy extends JudgmentStrategyBase { - readonly name = 'ReportBased'; - - canApply(context: JudgmentContext): boolean { - return context.reportDir !== undefined && getReportFiles(context.step.outputContracts).length > 0; - } - - protected async gatherInput(context: JudgmentContext): Promise { - if (!context.reportDir) { - throw new Error('Report directory not provided'); - } - - const reportFiles = getReportFiles(context.step.outputContracts); - if (reportFiles.length === 0) { - throw new Error('No report files configured'); - } - - const reportContents: string[] = []; - for (const fileName of reportFiles) { - const filePath = resolve(context.reportDir, fileName); - try { - const content = readFileSync(filePath, 'utf-8'); - reportContents.push(`# ${fileName}\n\n${content}`); - } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); - throw new Error(`Failed to read report file ${fileName}: ${errorMsg}`); - } - } - - return reportContents.join('\n\n---\n\n'); - } - - protected buildInstruction(input: string, context: JudgmentContext): string { - return new StatusJudgmentBuilder(context.step, { - language: context.language, - reportContent: input, - inputSource: 'report', - }).build(); - } -} - -/** - * Strategy 3: Response-based judgment. - * Use the last response from Phase 1 to judge. 
- */ -export class ResponseBasedStrategy extends JudgmentStrategyBase { - readonly name = 'ResponseBased'; - - canApply(context: JudgmentContext): boolean { - return context.lastResponse !== undefined && context.lastResponse.length > 0; - } - - protected async gatherInput(context: JudgmentContext): Promise { - if (!context.lastResponse) { - throw new Error('Last response not provided'); - } - return context.lastResponse; - } - - protected buildInstruction(input: string, context: JudgmentContext): string { - return new StatusJudgmentBuilder(context.step, { - language: context.language, - lastResponse: input, - inputSource: 'response', - }).build(); - } -} - -/** - * Strategy 4: Agent consult. - * Resume the Phase 1 agent session and ask which tag is appropriate. - */ -export class AgentConsultStrategy implements JudgmentStrategy { - readonly name = 'AgentConsult'; - - canApply(context: JudgmentContext): boolean { - return context.sessionId !== undefined && context.sessionId.length > 0; - } - - async execute(context: JudgmentContext): Promise { - if (!context.sessionId) { - return { - success: false, - reason: 'Session ID not provided', - }; - } - - try { - const question = this.buildQuestion(context); - - const response = await runAgent(context.step.persona ?? context.step.name, question, { - cwd: context.cwd, - sessionId: context.sessionId, - maxTurns: 3, - language: context.language, - }); - - if (response.status !== 'done') { - return { - success: false, - reason: `Agent consultation failed: ${response.error || 'Unknown error'}`, - }; - } - - return JudgmentDetector.detect(response.content); - } catch (error) { - const errorMsg = error instanceof Error ? 
error.message : String(error); - log.debug('Agent consult strategy failed', { error: errorMsg }); - return { - success: false, - reason: `Agent consultation error: ${errorMsg}`, - }; - } - } - - private buildQuestion(context: JudgmentContext): string { - const rules = context.step.rules || []; - const ruleDescriptions = rules.map((rule, idx) => { - const tag = `[${context.step.name.toUpperCase()}:${idx + 1}]`; - const desc = rule.condition || `Rule ${idx + 1}`; - return `- ${tag}: ${desc}`; - }).join('\n'); - - const lang = context.language || 'en'; - - if (lang === 'ja') { - return `あなたの作業結果に基づいて、以下の判定タグのうちどれが適切か教えてください:\n\n${ruleDescriptions}\n\n該当するタグを1つだけ出力してください(例: [${context.step.name.toUpperCase()}:1])。`; - } else { - return `Based on your work, which of the following judgment tags is appropriate?\n\n${ruleDescriptions}\n\nPlease output only one tag (e.g., [${context.step.name.toUpperCase()}:1]).`; - } - } -} - -/** - * Factory for creating judgment strategies in order of priority. - */ -export class JudgmentStrategyFactory { - static createStrategies(): JudgmentStrategy[] { - return [ - new AutoSelectStrategy(), - new ReportBasedStrategy(), - new ResponseBasedStrategy(), - new AgentConsultStrategy(), - ]; - } -} diff --git a/src/core/piece/judgment/JudgmentDetector.ts b/src/core/piece/judgment/JudgmentDetector.ts deleted file mode 100644 index a00a5da..0000000 --- a/src/core/piece/judgment/JudgmentDetector.ts +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Detect judgment result from conductor's response. - */ -export interface JudgmentResult { - success: boolean; - tag?: string; // e.g., "[ARCH-REVIEW:1]" - reason?: string; -} - -export class JudgmentDetector { - private static readonly TAG_PATTERN = /\[([A-Z_-]+):(\d+)\]/; - private static readonly CANNOT_JUDGE_PATTERNS = [ - /判断できない/i, - /cannot\s+determine/i, - /unable\s+to\s+judge/i, - /insufficient\s+information/i, - ]; - - static detect(response: string): JudgmentResult { - // 1. 
タグ検出 - const tagMatch = response.match(this.TAG_PATTERN); - if (tagMatch) { - return { - success: true, - tag: tagMatch[0], // e.g., "[ARCH-REVIEW:1]" - }; - } - - // 2. 「判断できない」検出 - for (const pattern of this.CANNOT_JUDGE_PATTERNS) { - if (pattern.test(response)) { - return { - success: false, - reason: 'Conductor explicitly stated it cannot judge', - }; - } - } - - // 3. タグも「判断できない」もない → 失敗 - return { - success: false, - reason: 'No tag found and no explicit "cannot judge" statement', - }; - } -} diff --git a/src/core/piece/judgment/index.ts b/src/core/piece/judgment/index.ts deleted file mode 100644 index 58f3cae..0000000 --- a/src/core/piece/judgment/index.ts +++ /dev/null @@ -1,18 +0,0 @@ -/** - * Judgment module exports - */ - -export { - JudgmentDetector, - type JudgmentResult, -} from './JudgmentDetector.js'; - -export { - AutoSelectStrategy, - ReportBasedStrategy, - ResponseBasedStrategy, - AgentConsultStrategy, - JudgmentStrategyFactory, - type JudgmentContext, - type JudgmentStrategy, -} from './FallbackStrategy.js'; diff --git a/src/core/piece/part-definition-validator.ts b/src/core/piece/part-definition-validator.ts new file mode 100644 index 0000000..bdd2939 --- /dev/null +++ b/src/core/piece/part-definition-validator.ts @@ -0,0 +1,41 @@ +import type { PartDefinition } from '../models/part.js'; + +function assertNonEmptyString(value: unknown, fieldName: string, index: number): string { + if (typeof value !== 'string' || value.trim().length === 0) { + throw new Error(`Part[${index}] "${fieldName}" must be a non-empty string`); + } + return value; +} + +export function parsePartDefinitionEntry(entry: unknown, index: number): PartDefinition { + if (typeof entry !== 'object' || entry == null || Array.isArray(entry)) { + throw new Error(`Part[${index}] must be an object`); + } + + const raw = entry as Record; + const id = assertNonEmptyString(raw.id, 'id', index); + const title = assertNonEmptyString(raw.title, 'title', index); + const instruction = 
assertNonEmptyString(raw.instruction, 'instruction', index); + + const timeoutMs = raw.timeout_ms; + if (timeoutMs != null && (typeof timeoutMs !== 'number' || !Number.isInteger(timeoutMs) || timeoutMs <= 0)) { + throw new Error(`Part[${index}] "timeout_ms" must be a positive integer`); + } + + return { + id, + title, + instruction, + timeoutMs: timeoutMs as number | undefined, + }; +} + +export function ensureUniquePartIds(parts: PartDefinition[]): void { + const ids = new Set(); + for (const part of parts) { + if (ids.has(part.id)) { + throw new Error(`Duplicate part id: ${part.id}`); + } + ids.add(part.id); + } +} diff --git a/src/core/piece/phase-runner.ts b/src/core/piece/phase-runner.ts index 0b61b93..0feaada 100644 --- a/src/core/piece/phase-runner.ts +++ b/src/core/piece/phase-runner.ts @@ -9,12 +9,13 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; import { dirname, parse, resolve, sep } from 'node:path'; import type { PieceMovement, Language, AgentResponse } from '../models/types.js'; import type { PhaseName } from './types.js'; -import { runAgent, type RunAgentOptions } from '../../agents/runner.js'; +import type { RunAgentOptions } from '../../agents/runner.js'; import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js'; import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js'; -import { JudgmentStrategyFactory, type JudgmentContext } from './judgment/index.js'; +import { executeAgent } from './agent-usecases.js'; import { createLogger } from '../../shared/utils/index.js'; import { buildSessionKey } from './session-key.js'; +export { runStatusJudgmentPhase, type StatusJudgmentPhaseResult } from './status-judgment-phase.js'; const log = createLogger('phase-runner'); @@ -212,7 +213,7 @@ async function runSingleReportAttempt( let response: AgentResponse; try { - response = await runAgent(step.persona, instruction, options); + response = await executeAgent(step.persona, 
instruction, options); } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg); @@ -240,55 +241,3 @@ async function runSingleReportAttempt( ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status); return { kind: 'success', content: trimmedContent, response }; } - -/** - * Phase 3: Status judgment. - * Uses the 'conductor' agent in a new session to output a status tag. - * Implements multi-stage fallback logic to ensure judgment succeeds. - * Returns the Phase 3 response content (containing the status tag). - */ -export async function runStatusJudgmentPhase( - step: PieceMovement, - ctx: PhaseRunnerContext, -): Promise { - log.debug('Running status judgment phase', { movement: step.name }); - - const strategies = JudgmentStrategyFactory.createStrategies(); - const sessionKey = buildSessionKey(step); - const judgmentContext: JudgmentContext = { - step, - cwd: ctx.cwd, - language: ctx.language, - reportDir: ctx.reportDir, - lastResponse: ctx.lastResponse, - sessionId: ctx.getSessionId(sessionKey), - }; - - for (const strategy of strategies) { - if (!strategy.canApply(judgmentContext)) { - log.debug(`Strategy ${strategy.name} not applicable, skipping`); - continue; - } - - log.debug(`Trying strategy: ${strategy.name}`); - ctx.onPhaseStart?.(step, 3, 'judge', `Strategy: ${strategy.name}`); - - try { - const result = await strategy.execute(judgmentContext); - if (result.success) { - log.debug(`Strategy ${strategy.name} succeeded`, { tag: result.tag }); - ctx.onPhaseComplete?.(step, 3, 'judge', result.tag!, 'done'); - return result.tag!; - } - - log.debug(`Strategy ${strategy.name} failed`, { reason: result.reason }); - } catch (error) { - const errorMsg = error instanceof Error ? 
error.message : String(error); - log.debug(`Strategy ${strategy.name} threw error`, { error: errorMsg }); - } - } - - const errorMsg = 'All judgment strategies failed'; - ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg); - throw new Error(errorMsg); -} diff --git a/src/core/piece/schema-loader.ts b/src/core/piece/schema-loader.ts new file mode 100644 index 0000000..d4067aa --- /dev/null +++ b/src/core/piece/schema-loader.ts @@ -0,0 +1,50 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { getResourcesDir } from '../../infra/resources/index.js'; + +type JsonSchema = Record; + +const schemaCache = new Map(); + +function loadSchema(name: string): JsonSchema { + const cached = schemaCache.get(name); + if (cached) { + return cached; + } + const schemaPath = join(getResourcesDir(), 'schemas', name); + const content = readFileSync(schemaPath, 'utf-8'); + const parsed = JSON.parse(content) as JsonSchema; + schemaCache.set(name, parsed); + return parsed; +} + +function cloneSchema(schema: JsonSchema): JsonSchema { + return JSON.parse(JSON.stringify(schema)) as JsonSchema; +} + +export function loadJudgmentSchema(): JsonSchema { + return loadSchema('judgment.json'); +} + +export function loadEvaluationSchema(): JsonSchema { + return loadSchema('evaluation.json'); +} + +export function loadDecompositionSchema(maxParts: number): JsonSchema { + if (!Number.isInteger(maxParts) || maxParts <= 0) { + throw new Error(`maxParts must be a positive integer: ${maxParts}`); + } + + const schema = cloneSchema(loadSchema('decomposition.json')); + const properties = schema.properties; + if (!properties || typeof properties !== 'object' || Array.isArray(properties)) { + throw new Error('decomposition schema is invalid: properties is missing'); + } + const rawParts = (properties as Record).parts; + if (!rawParts || typeof rawParts !== 'object' || Array.isArray(rawParts)) { + throw new Error('decomposition schema is invalid: parts is 
missing'); + } + + (rawParts as Record).maxItems = maxParts; + return schema; +} diff --git a/src/core/piece/status-judgment-phase.ts b/src/core/piece/status-judgment-phase.ts new file mode 100644 index 0000000..3c5d899 --- /dev/null +++ b/src/core/piece/status-judgment-phase.ts @@ -0,0 +1,101 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; +import type { PieceMovement, RuleMatchMethod } from '../models/types.js'; +import { judgeStatus } from './agent-usecases.js'; +import { StatusJudgmentBuilder, type StatusJudgmentContext } from './instruction/StatusJudgmentBuilder.js'; +import { getReportFiles } from './evaluation/rule-utils.js'; +import { createLogger } from '../../shared/utils/index.js'; +import type { PhaseRunnerContext } from './phase-runner.js'; + +const log = createLogger('phase-runner'); + +/** Result of Phase 3 status judgment, including the detection method. */ +export interface StatusJudgmentPhaseResult { + tag: string; + ruleIndex: number; + method: RuleMatchMethod; +} + +/** + * Build the base context (shared by structured output and tag instructions). + */ +function buildBaseContext( + step: PieceMovement, + ctx: PhaseRunnerContext, +): Omit | undefined { + const reportFiles = getReportFiles(step.outputContracts); + + if (reportFiles.length > 0) { + const reports: string[] = []; + for (const fileName of reportFiles) { + const filePath = resolve(ctx.reportDir, fileName); + if (!existsSync(filePath)) continue; + const content = readFileSync(filePath, 'utf-8'); + reports.push(`# ${fileName}\n\n${content}`); + } + if (reports.length > 0) { + return { + language: ctx.language, + reportContent: reports.join('\n\n---\n\n'), + inputSource: 'report', + }; + } + } + + if (!ctx.lastResponse) return undefined; + + return { + language: ctx.language, + lastResponse: ctx.lastResponse, + inputSource: 'response', + }; +} + +/** + * Phase 3: Status judgment. 
+ * + * Builds two instructions from the same context: + * - Structured output instruction (JSON schema) + * - Tag instruction (free-form tag detection) + * + * `judgeStatus()` tries them in order: structured → tag → ai_judge. + */ +export async function runStatusJudgmentPhase( + step: PieceMovement, + ctx: PhaseRunnerContext, +): Promise { + log.debug('Running status judgment phase', { movement: step.name }); + if (!step.rules || step.rules.length === 0) { + throw new Error(`Status judgment requires rules for movement "${step.name}"`); + } + + const baseContext = buildBaseContext(step, ctx); + if (!baseContext) { + throw new Error(`Status judgment requires report or lastResponse for movement "${step.name}"`); + } + + const structuredInstruction = new StatusJudgmentBuilder(step, { + ...baseContext, + structuredOutput: true, + }).build(); + + const tagInstruction = new StatusJudgmentBuilder(step, { + ...baseContext, + }).build(); + + ctx.onPhaseStart?.(step, 3, 'judge', structuredInstruction); + try { + const result = await judgeStatus(structuredInstruction, tagInstruction, step.rules, { + cwd: ctx.cwd, + movementName: step.name, + language: ctx.language, + }); + const tag = `[${step.name.toUpperCase()}:${result.ruleIndex + 1}]`; + ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done'); + return { tag, ruleIndex: result.ruleIndex, method: result.method }; + } catch (error) { + const errorMsg = error instanceof Error ? 
error.message : String(error); + ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg); + throw error; + } +} diff --git a/src/features/tasks/execute/parallelExecution.ts b/src/features/tasks/execute/parallelExecution.ts index ff46711..39a67fd 100644 --- a/src/features/tasks/execute/parallelExecution.ts +++ b/src/features/tasks/execute/parallelExecution.ts @@ -123,7 +123,11 @@ export async function runWithWorkerPool( selfSigintInjected = true; process.emit('SIGINT'); if (selfSigintTwice) { - process.emit('SIGINT'); + // E2E deterministic path: force-exit shortly after graceful SIGINT. + // Avoids intermittent hangs caused by listener ordering/races. + setTimeout(() => { + process.exit(EXIT_SIGINT); + }, 25); } } } diff --git a/src/features/tasks/execute/pieceExecution.ts b/src/features/tasks/execute/pieceExecution.ts index 0135225..215ad50 100644 --- a/src/features/tasks/execute/pieceExecution.ts +++ b/src/features/tasks/execute/pieceExecution.ts @@ -6,7 +6,8 @@ import { readFileSync } from 'node:fs'; import { PieceEngine, type IterationLimitRequest, type UserInputRequest } from '../../../core/piece/index.js'; import type { PieceConfig } from '../../../core/models/index.js'; import type { PieceExecutionResult, PieceExecutionOptions } from './types.js'; -import { detectRuleIndex, interruptAllQueries } from '../../../infra/claude/index.js'; +import { detectRuleIndex } from '../../../shared/utils/ruleIndex.js'; +import { interruptAllQueries } from '../../../infra/claude/query-manager.js'; import { callAiJudge } from '../../../agents/ai-judge.js'; export type { PieceExecutionResult, PieceExecutionOptions }; @@ -113,6 +114,16 @@ function assertTaskPrefixPair( } } +function toJudgmentMatchMethod( + matchedRuleMethod: string | undefined, +): string | undefined { + if (!matchedRuleMethod) return undefined; + if (matchedRuleMethod === 'structured_output') return 'structured_output'; + if (matchedRuleMethod === 'ai_judge' || matchedRuleMethod === 
'ai_judge_fallback') return 'ai_judge'; + if (matchedRuleMethod === 'phase3_tag' || matchedRuleMethod === 'phase1_tag') return 'tag_fallback'; + return undefined; +} + function createOutputFns(prefixWriter: TaskPrefixWriter | undefined): OutputFns { if (!prefixWriter) { return { @@ -587,6 +598,7 @@ export async function executePiece( } // Write step_complete record to NDJSON log + const matchMethod = toJudgmentMatchMethod(response.matchedRuleMethod); const record: NdjsonStepComplete = { type: 'step_complete', step: step.name, @@ -596,6 +608,7 @@ export async function executePiece( instruction, ...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}), ...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}), + ...(matchMethod ? { matchMethod } : {}), ...(response.error ? { error: response.error } : {}), timestamp: response.timestamp.toISOString(), }; diff --git a/src/index.ts b/src/index.ts index 623af4d..bd9541d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,12 +5,30 @@ */ // Models -export * from './core/models/index.js'; +export type { + Status, + PieceRule, + PieceMovement, + PieceConfig, + PieceState, + Language, + PartDefinition, + PartResult, +} from './core/models/types.js'; -// Configuration (PermissionMode excluded to avoid name conflict with core/models PermissionMode) -export * from './infra/config/paths.js'; -export * from './infra/config/loaders/index.js'; -export * from './infra/config/global/index.js'; +// Configuration +export { + loadPiece, + loadPieceByIdentifier, + listPieces, + listPieceEntries, + loadAllPieces, + loadAllPiecesWithSources, + getPieceDescription, + getBuiltinPiece, + isPiecePath, +} from './infra/config/loaders/index.js'; +export type { PieceSource, PieceWithSource, PieceDirEntry } from './infra/config/loaders/index.js'; export { loadProjectConfig, saveProjectConfig, @@ -19,108 +37,18 @@ export { setCurrentPiece, isVerboseMode, type ProjectLocalConfig, - 
writeFileAtomic, - getInputHistoryPath, - MAX_INPUT_HISTORY, - loadInputHistory, - saveInputHistory, - addToInputHistory, - type PersonaSessionData, - getPersonaSessionsPath, - loadPersonaSessions, - savePersonaSessions, - updatePersonaSession, - clearPersonaSessions, - getWorktreeSessionsDir, - encodeWorktreePath, - getWorktreeSessionPath, - loadWorktreeSessions, - updateWorktreeSession, - getClaudeProjectSessionsDir, - clearClaudeProjectSessions, } from './infra/config/project/index.js'; -// Claude integration -export { - ClaudeClient, - ClaudeProcess, - QueryExecutor, - QueryRegistry, - executeClaudeCli, - executeClaudeQuery, - generateQueryId, - hasActiveProcess, - isQueryActive, - getActiveQueryCount, - registerQuery, - unregisterQuery, - interruptQuery, - interruptAllQueries, - interruptCurrentProcess, - sdkMessageToStreamEvent, - createCanUseToolCallback, - createAskUserQuestionHooks, - buildSdkOptions, - callClaude, - callClaudeCustom, - callClaudeAgent, - callClaudeSkill, - detectRuleIndex, - isRegexSafe, -} from './infra/claude/index.js'; -export type { - StreamEvent, - StreamCallback, - PermissionRequest, - PermissionHandler, - AskUserQuestionInput, - AskUserQuestionHandler, - ClaudeResult, - ClaudeResultWithQueryId, - ClaudeCallOptions, - ClaudeSpawnOptions, - InitEventData, - ToolUseEventData, - ToolResultEventData, - ToolOutputEventData, - TextEventData, - ThinkingEventData, - ResultEventData, - ErrorEventData, -} from './infra/claude/index.js'; - -// Codex integration -export * from './infra/codex/index.js'; - -// Agent execution -export * from './agents/index.js'; - // Piece engine export { PieceEngine, - COMPLETE_MOVEMENT, - ABORT_MOVEMENT, - ERROR_MESSAGES, - determineNextMovementByRules, - extractBlockedPrompt, - LoopDetector, - createInitialState, - addUserInput, - getPreviousOutput, - handleBlocked, - ParallelLogger, - InstructionBuilder, isOutputContractItem, - ReportInstructionBuilder, - StatusJudgmentBuilder, - buildEditRule, - 
RuleEvaluator, - detectMatchedRule, - evaluateAggregateConditions, - AggregateEvaluator, - needsStatusJudgmentPhase, - runReportPhase, - runStatusJudgmentPhase, + executeAgent, + generateReport, + executePart, + judgeStatus, + evaluateCondition, + decomposeTask, } from './core/piece/index.js'; export type { PieceEvents, @@ -129,24 +57,6 @@ export type { SessionUpdateCallback, IterationLimitCallback, PieceEngineOptions, - LoopCheckResult, ProviderType, - RuleMatch, - RuleEvaluatorContext, - ReportInstructionContext, - StatusJudgmentContext, - InstructionContext, - StatusRulesComponents, - BlockedHandlerResult, + JudgeStatusResult, } from './core/piece/index.js'; - -// Utilities -export * from './shared/utils/index.js'; -export * from './shared/ui/index.js'; -export * from './shared/prompt/index.js'; -export * from './shared/constants.js'; -export * from './shared/context.js'; -export * from './shared/exitCodes.js'; - -// Resources (embedded prompts and templates) -export * from './infra/resources/index.js'; diff --git a/src/infra/claude/client.ts b/src/infra/claude/client.ts index 497e29f..cb568e5 100644 --- a/src/infra/claude/client.ts +++ b/src/infra/claude/client.ts @@ -11,7 +11,6 @@ import { createLogger } from '../../shared/utils/index.js'; import { loadTemplate } from '../../shared/prompts/index.js'; export type { ClaudeCallOptions } from './types.js'; -export { detectRuleIndex, isRegexSafe } from './utils.js'; const log = createLogger('client'); @@ -52,6 +51,7 @@ export class ClaudeClient { onAskUserQuestion: options.onAskUserQuestion, bypassPermissions: options.bypassPermissions, anthropicApiKey: options.anthropicApiKey, + outputSchema: options.outputSchema, }; } @@ -76,6 +76,7 @@ export class ClaudeClient { timestamp: new Date(), sessionId: result.sessionId, error: result.error, + structuredOutput: result.structuredOutput, }; } @@ -104,6 +105,7 @@ export class ClaudeClient { timestamp: new Date(), sessionId: result.sessionId, error: result.error, + 
structuredOutput: result.structuredOutput, }; } @@ -153,6 +155,7 @@ export class ClaudeClient { timestamp: new Date(), sessionId: result.sessionId, error: result.error, + structuredOutput: result.structuredOutput, }; } diff --git a/src/infra/claude/executor.ts b/src/infra/claude/executor.ts index 23f8269..0365dad 100644 --- a/src/infra/claude/executor.ts +++ b/src/infra/claude/executor.ts @@ -94,6 +94,7 @@ export class QueryExecutor { let resultContent: string | undefined; let hasResultMessage = false; let accumulatedAssistantText = ''; + let structuredOutput: Record | undefined; let onExternalAbort: (() => void) | undefined; try { @@ -139,6 +140,17 @@ export class QueryExecutor { const resultMsg = message as SDKResultMessage; if (resultMsg.subtype === 'success') { resultContent = resultMsg.result; + const rawStructuredOutput = (resultMsg as unknown as { + structured_output?: unknown; + structuredOutput?: unknown; + }).structured_output ?? (resultMsg as unknown as { structuredOutput?: unknown }).structuredOutput; + if ( + rawStructuredOutput + && typeof rawStructuredOutput === 'object' + && !Array.isArray(rawStructuredOutput) + ) { + structuredOutput = rawStructuredOutput as Record; + } success = true; } else { success = false; @@ -169,6 +181,7 @@ export class QueryExecutor { content: finalContent.trim(), sessionId, fullContent: accumulatedAssistantText.trim(), + structuredOutput, }; } catch (error) { if (onExternalAbort && options.abortSignal) { diff --git a/src/infra/claude/index.ts b/src/infra/claude/index.ts index 5e2cb5c..7fb5387 100644 --- a/src/infra/claude/index.ts +++ b/src/infra/claude/index.ts @@ -61,13 +61,5 @@ export { buildSdkOptions, } from './options-builder.js'; -// Client functions -export { - callClaude, - callClaudeCustom, - callClaudeAgent, - callClaudeSkill, - detectRuleIndex, - isRegexSafe, -} from './client.js'; + diff --git a/src/infra/claude/options-builder.ts b/src/infra/claude/options-builder.ts index 86e0871..dac37ab 100644 --- 
a/src/infra/claude/options-builder.ts +++ b/src/infra/claude/options-builder.ts @@ -65,6 +65,12 @@ export class SdkOptionsBuilder { if (this.options.agents) sdkOptions.agents = this.options.agents; if (this.options.mcpServers) sdkOptions.mcpServers = this.options.mcpServers; if (this.options.systemPrompt) sdkOptions.systemPrompt = this.options.systemPrompt; + if (this.options.outputSchema) { + (sdkOptions as Record).outputFormat = { + type: 'json_schema', + schema: this.options.outputSchema, + }; + } if (canUseTool) sdkOptions.canUseTool = canUseTool; if (hooks) sdkOptions.hooks = hooks; diff --git a/src/infra/claude/types.ts b/src/infra/claude/types.ts index aa4d080..1c19741 100644 --- a/src/infra/claude/types.ts +++ b/src/infra/claude/types.ts @@ -109,6 +109,8 @@ export interface ClaudeResult { interrupted?: boolean; /** All assistant text accumulated during execution (for status detection) */ fullContent?: string; + /** Structured output returned by Claude SDK */ + structuredOutput?: Record; } /** Extended result with query ID for concurrent execution */ @@ -141,6 +143,8 @@ export interface ClaudeCallOptions { bypassPermissions?: boolean; /** Anthropic API key to inject via env (bypasses CLI auth) */ anthropicApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; } /** Options for spawning a Claude SDK query (low-level, used by executor/process) */ @@ -168,6 +172,8 @@ export interface ClaudeSpawnOptions { bypassPermissions?: boolean; /** Anthropic API key to inject via env (bypasses CLI auth) */ anthropicApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; /** Callback for stderr output from the Claude Code process */ onStderr?: (data: string) => void; } diff --git a/src/infra/claude/utils.ts b/src/infra/claude/utils.ts index 7255a24..1810bb0 100644 --- a/src/infra/claude/utils.ts +++ b/src/infra/claude/utils.ts @@ -1,27 +1,7 @@ /** * Utility functions for Claude client operations. 
- * - * Stateless helpers for rule detection and regex safety validation. */ -/** - * Detect rule index from numbered tag pattern [STEP_NAME:N]. - * Returns 0-based rule index, or -1 if no match. - * - * Example: detectRuleIndex("... [PLAN:2] ...", "plan") → 1 - */ -export function detectRuleIndex(content: string, movementName: string): number { - const tag = movementName.toUpperCase(); - const regex = new RegExp(`\\[${tag}:(\\d+)\\]`, 'gi'); - const matches = [...content.matchAll(regex)]; - const match = matches.at(-1); - if (match?.[1]) { - const index = Number.parseInt(match[1], 10) - 1; - return index >= 0 ? index : -1; - } - return -1; -} - /** Validate regex pattern for ReDoS safety */ export function isRegexSafe(pattern: string): boolean { if (pattern.length > 200) { diff --git a/src/infra/codex/client.ts b/src/infra/codex/client.ts index 1fb5da4..0a0e045 100644 --- a/src/infra/codex/client.ts +++ b/src/infra/codex/client.ts @@ -4,9 +4,9 @@ * Uses @openai/codex-sdk for native TypeScript integration. */ -import { Codex } from '@openai/codex-sdk'; +import { Codex, type TurnOptions } from '@openai/codex-sdk'; import type { AgentResponse } from '../../core/models/index.js'; -import { createLogger, getErrorMessage, createStreamDiagnostics, type StreamDiagnostics } from '../../shared/utils/index.js'; +import { createLogger, getErrorMessage, createStreamDiagnostics, parseStructuredOutput, type StreamDiagnostics } from '../../shared/utils/index.js'; import { mapToCodexSandboxMode, type CodexCallOptions } from './types.js'; import { type CodexEvent, @@ -150,9 +150,11 @@ export class CodexClient { const diag = createStreamDiagnostics('codex-sdk', { agentType, model: options.model, attempt }); diagRef = diag; - const { events } = await thread.runStreamed(fullPrompt, { + const turnOptions: TurnOptions = { signal: streamAbortController.signal, - }); + ...(options.outputSchema ? 
{ outputSchema: options.outputSchema } : {}), + }; + const { events } = await thread.runStreamed(fullPrompt, turnOptions); resetIdleTimeout(); diag.onConnected(); @@ -270,6 +272,7 @@ export class CodexClient { } const trimmed = content.trim(); + const structuredOutput = parseStructuredOutput(trimmed, !!options.outputSchema); emitResult(options.onStream, true, trimmed, currentThreadId); return { @@ -278,6 +281,7 @@ export class CodexClient { content: trimmed, timestamp: new Date(), sessionId: currentThreadId, + structuredOutput, }; } catch (error) { const message = getErrorMessage(error); diff --git a/src/infra/codex/types.ts b/src/infra/codex/types.ts index 1834167..097eed6 100644 --- a/src/infra/codex/types.ts +++ b/src/infra/codex/types.ts @@ -31,4 +31,6 @@ export interface CodexCallOptions { onStream?: StreamCallback; /** OpenAI API key (bypasses CLI auth) */ openaiApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; } diff --git a/src/infra/fs/session.ts b/src/infra/fs/session.ts index 2e4660e..34ea8a7 100644 --- a/src/infra/fs/session.ts +++ b/src/infra/fs/session.ts @@ -113,6 +113,7 @@ export class SessionManager { ...(record.error ? { error: record.error } : {}), ...(record.matchedRuleIndex != null ? { matchedRuleIndex: record.matchedRuleIndex } : {}), ...(record.matchedRuleMethod ? { matchedRuleMethod: record.matchedRuleMethod } : {}), + ...(record.matchMethod ? 
{ matchMethod: record.matchMethod } : {}), }); sessionLog.iterations++; } diff --git a/src/infra/mock/client.ts b/src/infra/mock/client.ts index 4fb0d4f..9912032 100644 --- a/src/infra/mock/client.ts +++ b/src/infra/mock/client.ts @@ -65,6 +65,7 @@ export async function callMock( content, timestamp: new Date(), sessionId, + structuredOutput: options.structuredOutput, }; } diff --git a/src/infra/mock/types.ts b/src/infra/mock/types.ts index f55b2bb..1c3d0c8 100644 --- a/src/infra/mock/types.ts +++ b/src/infra/mock/types.ts @@ -13,6 +13,8 @@ export interface MockCallOptions { mockResponse?: string; /** Fixed status to return (optional, defaults to 'done') */ mockStatus?: 'done' | 'blocked' | 'error' | 'approved' | 'rejected' | 'improve'; + /** Structured output payload returned as-is */ + structuredOutput?: Record; } /** A single entry in a mock scenario */ diff --git a/src/infra/opencode/client.ts b/src/infra/opencode/client.ts index 18c70b6..0b04999 100644 --- a/src/infra/opencode/client.ts +++ b/src/infra/opencode/client.ts @@ -8,7 +8,7 @@ import { createOpencode } from '@opencode-ai/sdk/v2'; import { createServer } from 'node:net'; import type { AgentResponse } from '../../core/models/index.js'; -import { createLogger, getErrorMessage, createStreamDiagnostics, type StreamDiagnostics } from '../../shared/utils/index.js'; +import { createLogger, getErrorMessage, createStreamDiagnostics, parseStructuredOutput, type StreamDiagnostics } from '../../shared/utils/index.js'; import { parseProviderModel } from '../../shared/utils/providerModel.js'; import { buildOpenCodePermissionConfig, @@ -236,16 +236,34 @@ export class OpenCodeClient { }); } + /** Build a prompt suffix that instructs the agent to return JSON matching the schema */ + private buildStructuredOutputSuffix(schema: Record): string { + return [ + '', + '---', + 'IMPORTANT: You MUST respond with ONLY a valid JSON object matching this schema. 
No other text, no markdown code blocks, no explanation.', + '```', + JSON.stringify(schema, null, 2), + '```', + ].join('\n'); + } + /** Call OpenCode with an agent prompt */ async call( agentType: string, prompt: string, options: OpenCodeCallOptions, ): Promise { - const fullPrompt = options.systemPrompt + const basePrompt = options.systemPrompt ? `${options.systemPrompt}\n\n${prompt}` : prompt; + // OpenCode SDK does not natively support structured output via outputFormat. + // Inject JSON output instructions into the prompt to make the agent return JSON. + const fullPrompt = options.outputSchema + ? `${basePrompt}${this.buildStructuredOutputSuffix(options.outputSchema)}` + : basePrompt; + for (let attempt = 1; attempt <= OPENCODE_RETRY_MAX_ATTEMPTS; attempt++) { let idleTimeoutId: ReturnType | undefined; const streamAbortController = new AbortController(); @@ -329,16 +347,25 @@ export class OpenCodeClient { diag.onConnected(); const tools = mapToOpenCodeTools(options.allowedTools); - await client.session.promptAsync( - { - sessionID: sessionId, - directory: options.cwd, - model: parsedModel, - ...(tools ? { tools } : {}), - parts: [{ type: 'text' as const, text: fullPrompt }], - }, - { signal: streamAbortController.signal }, - ); + const promptPayload: Record = { + sessionID: sessionId, + directory: options.cwd, + model: parsedModel, + ...(tools ? { tools } : {}), + parts: [{ type: 'text' as const, text: fullPrompt }], + }; + if (options.outputSchema) { + promptPayload.outputFormat = { + type: 'json_schema', + schema: options.outputSchema, + }; + } + + // OpenCode SDK types do not yet expose outputFormat even though runtime accepts it. 
+ const promptPayloadForSdk = promptPayload as unknown as Parameters[0]; + await client.session.promptAsync(promptPayloadForSdk, { + signal: streamAbortController.signal, + }); emitInit(options.onStream, options.model, sessionId); @@ -571,6 +598,7 @@ export class OpenCodeClient { } const trimmed = content.trim(); + const structuredOutput = parseStructuredOutput(trimmed, !!options.outputSchema); emitResult(options.onStream, true, trimmed, sessionId); return { @@ -579,6 +607,7 @@ export class OpenCodeClient { content: trimmed, timestamp: new Date(), sessionId, + structuredOutput, }; } catch (error) { const message = getErrorMessage(error); diff --git a/src/infra/opencode/types.ts b/src/infra/opencode/types.ts index fb5fa5a..d981247 100644 --- a/src/infra/opencode/types.ts +++ b/src/infra/opencode/types.ts @@ -170,4 +170,6 @@ export interface OpenCodeCallOptions { onAskUserQuestion?: AskUserQuestionHandler; /** OpenCode API key */ opencodeApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; } diff --git a/src/infra/providers/claude.ts b/src/infra/providers/claude.ts index c59fe8f..a47702f 100644 --- a/src/infra/providers/claude.ts +++ b/src/infra/providers/claude.ts @@ -2,7 +2,8 @@ * Claude provider implementation */ -import { callClaude, callClaudeCustom, callClaudeAgent, callClaudeSkill, type ClaudeCallOptions } from '../claude/index.js'; +import { callClaude, callClaudeCustom, callClaudeAgent, callClaudeSkill } from '../claude/client.js'; +import type { ClaudeCallOptions } from '../claude/types.js'; import { resolveAnthropicApiKey } from '../config/index.js'; import type { AgentResponse } from '../../core/models/index.js'; import type { AgentSetup, Provider, ProviderAgent, ProviderCallOptions } from './types.js'; @@ -22,6 +23,7 @@ function toClaudeOptions(options: ProviderCallOptions): ClaudeCallOptions { onAskUserQuestion: options.onAskUserQuestion, bypassPermissions: options.bypassPermissions, anthropicApiKey: 
options.anthropicApiKey ?? resolveAnthropicApiKey(), + outputSchema: options.outputSchema, }; } diff --git a/src/infra/providers/codex.ts b/src/infra/providers/codex.ts index 88c67d2..47a4de8 100644 --- a/src/infra/providers/codex.ts +++ b/src/infra/providers/codex.ts @@ -33,6 +33,7 @@ function toCodexOptions(options: ProviderCallOptions): CodexCallOptions { permissionMode: options.permissionMode, onStream: options.onStream, openaiApiKey: options.openaiApiKey ?? resolveOpenaiApiKey(), + outputSchema: options.outputSchema, }; } diff --git a/src/infra/providers/opencode.ts b/src/infra/providers/opencode.ts index 19e9798..3243158 100644 --- a/src/infra/providers/opencode.ts +++ b/src/infra/providers/opencode.ts @@ -22,6 +22,7 @@ function toOpenCodeOptions(options: ProviderCallOptions): OpenCodeCallOptions { onStream: options.onStream, onAskUserQuestion: options.onAskUserQuestion, opencodeApiKey: options.opencodeApiKey ?? resolveOpencodeApiKey(), + outputSchema: options.outputSchema, }; } diff --git a/src/infra/providers/types.ts b/src/infra/providers/types.ts index d2bc48d..e9214b8 100644 --- a/src/infra/providers/types.ts +++ b/src/infra/providers/types.ts @@ -40,6 +40,8 @@ export interface ProviderCallOptions { openaiApiKey?: string; /** OpenCode API key for OpenCode provider */ opencodeApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; } /** A configured agent ready to be called */ diff --git a/src/shared/prompts/en/perform_phase3_message.md b/src/shared/prompts/en/perform_phase3_message.md index a3aa41b..80e5839 100644 --- a/src/shared/prompts/en/perform_phase3_message.md +++ b/src/shared/prompts/en/perform_phase3_message.md @@ -1,10 +1,14 @@ +{{#if structuredOutput}} +**Review is already complete. Evaluate the report below and determine which numbered rule (1-based) best matches the result.** +{{else}} **Review is already complete. 
Output exactly one tag corresponding to the judgment result shown in the report below.** +{{/if}} {{reportContent}} @@ -12,12 +16,21 @@ {{criteriaTable}} +{{#if structuredOutput}} + +## Task + +Evaluate the report against the criteria above. Return the matched rule number (1-based integer) and a brief reason for your decision. +{{else}} + ## Output Format **Output the tag corresponding to the judgment shown in the report in one line:** {{outputList}} +{{/if}} {{#if hasAppendix}} ### Appendix Template -{{appendixContent}}{{/if}} +{{appendixContent}} +{{/if}} diff --git a/src/shared/prompts/ja/perform_phase3_message.md b/src/shared/prompts/ja/perform_phase3_message.md index becfa29..89299ef 100644 --- a/src/shared/prompts/ja/perform_phase3_message.md +++ b/src/shared/prompts/ja/perform_phase3_message.md @@ -1,10 +1,14 @@ +{{#if structuredOutput}} +**既にレビューは完了しています。以下のレポートを評価し、どの番号のルール(1始まり)が結果に最も合致するか判定してください。** +{{else}} **既にレビューは完了しています。以下のレポートで示された判定結果に対応するタグを1つだけ出力してください。** +{{/if}} {{reportContent}} @@ -12,12 +16,21 @@ {{criteriaTable}} +{{#if structuredOutput}} + +## タスク + +上記の判定基準に照らしてレポートを評価してください。合致するルール番号(1始まりの整数)と簡潔な理由を返してください。 +{{else}} + ## 出力フォーマット **レポートで示した判定に対応するタグを1行で出力してください:** {{outputList}} +{{/if}} {{#if hasAppendix}} ### 追加出力テンプレート -{{appendixContent}}{{/if}} +{{appendixContent}} +{{/if}} diff --git a/src/shared/utils/index.ts b/src/shared/utils/index.ts index 69050cb..ffa23c0 100644 --- a/src/shared/utils/index.ts +++ b/src/shared/utils/index.ts @@ -11,6 +11,7 @@ export * from './slackWebhook.js'; export * from './sleep.js'; export * from './slug.js'; export * from './streamDiagnostics.js'; +export * from './structuredOutput.js'; export * from './taskPaths.js'; export * from './text.js'; export * from './types.js'; diff --git a/src/shared/utils/ruleIndex.ts b/src/shared/utils/ruleIndex.ts new file mode 100644 index 0000000..e0758c4 --- /dev/null +++ b/src/shared/utils/ruleIndex.ts @@ -0,0 +1,15 @@ +/** + * Detect rule index from numbered tag 
/**
 * Detect the rule index from a numbered tag of the form `[STEP_NAME:N]`.
 *
 * The movement name is upper-cased and the search is case-insensitive. When
 * the tag occurs more than once in `content`, the last occurrence wins.
 *
 * @param content - Text to scan for the tag.
 * @param movementName - Movement (step) name used as the tag prefix. It is
 *   matched literally: regex metacharacters in the name are escaped.
 * @returns The 0-based rule index (`N - 1`), or -1 when no tag is present or
 *   `N` is 0.
 */
export function detectRuleIndex(content: string, movementName: string): number {
  // Escape regex metacharacters so names such as "a.b" or "fix(ui)" are matched
  // literally instead of widening the pattern or throwing a SyntaxError.
  const tag = movementName.toUpperCase().replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`\\[${tag}:(\\d+)\\]`, 'gi');

  // Walk every match and remember only the final captured number.
  let lastNumber: string | undefined;
  for (const match of content.matchAll(regex)) {
    lastNumber = match[1];
  }
  if (!lastNumber) {
    return -1;
  }

  // Tags are 1-based; convert to a 0-based index and reject `[NAME:0]`.
  const index = Number.parseInt(lastNumber, 10) - 1;
  return index >= 0 ? index : -1;
}
/**
 * Parse structured output from a provider text response.
 *
 * Codex and OpenCode return structured output as JSON text in agent messages.
 * `parseStructuredOutput` extracts a JSON object from that text when an
 * output schema was requested.
 *
 * Extraction strategies (in order):
 * 1. Direct JSON parse — text is pure JSON starting with `{`
 * 2. Code block extraction — JSON inside ```json ... ``` or ``` ... ```
 * 3. Brace extraction — the span from the first `{` to the last `}` in the text
 */

/**
 * Parse `text` as JSON and return it only when the result is a plain object.
 *
 * @param text - Candidate JSON text.
 * @returns The parsed object, or `undefined` when the text is not valid JSON
 *   or parses to an array, a primitive, or `null`.
 */
function tryParseJsonObject(text: string): Record<string, unknown> | undefined {
  try {
    const parsed: unknown = JSON.parse(text);
    if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    // Not valid JSON — the caller moves on to its next extraction strategy.
  }
  return undefined;
}

/**
 * Extract a JSON object from a provider's text response.
 *
 * @param text - Raw text returned by the provider.
 * @param hasOutputSchema - Whether an output schema was requested; when
 *   `false` no parsing is attempted.
 * @returns The first object produced by the strategies listed above, or
 *   `undefined` when `hasOutputSchema` is false, `text` is empty, or no
 *   strategy yields a JSON object.
 */
export function parseStructuredOutput(
  text: string,
  hasOutputSchema: boolean,
): Record<string, unknown> | undefined {
  if (!hasOutputSchema || !text) return undefined;

  const trimmed = text.trim();

  // Strategy 1: Direct JSON parse (text is pure JSON)
  if (trimmed.startsWith('{')) {
    const result = tryParseJsonObject(trimmed);
    if (result) return result;
  }

  // Strategy 2: Extract from markdown code block (```json\n{...}\n```)
  const codeBlockMatch = trimmed.match(/```(?:json)?\s*\n(\{[\s\S]*?\})\s*\n```/);
  if (codeBlockMatch?.[1]) {
    const result = tryParseJsonObject(codeBlockMatch[1].trim());
    if (result) return result;
  }

  // Strategy 3: Find first `{` and last `}` (handles preamble/postamble text)
  const firstBrace = trimmed.indexOf('{');
  const lastBrace = trimmed.lastIndexOf('}');
  if (firstBrace >= 0 && lastBrace > firstBrace) {
    const candidate = trimmed.slice(firstBrace, lastBrace + 1);
    const result = tryParseJsonObject(candidate);
    if (result) return result;
  }

  return undefined;
}
import { defineConfig } from 'vitest/config';

// Spec files picked up by this config: only the structured-output e2e suite.
const specFiles = ['e2e/specs/structured-output.e2e.ts'];

// Timeout settings passed to vitest for tests, hooks and teardown.
const timeouts = {
  testTimeout: 240000,
  hookTimeout: 60000,
  teardownTimeout: 30000,
};

// Thread pool restricted to a single thread.
const threadPool = {
  pool: 'threads' as const,
  poolOptions: {
    threads: {
      singleThread: true,
    },
  },
};

/** Vitest configuration for the structured-output e2e spec. */
export default defineConfig({
  test: {
    include: specFiles,
    environment: 'node',
    globals: false,
    ...timeouts,
    ...threadPool,
  },
});