diff --git a/builtins/schemas/decomposition.json b/builtins/schemas/decomposition.json new file mode 100644 index 0000000..5706f4b --- /dev/null +++ b/builtins/schemas/decomposition.json @@ -0,0 +1,33 @@ +{ + "type": "object", + "properties": { + "parts": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "Unique part identifier" + }, + "title": { + "type": "string", + "description": "Human-readable part title" + }, + "instruction": { + "type": "string", + "description": "Instruction for the part agent" + }, + "timeout_ms": { + "type": "integer", + "description": "Optional timeout in ms" + } + }, + "required": ["id", "title", "instruction"], + "additionalProperties": false + } + } + }, + "required": ["parts"], + "additionalProperties": false +} diff --git a/builtins/schemas/evaluation.json b/builtins/schemas/evaluation.json new file mode 100644 index 0000000..269909c --- /dev/null +++ b/builtins/schemas/evaluation.json @@ -0,0 +1,15 @@ +{ + "type": "object", + "properties": { + "matched_index": { + "type": "integer", + "description": "Matched condition number (1-based)" + }, + "reason": { + "type": "string", + "description": "Why this condition was matched" + } + }, + "required": ["matched_index"], + "additionalProperties": false +} diff --git a/builtins/schemas/judgment.json b/builtins/schemas/judgment.json new file mode 100644 index 0000000..42c21d0 --- /dev/null +++ b/builtins/schemas/judgment.json @@ -0,0 +1,15 @@ +{ + "type": "object", + "properties": { + "step": { + "type": "integer", + "description": "Matched rule number (1-based)" + }, + "reason": { + "type": "string", + "description": "Brief justification for the decision" + } + }, + "required": ["step"], + "additionalProperties": false +} diff --git a/src/__tests__/agent-usecases.test.ts b/src/__tests__/agent-usecases.test.ts new file mode 100644 index 0000000..d8e3f11 --- /dev/null +++ b/src/__tests__/agent-usecases.test.ts @@ -0,0 +1,185 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { runAgent } from '../agents/runner.js'; +import { parseParts } from '../core/piece/engine/task-decomposer.js'; +import { detectJudgeIndex } from '../agents/judge-utils.js'; +import { + executeAgent, + generateReport, + executePart, + evaluateCondition, + judgeStatus, + decomposeTask, +} from '../core/piece/agent-usecases.js'; + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../core/piece/schema-loader.js', () => ({ + loadJudgmentSchema: vi.fn(() => ({ type: 'judgment' })), + loadEvaluationSchema: vi.fn(() => ({ type: 'evaluation' })), + loadDecompositionSchema: vi.fn((maxParts: number) => ({ type: 'decomposition', maxParts })), +})); + +vi.mock('../core/piece/engine/task-decomposer.js', () => ({ + parseParts: vi.fn(), +})); + +vi.mock('../agents/judge-utils.js', () => ({ + buildJudgePrompt: vi.fn(() => 'judge prompt'), + detectJudgeIndex: vi.fn(() => -1), +})); + +function doneResponse(content: string, structuredOutput?: Record) { + return { + persona: 'tester', + status: 'done' as const, + content, + timestamp: new Date('2026-02-12T00:00:00Z'), + structuredOutput, + }; +} + +describe('agent-usecases', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('executeAgent/generateReport/executePart は runAgent に委譲する', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('ok')); + + await executeAgent('coder', 'do work', { cwd: '/tmp' }); + await generateReport('coder', 'write report', { cwd: '/tmp' }); + await executePart('coder', 'part work', { cwd: '/tmp' }); + + expect(runAgent).toHaveBeenCalledTimes(3); + expect(runAgent).toHaveBeenNthCalledWith(1, 'coder', 'do work', { cwd: '/tmp' }); + expect(runAgent).toHaveBeenNthCalledWith(2, 'coder', 'write report', { cwd: '/tmp' }); + expect(runAgent).toHaveBeenNthCalledWith(3, 'coder', 'part work', { cwd: '/tmp' }); + }); + + it('evaluateCondition は構造化出力の matched_index を優先する', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('ignored', { matched_index: 2 })); + + const result = await evaluateCondition('agent output', [ + { index: 0, text: 'first' }, + { index: 1, text: 'second' }, + ], { cwd: '/repo' }); + + expect(result).toBe(1); + expect(runAgent).toHaveBeenCalledWith(undefined, 'judge prompt', expect.objectContaining({ + cwd: '/repo', + outputSchema: { type: 'evaluation' }, + })); + }); + + it('evaluateCondition は構造化出力が使えない場合にタグ検出へフォールバックする', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('[JUDGE:2]')); + vi.mocked(detectJudgeIndex).mockReturnValue(1); + + const result = await evaluateCondition('agent output', [ + { index: 0, text: 'first' }, + { index: 1, text: 'second' }, + ], { cwd: '/repo' }); + + expect(result).toBe(1); + expect(detectJudgeIndex).toHaveBeenCalledWith('[JUDGE:2]'); + }); + + it('judgeStatus は単一ルール時に auto_select を返す', async () => { + const result = await judgeStatus('instruction', [{ condition: 'always', next: 'done' }], { + cwd: '/repo', + movementName: 'review', + }); + + expect(result).toEqual({ ruleIndex: 0, method: 'auto_select' }); + expect(runAgent).not.toHaveBeenCalled(); + }); + + it('judgeStatus は構造化出力 step を採用する', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('x', { step: 2 })); + + const result = await judgeStatus('instruction', [ + { condition: 'a', next: 'one' }, + { condition: 'b', next: 'two' }, + ], { + cwd: '/repo', + movementName: 'review', + }); + + expect(result).toEqual({ ruleIndex: 1, method: 'structured_output' }); + }); + + it('judgeStatus はタグフォールバックを使う', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('[REVIEW:2]')); + + const result = await judgeStatus('instruction', [ + { condition: 'a', next: 'one' }, + { condition: 'b', next: 'two' }, + ], { + cwd: '/repo', + movementName: 'review', + }); + + expect(result).toEqual({ ruleIndex: 1, method: 'phase3_tag' }); + }); + + it('judgeStatus は最終手段として AI Judge を使う', async () => { + vi.mocked(runAgent) + .mockResolvedValueOnce(doneResponse('no match')) + .mockResolvedValueOnce(doneResponse('ignored', { matched_index: 2 })); + + const result = await judgeStatus('instruction', [ + { condition: 'a', next: 'one' }, + { condition: 'b', next: 'two' }, + ], { + cwd: '/repo', + movementName: 'review', + }); + + expect(result).toEqual({ ruleIndex: 1, method: 'ai_judge' }); + expect(runAgent).toHaveBeenCalledTimes(2); + }); + + it('decomposeTask は構造化出力 parts を返す', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('x', { + parts: [ + { id: 'p1', title: 'Part 1', instruction: 'Do 1', timeout_ms: 1000 }, + ], + })); + + const result = await decomposeTask('instruction', 3, { cwd: '/repo', persona: 'team-leader' }); + + expect(result).toEqual([ + { id: 'p1', title: 'Part 1', instruction: 'Do 1', timeoutMs: 1000 }, + ]); + expect(parseParts).not.toHaveBeenCalled(); + }); + + it('decomposeTask は構造化出力がない場合 parseParts にフォールバックする', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('```json [] ```')); + vi.mocked(parseParts).mockReturnValue([ + { id: 'p1', title: 'Part 1', instruction: 'fallback', timeoutMs: undefined }, + ]); + + const result = await decomposeTask('instruction', 2, { cwd: '/repo' }); + + expect(parseParts).toHaveBeenCalledWith('```json [] ```', 2); + expect(result).toEqual([ + { id: 'p1', title: 'Part 1', instruction: 'fallback', timeoutMs: undefined }, + ]); + }); + + it('decomposeTask は done 以外をエラーにする', async () => { + vi.mocked(runAgent).mockResolvedValue({ + persona: 'team-leader', + status: 'error', + content: 'failure', + error: 'bad output', + timestamp: new Date('2026-02-12T00:00:00Z'), + }); + + await expect(decomposeTask('instruction', 2, { cwd: '/repo' })) + .rejects.toThrow('Team leader failed: bad output'); + }); +}); diff --git a/src/__tests__/engine-error.test.ts b/src/__tests__/engine-error.test.ts index bcc9ca2..28fa82c 100644 --- a/src/__tests__/engine-error.test.ts +++ b/src/__tests__/engine-error.test.ts @@ -36,7 +36,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({ import { PieceEngine } from '../core/piece/index.js'; import { runAgent } from '../agents/runner.js'; -import { detectMatchedRule } from '../core/piece/index.js'; +import { detectMatchedRule } from '../core/piece/evaluation/index.js'; import { makeResponse, makeMovement, diff --git a/src/__tests__/engine-parallel-failure.test.ts b/src/__tests__/engine-parallel-failure.test.ts index a48d6c1..ef1569b 100644 --- a/src/__tests__/engine-parallel-failure.test.ts +++ b/src/__tests__/engine-parallel-failure.test.ts @@ -35,7 +35,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({ import { PieceEngine } from '../core/piece/index.js'; import { runAgent } from '../agents/runner.js'; -import { detectMatchedRule } from '../core/piece/index.js'; +import { detectMatchedRule } from '../core/piece/evaluation/index.js'; import { makeResponse, makeMovement, diff --git a/src/__tests__/engine-test-helpers.ts b/src/__tests__/engine-test-helpers.ts index 5ac943b..3513476 100644 --- a/src/__tests__/engine-test-helpers.ts +++ b/src/__tests__/engine-test-helpers.ts @@ -16,9 +16,9 @@ import { makeRule } from './test-helpers.js'; // --- Mock imports (consumers must call vi.mock before importing this) --- import { runAgent } from '../agents/runner.js'; -import { detectMatchedRule } from '../core/piece/index.js'; -import type { RuleMatch } from '../core/piece/index.js'; -import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../core/piece/index.js'; +import { detectMatchedRule } from '../core/piece/evaluation/index.js'; +import type { RuleMatch } from '../core/piece/evaluation/index.js'; +import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../core/piece/phase-runner.js'; import { generateReportDir } from '../shared/utils/index.js'; // --- Factory functions --- diff --git a/src/__tests__/engine-worktree-report.test.ts b/src/__tests__/engine-worktree-report.test.ts index 1021c0a..32fa194 100644 --- a/src/__tests__/engine-worktree-report.test.ts +++ b/src/__tests__/engine-worktree-report.test.ts @@ -35,7 +35,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({ // --- Imports (after mocks) --- import { PieceEngine } from '../core/piece/index.js'; -import { runReportPhase } from '../core/piece/index.js'; +import { runReportPhase } from '../core/piece/phase-runner.js'; import { makeResponse, makeMovement, diff --git a/src/__tests__/it-rule-evaluation.test.ts b/src/__tests__/it-rule-evaluation.test.ts index 0facf7d..b423f29 100644 --- a/src/__tests__/it-rule-evaluation.test.ts +++ b/src/__tests__/it-rule-evaluation.test.ts @@ -34,7 +34,8 @@ vi.mock('../infra/config/project/projectConfig.js', () => ({ // --- Imports (after mocks) --- -import { detectMatchedRule, evaluateAggregateConditions } from '../core/piece/index.js'; +import { evaluateAggregateConditions } from '../core/piece/index.js'; +import { detectMatchedRule } from '../core/piece/evaluation/index.js'; import { detectRuleIndex } from '../infra/claude/index.js'; import type { RuleMatch, RuleEvaluatorContext } from '../core/piece/index.js'; diff --git a/src/__tests__/judgment-fallback.test.ts b/src/__tests__/judgment-fallback.test.ts deleted file mode 100644 index 0d7d560..0000000 --- a/src/__tests__/judgment-fallback.test.ts +++ /dev/null @@ -1,183 +0,0 @@ -/** - * Test for Fallback Strategies - */ - -import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; -import { join } from 'node:path'; -import { tmpdir } from 'node:os'; -import type { PieceMovement } from '../core/models/types.js'; -import type { JudgmentContext } from '../core/piece/judgment/FallbackStrategy.js'; -import { runAgent } from '../agents/runner.js'; -import { - AutoSelectStrategy, - ReportBasedStrategy, - ResponseBasedStrategy, - AgentConsultStrategy, - JudgmentStrategyFactory, -} from '../core/piece/judgment/FallbackStrategy.js'; - -// Mock runAgent -vi.mock('../agents/runner.js', () => ({ - runAgent: vi.fn(), -})); - -describe('JudgmentStrategies', () => { - const mockStep: PieceMovement = { - name: 'test-movement', - persona: 'test-agent', - rules: [ - { description: 'Rule 1', condition: 'approved' }, - { description: 'Rule 2', condition: 'rejected' }, - ], - }; - - const mockContext: JudgmentContext = { - step: mockStep, - cwd: '/test/cwd', - language: 'en', - reportDir: '/test/reports', - lastResponse: 'Last response content', - sessionId: 'session-123', - }; - - beforeEach(() => { - vi.clearAllMocks(); - }); - - describe('AutoSelectStrategy', () => { - it('should apply when step has only one rule', () => { - const singleRuleStep: PieceMovement = { - name: 'single-rule', - rules: [{ description: 'Only rule', condition: 'always' }], - }; - const strategy = new AutoSelectStrategy(); - expect(strategy.canApply({ ...mockContext, step: singleRuleStep })).toBe(true); - }); - - it('should not apply when step has multiple rules', () => { - const strategy = new AutoSelectStrategy(); - expect(strategy.canApply(mockContext)).toBe(false); - }); - - it('should return auto-selected tag', async () => { - const singleRuleStep: PieceMovement = { - name: 'single-rule', - rules: [{ description: 'Only rule', condition: 'always' }], - }; - const strategy = new AutoSelectStrategy(); - const result = await strategy.execute({ ...mockContext, step: singleRuleStep }); - expect(result.success).toBe(true); - expect(result.tag).toBe('[SINGLE-RULE:1]'); - }); - }); - - describe('ReportBasedStrategy', () => { - it('should apply when reportDir and output contracts are configured', () => { - const strategy = new ReportBasedStrategy(); - const stepWithOutputContracts: PieceMovement = { - ...mockStep, - outputContracts: [{ label: 'review', path: 'review-report.md' }], - }; - expect(strategy.canApply({ ...mockContext, step: stepWithOutputContracts })).toBe(true); - }); - - it('should not apply when reportDir is missing', () => { - const strategy = new ReportBasedStrategy(); - expect(strategy.canApply({ ...mockContext, reportDir: undefined })).toBe(false); - }); - - it('should not apply when step has no output contracts configured', () => { - const strategy = new ReportBasedStrategy(); - // mockStep has no outputContracts field → getReportFiles returns [] - expect(strategy.canApply(mockContext)).toBe(false); - }); - - it('should use only latest report file from reports directory', async () => { - const tmpRoot = mkdtempSync(join(tmpdir(), 'takt-judgment-report-')); - try { - const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports'); - const historyDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'logs', 'reports-history'); - mkdirSync(reportDir, { recursive: true }); - mkdirSync(historyDir, { recursive: true }); - - const latestFile = '05-architect-review.md'; - writeFileSync(join(reportDir, latestFile), 'LATEST-ONLY-CONTENT'); - writeFileSync(join(historyDir, '05-architect-review.20260210T061143Z.md'), 'OLD-HISTORY-CONTENT'); - - const stepWithOutputContracts: PieceMovement = { - ...mockStep, - outputContracts: [{ name: latestFile }], - }; - - const runAgentMock = vi.mocked(runAgent); - runAgentMock.mockResolvedValue({ - persona: 'conductor', - status: 'done', - content: '[TEST-MOVEMENT:1]', - timestamp: new Date('2026-02-10T07:11:43Z'), - }); - - const strategy = new ReportBasedStrategy(); - const result = await strategy.execute({ - ...mockContext, - step: stepWithOutputContracts, - reportDir, - }); - - expect(result.success).toBe(true); - expect(runAgentMock).toHaveBeenCalledTimes(1); - const instruction = runAgentMock.mock.calls[0]?.[1]; - expect(instruction).toContain('LATEST-ONLY-CONTENT'); - expect(instruction).not.toContain('OLD-HISTORY-CONTENT'); - } finally { - rmSync(tmpRoot, { recursive: true, force: true }); - } - }); - }); - - describe('ResponseBasedStrategy', () => { - it('should apply when lastResponse is provided', () => { - const strategy = new ResponseBasedStrategy(); - expect(strategy.canApply(mockContext)).toBe(true); - }); - - it('should not apply when lastResponse is missing', () => { - const strategy = new ResponseBasedStrategy(); - expect(strategy.canApply({ ...mockContext, lastResponse: undefined })).toBe(false); - }); - - it('should not apply when lastResponse is empty', () => { - const strategy = new ResponseBasedStrategy(); - expect(strategy.canApply({ ...mockContext, lastResponse: '' })).toBe(false); - }); - }); - - describe('AgentConsultStrategy', () => { - it('should apply when sessionId is provided', () => { - const strategy = new AgentConsultStrategy(); - expect(strategy.canApply(mockContext)).toBe(true); - }); - - it('should not apply when sessionId is missing', () => { - const strategy = new AgentConsultStrategy(); - expect(strategy.canApply({ ...mockContext, sessionId: undefined })).toBe(false); - }); - - it('should not apply when sessionId is empty', () => { - const strategy = new AgentConsultStrategy(); - expect(strategy.canApply({ ...mockContext, sessionId: '' })).toBe(false); - }); - }); - - describe('JudgmentStrategyFactory', () => { - it('should create strategies in correct order', () => { - const strategies = JudgmentStrategyFactory.createStrategies(); - expect(strategies).toHaveLength(4); - expect(strategies[0]).toBeInstanceOf(AutoSelectStrategy); - expect(strategies[1]).toBeInstanceOf(ReportBasedStrategy); - expect(strategies[2]).toBeInstanceOf(ResponseBasedStrategy); - expect(strategies[3]).toBeInstanceOf(AgentConsultStrategy); - }); - }); -}); diff --git a/src/__tests__/judgment-strategies.test.ts b/src/__tests__/judgment-strategies.test.ts deleted file mode 100644 index 18d72b9..0000000 --- a/src/__tests__/judgment-strategies.test.ts +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Unit tests for FallbackStrategy judgment strategies - * - * Tests AutoSelectStrategy and canApply logic for all strategies. - * Strategies requiring external agent calls (ReportBased, ResponseBased, - * AgentConsult) are tested for canApply and input validation only. - */ - -import { describe, it, expect } from 'vitest'; -import { - AutoSelectStrategy, - ReportBasedStrategy, - ResponseBasedStrategy, - AgentConsultStrategy, - JudgmentStrategyFactory, - type JudgmentContext, -} from '../core/piece/judgment/FallbackStrategy.js'; -import { makeMovement } from './test-helpers.js'; - -function makeContext(overrides: Partial = {}): JudgmentContext { - return { - step: makeMovement(), - cwd: '/tmp/test', - ...overrides, - }; -} - -describe('AutoSelectStrategy', () => { - const strategy = new AutoSelectStrategy(); - - it('should have name "AutoSelect"', () => { - expect(strategy.name).toBe('AutoSelect'); - }); - - describe('canApply', () => { - it('should return true when movement has exactly one rule', () => { - const ctx = makeContext({ - step: makeMovement({ - rules: [{ condition: 'done', next: 'COMPLETE' }], - }), - }); - expect(strategy.canApply(ctx)).toBe(true); - }); - - it('should return false when movement has multiple rules', () => { - const ctx = makeContext({ - step: makeMovement({ - rules: [ - { condition: 'approved', next: 'implement' }, - { condition: 'rejected', next: 'review' }, - ], - }), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when movement has no rules', () => { - const ctx = makeContext({ - step: makeMovement({ rules: undefined }), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - }); - - describe('execute', () => { - it('should return auto-selected tag for single-branch movement', async () => { - const ctx = makeContext({ - step: makeMovement({ - name: 'review', - rules: [{ condition: 'done', next: 'COMPLETE' }], - }), - }); - - const result = await strategy.execute(ctx); - expect(result.success).toBe(true); - expect(result.tag).toBe('[REVIEW:1]'); - }); - }); -}); - -describe('ReportBasedStrategy', () => { - const strategy = new ReportBasedStrategy(); - - it('should have name "ReportBased"', () => { - expect(strategy.name).toBe('ReportBased'); - }); - - describe('canApply', () => { - it('should return true when reportDir and outputContracts are present', () => { - const ctx = makeContext({ - reportDir: '/tmp/reports', - step: makeMovement({ - outputContracts: [{ name: 'report.md' }], - }), - }); - expect(strategy.canApply(ctx)).toBe(true); - }); - - it('should return false when reportDir is missing', () => { - const ctx = makeContext({ - step: makeMovement({ - outputContracts: [{ name: 'report.md' }], - }), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when outputContracts is empty', () => { - const ctx = makeContext({ - reportDir: '/tmp/reports', - step: makeMovement({ outputContracts: [] }), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when outputContracts is undefined', () => { - const ctx = makeContext({ - reportDir: '/tmp/reports', - step: makeMovement(), - }); - expect(strategy.canApply(ctx)).toBe(false); - }); - }); -}); - -describe('ResponseBasedStrategy', () => { - const strategy = new ResponseBasedStrategy(); - - it('should have name "ResponseBased"', () => { - expect(strategy.name).toBe('ResponseBased'); - }); - - describe('canApply', () => { - it('should return true when lastResponse is non-empty', () => { - const ctx = makeContext({ lastResponse: 'some response' }); - expect(strategy.canApply(ctx)).toBe(true); - }); - - it('should return false when lastResponse is undefined', () => { - const ctx = makeContext({ lastResponse: undefined }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when lastResponse is empty string', () => { - const ctx = makeContext({ lastResponse: '' }); - expect(strategy.canApply(ctx)).toBe(false); - }); - }); -}); - -describe('AgentConsultStrategy', () => { - const strategy = new AgentConsultStrategy(); - - it('should have name "AgentConsult"', () => { - expect(strategy.name).toBe('AgentConsult'); - }); - - describe('canApply', () => { - it('should return true when sessionId is non-empty', () => { - const ctx = makeContext({ sessionId: 'session-123' }); - expect(strategy.canApply(ctx)).toBe(true); - }); - - it('should return false when sessionId is undefined', () => { - const ctx = makeContext({ sessionId: undefined }); - expect(strategy.canApply(ctx)).toBe(false); - }); - - it('should return false when sessionId is empty string', () => { - const ctx = makeContext({ sessionId: '' }); - expect(strategy.canApply(ctx)).toBe(false); - }); - }); - - describe('execute', () => { - it('should return failure when sessionId is not provided', async () => { - const ctx = makeContext({ sessionId: undefined }); - const result = await strategy.execute(ctx); - expect(result.success).toBe(false); - expect(result.reason).toBe('Session ID not provided'); - }); - }); -}); - -describe('JudgmentStrategyFactory', () => { - it('should create strategies in correct priority order', () => { - const strategies = JudgmentStrategyFactory.createStrategies(); - expect(strategies).toHaveLength(4); - expect(strategies[0]!.name).toBe('AutoSelect'); - expect(strategies[1]!.name).toBe('ReportBased'); - expect(strategies[2]!.name).toBe('ResponseBased'); - expect(strategies[3]!.name).toBe('AgentConsult'); - }); -}); diff --git a/src/__tests__/report-phase-blocked.test.ts b/src/__tests__/report-phase-blocked.test.ts index 3afad14..fca6c2b 100644 --- a/src/__tests__/report-phase-blocked.test.ts +++ b/src/__tests__/report-phase-blocked.test.ts @@ -34,7 +34,7 @@ vi.mock('../shared/utils/index.js', async (importOriginal) => ({ // --- Imports (after mocks) --- import { PieceEngine } from '../core/piece/index.js'; -import { runReportPhase } from '../core/piece/index.js'; +import { runReportPhase } from '../core/piece/phase-runner.js'; import { makeResponse, makeMovement, diff --git a/src/__tests__/schema-loader.test.ts b/src/__tests__/schema-loader.test.ts new file mode 100644 index 0000000..a44341c --- /dev/null +++ b/src/__tests__/schema-loader.test.ts @@ -0,0 +1,76 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +const readFileSyncMock = vi.fn((path: string) => { + if (path.endsWith('judgment.json')) { + return JSON.stringify({ type: 'object', properties: { step: { type: 'integer' } } }); + } + if (path.endsWith('evaluation.json')) { + return JSON.stringify({ type: 'object', properties: { matched_index: { type: 'integer' } } }); + } + if (path.endsWith('decomposition.json')) { + return JSON.stringify({ + type: 'object', + properties: { + parts: { + type: 'array', + items: { + type: 'object', + properties: { + id: { type: 'string' }, + title: { type: 'string' }, + instruction: { type: 'string' }, + }, + }, + }, + }, + }); + } + throw new Error(`Unexpected schema path: ${path}`); +}); + +vi.mock('node:fs', () => ({ + readFileSync: readFileSyncMock, +})); + +vi.mock('../infra/resources/index.js', () => ({ + getResourcesDir: vi.fn(() => '/mock/resources'), +})); + +describe('schema-loader', () => { + beforeEach(() => { + vi.resetModules(); + readFileSyncMock.mockClear(); + }); + + it('同じスキーマを複数回ロードしても readFileSync は1回だけ', async () => { + const { loadJudgmentSchema } = await import('../core/piece/schema-loader.js'); + + const first = loadJudgmentSchema(); + const second = loadJudgmentSchema(); + + expect(first).toEqual(second); + expect(readFileSyncMock).toHaveBeenCalledTimes(1); + expect(readFileSyncMock).toHaveBeenCalledWith('/mock/resources/schemas/judgment.json', 'utf-8'); + }); + + it('loadDecompositionSchema は maxItems を注入し、呼び出しごとに独立したオブジェクトを返す', async () => { + const { loadDecompositionSchema } = await import('../core/piece/schema-loader.js'); + + const first = loadDecompositionSchema(2); + const second = loadDecompositionSchema(5); + + const firstParts = (first.properties as Record).parts as Record; + const secondParts = (second.properties as Record).parts as Record; + + expect(firstParts.maxItems).toBe(2); + expect(secondParts.maxItems).toBe(5); + expect(readFileSyncMock).toHaveBeenCalledTimes(1); + }); + + it('loadDecompositionSchema は不正な maxParts を拒否する', async () => { + const { loadDecompositionSchema } = await import('../core/piece/schema-loader.js'); + + expect(() => loadDecompositionSchema(0)).toThrow('maxParts must be a positive integer: 0'); + expect(() => loadDecompositionSchema(-1)).toThrow('maxParts must be a positive integer: -1'); + }); +}); diff --git a/src/agents/ai-judge.ts b/src/agents/ai-judge.ts index 178d072..004b3d9 100644 --- a/src/agents/ai-judge.ts +++ b/src/agents/ai-judge.ts @@ -6,39 +6,12 @@ */ import type { AiJudgeCaller, AiJudgeCondition } from '../core/piece/types.js'; -import { loadTemplate } from '../shared/prompts/index.js'; import { createLogger } from '../shared/utils/index.js'; -import { runAgent } from './runner.js'; +import { evaluateCondition } from '../core/piece/agent-usecases.js'; const log = createLogger('ai-judge'); -/** - * Detect judge rule index from [JUDGE:N] tag pattern. - * Returns 0-based rule index, or -1 if no match. - */ -export function detectJudgeIndex(content: string): number { - const regex = /\[JUDGE:(\d+)\]/i; - const match = content.match(regex); - if (match?.[1]) { - const index = Number.parseInt(match[1], 10) - 1; - return index >= 0 ? index : -1; - } - return -1; -} - -/** - * Build the prompt for the AI judge that evaluates agent output against ai() conditions. - */ -export function buildJudgePrompt( - agentOutput: string, - aiConditions: AiJudgeCondition[], -): string { - const conditionList = aiConditions - .map((c) => `| ${c.index + 1} | ${c.text} |`) - .join('\n'); - - return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList }); -} +export { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js'; /** * Call AI judge to evaluate agent output against ai() conditions. @@ -50,18 +23,9 @@ export const callAiJudge: AiJudgeCaller = async ( conditions: AiJudgeCondition[], options: { cwd: string }, ): Promise => { - const prompt = buildJudgePrompt(agentOutput, conditions); - - const response = await runAgent(undefined, prompt, { - cwd: options.cwd, - maxTurns: 1, - permissionMode: 'readonly', - }); - - if (response.status !== 'done') { - log.error('AI judge call failed', { error: response.error }); - return -1; + const result = await evaluateCondition(agentOutput, conditions, options); + if (result < 0) { + log.error('AI judge call failed to match a condition'); } - - return detectJudgeIndex(response.content); + return result; }; diff --git a/src/agents/index.ts b/src/agents/index.ts index 6adc5d9..2355cce 100644 --- a/src/agents/index.ts +++ b/src/agents/index.ts @@ -2,6 +2,5 @@ * Agents module - exports agent execution utilities */ -export { AgentRunner, runAgent } from './runner.js'; -export { callAiJudge, detectJudgeIndex, buildJudgePrompt } from './ai-judge.js'; +export { AgentRunner } from './runner.js'; export type { RunAgentOptions, StreamCallback } from './types.js'; diff --git a/src/agents/judge-utils.ts b/src/agents/judge-utils.ts new file mode 100644 index 0000000..0fb4795 --- /dev/null +++ b/src/agents/judge-utils.ts @@ -0,0 +1,22 @@ +import { loadTemplate } from '../shared/prompts/index.js'; + +export function detectJudgeIndex(content: string): number { + const regex = /\[JUDGE:(\d+)\]/i; + const match = content.match(regex); + if (match?.[1]) { + const index = Number.parseInt(match[1], 10) - 1; + return index >= 0 ? index : -1; + } + return -1; +} + +export function buildJudgePrompt( + agentOutput: string, + aiConditions: Array<{ index: number; text: string }>, +): string { + const conditionList = aiConditions + .map((c) => `| ${c.index + 1} | ${c.text} |`) + .join('\n'); + + return loadTemplate('perform_judge_message', 'en', { agentOutput, conditionList }); +} diff --git a/src/agents/runner.ts b/src/agents/runner.ts index cb54c93..5b126d0 100644 --- a/src/agents/runner.ts +++ b/src/agents/runner.ts @@ -111,6 +111,7 @@ export class AgentRunner { onPermissionRequest: options.onPermissionRequest, onAskUserQuestion: options.onAskUserQuestion, bypassPermissions: options.bypassPermissions, + outputSchema: options.outputSchema, }; } diff --git a/src/agents/types.ts b/src/agents/types.ts index d27882a..abfc0e4 100644 --- a/src/agents/types.ts +++ b/src/agents/types.ts @@ -39,4 +39,6 @@ export interface RunAgentOptions { movementsList: ReadonlyArray<{ name: string; description?: string }>; currentPosition: string; }; + /** JSON Schema for structured output */ + outputSchema?: Record; } diff --git a/src/core/models/response.ts b/src/core/models/response.ts index b687e8f..532584c 100644 --- a/src/core/models/response.ts +++ b/src/core/models/response.ts @@ -17,5 +17,6 @@ export interface AgentResponse { matchedRuleIndex?: number; /** How the rule match was detected */ matchedRuleMethod?: RuleMatchMethod; + /** Structured output returned by provider SDK (JSON Schema mode) */ + structuredOutput?: Record; } - diff --git a/src/core/models/status.ts b/src/core/models/status.ts index fb77af9..0dde9f7 100644 --- a/src/core/models/status.ts +++ b/src/core/models/status.ts @@ -21,6 +21,8 @@ export type Status = /** How a rule match was detected */ export type RuleMatchMethod = | 'aggregate' + | 'auto_select' + | 'structured_output' | 'phase3_tag' | 'phase1_tag' | 'ai_judge' diff --git a/src/core/piece/agent-usecases.ts b/src/core/piece/agent-usecases.ts new file mode 100644 index 0000000..c4d58a4 --- /dev/null +++ b/src/core/piece/agent-usecases.ts @@ -0,0 +1,235 @@ +import type { AgentResponse, PartDefinition, PieceRule, RuleMatchMethod, Language } from '../models/types.js'; +import { runAgent, type RunAgentOptions } from '../../agents/runner.js'; +import { detectJudgeIndex, buildJudgePrompt } from '../../agents/judge-utils.js'; +import { parseParts } from './engine/task-decomposer.js'; +import { loadJudgmentSchema, loadEvaluationSchema, loadDecompositionSchema } from './schema-loader.js'; + +export type UsecaseOptions = RunAgentOptions; + +export interface JudgeStatusOptions { + cwd: string; + movementName: string; + language?: Language; +} + +export interface JudgeStatusResult { + ruleIndex: number; + method: RuleMatchMethod; +} + +export interface EvaluateConditionOptions { + cwd: string; +} + +export interface DecomposeTaskOptions { + cwd: string; + persona?: string; + language?: Language; + model?: string; + provider?: 'claude' | 'codex' | 'opencode' | 'mock'; +} + +function detectRuleIndex(content: string, movementName: string): number { + const tag = movementName.toUpperCase(); + const regex = new RegExp(`\\[${tag}:(\\d+)\\]`, 'gi'); + const matches = [...content.matchAll(regex)]; + const match = matches.at(-1); + if (match?.[1]) { + const index = Number.parseInt(match[1], 10) - 1; + return index >= 0 ? index : -1; + } + return -1; +} + +function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] { + if (!Array.isArray(raw)) { + throw new Error('Structured output "parts" must be an array'); + } + if (raw.length === 0) { + throw new Error('Structured output "parts" must not be empty'); + } + if (raw.length > maxParts) { + throw new Error(`Structured output produced too many parts: ${raw.length} > ${maxParts}`); + } + + const parts: PartDefinition[] = raw.map((entry, index) => { + if (typeof entry !== 'object' || entry == null || Array.isArray(entry)) { + throw new Error(`Part[${index}] must be an object`); + } + const row = entry as Record; + const id = row.id; + const title = row.title; + const instruction = row.instruction; + const timeoutMs = row.timeout_ms; + + if (typeof id !== 'string' || id.trim().length === 0) { + throw new Error(`Part[${index}] "id" must be a non-empty string`); + } + if (typeof title !== 'string' || title.trim().length === 0) { + throw new Error(`Part[${index}] "title" must be a non-empty string`); + } + if (typeof instruction !== 'string' || instruction.trim().length === 0) { + throw new Error(`Part[${index}] "instruction" must be a non-empty string`); + } + if ( + timeoutMs != null + && (typeof timeoutMs !== 'number' || !Number.isInteger(timeoutMs) || timeoutMs <= 0) + ) { + throw new Error(`Part[${index}] "timeout_ms" must be a positive integer`); + } + + return { + id, + title, + instruction, + timeoutMs: timeoutMs as number | undefined, + }; + }); + + const seen = new Set(); + for (const part of parts) { + if (seen.has(part.id)) { + throw new Error(`Duplicate part id: ${part.id}`); + } + seen.add(part.id); + } + + return parts; +} + +export async function executeAgent( + persona: string | undefined, + instruction: string, + options: UsecaseOptions, +): Promise { + return runAgent(persona, instruction, options); +} + +export async function generateReport( + persona: string | undefined, + instruction: string, + options: UsecaseOptions, +): Promise { + return runAgent(persona, instruction, options); +} + +export async function executePart( + persona: string | undefined, + instruction: string, + options: UsecaseOptions, +): Promise { + return runAgent(persona, instruction, options); +} + +export async function evaluateCondition( + agentOutput: string, + conditions: Array<{ index: number; text: string }>, + options: EvaluateConditionOptions, +): Promise { + const prompt = buildJudgePrompt(agentOutput, conditions); + const response = await runAgent(undefined, prompt, { + cwd: options.cwd, + maxTurns: 1, + permissionMode: 'readonly', + outputSchema: loadEvaluationSchema(), + }); + + if (response.status !== 'done') { + return -1; + } + + const matchedIndex = response.structuredOutput?.matched_index; + if (typeof matchedIndex === 'number' && Number.isInteger(matchedIndex)) { + const zeroBased = matchedIndex - 1; + if (zeroBased >= 0 && zeroBased < conditions.length) { + return zeroBased; + } + } + + return detectJudgeIndex(response.content); +} + +export async function judgeStatus( + instruction: string, + rules: PieceRule[], + options: JudgeStatusOptions, +): Promise { + if (rules.length === 0) { + throw new Error('judgeStatus requires at least one rule'); + } + + if (rules.length === 1) { + return { + ruleIndex: 0, + method: 'auto_select', + }; + } + + const response = await runAgent('conductor', instruction, { + cwd: options.cwd, + maxTurns: 3, + permissionMode: 'readonly', + language: options.language, + outputSchema: loadJudgmentSchema(), + }); + + if (response.status === 'done') { + const stepNumber = response.structuredOutput?.step; + if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) { + const ruleIndex = stepNumber - 1; + if (ruleIndex >= 0 && ruleIndex < rules.length) { + return { + ruleIndex, + method: 'structured_output', + }; + } + } + + const tagRuleIndex = detectRuleIndex(response.content, options.movementName); + if (tagRuleIndex >= 0 && tagRuleIndex < rules.length) { + return { + ruleIndex: tagRuleIndex, + method: 'phase3_tag', + }; + } + } + + const conditions = rules.map((rule, index) => ({ index, text: rule.condition })); + const fallbackIndex = await evaluateCondition(instruction, conditions, { cwd: options.cwd }); + if (fallbackIndex >= 0 && fallbackIndex < rules.length) { + return { + ruleIndex: fallbackIndex, + method: 'ai_judge', + }; + } + + throw new Error(`Status not found for movement "${options.movementName}"`); +} + +export async function decomposeTask( + instruction: string, + maxParts: number, + options: DecomposeTaskOptions, +): Promise { + const response = await runAgent(options.persona, instruction, { + cwd: options.cwd, + language: options.language, + model: options.model, + provider: options.provider, + permissionMode: 'readonly', + maxTurns: 3, + outputSchema: loadDecompositionSchema(maxParts), + }); + + if (response.status !== 'done') { + const detail = response.error ?? response.content; + throw new Error(`Team leader failed: ${detail}`); + } + + const parts = response.structuredOutput?.parts; + if (parts != null) { + return toPartDefinitions(parts, maxParts); + } + + return parseParts(response.content, maxParts); +} diff --git a/src/core/piece/engine/ArpeggioRunner.ts b/src/core/piece/engine/ArpeggioRunner.ts index 017c247..24adc45 100644 --- a/src/core/piece/engine/ArpeggioRunner.ts +++ b/src/core/piece/engine/ArpeggioRunner.ts @@ -15,7 +15,8 @@ import type { ArpeggioMovementConfig, BatchResult, DataBatch } from '../arpeggio import { createDataSource } from '../arpeggio/data-source-factory.js'; import { loadTemplate, expandTemplate } from '../arpeggio/template.js'; import { buildMergeFn, writeMergedOutput } from '../arpeggio/merge.js'; -import { runAgent, type RunAgentOptions } from '../../../agents/runner.js'; +import type { RunAgentOptions } from '../../../agents/runner.js'; +import { executeAgent } from '../agent-usecases.js'; import { detectMatchedRule } from '../evaluation/index.js'; import { incrementMovementIteration } from './state-manager.js'; import { createLogger } from '../../../shared/utils/index.js'; @@ -84,7 +85,7 @@ async function executeBatchWithRetry( for (let attempt = 0; attempt <= maxRetries; attempt++) { try { - const response = await runAgent(persona, prompt, agentOptions); + const response = await executeAgent(persona, prompt, agentOptions); if (response.status === 'error') { lastError = response.error ?? response.content ?? 'Agent returned error status'; log.info('Batch execution failed, retrying', { diff --git a/src/core/piece/engine/MovementExecutor.ts b/src/core/piece/engine/MovementExecutor.ts index 9f0d994..ca4cd76 100644 --- a/src/core/piece/engine/MovementExecutor.ts +++ b/src/core/piece/engine/MovementExecutor.ts @@ -15,7 +15,7 @@ import type { Language, } from '../../models/types.js'; import type { PhaseName } from '../types.js'; -import { runAgent } from '../../../agents/runner.js'; +import { executeAgent } from '../agent-usecases.js'; import { InstructionBuilder, isOutputContractItem } from '../instruction/InstructionBuilder.js'; import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js'; import { detectMatchedRule } from '../evaluation/index.js'; @@ -202,7 +202,7 @@ export class MovementExecutor { // Phase 1: main execution (Write excluded if movement has report) this.deps.onPhaseStart?.(step, 1, 'execute', instruction); const agentOptions = this.deps.optionsBuilder.buildAgentOptions(step); - let response = await runAgent(step.persona, instruction, agentOptions); + let response = await executeAgent(step.persona, instruction, agentOptions); updatePersonaSession(sessionKey, response.sessionId); this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error); diff --git a/src/core/piece/engine/ParallelRunner.ts b/src/core/piece/engine/ParallelRunner.ts index a72a04e..94dc5ae 100644 --- a/src/core/piece/engine/ParallelRunner.ts +++ b/src/core/piece/engine/ParallelRunner.ts @@ -10,7 +10,7 @@ import type { PieceState, AgentResponse, } from '../../models/types.js'; -import { runAgent } from '../../../agents/runner.js'; +import { executeAgent } from '../agent-usecases.js'; import { ParallelLogger } from './parallel-logger.js'; import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js'; import { detectMatchedRule } from '../evaluation/index.js'; @@ -101,7 +101,7 @@ export class ParallelRunner { : baseOptions; this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction); - const subResponse = await runAgent(subMovement.persona, subInstruction, agentOptions); + const subResponse = await executeAgent(subMovement.persona, subInstruction, agentOptions); updatePersonaSession(subSessionKey, subResponse.sessionId); this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error); diff --git a/src/core/piece/engine/TeamLeaderRunner.ts b/src/core/piece/engine/TeamLeaderRunner.ts index d97db90..f082396 100644 --- a/src/core/piece/engine/TeamLeaderRunner.ts +++ b/src/core/piece/engine/TeamLeaderRunner.ts @@ -1,4 +1,3 @@ -import { runAgent } from '../../../agents/runner.js'; import type { PieceMovement, PieceState, @@ -6,11 +5,11 @@ import type { PartDefinition, PartResult, } from '../../models/types.js'; +import { decomposeTask, executePart } from '../agent-usecases.js'; import { detectMatchedRule } from '../evaluation/index.js'; import { buildSessionKey } from '../session-key.js'; import { ParallelLogger } from './parallel-logger.js'; import { incrementMovementIteration } from './state-manager.js'; -import { parseParts } from './task-decomposer.js'; import { buildAbortSignal } from './abort-signal.js'; import { createLogger, getErrorMessage } from '../../../shared/utils/index.js'; import type { OptionsBuilder } from './OptionsBuilder.js'; @@ -99,26 +98,19 @@ export class TeamLeaderRunner { ); this.deps.onPhaseStart?.(leaderStep, 1, 'execute', instruction); - const leaderResponse = await runAgent( - leaderStep.persona, - instruction, - this.deps.optionsBuilder.buildAgentOptions(leaderStep), - ); - updatePersonaSession(buildSessionKey(leaderStep), leaderResponse.sessionId); - this.deps.onPhaseComplete?.( - leaderStep, - 1, - 'execute', - leaderResponse.content, - leaderResponse.status, - leaderResponse.error, - ); - if (leaderResponse.status === 'error') { - const detail = leaderResponse.error ?? leaderResponse.content; - throw new Error(`Team leader failed: ${detail}`); - } - - const parts = parseParts(leaderResponse.content, teamLeaderConfig.maxParts); + const parts = await decomposeTask(instruction, teamLeaderConfig.maxParts, { + cwd: this.deps.getCwd(), + persona: leaderStep.persona, + model: leaderStep.model, + provider: leaderStep.provider, + }); + const leaderResponse: AgentResponse = { + persona: leaderStep.persona ?? leaderStep.name, + status: 'done', + content: JSON.stringify({ parts }, null, 2), + timestamp: new Date(), + }; + this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error); log.debug('Team leader decomposed parts', { movement: step.name, partCount: parts.length, @@ -240,7 +232,7 @@ export class TeamLeaderRunner { : { ...baseOptions, abortSignal: signal }; try { - const response = await runAgent(partMovement.persona, part.instruction, options); + const response = await executePart(partMovement.persona, part.instruction, options); updatePersonaSession(buildSessionKey(partMovement), response.sessionId); return { part, diff --git a/src/core/piece/index.ts b/src/core/piece/index.ts index 776c810..3684ea4 100644 --- a/src/core/piece/index.ts +++ b/src/core/piece/index.ts @@ -60,8 +60,19 @@ export { buildEditRule, type InstructionContext } from './instruction/instructio export { generateStatusRulesComponents, type StatusRulesComponents } from './instruction/status-rules.js'; // Rule evaluation -export { RuleEvaluator, type RuleMatch, type RuleEvaluatorContext, detectMatchedRule, evaluateAggregateConditions } from './evaluation/index.js'; +export { RuleEvaluator, type RuleMatch, type RuleEvaluatorContext, evaluateAggregateConditions } from './evaluation/index.js'; export { AggregateEvaluator } from './evaluation/AggregateEvaluator.js'; // Phase runner -export { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase, type ReportPhaseBlockedResult } from './phase-runner.js'; +export { needsStatusJudgmentPhase, type ReportPhaseBlockedResult } from './phase-runner.js'; + +// Agent usecases +export { + executeAgent, + generateReport, + executePart, + judgeStatus, + evaluateCondition, + decomposeTask, + type JudgeStatusResult, +} from './agent-usecases.js'; diff --git a/src/core/piece/instruction/StatusJudgmentBuilder.ts b/src/core/piece/instruction/StatusJudgmentBuilder.ts index 85a9354..e4f4099 100644 --- a/src/core/piece/instruction/StatusJudgmentBuilder.ts +++ b/src/core/piece/instruction/StatusJudgmentBuilder.ts @@ -27,6 +27,8 @@ export interface StatusJudgmentContext { lastResponse?: string; /** Input source type for fallback strategies */ inputSource?: 'report' | 'response'; + /** Structured output mode omits tag-format instructions */ + useStructuredOutput?: boolean; } /** @@ -67,7 +69,9 @@ export class StatusJudgmentBuilder { return loadTemplate('perform_phase3_message', language, { reportContent: contentToJudge, criteriaTable: components.criteriaTable, - outputList: components.outputList, + outputList: this.context.useStructuredOutput + ? '' + : components.outputList, hasAppendix: components.hasAppendix, appendixContent: components.appendixContent, }); diff --git a/src/core/piece/judgment/FallbackStrategy.ts b/src/core/piece/judgment/FallbackStrategy.ts deleted file mode 100644 index f3007c8..0000000 --- a/src/core/piece/judgment/FallbackStrategy.ts +++ /dev/null @@ -1,255 +0,0 @@ -/** - * Fallback strategies for Phase 3 judgment. - * - * Implements Chain of Responsibility pattern to try multiple judgment methods - * when conductor cannot determine the status from report alone. - */ - -import { readFileSync } from 'node:fs'; -import { resolve } from 'node:path'; -import type { PieceMovement, Language } from '../../models/types.js'; -import { runAgent } from '../../../agents/runner.js'; -import { StatusJudgmentBuilder } from '../instruction/StatusJudgmentBuilder.js'; -import { JudgmentDetector, type JudgmentResult } from './JudgmentDetector.js'; -import { hasOnlyOneBranch, getAutoSelectedTag, getReportFiles } from '../evaluation/rule-utils.js'; -import { createLogger } from '../../../shared/utils/index.js'; - -const log = createLogger('fallback-strategy'); - -export interface JudgmentContext { - step: PieceMovement; - cwd: string; - language?: Language; - reportDir?: string; - lastResponse?: string; // Phase 1の最終応答 - sessionId?: string; -} - -export interface JudgmentStrategy { - readonly name: string; - canApply(context: JudgmentContext): boolean; - execute(context: JudgmentContext): Promise; -} - -/** - * Base class for judgment strategies using Template Method Pattern. - */ -abstract class JudgmentStrategyBase implements JudgmentStrategy { - abstract readonly name: string; - - abstract canApply(context: JudgmentContext): boolean; - - async execute(context: JudgmentContext): Promise { - try { - // 1. 情報収集(サブクラスで実装) - const input = await this.gatherInput(context); - - // 2. 指示生成(サブクラスで実装) - const instruction = this.buildInstruction(input, context); - - // 3. conductor実行(共通) - const response = await this.runConductor(instruction, context); - - // 4. 結果検出(共通) - return JudgmentDetector.detect(response); - } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); - log.debug(`Strategy ${this.name} threw error`, { error: errorMsg }); - return { - success: false, - reason: `Strategy failed with error: ${errorMsg}`, - }; - } - } - - protected abstract gatherInput(context: JudgmentContext): Promise; - - protected abstract buildInstruction(input: string, context: JudgmentContext): string; - - protected async runConductor(instruction: string, context: JudgmentContext): Promise { - const response = await runAgent('conductor', instruction, { - cwd: context.cwd, - maxTurns: 3, - permissionMode: 'readonly', - language: context.language, - }); - - if (response.status !== 'done') { - throw new Error(`Conductor failed: ${response.error || response.content || 'Unknown error'}`); - } - - return response.content; - } -} - -/** - * Strategy 1: Auto-select when there's only one branch. - * This strategy doesn't use conductor - just returns the single tag. - */ -export class AutoSelectStrategy implements JudgmentStrategy { - readonly name = 'AutoSelect'; - - canApply(context: JudgmentContext): boolean { - return hasOnlyOneBranch(context.step); - } - - async execute(context: JudgmentContext): Promise { - const tag = getAutoSelectedTag(context.step); - log.debug('Auto-selected tag (single branch)', { tag }); - return { - success: true, - tag, - }; - } -} - -/** - * Strategy 2: Report-based judgment. - * Read report files and ask conductor to judge. - */ -export class ReportBasedStrategy extends JudgmentStrategyBase { - readonly name = 'ReportBased'; - - canApply(context: JudgmentContext): boolean { - return context.reportDir !== undefined && getReportFiles(context.step.outputContracts).length > 0; - } - - protected async gatherInput(context: JudgmentContext): Promise { - if (!context.reportDir) { - throw new Error('Report directory not provided'); - } - - const reportFiles = getReportFiles(context.step.outputContracts); - if (reportFiles.length === 0) { - throw new Error('No report files configured'); - } - - const reportContents: string[] = []; - for (const fileName of reportFiles) { - const filePath = resolve(context.reportDir, fileName); - try { - const content = readFileSync(filePath, 'utf-8'); - reportContents.push(`# ${fileName}\n\n${content}`); - } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); - throw new Error(`Failed to read report file ${fileName}: ${errorMsg}`); - } - } - - return reportContents.join('\n\n---\n\n'); - } - - protected buildInstruction(input: string, context: JudgmentContext): string { - return new StatusJudgmentBuilder(context.step, { - language: context.language, - reportContent: input, - inputSource: 'report', - }).build(); - } -} - -/** - * Strategy 3: Response-based judgment. - * Use the last response from Phase 1 to judge. - */ -export class ResponseBasedStrategy extends JudgmentStrategyBase { - readonly name = 'ResponseBased'; - - canApply(context: JudgmentContext): boolean { - return context.lastResponse !== undefined && context.lastResponse.length > 0; - } - - protected async gatherInput(context: JudgmentContext): Promise { - if (!context.lastResponse) { - throw new Error('Last response not provided'); - } - return context.lastResponse; - } - - protected buildInstruction(input: string, context: JudgmentContext): string { - return new StatusJudgmentBuilder(context.step, { - language: context.language, - lastResponse: input, - inputSource: 'response', - }).build(); - } -} - -/** - * Strategy 4: Agent consult. - * Resume the Phase 1 agent session and ask which tag is appropriate. - */ -export class AgentConsultStrategy implements JudgmentStrategy { - readonly name = 'AgentConsult'; - - canApply(context: JudgmentContext): boolean { - return context.sessionId !== undefined && context.sessionId.length > 0; - } - - async execute(context: JudgmentContext): Promise { - if (!context.sessionId) { - return { - success: false, - reason: 'Session ID not provided', - }; - } - - try { - const question = this.buildQuestion(context); - - const response = await runAgent(context.step.persona ?? context.step.name, question, { - cwd: context.cwd, - sessionId: context.sessionId, - maxTurns: 3, - language: context.language, - }); - - if (response.status !== 'done') { - return { - success: false, - reason: `Agent consultation failed: ${response.error || 'Unknown error'}`, - }; - } - - return JudgmentDetector.detect(response.content); - } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); - log.debug('Agent consult strategy failed', { error: errorMsg }); - return { - success: false, - reason: `Agent consultation error: ${errorMsg}`, - }; - } - } - - private buildQuestion(context: JudgmentContext): string { - const rules = context.step.rules || []; - const ruleDescriptions = rules.map((rule, idx) => { - const tag = `[${context.step.name.toUpperCase()}:${idx + 1}]`; - const desc = rule.condition || `Rule ${idx + 1}`; - return `- ${tag}: ${desc}`; - }).join('\n'); - - const lang = context.language || 'en'; - - if (lang === 'ja') { - return `あなたの作業結果に基づいて、以下の判定タグのうちどれが適切か教えてください:\n\n${ruleDescriptions}\n\n該当するタグを1つだけ出力してください(例: [${context.step.name.toUpperCase()}:1])。`; - } else { - return `Based on your work, which of the following judgment tags is appropriate?\n\n${ruleDescriptions}\n\nPlease output only one tag (e.g., [${context.step.name.toUpperCase()}:1]).`; - } - } -} - -/** - * Factory for creating judgment strategies in order of priority. - */ -export class JudgmentStrategyFactory { - static createStrategies(): JudgmentStrategy[] { - return [ - new AutoSelectStrategy(), - new ReportBasedStrategy(), - new ResponseBasedStrategy(), - new AgentConsultStrategy(), - ]; - } -} diff --git a/src/core/piece/judgment/index.ts b/src/core/piece/judgment/index.ts index 58f3cae..c5e6487 100644 --- a/src/core/piece/judgment/index.ts +++ b/src/core/piece/judgment/index.ts @@ -6,13 +6,3 @@ export { JudgmentDetector, type JudgmentResult, } from './JudgmentDetector.js'; - -export { - AutoSelectStrategy, - ReportBasedStrategy, - ResponseBasedStrategy, - AgentConsultStrategy, - JudgmentStrategyFactory, - type JudgmentContext, - type JudgmentStrategy, -} from './FallbackStrategy.js'; diff --git a/src/core/piece/phase-runner.ts b/src/core/piece/phase-runner.ts index b1daf35..7105bf8 100644 --- a/src/core/piece/phase-runner.ts +++ b/src/core/piece/phase-runner.ts @@ -9,12 +9,13 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; import { dirname, parse, resolve, sep } from 'node:path'; import type { PieceMovement, Language, AgentResponse } from '../models/types.js'; import type { PhaseName } from './types.js'; -import { runAgent, type RunAgentOptions } from '../../agents/runner.js'; +import type { RunAgentOptions } from '../../agents/runner.js'; import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js'; import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js'; -import { JudgmentStrategyFactory, type JudgmentContext } from './judgment/index.js'; +import { generateReport } from './agent-usecases.js'; import { createLogger } from '../../shared/utils/index.js'; import { buildSessionKey } from './session-key.js'; +export { runStatusJudgmentPhase } from './status-judgment-phase.js'; const log = createLogger('phase-runner'); @@ -213,7 +214,7 @@ async function runSingleReportAttempt( let response: AgentResponse; try { - response = await runAgent(step.persona, instruction, options); + response = await generateReport(step.persona, instruction, options); } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg); @@ -241,55 +242,3 @@ async function runSingleReportAttempt( ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status); return { kind: 'success', content: trimmedContent, response }; } - -/** - * Phase 3: Status judgment. - * Uses the 'conductor' agent in a new session to output a status tag. - * Implements multi-stage fallback logic to ensure judgment succeeds. - * Returns the Phase 3 response content (containing the status tag). - */ -export async function runStatusJudgmentPhase( - step: PieceMovement, - ctx: PhaseRunnerContext, -): Promise { - log.debug('Running status judgment phase', { movement: step.name }); - - const strategies = JudgmentStrategyFactory.createStrategies(); - const sessionKey = buildSessionKey(step); - const judgmentContext: JudgmentContext = { - step, - cwd: ctx.cwd, - language: ctx.language, - reportDir: ctx.reportDir, - lastResponse: ctx.lastResponse, - sessionId: ctx.getSessionId(sessionKey), - }; - - for (const strategy of strategies) { - if (!strategy.canApply(judgmentContext)) { - log.debug(`Strategy ${strategy.name} not applicable, skipping`); - continue; - } - - log.debug(`Trying strategy: ${strategy.name}`); - ctx.onPhaseStart?.(step, 3, 'judge', `Strategy: ${strategy.name}`); - - try { - const result = await strategy.execute(judgmentContext); - if (result.success) { - log.debug(`Strategy ${strategy.name} succeeded`, { tag: result.tag }); - ctx.onPhaseComplete?.(step, 3, 'judge', result.tag!, 'done'); - return result.tag!; - } - - log.debug(`Strategy ${strategy.name} failed`, { reason: result.reason }); - } catch (error) { - const errorMsg = error instanceof Error ? error.message : String(error); - log.debug(`Strategy ${strategy.name} threw error`, { error: errorMsg }); - } - } - - const errorMsg = 'All judgment strategies failed'; - ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg); - throw new Error(errorMsg); -} diff --git a/src/core/piece/schema-loader.ts b/src/core/piece/schema-loader.ts new file mode 100644 index 0000000..d4067aa --- /dev/null +++ b/src/core/piece/schema-loader.ts @@ -0,0 +1,50 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { getResourcesDir } from '../../infra/resources/index.js'; + +type JsonSchema = Record; + +const schemaCache = new Map(); + +function loadSchema(name: string): JsonSchema { + const cached = schemaCache.get(name); + if (cached) { + return cached; + } + const schemaPath = join(getResourcesDir(), 'schemas', name); + const content = readFileSync(schemaPath, 'utf-8'); + const parsed = JSON.parse(content) as JsonSchema; + schemaCache.set(name, parsed); + return parsed; +} + +function cloneSchema(schema: JsonSchema): JsonSchema { + return JSON.parse(JSON.stringify(schema)) as JsonSchema; +} + +export function loadJudgmentSchema(): JsonSchema { + return loadSchema('judgment.json'); +} + +export function loadEvaluationSchema(): JsonSchema { + return loadSchema('evaluation.json'); +} + +export function loadDecompositionSchema(maxParts: number): JsonSchema { + if (!Number.isInteger(maxParts) || maxParts <= 0) { + throw new Error(`maxParts must be a positive integer: ${maxParts}`); + } + + const schema = cloneSchema(loadSchema('decomposition.json')); + const properties = schema.properties; + if (!properties || typeof properties !== 'object' || Array.isArray(properties)) { + throw new Error('decomposition schema is invalid: properties is missing'); + } + const rawParts = (properties as Record).parts; + if (!rawParts || typeof rawParts !== 'object' || Array.isArray(rawParts)) { + throw new Error('decomposition schema is invalid: parts is missing'); + } + + (rawParts as Record).maxItems = maxParts; + return schema; +} diff --git a/src/core/piece/status-judgment-phase.ts b/src/core/piece/status-judgment-phase.ts new file mode 100644 index 0000000..1a7b501 --- /dev/null +++ b/src/core/piece/status-judgment-phase.ts @@ -0,0 +1,78 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; +import type { PieceMovement } from '../models/types.js'; +import { judgeStatus } from './agent-usecases.js'; +import { StatusJudgmentBuilder } from './instruction/StatusJudgmentBuilder.js'; +import { getReportFiles } from './evaluation/rule-utils.js'; +import { createLogger } from '../../shared/utils/index.js'; +import type { PhaseRunnerContext } from './phase-runner.js'; + +const log = createLogger('phase-runner'); + +/** + * Phase 3: Status judgment. + * Uses the 'conductor' agent in a new session to output a status tag. + * Implements multi-stage fallback logic to ensure judgment succeeds. + * Returns the Phase 3 response content (containing the status tag). + */ +export async function runStatusJudgmentPhase( + step: PieceMovement, + ctx: PhaseRunnerContext, +): Promise { + log.debug('Running status judgment phase', { movement: step.name }); + if (!step.rules || step.rules.length === 0) { + throw new Error(`Status judgment requires rules for movement "${step.name}"`); + } + + const reportFiles = getReportFiles(step.outputContracts); + let instruction: string | undefined; + + if (reportFiles.length > 0) { + const reports: string[] = []; + for (const fileName of reportFiles) { + const filePath = resolve(ctx.reportDir, fileName); + if (!existsSync(filePath)) { + continue; + } + const content = readFileSync(filePath, 'utf-8'); + reports.push(`# ${fileName}\n\n${content}`); + } + if (reports.length > 0) { + instruction = new StatusJudgmentBuilder(step, { + language: ctx.language, + reportContent: reports.join('\n\n---\n\n'), + inputSource: 'report', + useStructuredOutput: true, + }).build(); + } + } + + if (instruction == null) { + if (!ctx.lastResponse) { + throw new Error(`Status judgment requires report or lastResponse for movement "${step.name}"`); + } + + instruction = new StatusJudgmentBuilder(step, { + language: ctx.language, + lastResponse: ctx.lastResponse, + inputSource: 'response', + useStructuredOutput: true, + }).build(); + } + + ctx.onPhaseStart?.(step, 3, 'judge', instruction); + try { + const result = await judgeStatus(instruction, step.rules, { + cwd: ctx.cwd, + movementName: step.name, + language: ctx.language, + }); + const tag = `[${step.name.toUpperCase()}:${result.ruleIndex + 1}]`; + ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done'); + return tag; + } catch (error) { + const errorMsg = error instanceof Error ? error.message : String(error); + ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg); + throw error; + } +} diff --git a/src/features/tasks/execute/pieceExecution.ts b/src/features/tasks/execute/pieceExecution.ts index 0135225..01d559c 100644 --- a/src/features/tasks/execute/pieceExecution.ts +++ b/src/features/tasks/execute/pieceExecution.ts @@ -113,6 +113,16 @@ function assertTaskPrefixPair( } } +function toJudgmentMatchMethod( + matchedRuleMethod: string | undefined, +): string | undefined { + if (!matchedRuleMethod) return undefined; + if (matchedRuleMethod === 'structured_output') return 'structured_output'; + if (matchedRuleMethod === 'ai_judge' || matchedRuleMethod === 'ai_judge_fallback') return 'ai_judge'; + if (matchedRuleMethod === 'phase3_tag' || matchedRuleMethod === 'phase1_tag') return 'tag_fallback'; + return undefined; +} + function createOutputFns(prefixWriter: TaskPrefixWriter | undefined): OutputFns { if (!prefixWriter) { return { @@ -587,6 +597,7 @@ export async function executePiece( } // Write step_complete record to NDJSON log + const matchMethod = toJudgmentMatchMethod(response.matchedRuleMethod); const record: NdjsonStepComplete = { type: 'step_complete', step: step.name, @@ -596,6 +607,7 @@ export async function executePiece( instruction, ...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}), ...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}), + ...(matchMethod ? { matchMethod } : {}), ...(response.error ? { error: response.error } : {}), timestamp: response.timestamp.toISOString(), }; diff --git a/src/index.ts b/src/index.ts index 623af4d..ec9cbb8 100644 --- a/src/index.ts +++ b/src/index.ts @@ -61,12 +61,6 @@ export { createCanUseToolCallback, createAskUserQuestionHooks, buildSdkOptions, - callClaude, - callClaudeCustom, - callClaudeAgent, - callClaudeSkill, - detectRuleIndex, - isRegexSafe, } from './infra/claude/index.js'; export type { StreamEvent, @@ -90,7 +84,8 @@ export type { } from './infra/claude/index.js'; // Codex integration -export * from './infra/codex/index.js'; +export { CodexClient, mapToCodexSandboxMode } from './infra/codex/index.js'; +export type { CodexCallOptions, CodexSandboxMode } from './infra/codex/index.js'; // Agent execution export * from './agents/index.js'; @@ -115,12 +110,15 @@ export { StatusJudgmentBuilder, buildEditRule, RuleEvaluator, - detectMatchedRule, evaluateAggregateConditions, AggregateEvaluator, needsStatusJudgmentPhase, - runReportPhase, - runStatusJudgmentPhase, + executeAgent, + generateReport, + executePart, + judgeStatus, + evaluateCondition, + decomposeTask, } from './core/piece/index.js'; export type { PieceEvents, @@ -133,6 +131,7 @@ export type { ProviderType, RuleMatch, RuleEvaluatorContext, + JudgeStatusResult, ReportInstructionContext, StatusJudgmentContext, InstructionContext, diff --git a/src/infra/claude/client.ts b/src/infra/claude/client.ts index 497e29f..53b4f2c 100644 --- a/src/infra/claude/client.ts +++ b/src/infra/claude/client.ts @@ -52,6 +52,7 @@ export class ClaudeClient { onAskUserQuestion: options.onAskUserQuestion, bypassPermissions: options.bypassPermissions, anthropicApiKey: options.anthropicApiKey, + outputSchema: options.outputSchema, }; } @@ -76,6 +77,7 @@ export class ClaudeClient { timestamp: new Date(), sessionId: result.sessionId, error: result.error, + structuredOutput: result.structuredOutput, }; } @@ -104,6 +106,7 @@ export class ClaudeClient { timestamp: new Date(), sessionId: result.sessionId, error: result.error, + structuredOutput: result.structuredOutput, }; } @@ -153,6 +156,7 @@ export class ClaudeClient { timestamp: new Date(), sessionId: result.sessionId, error: result.error, + structuredOutput: result.structuredOutput, }; } diff --git a/src/infra/claude/executor.ts b/src/infra/claude/executor.ts index 23f8269..0365dad 100644 --- a/src/infra/claude/executor.ts +++ b/src/infra/claude/executor.ts @@ -94,6 +94,7 @@ export class QueryExecutor { let resultContent: string | undefined; let hasResultMessage = false; let accumulatedAssistantText = ''; + let structuredOutput: Record | undefined; let onExternalAbort: (() => void) | undefined; try { @@ -139,6 +140,17 @@ export class QueryExecutor { const resultMsg = message as SDKResultMessage; if (resultMsg.subtype === 'success') { resultContent = resultMsg.result; + const rawStructuredOutput = (resultMsg as unknown as { + structured_output?: unknown; + structuredOutput?: unknown; + }).structured_output ?? (resultMsg as unknown as { structuredOutput?: unknown }).structuredOutput; + if ( + rawStructuredOutput + && typeof rawStructuredOutput === 'object' + && !Array.isArray(rawStructuredOutput) + ) { + structuredOutput = rawStructuredOutput as Record; + } success = true; } else { success = false; @@ -169,6 +181,7 @@ export class QueryExecutor { content: finalContent.trim(), sessionId, fullContent: accumulatedAssistantText.trim(), + structuredOutput, }; } catch (error) { if (onExternalAbort && options.abortSignal) { diff --git a/src/infra/claude/options-builder.ts b/src/infra/claude/options-builder.ts index 86e0871..dac37ab 100644 --- a/src/infra/claude/options-builder.ts +++ b/src/infra/claude/options-builder.ts @@ -65,6 +65,12 @@ export class SdkOptionsBuilder { if (this.options.agents) sdkOptions.agents = this.options.agents; if (this.options.mcpServers) sdkOptions.mcpServers = this.options.mcpServers; if (this.options.systemPrompt) sdkOptions.systemPrompt = this.options.systemPrompt; + if (this.options.outputSchema) { + (sdkOptions as Record).outputFormat = { + type: 'json_schema', + schema: this.options.outputSchema, + }; + } if (canUseTool) sdkOptions.canUseTool = canUseTool; if (hooks) sdkOptions.hooks = hooks; diff --git a/src/infra/claude/types.ts b/src/infra/claude/types.ts index aa4d080..1c19741 100644 --- a/src/infra/claude/types.ts +++ b/src/infra/claude/types.ts @@ -109,6 +109,8 @@ export interface ClaudeResult { interrupted?: boolean; /** All assistant text accumulated during execution (for status detection) */ fullContent?: string; + /** Structured output returned by Claude SDK */ + structuredOutput?: Record; } /** Extended result with query ID for concurrent execution */ @@ -141,6 +143,8 @@ export interface ClaudeCallOptions { bypassPermissions?: boolean; /** Anthropic API key to inject via env (bypasses CLI auth) */ anthropicApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; } /** Options for spawning a Claude SDK query (low-level, used by executor/process) */ @@ -168,6 +172,8 @@ export interface ClaudeSpawnOptions { bypassPermissions?: boolean; /** Anthropic API key to inject via env (bypasses CLI auth) */ anthropicApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; /** Callback for stderr output from the Claude Code process */ onStderr?: (data: string) => void; } diff --git a/src/infra/codex/client.ts b/src/infra/codex/client.ts index 1fb5da4..48db412 100644 --- a/src/infra/codex/client.ts +++ b/src/infra/codex/client.ts @@ -150,13 +150,18 @@ export class CodexClient { const diag = createStreamDiagnostics('codex-sdk', { agentType, model: options.model, attempt }); diagRef = diag; - const { events } = await thread.runStreamed(fullPrompt, { + const runOptions: Record = { signal: streamAbortController.signal, - }); + }; + if (options.outputSchema) { + runOptions.outputSchema = options.outputSchema; + } + const { events } = await thread.runStreamed(fullPrompt, runOptions as never); resetIdleTimeout(); diag.onConnected(); let content = ''; + let structuredOutput: Record | undefined; const contentOffsets = new Map(); let success = true; let failureMessage = ''; @@ -189,6 +194,20 @@ export class CodexClient { break; } + if (event.type === 'turn.completed') { + const rawFinalResponse = (event as unknown as { + turn?: { finalResponse?: unknown }; + }).turn?.finalResponse; + if ( + rawFinalResponse + && typeof rawFinalResponse === 'object' + && !Array.isArray(rawFinalResponse) + ) { + structuredOutput = rawFinalResponse as Record; + } + continue; + } + if (event.type === 'item.started') { const item = event.item as CodexItem | undefined; if (item) { @@ -278,6 +297,7 @@ export class CodexClient { content: trimmed, timestamp: new Date(), sessionId: currentThreadId, + structuredOutput, }; } catch (error) { const message = getErrorMessage(error); diff --git a/src/infra/codex/types.ts b/src/infra/codex/types.ts index 1834167..097eed6 100644 --- a/src/infra/codex/types.ts +++ b/src/infra/codex/types.ts @@ -31,4 +31,6 @@ export interface CodexCallOptions { onStream?: StreamCallback; /** OpenAI API key (bypasses CLI auth) */ openaiApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; } diff --git a/src/infra/fs/session.ts b/src/infra/fs/session.ts index 2e4660e..34ea8a7 100644 --- a/src/infra/fs/session.ts +++ b/src/infra/fs/session.ts @@ -113,6 +113,7 @@ export class SessionManager { ...(record.error ? { error: record.error } : {}), ...(record.matchedRuleIndex != null ? { matchedRuleIndex: record.matchedRuleIndex } : {}), ...(record.matchedRuleMethod ? { matchedRuleMethod: record.matchedRuleMethod } : {}), + ...(record.matchMethod ? { matchMethod: record.matchMethod } : {}), }); sessionLog.iterations++; } diff --git a/src/infra/mock/client.ts b/src/infra/mock/client.ts index 4fb0d4f..9912032 100644 --- a/src/infra/mock/client.ts +++ b/src/infra/mock/client.ts @@ -65,6 +65,7 @@ export async function callMock( content, timestamp: new Date(), sessionId, + structuredOutput: options.structuredOutput, }; } diff --git a/src/infra/mock/types.ts b/src/infra/mock/types.ts index f55b2bb..1c3d0c8 100644 --- a/src/infra/mock/types.ts +++ b/src/infra/mock/types.ts @@ -13,6 +13,8 @@ export interface MockCallOptions { mockResponse?: string; /** Fixed status to return (optional, defaults to 'done') */ mockStatus?: 'done' | 'blocked' | 'error' | 'approved' | 'rejected' | 'improve'; + /** Structured output payload returned as-is */ + structuredOutput?: Record; } /** A single entry in a mock scenario */ diff --git a/src/infra/opencode/client.ts b/src/infra/opencode/client.ts index 18c70b6..3366cb5 100644 --- a/src/infra/opencode/client.ts +++ b/src/infra/opencode/client.ts @@ -329,14 +329,22 @@ export class OpenCodeClient { diag.onConnected(); const tools = mapToOpenCodeTools(options.allowedTools); + const promptPayload: Record = { + sessionID: sessionId, + directory: options.cwd, + model: parsedModel, + ...(tools ? { tools } : {}), + parts: [{ type: 'text' as const, text: fullPrompt }], + }; + if (options.outputSchema) { + promptPayload.outputFormat = { + type: 'json_schema', + schema: options.outputSchema, + }; + } + await client.session.promptAsync( - { - sessionID: sessionId, - directory: options.cwd, - model: parsedModel, - ...(tools ? { tools } : {}), - parts: [{ type: 'text' as const, text: fullPrompt }], - }, + promptPayload as never, { signal: streamAbortController.signal }, ); @@ -571,6 +579,17 @@ export class OpenCodeClient { } const trimmed = content.trim(); + let structuredOutput: Record | undefined; + if (options.outputSchema && trimmed.startsWith('{')) { + try { + const parsed = JSON.parse(trimmed) as unknown; + if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) { + structuredOutput = parsed as Record; + } + } catch { + // Non-JSON response falls back to text path. + } + } emitResult(options.onStream, true, trimmed, sessionId); return { @@ -579,6 +598,7 @@ export class OpenCodeClient { content: trimmed, timestamp: new Date(), sessionId, + structuredOutput, }; } catch (error) { const message = getErrorMessage(error); diff --git a/src/infra/opencode/types.ts b/src/infra/opencode/types.ts index fb5fa5a..d981247 100644 --- a/src/infra/opencode/types.ts +++ b/src/infra/opencode/types.ts @@ -170,4 +170,6 @@ export interface OpenCodeCallOptions { onAskUserQuestion?: AskUserQuestionHandler; /** OpenCode API key */ opencodeApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; } diff --git a/src/infra/providers/claude.ts b/src/infra/providers/claude.ts index c59fe8f..81c3a2f 100644 --- a/src/infra/providers/claude.ts +++ b/src/infra/providers/claude.ts @@ -22,6 +22,7 @@ function toClaudeOptions(options: ProviderCallOptions): ClaudeCallOptions { onAskUserQuestion: options.onAskUserQuestion, bypassPermissions: options.bypassPermissions, anthropicApiKey: options.anthropicApiKey ?? resolveAnthropicApiKey(), + outputSchema: options.outputSchema, }; } diff --git a/src/infra/providers/codex.ts b/src/infra/providers/codex.ts index 88c67d2..47a4de8 100644 --- a/src/infra/providers/codex.ts +++ b/src/infra/providers/codex.ts @@ -33,6 +33,7 @@ function toCodexOptions(options: ProviderCallOptions): CodexCallOptions { permissionMode: options.permissionMode, onStream: options.onStream, openaiApiKey: options.openaiApiKey ?? resolveOpenaiApiKey(), + outputSchema: options.outputSchema, }; } diff --git a/src/infra/providers/opencode.ts b/src/infra/providers/opencode.ts index 19e9798..3243158 100644 --- a/src/infra/providers/opencode.ts +++ b/src/infra/providers/opencode.ts @@ -22,6 +22,7 @@ function toOpenCodeOptions(options: ProviderCallOptions): OpenCodeCallOptions { onStream: options.onStream, onAskUserQuestion: options.onAskUserQuestion, opencodeApiKey: options.opencodeApiKey ?? resolveOpencodeApiKey(), + outputSchema: options.outputSchema, }; } diff --git a/src/infra/providers/types.ts b/src/infra/providers/types.ts index d2bc48d..e9214b8 100644 --- a/src/infra/providers/types.ts +++ b/src/infra/providers/types.ts @@ -40,6 +40,8 @@ export interface ProviderCallOptions { openaiApiKey?: string; /** OpenCode API key for OpenCode provider */ opencodeApiKey?: string; + /** JSON Schema for structured output */ + outputSchema?: Record; } /** A configured agent ready to be called */ diff --git a/src/shared/utils/types.ts b/src/shared/utils/types.ts index 2f33f52..2926689 100644 --- a/src/shared/utils/types.ts +++ b/src/shared/utils/types.ts @@ -26,6 +26,8 @@ export interface SessionLog { matchedRuleIndex?: number; /** How the rule match was detected */ matchedRuleMethod?: string; + /** Method used by status judgment phase */ + matchMethod?: string; }>; } @@ -56,6 +58,7 @@ export interface NdjsonStepComplete { instruction: string; matchedRuleIndex?: number; matchedRuleMethod?: string; + matchMethod?: string; error?: string; timestamp: string; }