From 8403a7c8922d7e04bf77f81a309b83efae5dd4fc Mon Sep 17 00:00:00 2001 From: nrs <38722970+nrslib@users.noreply.github.com> Date: Wed, 4 Mar 2026 23:07:36 +0900 Subject: [PATCH] takt: add-trace-report-generation (#467) --- src/__tests__/agent-usecases.test.ts | 97 +++++ .../config-modularity-boundary.test.ts | 29 ++ src/__tests__/engine-arpeggio.test.ts | 146 ++++++-- src/__tests__/engine-error.test.ts | 10 +- src/__tests__/engine-happy-path.test.ts | 9 +- src/__tests__/engine-parallel-failure.test.ts | 40 +- src/__tests__/engine-parallel.test.ts | 6 +- src/__tests__/engine-team-leader.test.ts | 125 +++++-- src/__tests__/engine-test-helpers.ts | 8 +- .../it-config-provider-options.test.ts | 9 +- .../it-provider-config-block.test.ts | 8 +- .../phase-runner-report-history.test.ts | 47 ++- .../pieceExecution-ask-user-question.test.ts | 1 + .../pieceExecution-debug-prompts.test.ts | 223 ++++++++++- .../pieceExecution-session-loading.test.ts | 1 + src/__tests__/report-phase-retry.test.ts | 65 ++-- src/__tests__/status-judgment-phase.test.ts | 124 ++++++ src/__tests__/traceReport.test.ts | 236 ++++++++++++ src/agents/agent-usecases.ts | 354 +----------------- src/agents/decompose-task-usecase.ts | 222 +++++++++++ src/agents/judge-status-usecase.ts | 184 +++++++++ src/agents/runner.ts | 25 +- src/agents/types.ts | 4 + src/core/piece/engine/ArpeggioRunner.ts | 45 ++- src/core/piece/engine/MovementExecutor.ts | 49 ++- src/core/piece/engine/OptionsBuilder.ts | 34 +- src/core/piece/engine/ParallelRunner.ts | 56 ++- src/core/piece/engine/PieceEngine.ts | 78 +++- src/core/piece/engine/TeamLeaderRunner.ts | 35 +- src/core/piece/phase-runner.ts | 58 ++- src/core/piece/status-judgment-phase.ts | 39 +- src/core/piece/types.ts | 42 ++- src/features/tasks/execute/pieceExecution.ts | 68 ++-- src/features/tasks/execute/sessionLogger.ts | 187 +++++++-- src/features/tasks/execute/traceReport.ts | 55 +++ .../tasks/execute/traceReportParser.ts | 260 +++++++++++++ 
.../tasks/execute/traceReportRedaction.ts | 81 ++++ .../tasks/execute/traceReportRenderer.ts | 297 +++++++++++++++ .../tasks/execute/traceReportTypes.ts | 48 +++ .../tasks/execute/traceReportWriter.ts | 81 ++++ src/infra/fs/index.ts | 1 + src/infra/fs/session.ts | 1 + src/shared/utils/debug.ts | 9 + src/shared/utils/phaseExecutionId.ts | 50 +++ src/shared/utils/types.ts | 25 ++ 45 files changed, 2970 insertions(+), 602 deletions(-) create mode 100644 src/__tests__/status-judgment-phase.test.ts create mode 100644 src/__tests__/traceReport.test.ts create mode 100644 src/agents/decompose-task-usecase.ts create mode 100644 src/agents/judge-status-usecase.ts create mode 100644 src/features/tasks/execute/traceReport.ts create mode 100644 src/features/tasks/execute/traceReportParser.ts create mode 100644 src/features/tasks/execute/traceReportRedaction.ts create mode 100644 src/features/tasks/execute/traceReportRenderer.ts create mode 100644 src/features/tasks/execute/traceReportTypes.ts create mode 100644 src/features/tasks/execute/traceReportWriter.ts create mode 100644 src/shared/utils/phaseExecutionId.ts diff --git a/src/__tests__/agent-usecases.test.ts b/src/__tests__/agent-usecases.test.ts index a02e099..b82e609 100644 --- a/src/__tests__/agent-usecases.test.ts +++ b/src/__tests__/agent-usecases.test.ts @@ -43,6 +43,13 @@ function doneResponse(content: string, structuredOutput?: Record { beforeEach(() => { @@ -173,6 +180,75 @@ describe('agent-usecases', () => { expect(runAgent).toHaveBeenCalledTimes(3); }); + it('judgeStatus は Phase 3 の内部ステージログを順序どおりに通知する', async () => { + const onJudgeStage = vi.fn(); + // Stage 1: structured output fails + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match')); + // Stage 2: tag detection succeeds + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('[REVIEW:2]')); + + await judgeStatus( + 'structured', + 'tag', + [ + { condition: 'a', next: 'one' }, + { condition: 'b', next: 'two' }, + ], + { + ...judgeOptions, + 
onJudgeStage, + } as typeof judgeOptions & { onJudgeStage: (entry: JudgeStageLog) => void }, + ); + + expect(onJudgeStage).toHaveBeenCalledTimes(2); + expect(onJudgeStage).toHaveBeenNthCalledWith(1, expect.objectContaining({ + stage: 1, + method: 'structured_output', + status: 'done', + instruction: 'structured', + response: 'no match', + })); + expect(onJudgeStage).toHaveBeenNthCalledWith(2, expect.objectContaining({ + stage: 2, + method: 'phase3_tag', + status: 'done', + instruction: 'tag', + response: '[REVIEW:2]', + })); + }); + + it('judgeStatus は全ステージ失敗時にも Stage 3 までログ通知する', async () => { + const onJudgeStage = vi.fn(); + // Stage 1: structured output fails + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match')); + // Stage 2: tag detection fails + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no tag')); + // Stage 3: evaluateCondition fails + vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('still no match')); + vi.mocked(detectJudgeIndex).mockReturnValue(-1); + + await expect( + judgeStatus( + 'structured', + 'tag', + [ + { condition: 'a', next: 'one' }, + { condition: 'b', next: 'two' }, + ], + { + ...judgeOptions, + onJudgeStage, + } as typeof judgeOptions & { onJudgeStage: (entry: JudgeStageLog) => void }, + ), + ).rejects.toThrow('Status not found for movement "review"'); + + expect(onJudgeStage).toHaveBeenCalledTimes(3); + expect(onJudgeStage).toHaveBeenLastCalledWith(expect.objectContaining({ + stage: 3, + method: 'ai_judge', + })); + }); + it('judgeStatus は全ての判定に失敗したらエラー', async () => { // Stage 1: structured output fails vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match')); @@ -232,6 +308,27 @@ describe('agent-usecases', () => { .rejects.toThrow('Team leader failed: bad output'); }); + it('decomposeTask は onPromptResolved を runAgent に伝搬する', async () => { + vi.mocked(runAgent).mockResolvedValue(doneResponse('x', { + parts: [ + { id: 'p1', title: 'Part 1', instruction: 'Do 1', timeout_ms: null }, + ], + 
})); + const onPromptResolved = vi.fn(); + + await decomposeTask('instruction', 2, { + cwd: '/repo', + persona: 'team-leader', + onPromptResolved, + }); + + expect(runAgent).toHaveBeenCalledWith( + 'team-leader', + expect.any(String), + expect.objectContaining({ onPromptResolved }), + ); + }); + it('requestMoreParts は構造化出力をパースして返す', async () => { vi.mocked(runAgent).mockResolvedValue(doneResponse('x', { done: false, diff --git a/src/__tests__/config-modularity-boundary.test.ts b/src/__tests__/config-modularity-boundary.test.ts index 79edbe1..10a6ac3 100644 --- a/src/__tests__/config-modularity-boundary.test.ts +++ b/src/__tests__/config-modularity-boundary.test.ts @@ -26,4 +26,33 @@ describe('config module file-size boundary', () => { const lineCount = getLineCount('../features/tasks/execute/pieceExecution.ts'); expect(lineCount).toBeLessThanOrEqual(300); }); + + it('keeps sessionLogger.ts under 300 lines', () => { + const lineCount = getLineCount('../features/tasks/execute/sessionLogger.ts'); + expect(lineCount).toBeLessThanOrEqual(300); + }); + + it('keeps traceReport renderer/parser split modules under 300 lines', () => { + const rendererLineCount = getLineCount('../features/tasks/execute/traceReportRenderer.ts'); + const parserLineCount = getLineCount('../features/tasks/execute/traceReportParser.ts'); + expect(rendererLineCount).toBeLessThanOrEqual(300); + expect(parserLineCount).toBeLessThanOrEqual(300); + }); + + it('keeps traceReport.ts as thin facade under 120 lines', () => { + const lineCount = getLineCount('../features/tasks/execute/traceReport.ts'); + expect(lineCount).toBeLessThanOrEqual(120); + }); + + it('keeps agent-usecases.ts as thin facade under 120 lines', () => { + const lineCount = getLineCount('../agents/agent-usecases.ts'); + expect(lineCount).toBeLessThanOrEqual(120); + }); + + it('keeps split agent usecases under 300 lines each', () => { + const judgeLineCount = getLineCount('../agents/judge-status-usecase.ts'); + const decomposeLineCount = 
getLineCount('../agents/decompose-task-usecase.ts'); + expect(judgeLineCount).toBeLessThanOrEqual(300); + expect(decomposeLineCount).toBeLessThanOrEqual(300); + }); }); diff --git a/src/__tests__/engine-arpeggio.test.ts b/src/__tests__/engine-arpeggio.test.ts index 0038d0c..e424bc6 100644 --- a/src/__tests__/engine-arpeggio.test.ts +++ b/src/__tests__/engine-arpeggio.test.ts @@ -100,6 +100,19 @@ function createEngineOptions(tmpDir: string): PieceEngineOptions { }; } +function mockRunAgentWithPrompt(...responses: ReturnType[]): void { + const mock = vi.mocked(runAgent); + for (const response of responses) { + mock.mockImplementationOnce(async (persona, instruction, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? persona : '', + userInstruction: instruction, + }); + return response; + }); + } +} + describe('ArpeggioRunner integration', () => { let engine: PieceEngine | undefined; @@ -122,10 +135,11 @@ describe('ArpeggioRunner integration', () => { // Mock agent to return batch-specific responses const mockAgent = vi.mocked(runAgent); - mockAgent - .mockResolvedValueOnce(makeResponse({ content: 'Processed Alice' })) - .mockResolvedValueOnce(makeResponse({ content: 'Processed Bob' })) - .mockResolvedValueOnce(makeResponse({ content: 'Processed Charlie' })); + mockRunAgentWithPrompt( + makeResponse({ content: 'Processed Alice' }), + makeResponse({ content: 'Processed Bob' }), + makeResponse({ content: 'Processed Charlie' }), + ); // Mock rule detection for the merged result vi.mocked(detectMatchedRule).mockResolvedValueOnce({ @@ -163,9 +177,10 @@ describe('ArpeggioRunner integration', () => { const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir); const mockAgent = vi.mocked(runAgent); - mockAgent - .mockResolvedValueOnce(makeResponse({ content: 'Batch 0 result' })) - .mockResolvedValueOnce(makeResponse({ content: 'Batch 1 result' })); + mockRunAgentWithPrompt( + makeResponse({ content: 'Batch 0 result' }), + 
makeResponse({ content: 'Batch 1 result' }), + ); vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, @@ -189,13 +204,12 @@ describe('ArpeggioRunner integration', () => { const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir); const mockAgent = vi.mocked(runAgent); - // First batch succeeds - mockAgent.mockResolvedValueOnce(makeResponse({ content: 'OK' })); - // Second batch fails twice (initial + 1 retry) - mockAgent.mockResolvedValueOnce(makeResponse({ status: 'error', error: 'fail1' })); - mockAgent.mockResolvedValueOnce(makeResponse({ status: 'error', error: 'fail2' })); - // Third batch succeeds - mockAgent.mockResolvedValueOnce(makeResponse({ content: 'OK' })); + mockRunAgentWithPrompt( + makeResponse({ content: 'OK' }), + makeResponse({ status: 'error', error: 'fail1' }), + makeResponse({ status: 'error', error: 'fail2' }), + makeResponse({ content: 'OK' }), + ); engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir)); const state = await engine.run(); @@ -210,10 +224,11 @@ describe('ArpeggioRunner integration', () => { const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir); const mockAgent = vi.mocked(runAgent); - mockAgent - .mockResolvedValueOnce(makeResponse({ content: 'Result A' })) - .mockResolvedValueOnce(makeResponse({ content: 'Result B' })) - .mockResolvedValueOnce(makeResponse({ content: 'Result C' })); + mockRunAgentWithPrompt( + makeResponse({ content: 'Result A' }), + makeResponse({ content: 'Result B' }), + makeResponse({ content: 'Result C' }), + ); vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, @@ -234,10 +249,11 @@ describe('ArpeggioRunner integration', () => { const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir); const mockAgent = vi.mocked(runAgent); - mockAgent - .mockResolvedValueOnce(makeResponse({ content: 'A' })) - .mockResolvedValueOnce(makeResponse({ content: 'B' })) - .mockResolvedValueOnce(makeResponse({ content: 'C' })); + mockRunAgentWithPrompt( 
+ makeResponse({ content: 'A' }), + makeResponse({ content: 'B' }), + makeResponse({ content: 'C' }), + ); vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, @@ -251,4 +267,90 @@ describe('ArpeggioRunner integration', () => { expect(mockAgent).toHaveBeenCalledTimes(3); }); + it('should record resolved prompt in phase:start for arpeggio batches', async () => { + const { tmpDir, csvPath, templatePath } = createArpeggioTestDir(); + const arpeggioConfig = createArpeggioConfig(csvPath, templatePath, { concurrency: 2 }); + const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir); + const phaseStarts: string[] = []; + + mockRunAgentWithPrompt( + makeResponse({ content: 'A' }), + makeResponse({ content: 'B' }), + makeResponse({ content: 'C' }), + ); + vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' }); + + engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir)); + engine.on('phase:start', (step, phase, phaseName, instruction) => { + if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute') return; + phaseStarts.push(instruction); + }); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(phaseStarts.length).toBe(3); + expect(phaseStarts.every((instruction) => !instruction.startsWith('[Arpeggio batch'))).toBe(true); + expect(phaseStarts.some((instruction) => instruction.includes('Process '))).toBe(true); + }); + + it('should keep phaseExecutionId bindings correct when completion order is reversed', async () => { + const { tmpDir, csvPath, templatePath } = createArpeggioTestDir(); + const arpeggioConfig = createArpeggioConfig(csvPath, templatePath, { concurrency: 2 }); + const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir); + const phaseStartsByExecutionId = new Map(); + const phaseCompletions: Array<{ phaseExecutionId?: string; content: string }> = []; + + vi.mocked(runAgent).mockImplementation(async (persona, instruction, options) 
=> { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? persona : '', + userInstruction: instruction, + }); + if (instruction.includes('Alice')) { + await new Promise((resolve) => setTimeout(resolve, 40)); + return makeResponse({ content: 'Result Alice' }); + } + if (instruction.includes('Bob')) { + await new Promise((resolve) => setTimeout(resolve, 5)); + return makeResponse({ content: 'Result Bob' }); + } + return makeResponse({ content: 'Result Charlie' }); + }); + vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' }); + + engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir)); + engine.on('phase:start', (step, phase, phaseName, instruction, _promptParts, phaseExecutionId) => { + if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute' || !phaseExecutionId) return; + phaseStartsByExecutionId.set(phaseExecutionId, instruction); + }); + engine.on('phase:complete', (step, phase, phaseName, content, _status, _error, phaseExecutionId) => { + if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute') return; + phaseCompletions.push({ phaseExecutionId, content }); + }); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(phaseCompletions).toHaveLength(3); + expect(new Set(phaseCompletions.map((entry) => entry.phaseExecutionId)).size).toBe(3); + expect(phaseCompletions.map((entry) => entry.content).sort()).toEqual([ + 'Result Alice', + 'Result Bob', + 'Result Charlie', + ]); + for (const completion of phaseCompletions) { + const instruction = completion.phaseExecutionId + ? 
phaseStartsByExecutionId.get(completion.phaseExecutionId) + : undefined; + expect(instruction).toBeDefined(); + if (completion.content === 'Result Alice') { + expect(instruction).toContain('Alice'); + } else if (completion.content === 'Result Bob') { + expect(instruction).toContain('Bob'); + } else { + expect(instruction).toContain('Charlie'); + } + } + }); + }); diff --git a/src/__tests__/engine-error.test.ts b/src/__tests__/engine-error.test.ts index 1a751d7..80e150b 100644 --- a/src/__tests__/engine-error.test.ts +++ b/src/__tests__/engine-error.test.ts @@ -167,9 +167,13 @@ describe('PieceEngine Integration: Error Handling', () => { const engine = new PieceEngine(config, tmpDir, 'test task', { projectCwd: tmpDir }); for (let i = 0; i < 5; i++) { - vi.mocked(runAgent).mockResolvedValueOnce( - makeResponse({ content: `iteration ${i}` }) - ); + vi.mocked(runAgent).mockImplementationOnce(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? 
persona : '', + userInstruction: task, + }); + return makeResponse({ content: `iteration ${i}` }); + }); vi.mocked(detectMatchedRule).mockResolvedValueOnce( { index: 0, method: 'phase1_tag' } ); diff --git a/src/__tests__/engine-happy-path.test.ts b/src/__tests__/engine-happy-path.test.ts index c42e613..f3891d2 100644 --- a/src/__tests__/engine-happy-path.test.ts +++ b/src/__tests__/engine-happy-path.test.ts @@ -544,11 +544,16 @@ describe('PieceEngine Integration: Happy Path', () => { expect(phaseStartFn).toHaveBeenCalledWith( expect.objectContaining({ name: 'plan' }), - 1, 'execute', expect.any(String) + 1, 'execute', expect.any(String), expect.objectContaining({ + systemPrompt: expect.any(String), + userInstruction: expect.any(String), + }), + undefined, + 1, ); expect(phaseCompleteFn).toHaveBeenCalledWith( expect.objectContaining({ name: 'plan' }), - 1, 'execute', expect.any(String), 'done', undefined + 1, 'execute', expect.any(String), 'done', undefined, undefined, 1, ); }); diff --git a/src/__tests__/engine-parallel-failure.test.ts b/src/__tests__/engine-parallel-failure.test.ts index 2ead682..02258c9 100644 --- a/src/__tests__/engine-parallel-failure.test.ts +++ b/src/__tests__/engine-parallel-failure.test.ts @@ -122,13 +122,21 @@ describe('PieceEngine Integration: Parallel Movement Partial Failure', () => { // arch-review fails (exit code 1) mock.mockRejectedValueOnce(new Error('Claude Code process exited with code 1')); // security-review succeeds - mock.mockResolvedValueOnce( - makeResponse({ persona: 'security-review', content: 'Security review passed' }), - ); + mock.mockImplementationOnce(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? 
persona : '', + userInstruction: task, + }); + return makeResponse({ persona: 'security-review', content: 'Security review passed' }); + }); // done step - mock.mockResolvedValueOnce( - makeResponse({ persona: 'done', content: 'Completed' }), - ); + mock.mockImplementationOnce(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? persona : '', + userInstruction: task, + }); + return makeResponse({ persona: 'done', content: 'Completed' }); + }); mockDetectMatchedRuleSequence([ // security-review sub-movement rule match (arch-review has no match — it failed) @@ -179,12 +187,20 @@ describe('PieceEngine Integration: Parallel Movement Partial Failure', () => { const mock = vi.mocked(runAgent); mock.mockRejectedValueOnce(new Error('Session resume failed')); - mock.mockResolvedValueOnce( - makeResponse({ persona: 'security-review', content: 'OK' }), - ); - mock.mockResolvedValueOnce( - makeResponse({ persona: 'done', content: 'Done' }), - ); + mock.mockImplementationOnce(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? persona : '', + userInstruction: task, + }); + return makeResponse({ persona: 'security-review', content: 'OK' }); + }); + mock.mockImplementationOnce(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? 
persona : '', + userInstruction: task, + }); + return makeResponse({ persona: 'done', content: 'Done' }); + }); mockDetectMatchedRuleSequence([ { index: 0, method: 'phase1_tag' }, diff --git a/src/__tests__/engine-parallel.test.ts b/src/__tests__/engine-parallel.test.ts index f86f1bf..411dcbd 100644 --- a/src/__tests__/engine-parallel.test.ts +++ b/src/__tests__/engine-parallel.test.ts @@ -216,11 +216,15 @@ describe('PieceEngine Integration: Parallel Movement Aggregation', () => { ['../personas/supervise.md', makeResponse({ persona: 'supervise', content: 'All passed' })], ]); - vi.mocked(runAgent).mockImplementation(async (persona, _task, options) => { + vi.mocked(runAgent).mockImplementation(async (persona, task, options) => { const response = responsesByPersona.get(persona ?? ''); if (!response) { throw new Error(`Unexpected persona: ${persona}`); } + options.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? persona : '', + userInstruction: task, + }); if (persona === '../personas/arch-review.md') { options.onStream?.({ type: 'text', data: { text: 'arch stream line\n' } }); diff --git a/src/__tests__/engine-team-leader.test.ts b/src/__tests__/engine-team-leader.test.ts index 7a20bf6..0bbd5fb 100644 --- a/src/__tests__/engine-team-leader.test.ts +++ b/src/__tests__/engine-team-leader.test.ts @@ -49,6 +49,19 @@ function buildTeamLeaderConfig(): PieceConfig { }; } +function mockRunAgentWithPrompt(...responses: ReturnType[]): void { + const mock = vi.mocked(runAgent); + for (const response of responses) { + mock.mockImplementationOnce(async (persona, instruction, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? 
persona : '', + userInstruction: instruction, + }); + return response; + }); + } +} + describe('PieceEngine Integration: TeamLeaderRunner', () => { let tmpDir: string; @@ -68,21 +81,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => { const config = buildTeamLeaderConfig(); const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir }); - vi.mocked(runAgent) - .mockResolvedValueOnce(makeResponse({ + mockRunAgentWithPrompt( + makeResponse({ persona: 'team-leader', content: [ '```json', '[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]', '```', ].join('\n'), - })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' })) - .mockResolvedValueOnce(makeResponse({ + }), + makeResponse({ persona: 'coder', content: 'API done' }), + makeResponse({ persona: 'coder', content: 'Tests done' }), + makeResponse({ persona: 'team-leader', structuredOutput: { done: true, reasoning: 'enough', parts: [] }, - })); + }), + ); vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' }); @@ -103,21 +117,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => { const config = buildTeamLeaderConfig(); const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir }); - vi.mocked(runAgent) - .mockResolvedValueOnce(makeResponse({ + mockRunAgentWithPrompt( + makeResponse({ persona: 'team-leader', content: [ '```json', '[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]', '```', ].join('\n'), - })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'api failed' })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'test failed' })) - .mockResolvedValueOnce(makeResponse({ + }), + 
makeResponse({ persona: 'coder', status: 'error', error: 'api failed' }), + makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }), + makeResponse({ persona: 'team-leader', structuredOutput: { done: true, reasoning: 'stop', parts: [] }, - })); + }), + ); const state = await engine.run(); @@ -128,21 +143,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => { const config = buildTeamLeaderConfig(); const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir }); - vi.mocked(runAgent) - .mockResolvedValueOnce(makeResponse({ + mockRunAgentWithPrompt( + makeResponse({ persona: 'team-leader', content: [ '```json', '[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]', '```', ].join('\n'), - })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'test failed' })) - .mockResolvedValueOnce(makeResponse({ + }), + makeResponse({ persona: 'coder', content: 'API done' }), + makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }), + makeResponse({ persona: 'team-leader', structuredOutput: { done: true, reasoning: 'stop', parts: [] }, - })); + }), + ); vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' }); @@ -161,21 +177,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => { const config = buildTeamLeaderConfig(); const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir }); - vi.mocked(runAgent) - .mockResolvedValueOnce(makeResponse({ + mockRunAgentWithPrompt( + makeResponse({ persona: 'team-leader', content: [ '```json', '[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]', '```', ].join('\n'), - })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', 
content: 'api failed from content' })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' })) - .mockResolvedValueOnce(makeResponse({ + }), + makeResponse({ persona: 'coder', status: 'error', content: 'api failed from content' }), + makeResponse({ persona: 'coder', content: 'Tests done' }), + makeResponse({ persona: 'team-leader', structuredOutput: { done: true, reasoning: 'stop', parts: [] }, - })); + }), + ); vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' }); @@ -191,8 +208,8 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => { const config = buildTeamLeaderConfig(); const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir }); - vi.mocked(runAgent) - .mockResolvedValueOnce(makeResponse({ + mockRunAgentWithPrompt( + makeResponse({ persona: 'team-leader', structuredOutput: { parts: [ @@ -200,10 +217,10 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => { { id: 'part-2', title: 'Test', instruction: 'Add tests', timeout_ms: null }, ], }, - })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' })) - .mockResolvedValueOnce(makeResponse({ + }), + makeResponse({ persona: 'coder', content: 'API done' }), + makeResponse({ persona: 'coder', content: 'Tests done' }), + makeResponse({ persona: 'team-leader', structuredOutput: { done: false, @@ -212,16 +229,17 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => { { id: 'part-3', title: 'Docs', instruction: 'Write docs', timeout_ms: null }, ], }, - })) - .mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Docs done' })) - .mockResolvedValueOnce(makeResponse({ + }), + makeResponse({ persona: 'coder', content: 'Docs done' }), + makeResponse({ persona: 'team-leader', structuredOutput: { done: true, reasoning: 'Enough', parts: [], }, - })); + }), + ); 
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' }); @@ -235,4 +253,35 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => { expect(output!.content).toContain('Docs done'); }); + it('team leader の phase:start には分解実行時の実 instruction を記録する', async () => { + const config = buildTeamLeaderConfig(); + const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir }); + const phaseStarts: string[] = []; + engine.on('phase:start', (step, phase, phaseName, instruction) => { + if (step.name !== 'implement' || phase !== 1 || phaseName !== 'execute') return; + phaseStarts.push(instruction); + }); + + mockRunAgentWithPrompt( + makeResponse({ + persona: 'team-leader', + structuredOutput: { + parts: [{ id: 'part-1', title: 'API', instruction: 'Implement API', timeout_ms: null }], + }, + }), + makeResponse({ persona: 'coder', content: 'API done' }), + makeResponse({ + persona: 'team-leader', + structuredOutput: { done: true, reasoning: 'enough', parts: [] }, + }), + ); + vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' }); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(phaseStarts.length).toBeGreaterThan(0); + expect(phaseStarts[0]).toContain('This is decomposition-only planning. Do not execute the task.'); + }); + }); diff --git a/src/__tests__/engine-test-helpers.ts b/src/__tests__/engine-test-helpers.ts index f17dc03..6198357 100644 --- a/src/__tests__/engine-test-helpers.ts +++ b/src/__tests__/engine-test-helpers.ts @@ -136,7 +136,13 @@ export function buildDefaultPieceConfig(overrides: Partial = {}): P export function mockRunAgentSequence(responses: AgentResponse[]): void { const mock = vi.mocked(runAgent); for (const response of responses) { - mock.mockResolvedValueOnce(response); + mock.mockImplementationOnce(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? 
persona : '', + userInstruction: task, + }); + return response; + }); } } diff --git a/src/__tests__/it-config-provider-options.test.ts b/src/__tests__/it-config-provider-options.test.ts index c714d8b..bc1d32b 100644 --- a/src/__tests__/it-config-provider-options.test.ts +++ b/src/__tests__/it-config-provider-options.test.ts @@ -103,7 +103,13 @@ describe('IT: config provider_options reflection', () => { delete process.env.TAKT_PROVIDER_OPTIONS_CODEX_NETWORK_ACCESS; invalidateGlobalConfigCache(); - vi.mocked(runAgent).mockResolvedValue(makeDoneResponse()); + vi.mocked(runAgent).mockImplementation(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? persona : '', + userInstruction: task, + }); + return makeDoneResponse(); + }); }); afterEach(() => { @@ -203,4 +209,3 @@ describe('IT: config provider_options reflection', () => { }); }); }); - diff --git a/src/__tests__/it-provider-config-block.test.ts b/src/__tests__/it-provider-config-block.test.ts index b5d00bb..1a5d3b3 100644 --- a/src/__tests__/it-provider-config-block.test.ts +++ b/src/__tests__/it-provider-config-block.test.ts @@ -83,7 +83,13 @@ describe('IT: provider block reflection', () => { beforeEach(() => { vi.clearAllMocks(); originalConfigDir = process.env.TAKT_CONFIG_DIR; - vi.mocked(runAgent).mockResolvedValue(makeDoneResponse()); + vi.mocked(runAgent).mockImplementation(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? 
persona : '', + userInstruction: task, + }); + return makeDoneResponse(); + }); }); afterEach(() => { diff --git a/src/__tests__/phase-runner-report-history.test.ts b/src/__tests__/phase-runner-report-history.test.ts index 07c896b..377af91 100644 --- a/src/__tests__/phase-runner-report-history.test.ts +++ b/src/__tests__/phase-runner-report-history.test.ts @@ -11,6 +11,7 @@ vi.mock('../agents/runner.js', () => ({ })); import { runAgent } from '../agents/runner.js'; +import type { AgentResponse } from '../core/models/types.js'; function createStep(fileName: string): PieceMovement { return { @@ -51,6 +52,19 @@ function createContext( }; } +function queueRunAgentResponses(responses: AgentResponse[]): void { + const runAgentMock = vi.mocked(runAgent); + for (const response of responses) { + runAgentMock.mockImplementationOnce(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? persona : '', + userInstruction: task, + }); + return response; + }); + } +} + describe('runReportPhase report history behavior', () => { let tmpRoot: string; @@ -71,22 +85,22 @@ describe('runReportPhase report history behavior', () => { const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports'); const step = createStep('05-architect-review.md'); const ctx = createContext(reportDir); - const runAgentMock = vi.mocked(runAgent); - runAgentMock - .mockResolvedValueOnce({ + queueRunAgentResponses([ + { persona: 'reviewers', status: 'done', content: 'First review result', timestamp: new Date('2026-02-10T06:11:43Z'), sessionId: 'session-2', - }) - .mockResolvedValueOnce({ + }, + { persona: 'reviewers', status: 'done', content: 'Second review result', timestamp: new Date('2026-02-10T06:14:37Z'), sessionId: 'session-3', - }); + }, + ]); // When await runReportPhase(step, 1, ctx); @@ -113,29 +127,29 @@ describe('runReportPhase report history behavior', () => { const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 
'reports'); const step = createStep('06-qa-review.md'); const ctx = createContext(reportDir); - const runAgentMock = vi.mocked(runAgent); - runAgentMock - .mockResolvedValueOnce({ + queueRunAgentResponses([ + { persona: 'reviewers', status: 'done', content: 'v1', timestamp: new Date('2026-02-10T06:11:43Z'), sessionId: 'session-2', - }) - .mockResolvedValueOnce({ + }, + { persona: 'reviewers', status: 'done', content: 'v2', timestamp: new Date('2026-02-10T06:11:43Z'), sessionId: 'session-3', - }) - .mockResolvedValueOnce({ + }, + { persona: 'reviewers', status: 'done', content: 'v3', timestamp: new Date('2026-02-10T06:11:43Z'), sessionId: 'session-4', - }); + }, + ]); // When await runReportPhase(step, 1, ctx); @@ -158,14 +172,13 @@ describe('runReportPhase report history behavior', () => { const ctx = createContext(reportDir, (overrides) => { capturedOverrides.push(overrides); }); - const runAgentMock = vi.mocked(runAgent); - runAgentMock.mockResolvedValueOnce({ + queueRunAgentResponses([{ persona: 'reviewers', status: 'done', content: 'Permission-based report execution', timestamp: new Date('2026-02-10T06:21:17Z'), sessionId: 'session-2', - }); + }]); // When await runReportPhase(step, 1, ctx); diff --git a/src/__tests__/pieceExecution-ask-user-question.test.ts b/src/__tests__/pieceExecution-ask-user-question.test.ts index d372745..0ef0ed8 100644 --- a/src/__tests__/pieceExecution-ask-user-question.test.ts +++ b/src/__tests__/pieceExecution-ask-user-question.test.ts @@ -139,6 +139,7 @@ vi.mock('../shared/utils/index.js', () => ({ preventSleep: vi.fn(), isDebugEnabled: vi.fn().mockReturnValue(false), writePromptLog: vi.fn(), + getDebugPromptsLogFile: vi.fn().mockReturnValue(null), generateReportDir: vi.fn().mockReturnValue('test-report-dir'), isValidReportDirName: vi.fn().mockReturnValue(true), playWarningSound: vi.fn(), diff --git a/src/__tests__/pieceExecution-debug-prompts.test.ts b/src/__tests__/pieceExecution-debug-prompts.test.ts index 4b14c7b..6e7bf12 100644 
--- a/src/__tests__/pieceExecution-debug-prompts.test.ts +++ b/src/__tests__/pieceExecution-debug-prompts.test.ts @@ -31,12 +31,57 @@ const { mockIsDebugEnabled, mockWritePromptLog, MockPieceEngine } = vi.hoisted(( const step = this.config.movements[0]!; const timestamp = new Date('2026-02-07T00:00:00.000Z'); const shouldAbort = this.task === 'abort-task'; - + const shouldAbortBeforeComplete = this.task === 'abort-before-complete-task'; + const shouldDuplicatePhase = this.task === 'duplicate-phase-task'; + const shouldEmitSensitive = this.task === 'sensitive-content-task'; const shouldRepeatMovement = this.task === 'repeat-movement-task'; + const shouldReversePhaseCompletion = this.task === 'reverse-phase-complete-task'; const providerInfo = { provider: undefined, model: undefined }; this.emit('movement:start', step, 1, 'movement instruction', providerInfo); - this.emit('phase:start', step, 1, 'execute', 'phase prompt'); - this.emit('phase:complete', step, 1, 'execute', 'phase response', 'done'); + if (shouldReversePhaseCompletion) { + this.emit('phase:start', step, 1, 'execute', 'phase prompt first', { + systemPrompt: '../agents/coder.md', + userInstruction: 'phase prompt first', + }, 'implement:1:1:1', 1); + this.emit('phase:start', step, 1, 'execute', 'phase prompt second', { + systemPrompt: '../agents/coder.md', + userInstruction: 'phase prompt second', + }, 'implement:1:1:2', 1); + } else { + this.emit('phase:start', step, 1, 'execute', shouldEmitSensitive ? 'token=plain-secret' : 'phase prompt', { + systemPrompt: shouldEmitSensitive ? 'Authorization: Bearer super-secret-token' : '../agents/coder.md', + userInstruction: shouldEmitSensitive ? 
'api_key=plain-secret' : 'phase prompt', + }); + } + this.emit('phase:start', step, 3, 'judge', 'phase3 prompt', { + systemPrompt: 'conductor', + userInstruction: 'phase3 prompt', + }); + this.emit('phase:judge_stage', step, 3, 'judge', { + stage: 1, + method: 'structured_output', + status: 'done', + instruction: 'judge stage prompt', + response: 'judge stage response', + }); + this.emit('phase:complete', step, 3, 'judge', '[IMPLEMENT:1]', 'done'); + if (shouldAbortBeforeComplete) { + this.emit('piece:abort', { status: 'aborted', iteration: 1 }, 'user_interrupted'); + return { status: 'aborted', iteration: 1 }; + } + if (shouldReversePhaseCompletion) { + this.emit('phase:complete', step, 1, 'execute', 'phase response second', 'done', undefined, 'implement:1:1:2', 1); + this.emit('phase:complete', step, 1, 'execute', 'phase response first', 'done', undefined, 'implement:1:1:1', 1); + } else { + this.emit('phase:complete', step, 1, 'execute', shouldEmitSensitive ? 'password=plain-secret' : 'phase response', 'done'); + } + if (shouldDuplicatePhase) { + this.emit('phase:start', step, 1, 'execute', 'phase prompt second', { + systemPrompt: '../agents/coder.md', + userInstruction: 'phase prompt second', + }); + this.emit('phase:complete', step, 1, 'execute', 'phase response second', 'done'); + } this.emit( 'movement:complete', step, @@ -154,6 +199,7 @@ vi.mock('../shared/utils/index.js', () => ({ preventSleep: vi.fn(), isDebugEnabled: mockIsDebugEnabled, writePromptLog: mockWritePromptLog, + getDebugPromptsLogFile: vi.fn().mockReturnValue(null), generateReportDir: vi.fn().mockReturnValue('test-report-dir'), isValidReportDirName: vi.fn().mockImplementation((value: string) => /^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(value)), })); @@ -173,6 +219,7 @@ vi.mock('../shared/exitCodes.js', () => ({ import { executePiece } from '../features/tasks/execute/pieceExecution.js'; import { ensureDir, writeFileAtomic } from '../infra/config/index.js'; +import { appendNdjsonLine } from 
'../infra/fs/index.js'; describe('executePiece debug prompts logging', () => { beforeEach(() => { @@ -204,15 +251,16 @@ describe('executePiece debug prompts logging', () => { projectCwd: '/tmp/project', }); - expect(mockWritePromptLog).toHaveBeenCalledTimes(1); - const record = mockWritePromptLog.mock.calls[0]?.[0] as { + expect(mockWritePromptLog).toHaveBeenCalledTimes(2); + const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array<{ movement: string; phase: number; iteration: number; prompt: string; response: string; timestamp: string; - }; + }>; + const record = records.find((entry) => entry.phase === 1)!; expect(record.movement).toBe('implement'); expect(record.phase).toBe(1); expect(record.iteration).toBe(1); @@ -221,6 +269,54 @@ describe('executePiece debug prompts logging', () => { expect(record.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/); }); + it('should separate system prompt and user instruction in debug prompt records', async () => { + mockIsDebugEnabled.mockReturnValue(true); + + await executePiece(makeConfig(), 'task', '/tmp/project', { + projectCwd: '/tmp/project', + }); + + expect(mockWritePromptLog).toHaveBeenCalledTimes(2); + const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array & { phase: number }>; + const record = records.find((entry) => entry.phase === 1)!; + expect(record).toHaveProperty('systemPrompt'); + expect(record).toHaveProperty('userInstruction'); + expect(record.systemPrompt).toBe('../agents/coder.md'); + expect(record.userInstruction).toBe('phase prompt'); + }); + + it('should include phase and judge stage details in trace markdown', async () => { + await executePiece(makeConfig(), 'task', '/tmp/project', { + projectCwd: '/tmp/project', + reportDirName: 'test-report-dir', + }); + + const traceCall = vi.mocked(writeFileAtomic).mock.calls.find( + (call) => String(call[0]).endsWith('/trace.md') + ); + expect(traceCall).toBeDefined(); + const traceContent = String(traceCall?.[1]); + 
expect(traceContent).toContain('## Iteration 1: implement'); + expect(traceContent).toContain('### Phase 1: execute'); + expect(traceContent).toContain('#### Judgment Stages'); + expect(traceContent).toContain('Stage 1 (structured_output): status=done'); + }); + + it('should render trace markdown even when piece aborts before movement completion', async () => { + await executePiece(makeConfig(), 'abort-before-complete-task', '/tmp/project', { + projectCwd: '/tmp/project', + reportDirName: 'test-report-dir', + }); + + const traceCall = vi.mocked(writeFileAtomic).mock.calls.find( + (call) => String(call[0]).endsWith('/trace.md') + ); + expect(traceCall).toBeDefined(); + const traceContent = String(traceCall?.[1]); + expect(traceContent).toContain('- Status: ❌ aborted'); + expect(traceContent).toContain('- Movement Status: in_progress'); + }); + it('should not write prompt log record when debug is disabled', async () => { mockIsDebugEnabled.mockReturnValue(false); @@ -231,6 +327,24 @@ describe('executePiece debug prompts logging', () => { expect(mockWritePromptLog).not.toHaveBeenCalled(); }); + it('should handle repeated phase starts for same movement and phase without missing debug prompt', async () => { + mockIsDebugEnabled.mockReturnValue(true); + + await executePiece(makeConfig(), 'duplicate-phase-task', '/tmp/project', { + projectCwd: '/tmp/project', + }); + + expect(mockWritePromptLog).toHaveBeenCalledTimes(3); + const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array<{ + phase: number; + response: string; + }>; + const phase1Responses = records + .filter((record) => record.phase === 1) + .map((record) => record.response); + expect(phase1Responses).toEqual(['phase response', 'phase response second']); + }); + it('should update movement prefix context on each movement:start event', async () => { const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); @@ -277,11 +391,13 @@ describe('executePiece debug prompts 
logging', () => { reportDirName: 'test-report-dir', }); - const calls = vi.mocked(writeFileAtomic).mock.calls; - expect(calls).toHaveLength(2); + const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter( + (call) => String(call[0]).endsWith('/meta.json') + ); + expect(metaCalls).toHaveLength(2); - const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string }; - const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string }; + const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string }; + const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string }; expect(firstMeta.status).toBe('running'); expect(firstMeta.endTime).toBeUndefined(); expect(secondMeta.status).toBe('completed'); @@ -294,11 +410,13 @@ describe('executePiece debug prompts logging', () => { reportDirName: 'test-report-dir', }); - const calls = vi.mocked(writeFileAtomic).mock.calls; - expect(calls).toHaveLength(2); + const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter( + (call) => String(call[0]).endsWith('/meta.json') + ); + expect(metaCalls).toHaveLength(2); - const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string }; - const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string }; + const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string }; + const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string }; expect(firstMeta.status).toBe('running'); expect(firstMeta.endTime).toBeUndefined(); expect(secondMeta.status).toBe('aborted'); @@ -313,14 +431,83 @@ describe('executePiece debug prompts logging', () => { }) ).rejects.toThrow('mock constructor failure'); - const calls = vi.mocked(writeFileAtomic).mock.calls; - expect(calls).toHaveLength(2); + const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter( + (call) => 
String(call[0]).endsWith('/meta.json') + ); + expect(metaCalls).toHaveLength(2); - const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string }; - const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string }; + const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string }; + const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string }; expect(firstMeta.status).toBe('running'); expect(firstMeta.endTime).toBeUndefined(); expect(secondMeta.status).toBe('aborted'); expect(secondMeta.endTime).toMatch(/^\d{4}-\d{2}-\d{2}T/); }); + + it('should write trace.md on piece completion', async () => { + await executePiece(makeConfig(), 'task', '/tmp/project', { + projectCwd: '/tmp/project', + reportDirName: 'test-report-dir', + }); + + const traceCalls = vi.mocked(writeFileAtomic).mock.calls.filter( + (call) => String(call[0]).endsWith('/trace.md') + ); + expect(traceCalls.length).toBeGreaterThan(0); + }); + + it('should write trace.md on piece abort', async () => { + await executePiece(makeConfig(), 'abort-task', '/tmp/project', { + projectCwd: '/tmp/project', + reportDirName: 'test-report-dir', + }); + + const traceCalls = vi.mocked(writeFileAtomic).mock.calls.filter( + (call) => String(call[0]).endsWith('/trace.md') + ); + expect(traceCalls.length).toBeGreaterThan(0); + }); + + it('should sanitize sensitive fields before writing session NDJSON when trace mode is default', async () => { + await executePiece(makeConfig(), 'token=plain-secret', '/tmp/project', { + projectCwd: '/tmp/project', + reportDirName: 'test-report-dir', + interactiveMetadata: { + confirmed: true, + task: 'api_key=plain-secret', + }, + }); + await executePiece(makeConfig(), 'sensitive-content-task', '/tmp/project', { + projectCwd: '/tmp/project', + reportDirName: 'test-report-dir-2', + }); + + const records = vi.mocked(appendNdjsonLine).mock.calls.map((call) => call[1]); + const 
recordText = JSON.stringify(records); + expect(recordText).toContain('[REDACTED]'); + expect(recordText).not.toContain('plain-secret'); + expect(recordText).not.toContain('super-secret-token'); + }); + + it('should keep phaseExecutionId bindings consistent in trace when completions arrive in reverse order', async () => { + await executePiece(makeConfig(), 'reverse-phase-complete-task', '/tmp/project', { + projectCwd: '/tmp/project', + reportDirName: 'test-report-dir', + }); + + const traceCall = vi.mocked(writeFileAtomic).mock.calls.find( + (call) => String(call[0]).endsWith('/trace.md') + ); + expect(traceCall).toBeDefined(); + const traceContent = String(traceCall?.[1]); + const firstPromptIndex = traceContent.indexOf('phase prompt first'); + const firstResponseIndex = traceContent.indexOf('phase response first'); + const secondPromptIndex = traceContent.indexOf('phase prompt second'); + const secondResponseIndex = traceContent.indexOf('phase response second'); + + expect(firstPromptIndex).toBeGreaterThan(-1); + expect(firstResponseIndex).toBeGreaterThan(firstPromptIndex); + expect(secondPromptIndex).toBeGreaterThan(firstResponseIndex); + expect(secondResponseIndex).toBeGreaterThan(secondPromptIndex); + }); }); diff --git a/src/__tests__/pieceExecution-session-loading.test.ts b/src/__tests__/pieceExecution-session-loading.test.ts index 9cb6c3e..d5997e5 100644 --- a/src/__tests__/pieceExecution-session-loading.test.ts +++ b/src/__tests__/pieceExecution-session-loading.test.ts @@ -136,6 +136,7 @@ vi.mock('../shared/utils/index.js', () => ({ preventSleep: vi.fn(), isDebugEnabled: vi.fn().mockReturnValue(false), writePromptLog: vi.fn(), + getDebugPromptsLogFile: vi.fn().mockReturnValue(null), generateReportDir: vi.fn().mockReturnValue('test-report-dir'), isValidReportDirName: vi.fn().mockReturnValue(true), playWarningSound: vi.fn(), diff --git a/src/__tests__/report-phase-retry.test.ts b/src/__tests__/report-phase-retry.test.ts index 26c8807..74934a5 100644 --- 
a/src/__tests__/report-phase-retry.test.ts +++ b/src/__tests__/report-phase-retry.test.ts @@ -10,6 +10,7 @@ vi.mock('../agents/runner.js', () => ({ })); import { runAgent } from '../agents/runner.js'; +import type { AgentResponse } from '../core/models/types.js'; function createStep(fileName: string): PieceMovement { return { @@ -50,6 +51,19 @@ function createContext(reportDir: string, lastResponse = 'Phase 1 result'): Phas }; } +function queueRunAgentResponses(responses: AgentResponse[]): void { + const runAgentMock = vi.mocked(runAgent); + for (const response of responses) { + runAgentMock.mockImplementationOnce(async (persona, task, options) => { + options?.onPromptResolved?.({ + systemPrompt: typeof persona === 'string' ? persona : '', + userInstruction: task, + }); + return response; + }); + } +} + describe('runReportPhase retry with new session', () => { let tmpRoot: string; @@ -69,22 +83,23 @@ describe('runReportPhase retry with new session', () => { const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports'); const step = createStep('02-coder.md'); const ctx = createContext(reportDir, 'Implemented feature X'); - const runAgentMock = vi.mocked(runAgent); - runAgentMock - .mockResolvedValueOnce({ + queueRunAgentResponses([ + { persona: 'coder', status: 'done', content: ' ', timestamp: new Date('2026-02-11T00:00:00Z'), sessionId: 'session-resume-2', - }) - .mockResolvedValueOnce({ + }, + { persona: 'coder', status: 'done', content: '# Report\nRecovered output', timestamp: new Date('2026-02-11T00:00:01Z'), sessionId: 'session-fresh-1', - }); + }, + ]); + const runAgentMock = vi.mocked(runAgent); // When await runReportPhase(step, 1, ctx); @@ -107,21 +122,22 @@ describe('runReportPhase retry with new session', () => { const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports'); const step = createStep('03-review.md'); const ctx = createContext(reportDir); - const runAgentMock = vi.mocked(runAgent); - runAgentMock - 
.mockResolvedValueOnce({ + queueRunAgentResponses([ + { persona: 'coder', status: 'error', content: 'Tool use is not allowed in this phase', timestamp: new Date('2026-02-11T00:01:00Z'), error: 'Tool use is not allowed in this phase', - }) - .mockResolvedValueOnce({ + }, + { persona: 'coder', status: 'done', content: 'Recovered report', timestamp: new Date('2026-02-11T00:01:01Z'), - }); + }, + ]); + const runAgentMock = vi.mocked(runAgent); // When await runReportPhase(step, 1, ctx); @@ -137,20 +153,21 @@ describe('runReportPhase retry with new session', () => { const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports'); const step = createStep('04-qa.md'); const ctx = createContext(reportDir); - const runAgentMock = vi.mocked(runAgent); - runAgentMock - .mockResolvedValueOnce({ + queueRunAgentResponses([ + { persona: 'coder', status: 'done', content: ' ', timestamp: new Date('2026-02-11T00:02:00Z'), - }) - .mockResolvedValueOnce({ + }, + { persona: 'coder', status: 'done', content: '\n\n', timestamp: new Date('2026-02-11T00:02:01Z'), - }); + }, + ]); + const runAgentMock = vi.mocked(runAgent); // When / Then await expect(runReportPhase(step, 1, ctx)).rejects.toThrow('Report phase failed for 04-qa.md: Report output is empty'); @@ -162,14 +179,14 @@ describe('runReportPhase retry with new session', () => { const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports'); const step = createStep('05-ok.md'); const ctx = createContext(reportDir); - const runAgentMock = vi.mocked(runAgent); - runAgentMock.mockResolvedValueOnce({ + queueRunAgentResponses([{ persona: 'coder', status: 'done', content: 'Single-pass success', timestamp: new Date('2026-02-11T00:03:00Z'), sessionId: 'session-resume-2', - }); + }]); + const runAgentMock = vi.mocked(runAgent); // When await runReportPhase(step, 1, ctx); @@ -185,13 +202,13 @@ describe('runReportPhase retry with new session', () => { const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports'); 
const step = createStep('06-blocked.md'); const ctx = createContext(reportDir); - const runAgentMock = vi.mocked(runAgent); - runAgentMock.mockResolvedValueOnce({ + queueRunAgentResponses([{ persona: 'coder', status: 'blocked', content: 'Need permission', timestamp: new Date('2026-02-11T00:04:00Z'), - }); + }]); + const runAgentMock = vi.mocked(runAgent); // When const result = await runReportPhase(step, 1, ctx); diff --git a/src/__tests__/status-judgment-phase.test.ts b/src/__tests__/status-judgment-phase.test.ts new file mode 100644 index 0000000..6fb91ba --- /dev/null +++ b/src/__tests__/status-judgment-phase.test.ts @@ -0,0 +1,124 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; +import type { PieceMovement } from '../core/models/types.js'; +import { runStatusJudgmentPhase } from '../core/piece/status-judgment-phase.js'; + +const { mockJudgeStatus } = vi.hoisted(() => ({ + mockJudgeStatus: vi.fn(), +})); + +vi.mock('../agents/agent-usecases.js', () => ({ + judgeStatus: mockJudgeStatus, +})); + +describe('runStatusJudgmentPhase', () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('should pass judge stage callbacks through PhaseRunnerContext', async () => { + mockJudgeStatus.mockImplementation( + async (_structured: string, _tag: string, _rules: unknown[], options: { onJudgeStage?: (entry: { + stage: 1 | 2 | 3; + method: 'structured_output' | 'phase3_tag' | 'ai_judge'; + status: 'done' | 'error' | 'skipped'; + instruction: string; + response: string; + }) => void; onStructuredPromptResolved?: (promptParts: { systemPrompt: string; userInstruction: string }) => void }) => { + options.onStructuredPromptResolved?.({ + systemPrompt: 'conductor-system', + userInstruction: 'structured prompt', + }); + options.onJudgeStage?.({ + stage: 1, + method: 'structured_output', + status: 'done', + instruction: 'structured prompt', + response: '{"step":2}', + }); + return { ruleIndex: 1, method: 'structured_output' as const }; + }, + ); + + const 
step: PieceMovement = { + name: 'review', + persona: 'reviewer', + personaDisplayName: 'reviewer', + instructionTemplate: 'Review', + passPreviousResponse: true, + rules: [ + { condition: 'needs_fix', next: 'fix' }, + { condition: 'approved', next: 'COMPLETE' }, + ], + }; + const onPhaseStart = vi.fn(); + const onPhaseComplete = vi.fn(); + const onJudgeStage = vi.fn(); + + const result = await runStatusJudgmentPhase(step, { + cwd: '/tmp/project', + reportDir: '/tmp/project/.takt/reports', + lastResponse: 'response body', + iteration: 4, + getSessionId: vi.fn(), + buildResumeOptions: vi.fn(), + buildNewSessionReportOptions: vi.fn(), + updatePersonaSession: vi.fn(), + onPhaseStart, + onPhaseComplete, + onJudgeStage, + }); + + expect(result).toEqual({ + tag: '[REVIEW:2]', + ruleIndex: 1, + method: 'structured_output', + }); + expect(onPhaseStart).toHaveBeenCalledWith( + step, + 3, + 'judge', + expect.any(String), + { + systemPrompt: 'conductor-system', + userInstruction: 'structured prompt', + }, + 'review:4:3:1', + 4, + ); + expect(onJudgeStage).toHaveBeenCalledWith( + step, + 3, + 'judge', + expect.objectContaining({ stage: 1, method: 'structured_output' }), + 'review:4:3:1', + 4, + ); + expect(onPhaseComplete).toHaveBeenCalledWith(step, 3, 'judge', '[REVIEW:2]', 'done', undefined, 'review:4:3:1', 4); + }); + + it('should fail fast when iteration is missing', async () => { + mockJudgeStatus.mockResolvedValue({ ruleIndex: 0, method: 'structured_output' }); + + const step: PieceMovement = { + name: 'review', + persona: 'reviewer', + personaDisplayName: 'reviewer', + instructionTemplate: 'Review', + passPreviousResponse: true, + rules: [ + { condition: 'needs_fix', next: 'fix' }, + { condition: 'approved', next: 'COMPLETE' }, + ], + }; + + await expect(runStatusJudgmentPhase(step, { + cwd: '/tmp/project', + reportDir: '/tmp/project/.takt/reports', + lastResponse: 'response body', + getSessionId: vi.fn(), + buildResumeOptions: vi.fn(), + buildNewSessionReportOptions: 
vi.fn(), + updatePersonaSession: vi.fn(), + })).rejects.toThrow('Status judgment requires iteration for movement "review"'); + }); +}); diff --git a/src/__tests__/traceReport.test.ts b/src/__tests__/traceReport.test.ts new file mode 100644 index 0000000..2d55ac8 --- /dev/null +++ b/src/__tests__/traceReport.test.ts @@ -0,0 +1,236 @@ +import { describe, it, expect } from 'vitest'; +import { mkdtempSync, writeFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { renderTraceReportMarkdown, renderTraceReportFromLogs } from '../features/tasks/execute/traceReport.js'; + +describe('traceReport', () => { + it('should render judge stage details and tolerate aborted incomplete movement', () => { + const markdown = renderTraceReportMarkdown( + { + tracePath: '/tmp/trace.md', + pieceName: 'test-piece', + task: 'test task', + runSlug: 'run-1', + status: 'aborted', + iterations: 1, + endTime: '2026-03-04T12:00:00.000Z', + reason: 'user_interrupted', + }, + '2026-03-04T11:59:00.000Z', + [ + { + step: 'ai_fix', + persona: 'coder', + iteration: 1, + startedAt: '2026-03-04T11:59:01.000Z', + phases: [ + { + phaseExecutionId: 'ai_fix:3:1', + phase: 3, + phaseName: 'judge', + instruction: 'judge prompt', + systemPrompt: 'conductor', + userInstruction: 'judge prompt', + startedAt: '2026-03-04T11:59:02.000Z', + judgeStages: [ + { + stage: 1, + method: 'structured_output', + status: 'error', + instruction: 'stage1 prompt', + response: '', + }, + ], + }, + ], + }, + ], + ); + + expect(markdown).toContain('- Status: ❌ aborted'); + expect(markdown).toContain('- Movement Status: in_progress'); + expect(markdown).toContain('## Iteration 1: ai_fix (persona: coder)'); + expect(markdown).toContain('
System Prompt'); + expect(markdown).toContain('
User Instruction'); + expect(markdown).toContain('- Stage 1 (structured_output)'); + expect(markdown).toContain('
Stage Instruction'); + expect(markdown).toContain('
Stage Response'); + }); + + it('should render movements in timestamp order from NDJSON logs', () => { + const dir = mkdtempSync(join(tmpdir(), 'trace-report-')); + const sessionPath = join(dir, 'session.jsonl'); + const promptPath = join(dir, 'prompts.jsonl'); + writeFileSync(sessionPath, [ + JSON.stringify({ type: 'piece_start', task: 'task', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }), + JSON.stringify({ type: 'step_start', step: 'reviewers', persona: 'reviewer', iteration: 2, timestamp: '2026-03-04T11:59:05.000Z' }), + JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }), + JSON.stringify({ type: 'phase_start', step: 'reviewers', iteration: 2, phase: 1, phaseName: 'execute', phaseExecutionId: 'reviewers:2:1:1', instruction: 'r', timestamp: '2026-03-04T11:59:06.000Z' }), + JSON.stringify({ type: 'phase_complete', step: 'reviewers', iteration: 2, phase: 1, phaseName: 'execute', phaseExecutionId: 'reviewers:2:1:1', status: 'done', content: 'r-ok', timestamp: '2026-03-04T11:59:07.000Z' }), + JSON.stringify({ type: 'step_complete', step: 'reviewers', persona: 'reviewer', status: 'done', content: 'r-ok', instruction: 'inst', timestamp: '2026-03-04T11:59:08.000Z' }), + JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: 'p', timestamp: '2026-03-04T11:59:02.000Z' }), + JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'p-ok', timestamp: '2026-03-04T11:59:03.000Z' }), + JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: 'p-ok', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }), + JSON.stringify({ type: 'piece_complete', iterations: 2, endTime: '2026-03-04T12:00:00.000Z' }), + '', + ].join('\n')); + writeFileSync(promptPath, [ + 
JSON.stringify({ movement: 'plan', phase: 1, iteration: 1, phaseExecutionId: 'plan:1:1:1', prompt: 'p', systemPrompt: 'ps', userInstruction: 'pu', response: 'p-ok', timestamp: '2026-03-04T11:59:03.000Z' }), + JSON.stringify({ movement: 'reviewers', phase: 1, iteration: 2, phaseExecutionId: 'reviewers:2:1:1', prompt: 'r', systemPrompt: 'rs', userInstruction: 'ru', response: 'r-ok', timestamp: '2026-03-04T11:59:07.000Z' }), + '', + ].join('\n')); + + const markdown = renderTraceReportFromLogs( + { + tracePath: join(dir, 'trace.md'), + pieceName: 'piece', + task: 'task', + runSlug: 'run-1', + status: 'completed', + iterations: 2, + endTime: '2026-03-04T12:00:00.000Z', + }, + sessionPath, + promptPath, + 'full', + ); + + expect(markdown).toBeDefined(); + const planIndex = markdown!.indexOf('## Iteration 1: plan'); + const reviewersIndex = markdown!.indexOf('## Iteration 2: reviewers'); + expect(planIndex).toBeGreaterThan(-1); + expect(reviewersIndex).toBeGreaterThan(planIndex); + }); + + it('should fail fast when completed trace has missing phase status', () => { + expect(() => renderTraceReportMarkdown( + { + tracePath: '/tmp/trace.md', + pieceName: 'test-piece', + task: 'test task', + runSlug: 'run-1', + status: 'completed', + iterations: 1, + endTime: '2026-03-04T12:00:00.000Z', + }, + '2026-03-04T11:59:00.000Z', + [ + { + step: 'plan', + persona: 'planner', + iteration: 1, + startedAt: '2026-03-04T11:59:01.000Z', + phases: [ + { + phaseExecutionId: 'plan:1:1', + phase: 1, + phaseName: 'execute', + instruction: 'instr', + systemPrompt: 'system', + userInstruction: 'user', + startedAt: '2026-03-04T11:59:02.000Z', + completedAt: '2026-03-04T11:59:03.000Z', + }, + ], + }, + ], + )).toThrow('missing status'); + }); + + it('should mask sensitive task and reason in redacted mode', () => { + const dir = mkdtempSync(join(tmpdir(), 'trace-report-redact-')); + const sessionPath = join(dir, 'session.jsonl'); + writeFileSync(sessionPath, [ + JSON.stringify({ type: 
'piece_start', task: 'token=topsecret', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }), + JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }), + JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: 'api_key=abc123', systemPrompt: 'Authorization: Bearer abc123', userInstruction: 'user token=abc123', timestamp: '2026-03-04T11:59:02.000Z' }), + JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'password=hunter2', timestamp: '2026-03-04T11:59:03.000Z' }), + JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: 'secret=my-secret', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }), + '', + ].join('\n')); + + const markdown = renderTraceReportFromLogs( + { + tracePath: join(dir, 'trace.md'), + pieceName: 'piece', + task: 'token=topsecret', + runSlug: 'run-1', + status: 'aborted', + iterations: 1, + endTime: '2026-03-04T12:00:00.000Z', + reason: 'api_key=super-secret', + }, + sessionPath, + undefined, + 'redacted', + ); + + expect(markdown).toContain('token=[REDACTED]'); + expect(markdown).toContain('api_key=[REDACTED]'); + expect(markdown).not.toContain('topsecret'); + expect(markdown).not.toContain('super-secret'); + expect(markdown).not.toContain('hunter2'); + }); + + it('should mask quoted JSON secrets and common token formats in redacted mode', () => { + const dir = mkdtempSync(join(tmpdir(), 'trace-report-redact-json-')); + const sessionPath = join(dir, 'session.jsonl'); + writeFileSync(sessionPath, [ + JSON.stringify({ type: 'piece_start', task: '{"api_key":"abc123"}', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }), + JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: 
'2026-03-04T11:59:01.000Z' }), + JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: '{"token":"xyz987"}', systemPrompt: 'Authorization: Bearer sk-abcdef12345678', userInstruction: 'ghp_abcdef1234567890', timestamp: '2026-03-04T11:59:02.000Z' }), + JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'xoxb-1234abcd-5678efgh', timestamp: '2026-03-04T11:59:03.000Z' }), + JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: '{"password":"plain"}', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }), + '', + ].join('\n')); + + const markdown = renderTraceReportFromLogs( + { + tracePath: join(dir, 'trace.md'), + pieceName: 'piece', + task: '{"api_key":"abc123"}', + runSlug: 'run-1', + status: 'aborted', + iterations: 1, + endTime: '2026-03-04T12:00:00.000Z', + reason: '{"secret":"plain"}', + }, + sessionPath, + undefined, + 'redacted', + ); + + expect(markdown).toContain('"api_key":"[REDACTED]"'); + expect(markdown).toContain('"secret":"[REDACTED]"'); + expect(markdown).toContain('Authorization: Bearer [REDACTED]'); + expect(markdown).not.toContain('abc123'); + expect(markdown).not.toContain('xyz987'); + expect(markdown).not.toContain('ghp_abcdef1234567890'); + expect(markdown).not.toContain('xoxb-1234abcd-5678efgh'); + }); + + it('should fold alternating loop iterations into a details block', () => { + const markdown = renderTraceReportMarkdown( + { + tracePath: '/tmp/trace.md', + pieceName: 'test-piece', + task: 'test task', + runSlug: 'run-1', + status: 'completed', + iterations: 4, + endTime: '2026-03-04T12:00:00.000Z', + }, + '2026-03-04T11:59:00.000Z', + [ + { step: 'reviewers', persona: 'reviewer', iteration: 1, startedAt: '2026-03-04T11:59:01.000Z', phases: [], result: { status: 'done', content: 'ok' } }, + { step: 
'fix', persona: 'coder', iteration: 2, startedAt: '2026-03-04T11:59:02.000Z', phases: [], result: { status: 'done', content: 'ok' } }, + { step: 'reviewers', persona: 'reviewer', iteration: 3, startedAt: '2026-03-04T11:59:03.000Z', phases: [], result: { status: 'done', content: 'ok' } }, + { step: 'fix', persona: 'coder', iteration: 4, startedAt: '2026-03-04T11:59:04.000Z', phases: [], result: { status: 'done', content: 'ok' } }, + ], + ); + + expect(markdown).toContain('reviewers ↔ fix loop'); + expect(markdown).toContain('
Loop details'); + }); +}); diff --git a/src/agents/agent-usecases.ts b/src/agents/agent-usecases.ts index 14d6478..f2216f9 100644 --- a/src/agents/agent-usecases.ts +++ b/src/agents/agent-usecases.ts @@ -1,172 +1,19 @@ -import type { AgentResponse, PartDefinition, PieceRule, RuleMatchMethod, Language } from '../core/models/types.js'; -import { runAgent, type RunAgentOptions, type StreamCallback } from './runner.js'; -import { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js'; -import { parseParts } from '../core/piece/engine/task-decomposer.js'; -import { loadJudgmentSchema, loadEvaluationSchema, loadDecompositionSchema, loadMorePartsSchema } from '../infra/resources/schema-loader.js'; -import { detectRuleIndex } from '../shared/utils/ruleIndex.js'; -import { ensureUniquePartIds, parsePartDefinitionEntry } from '../core/piece/part-definition-validator.js'; +import type { AgentResponse } from '../core/models/types.js'; +import { runAgent, type RunAgentOptions } from './runner.js'; -export interface JudgeStatusOptions { - cwd: string; - movementName: string; - language?: Language; - interactive?: boolean; - onStream?: StreamCallback; -} - -export interface JudgeStatusResult { - ruleIndex: number; - method: RuleMatchMethod; -} - -export interface EvaluateConditionOptions { - cwd: string; -} - -export interface DecomposeTaskOptions { - cwd: string; - persona?: string; - personaPath?: string; - language?: Language; - model?: string; - provider?: 'claude' | 'codex' | 'opencode' | 'cursor' | 'copilot' | 'mock'; - onStream?: StreamCallback; -} - -export interface MorePartsResponse { - done: boolean; - reasoning: string; - parts: PartDefinition[]; -} - -function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] { - if (!Array.isArray(raw)) { - throw new Error('Structured output "parts" must be an array'); - } - if (raw.length === 0) { - throw new Error('Structured output "parts" must not be empty'); - } - if (raw.length > maxParts) { - throw new 
Error(`Structured output produced too many parts: ${raw.length} > ${maxParts}`); - } - - const parts: PartDefinition[] = raw.map((entry, index) => parsePartDefinitionEntry(entry, index)); - ensureUniquePartIds(parts); - - return parts; -} - -function toMorePartsResponse(raw: unknown, maxAdditionalParts: number): MorePartsResponse { - if (typeof raw !== 'object' || raw == null || Array.isArray(raw)) { - throw new Error('Structured output must be an object'); - } - - const payload = raw as Record; - if (typeof payload.done !== 'boolean') { - throw new Error('Structured output "done" must be a boolean'); - } - if (typeof payload.reasoning !== 'string') { - throw new Error('Structured output "reasoning" must be a string'); - } - if (!Array.isArray(payload.parts)) { - throw new Error('Structured output "parts" must be an array'); - } - if (payload.parts.length > maxAdditionalParts) { - throw new Error(`Structured output produced too many parts: ${payload.parts.length} > ${maxAdditionalParts}`); - } - - const parts: PartDefinition[] = payload.parts.map((entry, index) => parsePartDefinitionEntry(entry, index)); - ensureUniquePartIds(parts); - - return { - done: payload.done, - reasoning: payload.reasoning, - parts, - }; -} - -function summarizePartContent(content: string): string { - const maxLength = 2000; - if (content.length <= maxLength) { - return content; - } - return `${content.slice(0, maxLength)}\n...[truncated]`; -} - -function buildDecomposePrompt(instruction: string, maxParts: number, language?: Language): string { - if (language === 'ja') { - return [ - '以下はタスク分解専用の指示です。タスクを実行せず、分解だけを行ってください。', - '- ツールは使用しない', - `- パート数は 1 以上 ${maxParts} 以下`, - '- パートは互いに独立させる', - '', - '## 元タスク', - instruction, - ].join('\n'); - } - - return [ - 'This is decomposition-only planning. 
Do not execute the task.', - '- Do not use any tool', - `- Produce between 1 and ${maxParts} independent parts`, - '- Keep each part self-contained', - '', - '## Original Task', - instruction, - ].join('\n'); -} - -function buildMorePartsPrompt( - originalInstruction: string, - allResults: Array<{ id: string; title: string; status: string; content: string }>, - existingIds: string[], - maxAdditionalParts: number, - language?: Language, -): string { - const resultBlock = allResults.map((result) => [ - `### ${result.id}: ${result.title} (${result.status})`, - summarizePartContent(result.content), - ].join('\n')).join('\n\n'); - - if (language === 'ja') { - return [ - '以下の実行結果を見て、追加のサブタスクが必要か判断してください。', - '- ツールは使用しない', - '', - '## 元タスク', - originalInstruction, - '', - '## 完了済みパート', - resultBlock || '(なし)', - '', - '## 判断ルール', - '- 追加作業が不要なら done=true にする', - '- 追加作業が必要なら parts に新しいパートを入れる', - '- 不足が複数ある場合は、可能な限り一括で複数パートを返す', - `- 既存IDは再利用しない: ${existingIds.join(', ') || '(なし)'}`, - `- 追加できる最大数: ${maxAdditionalParts}`, - ].join('\n'); - } - - return [ - 'Review completed part results and decide whether additional parts are needed.', - '- Do not use any tool', - '', - '## Original Task', - originalInstruction, - '', - '## Completed Parts', - resultBlock || '(none)', - '', - '## Decision Rules', - '- Set done=true when no additional work is required', - '- If more work is needed, provide new parts in "parts"', - '- If multiple missing tasks are known, return multiple new parts in one batch when possible', - `- Do not reuse existing IDs: ${existingIds.join(', ') || '(none)'}`, - `- Maximum additional parts: ${maxAdditionalParts}`, - ].join('\n'); -} +export { + evaluateCondition, + judgeStatus, + type EvaluateConditionOptions, + type JudgeStatusOptions, + type JudgeStatusResult, +} from './judge-status-usecase.js'; +export { + decomposeTask, + requestMoreParts, + type DecomposeTaskOptions, + type MorePartsResponse, +} from './decompose-task-usecase.js'; export async 
function executeAgent( persona: string | undefined, @@ -175,175 +22,6 @@ export async function executeAgent( ): Promise { return runAgent(persona, instruction, options); } + export const generateReport = executeAgent; export const executePart = executeAgent; - -export async function evaluateCondition( - agentOutput: string, - conditions: Array<{ index: number; text: string }>, - options: EvaluateConditionOptions, -): Promise { - const prompt = buildJudgePrompt(agentOutput, conditions); - const response = await runAgent(undefined, prompt, { - cwd: options.cwd, - maxTurns: 1, - permissionMode: 'readonly', - outputSchema: loadEvaluationSchema(), - }); - - if (response.status !== 'done') { - return -1; - } - - const matchedIndex = response.structuredOutput?.matched_index; - if (typeof matchedIndex === 'number' && Number.isInteger(matchedIndex)) { - const zeroBased = matchedIndex - 1; - if (zeroBased >= 0 && zeroBased < conditions.length) { - return zeroBased; - } - } - - return detectJudgeIndex(response.content); -} - -export async function judgeStatus( - structuredInstruction: string, - tagInstruction: string, - rules: PieceRule[], - options: JudgeStatusOptions, -): Promise { - if (rules.length === 0) { - throw new Error('judgeStatus requires at least one rule'); - } - - if (rules.length === 1) { - return { ruleIndex: 0, method: 'auto_select' }; - } - - const interactiveEnabled = options.interactive === true; - - const isValidRuleIndex = (index: number): boolean => { - if (index < 0 || index >= rules.length) return false; - const rule = rules[index]; - return !(rule?.interactiveOnly && !interactiveEnabled); - }; - - const agentOptions = { - cwd: options.cwd, - maxTurns: 3, - permissionMode: 'readonly' as const, - language: options.language, - onStream: options.onStream, - }; - - // Stage 1: Structured output - const structuredResponse = await runAgent('conductor', structuredInstruction, { - ...agentOptions, - outputSchema: loadJudgmentSchema(), - }); - - if 
(structuredResponse.status === 'done') { - const stepNumber = structuredResponse.structuredOutput?.step; - if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) { - const ruleIndex = stepNumber - 1; - if (isValidRuleIndex(ruleIndex)) { - return { ruleIndex, method: 'structured_output' }; - } - } - } - - // Stage 2: Tag detection (dedicated call, no outputSchema) - const tagResponse = await runAgent('conductor', tagInstruction, agentOptions); - - if (tagResponse.status === 'done') { - const tagRuleIndex = detectRuleIndex(tagResponse.content, options.movementName); - if (isValidRuleIndex(tagRuleIndex)) { - return { ruleIndex: tagRuleIndex, method: 'phase3_tag' }; - } - } - - // Stage 3: AI judge - const conditions = rules - .map((rule, index) => ({ rule, index })) - .filter(({ rule }) => interactiveEnabled || !rule.interactiveOnly) - .map(({ index, rule }) => ({ index, text: rule.condition })); - - if (conditions.length > 0) { - const fallbackIndex = await evaluateCondition(structuredInstruction, conditions, { cwd: options.cwd }); - if (fallbackIndex >= 0 && fallbackIndex < conditions.length) { - const originalIndex = conditions[fallbackIndex]?.index; - if (originalIndex !== undefined) { - return { ruleIndex: originalIndex, method: 'ai_judge' }; - } - } - } - - throw new Error(`Status not found for movement "${options.movementName}"`); -} - -export async function decomposeTask( - instruction: string, - maxParts: number, - options: DecomposeTaskOptions, -): Promise { - const response = await runAgent(options.persona, buildDecomposePrompt(instruction, maxParts, options.language), { - cwd: options.cwd, - personaPath: options.personaPath, - language: options.language, - model: options.model, - provider: options.provider, - allowedTools: [], - permissionMode: 'readonly', - maxTurns: 4, - outputSchema: loadDecompositionSchema(maxParts), - onStream: options.onStream, - }); - - if (response.status !== 'done') { - const detail = response.error || 
response.content || response.status; - throw new Error(`Team leader failed: ${detail}`); - } - - const parts = response.structuredOutput?.parts; - if (parts != null) { - return toPartDefinitions(parts, maxParts); - } - - return parseParts(response.content, maxParts); -} - -export async function requestMoreParts( - originalInstruction: string, - allResults: Array<{ id: string; title: string; status: string; content: string }>, - existingIds: string[], - maxAdditionalParts: number, - options: DecomposeTaskOptions, -): Promise { - const prompt = buildMorePartsPrompt( - originalInstruction, - allResults, - existingIds, - maxAdditionalParts, - options.language, - ); - - const response = await runAgent(options.persona, prompt, { - cwd: options.cwd, - personaPath: options.personaPath, - language: options.language, - model: options.model, - provider: options.provider, - allowedTools: [], - permissionMode: 'readonly', - maxTurns: 4, - outputSchema: loadMorePartsSchema(maxAdditionalParts), - onStream: options.onStream, - }); - - if (response.status !== 'done') { - const detail = response.error || response.content || response.status; - throw new Error(`Team leader feedback failed: ${detail}`); - } - - return toMorePartsResponse(response.structuredOutput, maxAdditionalParts); -} diff --git a/src/agents/decompose-task-usecase.ts b/src/agents/decompose-task-usecase.ts new file mode 100644 index 0000000..3c80a70 --- /dev/null +++ b/src/agents/decompose-task-usecase.ts @@ -0,0 +1,222 @@ +import type { Language, PartDefinition } from '../core/models/types.js'; +import { runAgent, type StreamCallback } from './runner.js'; +import { parseParts } from '../core/piece/engine/task-decomposer.js'; +import { loadDecompositionSchema, loadMorePartsSchema } from '../infra/resources/schema-loader.js'; +import { ensureUniquePartIds, parsePartDefinitionEntry } from '../core/piece/part-definition-validator.js'; + +export interface DecomposeTaskOptions { + cwd: string; + persona?: string; + 
personaPath?: string; + language?: Language; + model?: string; + provider?: 'claude' | 'codex' | 'opencode' | 'cursor' | 'copilot' | 'mock'; + onStream?: StreamCallback; + onPromptResolved?: (promptParts: { + systemPrompt: string; + userInstruction: string; + }) => void; +} + +export interface MorePartsResponse { + done: boolean; + reasoning: string; + parts: PartDefinition[]; +} + +function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] { + if (!Array.isArray(raw)) { + throw new Error('Structured output "parts" must be an array'); + } + if (raw.length === 0) { + throw new Error('Structured output "parts" must not be empty'); + } + if (raw.length > maxParts) { + throw new Error(`Structured output produced too many parts: ${raw.length} > ${maxParts}`); + } + + const parts: PartDefinition[] = raw.map((entry, index) => parsePartDefinitionEntry(entry, index)); + ensureUniquePartIds(parts); + + return parts; +} + +function toMorePartsResponse(raw: unknown, maxAdditionalParts: number): MorePartsResponse { + if (typeof raw !== 'object' || raw == null || Array.isArray(raw)) { + throw new Error('Structured output must be an object'); + } + + const payload = raw as Record; + if (typeof payload.done !== 'boolean') { + throw new Error('Structured output "done" must be a boolean'); + } + if (typeof payload.reasoning !== 'string') { + throw new Error('Structured output "reasoning" must be a string'); + } + if (!Array.isArray(payload.parts)) { + throw new Error('Structured output "parts" must be an array'); + } + if (payload.parts.length > maxAdditionalParts) { + throw new Error(`Structured output produced too many parts: ${payload.parts.length} > ${maxAdditionalParts}`); + } + + const parts: PartDefinition[] = payload.parts.map((entry, index) => parsePartDefinitionEntry(entry, index)); + ensureUniquePartIds(parts); + + return { + done: payload.done, + reasoning: payload.reasoning, + parts, + }; +} + +function summarizePartContent(content: string): string { + 
const maxLength = 2000; + if (content.length <= maxLength) { + return content; + } + return `${content.slice(0, maxLength)}\n...[truncated]`; +} + +function buildDecomposePrompt(instruction: string, maxParts: number, language?: Language): string { + if (language === 'ja') { + return [ + '以下はタスク分解専用の指示です。タスクを実行せず、分解だけを行ってください。', + '- ツールは使用しない', + `- パート数は 1 以上 ${maxParts} 以下`, + '- パートは互いに独立させる', + '', + '## 元タスク', + instruction, + ].join('\n'); + } + + return [ + 'This is decomposition-only planning. Do not execute the task.', + '- Do not use any tool', + `- Produce between 1 and ${maxParts} independent parts`, + '- Keep each part self-contained', + '', + '## Original Task', + instruction, + ].join('\n'); +} + +function buildMorePartsPrompt( + originalInstruction: string, + allResults: Array<{ id: string; title: string; status: string; content: string }>, + existingIds: string[], + maxAdditionalParts: number, + language?: Language, +): string { + const resultBlock = allResults.map((result) => [ + `### ${result.id}: ${result.title} (${result.status})`, + summarizePartContent(result.content), + ].join('\n')).join('\n\n'); + + if (language === 'ja') { + return [ + '以下の実行結果を見て、追加のサブタスクが必要か判断してください。', + '- ツールは使用しない', + '', + '## 元タスク', + originalInstruction, + '', + '## 完了済みパート', + resultBlock || '(なし)', + '', + '## 判断ルール', + '- 追加作業が不要なら done=true にする', + '- 追加作業が必要なら parts に新しいパートを入れる', + '- 不足が複数ある場合は、可能な限り一括で複数パートを返す', + `- 既存IDは再利用しない: ${existingIds.join(', ') || '(なし)'}`, + `- 追加できる最大数: ${maxAdditionalParts}`, + ].join('\n'); + } + + return [ + 'Review completed part results and decide whether additional parts are needed.', + '- Do not use any tool', + '', + '## Original Task', + originalInstruction, + '', + '## Completed Parts', + resultBlock || '(none)', + '', + '## Decision Rules', + '- Set done=true when no additional work is required', + '- If more work is needed, provide new parts in "parts"', + '- If multiple missing tasks are known, return multiple new 
parts in one batch when possible', + `- Do not reuse existing IDs: ${existingIds.join(', ') || '(none)'}`, + `- Maximum additional parts: ${maxAdditionalParts}`, + ].join('\n'); +} + +export async function decomposeTask( + instruction: string, + maxParts: number, + options: DecomposeTaskOptions, +): Promise { + const response = await runAgent(options.persona, buildDecomposePrompt(instruction, maxParts, options.language), { + cwd: options.cwd, + personaPath: options.personaPath, + language: options.language, + model: options.model, + provider: options.provider, + allowedTools: [], + permissionMode: 'readonly', + maxTurns: 4, + outputSchema: loadDecompositionSchema(maxParts), + onStream: options.onStream, + onPromptResolved: options.onPromptResolved, + }); + + if (response.status !== 'done') { + const detail = response.error || response.content || response.status; + throw new Error(`Team leader failed: ${detail}`); + } + + const parts = response.structuredOutput?.parts; + if (parts != null) { + return toPartDefinitions(parts, maxParts); + } + + return parseParts(response.content, maxParts); +} + +export async function requestMoreParts( + originalInstruction: string, + allResults: Array<{ id: string; title: string; status: string; content: string }>, + existingIds: string[], + maxAdditionalParts: number, + options: DecomposeTaskOptions, +): Promise { + const prompt = buildMorePartsPrompt( + originalInstruction, + allResults, + existingIds, + maxAdditionalParts, + options.language, + ); + + const response = await runAgent(options.persona, prompt, { + cwd: options.cwd, + personaPath: options.personaPath, + language: options.language, + model: options.model, + provider: options.provider, + allowedTools: [], + permissionMode: 'readonly', + maxTurns: 4, + outputSchema: loadMorePartsSchema(maxAdditionalParts), + onStream: options.onStream, + }); + + if (response.status !== 'done') { + const detail = response.error || response.content || response.status; + throw new 
Error(`Team leader feedback failed: ${detail}`); + } + + return toMorePartsResponse(response.structuredOutput, maxAdditionalParts); +} diff --git a/src/agents/judge-status-usecase.ts b/src/agents/judge-status-usecase.ts new file mode 100644 index 0000000..6f9ab69 --- /dev/null +++ b/src/agents/judge-status-usecase.ts @@ -0,0 +1,184 @@ +import type { PieceRule, RuleMatchMethod, Language } from '../core/models/types.js'; +import { runAgent, type StreamCallback } from './runner.js'; +import { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js'; +import { loadJudgmentSchema, loadEvaluationSchema } from '../infra/resources/schema-loader.js'; +import { detectRuleIndex } from '../shared/utils/ruleIndex.js'; + +export interface JudgeStatusOptions { + cwd: string; + movementName: string; + language?: Language; + interactive?: boolean; + onStream?: StreamCallback; + onJudgeStage?: (entry: { + stage: 1 | 2 | 3; + method: 'structured_output' | 'phase3_tag' | 'ai_judge'; + status: 'done' | 'error' | 'skipped'; + instruction: string; + response: string; + }) => void; + onStructuredPromptResolved?: (promptParts: { + systemPrompt: string; + userInstruction: string; + }) => void; +} + +export interface JudgeStatusResult { + ruleIndex: number; + method: RuleMatchMethod; +} + +export interface EvaluateConditionOptions { + cwd: string; + onJudgeResponse?: (entry: { + instruction: string; + status: 'done' | 'error'; + response: string; + }) => void; +} + +export async function evaluateCondition( + agentOutput: string, + conditions: Array<{ index: number; text: string }>, + options: EvaluateConditionOptions, +): Promise { + const prompt = buildJudgePrompt(agentOutput, conditions); + const response = await runAgent(undefined, prompt, { + cwd: options.cwd, + maxTurns: 1, + permissionMode: 'readonly', + outputSchema: loadEvaluationSchema(), + }); + + options.onJudgeResponse?.({ + instruction: prompt, + status: response.status === 'done' ? 
'done' : 'error', + response: response.content, + }); + + if (response.status !== 'done') { + return -1; + } + + const matchedIndex = response.structuredOutput?.matched_index; + if (typeof matchedIndex === 'number' && Number.isInteger(matchedIndex)) { + const zeroBased = matchedIndex - 1; + if (zeroBased >= 0 && zeroBased < conditions.length) { + return zeroBased; + } + } + + return detectJudgeIndex(response.content); +} + +export async function judgeStatus( + structuredInstruction: string, + tagInstruction: string, + rules: PieceRule[], + options: JudgeStatusOptions, +): Promise { + if (rules.length === 0) { + throw new Error('judgeStatus requires at least one rule'); + } + + if (rules.length === 1) { + return { ruleIndex: 0, method: 'auto_select' }; + } + + const interactiveEnabled = options.interactive === true; + + const isValidRuleIndex = (index: number): boolean => { + if (index < 0 || index >= rules.length) return false; + const rule = rules[index]; + return !(rule?.interactiveOnly && !interactiveEnabled); + }; + + const agentOptions = { + cwd: options.cwd, + maxTurns: 3, + permissionMode: 'readonly' as const, + language: options.language, + onStream: options.onStream, + }; + + const structuredResponse = await runAgent('conductor', structuredInstruction, { + ...agentOptions, + outputSchema: loadJudgmentSchema(), + onPromptResolved: options.onStructuredPromptResolved, + }); + + options.onJudgeStage?.({ + stage: 1, + method: 'structured_output', + status: structuredResponse.status === 'done' ? 
'done' : 'error', + instruction: structuredInstruction, + response: structuredResponse.content, + }); + + if (structuredResponse.status === 'done') { + const stepNumber = structuredResponse.structuredOutput?.step; + if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) { + const ruleIndex = stepNumber - 1; + if (isValidRuleIndex(ruleIndex)) { + return { ruleIndex, method: 'structured_output' }; + } + } + } + + const tagResponse = await runAgent('conductor', tagInstruction, agentOptions); + + options.onJudgeStage?.({ + stage: 2, + method: 'phase3_tag', + status: tagResponse.status === 'done' ? 'done' : 'error', + instruction: tagInstruction, + response: tagResponse.content, + }); + + if (tagResponse.status === 'done') { + const tagRuleIndex = detectRuleIndex(tagResponse.content, options.movementName); + if (isValidRuleIndex(tagRuleIndex)) { + return { ruleIndex: tagRuleIndex, method: 'phase3_tag' }; + } + } + + const conditions = rules + .map((rule, index) => ({ rule, index })) + .filter(({ rule }) => interactiveEnabled || !rule.interactiveOnly) + .map(({ index, rule }) => ({ index, text: rule.condition })); + + if (conditions.length > 0) { + let stage3Status: 'done' | 'error' | 'skipped' = 'skipped'; + let stage3Instruction = ''; + let stage3Response = ''; + const fallbackIndex = await evaluateCondition(structuredInstruction, conditions, { + cwd: options.cwd, + onJudgeResponse: (entry) => { + stage3Status = entry.status; + stage3Instruction = entry.instruction; + stage3Response = entry.response; + }, + }); + + if (stage3Status === 'skipped' || stage3Instruction === '') { + throw new Error(`AI judge response missing for movement "${options.movementName}"`); + } + + options.onJudgeStage?.({ + stage: 3, + method: 'ai_judge', + status: stage3Status, + instruction: stage3Instruction, + response: stage3Response, + }); + + if (fallbackIndex >= 0 && fallbackIndex < conditions.length) { + const originalIndex = conditions[fallbackIndex]?.index; + if 
(originalIndex !== undefined) { + return { ruleIndex: originalIndex, method: 'ai_judge' }; + } + } + } + + throw new Error(`Status not found for movement "${options.movementName}"`); +} diff --git a/src/agents/runner.ts b/src/agents/runner.ts index fb3cfbc..24cacc9 100644 --- a/src/agents/runner.ts +++ b/src/agents/runner.ts @@ -158,11 +158,18 @@ export class AgentRunner { const providerType = resolved.provider; const provider = getProvider(providerType); + const resolvedSystemPrompt = agentConfig.claudeAgent || agentConfig.claudeSkill + ? undefined + : loadAgentPrompt(agentConfig, options.cwd); + + options.onPromptResolved?.({ + systemPrompt: resolvedSystemPrompt ?? '', + userInstruction: task, + }); + const agent = provider.setup({ name: agentConfig.name, - systemPrompt: agentConfig.claudeAgent || agentConfig.claudeSkill - ? undefined - : loadAgentPrompt(agentConfig, options.cwd), + systemPrompt: resolvedSystemPrompt, claudeAgent: agentConfig.claudeAgent, claudeSkill: agentConfig.claudeSkill, }); @@ -223,6 +230,10 @@ export class AgentRunner { } const systemPrompt = loadTemplate('perform_agent_system_prompt', language, templateVars); + options.onPromptResolved?.({ + systemPrompt, + userInstruction: task, + }); const agent = provider.setup({ name: personaName, systemPrompt }); return agent.call(task, callOptions); } @@ -236,11 +247,19 @@ export class AgentRunner { return this.runCustom(agentConfig, task, options); } + options.onPromptResolved?.({ + systemPrompt: personaSpec, + userInstruction: task, + }); const agent = provider.setup({ name: personaName, systemPrompt: personaSpec }); return agent.call(task, callOptions); } // 3. 
No persona specified — run with instruction_template only (no system prompt) + options.onPromptResolved?.({ + systemPrompt: '', + userInstruction: task, + }); const agent = provider.setup({ name: personaName }); return agent.call(task, callOptions); } diff --git a/src/agents/types.ts b/src/agents/types.ts index 63007fb..d113ec5 100644 --- a/src/agents/types.ts +++ b/src/agents/types.ts @@ -46,4 +46,8 @@ export interface RunAgentOptions { currentPosition: string; }; outputSchema?: Record; + onPromptResolved?: (promptParts: { + systemPrompt: string; + userInstruction: string; + }) => void; } diff --git a/src/core/piece/engine/ArpeggioRunner.ts b/src/core/piece/engine/ArpeggioRunner.ts index e5a8bdb..650cb93 100644 --- a/src/core/piece/engine/ArpeggioRunner.ts +++ b/src/core/piece/engine/ArpeggioRunner.ts @@ -22,7 +22,7 @@ import { incrementMovementIteration } from './state-manager.js'; import { createLogger } from '../../../shared/utils/index.js'; import type { OptionsBuilder } from './OptionsBuilder.js'; import type { MovementExecutor } from './MovementExecutor.js'; -import type { PhaseName } from '../types.js'; +import type { PhaseName, PhasePromptParts } from '../types.js'; const log = createLogger('arpeggio-runner'); @@ -37,8 +37,25 @@ export interface ArpeggioRunnerDeps { conditions: Array<{ index: number; text: string }>, options: { cwd: string } ) => Promise; - readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void; - readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void; + readonly onPhaseStart?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + instruction: string, + promptParts: PhasePromptParts, + phaseExecutionId?: string, + iteration?: number, + ) => void; + readonly onPhaseComplete?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + content: string, + status: 
string, + error?: string, + phaseExecutionId?: string, + iteration?: number, + ) => void; } /** @@ -185,6 +202,8 @@ export class ArpeggioRunner { batches, template, step, + movementIteration, + state.iteration, agentOptions, arpeggioConfig, semaphore, @@ -244,6 +263,8 @@ export class ArpeggioRunner { batches: readonly DataBatch[], template: string, step: PieceMovement, + movementIteration: number, + iteration: number, agentOptions: RunAgentOptions, config: ArpeggioMovementConfig, semaphore: Semaphore, @@ -251,20 +272,34 @@ export class ArpeggioRunner { const promises = batches.map(async (batch) => { await semaphore.acquire(); try { - this.deps.onPhaseStart?.(step, 1, 'execute', `[Arpeggio batch ${batch.batchIndex + 1}/${batch.totalBatches}]`); + let didEmitPhaseStart = false; + const phaseExecutionId = `${step.name}:1:${movementIteration}:${batch.batchIndex}`; + const batchAgentOptions: RunAgentOptions = { + ...agentOptions, + onPromptResolved: (promptParts) => { + if (didEmitPhaseStart) return; + this.deps.onPhaseStart?.(step, 1, 'execute', promptParts.userInstruction, promptParts, phaseExecutionId, iteration); + didEmitPhaseStart = true; + }, + }; const result = await executeBatchWithRetry( batch, template, step.persona, - agentOptions, + batchAgentOptions, config.maxRetries, config.retryDelayMs, ); + if (!didEmitPhaseStart) { + throw new Error(`Missing prompt parts for phase start: ${step.name}:1`); + } this.deps.onPhaseComplete?.( step, 1, 'execute', result.content, result.success ? 
'done' : 'error', result.error, + phaseExecutionId, + iteration, ); return result; } finally { diff --git a/src/core/piece/engine/MovementExecutor.ts b/src/core/piece/engine/MovementExecutor.ts index eaf937f..85e6bb9 100644 --- a/src/core/piece/engine/MovementExecutor.ts +++ b/src/core/piece/engine/MovementExecutor.ts @@ -14,7 +14,7 @@ import type { AgentResponse, Language, } from '../../models/types.js'; -import type { PhaseName } from '../types.js'; +import type { PhaseName, PhasePromptParts, JudgeStageEntry } from '../types.js'; import { executeAgent } from '../../../agents/agent-usecases.js'; import { InstructionBuilder } from '../instruction/InstructionBuilder.js'; import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js'; @@ -45,8 +45,33 @@ export interface MovementExecutorDeps { conditions: Array<{ index: number; text: string }>, options: { cwd: string } ) => Promise; - readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void; - readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void; + readonly onPhaseStart?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + instruction: string, + promptParts: PhasePromptParts, + phaseExecutionId?: string, + iteration?: number, + ) => void; + readonly onPhaseComplete?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + content: string, + status: string, + error?: string, + phaseExecutionId?: string, + iteration?: number, + ) => void; + readonly onJudgeStage?: ( + step: PieceMovement, + phase: 3, + phaseName: 'judge', + entry: JudgeStageEntry, + phaseExecutionId?: string, + iteration?: number, + ) => void; } export class MovementExecutor { @@ -197,6 +222,8 @@ export class MovementExecutor { updatePersonaSession, this.deps.onPhaseStart, this.deps.onPhaseComplete, + this.deps.onJudgeStage, + 
state.iteration, ); // Phase 2: report output (resume same session, Write only) @@ -276,11 +303,21 @@ export class MovementExecutor { }); // Phase 1: main execution (Write excluded if movement has report) - this.deps.onPhaseStart?.(step, 1, 'execute', instruction); - const agentOptions = this.deps.optionsBuilder.buildAgentOptions(step); + let didEmitPhaseStart = false; + const baseAgentOptions = this.deps.optionsBuilder.buildAgentOptions(step); + const agentOptions = { + ...baseAgentOptions, + onPromptResolved: (promptParts: PhasePromptParts) => { + this.deps.onPhaseStart?.(step, 1, 'execute', instruction, promptParts, undefined, state.iteration); + didEmitPhaseStart = true; + }, + }; let response = await executeAgent(step.persona, instruction, agentOptions); + if (!didEmitPhaseStart) { + throw new Error(`Missing prompt parts for phase start: ${step.name}:1`); + } updatePersonaSession(sessionKey, response.sessionId); - this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error); + this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error, undefined, state.iteration); // Provider failures should abort immediately. 
if (response.status === 'error') { diff --git a/src/core/piece/engine/OptionsBuilder.ts b/src/core/piece/engine/OptionsBuilder.ts index b513a6a..9d5fec0 100644 --- a/src/core/piece/engine/OptionsBuilder.ts +++ b/src/core/piece/engine/OptionsBuilder.ts @@ -3,7 +3,7 @@ import type { PieceMovement, PieceState, Language } from '../../models/types.js' import type { MovementProviderOptions } from '../../models/piece-types.js'; import type { RunAgentOptions } from '../../../agents/runner.js'; import type { PhaseRunnerContext } from '../phase-runner.js'; -import type { PieceEngineOptions, PhaseName, MovementProviderInfo } from '../types.js'; +import type { PieceEngineOptions, PhaseName, MovementProviderInfo, PhasePromptParts, JudgeStageEntry } from '../types.js'; import { buildSessionKey } from '../session-key.js'; import { resolveMovementProviderModel } from '../provider-resolution.js'; @@ -158,8 +158,34 @@ export class OptionsBuilder { state: PieceState, lastResponse: string | undefined, updatePersonaSession: (persona: string, sessionId: string | undefined) => void, - onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void, - onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void, + onPhaseStart?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + instruction: string, + promptParts: PhasePromptParts, + phaseExecutionId?: string, + iteration?: number, + ) => void, + onPhaseComplete?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + content: string, + status: string, + error?: string, + phaseExecutionId?: string, + iteration?: number, + ) => void, + onJudgeStage?: ( + step: PieceMovement, + phase: 3, + phaseName: 'judge', + entry: JudgeStageEntry, + phaseExecutionId?: string, + iteration?: number, + ) => void, + iteration?: number, ): PhaseRunnerContext { return { cwd: this.getCwd(), @@ -174,6 +200,8 
@@ export class OptionsBuilder { updatePersonaSession, onPhaseStart, onPhaseComplete, + onJudgeStage, + iteration, }; } } diff --git a/src/core/piece/engine/ParallelRunner.ts b/src/core/piece/engine/ParallelRunner.ts index 7159d86..001bb92 100644 --- a/src/core/piece/engine/ParallelRunner.ts +++ b/src/core/piece/engine/ParallelRunner.ts @@ -19,7 +19,7 @@ import { createLogger, getErrorMessage } from '../../../shared/utils/index.js'; import { buildSessionKey } from '../session-key.js'; import type { OptionsBuilder } from './OptionsBuilder.js'; import type { MovementExecutor } from './MovementExecutor.js'; -import type { PieceEngineOptions, PhaseName } from '../types.js'; +import type { PieceEngineOptions, PhaseName, PhasePromptParts, JudgeStageEntry } from '../types.js'; import type { ParallelLoggerOptions } from './parallel-logger.js'; const log = createLogger('parallel-runner'); @@ -37,8 +37,33 @@ export interface ParallelRunnerDeps { conditions: Array<{ index: number; text: string }>, options: { cwd: string } ) => Promise; - readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void; - readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void; + readonly onPhaseStart?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + instruction: string, + promptParts: PhasePromptParts, + phaseExecutionId?: string, + iteration?: number, + ) => void; + readonly onPhaseComplete?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + content: string, + status: string, + error?: string, + phaseExecutionId?: string, + iteration?: number, + ) => void; + readonly onJudgeStage?: ( + step: PieceMovement, + phase: 3, + phaseName: 'judge', + entry: JudgeStageEntry, + phaseExecutionId?: string, + iteration?: number, + ) => void; } export class ParallelRunner { @@ -86,6 +111,7 @@ export class ParallelRunner { 
subMovements.map(async (subMovement, index) => { const subIteration = incrementMovementIteration(state, subMovement.name); const subInstruction = this.deps.movementExecutor.buildInstruction(subMovement, subIteration, state, task, maxMovements); + const parentIteration = state.iteration; // Session key uses buildSessionKey (persona:provider) — same as normal movements. // This ensures sessions are shared across movements with the same persona+provider, @@ -94,19 +120,33 @@ export class ParallelRunner { // Phase 1: main execution (Write excluded if sub-movement has report) const baseOptions = this.deps.optionsBuilder.buildAgentOptions(subMovement); + let didEmitPhaseStart = false; // Override onStream with parallel logger's prefixed handler (immutable) const agentOptions = parallelLogger ? { ...baseOptions, onStream: parallelLogger.createStreamHandler(subMovement.name, index) } - : baseOptions; - - this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction); + : { ...baseOptions }; + agentOptions.onPromptResolved = (promptParts: PhasePromptParts) => { + this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction, promptParts, undefined, parentIteration); + didEmitPhaseStart = true; + }; const subResponse = await executeAgent(subMovement.persona, subInstruction, agentOptions); + if (!didEmitPhaseStart) { + throw new Error(`Missing prompt parts for phase start: ${subMovement.name}:1`); + } updatePersonaSession(subSessionKey, subResponse.sessionId); - this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error); + this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error, undefined, parentIteration); // Phase 2/3 context — no overrides needed, phase-runner uses buildSessionKey internally - const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(state, subResponse.content, updatePersonaSession, this.deps.onPhaseStart, 
this.deps.onPhaseComplete); + const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext( + state, + subResponse.content, + updatePersonaSession, + this.deps.onPhaseStart, + this.deps.onPhaseComplete, + this.deps.onJudgeStage, + parentIteration, + ); // Phase 2: report output for sub-movement if (subMovement.outputContracts && subMovement.outputContracts.length > 0) { diff --git a/src/core/piece/engine/PieceEngine.ts b/src/core/piece/engine/PieceEngine.ts index 3a31956..95e78c5 100644 --- a/src/core/piece/engine/PieceEngine.ts +++ b/src/core/piece/engine/PieceEngine.ts @@ -128,11 +128,26 @@ export class PieceEngine extends EventEmitter { getRetryNote: () => this.options.retryNote, detectRuleIndex: this.detectRuleIndex, callAiJudge: this.callAiJudge, - onPhaseStart: (step, phase, phaseName, instruction) => { - this.emit('phase:start', step, phase, phaseName, instruction); + onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:start', step, phase, phaseName, instruction, promptParts); + return; + } + this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration); }, - onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => { - this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error); + onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error); + return; + } + this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration); + }, + onJudgeStage: (step, phase, phaseName, entry, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:judge_stage', step, phase, phaseName, entry); + 
return; + } + this.emit('phase:judge_stage', step, phase, phaseName, entry, phaseExecutionId, iteration); }, }); @@ -145,11 +160,26 @@ export class PieceEngine extends EventEmitter { getInteractive: () => this.options.interactive === true, detectRuleIndex: this.detectRuleIndex, callAiJudge: this.callAiJudge, - onPhaseStart: (step, phase, phaseName, instruction) => { - this.emit('phase:start', step, phase, phaseName, instruction); + onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:start', step, phase, phaseName, instruction, promptParts); + return; + } + this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration); }, - onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => { - this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error); + onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error); + return; + } + this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration); + }, + onJudgeStage: (step, phase, phaseName, entry, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:judge_stage', step, phase, phaseName, entry); + return; + } + this.emit('phase:judge_stage', step, phase, phaseName, entry, phaseExecutionId, iteration); }, }); @@ -160,11 +190,19 @@ export class PieceEngine extends EventEmitter { getInteractive: () => this.options.interactive === true, detectRuleIndex: this.detectRuleIndex, callAiJudge: this.callAiJudge, - onPhaseStart: (step, phase, phaseName, instruction) => { - this.emit('phase:start', step, phase, phaseName, instruction); + onPhaseStart: (step, phase, 
phaseName, instruction, promptParts, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:start', step, phase, phaseName, instruction, promptParts); + return; + } + this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration); }, - onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => { - this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error); + onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error); + return; + } + this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration); }, }); @@ -176,11 +214,19 @@ export class PieceEngine extends EventEmitter { getInteractive: () => this.options.interactive === true, detectRuleIndex: this.detectRuleIndex, callAiJudge: this.callAiJudge, - onPhaseStart: (step, phase, phaseName, instruction) => { - this.emit('phase:start', step, phase, phaseName, instruction); + onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:start', step, phase, phaseName, instruction, promptParts); + return; + } + this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration); }, - onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => { - this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error); + onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => { + if (phaseExecutionId == null && iteration == null) { + this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error); + return; + } + 
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration); }, }); diff --git a/src/core/piece/engine/TeamLeaderRunner.ts b/src/core/piece/engine/TeamLeaderRunner.ts index 9ea7ceb..439564d 100644 --- a/src/core/piece/engine/TeamLeaderRunner.ts +++ b/src/core/piece/engine/TeamLeaderRunner.ts @@ -17,7 +17,7 @@ import { createPartMovement, resolvePartErrorDetail, summarizeParts } from './te import { buildTeamLeaderParallelLoggerOptions, emitTeamLeaderProgressHint } from './team-leader-streaming.js'; import type { OptionsBuilder } from './OptionsBuilder.js'; import type { MovementExecutor } from './MovementExecutor.js'; -import type { PieceEngineOptions, PhaseName } from '../types.js'; +import type { PieceEngineOptions, PhaseName, PhasePromptParts } from '../types.js'; const log = createLogger('team-leader-runner'); const MAX_TOTAL_PARTS = 20; @@ -34,8 +34,25 @@ export interface TeamLeaderRunnerDeps { conditions: Array<{ index: number; text: string }>, options: { cwd: string } ) => Promise; - readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void; - readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void; + readonly onPhaseStart?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + instruction: string, + promptParts: PhasePromptParts, + phaseExecutionId?: string, + iteration?: number, + ) => void; + readonly onPhaseComplete?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + content: string, + status: string, + error?: string, + phaseExecutionId?: string, + iteration?: number, + ) => void; } export class TeamLeaderRunner { @@ -54,6 +71,7 @@ export class TeamLeaderRunner { throw new Error(`Movement "${step.name}" has no teamLeader configuration`); } const teamLeaderConfig = step.teamLeader; + const parentIteration = state.iteration; 
const movementIteration = incrementMovementIteration(state, step.name); const leaderStep: PieceMovement = { @@ -72,7 +90,7 @@ export class TeamLeaderRunner { ); emitTeamLeaderProgressHint(this.deps.engineOptions, 'decompose'); - this.deps.onPhaseStart?.(leaderStep, 1, 'execute', instruction); + let didEmitPhaseStart = false; const parts = await decomposeTask(instruction, teamLeaderConfig.maxParts, { cwd: this.deps.getCwd(), persona: leaderStep.persona, @@ -80,14 +98,21 @@ export class TeamLeaderRunner { model: leaderModel, provider: leaderProvider, onStream: this.deps.engineOptions.onStream, + onPromptResolved: (promptParts) => { + this.deps.onPhaseStart?.(leaderStep, 1, 'execute', promptParts.userInstruction, promptParts, undefined, parentIteration); + didEmitPhaseStart = true; + }, }); + if (!didEmitPhaseStart) { + throw new Error(`Missing prompt parts for phase start: ${leaderStep.name}:1`); + } const leaderResponse: AgentResponse = { persona: leaderStep.persona ?? leaderStep.name, status: 'done', content: JSON.stringify({ parts }, null, 2), timestamp: new Date(), }; - this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error); + this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error, undefined, parentIteration); log.debug('Team leader decomposed parts', { movement: step.name, partCount: parts.length, diff --git a/src/core/piece/phase-runner.ts b/src/core/piece/phase-runner.ts index 22d8b2f..5de8ab0 100644 --- a/src/core/piece/phase-runner.ts +++ b/src/core/piece/phase-runner.ts @@ -8,7 +8,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; import { dirname, resolve, sep } from 'node:path'; import type { PieceMovement, Language, AgentResponse } from '../models/types.js'; -import type { PhaseName } from './types.js'; +import type { PhaseName, PhasePromptParts, JudgeStageEntry } from './types.js'; import 
type { RunAgentOptions } from '../../agents/runner.js'; import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js'; import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js'; @@ -33,6 +33,8 @@ export interface PhaseRunnerContext { interactive?: boolean; /** Last response from Phase 1 */ lastResponse?: string; + /** Parent piece iteration for sub-movement phase events */ + iteration?: number; /** Get persona session ID */ getSessionId: (persona: string) => string | undefined; /** Build resume options for a movement */ @@ -44,9 +46,35 @@ export interface PhaseRunnerContext { /** Stream callback for provider event logging (passed to judgeStatus) */ onStream?: import('../../agents/types.js').StreamCallback; /** Callback for phase lifecycle logging */ - onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void; + onPhaseStart?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + instruction: string, + promptParts: PhasePromptParts, + phaseExecutionId?: string, + iteration?: number, + ) => void; /** Callback for phase completion logging */ - onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void; + onPhaseComplete?: ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + content: string, + status: string, + error?: string, + phaseExecutionId?: string, + iteration?: number, + ) => void; + /** Callback for Phase 3 internal stage logging */ + onJudgeStage?: ( + step: PieceMovement, + phase: 3, + phaseName: 'judge', + entry: JudgeStageEntry, + phaseExecutionId?: string, + iteration?: number, + ) => void; } /** @@ -207,35 +235,45 @@ async function runSingleReportAttempt( options: RunAgentOptions, ctx: PhaseRunnerContext, ): Promise { - ctx.onPhaseStart?.(step, 2, 'report', instruction); + let didEmitPhaseStart = false; + const callOptions: RunAgentOptions = { + 
...options, + onPromptResolved: (promptParts) => { + ctx.onPhaseStart?.(step, 2, 'report', instruction, promptParts, undefined, ctx.iteration); + didEmitPhaseStart = true; + }, + }; let response: AgentResponse; try { - response = await executeAgent(step.persona, instruction, options); + response = await executeAgent(step.persona, instruction, callOptions); + if (!didEmitPhaseStart) { + throw new Error(`Missing prompt parts for phase start: ${step.name}:2`); + } } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); - ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg); + ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg, undefined, ctx.iteration); throw error; } if (response.status === 'blocked') { - ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status); + ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, undefined, undefined, ctx.iteration); return { kind: 'blocked', response }; } if (response.status !== 'done') { const errorMessage = response.error || response.content || 'Unknown error'; - ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, errorMessage); + ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, errorMessage, undefined, ctx.iteration); return { kind: 'retryable_failure', errorMessage }; } const trimmedContent = response.content.trim(); if (trimmedContent.length === 0) { const errorMessage = 'Report output is empty'; - ctx.onPhaseComplete?.(step, 2, 'report', response.content, 'error', errorMessage); + ctx.onPhaseComplete?.(step, 2, 'report', response.content, 'error', errorMessage, undefined, ctx.iteration); return { kind: 'retryable_failure', errorMessage }; } - ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status); + ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, undefined, undefined, ctx.iteration); return { kind: 'success', content: 
trimmedContent, response }; } diff --git a/src/core/piece/status-judgment-phase.ts b/src/core/piece/status-judgment-phase.ts index 22515cc..e185c47 100644 --- a/src/core/piece/status-judgment-phase.ts +++ b/src/core/piece/status-judgment-phase.ts @@ -6,6 +6,7 @@ import { StatusJudgmentBuilder, type StatusJudgmentContext } from './instruction import { getJudgmentReportFiles } from './evaluation/rule-utils.js'; import { createLogger } from '../../shared/utils/index.js'; import type { PhaseRunnerContext } from './phase-runner.js'; +import { buildPhaseExecutionId } from '../../shared/utils/phaseExecutionId.js'; const log = createLogger('phase-runner'); @@ -85,8 +86,29 @@ export async function runStatusJudgmentPhase( const tagInstruction = new StatusJudgmentBuilder(step, { ...baseContext, }).build(); + if (!ctx.iteration || !Number.isInteger(ctx.iteration) || ctx.iteration <= 0) { + throw new Error(`Status judgment requires iteration for movement "${step.name}"`); + } + const phaseExecutionId = buildPhaseExecutionId({ + step: step.name, + iteration: ctx.iteration, + phase: 3, + sequence: 1, + }); + + let didEmitPhaseStart = false; + const emitPhaseStart = (promptParts: { systemPrompt: string; userInstruction: string }): void => { + ctx.onPhaseStart?.(step, 3, 'judge', structuredInstruction, promptParts, phaseExecutionId, ctx.iteration); + didEmitPhaseStart = true; + }; + + if (step.rules.length === 1) { + emitPhaseStart({ + systemPrompt: '', + userInstruction: structuredInstruction, + }); + } - ctx.onPhaseStart?.(step, 3, 'judge', structuredInstruction); try { const result = await judgeStatus(structuredInstruction, tagInstruction, step.rules, { cwd: ctx.cwd, @@ -94,13 +116,24 @@ export async function runStatusJudgmentPhase( language: ctx.language, interactive: ctx.interactive, onStream: ctx.onStream, + onStructuredPromptResolved: (promptParts) => { + if (!didEmitPhaseStart) { + emitPhaseStart(promptParts); + } + }, + onJudgeStage: (entry) => { + ctx.onJudgeStage?.(step, 
3, 'judge', entry, phaseExecutionId, ctx.iteration); + }, }); + if (!didEmitPhaseStart) { + throw new Error(`Missing prompt parts for phase start: ${step.name}:3`); + } const tag = `[${step.name.toUpperCase()}:${result.ruleIndex + 1}]`; - ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done'); + ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done', undefined, phaseExecutionId, ctx.iteration); return { tag, ruleIndex: result.ruleIndex, method: result.method }; } catch (error) { const errorMsg = error instanceof Error ? error.message : String(error); - ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg); + ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg, phaseExecutionId, ctx.iteration); throw error; } } diff --git a/src/core/piece/types.ts b/src/core/piece/types.ts index c032eed..ab2bd41 100644 --- a/src/core/piece/types.ts +++ b/src/core/piece/types.ts @@ -78,6 +78,19 @@ export type AiJudgeCaller = ( export type PhaseName = 'execute' | 'report' | 'judge'; +export interface PhasePromptParts { + systemPrompt: string; + userInstruction: string; +} + +export interface JudgeStageEntry { + stage: 1 | 2 | 3; + method: 'structured_output' | 'phase3_tag' | 'ai_judge'; + status: 'done' | 'error' | 'skipped'; + instruction: string; + response: string; +} + /** Provider and model info resolved for a movement */ export interface MovementProviderInfo { provider: ProviderType | undefined; @@ -91,8 +104,33 @@ export interface PieceEvents { 'movement:report': (step: PieceMovement, filePath: string, fileName: string) => void; 'movement:blocked': (step: PieceMovement, response: AgentResponse) => void; 'movement:user_input': (step: PieceMovement, userInput: string) => void; - 'phase:start': (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void; - 'phase:complete': (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void; + 'phase:start': ( + step: PieceMovement, + 
phase: 1 | 2 | 3, + phaseName: PhaseName, + instruction: string, + promptParts: PhasePromptParts, + phaseExecutionId?: string, + iteration?: number, + ) => void; + 'phase:complete': ( + step: PieceMovement, + phase: 1 | 2 | 3, + phaseName: PhaseName, + content: string, + status: string, + error?: string, + phaseExecutionId?: string, + iteration?: number, + ) => void; + 'phase:judge_stage': ( + step: PieceMovement, + phase: 3, + phaseName: 'judge', + entry: JudgeStageEntry, + phaseExecutionId?: string, + iteration?: number, + ) => void; 'piece:complete': (state: PieceState) => void; 'piece:abort': (state: PieceState, reason: string) => void; 'iteration:limit': (iteration: number, maxMovements: number) => void; diff --git a/src/features/tasks/execute/pieceExecution.ts b/src/features/tasks/execute/pieceExecution.ts index a678488..c9577af 100644 --- a/src/features/tasks/execute/pieceExecution.ts +++ b/src/features/tasks/execute/pieceExecution.ts @@ -11,7 +11,7 @@ import { isQuietMode } from '../../../shared/context.js'; import { StreamDisplay } from '../../../shared/ui/index.js'; import { TaskPrefixWriter } from '../../../shared/ui/TaskPrefixWriter.js'; import { generateSessionId, createSessionLog, finalizeSessionLog, initNdjsonLog } from '../../../infra/fs/index.js'; -import { createLogger, notifySuccess, notifyError, preventSleep, generateReportDir, isValidReportDirName } from '../../../shared/utils/index.js'; +import { createLogger, notifySuccess, notifyError, preventSleep, generateReportDir, isValidReportDirName, getDebugPromptsLogFile } from '../../../shared/utils/index.js'; import { createProviderEventLogger, isProviderEventsEnabled } from '../../../shared/utils/providerEventLogger.js'; import { getLabel } from '../../../shared/i18n/index.js'; import { buildRunPaths } from '../../../core/piece/run/run-paths.js'; @@ -25,9 +25,9 @@ import { createOutputFns, createPrefixedStreamHandler } from './outputFns.js'; import { RunMetaManager } from './runMeta.js'; import { 
createIterationLimitHandler, createUserInputHandler } from './iterationLimitHandler.js'; import { assertTaskPrefixPair, truncate, formatElapsedTime } from './pieceExecutionUtils.js'; - +import { createTraceReportWriter } from './traceReportWriter.js'; +import { sanitizeTextForStorage } from './traceReportRedaction.js'; export type { PieceExecutionResult, PieceExecutionOptions }; - const log = createLogger('piece'); export async function executePiece( @@ -39,12 +39,10 @@ export async function executePiece( const { headerPrefix = 'Running Piece:', interactiveUserInput = false } = options; const projectCwd = options.projectCwd; assertTaskPrefixPair(options.taskPrefix, options.taskColorIndex); - const prefixWriter = options.taskPrefix != null ? new TaskPrefixWriter({ taskName: options.taskPrefix, colorIndex: options.taskColorIndex!, displayLabel: options.taskDisplayLabel }) : undefined; const out = createOutputFns(prefixWriter); - const isRetry = Boolean(options.startMovement || options.retryNote); log.debug('Session mode', { isRetry, isWorktree: cwd !== projectCwd }); out.header(`${headerPrefix} ${pieceConfig.name}`); @@ -52,18 +50,9 @@ export async function executePiece( const pieceSessionId = generateSessionId(); const runSlug = options.reportDirName ?? generateReportDir(task); if (!isValidReportDirName(runSlug)) throw new Error(`Invalid reportDirName: ${runSlug}`); - const runPaths = buildRunPaths(cwd, runSlug); const runMetaManager = new RunMetaManager(runPaths, task, pieceConfig.name); - let sessionLog = createSessionLog(task, projectCwd, pieceConfig.name); - const ndjsonLogPath = initNdjsonLog(pieceSessionId, task, pieceConfig.name, { logsDir: runPaths.logsAbs }); - const sessionLogger = new SessionLogger(ndjsonLogPath); - - if (options.interactiveMetadata) { - sessionLogger.writeInteractiveMetadata(options.interactiveMetadata); - } - const displayRef: { current: StreamDisplay | null } = { current: null }; const streamHandler = prefixWriter ? 
createPrefixedStreamHandler(prefixWriter) @@ -71,12 +60,23 @@ export async function executePiece( if (!displayRef.current || event.type === 'result') return; displayRef.current.createHandler()(event); }; - const isWorktree = cwd !== projectCwd; const globalConfig = resolvePieceConfigValues( projectCwd, ['notificationSound', 'notificationSoundEvents', 'provider', 'runtime', 'preventSleep', 'model', 'logging', 'analytics'], ); + const traceReportMode = globalConfig.logging?.trace === true ? 'full' : 'redacted'; + const allowSensitiveData = traceReportMode === 'full'; + const ndjsonLogPath = initNdjsonLog( + pieceSessionId, + sanitizeTextForStorage(task, allowSensitiveData), + pieceConfig.name, + { logsDir: runPaths.logsAbs }, + ); + const sessionLogger = new SessionLogger(ndjsonLogPath, allowSensitiveData); + if (options.interactiveMetadata) { + sessionLogger.writeInteractiveMetadata(options.interactiveMetadata); + } const shouldNotify = globalConfig.notificationSound !== false; const nse = globalConfig.notificationSoundEvents; const shouldNotifyIterationLimit = shouldNotify && nse?.iterationLimit !== false; @@ -98,10 +98,8 @@ export async function executePiece( movement: options.startMovement ?? pieceConfig.initialMovement, enabled: isProviderEventsEnabled(globalConfig), }); - initAnalyticsWriter(globalConfig.analytics?.enabled === true, globalConfig.analytics?.eventsPath ?? join(getGlobalConfigDir(), 'analytics', 'events')); if (globalConfig.preventSleep) preventSleep(); - const analyticsEmitter = new AnalyticsEmitter(runSlug, currentProvider, configuredModel ?? '(default)'); const savedSessions = isRetry ? (isWorktree ? 
loadWorktreeSessions(projectCwd, cwd, currentProvider) : loadPersonaSessions(projectCwd, currentProvider)) @@ -128,12 +126,22 @@ export async function executePiece( let exceededInfo: ExceededInfo | undefined; let lastMovementContent: string | undefined; let lastMovementName: string | undefined; + const writeTraceReportOnce = createTraceReportWriter({ + sessionLogger, + ndjsonLogPath, + tracePath: join(runPaths.runRootAbs, 'trace.md'), + pieceName: pieceConfig.name, + task, + runSlug, + promptLogPath: getDebugPromptsLogFile() ?? undefined, + mode: traceReportMode, + logger: log, + }); let currentIteration = 0; const movementIterations = new Map(); let engine: PieceEngine | null = null; const runAbortController = new AbortController(); const abortHandler = new AbortHandler({ externalSignal: options.abortSignal, internalController: runAbortController, getEngine: () => engine }); - try { engine = new PieceEngine(effectivePieceConfig, cwd, task, { abortSignal: runAbortController.signal, @@ -161,20 +169,21 @@ export async function executePiece( taskColorIndex: options.taskColorIndex, initialIteration: options.initialIterationOverride, }); - abortHandler.install(); - - engine.on('phase:start', (step, phase, phaseName, instruction) => { + engine.on('phase:start', (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => { log.debug('Phase starting', { step: step.name, phase, phaseName }); - sessionLogger.onPhaseStart(step, phase, phaseName, instruction); + sessionLogger.onPhaseStart(step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration); }); - engine.on('phase:complete', (step, phase, phaseName, content, phaseStatus, phaseError) => { + engine.on('phase:complete', (step, phase, phaseName, content, phaseStatus, phaseError, phaseExecutionId, iteration) => { log.debug('Phase completed', { step: step.name, phase, phaseName, status: phaseStatus }); sessionLogger.setIteration(currentIteration); - sessionLogger.onPhaseComplete(step, 
phase, phaseName, content, phaseStatus, phaseError); + sessionLogger.onPhaseComplete(step, phase, phaseName, content, phaseStatus, phaseError, phaseExecutionId, iteration); }); + engine.on('phase:judge_stage', (step, phase, phaseName, entry, phaseExecutionId, iteration) => { + sessionLogger.onJudgeStage(step, phase, phaseName, entry, phaseExecutionId, iteration); + }); engine.on('movement:start', (step, iteration, instruction, providerInfo) => { log.debug('Movement starting', { step: step.name, persona: step.personaDisplayName, iteration }); currentIteration = iteration; @@ -234,6 +243,11 @@ export async function executePiece( sessionLog = finalizeSessionLog(sessionLog, 'completed'); sessionLogger.onPieceComplete(state); runMetaManager.finalize('completed', state.iteration); + writeTraceReportOnce({ + status: 'completed', + iterations: state.iteration, + endTime: new Date().toISOString(), + }); try { saveSessionState(projectCwd, { status: 'success', taskResult: truncate(lastMovementContent ?? '', 1000), timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState); } catch (error) { log.error('Failed to save session state', { error }); } @@ -252,6 +266,12 @@ export async function executePiece( sessionLog = finalizeSessionLog(sessionLog, 'aborted'); sessionLogger.onPieceAbort(state, reason); runMetaManager.finalize('aborted', state.iteration); + writeTraceReportOnce({ + status: 'aborted', + iterations: state.iteration, + reason, + endTime: new Date().toISOString(), + }); try { saveSessionState(projectCwd, { status: reason === 'user_interrupted' ? 
'user_stopped' : 'error', errorMessage: reason, timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState); } catch (error) { log.error('Failed to save session state', { error }); } diff --git a/src/features/tasks/execute/sessionLogger.ts b/src/features/tasks/execute/sessionLogger.ts index 3a8666d..f2fc682 100644 --- a/src/features/tasks/execute/sessionLogger.ts +++ b/src/features/tasks/execute/sessionLogger.ts @@ -2,7 +2,6 @@ * Session logger — NDJSON ログ書き出し専用モジュール * * PieceEngine のイベントを受け取り、NDJSON セッションログへ追記する責務を担う。 - * analytics や UI 出力は担当しない。 */ import { @@ -13,14 +12,16 @@ import { type NdjsonPieceAbort, type NdjsonPhaseStart, type NdjsonPhaseComplete, + type NdjsonPhaseJudgeStage, type NdjsonInteractiveStart, type NdjsonInteractiveEnd, } from '../../../infra/fs/index.js'; import type { InteractiveMetadata } from './types.js'; import { isDebugEnabled, writePromptLog } from '../../../shared/utils/index.js'; -import type { PromptLogRecord } from '../../../shared/utils/index.js'; +import type { PromptLogRecord, NdjsonRecord } from '../../../shared/utils/index.js'; import type { PieceMovement, AgentResponse, PieceState } from '../../../core/models/index.js'; -import type { PhaseName } from '../../../core/piece/index.js'; +import type { JudgeStageEntry, PhasePromptParts } from '../../../core/piece/types.js'; +import { sanitizeTextForStorage } from './traceReportRedaction.js'; function toJudgmentMatchMethod( matchedRuleMethod: string | undefined, @@ -34,29 +35,30 @@ function toJudgmentMatchMethod( export class SessionLogger { private readonly ndjsonLogPath: string; - /** phase 開始時のプロンプトを一時保持(デバッグ用) */ - private readonly phasePrompts = new Map(); - /** 現在のピース全体のイテレーション数 */ + private readonly allowSensitiveData: boolean; + private readonly phasePromptsByExecutionId = new Map(); + private readonly phaseExecutionCounters = new Map(); + private readonly ndjsonRecords: 
NdjsonRecord[] = []; + private readonly promptRecords: PromptLogRecord[] = []; private currentIteration = 0; - constructor(ndjsonLogPath: string) { + constructor(ndjsonLogPath: string, allowSensitiveData: boolean) { this.ndjsonLogPath = ndjsonLogPath; + this.allowSensitiveData = allowSensitiveData; } - /** インタラクティブモードのメタデータ(interactive_start / interactive_end)を NDJSON へ記録する */ writeInteractiveMetadata(meta: InteractiveMetadata): void { const startRecord: NdjsonInteractiveStart = { type: 'interactive_start', timestamp: new Date().toISOString() }; - appendNdjsonLine(this.ndjsonLogPath, startRecord); + this.appendRecord(startRecord); const endRecord: NdjsonInteractiveEnd = { type: 'interactive_end', confirmed: meta.confirmed, - ...(meta.task ? { task: meta.task } : {}), + ...(meta.task ? { task: this.sanitizeText(meta.task) } : {}), timestamp: new Date().toISOString(), }; - appendNdjsonLine(this.ndjsonLogPath, endRecord); + this.appendRecord(endRecord); } - /** 現在のイテレーション番号を更新する(movement:start で呼ぶ) */ setIteration(iteration: number): void { this.currentIteration = iteration; } @@ -64,75 +66,127 @@ export class SessionLogger { onPhaseStart( step: PieceMovement, phase: 1 | 2 | 3, - phaseName: PhaseName, + phaseName: 'execute' | 'report' | 'judge', instruction: string, + promptParts: PhasePromptParts, + phaseExecutionId?: string, + iteration?: number, ): void { + if (!instruction) { + throw new Error(`Missing phase instruction for ${step.name}:${phase}`); + } + const resolvedPhaseExecutionId = this.resolvePhaseExecutionId(step.name, phase, phaseExecutionId, iteration); const record: NdjsonPhaseStart = { type: 'phase_start', step: step.name, phase, phaseName, + phaseExecutionId: resolvedPhaseExecutionId, timestamp: new Date().toISOString(), - ...(instruction ? 
{ instruction } : {}), + instruction: this.sanitizeText(instruction), + systemPrompt: this.sanitizeText(promptParts.systemPrompt), + userInstruction: this.sanitizeText(promptParts.userInstruction), + ...(iteration != null ? { iteration } : {}), }; - appendNdjsonLine(this.ndjsonLogPath, record); + this.appendRecord(record); if (isDebugEnabled()) { - this.phasePrompts.set(`${step.name}:${phase}`, instruction); + this.phasePromptsByExecutionId.set(resolvedPhaseExecutionId, promptParts); } } onPhaseComplete( step: PieceMovement, phase: 1 | 2 | 3, - phaseName: PhaseName, + phaseName: 'execute' | 'report' | 'judge', content: string, phaseStatus: string, phaseError: string | undefined, + phaseExecutionId?: string, + iteration?: number, ): void { + if (!phaseStatus) { + throw new Error(`Missing phase status for ${step.name}:${phase}`); + } + const resolvedPhaseExecutionId = this.resolveCompletionPhaseExecutionId(step.name, phase, phaseExecutionId, iteration); + const completedAt = new Date().toISOString(); const record: NdjsonPhaseComplete = { type: 'phase_complete', step: step.name, phase, phaseName, + phaseExecutionId: resolvedPhaseExecutionId, status: phaseStatus, - content, - timestamp: new Date().toISOString(), - ...(phaseError ? { error: phaseError } : {}), + content: this.sanitizeText(content), + timestamp: completedAt, + ...(phaseError ? { error: this.sanitizeText(phaseError) } : {}), + ...(iteration != null ? 
{ iteration } : {}), }; - appendNdjsonLine(this.ndjsonLogPath, record); + this.appendRecord(record); - const promptKey = `${step.name}:${phase}`; - const prompt = this.phasePrompts.get(promptKey); - this.phasePrompts.delete(promptKey); - - if (isDebugEnabled() && prompt) { + const prompt = this.phasePromptsByExecutionId.get(resolvedPhaseExecutionId); + if (isDebugEnabled()) { + if (!prompt) { + throw new Error(`Missing debug prompt for ${step.name}:${phase}:${resolvedPhaseExecutionId}`); + } const promptRecord: PromptLogRecord = { movement: step.name, phase, - iteration: this.currentIteration, - prompt, - response: content, - timestamp: new Date().toISOString(), + iteration: iteration ?? this.currentIteration, + phaseExecutionId: resolvedPhaseExecutionId, + prompt: this.sanitizeText(prompt.userInstruction), + systemPrompt: this.sanitizeText(prompt.systemPrompt), + userInstruction: this.sanitizeText(prompt.userInstruction), + response: this.sanitizeText(content), + timestamp: completedAt, }; writePromptLog(promptRecord); + this.promptRecords.push(promptRecord); + this.phasePromptsByExecutionId.delete(resolvedPhaseExecutionId); } } + onJudgeStage( + step: PieceMovement, + phase: 3, + phaseName: 'judge', + entry: JudgeStageEntry, + phaseExecutionId?: string, + iteration?: number, + ): void { + const resolvedPhaseExecutionId = this.resolveCompletionPhaseExecutionId(step.name, phase, phaseExecutionId, iteration); + const record: NdjsonPhaseJudgeStage = { + type: 'phase_judge_stage', + step: step.name, + phase, + phaseName, + phaseExecutionId: resolvedPhaseExecutionId, + stage: entry.stage, + method: entry.method, + status: entry.status, + instruction: this.sanitizeText(entry.instruction), + response: this.sanitizeText(entry.response), + timestamp: new Date().toISOString(), + ...(iteration != null ? 
{ iteration } : {}), + }; + this.appendRecord(record); + } + onMovementStart( step: PieceMovement, iteration: number, instruction: string | undefined, ): void { + this.currentIteration = iteration; const record: NdjsonStepStart = { type: 'step_start', step: step.name, persona: step.personaDisplayName, iteration, timestamp: new Date().toISOString(), - ...(instruction ? { instruction } : {}), + ...(instruction ? { instruction: this.sanitizeText(instruction) } : {}), }; - appendNdjsonLine(this.ndjsonLogPath, record); + this.appendRecord(record); } onMovementComplete( @@ -146,15 +200,15 @@ export class SessionLogger { step: step.name, persona: response.persona, status: response.status, - content: response.content, - instruction, + content: this.sanitizeText(response.content), + instruction: this.sanitizeText(instruction), ...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}), ...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}), ...(matchMethod ? { matchMethod } : {}), - ...(response.error ? { error: response.error } : {}), + ...(response.error ? 
{ error: this.sanitizeText(response.error) } : {}), timestamp: response.timestamp.toISOString(), }; - appendNdjsonLine(this.ndjsonLogPath, record); + this.appendRecord(record); } onPieceComplete(state: PieceState): void { @@ -163,16 +217,73 @@ export class SessionLogger { iterations: state.iteration, endTime: new Date().toISOString(), }; - appendNdjsonLine(this.ndjsonLogPath, record); + this.appendRecord(record); } onPieceAbort(state: PieceState, reason: string): void { const record: NdjsonPieceAbort = { type: 'piece_abort', iterations: state.iteration, - reason, + reason: this.sanitizeText(reason), endTime: new Date().toISOString(), }; + this.appendRecord(record); + } + + getNdjsonRecords(): NdjsonRecord[] { + return [...this.ndjsonRecords]; + } + + getPromptRecords(): PromptLogRecord[] { + return [...this.promptRecords]; + } + + private buildPhaseKey(stepName: string, phase: 1 | 2 | 3, iteration?: number): string { + if (iteration == null) { + return `${stepName}:${phase}`; + } + return `${stepName}:${iteration}:${phase}`; + } + + private resolvePhaseExecutionId( + stepName: string, + phase: 1 | 2 | 3, + phaseExecutionId: string | undefined, + iteration?: number, + ): string { + if (phaseExecutionId) { + return phaseExecutionId; + } + const key = this.buildPhaseKey(stepName, phase, iteration); + const current = this.phaseExecutionCounters.get(key) ?? 
0; + const next = current + 1; + this.phaseExecutionCounters.set(key, next); + return `${key}:${next}`; + } + + private resolveCompletionPhaseExecutionId( + stepName: string, + phase: 1 | 2 | 3, + phaseExecutionId: string | undefined, + iteration?: number, + ): string { + if (phaseExecutionId) { + return phaseExecutionId; + } + const key = this.buildPhaseKey(stepName, phase, iteration); + const current = this.phaseExecutionCounters.get(key); + if (current == null) { + throw new Error(`Missing phase execution id on completion for ${stepName}:${phase}`); + } + return `${key}:${current}`; + } + + private appendRecord(record: NdjsonRecord): void { + this.ndjsonRecords.push(record); appendNdjsonLine(this.ndjsonLogPath, record); } + + private sanitizeText(text: string): string { + return sanitizeTextForStorage(text, this.allowSensitiveData); + } } diff --git a/src/features/tasks/execute/traceReport.ts b/src/features/tasks/execute/traceReport.ts new file mode 100644 index 0000000..fe7d6dd --- /dev/null +++ b/src/features/tasks/execute/traceReport.ts @@ -0,0 +1,55 @@ +import type { NdjsonRecord, PromptLogRecord } from '../../../shared/utils/index.js'; +import type { + TraceReportMode, + TraceReportParams, + TraceMovement, + TracePhase, +} from './traceReportTypes.js'; +import { parseJsonl, buildTraceFromRecords, type PromptRecord } from './traceReportParser.js'; +import { cloneMovementsForMode, sanitizeTraceParamsForMode } from './traceReportRedaction.js'; +import { assertTraceParams, renderTraceReportMarkdown } from './traceReportRenderer.js'; + +export type { + TraceReportMode, + TraceReportParams, + TraceMovement, + TracePhase, +}; + +export { assertTraceParams, renderTraceReportMarkdown }; + +export function renderTraceReportFromLogs( + params: TraceReportParams, + ndjsonLogPath: string, + promptLogPath: string | undefined, + mode: TraceReportMode, +): string | undefined { + if (mode === 'off') { + return undefined; + } + const records = parseJsonl(ndjsonLogPath); + if 
(records.length === 0) { + throw new Error(`No session records found for trace report: ${ndjsonLogPath}`); + } + const promptRecords = promptLogPath ? parseJsonl(promptLogPath) : []; + return renderTraceReportFromRecords(params, records, promptRecords, mode); +} + +export function renderTraceReportFromRecords( + params: TraceReportParams, + records: NdjsonRecord[], + promptRecords: PromptRecord[] | PromptLogRecord[], + mode: TraceReportMode, +): string | undefined { + if (mode === 'off') { + return undefined; + } + if (records.length === 0) { + throw new Error('No session records found for trace report from records'); + } + + const trace = buildTraceFromRecords(records, promptRecords as PromptRecord[], params.endTime); + const paramsForMode = sanitizeTraceParamsForMode(params, mode); + const movementsForMode = cloneMovementsForMode(trace.movements, mode); + return renderTraceReportMarkdown(paramsForMode, trace.traceStartedAt, movementsForMode); +} diff --git a/src/features/tasks/execute/traceReportParser.ts b/src/features/tasks/execute/traceReportParser.ts new file mode 100644 index 0000000..64cb807 --- /dev/null +++ b/src/features/tasks/execute/traceReportParser.ts @@ -0,0 +1,260 @@ +import { existsSync, readFileSync } from 'node:fs'; +import type { NdjsonRecord, PromptLogRecord } from '../../../shared/utils/index.js'; +import { + buildPhaseExecutionId, + parsePhaseExecutionId, +} from '../../../shared/utils/phaseExecutionId.js'; +import type { + TraceMovement, + TracePhase, +} from './traceReportTypes.js'; + +interface PromptRecord extends PromptLogRecord { + timestamp: string; +} + +interface BuildTraceResult { + traceStartedAt: string; + movements: TraceMovement[]; +} + +export function parseJsonl(path: string): T[] { + if (!existsSync(path)) { + return []; + } + const lines = readFileSync(path, 'utf-8') + .split('\n') + .map((line) => line.trim()) + .filter((line) => line.length > 0); + return lines.map((line) => JSON.parse(line) as T); +} + +function 
movementKey(step: string, iteration: number): string { + return `${step}:${iteration}`; +} + +function createPhaseExecutionId( + step: string, + iteration: number, + phase: 1 | 2 | 3, + counters: Map, +): string { + const key = `${step}:${iteration}:${phase}`; + const current = counters.get(key) ?? 0; + const next = current + 1; + counters.set(key, next); + return buildPhaseExecutionId({ + step, + iteration, + phase, + sequence: next, + }); +} + +function parsePhaseExecutionKey( + phaseExecutionId: string, +): { step: string; iteration: number } | undefined { + const parsed = parsePhaseExecutionId(phaseExecutionId); + if (!parsed) { + return undefined; + } + return { step: parsed.step, iteration: parsed.iteration }; +} + +function ensureMovement( + movementsByKey: Map, + step: string, + iteration: number, + timestamp: string, + fallbackPersona: string, +): TraceMovement { + const key = movementKey(step, iteration); + const existing = movementsByKey.get(key); + if (existing) { + return existing; + } + const movement: TraceMovement = { + step, + persona: fallbackPersona, + iteration, + startedAt: timestamp, + phases: [], + }; + movementsByKey.set(key, movement); + return movement; +} + +export function buildTraceFromRecords( + records: NdjsonRecord[], + promptRecords: PromptRecord[], + defaultEndTime: string, +): BuildTraceResult { + const promptByExecutionId = new Map(); + for (const prompt of promptRecords) { + if (prompt.phaseExecutionId) { + promptByExecutionId.set(prompt.phaseExecutionId, prompt); + } + } + + const movementsByKey = new Map(); + const phasesByExecutionId = new Map(); + const phaseExecutionCounters = new Map(); + const latestIterationByStep = new Map(); + + let traceStartedAt = ''; + + for (const record of records) { + if (!traceStartedAt && record.type === 'piece_start') { + traceStartedAt = record.startTime; + continue; + } + + if (record.type === 'step_start') { + latestIterationByStep.set(record.step, record.iteration); + const movement = 
ensureMovement( + movementsByKey, + record.step, + record.iteration, + record.timestamp, + record.persona, + ); + movement.persona = record.persona; + movement.instruction = record.instruction; + continue; + } + + if (record.type === 'step_complete') { + const iteration = latestIterationByStep.get(record.step); + if (iteration == null) { + throw new Error(`Missing iteration for step_complete: ${record.step}`); + } + const movement = ensureMovement( + movementsByKey, + record.step, + iteration, + record.timestamp, + record.persona, + ); + movement.completedAt = record.timestamp; + movement.result = { + status: record.status, + content: record.content, + error: record.error, + matchedRuleIndex: record.matchedRuleIndex, + matchedRuleMethod: record.matchedRuleMethod, + matchMethod: record.matchMethod, + }; + continue; + } + + if (record.type === 'phase_start') { + const iteration = record.iteration ?? latestIterationByStep.get(record.step); + if (iteration == null) { + throw new Error(`Missing iteration for phase_start: ${record.step}:${record.phase}`); + } + const movement = ensureMovement( + movementsByKey, + record.step, + iteration, + record.timestamp, + record.step, + ); + const resolvedExecutionId = + record.phaseExecutionId + ?? createPhaseExecutionId(record.step, iteration, record.phase, phaseExecutionCounters); + const prompt = promptByExecutionId.get(resolvedExecutionId); + const phase: TracePhase = { + phaseExecutionId: resolvedExecutionId, + phase: record.phase, + phaseName: record.phaseName, + instruction: record.instruction ?? record.userInstruction ?? prompt?.userInstruction ?? '', + systemPrompt: record.systemPrompt ?? prompt?.systemPrompt ?? '', + userInstruction: record.userInstruction ?? prompt?.userInstruction ?? record.instruction ?? 
'', + startedAt: record.timestamp, + }; + movement.phases.push(phase); + phasesByExecutionId.set(resolvedExecutionId, { + movement, + index: movement.phases.length - 1, + }); + continue; + } + + if (record.type === 'phase_complete') { + const iterationFromId = record.phaseExecutionId + ? parsePhaseExecutionKey(record.phaseExecutionId)?.iteration + : undefined; + const iteration = + record.iteration + ?? iterationFromId + ?? latestIterationByStep.get(record.step); + if (iteration == null) { + throw new Error(`Missing iteration for phase_complete: ${record.step}:${record.phase}`); + } + const resolvedExecutionId = + record.phaseExecutionId + ?? createPhaseExecutionId(record.step, iteration, record.phase, phaseExecutionCounters); + const phaseRef = phasesByExecutionId.get(resolvedExecutionId); + if (!phaseRef) { + throw new Error(`Missing phase_start before phase_complete: ${resolvedExecutionId}`); + } + const existing = phaseRef.movement.phases[phaseRef.index]; + if (!existing) { + throw new Error(`Missing phase state for completion: ${resolvedExecutionId}`); + } + const prompt = promptByExecutionId.get(resolvedExecutionId); + phaseRef.movement.phases[phaseRef.index] = { + ...existing, + instruction: existing.instruction || prompt?.userInstruction || '', + systemPrompt: prompt?.systemPrompt ?? existing.systemPrompt, + userInstruction: prompt?.userInstruction ?? existing.userInstruction, + response: record.content, + status: record.status, + error: record.error, + completedAt: record.timestamp, + }; + continue; + } + + if (record.type === 'phase_judge_stage') { + const phaseRef = record.phaseExecutionId + ? phasesByExecutionId.get(record.phaseExecutionId) + : undefined; + if (!phaseRef) { + continue; + } + const existing = phaseRef.movement.phases[phaseRef.index]; + if (!existing) { + continue; + } + phaseRef.movement.phases[phaseRef.index] = { + ...existing, + judgeStages: [ + ...(existing.judgeStages ?? 
[]), + { + stage: record.stage, + method: record.method, + status: record.status, + instruction: record.instruction, + response: record.response, + }, + ], + }; + } + } + + const movements = [...movementsByKey.values()].sort((a, b) => { + const byStart = a.startedAt.localeCompare(b.startedAt); + if (byStart !== 0) { + return byStart; + } + return a.iteration - b.iteration; + }); + + return { + traceStartedAt: traceStartedAt || defaultEndTime, + movements, + }; +} + +export type { PromptRecord }; diff --git a/src/features/tasks/execute/traceReportRedaction.ts b/src/features/tasks/execute/traceReportRedaction.ts new file mode 100644 index 0000000..80f8e4e --- /dev/null +++ b/src/features/tasks/execute/traceReportRedaction.ts @@ -0,0 +1,81 @@ +import type { + TraceMovement, + TraceReportMode, + TraceReportParams, +} from './traceReportTypes.js'; + +export function sanitizeSensitiveText(text: string): string { + if (!text) return text; + return text + .replace(/(Authorization\s*:\s*Bearer\s+)([^\s]+)/gi, '$1[REDACTED]') + .replace( + /(["']?(?:api[_-]?key|token|password|secret|access[_-]?token|refresh[_-]?token)["']?\s*[:=]\s*["']?)([^"',\s}\]]+)(["']?)/gi, + '$1[REDACTED]$3', + ) + .replace(/([?&](?:api[_-]?key|token|password|secret)=)([^&\s]+)/gi, '$1[REDACTED]') + .replace(/\b(?:sk-[A-Za-z0-9]{8,}|ghp_[A-Za-z0-9]{8,}|xox[baprs]-[A-Za-z0-9-]{8,})\b/g, '[REDACTED]'); +} + +function transformText(text: string, mode: TraceReportMode): string { + if (!text) { + return text; + } + if (mode === 'full') { + return text; + } + return sanitizeSensitiveText(text); +} + +export function cloneMovementsForMode( + movements: TraceMovement[], + mode: TraceReportMode, +): TraceMovement[] { + return movements.map((movement) => ({ + ...movement, + instruction: movement.instruction == null ? undefined : transformText(movement.instruction, mode), + result: movement.result + ? { + ...movement.result, + content: transformText(movement.result.content, mode), + ...(movement.result.error ? 
{ error: transformText(movement.result.error, mode) } : {}), + } + : undefined, + phases: movement.phases.map((phase) => ({ + ...phase, + instruction: transformText(phase.instruction, mode), + systemPrompt: transformText(phase.systemPrompt, mode), + userInstruction: transformText(phase.userInstruction, mode), + response: phase.response == null ? undefined : transformText(phase.response, mode), + error: phase.error == null ? undefined : transformText(phase.error, mode), + judgeStages: phase.judgeStages?.map((stage) => ({ + ...stage, + instruction: transformText(stage.instruction, mode), + response: transformText(stage.response, mode), + })), + })), + })); +} + +export function sanitizeTraceParamsForMode( + params: TraceReportParams, + mode: TraceReportMode, +): TraceReportParams { + if (mode === 'full') { + return params; + } + return { + ...params, + task: sanitizeSensitiveText(params.task), + ...(params.reason ? { reason: sanitizeSensitiveText(params.reason) } : {}), + }; +} + +export function sanitizeTextForStorage(text: string, allowFullText: boolean): string { + if (!text) { + return text; + } + if (allowFullText) { + return text; + } + return sanitizeSensitiveText(text); +} diff --git a/src/features/tasks/execute/traceReportRenderer.ts b/src/features/tasks/execute/traceReportRenderer.ts new file mode 100644 index 0000000..8c55940 --- /dev/null +++ b/src/features/tasks/execute/traceReportRenderer.ts @@ -0,0 +1,297 @@ +import type { + TraceMovement, + TracePhase, + TraceReportParams, +} from './traceReportTypes.js'; + +interface MovementBlock { + kind: 'movement'; + movement: TraceMovement; +} + +interface LoopBlock { + kind: 'loop'; + movements: TraceMovement[]; +} + +type RenderBlock = MovementBlock | LoopBlock; + +export function assertTraceParams(params: TraceReportParams): void { + if (!params.tracePath) throw new Error('tracePath is required'); + if (!params.pieceName) throw new Error('pieceName is required'); + if (!params.task) throw new Error('task is 
required'); + if (!params.runSlug) throw new Error('runSlug is required'); + if (!params.endTime) throw new Error('endTime is required'); + if (!Number.isInteger(params.iterations) || params.iterations < 0) { + throw new Error(`iterations must be a non-negative integer: ${params.iterations}`); + } +} + +function assertTraceMovement(movement: TraceMovement, index: number): void { + if (!movement.step) throw new Error(`trace movement[${index}] missing step`); + if (!movement.persona) throw new Error(`trace movement[${index}] missing persona`); + if (!Number.isInteger(movement.iteration) || movement.iteration <= 0) { + throw new Error(`trace movement[${index}] has invalid iteration: ${movement.iteration}`); + } + if (!movement.startedAt) throw new Error(`trace movement[${index}] missing startedAt`); +} + +function hasPhaseError(phase: TracePhase): boolean { + if (phase.status === 'error' || Boolean(phase.error)) { + return true; + } + return (phase.judgeStages ?? []).some((stage) => stage.status === 'error'); +} + +function movementMarker( + movement: TraceMovement, + runStatus: TraceReportParams['status'], + isLastMovement: boolean, +): string { + if (movement.result?.status === 'error' || movement.result?.error) { + return '❌'; + } + if (runStatus === 'aborted' && !movement.result && isLastMovement) { + return '❌'; + } + if (movement.phases.some(hasPhaseError)) { + return '⚠️'; + } + return ''; +} + +function renderPhaseSection( + phase: TracePhase, + runStatus: TraceReportParams['status'], +): string[] { + if (!phase.instruction) { + throw new Error(`phase ${phase.phase} (${phase.phaseName}) missing instruction`); + } + if (!phase.status && runStatus === 'completed') { + throw new Error(`phase ${phase.phase} (${phase.phaseName}) missing status`); + } + if (!phase.completedAt && runStatus === 'completed') { + throw new Error(`phase ${phase.phase} (${phase.phaseName}) missing completedAt`); + } + + const marker = hasPhaseError(phase) ? 
' ⚠️' : ''; + const lines: string[] = [ + `### Phase ${phase.phase}: ${phase.phaseName}${marker}`, + '', + `- Started: ${phase.startedAt}`, + ...(phase.completedAt ? [`- Completed: ${phase.completedAt}`] : []), + `- System Prompt: ${phase.systemPrompt.length} chars`, + '
<details><summary>System Prompt</summary>', + '', + phase.systemPrompt, + '', + '</details>
', + '', + `- User Instruction: ${phase.userInstruction.length} chars`, + '
<details><summary>User Instruction</summary>', + '', + phase.userInstruction, + '', + '</details>
', + ]; + + if (phase.response != null) { + lines.push( + '', + `- Response: ${phase.response.length} chars`, + '
<details><summary>Response</summary>', + '', + phase.response, + '', + '</details>
', + ); + } + lines.push('', `- Status: ${phase.status ?? 'in_progress'}`); + if (phase.error) { + lines.push(`- Error: ${phase.error}`); + } + + if (phase.phase === 3 && phase.judgeStages && phase.judgeStages.length > 0) { + lines.push('', '#### Judgment Stages', ''); + for (const stage of phase.judgeStages) { + const stageMarker = stage.status === 'error' ? ' ⚠️' : ''; + lines.push( + `- Stage ${stage.stage} (${stage.method})${stageMarker}: status=${stage.status}, instruction=${stage.instruction.length} chars, response=${stage.response.length} chars`, + ); + lines.push('
<details><summary>Stage Instruction</summary>', '', stage.instruction, '', '</details>
', ''); + lines.push('
<details><summary>Stage Response</summary>', '', stage.response, '', '</details>
', ''); + } + } + + lines.push(''); + return lines; +} + +function renderMovementSection( + movement: TraceMovement, + params: TraceReportParams, + isLastMovement: boolean, +): string[] { + const marker = movementMarker(movement, params.status, isLastMovement); + const markerSuffix = marker ? ` ${marker}` : ''; + const lines: string[] = [ + `## Iteration ${movement.iteration}: ${movement.step} (persona: ${movement.persona})${markerSuffix} - ${movement.startedAt}`, + '', + ]; + + if (movement.instruction) { + lines.push( + `- Movement Instruction: ${movement.instruction.length} chars`, + '
<details><summary>Instruction</summary>', + '', + movement.instruction, + '', + '</details>
', + '', + ); + } + + const phases = [...movement.phases].sort((a, b) => { + const byStart = a.startedAt.localeCompare(b.startedAt); + if (byStart !== 0) { + return byStart; + } + return a.phase - b.phase; + }); + + for (const phase of phases) { + lines.push(...renderPhaseSection(phase, params.status)); + } + + if (movement.result) { + lines.push( + `- Movement Status: ${movement.result.status}`, + `- Movement Response: ${movement.result.content.length} chars`, + ); + if (movement.result.matchMethod) { + lines.push(`- Match Method: ${movement.result.matchMethod}`); + } + if (movement.result.matchedRuleIndex != null) { + lines.push(`- Matched Rule Index: ${movement.result.matchedRuleIndex}`); + } + if (movement.result.error) { + lines.push(`- Error: ${movement.result.error}`); + } + lines.push('
<details><summary>Movement Response</summary>', '', movement.result.content, '', '</details>
'); + } else { + lines.push(`- Movement Status: ${movement.completedAt ? 'aborted' : 'in_progress'}`); + } + + lines.push('', '---', ''); + return lines; +} + +function buildRenderBlocks(sorted: TraceMovement[]): RenderBlock[] { + const blocks: RenderBlock[] = []; + let index = 0; + while (index < sorted.length) { + if (index + 3 < sorted.length) { + const first = sorted[index]!; + const second = sorted[index + 1]!; + const third = sorted[index + 2]!; + const fourth = sorted[index + 3]!; + const isAlternatingLoop = + first.step !== second.step + && first.step === third.step + && second.step === fourth.step; + if (isAlternatingLoop) { + const a = first.step; + const b = second.step; + let end = index + 4; + while (end < sorted.length) { + const expected = (end - index) % 2 === 0 ? a : b; + if (sorted[end]!.step !== expected) { + break; + } + end += 1; + } + blocks.push({ + kind: 'loop', + movements: sorted.slice(index, end), + }); + index = end; + continue; + } + } + blocks.push({ kind: 'movement', movement: sorted[index]! }); + index += 1; + } + return blocks; +} + +function renderLoopBlock(block: LoopBlock, params: TraceReportParams): string[] { + const first = block.movements[0]!; + const second = block.movements[1]!; + const last = block.movements[block.movements.length - 1]!; + const cycleCount = Math.floor(block.movements.length / 2); + const lines: string[] = [ + `## Iteration ${first.iteration}-${last.iteration}: ${first.step} ↔ ${second.step} loop (${cycleCount} cycles) ⚠️`, + '', + `
<details><summary>Loop details (${block.movements.length} movements)</summary>`, + '', + ]; + + block.movements.forEach((movement, movementIndex) => { + const movementLines = renderMovementSection( + movement, + params, + movementIndex === block.movements.length - 1, + ); + lines.push(...movementLines.map((line) => (line ? ` ${line}` : line))); + }); + + lines.push('</details>
', '', '---', ''); + return lines; +} + +export function renderTraceReportMarkdown( + params: TraceReportParams, + traceStartedAt: string, + movements: TraceMovement[], +): string { + assertTraceParams(params); + if (!traceStartedAt) { + throw new Error('traceStartedAt is required'); + } + + const statusLabel = params.status === 'completed' ? '✅ completed' : '❌ aborted'; + const lines: string[] = [ + `# Execution Trace: ${params.pieceName}`, + '', + `- Task: ${params.task}`, + `- Run: ${params.runSlug}`, + `- Started: ${traceStartedAt}`, + `- Ended: ${params.endTime}`, + `- Status: ${statusLabel}`, + `- Iterations: ${params.iterations}`, + ...(params.reason ? [`- Reason: ${params.reason}`] : []), + '', + '---', + '', + ]; + + const sorted = [...movements].sort((a, b) => { + const byStart = a.startedAt.localeCompare(b.startedAt); + if (byStart !== 0) { + return byStart; + } + return a.iteration - b.iteration; + }); + sorted.forEach((movement, index) => assertTraceMovement(movement, index)); + + const blocks = buildRenderBlocks(sorted); + blocks.forEach((block, blockIndex) => { + if (block.kind === 'loop') { + lines.push(...renderLoopBlock(block, params)); + return; + } + lines.push(...renderMovementSection(block.movement, params, blockIndex === blocks.length - 1)); + }); + + return lines.join('\n'); +} diff --git a/src/features/tasks/execute/traceReportTypes.ts b/src/features/tasks/execute/traceReportTypes.ts new file mode 100644 index 0000000..1d71fa5 --- /dev/null +++ b/src/features/tasks/execute/traceReportTypes.ts @@ -0,0 +1,48 @@ +import type { PhaseName } from '../../../core/piece/index.js'; +import type { JudgeStageEntry } from '../../../core/piece/types.js'; + +export type TraceReportMode = 'off' | 'redacted' | 'full'; + +export interface TraceReportParams { + tracePath: string; + pieceName: string; + task: string; + runSlug: string; + status: 'completed' | 'aborted'; + iterations: number; + endTime: string; + reason?: string; +} + +export interface 
TracePhase { + phaseExecutionId: string; + phase: 1 | 2 | 3; + phaseName: PhaseName; + instruction: string; + systemPrompt: string; + userInstruction: string; + response?: string; + status?: string; + error?: string; + startedAt: string; + completedAt?: string; + judgeStages?: JudgeStageEntry[]; +} + +export interface TraceMovement { + step: string; + persona: string; + iteration: number; + instruction?: string; + startedAt: string; + completedAt?: string; + phases: TracePhase[]; + result?: { + status: string; + content: string; + error?: string; + matchedRuleIndex?: number; + matchedRuleMethod?: string; + matchMethod?: string; + }; +} diff --git a/src/features/tasks/execute/traceReportWriter.ts b/src/features/tasks/execute/traceReportWriter.ts new file mode 100644 index 0000000..38a717d --- /dev/null +++ b/src/features/tasks/execute/traceReportWriter.ts @@ -0,0 +1,81 @@ +import { writeFileAtomic } from '../../../infra/config/index.js'; +import type { SessionLogger } from './sessionLogger.js'; +import type { TraceReportMode } from './traceReport.js'; +import { + assertTraceParams, + renderTraceReportFromLogs, + renderTraceReportFromRecords, +} from './traceReport.js'; + +interface TraceReportWriterParams { + sessionLogger: SessionLogger; + ndjsonLogPath: string; + tracePath: string; + pieceName: string; + task: string; + runSlug: string; + promptLogPath?: string; + mode: TraceReportMode; + logger: { + info: (message: string, data?: unknown) => void; + }; +} + +interface WriteTraceReportInput { + status: 'completed' | 'aborted'; + iterations: number; + endTime: string; + reason?: string; +} + +export function createTraceReportWriter(params: TraceReportWriterParams): (input: WriteTraceReportInput) => void { + let traceReportWritten = false; + + return (input: WriteTraceReportInput): void => { + if (traceReportWritten) { + params.logger.info('Trace report write skipped because it has already been written', { + status: input.status, + iterations: input.iterations, + 
}); + return; + } + traceReportWritten = true; + const traceParams = { + tracePath: params.tracePath, + pieceName: params.pieceName, + task: params.task, + runSlug: params.runSlug, + status: input.status, + iterations: input.iterations, + reason: input.reason, + endTime: input.endTime, + } as const; + assertTraceParams(traceParams); + + let markdown: string | undefined; + try { + markdown = renderTraceReportFromLogs( + traceParams, + params.ndjsonLogPath, + params.promptLogPath, + params.mode, + ); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + if (!message.startsWith('No session records found for trace report:')) { + throw error; + } + markdown = renderTraceReportFromRecords( + traceParams, + params.sessionLogger.getNdjsonRecords(), + params.sessionLogger.getPromptRecords(), + params.mode, + ); + } + + if (!markdown) { + return; + } + writeFileAtomic(params.tracePath, markdown); + }; +} diff --git a/src/infra/fs/index.ts b/src/infra/fs/index.ts index 9c18038..b42eb6e 100644 --- a/src/infra/fs/index.ts +++ b/src/infra/fs/index.ts @@ -11,6 +11,7 @@ export type { NdjsonPieceAbort, NdjsonPhaseStart, NdjsonPhaseComplete, + NdjsonPhaseJudgeStage, NdjsonInteractiveStart, NdjsonInteractiveEnd, NdjsonRecord, diff --git a/src/infra/fs/session.ts b/src/infra/fs/session.ts index 47151bc..8b8b4ca 100644 --- a/src/infra/fs/session.ts +++ b/src/infra/fs/session.ts @@ -21,6 +21,7 @@ export type { NdjsonPieceAbort, NdjsonPhaseStart, NdjsonPhaseComplete, + NdjsonPhaseJudgeStage, NdjsonInteractiveStart, NdjsonInteractiveEnd, NdjsonRecord, diff --git a/src/shared/utils/debug.ts b/src/shared/utils/debug.ts index c64c1c4..c1f01c8 100644 --- a/src/shared/utils/debug.ts +++ b/src/shared/utils/debug.ts @@ -128,6 +128,11 @@ export class DebugLogger { return this.debugLogFile; } + /** Get current debug prompts log file path */ + getPromptsLogFile(): string | null { + return this.debugPromptsLogFile; + } + /** Format log message with timestamp 
and level */ private static formatLogMessage(level: string, component: string, message: string, data?: unknown): string { const timestamp = new Date().toISOString(); @@ -223,6 +228,10 @@ export function getDebugLogFile(): string | null { return DebugLogger.getInstance().getLogFile(); } +export function getDebugPromptsLogFile(): string | null { + return DebugLogger.getInstance().getPromptsLogFile(); +} + export function debugLog(component: string, message: string, data?: unknown): void { DebugLogger.getInstance().writeLog('DEBUG', component, message, data); } diff --git a/src/shared/utils/phaseExecutionId.ts b/src/shared/utils/phaseExecutionId.ts new file mode 100644 index 0000000..67057e8 --- /dev/null +++ b/src/shared/utils/phaseExecutionId.ts @@ -0,0 +1,50 @@ +export interface PhaseExecutionIdParts { + step: string; + iteration: number; + phase: 1 | 2 | 3; + sequence: number; +} + +export function buildPhaseExecutionId(parts: PhaseExecutionIdParts): string { + if (!parts.step) { + throw new Error('phaseExecutionId requires step'); + } + if (!Number.isInteger(parts.iteration) || parts.iteration <= 0) { + throw new Error(`phaseExecutionId requires positive iteration: ${parts.iteration}`); + } + if (parts.phase !== 1 && parts.phase !== 2 && parts.phase !== 3) { + throw new Error(`phaseExecutionId requires phase 1|2|3: ${parts.phase}`); + } + if (!Number.isInteger(parts.sequence) || parts.sequence <= 0) { + throw new Error(`phaseExecutionId requires positive sequence: ${parts.sequence}`); + } + return `${parts.step}:${parts.iteration}:${parts.phase}:${parts.sequence}`; +} + +export function parsePhaseExecutionId( + phaseExecutionId: string, +): PhaseExecutionIdParts | undefined { + const parts = phaseExecutionId.split(':'); + if (parts.length !== 4) { + return undefined; + } + const [step, iterationStr, phaseStr, sequenceStr] = parts; + const iteration = Number(iterationStr); + const phase = Number(phaseStr); + const sequence = Number(sequenceStr); + if (!step || 
!Number.isInteger(iteration) || iteration <= 0) { + return undefined; + } + if (!Number.isInteger(phase) || (phase !== 1 && phase !== 2 && phase !== 3)) { + return undefined; + } + if (!Number.isInteger(sequence) || sequence <= 0) { + return undefined; + } + return { + step, + iteration, + phase: phase as 1 | 2 | 3, + sequence, + }; +} diff --git a/src/shared/utils/types.ts b/src/shared/utils/types.ts index 2926689..88b4cfd 100644 --- a/src/shared/utils/types.ts +++ b/src/shared/utils/types.ts @@ -79,23 +79,44 @@ export interface NdjsonPieceAbort { export interface NdjsonPhaseStart { type: 'phase_start'; step: string; + iteration?: number; phase: 1 | 2 | 3; phaseName: 'execute' | 'report' | 'judge'; + phaseExecutionId?: string; timestamp: string; instruction?: string; + systemPrompt?: string; + userInstruction?: string; } export interface NdjsonPhaseComplete { type: 'phase_complete'; step: string; + iteration?: number; phase: 1 | 2 | 3; phaseName: 'execute' | 'report' | 'judge'; + phaseExecutionId?: string; status: string; content?: string; timestamp: string; error?: string; } +export interface NdjsonPhaseJudgeStage { + type: 'phase_judge_stage'; + step: string; + iteration?: number; + phase: 3; + phaseName: 'judge'; + phaseExecutionId?: string; + stage: 1 | 2 | 3; + method: 'structured_output' | 'phase3_tag' | 'ai_judge'; + status: 'done' | 'error' | 'skipped'; + instruction: string; + response: string; + timestamp: string; +} + export interface NdjsonInteractiveStart { type: 'interactive_start'; timestamp: string; @@ -116,6 +137,7 @@ export type NdjsonRecord = | NdjsonPieceAbort | NdjsonPhaseStart | NdjsonPhaseComplete + | NdjsonPhaseJudgeStage | NdjsonInteractiveStart | NdjsonInteractiveEnd; @@ -124,7 +146,10 @@ export interface PromptLogRecord { movement: string; phase: 1 | 2 | 3; iteration: number; + phaseExecutionId?: string; prompt: string; + systemPrompt: string; + userInstruction: string; response: string; timestamp: string; }