takt: add-trace-report-generation (#467)

This commit is contained in:
nrs 2026-03-04 23:07:36 +09:00 committed by GitHub
parent dbc22c76fc
commit 8403a7c892
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
45 changed files with 2970 additions and 602 deletions

View File

@ -43,6 +43,13 @@ function doneResponse(content: string, structuredOutput?: Record<string, unknown
}
const judgeOptions = { cwd: '/repo', movementName: 'review' };
/**
 * Shape of the entry the tests in this file expect `onJudgeStage` to receive.
 * Declared locally so the options literal can be cast to a type that carries the callback.
 */
type JudgeStageLog = {
  /** Ordinal of the judgment stage being reported. */
  stage: 1 | 2 | 3;
  /** How the stage attempted to determine the status. */
  method:
    | 'structured_output'
    | 'phase3_tag'
    | 'ai_judge';
  /** Outcome reported for the stage. */
  status:
    | 'done'
    | 'error'
    | 'skipped';
  /** Instruction text associated with the stage. */
  instruction: string;
  /** Response text associated with the stage. */
  response: string;
};
describe('agent-usecases', () => {
beforeEach(() => {
@ -173,6 +180,75 @@ describe('agent-usecases', () => {
expect(runAgent).toHaveBeenCalledTimes(3);
});
it('judgeStatus は Phase 3 の内部ステージログを順序どおりに通知する', async () => {
  // Spy that receives every stage entry judgeStatus reports through onJudgeStage.
  const stageSpy = vi.fn();
  const agentMock = vi.mocked(runAgent);
  // Stage 1: structured output fails
  agentMock.mockResolvedValueOnce(doneResponse('no match'));
  // Stage 2: tag detection succeeds
  agentMock.mockResolvedValueOnce(doneResponse('[REVIEW:2]'));

  const options = { ...judgeOptions, onJudgeStage: stageSpy } as typeof judgeOptions & {
    onJudgeStage: (entry: JudgeStageLog) => void;
  };

  await judgeStatus(
    'structured',
    'tag',
    [
      { condition: 'a', next: 'one' },
      { condition: 'b', next: 'two' },
    ],
    options,
  );

  // Expected notifications, in call order.
  const expectedStages = [
    {
      stage: 1,
      method: 'structured_output',
      status: 'done',
      instruction: 'structured',
      response: 'no match',
    },
    {
      stage: 2,
      method: 'phase3_tag',
      status: 'done',
      instruction: 'tag',
      response: '[REVIEW:2]',
    },
  ];

  expect(stageSpy).toHaveBeenCalledTimes(expectedStages.length);
  expectedStages.forEach((expected, position) => {
    expect(stageSpy).toHaveBeenNthCalledWith(position + 1, expect.objectContaining(expected));
  });
});
it('judgeStatus は全ステージ失敗時にも Stage 3 までログ通知する', async () => {
  const stageSpy = vi.fn();
  // One queued reply per stage, none of which yields a status:
  //   Stage 1: structured output fails
  //   Stage 2: tag detection fails
  //   Stage 3: evaluateCondition fails
  const failingReplies = ['no match', 'no tag', 'still no match'];
  for (const reply of failingReplies) {
    vi.mocked(runAgent).mockResolvedValueOnce(doneResponse(reply));
  }
  vi.mocked(detectJudgeIndex).mockReturnValue(-1);

  const options = { ...judgeOptions, onJudgeStage: stageSpy } as typeof judgeOptions & {
    onJudgeStage: (entry: JudgeStageLog) => void;
  };
  const pendingJudgement = judgeStatus(
    'structured',
    'tag',
    [
      { condition: 'a', next: 'one' },
      { condition: 'b', next: 'two' },
    ],
    options,
  );

  await expect(pendingJudgement).rejects.toThrow('Status not found for movement "review"');

  // Even though the call rejects, all three stages must have been reported.
  expect(stageSpy).toHaveBeenCalledTimes(3);
  const finalStage = expect.objectContaining({ stage: 3, method: 'ai_judge' });
  expect(stageSpy).toHaveBeenLastCalledWith(finalStage);
});
it('judgeStatus は全ての判定に失敗したらエラー', async () => {
// Stage 1: structured output fails
vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match'));
@ -232,6 +308,27 @@ describe('agent-usecases', () => {
.rejects.toThrow('Team leader failed: bad output');
});
it('decomposeTask は onPromptResolved を runAgent に伝搬する', async () => {
  const promptListener = vi.fn();
  // Minimal structured output containing a single part.
  const singlePart = { id: 'p1', title: 'Part 1', instruction: 'Do 1', timeout_ms: null };
  vi.mocked(runAgent).mockResolvedValue(doneResponse('x', { parts: [singlePart] }));

  await decomposeTask('instruction', 2, {
    cwd: '/repo',
    persona: 'team-leader',
    onPromptResolved: promptListener,
  });

  // The very same callback instance must appear in the options handed to runAgent.
  const optionsWithListener = expect.objectContaining({ onPromptResolved: promptListener });
  expect(runAgent).toHaveBeenCalledWith('team-leader', expect.any(String), optionsWithListener);
});
it('requestMoreParts は構造化出力をパースして返す', async () => {
vi.mocked(runAgent).mockResolvedValue(doneResponse('x', {
done: false,

View File

@ -26,4 +26,33 @@ describe('config module file-size boundary', () => {
const lineCount = getLineCount('../features/tasks/execute/pieceExecution.ts');
expect(lineCount).toBeLessThanOrEqual(300);
});
it('keeps sessionLogger.ts under 300 lines', () => {
  // Fails once the module grows past the 300-line ceiling.
  expect(getLineCount('../features/tasks/execute/sessionLogger.ts')).toBeLessThanOrEqual(300);
});
it('keeps traceReport renderer/parser split modules under 300 lines', () => {
  // Measure both modules first, then check each against the 300-line ceiling.
  const [rendererLines, parserLines] = [
    '../features/tasks/execute/traceReportRenderer.ts',
    '../features/tasks/execute/traceReportParser.ts',
  ].map((modulePath) => getLineCount(modulePath));

  expect(rendererLines).toBeLessThanOrEqual(300);
  expect(parserLines).toBeLessThanOrEqual(300);
});
it('keeps traceReport.ts as thin facade under 120 lines', () => {
  // The facade gets a tighter ceiling (120 lines) than the split modules.
  expect(getLineCount('../features/tasks/execute/traceReport.ts')).toBeLessThanOrEqual(120);
});
it('keeps agent-usecases.ts as thin facade under 120 lines', () => {
  // The facade gets a tighter ceiling (120 lines) than the split use-case modules.
  expect(getLineCount('../agents/agent-usecases.ts')).toBeLessThanOrEqual(120);
});
it('keeps split agent usecases under 300 lines each', () => {
  // Measure both use-case modules first, then check each against the 300-line ceiling.
  const [judgeLines, decomposeLines] = [
    '../agents/judge-status-usecase.ts',
    '../agents/decompose-task-usecase.ts',
  ].map((modulePath) => getLineCount(modulePath));

  expect(judgeLines).toBeLessThanOrEqual(300);
  expect(decomposeLines).toBeLessThanOrEqual(300);
});
});

View File

@ -100,6 +100,19 @@ function createEngineOptions(tmpDir: string): PieceEngineOptions {
};
}
/**
 * Queues one single-use `runAgent` mock implementation per given response, in order.
 *
 * Each queued implementation first invokes `options.onPromptResolved` (when provided)
 * with the persona as `systemPrompt` (empty string if the persona is not a string) and
 * the instruction as `userInstruction`, then resolves with the queued response.
 *
 * @param responses Responses to return from successive `runAgent` calls.
 */
function mockRunAgentWithPrompt(...responses: ReturnType<typeof makeResponse>[]): void {
  const runAgentMock = vi.mocked(runAgent);
  responses.forEach((queued) => {
    runAgentMock.mockImplementationOnce(async (persona, instruction, options) => {
      const systemPrompt = typeof persona === 'string' ? persona : '';
      options?.onPromptResolved?.({ systemPrompt, userInstruction: instruction });
      return queued;
    });
  });
}
describe('ArpeggioRunner integration', () => {
let engine: PieceEngine | undefined;
@ -122,10 +135,11 @@ describe('ArpeggioRunner integration', () => {
// Mock agent to return batch-specific responses
const mockAgent = vi.mocked(runAgent);
mockAgent
.mockResolvedValueOnce(makeResponse({ content: 'Processed Alice' }))
.mockResolvedValueOnce(makeResponse({ content: 'Processed Bob' }))
.mockResolvedValueOnce(makeResponse({ content: 'Processed Charlie' }));
mockRunAgentWithPrompt(
makeResponse({ content: 'Processed Alice' }),
makeResponse({ content: 'Processed Bob' }),
makeResponse({ content: 'Processed Charlie' }),
);
// Mock rule detection for the merged result
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
@ -163,9 +177,10 @@ describe('ArpeggioRunner integration', () => {
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
const mockAgent = vi.mocked(runAgent);
mockAgent
.mockResolvedValueOnce(makeResponse({ content: 'Batch 0 result' }))
.mockResolvedValueOnce(makeResponse({ content: 'Batch 1 result' }));
mockRunAgentWithPrompt(
makeResponse({ content: 'Batch 0 result' }),
makeResponse({ content: 'Batch 1 result' }),
);
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
index: 0,
@ -189,13 +204,12 @@ describe('ArpeggioRunner integration', () => {
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
const mockAgent = vi.mocked(runAgent);
// First batch succeeds
mockAgent.mockResolvedValueOnce(makeResponse({ content: 'OK' }));
// Second batch fails twice (initial + 1 retry)
mockAgent.mockResolvedValueOnce(makeResponse({ status: 'error', error: 'fail1' }));
mockAgent.mockResolvedValueOnce(makeResponse({ status: 'error', error: 'fail2' }));
// Third batch succeeds
mockAgent.mockResolvedValueOnce(makeResponse({ content: 'OK' }));
mockRunAgentWithPrompt(
makeResponse({ content: 'OK' }),
makeResponse({ status: 'error', error: 'fail1' }),
makeResponse({ status: 'error', error: 'fail2' }),
makeResponse({ content: 'OK' }),
);
engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
const state = await engine.run();
@ -210,10 +224,11 @@ describe('ArpeggioRunner integration', () => {
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
const mockAgent = vi.mocked(runAgent);
mockAgent
.mockResolvedValueOnce(makeResponse({ content: 'Result A' }))
.mockResolvedValueOnce(makeResponse({ content: 'Result B' }))
.mockResolvedValueOnce(makeResponse({ content: 'Result C' }));
mockRunAgentWithPrompt(
makeResponse({ content: 'Result A' }),
makeResponse({ content: 'Result B' }),
makeResponse({ content: 'Result C' }),
);
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
index: 0,
@ -234,10 +249,11 @@ describe('ArpeggioRunner integration', () => {
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
const mockAgent = vi.mocked(runAgent);
mockAgent
.mockResolvedValueOnce(makeResponse({ content: 'A' }))
.mockResolvedValueOnce(makeResponse({ content: 'B' }))
.mockResolvedValueOnce(makeResponse({ content: 'C' }));
mockRunAgentWithPrompt(
makeResponse({ content: 'A' }),
makeResponse({ content: 'B' }),
makeResponse({ content: 'C' }),
);
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
index: 0,
@ -251,4 +267,90 @@ describe('ArpeggioRunner integration', () => {
expect(mockAgent).toHaveBeenCalledTimes(3);
});
it('should record resolved prompt in phase:start for arpeggio batches', async () => {
  const { tmpDir, csvPath, templatePath } = createArpeggioTestDir();
  const config = buildArpeggioPieceConfig(
    createArpeggioConfig(csvPath, templatePath, { concurrency: 2 }),
    tmpDir,
  );
  // Instructions seen on phase:start for the "process" movement's execute phase.
  const recordedInstructions: string[] = [];

  mockRunAgentWithPrompt(
    makeResponse({ content: 'A' }),
    makeResponse({ content: 'B' }),
    makeResponse({ content: 'C' }),
  );
  vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });

  engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
  engine.on('phase:start', (step, phase, phaseName, instruction) => {
    const isProcessExecute = step.name === 'process' && phase === 1 && phaseName === 'execute';
    if (isProcessExecute) {
      recordedInstructions.push(instruction);
    }
  });

  const state = await engine.run();

  expect(state.status).toBe('completed');
  expect(recordedInstructions.length).toBe(3);
  // No recorded instruction may start with the '[Arpeggio batch' prefix ...
  const hasBatchPrefix = recordedInstructions.some((text) => text.startsWith('[Arpeggio batch'));
  expect(!hasBatchPrefix).toBe(true);
  // ... and at least one must contain 'Process '.
  const mentionsProcess = recordedInstructions.some((text) => text.includes('Process '));
  expect(mentionsProcess).toBe(true);
});
// Verifies that each phase:complete event can be matched back to its own phase:start
// through phaseExecutionId, even when batches resolve after different delays.
it('should keep phaseExecutionId bindings correct when completion order is reversed', async () => {
const { tmpDir, csvPath, templatePath } = createArpeggioTestDir();
const arpeggioConfig = createArpeggioConfig(csvPath, templatePath, { concurrency: 2 });
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
// phaseExecutionId -> instruction captured at phase:start.
const phaseStartsByExecutionId = new Map<string, string>();
// Completions in the order the engine emitted them.
const phaseCompletions: Array<{ phaseExecutionId?: string; content: string }> = [];
// Persistent mock: reports the resolved prompt, then answers based on the instruction text.
vi.mocked(runAgent).mockImplementation(async (persona, instruction, options) => {
options?.onPromptResolved?.({
systemPrompt: typeof persona === 'string' ? persona : '',
userInstruction: instruction,
});
// Alice resolves after 40 ms and Bob after 5 ms, so Bob's call settles before Alice's
// when both are in flight together (presumably the case with concurrency 2 — confirm in runner).
if (instruction.includes('Alice')) {
await new Promise((resolve) => setTimeout(resolve, 40));
return makeResponse({ content: 'Result Alice' });
}
if (instruction.includes('Bob')) {
await new Promise((resolve) => setTimeout(resolve, 5));
return makeResponse({ content: 'Result Bob' });
}
// Any other instruction resolves without an artificial delay.
return makeResponse({ content: 'Result Charlie' });
});
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
// Only execute-phase (phase 1) events of the "process" movement that carry an id are recorded.
engine.on('phase:start', (step, phase, phaseName, instruction, _promptParts, phaseExecutionId) => {
if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute' || !phaseExecutionId) return;
phaseStartsByExecutionId.set(phaseExecutionId, instruction);
});
// Completions are recorded even without an id so a missing id fails the assertions below.
engine.on('phase:complete', (step, phase, phaseName, content, _status, _error, phaseExecutionId) => {
if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute') return;
phaseCompletions.push({ phaseExecutionId, content });
});
const state = await engine.run();
expect(state.status).toBe('completed');
expect(phaseCompletions).toHaveLength(3);
// Every completion must carry a distinct phaseExecutionId.
expect(new Set(phaseCompletions.map((entry) => entry.phaseExecutionId)).size).toBe(3);
// Sorted comparison: emission order is deliberately not asserted.
expect(phaseCompletions.map((entry) => entry.content).sort()).toEqual([
'Result Alice',
'Result Bob',
'Result Charlie',
]);
// Each completion's id must resolve to the start instruction that produced its content.
for (const completion of phaseCompletions) {
const instruction = completion.phaseExecutionId
? phaseStartsByExecutionId.get(completion.phaseExecutionId)
: undefined;
expect(instruction).toBeDefined();
if (completion.content === 'Result Alice') {
expect(instruction).toContain('Alice');
} else if (completion.content === 'Result Bob') {
expect(instruction).toContain('Bob');
} else {
expect(instruction).toContain('Charlie');
}
}
});
});

View File

@ -167,9 +167,13 @@ describe('PieceEngine Integration: Error Handling', () => {
const engine = new PieceEngine(config, tmpDir, 'test task', { projectCwd: tmpDir });
for (let i = 0; i < 5; i++) {
vi.mocked(runAgent).mockResolvedValueOnce(
makeResponse({ content: `iteration ${i}` })
);
vi.mocked(runAgent).mockImplementationOnce(async (persona, task, options) => {
options?.onPromptResolved?.({
systemPrompt: typeof persona === 'string' ? persona : '',
userInstruction: task,
});
return makeResponse({ content: `iteration ${i}` });
});
vi.mocked(detectMatchedRule).mockResolvedValueOnce(
{ index: 0, method: 'phase1_tag' }
);

View File

@ -544,11 +544,16 @@ describe('PieceEngine Integration: Happy Path', () => {
expect(phaseStartFn).toHaveBeenCalledWith(
expect.objectContaining({ name: 'plan' }),
1, 'execute', expect.any(String)
1, 'execute', expect.any(String), expect.objectContaining({
systemPrompt: expect.any(String),
userInstruction: expect.any(String),
}),
undefined,
1,
);
expect(phaseCompleteFn).toHaveBeenCalledWith(
expect.objectContaining({ name: 'plan' }),
1, 'execute', expect.any(String), 'done', undefined
1, 'execute', expect.any(String), 'done', undefined, undefined, 1,
);
});

View File

@ -122,13 +122,21 @@ describe('PieceEngine Integration: Parallel Movement Partial Failure', () => {
// arch-review fails (exit code 1)
mock.mockRejectedValueOnce(new Error('Claude Code process exited with code 1'));
// security-review succeeds
mock.mockResolvedValueOnce(
makeResponse({ persona: 'security-review', content: 'Security review passed' }),
);
mock.mockImplementationOnce(async (persona, task, options) => {
options?.onPromptResolved?.({
systemPrompt: typeof persona === 'string' ? persona : '',
userInstruction: task,
});
return makeResponse({ persona: 'security-review', content: 'Security review passed' });
});
// done step
mock.mockResolvedValueOnce(
makeResponse({ persona: 'done', content: 'Completed' }),
);
mock.mockImplementationOnce(async (persona, task, options) => {
options?.onPromptResolved?.({
systemPrompt: typeof persona === 'string' ? persona : '',
userInstruction: task,
});
return makeResponse({ persona: 'done', content: 'Completed' });
});
mockDetectMatchedRuleSequence([
// security-review sub-movement rule match (arch-review has no match — it failed)
@ -179,12 +187,20 @@ describe('PieceEngine Integration: Parallel Movement Partial Failure', () => {
const mock = vi.mocked(runAgent);
mock.mockRejectedValueOnce(new Error('Session resume failed'));
mock.mockResolvedValueOnce(
makeResponse({ persona: 'security-review', content: 'OK' }),
);
mock.mockResolvedValueOnce(
makeResponse({ persona: 'done', content: 'Done' }),
);
mock.mockImplementationOnce(async (persona, task, options) => {
options?.onPromptResolved?.({
systemPrompt: typeof persona === 'string' ? persona : '',
userInstruction: task,
});
return makeResponse({ persona: 'security-review', content: 'OK' });
});
mock.mockImplementationOnce(async (persona, task, options) => {
options?.onPromptResolved?.({
systemPrompt: typeof persona === 'string' ? persona : '',
userInstruction: task,
});
return makeResponse({ persona: 'done', content: 'Done' });
});
mockDetectMatchedRuleSequence([
{ index: 0, method: 'phase1_tag' },

View File

@ -216,11 +216,15 @@ describe('PieceEngine Integration: Parallel Movement Aggregation', () => {
['../personas/supervise.md', makeResponse({ persona: 'supervise', content: 'All passed' })],
]);
vi.mocked(runAgent).mockImplementation(async (persona, _task, options) => {
vi.mocked(runAgent).mockImplementation(async (persona, task, options) => {
const response = responsesByPersona.get(persona ?? '');
if (!response) {
throw new Error(`Unexpected persona: ${persona}`);
}
options.onPromptResolved?.({
systemPrompt: typeof persona === 'string' ? persona : '',
userInstruction: task,
});
if (persona === '../personas/arch-review.md') {
options.onStream?.({ type: 'text', data: { text: 'arch stream line\n' } });

View File

@ -49,6 +49,19 @@ function buildTeamLeaderConfig(): PieceConfig {
};
}
/**
 * Registers, for every response passed in, a one-shot `runAgent` mock implementation.
 *
 * When invoked, each implementation notifies `options.onPromptResolved` (if supplied)
 * with `systemPrompt` set to the persona when it is a string (otherwise `''`) and
 * `userInstruction` set to the instruction, and then resolves with its response.
 *
 * @param responses Responses to hand out on consecutive `runAgent` calls.
 */
function mockRunAgentWithPrompt(...responses: ReturnType<typeof makeResponse>[]): void {
  const agentMock = vi.mocked(runAgent);
  for (const [, reply] of responses.entries()) {
    agentMock.mockImplementationOnce(async (persona, instruction, options) => {
      const resolvedPrompt = {
        systemPrompt: typeof persona === 'string' ? persona : '',
        userInstruction: instruction,
      };
      options?.onPromptResolved?.(resolvedPrompt);
      return reply;
    });
  }
}
describe('PieceEngine Integration: TeamLeaderRunner', () => {
let tmpDir: string;
@ -68,21 +81,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
const config = buildTeamLeaderConfig();
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
vi.mocked(runAgent)
.mockResolvedValueOnce(makeResponse({
mockRunAgentWithPrompt(
makeResponse({
persona: 'team-leader',
content: [
'```json',
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
'```',
].join('\n'),
}))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' }))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' }))
.mockResolvedValueOnce(makeResponse({
}),
makeResponse({ persona: 'coder', content: 'API done' }),
makeResponse({ persona: 'coder', content: 'Tests done' }),
makeResponse({
persona: 'team-leader',
structuredOutput: { done: true, reasoning: 'enough', parts: [] },
}));
}),
);
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
@ -103,21 +117,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
const config = buildTeamLeaderConfig();
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
vi.mocked(runAgent)
.mockResolvedValueOnce(makeResponse({
mockRunAgentWithPrompt(
makeResponse({
persona: 'team-leader',
content: [
'```json',
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
'```',
].join('\n'),
}))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'api failed' }))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }))
.mockResolvedValueOnce(makeResponse({
}),
makeResponse({ persona: 'coder', status: 'error', error: 'api failed' }),
makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }),
makeResponse({
persona: 'team-leader',
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
}));
}),
);
const state = await engine.run();
@ -128,21 +143,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
const config = buildTeamLeaderConfig();
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
vi.mocked(runAgent)
.mockResolvedValueOnce(makeResponse({
mockRunAgentWithPrompt(
makeResponse({
persona: 'team-leader',
content: [
'```json',
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
'```',
].join('\n'),
}))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' }))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }))
.mockResolvedValueOnce(makeResponse({
}),
makeResponse({ persona: 'coder', content: 'API done' }),
makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }),
makeResponse({
persona: 'team-leader',
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
}));
}),
);
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
@ -161,21 +177,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
const config = buildTeamLeaderConfig();
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
vi.mocked(runAgent)
.mockResolvedValueOnce(makeResponse({
mockRunAgentWithPrompt(
makeResponse({
persona: 'team-leader',
content: [
'```json',
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
'```',
].join('\n'),
}))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', content: 'api failed from content' }))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' }))
.mockResolvedValueOnce(makeResponse({
}),
makeResponse({ persona: 'coder', status: 'error', content: 'api failed from content' }),
makeResponse({ persona: 'coder', content: 'Tests done' }),
makeResponse({
persona: 'team-leader',
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
}));
}),
);
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
@ -191,8 +208,8 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
const config = buildTeamLeaderConfig();
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
vi.mocked(runAgent)
.mockResolvedValueOnce(makeResponse({
mockRunAgentWithPrompt(
makeResponse({
persona: 'team-leader',
structuredOutput: {
parts: [
@ -200,10 +217,10 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
{ id: 'part-2', title: 'Test', instruction: 'Add tests', timeout_ms: null },
],
},
}))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' }))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' }))
.mockResolvedValueOnce(makeResponse({
}),
makeResponse({ persona: 'coder', content: 'API done' }),
makeResponse({ persona: 'coder', content: 'Tests done' }),
makeResponse({
persona: 'team-leader',
structuredOutput: {
done: false,
@ -212,16 +229,17 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
{ id: 'part-3', title: 'Docs', instruction: 'Write docs', timeout_ms: null },
],
},
}))
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Docs done' }))
.mockResolvedValueOnce(makeResponse({
}),
makeResponse({ persona: 'coder', content: 'Docs done' }),
makeResponse({
persona: 'team-leader',
structuredOutput: {
done: true,
reasoning: 'Enough',
parts: [],
},
}));
}),
);
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
@ -235,4 +253,35 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
expect(output!.content).toContain('Docs done');
});
it('team leader の phase:start には分解実行時の実 instruction を記録する', async () => {
  const engine = new PieceEngine(buildTeamLeaderConfig(), tmpDir, 'implement feature', {
    projectCwd: tmpDir,
  });

  // Instructions seen on phase:start for the "implement" movement's execute phase.
  const recordedInstructions: string[] = [];
  engine.on('phase:start', (step, phase, phaseName, instruction) => {
    const isImplementExecute = step.name === 'implement' && phase === 1 && phaseName === 'execute';
    if (isImplementExecute) {
      recordedInstructions.push(instruction);
    }
  });

  // Queue order: team-leader reply with one part, coder reply, team-leader reply with done: true.
  const decomposition = makeResponse({
    persona: 'team-leader',
    structuredOutput: {
      parts: [{ id: 'part-1', title: 'API', instruction: 'Implement API', timeout_ms: null }],
    },
  });
  const partResult = makeResponse({ persona: 'coder', content: 'API done' });
  const noMoreParts = makeResponse({
    persona: 'team-leader',
    structuredOutput: { done: true, reasoning: 'enough', parts: [] },
  });
  mockRunAgentWithPrompt(decomposition, partResult, noMoreParts);
  vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });

  const state = await engine.run();

  expect(state.status).toBe('completed');
  expect(recordedInstructions.length).toBeGreaterThan(0);
  const [firstInstruction] = recordedInstructions;
  expect(firstInstruction).toContain('This is decomposition-only planning. Do not execute the task.');
});
});

View File

@ -136,7 +136,13 @@ export function buildDefaultPieceConfig(overrides: Partial<PieceConfig> = {}): P
/**
 * Queues `responses` on the mocked `runAgent`, one single-use implementation per entry,
 * consumed in array order.
 *
 * Each queued implementation calls `options.onPromptResolved` (when provided) with the
 * persona as `systemPrompt` (empty string if the persona is not a string) and the task
 * as `userInstruction`, then resolves with the corresponding response.
 *
 * @param responses Responses to return from successive `runAgent` calls.
 */
export function mockRunAgentSequence(responses: AgentResponse[]): void {
  const mock = vi.mocked(runAgent);
  for (const response of responses) {
    // Exactly one queue entry per response. A leftover `mockResolvedValueOnce(response)`
    // next to this call would enqueue every response twice, and the first copy would
    // never report the prompt through onPromptResolved.
    mock.mockImplementationOnce(async (persona, task, options) => {
      options?.onPromptResolved?.({
        systemPrompt: typeof persona === 'string' ? persona : '',
        userInstruction: task,
      });
      return response;
    });
  }
}

View File

@ -103,7 +103,13 @@ describe('IT: config provider_options reflection', () => {
delete process.env.TAKT_PROVIDER_OPTIONS_CODEX_NETWORK_ACCESS;
invalidateGlobalConfigCache();
vi.mocked(runAgent).mockResolvedValue(makeDoneResponse());
vi.mocked(runAgent).mockImplementation(async (persona, task, options) => {
options?.onPromptResolved?.({
systemPrompt: typeof persona === 'string' ? persona : '',
userInstruction: task,
});
return makeDoneResponse();
});
});
afterEach(() => {
@ -203,4 +209,3 @@ describe('IT: config provider_options reflection', () => {
});
});
});

View File

@ -83,7 +83,13 @@ describe('IT: provider block reflection', () => {
beforeEach(() => {
vi.clearAllMocks();
originalConfigDir = process.env.TAKT_CONFIG_DIR;
vi.mocked(runAgent).mockResolvedValue(makeDoneResponse());
vi.mocked(runAgent).mockImplementation(async (persona, task, options) => {
options?.onPromptResolved?.({
systemPrompt: typeof persona === 'string' ? persona : '',
userInstruction: task,
});
return makeDoneResponse();
});
});
afterEach(() => {

View File

@ -11,6 +11,7 @@ vi.mock('../agents/runner.js', () => ({
}));
import { runAgent } from '../agents/runner.js';
import type { AgentResponse } from '../core/models/types.js';
function createStep(fileName: string): PieceMovement {
return {
@ -51,6 +52,19 @@ function createContext(
};
}
/**
 * Queues the given responses on the mocked `runAgent`, one one-shot implementation each.
 *
 * Every queued implementation reports the prompt via `options.onPromptResolved` when that
 * callback exists (persona as `systemPrompt`, or `''` if it is not a string; task as
 * `userInstruction`) and then resolves with its response.
 *
 * @param responses Responses to return from consecutive `runAgent` calls, in order.
 */
function queueRunAgentResponses(responses: AgentResponse[]): void {
  const mockedRunAgent = vi.mocked(runAgent);
  responses.forEach((queuedResponse) => {
    mockedRunAgent.mockImplementationOnce(async (persona, task, options) => {
      const systemPrompt = typeof persona === 'string' ? persona : '';
      options?.onPromptResolved?.({ systemPrompt, userInstruction: task });
      return queuedResponse;
    });
  });
}
describe('runReportPhase report history behavior', () => {
let tmpRoot: string;
@ -71,22 +85,22 @@ describe('runReportPhase report history behavior', () => {
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
const step = createStep('05-architect-review.md');
const ctx = createContext(reportDir);
const runAgentMock = vi.mocked(runAgent);
runAgentMock
.mockResolvedValueOnce({
queueRunAgentResponses([
{
persona: 'reviewers',
status: 'done',
content: 'First review result',
timestamp: new Date('2026-02-10T06:11:43Z'),
sessionId: 'session-2',
})
.mockResolvedValueOnce({
},
{
persona: 'reviewers',
status: 'done',
content: 'Second review result',
timestamp: new Date('2026-02-10T06:14:37Z'),
sessionId: 'session-3',
});
},
]);
// When
await runReportPhase(step, 1, ctx);
@ -113,29 +127,29 @@ describe('runReportPhase report history behavior', () => {
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
const step = createStep('06-qa-review.md');
const ctx = createContext(reportDir);
const runAgentMock = vi.mocked(runAgent);
runAgentMock
.mockResolvedValueOnce({
queueRunAgentResponses([
{
persona: 'reviewers',
status: 'done',
content: 'v1',
timestamp: new Date('2026-02-10T06:11:43Z'),
sessionId: 'session-2',
})
.mockResolvedValueOnce({
},
{
persona: 'reviewers',
status: 'done',
content: 'v2',
timestamp: new Date('2026-02-10T06:11:43Z'),
sessionId: 'session-3',
})
.mockResolvedValueOnce({
},
{
persona: 'reviewers',
status: 'done',
content: 'v3',
timestamp: new Date('2026-02-10T06:11:43Z'),
sessionId: 'session-4',
});
},
]);
// When
await runReportPhase(step, 1, ctx);
@ -158,14 +172,13 @@ describe('runReportPhase report history behavior', () => {
const ctx = createContext(reportDir, (overrides) => {
capturedOverrides.push(overrides);
});
const runAgentMock = vi.mocked(runAgent);
runAgentMock.mockResolvedValueOnce({
queueRunAgentResponses([{
persona: 'reviewers',
status: 'done',
content: 'Permission-based report execution',
timestamp: new Date('2026-02-10T06:21:17Z'),
sessionId: 'session-2',
});
}]);
// When
await runReportPhase(step, 1, ctx);

View File

@ -139,6 +139,7 @@ vi.mock('../shared/utils/index.js', () => ({
preventSleep: vi.fn(),
isDebugEnabled: vi.fn().mockReturnValue(false),
writePromptLog: vi.fn(),
getDebugPromptsLogFile: vi.fn().mockReturnValue(null),
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
isValidReportDirName: vi.fn().mockReturnValue(true),
playWarningSound: vi.fn(),

View File

@ -31,12 +31,57 @@ const { mockIsDebugEnabled, mockWritePromptLog, MockPieceEngine } = vi.hoisted((
const step = this.config.movements[0]!;
const timestamp = new Date('2026-02-07T00:00:00.000Z');
const shouldAbort = this.task === 'abort-task';
const shouldAbortBeforeComplete = this.task === 'abort-before-complete-task';
const shouldDuplicatePhase = this.task === 'duplicate-phase-task';
const shouldEmitSensitive = this.task === 'sensitive-content-task';
const shouldRepeatMovement = this.task === 'repeat-movement-task';
const shouldReversePhaseCompletion = this.task === 'reverse-phase-complete-task';
const providerInfo = { provider: undefined, model: undefined };
this.emit('movement:start', step, 1, 'movement instruction', providerInfo);
this.emit('phase:start', step, 1, 'execute', 'phase prompt');
this.emit('phase:complete', step, 1, 'execute', 'phase response', 'done');
if (shouldReversePhaseCompletion) {
this.emit('phase:start', step, 1, 'execute', 'phase prompt first', {
systemPrompt: '../agents/coder.md',
userInstruction: 'phase prompt first',
}, 'implement:1:1:1', 1);
this.emit('phase:start', step, 1, 'execute', 'phase prompt second', {
systemPrompt: '../agents/coder.md',
userInstruction: 'phase prompt second',
}, 'implement:1:1:2', 1);
} else {
this.emit('phase:start', step, 1, 'execute', shouldEmitSensitive ? 'token=plain-secret' : 'phase prompt', {
systemPrompt: shouldEmitSensitive ? 'Authorization: Bearer super-secret-token' : '../agents/coder.md',
userInstruction: shouldEmitSensitive ? 'api_key=plain-secret' : 'phase prompt',
});
}
this.emit('phase:start', step, 3, 'judge', 'phase3 prompt', {
systemPrompt: 'conductor',
userInstruction: 'phase3 prompt',
});
this.emit('phase:judge_stage', step, 3, 'judge', {
stage: 1,
method: 'structured_output',
status: 'done',
instruction: 'judge stage prompt',
response: 'judge stage response',
});
this.emit('phase:complete', step, 3, 'judge', '[IMPLEMENT:1]', 'done');
if (shouldAbortBeforeComplete) {
this.emit('piece:abort', { status: 'aborted', iteration: 1 }, 'user_interrupted');
return { status: 'aborted', iteration: 1 };
}
if (shouldReversePhaseCompletion) {
this.emit('phase:complete', step, 1, 'execute', 'phase response second', 'done', undefined, 'implement:1:1:2', 1);
this.emit('phase:complete', step, 1, 'execute', 'phase response first', 'done', undefined, 'implement:1:1:1', 1);
} else {
this.emit('phase:complete', step, 1, 'execute', shouldEmitSensitive ? 'password=plain-secret' : 'phase response', 'done');
}
if (shouldDuplicatePhase) {
this.emit('phase:start', step, 1, 'execute', 'phase prompt second', {
systemPrompt: '../agents/coder.md',
userInstruction: 'phase prompt second',
});
this.emit('phase:complete', step, 1, 'execute', 'phase response second', 'done');
}
this.emit(
'movement:complete',
step,
@ -154,6 +199,7 @@ vi.mock('../shared/utils/index.js', () => ({
preventSleep: vi.fn(),
isDebugEnabled: mockIsDebugEnabled,
writePromptLog: mockWritePromptLog,
getDebugPromptsLogFile: vi.fn().mockReturnValue(null),
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
isValidReportDirName: vi.fn().mockImplementation((value: string) => /^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(value)),
}));
@ -173,6 +219,7 @@ vi.mock('../shared/exitCodes.js', () => ({
import { executePiece } from '../features/tasks/execute/pieceExecution.js';
import { ensureDir, writeFileAtomic } from '../infra/config/index.js';
import { appendNdjsonLine } from '../infra/fs/index.js';
describe('executePiece debug prompts logging', () => {
beforeEach(() => {
@ -204,15 +251,16 @@ describe('executePiece debug prompts logging', () => {
projectCwd: '/tmp/project',
});
expect(mockWritePromptLog).toHaveBeenCalledTimes(1);
const record = mockWritePromptLog.mock.calls[0]?.[0] as {
expect(mockWritePromptLog).toHaveBeenCalledTimes(2);
const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array<{
movement: string;
phase: number;
iteration: number;
prompt: string;
response: string;
timestamp: string;
};
}>;
const record = records.find((entry) => entry.phase === 1)!;
expect(record.movement).toBe('implement');
expect(record.phase).toBe(1);
expect(record.iteration).toBe(1);
@ -221,6 +269,54 @@ describe('executePiece debug prompts logging', () => {
expect(record.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/);
});
// With debug logging switched on, the prompt-log record for phase 1 must expose the
// system prompt and the user instruction as two separate fields.
it('should separate system prompt and user instruction in debug prompt records', async () => {
mockIsDebugEnabled.mockReturnValue(true);
await executePiece(makeConfig(), 'task', '/tmp/project', {
projectCwd: '/tmp/project',
});
// Two records are written in total; only the phase-1 record is inspected below.
expect(mockWritePromptLog).toHaveBeenCalledTimes(2);
const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array<Record<string, unknown> & { phase: number }>;
const record = records.find((entry) => entry.phase === 1)!;
expect(record).toHaveProperty('systemPrompt');
expect(record).toHaveProperty('userInstruction');
expect(record.systemPrompt).toBe('../agents/coder.md');
expect(record.userInstruction).toBe('phase prompt');
});
// The trace.md handed to writeFileAtomic must contain the iteration heading, the phase
// heading, and the judgment-stage section including the stage-1 status line.
it('should include phase and judge stage details in trace markdown', async () => {
await executePiece(makeConfig(), 'task', '/tmp/project', {
projectCwd: '/tmp/project',
reportDirName: 'test-report-dir',
});
// Pick the first write whose target path ends in /trace.md.
const traceCall = vi.mocked(writeFileAtomic).mock.calls.find(
(call) => String(call[0]).endsWith('/trace.md')
);
expect(traceCall).toBeDefined();
const traceContent = String(traceCall?.[1]);
expect(traceContent).toContain('## Iteration 1: implement');
expect(traceContent).toContain('### Phase 1: execute');
expect(traceContent).toContain('#### Judgment Stages');
expect(traceContent).toContain('Stage 1 (structured_output): status=done');
});
// A trace.md must still be written when the run is aborted before the movement finishes;
// the unfinished movement is reported with status in_progress.
// NOTE(review): the 'abort-before-complete-task' task string presumably switches the mocked
// engine into its early-abort path — that wiring is outside this chunk; confirm.
it('should render trace markdown even when piece aborts before movement completion', async () => {
await executePiece(makeConfig(), 'abort-before-complete-task', '/tmp/project', {
projectCwd: '/tmp/project',
reportDirName: 'test-report-dir',
});
const traceCall = vi.mocked(writeFileAtomic).mock.calls.find(
(call) => String(call[0]).endsWith('/trace.md')
);
expect(traceCall).toBeDefined();
const traceContent = String(traceCall?.[1]);
expect(traceContent).toContain('- Status: ❌ aborted');
expect(traceContent).toContain('- Movement Status: in_progress');
});
it('should not write prompt log record when debug is disabled', async () => {
mockIsDebugEnabled.mockReturnValue(false);
@ -231,6 +327,24 @@ describe('executePiece debug prompts logging', () => {
expect(mockWritePromptLog).not.toHaveBeenCalled();
});
// When the same movement/phase pair is started twice, each execution must still yield its
// own prompt-log record, in the order the phases completed.
// NOTE(review): 'duplicate-phase-task' presumably makes the mocked engine emit a second
// phase-1 start/complete pair — the wiring is outside this chunk; confirm.
it('should handle repeated phase starts for same movement and phase without missing debug prompt', async () => {
mockIsDebugEnabled.mockReturnValue(true);
await executePiece(makeConfig(), 'duplicate-phase-task', '/tmp/project', {
projectCwd: '/tmp/project',
});
// Three records overall; the two belonging to phase 1 are compared below.
expect(mockWritePromptLog).toHaveBeenCalledTimes(3);
const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array<{
phase: number;
response: string;
}>;
const phase1Responses = records
.filter((record) => record.phase === 1)
.map((record) => record.response);
expect(phase1Responses).toEqual(['phase response', 'phase response second']);
});
it('should update movement prefix context on each movement:start event', async () => {
const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
@ -277,11 +391,13 @@ describe('executePiece debug prompts logging', () => {
reportDirName: 'test-report-dir',
});
const calls = vi.mocked(writeFileAtomic).mock.calls;
expect(calls).toHaveLength(2);
const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
(call) => String(call[0]).endsWith('/meta.json')
);
expect(metaCalls).toHaveLength(2);
const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string };
const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string };
const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string };
const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string };
expect(firstMeta.status).toBe('running');
expect(firstMeta.endTime).toBeUndefined();
expect(secondMeta.status).toBe('completed');
@ -294,11 +410,13 @@ describe('executePiece debug prompts logging', () => {
reportDirName: 'test-report-dir',
});
const calls = vi.mocked(writeFileAtomic).mock.calls;
expect(calls).toHaveLength(2);
const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
(call) => String(call[0]).endsWith('/meta.json')
);
expect(metaCalls).toHaveLength(2);
const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string };
const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string };
const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string };
const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string };
expect(firstMeta.status).toBe('running');
expect(firstMeta.endTime).toBeUndefined();
expect(secondMeta.status).toBe('aborted');
@ -313,14 +431,83 @@ describe('executePiece debug prompts logging', () => {
})
).rejects.toThrow('mock constructor failure');
const calls = vi.mocked(writeFileAtomic).mock.calls;
expect(calls).toHaveLength(2);
const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
(call) => String(call[0]).endsWith('/meta.json')
);
expect(metaCalls).toHaveLength(2);
const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string };
const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string };
const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string };
const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string };
expect(firstMeta.status).toBe('running');
expect(firstMeta.endTime).toBeUndefined();
expect(secondMeta.status).toBe('aborted');
expect(secondMeta.endTime).toMatch(/^\d{4}-\d{2}-\d{2}T/);
});
// A normal run must write at least one file whose path ends in /trace.md.
it('should write trace.md on piece completion', async () => {
await executePiece(makeConfig(), 'task', '/tmp/project', {
projectCwd: '/tmp/project',
reportDirName: 'test-report-dir',
});
const traceCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
(call) => String(call[0]).endsWith('/trace.md')
);
expect(traceCalls.length).toBeGreaterThan(0);
});
// Same check as the completion case, but for the 'abort-task' scenario.
// NOTE(review): how 'abort-task' drives the mocked engine into an abort is not visible here.
it('should write trace.md on piece abort', async () => {
await executePiece(makeConfig(), 'abort-task', '/tmp/project', {
projectCwd: '/tmp/project',
reportDirName: 'test-report-dir',
});
const traceCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
(call) => String(call[0]).endsWith('/trace.md')
);
expect(traceCalls.length).toBeGreaterThan(0);
});
// Everything appended to the session NDJSON must have secrets replaced by [REDACTED]
// when no trace mode is passed explicitly.
it('should sanitize sensitive fields before writing session NDJSON when trace mode is default', async () => {
// Run 1: secrets arrive through the task string and the interactive metadata.
await executePiece(makeConfig(), 'token=plain-secret', '/tmp/project', {
projectCwd: '/tmp/project',
reportDirName: 'test-report-dir',
interactiveMetadata: {
confirmed: true,
task: 'api_key=plain-secret',
},
});
// Run 2: NOTE(review): 'sensitive-content-task' presumably makes the mocked engine emit
// secrets inside phase prompts/responses — the wiring is outside this chunk; confirm.
await executePiece(makeConfig(), 'sensitive-content-task', '/tmp/project', {
projectCwd: '/tmp/project',
reportDirName: 'test-report-dir-2',
});
// Serialise the second argument of every appendNdjsonLine call from both runs and scan it as one string.
const records = vi.mocked(appendNdjsonLine).mock.calls.map((call) => call[1]);
const recordText = JSON.stringify(records);
expect(recordText).toContain('[REDACTED]');
expect(recordText).not.toContain('plain-secret');
// NOTE(review): no visible fixture injects 'super-secret-token'; confirm where it would originate.
expect(recordText).not.toContain('super-secret-token');
});
// Even if phase completions are reported in reverse order, the trace must pair every
// prompt with its own response and list the first execution before the second.
// NOTE(review): 'reverse-phase-complete-task' presumably triggers the reversed completion
// events in the mocked engine — the wiring is outside this chunk; confirm.
it('should keep phaseExecutionId bindings consistent in trace when completions arrive in reverse order', async () => {
await executePiece(makeConfig(), 'reverse-phase-complete-task', '/tmp/project', {
projectCwd: '/tmp/project',
reportDirName: 'test-report-dir',
});
const traceCall = vi.mocked(writeFileAtomic).mock.calls.find(
(call) => String(call[0]).endsWith('/trace.md')
);
expect(traceCall).toBeDefined();
const traceContent = String(traceCall?.[1]);
const firstPromptIndex = traceContent.indexOf('phase prompt first');
const firstResponseIndex = traceContent.indexOf('phase response first');
const secondPromptIndex = traceContent.indexOf('phase prompt second');
const secondResponseIndex = traceContent.indexOf('phase response second');
// Required textual order: prompt 1 < response 1 < prompt 2 < response 2.
expect(firstPromptIndex).toBeGreaterThan(-1);
expect(firstResponseIndex).toBeGreaterThan(firstPromptIndex);
expect(secondPromptIndex).toBeGreaterThan(firstResponseIndex);
expect(secondResponseIndex).toBeGreaterThan(secondPromptIndex);
});
});

View File

@ -136,6 +136,7 @@ vi.mock('../shared/utils/index.js', () => ({
preventSleep: vi.fn(),
isDebugEnabled: vi.fn().mockReturnValue(false),
writePromptLog: vi.fn(),
getDebugPromptsLogFile: vi.fn().mockReturnValue(null),
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
isValidReportDirName: vi.fn().mockReturnValue(true),
playWarningSound: vi.fn(),

View File

@ -10,6 +10,7 @@ vi.mock('../agents/runner.js', () => ({
}));
import { runAgent } from '../agents/runner.js';
import type { AgentResponse } from '../core/models/types.js';
function createStep(fileName: string): PieceMovement {
return {
@ -50,6 +51,19 @@ function createContext(reportDir: string, lastResponse = 'Phase 1 result'): Phas
};
}
/**
 * Queues one-shot `runAgent` mock implementations, one per entry of `responses`.
 *
 * Each queued implementation first reports the prompt parts through
 * `options.onPromptResolved` (when the caller supplied it) and then resolves with the
 * corresponding response. A non-string persona is reported as an empty system prompt.
 *
 * @param responses - Responses to hand out, in call order.
 */
function queueRunAgentResponses(responses: AgentResponse[]): void {
  const mockedRunAgent = vi.mocked(runAgent);
  for (const queued of responses) {
    mockedRunAgent.mockImplementationOnce(async (persona, task, options) => {
      const notifyPrompt = options?.onPromptResolved;
      if (notifyPrompt) {
        const systemPrompt = typeof persona === 'string' ? persona : '';
        notifyPrompt({ systemPrompt, userInstruction: task });
      }
      return queued;
    });
  }
}
describe('runReportPhase retry with new session', () => {
let tmpRoot: string;
@ -69,22 +83,23 @@ describe('runReportPhase retry with new session', () => {
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
const step = createStep('02-coder.md');
const ctx = createContext(reportDir, 'Implemented feature X');
const runAgentMock = vi.mocked(runAgent);
runAgentMock
.mockResolvedValueOnce({
queueRunAgentResponses([
{
persona: 'coder',
status: 'done',
content: ' ',
timestamp: new Date('2026-02-11T00:00:00Z'),
sessionId: 'session-resume-2',
})
.mockResolvedValueOnce({
},
{
persona: 'coder',
status: 'done',
content: '# Report\nRecovered output',
timestamp: new Date('2026-02-11T00:00:01Z'),
sessionId: 'session-fresh-1',
});
},
]);
const runAgentMock = vi.mocked(runAgent);
// When
await runReportPhase(step, 1, ctx);
@ -107,21 +122,22 @@ describe('runReportPhase retry with new session', () => {
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
const step = createStep('03-review.md');
const ctx = createContext(reportDir);
const runAgentMock = vi.mocked(runAgent);
runAgentMock
.mockResolvedValueOnce({
queueRunAgentResponses([
{
persona: 'coder',
status: 'error',
content: 'Tool use is not allowed in this phase',
timestamp: new Date('2026-02-11T00:01:00Z'),
error: 'Tool use is not allowed in this phase',
})
.mockResolvedValueOnce({
},
{
persona: 'coder',
status: 'done',
content: 'Recovered report',
timestamp: new Date('2026-02-11T00:01:01Z'),
});
},
]);
const runAgentMock = vi.mocked(runAgent);
// When
await runReportPhase(step, 1, ctx);
@ -137,20 +153,21 @@ describe('runReportPhase retry with new session', () => {
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
const step = createStep('04-qa.md');
const ctx = createContext(reportDir);
const runAgentMock = vi.mocked(runAgent);
runAgentMock
.mockResolvedValueOnce({
queueRunAgentResponses([
{
persona: 'coder',
status: 'done',
content: ' ',
timestamp: new Date('2026-02-11T00:02:00Z'),
})
.mockResolvedValueOnce({
},
{
persona: 'coder',
status: 'done',
content: '\n\n',
timestamp: new Date('2026-02-11T00:02:01Z'),
});
},
]);
const runAgentMock = vi.mocked(runAgent);
// When / Then
await expect(runReportPhase(step, 1, ctx)).rejects.toThrow('Report phase failed for 04-qa.md: Report output is empty');
@ -162,14 +179,14 @@ describe('runReportPhase retry with new session', () => {
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
const step = createStep('05-ok.md');
const ctx = createContext(reportDir);
const runAgentMock = vi.mocked(runAgent);
runAgentMock.mockResolvedValueOnce({
queueRunAgentResponses([{
persona: 'coder',
status: 'done',
content: 'Single-pass success',
timestamp: new Date('2026-02-11T00:03:00Z'),
sessionId: 'session-resume-2',
});
}]);
const runAgentMock = vi.mocked(runAgent);
// When
await runReportPhase(step, 1, ctx);
@ -185,13 +202,13 @@ describe('runReportPhase retry with new session', () => {
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
const step = createStep('06-blocked.md');
const ctx = createContext(reportDir);
const runAgentMock = vi.mocked(runAgent);
runAgentMock.mockResolvedValueOnce({
queueRunAgentResponses([{
persona: 'coder',
status: 'blocked',
content: 'Need permission',
timestamp: new Date('2026-02-11T00:04:00Z'),
});
}]);
const runAgentMock = vi.mocked(runAgent);
// When
const result = await runReportPhase(step, 1, ctx);

View File

@ -0,0 +1,124 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';
import type { PieceMovement } from '../core/models/types.js';
import { runStatusJudgmentPhase } from '../core/piece/status-judgment-phase.js';
// The mock function is created through vi.hoisted so that it is already initialised when
// the vi.mock factory below references it.
const { mockJudgeStatus } = vi.hoisted(() => ({
mockJudgeStatus: vi.fn(),
}));
// Replace the agent use-case module so that `judgeStatus` resolves to the mock above.
vi.mock('../agents/agent-usecases.js', () => ({
judgeStatus: mockJudgeStatus,
}));
describe('runStatusJudgmentPhase', () => {
beforeEach(() => {
vi.clearAllMocks();
});
// The status-judgment phase must forward the context callbacks (phase start, judge stage,
// phase complete), each tagged with the same phase execution id and the iteration number.
it('should pass judge stage callbacks through PhaseRunnerContext', async () => {
// Fake judgeStatus: fires the two callbacks it receives in its options, then reports that
// the second rule (index 1) was matched via structured output.
mockJudgeStatus.mockImplementation(
async (_structured: string, _tag: string, _rules: unknown[], options: { onJudgeStage?: (entry: {
stage: 1 | 2 | 3;
method: 'structured_output' | 'phase3_tag' | 'ai_judge';
status: 'done' | 'error' | 'skipped';
instruction: string;
response: string;
}) => void; onStructuredPromptResolved?: (promptParts: { systemPrompt: string; userInstruction: string }) => void }) => {
options.onStructuredPromptResolved?.({
systemPrompt: 'conductor-system',
userInstruction: 'structured prompt',
});
options.onJudgeStage?.({
stage: 1,
method: 'structured_output',
status: 'done',
instruction: 'structured prompt',
response: '{"step":2}',
});
return { ruleIndex: 1, method: 'structured_output' as const };
},
);
// Two rules, so judgeStatus cannot short-circuit on a single-rule movement.
const step: PieceMovement = {
name: 'review',
persona: 'reviewer',
personaDisplayName: 'reviewer',
instructionTemplate: 'Review',
passPreviousResponse: true,
rules: [
{ condition: 'needs_fix', next: 'fix' },
{ condition: 'approved', next: 'COMPLETE' },
],
};
const onPhaseStart = vi.fn();
const onPhaseComplete = vi.fn();
const onJudgeStage = vi.fn();
const result = await runStatusJudgmentPhase(step, {
cwd: '/tmp/project',
reportDir: '/tmp/project/.takt/reports',
lastResponse: 'response body',
iteration: 4,
getSessionId: vi.fn(),
buildResumeOptions: vi.fn(),
buildNewSessionReportOptions: vi.fn(),
updatePersonaSession: vi.fn(),
onPhaseStart,
onPhaseComplete,
onJudgeStage,
});
// Rule index 1 is expected to surface as the 1-based tag [REVIEW:2].
expect(result).toEqual({
tag: '[REVIEW:2]',
ruleIndex: 1,
method: 'structured_output',
});
// All three callbacks must carry the execution id 'review:4:3:1' and iteration 4.
expect(onPhaseStart).toHaveBeenCalledWith(
step,
3,
'judge',
expect.any(String),
{
systemPrompt: 'conductor-system',
userInstruction: 'structured prompt',
},
'review:4:3:1',
4,
);
expect(onJudgeStage).toHaveBeenCalledWith(
step,
3,
'judge',
expect.objectContaining({ stage: 1, method: 'structured_output' }),
'review:4:3:1',
4,
);
expect(onPhaseComplete).toHaveBeenCalledWith(step, 3, 'judge', '[REVIEW:2]', 'done', undefined, 'review:4:3:1', 4);
});
// Running the status-judgment phase with a context that has no `iteration` must reject
// with an error naming the movement.
it('should fail fast when iteration is missing', async () => {
mockJudgeStatus.mockResolvedValue({ ruleIndex: 0, method: 'structured_output' });
const step: PieceMovement = {
name: 'review',
persona: 'reviewer',
personaDisplayName: 'reviewer',
instructionTemplate: 'Review',
passPreviousResponse: true,
rules: [
{ condition: 'needs_fix', next: 'fix' },
{ condition: 'approved', next: 'COMPLETE' },
],
};
// The context below deliberately omits `iteration`.
await expect(runStatusJudgmentPhase(step, {
cwd: '/tmp/project',
reportDir: '/tmp/project/.takt/reports',
lastResponse: 'response body',
getSessionId: vi.fn(),
buildResumeOptions: vi.fn(),
buildNewSessionReportOptions: vi.fn(),
updatePersonaSession: vi.fn(),
})).rejects.toThrow('Status judgment requires iteration for movement "review"');
});
});

View File

@ -0,0 +1,236 @@
import { describe, it, expect } from 'vitest';
import { mkdtempSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { renderTraceReportMarkdown, renderTraceReportFromLogs } from '../features/tasks/execute/traceReport.js';
describe('traceReport', () => {
// Rendering an aborted run whose only movement has no result and whose only phase never
// completed must not throw, and must still emit the prompt and judge-stage detail blocks.
it('should render judge stage details and tolerate aborted incomplete movement', () => {
const markdown = renderTraceReportMarkdown(
{
tracePath: '/tmp/trace.md',
pieceName: 'test-piece',
task: 'test task',
runSlug: 'run-1',
status: 'aborted',
iterations: 1,
endTime: '2026-03-04T12:00:00.000Z',
reason: 'user_interrupted',
},
'2026-03-04T11:59:00.000Z',
[
{
step: 'ai_fix',
persona: 'coder',
iteration: 1,
startedAt: '2026-03-04T11:59:01.000Z',
phases: [
{
// Phase entry without completedAt/status: it was started but never finished.
phaseExecutionId: 'ai_fix:3:1',
phase: 3,
phaseName: 'judge',
instruction: 'judge prompt',
systemPrompt: 'conductor',
userInstruction: 'judge prompt',
startedAt: '2026-03-04T11:59:02.000Z',
judgeStages: [
{
stage: 1,
method: 'structured_output',
status: 'error',
instruction: 'stage1 prompt',
response: '',
},
],
},
],
},
],
);
expect(markdown).toContain('- Status: ❌ aborted');
expect(markdown).toContain('- Movement Status: in_progress');
expect(markdown).toContain('## Iteration 1: ai_fix (persona: coder)');
expect(markdown).toContain('<details><summary>System Prompt</summary>');
expect(markdown).toContain('<details><summary>User Instruction</summary>');
expect(markdown).toContain('- Stage 1 (structured_output)');
expect(markdown).toContain('<details><summary>Stage Instruction</summary>');
expect(markdown).toContain('<details><summary>Stage Response</summary>');
});
// Movements rebuilt from the NDJSON logs must be rendered in chronological order even if
// the log lines themselves were written out of order.
it('should render movements in timestamp order from NDJSON logs', () => {
const dir = mkdtempSync(join(tmpdir(), 'trace-report-'));
const sessionPath = join(dir, 'session.jsonl');
const promptPath = join(dir, 'prompts.jsonl');
// The 'reviewers' records (iteration 2, later timestamps) are written BEFORE the 'plan'
// records (iteration 1, earlier timestamps) on purpose.
writeFileSync(sessionPath, [
JSON.stringify({ type: 'piece_start', task: 'task', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }),
JSON.stringify({ type: 'step_start', step: 'reviewers', persona: 'reviewer', iteration: 2, timestamp: '2026-03-04T11:59:05.000Z' }),
JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }),
JSON.stringify({ type: 'phase_start', step: 'reviewers', iteration: 2, phase: 1, phaseName: 'execute', phaseExecutionId: 'reviewers:2:1:1', instruction: 'r', timestamp: '2026-03-04T11:59:06.000Z' }),
JSON.stringify({ type: 'phase_complete', step: 'reviewers', iteration: 2, phase: 1, phaseName: 'execute', phaseExecutionId: 'reviewers:2:1:1', status: 'done', content: 'r-ok', timestamp: '2026-03-04T11:59:07.000Z' }),
JSON.stringify({ type: 'step_complete', step: 'reviewers', persona: 'reviewer', status: 'done', content: 'r-ok', instruction: 'inst', timestamp: '2026-03-04T11:59:08.000Z' }),
JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: 'p', timestamp: '2026-03-04T11:59:02.000Z' }),
JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'p-ok', timestamp: '2026-03-04T11:59:03.000Z' }),
JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: 'p-ok', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }),
JSON.stringify({ type: 'piece_complete', iterations: 2, endTime: '2026-03-04T12:00:00.000Z' }),
'',
].join('\n'));
writeFileSync(promptPath, [
JSON.stringify({ movement: 'plan', phase: 1, iteration: 1, phaseExecutionId: 'plan:1:1:1', prompt: 'p', systemPrompt: 'ps', userInstruction: 'pu', response: 'p-ok', timestamp: '2026-03-04T11:59:03.000Z' }),
JSON.stringify({ movement: 'reviewers', phase: 1, iteration: 2, phaseExecutionId: 'reviewers:2:1:1', prompt: 'r', systemPrompt: 'rs', userInstruction: 'ru', response: 'r-ok', timestamp: '2026-03-04T11:59:07.000Z' }),
'',
].join('\n'));
const markdown = renderTraceReportFromLogs(
{
tracePath: join(dir, 'trace.md'),
pieceName: 'piece',
task: 'task',
runSlug: 'run-1',
status: 'completed',
iterations: 2,
endTime: '2026-03-04T12:00:00.000Z',
},
sessionPath,
promptPath,
'full',
);
expect(markdown).toBeDefined();
// 'plan' must appear in the output before 'reviewers'.
const planIndex = markdown!.indexOf('## Iteration 1: plan');
const reviewersIndex = markdown!.indexOf('## Iteration 2: reviewers');
expect(planIndex).toBeGreaterThan(-1);
expect(reviewersIndex).toBeGreaterThan(planIndex);
});
// For a run marked 'completed', a phase that lacks a status is treated as corrupt data:
// rendering must throw instead of silently producing a report.
it('should fail fast when completed trace has missing phase status', () => {
expect(() => renderTraceReportMarkdown(
{
tracePath: '/tmp/trace.md',
pieceName: 'test-piece',
task: 'test task',
runSlug: 'run-1',
status: 'completed',
iterations: 1,
endTime: '2026-03-04T12:00:00.000Z',
},
'2026-03-04T11:59:00.000Z',
[
{
step: 'plan',
persona: 'planner',
iteration: 1,
startedAt: '2026-03-04T11:59:01.000Z',
phases: [
{
// completedAt is present but `status` is intentionally left out.
phaseExecutionId: 'plan:1:1',
phase: 1,
phaseName: 'execute',
instruction: 'instr',
systemPrompt: 'system',
userInstruction: 'user',
startedAt: '2026-03-04T11:59:02.000Z',
completedAt: '2026-03-04T11:59:03.000Z',
},
],
},
],
)).toThrow('missing status');
});
// In 'redacted' mode, key=value style secrets coming from the run parameters (task, reason)
// and from the session log must be replaced by [REDACTED] in the rendered markdown.
it('should mask sensitive task and reason in redacted mode', () => {
const dir = mkdtempSync(join(tmpdir(), 'trace-report-redact-'));
const sessionPath = join(dir, 'session.jsonl');
writeFileSync(sessionPath, [
JSON.stringify({ type: 'piece_start', task: 'token=topsecret', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }),
JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }),
JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: 'api_key=abc123', systemPrompt: 'Authorization: Bearer abc123', userInstruction: 'user token=abc123', timestamp: '2026-03-04T11:59:02.000Z' }),
JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'password=hunter2', timestamp: '2026-03-04T11:59:03.000Z' }),
JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: 'secret=my-secret', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }),
'',
].join('\n'));
// No prompt log is supplied (third argument is undefined); only the session log is read.
const markdown = renderTraceReportFromLogs(
{
tracePath: join(dir, 'trace.md'),
pieceName: 'piece',
task: 'token=topsecret',
runSlug: 'run-1',
status: 'aborted',
iterations: 1,
endTime: '2026-03-04T12:00:00.000Z',
reason: 'api_key=super-secret',
},
sessionPath,
undefined,
'redacted',
);
expect(markdown).toContain('token=[REDACTED]');
expect(markdown).toContain('api_key=[REDACTED]');
expect(markdown).not.toContain('topsecret');
expect(markdown).not.toContain('super-secret');
expect(markdown).not.toContain('hunter2');
});
// Redaction must also cover secrets embedded as quoted JSON values, bearer tokens, and
// bare tokens with well-known prefixes (the fixtures use sk-, ghp_ and xoxb- values).
it('should mask quoted JSON secrets and common token formats in redacted mode', () => {
const dir = mkdtempSync(join(tmpdir(), 'trace-report-redact-json-'));
const sessionPath = join(dir, 'session.jsonl');
writeFileSync(sessionPath, [
JSON.stringify({ type: 'piece_start', task: '{"api_key":"abc123"}', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }),
JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }),
JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: '{"token":"xyz987"}', systemPrompt: 'Authorization: Bearer sk-abcdef12345678', userInstruction: 'ghp_abcdef1234567890', timestamp: '2026-03-04T11:59:02.000Z' }),
JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'xoxb-1234abcd-5678efgh', timestamp: '2026-03-04T11:59:03.000Z' }),
JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: '{"password":"plain"}', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }),
'',
].join('\n'));
const markdown = renderTraceReportFromLogs(
{
tracePath: join(dir, 'trace.md'),
pieceName: 'piece',
task: '{"api_key":"abc123"}',
runSlug: 'run-1',
status: 'aborted',
iterations: 1,
endTime: '2026-03-04T12:00:00.000Z',
reason: '{"secret":"plain"}',
},
sessionPath,
undefined,
'redacted',
);
// The key and the surrounding JSON quoting stay intact; only the value is replaced.
expect(markdown).toContain('"api_key":"[REDACTED]"');
expect(markdown).toContain('"secret":"[REDACTED]"');
expect(markdown).toContain('Authorization: Bearer [REDACTED]');
expect(markdown).not.toContain('abc123');
expect(markdown).not.toContain('xyz987');
expect(markdown).not.toContain('ghp_abcdef1234567890');
expect(markdown).not.toContain('xoxb-1234abcd-5678efgh');
});
// A run that alternates between two movements (reviewers, fix, reviewers, fix) must be
// summarised as a loop and wrapped in a collapsible <details> block.
it('should fold alternating loop iterations into a details block', () => {
const markdown = renderTraceReportMarkdown(
{
tracePath: '/tmp/trace.md',
pieceName: 'test-piece',
task: 'test task',
runSlug: 'run-1',
status: 'completed',
iterations: 4,
endTime: '2026-03-04T12:00:00.000Z',
},
'2026-03-04T11:59:00.000Z',
[
{ step: 'reviewers', persona: 'reviewer', iteration: 1, startedAt: '2026-03-04T11:59:01.000Z', phases: [], result: { status: 'done', content: 'ok' } },
{ step: 'fix', persona: 'coder', iteration: 2, startedAt: '2026-03-04T11:59:02.000Z', phases: [], result: { status: 'done', content: 'ok' } },
{ step: 'reviewers', persona: 'reviewer', iteration: 3, startedAt: '2026-03-04T11:59:03.000Z', phases: [], result: { status: 'done', content: 'ok' } },
{ step: 'fix', persona: 'coder', iteration: 4, startedAt: '2026-03-04T11:59:04.000Z', phases: [], result: { status: 'done', content: 'ok' } },
],
);
expect(markdown).toContain('reviewers ↔ fix loop');
expect(markdown).toContain('<details><summary>Loop details');
});
});

View File

@ -1,172 +1,19 @@
import type { AgentResponse, PartDefinition, PieceRule, RuleMatchMethod, Language } from '../core/models/types.js';
import { runAgent, type RunAgentOptions, type StreamCallback } from './runner.js';
import { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js';
import { parseParts } from '../core/piece/engine/task-decomposer.js';
import { loadJudgmentSchema, loadEvaluationSchema, loadDecompositionSchema, loadMorePartsSchema } from '../infra/resources/schema-loader.js';
import { detectRuleIndex } from '../shared/utils/ruleIndex.js';
import { ensureUniquePartIds, parsePartDefinitionEntry } from '../core/piece/part-definition-validator.js';
import type { AgentResponse } from '../core/models/types.js';
import { runAgent, type RunAgentOptions } from './runner.js';
/** Options accepted by `judgeStatus`. */
export interface JudgeStatusOptions {
/** Working directory forwarded to every judge agent call. */
cwd: string;
/** Name of the movement being judged. NOTE(review): not read in the visible part of `judgeStatus`; confirm its use. */
movementName: string;
/** Forwarded unchanged to the agent runner. */
language?: Language;
/** Rules flagged `interactiveOnly` are only eligible when this is exactly `true`. */
interactive?: boolean;
/** Stream callback forwarded unchanged to the agent runner. */
onStream?: StreamCallback;
}
/** Outcome of `judgeStatus`. */
export interface JudgeStatusResult {
/** Zero-based index of the selected rule within the `rules` array passed to `judgeStatus`. */
ruleIndex: number;
/** How the rule was selected (e.g. `'auto_select'` for a single rule, `'structured_output'`). */
method: RuleMatchMethod;
}
/** Options accepted by `evaluateCondition`. */
export interface EvaluateConditionOptions {
/** Working directory forwarded to the judge agent call. */
cwd: string;
}
/**
 * Options for the task-decomposition use cases.
 * NOTE(review): the consumers of this type are not visible in this chunk; the field
 * notes below restate only what the declarations themselves show.
 */
export interface DecomposeTaskOptions {
/** Working directory. */
cwd: string;
/** Optional persona identifier. */
persona?: string;
/** Optional path associated with the persona. */
personaPath?: string;
/** Optional language selector. */
language?: Language;
/** Optional model identifier. */
model?: string;
/** Optional agent backend; restricted to the literals listed here. */
provider?: 'claude' | 'codex' | 'opencode' | 'cursor' | 'copilot' | 'mock';
/** Optional stream callback. */
onStream?: StreamCallback;
}
/** Validated shape of the structured output for a "more parts?" decision. */
export interface MorePartsResponse {
/** Boolean `done` flag taken from the structured output. */
done: boolean;
/** Free-text `reasoning` string taken from the structured output. */
reasoning: string;
/** Parsed part definitions taken from the structured output `parts` array. */
parts: PartDefinition[];
}
/**
 * Validates the raw structured-output `parts` value and converts it into part definitions.
 *
 * @param raw - Untrusted value taken from the agent's structured output.
 * @param maxParts - Largest number of parts that is accepted.
 * @returns The parsed parts, once the part-ID uniqueness check has accepted them.
 * @throws Error when `raw` is not an array, is empty, or holds more than `maxParts`
 *   entries. Errors from entry parsing or from the uniqueness check propagate unchanged.
 */
function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] {
  if (!Array.isArray(raw)) {
    throw new Error('Structured output "parts" must be an array');
  }

  // Size checks run before any entry is parsed, so an oversized or empty payload is
  // rejected without touching its contents.
  const count = raw.length;
  let rejection: string | undefined;
  if (count === 0) {
    rejection = 'Structured output "parts" must not be empty';
  } else if (count > maxParts) {
    rejection = `Structured output produced too many parts: ${count} > ${maxParts}`;
  }
  if (rejection !== undefined) {
    throw new Error(rejection);
  }

  const parsed: PartDefinition[] = raw.map((entry, index) => parsePartDefinitionEntry(entry, index));
  ensureUniquePartIds(parsed);
  return parsed;
}
/**
 * Validates the raw structured output of a "more parts?" decision.
 *
 * Checks run in a fixed order — object shape, `done`, `reasoning`, `parts` type, `parts`
 * size — and the first failing check determines the error that is thrown.
 *
 * @param raw - Untrusted structured output from the agent.
 * @param maxAdditionalParts - Largest number of new parts that is accepted (an empty
 *   `parts` array is allowed).
 * @returns The validated response with its parts parsed.
 * @throws Error describing the first violated constraint. Errors from entry parsing or
 *   from the part-ID uniqueness check propagate unchanged.
 */
function toMorePartsResponse(raw: unknown, maxAdditionalParts: number): MorePartsResponse {
  const isPlainObject = typeof raw === 'object' && raw !== null && !Array.isArray(raw);
  if (!isPlainObject) {
    throw new Error('Structured output must be an object');
  }
  const record = raw as Record<string, unknown>;

  const done = record.done;
  if (typeof done !== 'boolean') {
    throw new Error('Structured output "done" must be a boolean');
  }

  const reasoning = record.reasoning;
  if (typeof reasoning !== 'string') {
    throw new Error('Structured output "reasoning" must be a string');
  }

  const candidates = record.parts;
  if (!Array.isArray(candidates)) {
    throw new Error('Structured output "parts" must be an array');
  }
  if (candidates.length > maxAdditionalParts) {
    throw new Error(`Structured output produced too many parts: ${candidates.length} > ${maxAdditionalParts}`);
  }

  const parts: PartDefinition[] = candidates.map((entry, index) => parsePartDefinitionEntry(entry, index));
  ensureUniquePartIds(parts);
  return { done, reasoning, parts };
}
/**
 * Caps a part's result text before it is embedded in a follow-up prompt.
 *
 * @param content - Full result text of a completed part.
 * @returns `content` unchanged when it is at most 2000 UTF-16 code units long; otherwise
 *   its first 2000 code units (1999 when the cut would split a surrogate pair) followed
 *   by a newline and the marker `...[truncated]`.
 */
function summarizePartContent(content: string): string {
  const maxLength = 2000;
  if (content.length <= maxLength) {
    return content;
  }
  // String offsets count UTF-16 code units. If the last unit kept is a high surrogate,
  // its low half lies beyond the cut; drop it so the result never ends in a lone
  // surrogate, which is not well-formed Unicode.
  const lastKept = content.charCodeAt(maxLength - 1);
  const cut = lastKept >= 0xd800 && lastKept <= 0xdbff ? maxLength - 1 : maxLength;
  return `${content.slice(0, cut)}\n...[truncated]`;
}
/**
 * Builds the prompt that asks an agent to split a task into parts without executing it.
 *
 * @param instruction - The original task text, appended verbatim under the task heading.
 * @param maxParts - Upper bound on the number of parts, stated in the prompt.
 * @param language - `'ja'` selects the Japanese wording; any other value (or none)
 *   selects the English wording.
 * @returns The prompt as a single newline-joined string.
 */
function buildDecomposePrompt(instruction: string, maxParts: number, language?: Language): string {
  const japanese = language === 'ja';

  const preamble = japanese
    ? [
        '以下はタスク分解専用の指示です。タスクを実行せず、分解だけを行ってください。',
        '- ツールは使用しない',
        `- パート数は 1 以上 ${maxParts} 以下`,
        '- パートは互いに独立させる',
      ]
    : [
        'This is decomposition-only planning. Do not execute the task.',
        '- Do not use any tool',
        `- Produce between 1 and ${maxParts} independent parts`,
        '- Keep each part self-contained',
      ];
  const taskHeading = japanese ? '## 元タスク' : '## Original Task';

  // Layout: preamble lines, one blank line, heading, then the task itself.
  return [...preamble, '', taskHeading, instruction].join('\n');
}
/**
 * Builds the prompt that asks an agent whether further parts are needed after reviewing
 * the results of the parts completed so far.
 *
 * @param originalInstruction - The original task text, embedded verbatim.
 * @param allResults - Completed parts; each is rendered as a `### id: title (status)`
 *   heading followed by its (possibly truncated) content.
 * @param existingIds - Part IDs that must not be reused, listed comma-separated.
 * @param maxAdditionalParts - Upper bound on new parts, stated in the prompt.
 * @param language - `'ja'` selects the Japanese wording; any other value (or none)
 *   selects the English wording.
 * @returns The prompt as a single newline-joined string. Empty result or ID lists are
 *   shown with the language's "none" placeholder.
 */
function buildMorePartsPrompt(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  language?: Language,
): string {
  const resultBlock = allResults
    .map((part) => `### ${part.id}: ${part.title} (${part.status})\n${summarizePartContent(part.content)}`)
    .join('\n\n');

  // All language-dependent wording lives in one table; the layout below is shared.
  const wording = language === 'ja'
    ? {
        intro: '以下の実行結果を見て、追加のサブタスクが必要か判断してください。',
        noTools: '- ツールは使用しない',
        taskHeading: '## 元タスク',
        partsHeading: '## 完了済みパート',
        none: '(なし)',
        rulesHeading: '## 判断ルール',
        rules: [
          '- 追加作業が不要なら done=true にする',
          '- 追加作業が必要なら parts に新しいパートを入れる',
          '- 不足が複数ある場合は、可能な限り一括で複数パートを返す',
        ],
        idsPrefix: '- 既存IDは再利用しない: ',
        maxPrefix: '- 追加できる最大数: ',
      }
    : {
        intro: 'Review completed part results and decide whether additional parts are needed.',
        noTools: '- Do not use any tool',
        taskHeading: '## Original Task',
        partsHeading: '## Completed Parts',
        none: '(none)',
        rulesHeading: '## Decision Rules',
        rules: [
          '- Set done=true when no additional work is required',
          '- If more work is needed, provide new parts in "parts"',
          '- If multiple missing tasks are known, return multiple new parts in one batch when possible',
        ],
        idsPrefix: '- Do not reuse existing IDs: ',
        maxPrefix: '- Maximum additional parts: ',
      };

  return [
    wording.intro,
    wording.noTools,
    '',
    wording.taskHeading,
    originalInstruction,
    '',
    wording.partsHeading,
    resultBlock || wording.none,
    '',
    wording.rulesHeading,
    ...wording.rules,
    `${wording.idsPrefix}${existingIds.join(', ') || wording.none}`,
    `${wording.maxPrefix}${maxAdditionalParts}`,
  ].join('\n');
}
// Status-judgment use cases, implemented in judge-status-usecase.js and re-exported here
// (presumably so importers of this module keep resolving the same names — confirm).
export {
evaluateCondition,
judgeStatus,
type EvaluateConditionOptions,
type JudgeStatusOptions,
type JudgeStatusResult,
} from './judge-status-usecase.js';
// Task-decomposition use cases, implemented in decompose-task-usecase.js and re-exported here.
export {
decomposeTask,
requestMoreParts,
type DecomposeTaskOptions,
type MorePartsResponse,
} from './decompose-task-usecase.js';
export async function executeAgent(
persona: string | undefined,
@ -175,175 +22,6 @@ export async function executeAgent(
): Promise<AgentResponse> {
return runAgent(persona, instruction, options);
}
// Aliases: both names are bound to the very same function as `executeAgent`.
export const generateReport = executeAgent;
export const executePart = executeAgent;
/**
 * Asks a judge agent which of the given conditions matches the agent output.
 *
 * @param agentOutput Text handed to `buildJudgePrompt` together with the conditions.
 * @param conditions Candidate conditions.
 * @param options Only `cwd` is read; it is forwarded to the agent call.
 * @returns `-1` when the agent call did not finish with status `'done'`;
 *   the zero-based position derived from a valid 1-based `matched_index`
 *   in the structured output; otherwise whatever `detectJudgeIndex`
 *   extracts from the response content.
 */
export async function evaluateCondition(
  agentOutput: string,
  conditions: Array<{ index: number; text: string }>,
  options: EvaluateConditionOptions,
): Promise<number> {
  const judgePrompt = buildJudgePrompt(agentOutput, conditions);
  const judgeResponse = await runAgent(undefined, judgePrompt, {
    cwd: options.cwd,
    maxTurns: 1,
    permissionMode: 'readonly',
    outputSchema: loadEvaluationSchema(),
  });

  if (judgeResponse.status !== 'done') {
    return -1;
  }

  // The structured output carries a 1-based index; accept it only when it is in range.
  const picked = judgeResponse.structuredOutput?.matched_index;
  if (
    typeof picked === 'number'
    && Number.isInteger(picked)
    && picked >= 1
    && picked <= conditions.length
  ) {
    return picked - 1;
  }

  return detectJudgeIndex(judgeResponse.content);
}
/**
 * Decides which rule applies by trying up to three strategies in order:
 * structured output, tag detection, then an AI judge.
 *
 * @param structuredInstruction Instruction for the structured-output call; also
 *   passed to `evaluateCondition` as the text to judge in stage 3.
 * @param tagInstruction Instruction for the tag-detection call.
 * @param rules Candidate rules; must not be empty.
 * @param options Working directory, movement name, language, interactive flag and stream callback.
 * @returns The zero-based rule index and the method that produced it.
 * @throws Error when `rules` is empty, or when no stage yields a valid rule index.
 */
export async function judgeStatus(
structuredInstruction: string,
tagInstruction: string,
rules: PieceRule[],
options: JudgeStatusOptions,
): Promise<JudgeStatusResult> {
if (rules.length === 0) {
throw new Error('judgeStatus requires at least one rule');
}
// A single rule is selected without calling any agent (and without the interactiveOnly check below).
if (rules.length === 1) {
return { ruleIndex: 0, method: 'auto_select' };
}
const interactiveEnabled = options.interactive === true;
// An index is usable when it is in range and the rule is not interactive-only
// while interactive mode is off.
const isValidRuleIndex = (index: number): boolean => {
if (index < 0 || index >= rules.length) return false;
const rule = rules[index];
return !(rule?.interactiveOnly && !interactiveEnabled);
};
// Options shared by the stage 1 and stage 2 agent calls.
const agentOptions = {
cwd: options.cwd,
maxTurns: 3,
permissionMode: 'readonly' as const,
language: options.language,
onStream: options.onStream,
};
// Stage 1: structured output — read the 1-based `step` field and convert it to a rule index.
const structuredResponse = await runAgent('conductor', structuredInstruction, {
...agentOptions,
outputSchema: loadJudgmentSchema(),
});
if (structuredResponse.status === 'done') {
const stepNumber = structuredResponse.structuredOutput?.step;
if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) {
const ruleIndex = stepNumber - 1;
if (isValidRuleIndex(ruleIndex)) {
return { ruleIndex, method: 'structured_output' };
}
}
}
// Stage 2: tag detection (dedicated call, no outputSchema)
const tagResponse = await runAgent('conductor', tagInstruction, agentOptions);
if (tagResponse.status === 'done') {
const tagRuleIndex = detectRuleIndex(tagResponse.content, options.movementName);
if (isValidRuleIndex(tagRuleIndex)) {
return { ruleIndex: tagRuleIndex, method: 'phase3_tag' };
}
}
// Stage 3: AI judge over the rules that are allowed in the current mode.
// Each condition keeps its original rule index so the result can be mapped back.
const conditions = rules
.map((rule, index) => ({ rule, index }))
.filter(({ rule }) => interactiveEnabled || !rule.interactiveOnly)
.map(({ index, rule }) => ({ index, text: rule.condition }));
if (conditions.length > 0) {
const fallbackIndex = await evaluateCondition(structuredInstruction, conditions, { cwd: options.cwd });
// fallbackIndex is a position within `conditions`, not within `rules`.
if (fallbackIndex >= 0 && fallbackIndex < conditions.length) {
const originalIndex = conditions[fallbackIndex]?.index;
if (originalIndex !== undefined) {
return { ruleIndex: originalIndex, method: 'ai_judge' };
}
}
}
throw new Error(`Status not found for movement "${options.movementName}"`);
}
/**
 * Runs the decomposition agent and converts its answer into part definitions.
 *
 * @param instruction Task text embedded in the decomposition prompt.
 * @param maxParts Upper bound used for the prompt, the output schema and validation.
 * @param options Agent settings forwarded to `runAgent`.
 * @returns Parts from the structured output when it has a `parts` value,
 *   otherwise parts parsed from the response content.
 * @throws Error (`Team leader failed: ...`) when the agent status is not `'done'`.
 */
export async function decomposeTask(
  instruction: string,
  maxParts: number,
  options: DecomposeTaskOptions,
): Promise<PartDefinition[]> {
  const decomposePrompt = buildDecomposePrompt(instruction, maxParts, options.language);
  const agentResponse = await runAgent(options.persona, decomposePrompt, {
    cwd: options.cwd,
    personaPath: options.personaPath,
    language: options.language,
    model: options.model,
    provider: options.provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadDecompositionSchema(maxParts),
    onStream: options.onStream,
  });

  if (agentResponse.status !== 'done') {
    const failureDetail = agentResponse.error || agentResponse.content || agentResponse.status;
    throw new Error(`Team leader failed: ${failureDetail}`);
  }

  // Prefer structured output; fall back to parsing the free-form content.
  const structuredParts = agentResponse.structuredOutput?.parts;
  return structuredParts == null
    ? parseParts(agentResponse.content, maxParts)
    : toPartDefinitions(structuredParts, maxParts);
}
/**
 * Asks the agent whether additional parts are needed given the results so far.
 *
 * @param originalInstruction Original task text.
 * @param allResults Results of the parts executed so far.
 * @param existingIds IDs the prompt tells the agent not to reuse.
 * @param maxAdditionalParts Upper bound used for the prompt, the schema and validation.
 * @param options Agent settings forwarded to `runAgent`.
 * @returns The validated structured output (`done`, `reasoning`, `parts`).
 * @throws Error (`Team leader feedback failed: ...`) when the agent status is not `'done'`.
 */
export async function requestMoreParts(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  options: DecomposeTaskOptions,
): Promise<MorePartsResponse> {
  const feedbackPrompt = buildMorePartsPrompt(
    originalInstruction,
    allResults,
    existingIds,
    maxAdditionalParts,
    options.language,
  );

  const agentResponse = await runAgent(options.persona, feedbackPrompt, {
    cwd: options.cwd,
    personaPath: options.personaPath,
    language: options.language,
    model: options.model,
    provider: options.provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadMorePartsSchema(maxAdditionalParts),
    onStream: options.onStream,
  });

  if (agentResponse.status === 'done') {
    return toMorePartsResponse(agentResponse.structuredOutput, maxAdditionalParts);
  }

  const failureDetail = agentResponse.error || agentResponse.content || agentResponse.status;
  throw new Error(`Team leader feedback failed: ${failureDetail}`);
}

View File

@ -0,0 +1,222 @@
import type { Language, PartDefinition } from '../core/models/types.js';
import { runAgent, type StreamCallback } from './runner.js';
import { parseParts } from '../core/piece/engine/task-decomposer.js';
import { loadDecompositionSchema, loadMorePartsSchema } from '../infra/resources/schema-loader.js';
import { ensureUniquePartIds, parsePartDefinitionEntry } from '../core/piece/part-definition-validator.js';
/**
 * Options accepted by decomposeTask and requestMoreParts.
 * Except where noted, each field is forwarded to the underlying runAgent call.
 */
export interface DecomposeTaskOptions {
/** Passed to runAgent as `cwd`. */
cwd: string;
/** Passed to runAgent as its first (persona) argument. */
persona?: string;
/** Passed to runAgent as `personaPath`. */
personaPath?: string;
/** Selects the prompt wording (`'ja'` uses the Japanese text) and is passed to runAgent. */
language?: Language;
/** Passed to runAgent as `model`. */
model?: string;
/** Passed to runAgent as `provider`. */
provider?: 'claude' | 'codex' | 'opencode' | 'cursor' | 'copilot' | 'mock';
/** Passed to runAgent as `onStream`. */
onStream?: StreamCallback;
/**
 * Passed to runAgent as `onPromptResolved` by decomposeTask.
 * NOTE(review): requestMoreParts does not forward this callback — confirm that is intended.
 */
onPromptResolved?: (promptParts: {
systemPrompt: string;
userInstruction: string;
}) => void;
}
/**
 * Validated result of requestMoreParts, as produced by toMorePartsResponse
 * from the agent's structured output.
 */
export interface MorePartsResponse {
/** The `done` flag copied from the structured output (the prompt asks for done=true when no more work is required). */
done: boolean;
/** The `reasoning` string copied from the structured output. */
reasoning: string;
/** Parsed part definitions; their IDs have passed ensureUniquePartIds. */
parts: PartDefinition[];
}
/**
 * Validates the structured-output `parts` value and converts it to part definitions.
 *
 * @param raw Value taken from the structured output.
 * @param maxParts Maximum number of entries allowed.
 * @returns The parsed parts, after `ensureUniquePartIds` has checked them.
 * @throws Error when `raw` is not an array, is empty, or has more than `maxParts` entries.
 */
function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] {
  if (!Array.isArray(raw)) {
    throw new Error('Structured output "parts" must be an array');
  }

  const entryCount = raw.length;
  if (entryCount === 0) {
    throw new Error('Structured output "parts" must not be empty');
  }
  if (entryCount > maxParts) {
    throw new Error(`Structured output produced too many parts: ${entryCount} > ${maxParts}`);
  }

  const definitions: PartDefinition[] = raw.map((entry, position) => parsePartDefinitionEntry(entry, position));
  ensureUniquePartIds(definitions);
  return definitions;
}
/**
 * Validates the structured output of the "more parts" call and converts it
 * into a MorePartsResponse.
 *
 * @param raw Structured output as returned by the agent.
 * @param maxAdditionalParts Maximum number of entries allowed in `parts`.
 * @returns `done`, `reasoning` and the parsed parts (checked by `ensureUniquePartIds`).
 * @throws Error when `raw` is not a non-array object, when `done` is not a boolean,
 *   when `reasoning` is not a string, when `parts` is not an array, or when
 *   `parts` has more than `maxAdditionalParts` entries.
 */
function toMorePartsResponse(raw: unknown, maxAdditionalParts: number): MorePartsResponse {
  const isPlainObject = typeof raw === 'object' && raw !== null && !Array.isArray(raw);
  if (!isPlainObject) {
    throw new Error('Structured output must be an object');
  }

  const { done, reasoning, parts: rawParts } = raw as Record<string, unknown>;
  if (typeof done !== 'boolean') {
    throw new Error('Structured output "done" must be a boolean');
  }
  if (typeof reasoning !== 'string') {
    throw new Error('Structured output "reasoning" must be a string');
  }
  if (!Array.isArray(rawParts)) {
    throw new Error('Structured output "parts" must be an array');
  }
  if (rawParts.length > maxAdditionalParts) {
    throw new Error(`Structured output produced too many parts: ${rawParts.length} > ${maxAdditionalParts}`);
  }

  const parts: PartDefinition[] = rawParts.map((entry, position) => parsePartDefinitionEntry(entry, position));
  ensureUniquePartIds(parts);
  return { done, reasoning, parts };
}
/**
 * Shortens part content for inclusion in a prompt.
 *
 * Content of at most 2000 UTF-16 code units is returned unchanged. Longer
 * content is cut at 2000 code units and suffixed with a `...[truncated]`
 * marker on its own line. If the cut would fall between the two halves of a
 * surrogate pair (e.g. an emoji), the cut moves one code unit earlier so the
 * summary never ends in a lone high surrogate.
 *
 * @param content Full text of a part result.
 * @returns The original text, or its truncated form with the marker appended.
 */
function summarizePartContent(content: string): string {
  const maxLength = 2000;
  if (content.length <= maxLength) {
    return content;
  }

  // slice() works on code units; detect a pair straddling the cut position.
  const lastKept = content.charCodeAt(maxLength - 1);
  const firstDropped = content.charCodeAt(maxLength);
  const splitsSurrogatePair =
    lastKept >= 0xd800 && lastKept <= 0xdbff
    && firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  const cutAt = splitsSurrogatePair ? maxLength - 1 : maxLength;

  return `${content.slice(0, cutAt)}\n...[truncated]`;
}
/**
 * Builds the decomposition-only prompt sent to the agent.
 *
 * @param instruction Task text appended under the "original task" heading.
 * @param maxParts Upper bound on the number of parts quoted in the prompt.
 * @param language When `'ja'`, the Japanese wording is used; otherwise English.
 * @returns The prompt as a single newline-joined string.
 */
function buildDecomposePrompt(instruction: string, maxParts: number, language?: Language): string {
  const lines = language === 'ja'
    ? [
      '以下はタスク分解専用の指示です。タスクを実行せず、分解だけを行ってください。',
      '- ツールは使用しない',
      `- パート数は 1 以上 ${maxParts} 以下`,
      '- パートは互いに独立させる',
      '',
      '## 元タスク',
      instruction,
    ]
    : [
      'This is decomposition-only planning. Do not execute the task.',
      '- Do not use any tool',
      `- Produce between 1 and ${maxParts} independent parts`,
      '- Keep each part self-contained',
      '',
      '## Original Task',
      instruction,
    ];

  return lines.join('\n');
}
/**
 * Builds the follow-up prompt that asks whether more parts are needed
 * after reviewing the results of the parts executed so far.
 *
 * @param originalInstruction Task text placed under the "original task" heading.
 * @param allResults Part results; each becomes a `### id: title (status)` section
 *   followed by its content as returned by `summarizePartContent`.
 * @param existingIds IDs listed in the prompt as not reusable.
 * @param maxAdditionalParts Upper bound quoted in the prompt.
 * @param language When `'ja'`, the Japanese wording is used; otherwise English.
 * @returns The prompt as a single newline-joined string.
 */
function buildMorePartsPrompt(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  language?: Language,
): string {
  const completedSections = allResults.map(
    ({ id, title, status, content }) => `### ${id}: ${title} (${status})\n${summarizePartContent(content)}`,
  );
  const completedBlock = completedSections.join('\n\n');
  const usedIds = existingIds.join(', ');

  // An empty section list / ID list falls back to a localized placeholder.
  const lines = language === 'ja'
    ? [
      '以下の実行結果を見て、追加のサブタスクが必要か判断してください。',
      '- ツールは使用しない',
      '',
      '## 元タスク',
      originalInstruction,
      '',
      '## 完了済みパート',
      completedBlock || '(なし)',
      '',
      '## 判断ルール',
      '- 追加作業が不要なら done=true にする',
      '- 追加作業が必要なら parts に新しいパートを入れる',
      '- 不足が複数ある場合は、可能な限り一括で複数パートを返す',
      `- 既存IDは再利用しない: ${usedIds || '(なし)'}`,
      `- 追加できる最大数: ${maxAdditionalParts}`,
    ]
    : [
      'Review completed part results and decide whether additional parts are needed.',
      '- Do not use any tool',
      '',
      '## Original Task',
      originalInstruction,
      '',
      '## Completed Parts',
      completedBlock || '(none)',
      '',
      '## Decision Rules',
      '- Set done=true when no additional work is required',
      '- If more work is needed, provide new parts in "parts"',
      '- If multiple missing tasks are known, return multiple new parts in one batch when possible',
      `- Do not reuse existing IDs: ${usedIds || '(none)'}`,
      `- Maximum additional parts: ${maxAdditionalParts}`,
    ];

  return lines.join('\n');
}
/**
 * Runs the decomposition agent and converts its answer into part definitions.
 *
 * @param instruction Task text embedded in the decomposition prompt.
 * @param maxParts Upper bound used for the prompt, the output schema and validation.
 * @param options Agent settings forwarded to `runAgent`, including `onPromptResolved`.
 * @returns Parts from the structured output when it has a `parts` value,
 *   otherwise parts parsed from the response content.
 * @throws Error (`Team leader failed: ...`) when the agent status is not `'done'`.
 */
export async function decomposeTask(
  instruction: string,
  maxParts: number,
  options: DecomposeTaskOptions,
): Promise<PartDefinition[]> {
  const decomposePrompt = buildDecomposePrompt(instruction, maxParts, options.language);
  const agentResponse = await runAgent(options.persona, decomposePrompt, {
    cwd: options.cwd,
    personaPath: options.personaPath,
    language: options.language,
    model: options.model,
    provider: options.provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadDecompositionSchema(maxParts),
    onStream: options.onStream,
    onPromptResolved: options.onPromptResolved,
  });

  if (agentResponse.status !== 'done') {
    const failureDetail = agentResponse.error || agentResponse.content || agentResponse.status;
    throw new Error(`Team leader failed: ${failureDetail}`);
  }

  // Prefer structured output; fall back to parsing the free-form content.
  const structuredParts = agentResponse.structuredOutput?.parts;
  return structuredParts == null
    ? parseParts(agentResponse.content, maxParts)
    : toPartDefinitions(structuredParts, maxParts);
}
/**
 * Asks the agent whether additional parts are needed given the results so far.
 *
 * NOTE(review): `options.onPromptResolved` is declared on DecomposeTaskOptions but is
 * not passed to `runAgent` here — confirm whether that omission is intentional.
 *
 * @param originalInstruction Original task text.
 * @param allResults Results of the parts executed so far.
 * @param existingIds IDs the prompt tells the agent not to reuse.
 * @param maxAdditionalParts Upper bound used for the prompt, the schema and validation.
 * @param options Agent settings forwarded to `runAgent`.
 * @returns The validated structured output (`done`, `reasoning`, `parts`).
 * @throws Error (`Team leader feedback failed: ...`) when the agent status is not `'done'`.
 */
export async function requestMoreParts(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  options: DecomposeTaskOptions,
): Promise<MorePartsResponse> {
  const feedbackPrompt = buildMorePartsPrompt(
    originalInstruction,
    allResults,
    existingIds,
    maxAdditionalParts,
    options.language,
  );

  const agentResponse = await runAgent(options.persona, feedbackPrompt, {
    cwd: options.cwd,
    personaPath: options.personaPath,
    language: options.language,
    model: options.model,
    provider: options.provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadMorePartsSchema(maxAdditionalParts),
    onStream: options.onStream,
  });

  if (agentResponse.status === 'done') {
    return toMorePartsResponse(agentResponse.structuredOutput, maxAdditionalParts);
  }

  const failureDetail = agentResponse.error || agentResponse.content || agentResponse.status;
  throw new Error(`Team leader feedback failed: ${failureDetail}`);
}

View File

@ -0,0 +1,184 @@
import type { PieceRule, RuleMatchMethod, Language } from '../core/models/types.js';
import { runAgent, type StreamCallback } from './runner.js';
import { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js';
import { loadJudgmentSchema, loadEvaluationSchema } from '../infra/resources/schema-loader.js';
import { detectRuleIndex } from '../shared/utils/ruleIndex.js';
/** Options accepted by judgeStatus. */
export interface JudgeStatusOptions {
/** Working directory passed to every agent call made by judgeStatus. */
cwd: string;
/** Movement name used for tag detection and included in error messages. */
movementName: string;
/** Passed to the stage 1 and stage 2 agent calls. */
language?: Language;
/** Only when exactly `true` are rules marked `interactiveOnly` eligible. */
interactive?: boolean;
/** Stream callback passed to the stage 1 and stage 2 agent calls. */
onStream?: StreamCallback;
/**
 * Called once per stage that ran, after that stage's agent response is available,
 * with the instruction sent and the response content received.
 */
onJudgeStage?: (entry: {
stage: 1 | 2 | 3;
method: 'structured_output' | 'phase3_tag' | 'ai_judge';
status: 'done' | 'error' | 'skipped';
instruction: string;
response: string;
}) => void;
/** Passed as `onPromptResolved` to the stage 1 (structured output) agent call only. */
onStructuredPromptResolved?: (promptParts: {
systemPrompt: string;
userInstruction: string;
}) => void;
}
/** Outcome of judgeStatus. */
export interface JudgeStatusResult {
/** Zero-based index into the `rules` array passed to judgeStatus. */
ruleIndex: number;
/** How the rule was determined; judgeStatus returns 'auto_select', 'structured_output', 'phase3_tag' or 'ai_judge'. */
method: RuleMatchMethod;
}
/** Options accepted by evaluateCondition. */
export interface EvaluateConditionOptions {
/** Working directory passed to the judge agent call. */
cwd: string;
/**
 * Called once after the judge agent call returns, before the result is interpreted,
 * with the prompt sent, a 'done'/'error' status and the response content.
 */
onJudgeResponse?: (entry: {
instruction: string;
status: 'done' | 'error';
response: string;
}) => void;
}
/**
 * Asks a judge agent which of the given conditions matches the agent output.
 *
 * `options.onJudgeResponse`, when provided, is invoked right after the agent
 * call returns and before the response is interpreted.
 *
 * @param agentOutput Text handed to `buildJudgePrompt` together with the conditions.
 * @param conditions Candidate conditions.
 * @param options Working directory and optional response callback.
 * @returns `-1` when the agent call did not finish with status `'done'`;
 *   the zero-based position derived from a valid 1-based `matched_index`
 *   in the structured output; otherwise whatever `detectJudgeIndex`
 *   extracts from the response content.
 */
export async function evaluateCondition(
  agentOutput: string,
  conditions: Array<{ index: number; text: string }>,
  options: EvaluateConditionOptions,
): Promise<number> {
  const judgePrompt = buildJudgePrompt(agentOutput, conditions);
  const judgeResponse = await runAgent(undefined, judgePrompt, {
    cwd: options.cwd,
    maxTurns: 1,
    permissionMode: 'readonly',
    outputSchema: loadEvaluationSchema(),
  });
  const succeeded = judgeResponse.status === 'done';

  // Report the raw exchange first so it is logged even when the call failed.
  options.onJudgeResponse?.({
    instruction: judgePrompt,
    status: succeeded ? 'done' : 'error',
    response: judgeResponse.content,
  });

  if (!succeeded) {
    return -1;
  }

  // The structured output carries a 1-based index; accept it only when it is in range.
  const picked = judgeResponse.structuredOutput?.matched_index;
  if (
    typeof picked === 'number'
    && Number.isInteger(picked)
    && picked >= 1
    && picked <= conditions.length
  ) {
    return picked - 1;
  }

  return detectJudgeIndex(judgeResponse.content);
}
/**
 * Decides which rule applies by trying up to three strategies in order:
 * structured output, tag detection, then an AI judge. After each stage's agent
 * response is available, `options.onJudgeStage` (if provided) is notified.
 *
 * @param structuredInstruction Instruction for the structured-output call; also
 *   passed to `evaluateCondition` as the text to judge in stage 3.
 * @param tagInstruction Instruction for the tag-detection call.
 * @param rules Candidate rules; must not be empty.
 * @param options Working directory, movement name, language, interactive flag and callbacks.
 * @returns The zero-based rule index and the method that produced it.
 * @throws Error when `rules` is empty, when stage 3 ran but its response callback
 *   was never invoked, or when no stage yields a valid rule index.
 */
export async function judgeStatus(
structuredInstruction: string,
tagInstruction: string,
rules: PieceRule[],
options: JudgeStatusOptions,
): Promise<JudgeStatusResult> {
if (rules.length === 0) {
throw new Error('judgeStatus requires at least one rule');
}
// A single rule is selected without calling any agent, so no stage callback fires.
if (rules.length === 1) {
return { ruleIndex: 0, method: 'auto_select' };
}
const interactiveEnabled = options.interactive === true;
// An index is usable when it is in range and the rule is not interactive-only
// while interactive mode is off.
const isValidRuleIndex = (index: number): boolean => {
if (index < 0 || index >= rules.length) return false;
const rule = rules[index];
return !(rule?.interactiveOnly && !interactiveEnabled);
};
// Options shared by the stage 1 and stage 2 agent calls.
const agentOptions = {
cwd: options.cwd,
maxTurns: 3,
permissionMode: 'readonly' as const,
language: options.language,
onStream: options.onStream,
};
// Stage 1: structured output — read the 1-based `step` field and convert it to a rule index.
const structuredResponse = await runAgent('conductor', structuredInstruction, {
...agentOptions,
outputSchema: loadJudgmentSchema(),
onPromptResolved: options.onStructuredPromptResolved,
});
// The reported status reflects the agent call only, not whether a rule was matched.
options.onJudgeStage?.({
stage: 1,
method: 'structured_output',
status: structuredResponse.status === 'done' ? 'done' : 'error',
instruction: structuredInstruction,
response: structuredResponse.content,
});
if (structuredResponse.status === 'done') {
const stepNumber = structuredResponse.structuredOutput?.step;
if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) {
const ruleIndex = stepNumber - 1;
if (isValidRuleIndex(ruleIndex)) {
return { ruleIndex, method: 'structured_output' };
}
}
}
// Stage 2: tag detection (dedicated call, no outputSchema).
const tagResponse = await runAgent('conductor', tagInstruction, agentOptions);
options.onJudgeStage?.({
stage: 2,
method: 'phase3_tag',
status: tagResponse.status === 'done' ? 'done' : 'error',
instruction: tagInstruction,
response: tagResponse.content,
});
if (tagResponse.status === 'done') {
const tagRuleIndex = detectRuleIndex(tagResponse.content, options.movementName);
if (isValidRuleIndex(tagRuleIndex)) {
return { ruleIndex: tagRuleIndex, method: 'phase3_tag' };
}
}
// Stage 3: AI judge over the rules that are allowed in the current mode.
// Each condition keeps its original rule index so the result can be mapped back.
const conditions = rules
.map((rule, index) => ({ rule, index }))
.filter(({ rule }) => interactiveEnabled || !rule.interactiveOnly)
.map(({ index, rule }) => ({ index, text: rule.condition }));
if (conditions.length > 0) {
// Filled in by the onJudgeResponse callback; 'skipped' / '' mean it never fired.
let stage3Status: 'done' | 'error' | 'skipped' = 'skipped';
let stage3Instruction = '';
let stage3Response = '';
const fallbackIndex = await evaluateCondition(structuredInstruction, conditions, {
cwd: options.cwd,
onJudgeResponse: (entry) => {
stage3Status = entry.status;
stage3Instruction = entry.instruction;
stage3Response = entry.response;
},
});
// Without the captured exchange there is nothing to report for stage 3, so fail loudly.
if (stage3Status === 'skipped' || stage3Instruction === '') {
throw new Error(`AI judge response missing for movement "${options.movementName}"`);
}
options.onJudgeStage?.({
stage: 3,
method: 'ai_judge',
status: stage3Status,
instruction: stage3Instruction,
response: stage3Response,
});
// fallbackIndex is a position within `conditions`, not within `rules`.
if (fallbackIndex >= 0 && fallbackIndex < conditions.length) {
const originalIndex = conditions[fallbackIndex]?.index;
if (originalIndex !== undefined) {
return { ruleIndex: originalIndex, method: 'ai_judge' };
}
}
}
throw new Error(`Status not found for movement "${options.movementName}"`);
}

View File

@ -158,11 +158,18 @@ export class AgentRunner {
const providerType = resolved.provider;
const provider = getProvider(providerType);
const resolvedSystemPrompt = agentConfig.claudeAgent || agentConfig.claudeSkill
? undefined
: loadAgentPrompt(agentConfig, options.cwd);
options.onPromptResolved?.({
systemPrompt: resolvedSystemPrompt ?? '',
userInstruction: task,
});
const agent = provider.setup({
name: agentConfig.name,
systemPrompt: agentConfig.claudeAgent || agentConfig.claudeSkill
? undefined
: loadAgentPrompt(agentConfig, options.cwd),
systemPrompt: resolvedSystemPrompt,
claudeAgent: agentConfig.claudeAgent,
claudeSkill: agentConfig.claudeSkill,
});
@ -223,6 +230,10 @@ export class AgentRunner {
}
const systemPrompt = loadTemplate('perform_agent_system_prompt', language, templateVars);
options.onPromptResolved?.({
systemPrompt,
userInstruction: task,
});
const agent = provider.setup({ name: personaName, systemPrompt });
return agent.call(task, callOptions);
}
@ -236,11 +247,19 @@ export class AgentRunner {
return this.runCustom(agentConfig, task, options);
}
options.onPromptResolved?.({
systemPrompt: personaSpec,
userInstruction: task,
});
const agent = provider.setup({ name: personaName, systemPrompt: personaSpec });
return agent.call(task, callOptions);
}
// 3. No persona specified — run with instruction_template only (no system prompt)
options.onPromptResolved?.({
systemPrompt: '',
userInstruction: task,
});
const agent = provider.setup({ name: personaName });
return agent.call(task, callOptions);
}

View File

@ -46,4 +46,8 @@ export interface RunAgentOptions {
currentPosition: string;
};
outputSchema?: Record<string, unknown>;
onPromptResolved?: (promptParts: {
systemPrompt: string;
userInstruction: string;
}) => void;
}

View File

@ -22,7 +22,7 @@ import { incrementMovementIteration } from './state-manager.js';
import { createLogger } from '../../../shared/utils/index.js';
import type { OptionsBuilder } from './OptionsBuilder.js';
import type { MovementExecutor } from './MovementExecutor.js';
import type { PhaseName } from '../types.js';
import type { PhaseName, PhasePromptParts } from '../types.js';
const log = createLogger('arpeggio-runner');
@ -37,8 +37,25 @@ export interface ArpeggioRunnerDeps {
conditions: Array<{ index: number; text: string }>,
options: { cwd: string }
) => Promise<number>;
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
readonly onPhaseStart?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
instruction: string,
promptParts: PhasePromptParts,
phaseExecutionId?: string,
iteration?: number,
) => void;
readonly onPhaseComplete?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
content: string,
status: string,
error?: string,
phaseExecutionId?: string,
iteration?: number,
) => void;
}
/**
@ -185,6 +202,8 @@ export class ArpeggioRunner {
batches,
template,
step,
movementIteration,
state.iteration,
agentOptions,
arpeggioConfig,
semaphore,
@ -244,6 +263,8 @@ export class ArpeggioRunner {
batches: readonly DataBatch[],
template: string,
step: PieceMovement,
movementIteration: number,
iteration: number,
agentOptions: RunAgentOptions,
config: ArpeggioMovementConfig,
semaphore: Semaphore,
@ -251,20 +272,34 @@ export class ArpeggioRunner {
const promises = batches.map(async (batch) => {
await semaphore.acquire();
try {
this.deps.onPhaseStart?.(step, 1, 'execute', `[Arpeggio batch ${batch.batchIndex + 1}/${batch.totalBatches}]`);
let didEmitPhaseStart = false;
const phaseExecutionId = `${step.name}:1:${movementIteration}:${batch.batchIndex}`;
const batchAgentOptions: RunAgentOptions = {
...agentOptions,
onPromptResolved: (promptParts) => {
if (didEmitPhaseStart) return;
this.deps.onPhaseStart?.(step, 1, 'execute', promptParts.userInstruction, promptParts, phaseExecutionId, iteration);
didEmitPhaseStart = true;
},
};
const result = await executeBatchWithRetry(
batch,
template,
step.persona,
agentOptions,
batchAgentOptions,
config.maxRetries,
config.retryDelayMs,
);
if (!didEmitPhaseStart) {
throw new Error(`Missing prompt parts for phase start: ${step.name}:1`);
}
this.deps.onPhaseComplete?.(
step, 1, 'execute',
result.content,
result.success ? 'done' : 'error',
result.error,
phaseExecutionId,
iteration,
);
return result;
} finally {

View File

@ -14,7 +14,7 @@ import type {
AgentResponse,
Language,
} from '../../models/types.js';
import type { PhaseName } from '../types.js';
import type { PhaseName, PhasePromptParts, JudgeStageEntry } from '../types.js';
import { executeAgent } from '../../../agents/agent-usecases.js';
import { InstructionBuilder } from '../instruction/InstructionBuilder.js';
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
@ -45,8 +45,33 @@ export interface MovementExecutorDeps {
conditions: Array<{ index: number; text: string }>,
options: { cwd: string }
) => Promise<number>;
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
readonly onPhaseStart?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
instruction: string,
promptParts: PhasePromptParts,
phaseExecutionId?: string,
iteration?: number,
) => void;
readonly onPhaseComplete?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
content: string,
status: string,
error?: string,
phaseExecutionId?: string,
iteration?: number,
) => void;
readonly onJudgeStage?: (
step: PieceMovement,
phase: 3,
phaseName: 'judge',
entry: JudgeStageEntry,
phaseExecutionId?: string,
iteration?: number,
) => void;
}
export class MovementExecutor {
@ -197,6 +222,8 @@ export class MovementExecutor {
updatePersonaSession,
this.deps.onPhaseStart,
this.deps.onPhaseComplete,
this.deps.onJudgeStage,
state.iteration,
);
// Phase 2: report output (resume same session, Write only)
@ -276,11 +303,21 @@ export class MovementExecutor {
});
// Phase 1: main execution (Write excluded if movement has report)
this.deps.onPhaseStart?.(step, 1, 'execute', instruction);
const agentOptions = this.deps.optionsBuilder.buildAgentOptions(step);
let didEmitPhaseStart = false;
const baseAgentOptions = this.deps.optionsBuilder.buildAgentOptions(step);
const agentOptions = {
...baseAgentOptions,
onPromptResolved: (promptParts: PhasePromptParts) => {
this.deps.onPhaseStart?.(step, 1, 'execute', instruction, promptParts, undefined, state.iteration);
didEmitPhaseStart = true;
},
};
let response = await executeAgent(step.persona, instruction, agentOptions);
if (!didEmitPhaseStart) {
throw new Error(`Missing prompt parts for phase start: ${step.name}:1`);
}
updatePersonaSession(sessionKey, response.sessionId);
this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error);
this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error, undefined, state.iteration);
// Provider failures should abort immediately.
if (response.status === 'error') {

View File

@ -3,7 +3,7 @@ import type { PieceMovement, PieceState, Language } from '../../models/types.js'
import type { MovementProviderOptions } from '../../models/piece-types.js';
import type { RunAgentOptions } from '../../../agents/runner.js';
import type { PhaseRunnerContext } from '../phase-runner.js';
import type { PieceEngineOptions, PhaseName, MovementProviderInfo } from '../types.js';
import type { PieceEngineOptions, PhaseName, MovementProviderInfo, PhasePromptParts, JudgeStageEntry } from '../types.js';
import { buildSessionKey } from '../session-key.js';
import { resolveMovementProviderModel } from '../provider-resolution.js';
@ -158,8 +158,34 @@ export class OptionsBuilder {
state: PieceState,
lastResponse: string | undefined,
updatePersonaSession: (persona: string, sessionId: string | undefined) => void,
onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void,
onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void,
onPhaseStart?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
instruction: string,
promptParts: PhasePromptParts,
phaseExecutionId?: string,
iteration?: number,
) => void,
onPhaseComplete?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
content: string,
status: string,
error?: string,
phaseExecutionId?: string,
iteration?: number,
) => void,
onJudgeStage?: (
step: PieceMovement,
phase: 3,
phaseName: 'judge',
entry: JudgeStageEntry,
phaseExecutionId?: string,
iteration?: number,
) => void,
iteration?: number,
): PhaseRunnerContext {
return {
cwd: this.getCwd(),
@ -174,6 +200,8 @@ export class OptionsBuilder {
updatePersonaSession,
onPhaseStart,
onPhaseComplete,
onJudgeStage,
iteration,
};
}
}

View File

@ -19,7 +19,7 @@ import { createLogger, getErrorMessage } from '../../../shared/utils/index.js';
import { buildSessionKey } from '../session-key.js';
import type { OptionsBuilder } from './OptionsBuilder.js';
import type { MovementExecutor } from './MovementExecutor.js';
import type { PieceEngineOptions, PhaseName } from '../types.js';
import type { PieceEngineOptions, PhaseName, PhasePromptParts, JudgeStageEntry } from '../types.js';
import type { ParallelLoggerOptions } from './parallel-logger.js';
const log = createLogger('parallel-runner');
@ -37,8 +37,33 @@ export interface ParallelRunnerDeps {
conditions: Array<{ index: number; text: string }>,
options: { cwd: string }
) => Promise<number>;
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
readonly onPhaseStart?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
instruction: string,
promptParts: PhasePromptParts,
phaseExecutionId?: string,
iteration?: number,
) => void;
readonly onPhaseComplete?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
content: string,
status: string,
error?: string,
phaseExecutionId?: string,
iteration?: number,
) => void;
readonly onJudgeStage?: (
step: PieceMovement,
phase: 3,
phaseName: 'judge',
entry: JudgeStageEntry,
phaseExecutionId?: string,
iteration?: number,
) => void;
}
export class ParallelRunner {
@ -86,6 +111,7 @@ export class ParallelRunner {
subMovements.map(async (subMovement, index) => {
const subIteration = incrementMovementIteration(state, subMovement.name);
const subInstruction = this.deps.movementExecutor.buildInstruction(subMovement, subIteration, state, task, maxMovements);
const parentIteration = state.iteration;
// Session key uses buildSessionKey (persona:provider) — same as normal movements.
// This ensures sessions are shared across movements with the same persona+provider,
@ -94,19 +120,33 @@ export class ParallelRunner {
// Phase 1: main execution (Write excluded if sub-movement has report)
const baseOptions = this.deps.optionsBuilder.buildAgentOptions(subMovement);
let didEmitPhaseStart = false;
// Override onStream with parallel logger's prefixed handler (immutable)
const agentOptions = parallelLogger
? { ...baseOptions, onStream: parallelLogger.createStreamHandler(subMovement.name, index) }
: baseOptions;
this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction);
: { ...baseOptions };
agentOptions.onPromptResolved = (promptParts: PhasePromptParts) => {
this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction, promptParts, undefined, parentIteration);
didEmitPhaseStart = true;
};
const subResponse = await executeAgent(subMovement.persona, subInstruction, agentOptions);
if (!didEmitPhaseStart) {
throw new Error(`Missing prompt parts for phase start: ${subMovement.name}:1`);
}
updatePersonaSession(subSessionKey, subResponse.sessionId);
this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error);
this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error, undefined, parentIteration);
// Phase 2/3 context — no overrides needed, phase-runner uses buildSessionKey internally
const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(state, subResponse.content, updatePersonaSession, this.deps.onPhaseStart, this.deps.onPhaseComplete);
const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(
state,
subResponse.content,
updatePersonaSession,
this.deps.onPhaseStart,
this.deps.onPhaseComplete,
this.deps.onJudgeStage,
parentIteration,
);
// Phase 2: report output for sub-movement
if (subMovement.outputContracts && subMovement.outputContracts.length > 0) {

View File

@ -128,11 +128,26 @@ export class PieceEngine extends EventEmitter {
getRetryNote: () => this.options.retryNote,
detectRuleIndex: this.detectRuleIndex,
callAiJudge: this.callAiJudge,
onPhaseStart: (step, phase, phaseName, instruction) => {
this.emit('phase:start', step, phase, phaseName, instruction);
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
if (phaseExecutionId == null && iteration == null) {
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
return;
}
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
},
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
if (phaseExecutionId == null && iteration == null) {
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
return;
}
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
},
onJudgeStage: (step, phase, phaseName, entry, phaseExecutionId, iteration) => {
if (phaseExecutionId == null && iteration == null) {
this.emit('phase:judge_stage', step, phase, phaseName, entry);
return;
}
this.emit('phase:judge_stage', step, phase, phaseName, entry, phaseExecutionId, iteration);
},
});
@ -145,11 +160,26 @@ export class PieceEngine extends EventEmitter {
getInteractive: () => this.options.interactive === true,
detectRuleIndex: this.detectRuleIndex,
callAiJudge: this.callAiJudge,
onPhaseStart: (step, phase, phaseName, instruction) => {
this.emit('phase:start', step, phase, phaseName, instruction);
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
if (phaseExecutionId == null && iteration == null) {
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
return;
}
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
},
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
if (phaseExecutionId == null && iteration == null) {
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
return;
}
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
},
onJudgeStage: (step, phase, phaseName, entry, phaseExecutionId, iteration) => {
if (phaseExecutionId == null && iteration == null) {
this.emit('phase:judge_stage', step, phase, phaseName, entry);
return;
}
this.emit('phase:judge_stage', step, phase, phaseName, entry, phaseExecutionId, iteration);
},
});
@ -160,11 +190,19 @@ export class PieceEngine extends EventEmitter {
getInteractive: () => this.options.interactive === true,
detectRuleIndex: this.detectRuleIndex,
callAiJudge: this.callAiJudge,
onPhaseStart: (step, phase, phaseName, instruction) => {
this.emit('phase:start', step, phase, phaseName, instruction);
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
  // Forward the trace identifiers only when at least one of them was supplied;
  // otherwise emit the shorter argument list.
  const hasTraceContext = phaseExecutionId != null || iteration != null;
  if (hasTraceContext) {
    this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
  } else {
    this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
  }
},
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
  // Forward the trace identifiers only when at least one of them was supplied;
  // otherwise emit the shorter argument list.
  const hasTraceContext = phaseExecutionId != null || iteration != null;
  if (hasTraceContext) {
    this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
  } else {
    this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
  }
},
});
@ -176,11 +214,19 @@ export class PieceEngine extends EventEmitter {
getInteractive: () => this.options.interactive === true,
detectRuleIndex: this.detectRuleIndex,
callAiJudge: this.callAiJudge,
onPhaseStart: (step, phase, phaseName, instruction) => {
this.emit('phase:start', step, phase, phaseName, instruction);
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
  // Forward the trace identifiers only when at least one of them was supplied;
  // otherwise emit the shorter argument list.
  const hasTraceContext = phaseExecutionId != null || iteration != null;
  if (hasTraceContext) {
    this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
  } else {
    this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
  }
},
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
  // Forward the trace identifiers only when at least one of them was supplied;
  // otherwise emit the shorter argument list.
  const hasTraceContext = phaseExecutionId != null || iteration != null;
  if (hasTraceContext) {
    this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
  } else {
    this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
  }
},
});

View File

@ -17,7 +17,7 @@ import { createPartMovement, resolvePartErrorDetail, summarizeParts } from './te
import { buildTeamLeaderParallelLoggerOptions, emitTeamLeaderProgressHint } from './team-leader-streaming.js';
import type { OptionsBuilder } from './OptionsBuilder.js';
import type { MovementExecutor } from './MovementExecutor.js';
import type { PieceEngineOptions, PhaseName } from '../types.js';
import type { PieceEngineOptions, PhaseName, PhasePromptParts } from '../types.js';
const log = createLogger('team-leader-runner');
const MAX_TOTAL_PARTS = 20;
@ -34,8 +34,25 @@ export interface TeamLeaderRunnerDeps {
conditions: Array<{ index: number; text: string }>,
options: { cwd: string }
) => Promise<number>;
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
readonly onPhaseStart?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
instruction: string,
promptParts: PhasePromptParts,
phaseExecutionId?: string,
iteration?: number,
) => void;
readonly onPhaseComplete?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
content: string,
status: string,
error?: string,
phaseExecutionId?: string,
iteration?: number,
) => void;
}
export class TeamLeaderRunner {
@ -54,6 +71,7 @@ export class TeamLeaderRunner {
throw new Error(`Movement "${step.name}" has no teamLeader configuration`);
}
const teamLeaderConfig = step.teamLeader;
const parentIteration = state.iteration;
const movementIteration = incrementMovementIteration(state, step.name);
const leaderStep: PieceMovement = {
@ -72,7 +90,7 @@ export class TeamLeaderRunner {
);
emitTeamLeaderProgressHint(this.deps.engineOptions, 'decompose');
this.deps.onPhaseStart?.(leaderStep, 1, 'execute', instruction);
let didEmitPhaseStart = false;
const parts = await decomposeTask(instruction, teamLeaderConfig.maxParts, {
cwd: this.deps.getCwd(),
persona: leaderStep.persona,
@ -80,14 +98,21 @@ export class TeamLeaderRunner {
model: leaderModel,
provider: leaderProvider,
onStream: this.deps.engineOptions.onStream,
onPromptResolved: (promptParts) => {
this.deps.onPhaseStart?.(leaderStep, 1, 'execute', promptParts.userInstruction, promptParts, undefined, parentIteration);
didEmitPhaseStart = true;
},
});
if (!didEmitPhaseStart) {
throw new Error(`Missing prompt parts for phase start: ${leaderStep.name}:1`);
}
const leaderResponse: AgentResponse = {
persona: leaderStep.persona ?? leaderStep.name,
status: 'done',
content: JSON.stringify({ parts }, null, 2),
timestamp: new Date(),
};
this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error);
this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error, undefined, parentIteration);
log.debug('Team leader decomposed parts', {
movement: step.name,
partCount: parts.length,

View File

@ -8,7 +8,7 @@
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { dirname, resolve, sep } from 'node:path';
import type { PieceMovement, Language, AgentResponse } from '../models/types.js';
import type { PhaseName } from './types.js';
import type { PhaseName, PhasePromptParts, JudgeStageEntry } from './types.js';
import type { RunAgentOptions } from '../../agents/runner.js';
import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js';
import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js';
@ -33,6 +33,8 @@ export interface PhaseRunnerContext {
interactive?: boolean;
/** Last response from Phase 1 */
lastResponse?: string;
/** Parent piece iteration for sub-movement phase events */
iteration?: number;
/** Get persona session ID */
getSessionId: (persona: string) => string | undefined;
/** Build resume options for a movement */
@ -44,9 +46,35 @@ export interface PhaseRunnerContext {
/** Stream callback for provider event logging (passed to judgeStatus) */
onStream?: import('../../agents/types.js').StreamCallback;
/** Callback for phase lifecycle logging */
onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
onPhaseStart?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
instruction: string,
promptParts: PhasePromptParts,
phaseExecutionId?: string,
iteration?: number,
) => void;
/** Callback for phase completion logging */
onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
onPhaseComplete?: (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
content: string,
status: string,
error?: string,
phaseExecutionId?: string,
iteration?: number,
) => void;
/** Callback for Phase 3 internal stage logging */
onJudgeStage?: (
step: PieceMovement,
phase: 3,
phaseName: 'judge',
entry: JudgeStageEntry,
phaseExecutionId?: string,
iteration?: number,
) => void;
}
/**
@ -207,35 +235,45 @@ async function runSingleReportAttempt(
options: RunAgentOptions,
ctx: PhaseRunnerContext,
): Promise<ReportAttemptResult> {
ctx.onPhaseStart?.(step, 2, 'report', instruction);
let didEmitPhaseStart = false;
const callOptions: RunAgentOptions = {
...options,
onPromptResolved: (promptParts) => {
ctx.onPhaseStart?.(step, 2, 'report', instruction, promptParts, undefined, ctx.iteration);
didEmitPhaseStart = true;
},
};
let response: AgentResponse;
try {
response = await executeAgent(step.persona, instruction, options);
response = await executeAgent(step.persona, instruction, callOptions);
if (!didEmitPhaseStart) {
throw new Error(`Missing prompt parts for phase start: ${step.name}:2`);
}
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg);
ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg, undefined, ctx.iteration);
throw error;
}
if (response.status === 'blocked') {
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status);
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, undefined, undefined, ctx.iteration);
return { kind: 'blocked', response };
}
if (response.status !== 'done') {
const errorMessage = response.error || response.content || 'Unknown error';
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, errorMessage);
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, errorMessage, undefined, ctx.iteration);
return { kind: 'retryable_failure', errorMessage };
}
const trimmedContent = response.content.trim();
if (trimmedContent.length === 0) {
const errorMessage = 'Report output is empty';
ctx.onPhaseComplete?.(step, 2, 'report', response.content, 'error', errorMessage);
ctx.onPhaseComplete?.(step, 2, 'report', response.content, 'error', errorMessage, undefined, ctx.iteration);
return { kind: 'retryable_failure', errorMessage };
}
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status);
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, undefined, undefined, ctx.iteration);
return { kind: 'success', content: trimmedContent, response };
}

View File

@ -6,6 +6,7 @@ import { StatusJudgmentBuilder, type StatusJudgmentContext } from './instruction
import { getJudgmentReportFiles } from './evaluation/rule-utils.js';
import { createLogger } from '../../shared/utils/index.js';
import type { PhaseRunnerContext } from './phase-runner.js';
import { buildPhaseExecutionId } from '../../shared/utils/phaseExecutionId.js';
const log = createLogger('phase-runner');
@ -85,8 +86,29 @@ export async function runStatusJudgmentPhase(
const tagInstruction = new StatusJudgmentBuilder(step, {
...baseContext,
}).build();
if (!ctx.iteration || !Number.isInteger(ctx.iteration) || ctx.iteration <= 0) {
throw new Error(`Status judgment requires iteration for movement "${step.name}"`);
}
const phaseExecutionId = buildPhaseExecutionId({
step: step.name,
iteration: ctx.iteration,
phase: 3,
sequence: 1,
});
let didEmitPhaseStart = false;
const emitPhaseStart = (promptParts: { systemPrompt: string; userInstruction: string }): void => {
ctx.onPhaseStart?.(step, 3, 'judge', structuredInstruction, promptParts, phaseExecutionId, ctx.iteration);
didEmitPhaseStart = true;
};
if (step.rules.length === 1) {
emitPhaseStart({
systemPrompt: '',
userInstruction: structuredInstruction,
});
}
ctx.onPhaseStart?.(step, 3, 'judge', structuredInstruction);
try {
const result = await judgeStatus(structuredInstruction, tagInstruction, step.rules, {
cwd: ctx.cwd,
@ -94,13 +116,24 @@ export async function runStatusJudgmentPhase(
language: ctx.language,
interactive: ctx.interactive,
onStream: ctx.onStream,
onStructuredPromptResolved: (promptParts) => {
if (!didEmitPhaseStart) {
emitPhaseStart(promptParts);
}
},
onJudgeStage: (entry) => {
ctx.onJudgeStage?.(step, 3, 'judge', entry, phaseExecutionId, ctx.iteration);
},
});
if (!didEmitPhaseStart) {
throw new Error(`Missing prompt parts for phase start: ${step.name}:3`);
}
const tag = `[${step.name.toUpperCase()}:${result.ruleIndex + 1}]`;
ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done');
ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done', undefined, phaseExecutionId, ctx.iteration);
return { tag, ruleIndex: result.ruleIndex, method: result.method };
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg);
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg, phaseExecutionId, ctx.iteration);
throw error;
}
}

View File

@ -78,6 +78,19 @@ export type AiJudgeCaller = (
export type PhaseName = 'execute' | 'report' | 'judge';
/**
 * The two text parts of a phase prompt, passed to `phase:start` listeners
 * alongside the plain instruction string.
 */
export interface PhasePromptParts {
/** System prompt text; may be an empty string. */
systemPrompt: string;
/** User-facing instruction text of the prompt. */
userInstruction: string;
}
/**
 * Log entry describing one internal stage of the Phase 3 status judgment,
 * delivered through `onJudgeStage` callbacks / `phase:judge_stage` events.
 */
export interface JudgeStageEntry {
/** Ordinal of the judge stage (1, 2 or 3). */
stage: 1 | 2 | 3;
/** Detection method attempted in this stage. */
method: 'structured_output' | 'phase3_tag' | 'ai_judge';
/** Outcome reported for the stage. */
status: 'done' | 'error' | 'skipped';
/** Instruction text used for the stage. */
instruction: string;
/** Response text recorded for the stage. */
response: string;
}
/** Provider and model info resolved for a movement */
export interface MovementProviderInfo {
provider: ProviderType | undefined;
@ -91,8 +104,33 @@ export interface PieceEvents {
'movement:report': (step: PieceMovement, filePath: string, fileName: string) => void;
'movement:blocked': (step: PieceMovement, response: AgentResponse) => void;
'movement:user_input': (step: PieceMovement, userInput: string) => void;
'phase:start': (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
'phase:complete': (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
'phase:start': (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
instruction: string,
promptParts: PhasePromptParts,
phaseExecutionId?: string,
iteration?: number,
) => void;
'phase:complete': (
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
content: string,
status: string,
error?: string,
phaseExecutionId?: string,
iteration?: number,
) => void;
'phase:judge_stage': (
step: PieceMovement,
phase: 3,
phaseName: 'judge',
entry: JudgeStageEntry,
phaseExecutionId?: string,
iteration?: number,
) => void;
'piece:complete': (state: PieceState) => void;
'piece:abort': (state: PieceState, reason: string) => void;
'iteration:limit': (iteration: number, maxMovements: number) => void;

View File

@ -11,7 +11,7 @@ import { isQuietMode } from '../../../shared/context.js';
import { StreamDisplay } from '../../../shared/ui/index.js';
import { TaskPrefixWriter } from '../../../shared/ui/TaskPrefixWriter.js';
import { generateSessionId, createSessionLog, finalizeSessionLog, initNdjsonLog } from '../../../infra/fs/index.js';
import { createLogger, notifySuccess, notifyError, preventSleep, generateReportDir, isValidReportDirName } from '../../../shared/utils/index.js';
import { createLogger, notifySuccess, notifyError, preventSleep, generateReportDir, isValidReportDirName, getDebugPromptsLogFile } from '../../../shared/utils/index.js';
import { createProviderEventLogger, isProviderEventsEnabled } from '../../../shared/utils/providerEventLogger.js';
import { getLabel } from '../../../shared/i18n/index.js';
import { buildRunPaths } from '../../../core/piece/run/run-paths.js';
@ -25,9 +25,9 @@ import { createOutputFns, createPrefixedStreamHandler } from './outputFns.js';
import { RunMetaManager } from './runMeta.js';
import { createIterationLimitHandler, createUserInputHandler } from './iterationLimitHandler.js';
import { assertTaskPrefixPair, truncate, formatElapsedTime } from './pieceExecutionUtils.js';
import { createTraceReportWriter } from './traceReportWriter.js';
import { sanitizeTextForStorage } from './traceReportRedaction.js';
export type { PieceExecutionResult, PieceExecutionOptions };
const log = createLogger('piece');
export async function executePiece(
@ -39,12 +39,10 @@ export async function executePiece(
const { headerPrefix = 'Running Piece:', interactiveUserInput = false } = options;
const projectCwd = options.projectCwd;
assertTaskPrefixPair(options.taskPrefix, options.taskColorIndex);
const prefixWriter = options.taskPrefix != null
? new TaskPrefixWriter({ taskName: options.taskPrefix, colorIndex: options.taskColorIndex!, displayLabel: options.taskDisplayLabel })
: undefined;
const out = createOutputFns(prefixWriter);
const isRetry = Boolean(options.startMovement || options.retryNote);
log.debug('Session mode', { isRetry, isWorktree: cwd !== projectCwd });
out.header(`${headerPrefix} ${pieceConfig.name}`);
@ -52,18 +50,9 @@ export async function executePiece(
const pieceSessionId = generateSessionId();
const runSlug = options.reportDirName ?? generateReportDir(task);
if (!isValidReportDirName(runSlug)) throw new Error(`Invalid reportDirName: ${runSlug}`);
const runPaths = buildRunPaths(cwd, runSlug);
const runMetaManager = new RunMetaManager(runPaths, task, pieceConfig.name);
let sessionLog = createSessionLog(task, projectCwd, pieceConfig.name);
const ndjsonLogPath = initNdjsonLog(pieceSessionId, task, pieceConfig.name, { logsDir: runPaths.logsAbs });
const sessionLogger = new SessionLogger(ndjsonLogPath);
if (options.interactiveMetadata) {
sessionLogger.writeInteractiveMetadata(options.interactiveMetadata);
}
const displayRef: { current: StreamDisplay | null } = { current: null };
const streamHandler = prefixWriter
? createPrefixedStreamHandler(prefixWriter)
@ -71,12 +60,23 @@ export async function executePiece(
if (!displayRef.current || event.type === 'result') return;
displayRef.current.createHandler()(event);
};
const isWorktree = cwd !== projectCwd;
const globalConfig = resolvePieceConfigValues(
projectCwd,
['notificationSound', 'notificationSoundEvents', 'provider', 'runtime', 'preventSleep', 'model', 'logging', 'analytics'],
);
const traceReportMode = globalConfig.logging?.trace === true ? 'full' : 'redacted';
const allowSensitiveData = traceReportMode === 'full';
const ndjsonLogPath = initNdjsonLog(
pieceSessionId,
sanitizeTextForStorage(task, allowSensitiveData),
pieceConfig.name,
{ logsDir: runPaths.logsAbs },
);
const sessionLogger = new SessionLogger(ndjsonLogPath, allowSensitiveData);
if (options.interactiveMetadata) {
sessionLogger.writeInteractiveMetadata(options.interactiveMetadata);
}
const shouldNotify = globalConfig.notificationSound !== false;
const nse = globalConfig.notificationSoundEvents;
const shouldNotifyIterationLimit = shouldNotify && nse?.iterationLimit !== false;
@ -98,10 +98,8 @@ export async function executePiece(
movement: options.startMovement ?? pieceConfig.initialMovement,
enabled: isProviderEventsEnabled(globalConfig),
});
initAnalyticsWriter(globalConfig.analytics?.enabled === true, globalConfig.analytics?.eventsPath ?? join(getGlobalConfigDir(), 'analytics', 'events'));
if (globalConfig.preventSleep) preventSleep();
const analyticsEmitter = new AnalyticsEmitter(runSlug, currentProvider, configuredModel ?? '(default)');
const savedSessions = isRetry
? (isWorktree ? loadWorktreeSessions(projectCwd, cwd, currentProvider) : loadPersonaSessions(projectCwd, currentProvider))
@ -128,12 +126,22 @@ export async function executePiece(
let exceededInfo: ExceededInfo | undefined;
let lastMovementContent: string | undefined;
let lastMovementName: string | undefined;
const writeTraceReportOnce = createTraceReportWriter({
sessionLogger,
ndjsonLogPath,
tracePath: join(runPaths.runRootAbs, 'trace.md'),
pieceName: pieceConfig.name,
task,
runSlug,
promptLogPath: getDebugPromptsLogFile() ?? undefined,
mode: traceReportMode,
logger: log,
});
let currentIteration = 0;
const movementIterations = new Map<string, number>();
let engine: PieceEngine | null = null;
const runAbortController = new AbortController();
const abortHandler = new AbortHandler({ externalSignal: options.abortSignal, internalController: runAbortController, getEngine: () => engine });
try {
engine = new PieceEngine(effectivePieceConfig, cwd, task, {
abortSignal: runAbortController.signal,
@ -161,20 +169,21 @@ export async function executePiece(
taskColorIndex: options.taskColorIndex,
initialIteration: options.initialIterationOverride,
});
abortHandler.install();
engine.on('phase:start', (step, phase, phaseName, instruction) => {
engine.on('phase:start', (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
log.debug('Phase starting', { step: step.name, phase, phaseName });
sessionLogger.onPhaseStart(step, phase, phaseName, instruction);
sessionLogger.onPhaseStart(step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
});
engine.on('phase:complete', (step, phase, phaseName, content, phaseStatus, phaseError) => {
engine.on('phase:complete', (step, phase, phaseName, content, phaseStatus, phaseError, phaseExecutionId, iteration) => {
log.debug('Phase completed', { step: step.name, phase, phaseName, status: phaseStatus });
sessionLogger.setIteration(currentIteration);
sessionLogger.onPhaseComplete(step, phase, phaseName, content, phaseStatus, phaseError);
sessionLogger.onPhaseComplete(step, phase, phaseName, content, phaseStatus, phaseError, phaseExecutionId, iteration);
});
engine.on('phase:judge_stage', (step, phase, phaseName, entry, phaseExecutionId, iteration) => {
sessionLogger.onJudgeStage(step, phase, phaseName, entry, phaseExecutionId, iteration);
});
engine.on('movement:start', (step, iteration, instruction, providerInfo) => {
log.debug('Movement starting', { step: step.name, persona: step.personaDisplayName, iteration });
currentIteration = iteration;
@ -234,6 +243,11 @@ export async function executePiece(
sessionLog = finalizeSessionLog(sessionLog, 'completed');
sessionLogger.onPieceComplete(state);
runMetaManager.finalize('completed', state.iteration);
writeTraceReportOnce({
status: 'completed',
iterations: state.iteration,
endTime: new Date().toISOString(),
});
try {
saveSessionState(projectCwd, { status: 'success', taskResult: truncate(lastMovementContent ?? '', 1000), timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState);
} catch (error) { log.error('Failed to save session state', { error }); }
@ -252,6 +266,12 @@ export async function executePiece(
sessionLog = finalizeSessionLog(sessionLog, 'aborted');
sessionLogger.onPieceAbort(state, reason);
runMetaManager.finalize('aborted', state.iteration);
writeTraceReportOnce({
status: 'aborted',
iterations: state.iteration,
reason,
endTime: new Date().toISOString(),
});
try {
saveSessionState(projectCwd, { status: reason === 'user_interrupted' ? 'user_stopped' : 'error', errorMessage: reason, timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState);
} catch (error) { log.error('Failed to save session state', { error }); }

View File

@ -2,7 +2,6 @@
* Session logger NDJSON
*
* PieceEngine NDJSON
* analytics UI
*/
import {
@ -13,14 +12,16 @@ import {
type NdjsonPieceAbort,
type NdjsonPhaseStart,
type NdjsonPhaseComplete,
type NdjsonPhaseJudgeStage,
type NdjsonInteractiveStart,
type NdjsonInteractiveEnd,
} from '../../../infra/fs/index.js';
import type { InteractiveMetadata } from './types.js';
import { isDebugEnabled, writePromptLog } from '../../../shared/utils/index.js';
import type { PromptLogRecord } from '../../../shared/utils/index.js';
import type { PromptLogRecord, NdjsonRecord } from '../../../shared/utils/index.js';
import type { PieceMovement, AgentResponse, PieceState } from '../../../core/models/index.js';
import type { PhaseName } from '../../../core/piece/index.js';
import type { JudgeStageEntry, PhasePromptParts } from '../../../core/piece/types.js';
import { sanitizeTextForStorage } from './traceReportRedaction.js';
function toJudgmentMatchMethod(
matchedRuleMethod: string | undefined,
@ -34,29 +35,30 @@ function toJudgmentMatchMethod(
export class SessionLogger {
private readonly ndjsonLogPath: string;
/** phase 開始時のプロンプトを一時保持(デバッグ用) */
private readonly phasePrompts = new Map<string, string>();
/** 現在のピース全体のイテレーション数 */
private readonly allowSensitiveData: boolean;
private readonly phasePromptsByExecutionId = new Map<string, PhasePromptParts>();
private readonly phaseExecutionCounters = new Map<string, number>();
private readonly ndjsonRecords: NdjsonRecord[] = [];
private readonly promptRecords: PromptLogRecord[] = [];
private currentIteration = 0;
constructor(ndjsonLogPath: string) {
constructor(ndjsonLogPath: string, allowSensitiveData: boolean) {
this.ndjsonLogPath = ndjsonLogPath;
this.allowSensitiveData = allowSensitiveData;
}
/** インタラクティブモードのメタデータinteractive_start / interactive_endを NDJSON へ記録する */
writeInteractiveMetadata(meta: InteractiveMetadata): void {
const startRecord: NdjsonInteractiveStart = { type: 'interactive_start', timestamp: new Date().toISOString() };
appendNdjsonLine(this.ndjsonLogPath, startRecord);
this.appendRecord(startRecord);
const endRecord: NdjsonInteractiveEnd = {
type: 'interactive_end',
confirmed: meta.confirmed,
...(meta.task ? { task: meta.task } : {}),
...(meta.task ? { task: this.sanitizeText(meta.task) } : {}),
timestamp: new Date().toISOString(),
};
appendNdjsonLine(this.ndjsonLogPath, endRecord);
this.appendRecord(endRecord);
}
/** 現在のイテレーション番号を更新するmovement:start で呼ぶ) */
/**
 * Stores the given iteration number as the logger's current iteration.
 * The stored value is used as the fallback `iteration` of prompt log records
 * when a phase completion does not carry its own iteration.
 */
setIteration(iteration: number): void {
this.currentIteration = iteration;
}
@ -64,75 +66,127 @@ export class SessionLogger {
onPhaseStart(
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
phaseName: 'execute' | 'report' | 'judge',
instruction: string,
promptParts: PhasePromptParts,
phaseExecutionId?: string,
iteration?: number,
): void {
if (!instruction) {
throw new Error(`Missing phase instruction for ${step.name}:${phase}`);
}
const resolvedPhaseExecutionId = this.resolvePhaseExecutionId(step.name, phase, phaseExecutionId, iteration);
const record: NdjsonPhaseStart = {
type: 'phase_start',
step: step.name,
phase,
phaseName,
phaseExecutionId: resolvedPhaseExecutionId,
timestamp: new Date().toISOString(),
...(instruction ? { instruction } : {}),
instruction: this.sanitizeText(instruction),
systemPrompt: this.sanitizeText(promptParts.systemPrompt),
userInstruction: this.sanitizeText(promptParts.userInstruction),
...(iteration != null ? { iteration } : {}),
};
appendNdjsonLine(this.ndjsonLogPath, record);
this.appendRecord(record);
if (isDebugEnabled()) {
this.phasePrompts.set(`${step.name}:${phase}`, instruction);
this.phasePromptsByExecutionId.set(resolvedPhaseExecutionId, promptParts);
}
}
onPhaseComplete(
step: PieceMovement,
phase: 1 | 2 | 3,
phaseName: PhaseName,
phaseName: 'execute' | 'report' | 'judge',
content: string,
phaseStatus: string,
phaseError: string | undefined,
phaseExecutionId?: string,
iteration?: number,
): void {
if (!phaseStatus) {
throw new Error(`Missing phase status for ${step.name}:${phase}`);
}
const resolvedPhaseExecutionId = this.resolveCompletionPhaseExecutionId(step.name, phase, phaseExecutionId, iteration);
const completedAt = new Date().toISOString();
const record: NdjsonPhaseComplete = {
type: 'phase_complete',
step: step.name,
phase,
phaseName,
phaseExecutionId: resolvedPhaseExecutionId,
status: phaseStatus,
content,
timestamp: new Date().toISOString(),
...(phaseError ? { error: phaseError } : {}),
content: this.sanitizeText(content),
timestamp: completedAt,
...(phaseError ? { error: this.sanitizeText(phaseError) } : {}),
...(iteration != null ? { iteration } : {}),
};
appendNdjsonLine(this.ndjsonLogPath, record);
this.appendRecord(record);
const promptKey = `${step.name}:${phase}`;
const prompt = this.phasePrompts.get(promptKey);
this.phasePrompts.delete(promptKey);
if (isDebugEnabled() && prompt) {
const prompt = this.phasePromptsByExecutionId.get(resolvedPhaseExecutionId);
if (isDebugEnabled()) {
if (!prompt) {
throw new Error(`Missing debug prompt for ${step.name}:${phase}:${resolvedPhaseExecutionId}`);
}
const promptRecord: PromptLogRecord = {
movement: step.name,
phase,
iteration: this.currentIteration,
prompt,
response: content,
timestamp: new Date().toISOString(),
iteration: iteration ?? this.currentIteration,
phaseExecutionId: resolvedPhaseExecutionId,
prompt: this.sanitizeText(prompt.userInstruction),
systemPrompt: this.sanitizeText(prompt.systemPrompt),
userInstruction: this.sanitizeText(prompt.userInstruction),
response: this.sanitizeText(content),
timestamp: completedAt,
};
writePromptLog(promptRecord);
this.promptRecords.push(promptRecord);
this.phasePromptsByExecutionId.delete(resolvedPhaseExecutionId);
}
}
/**
 * Records one Phase 3 judge stage as a `phase_judge_stage` NDJSON record.
 *
 * The execution id is taken from `phaseExecutionId` when given; otherwise it
 * is resolved from the counter of the already-started phase, which throws if
 * no matching phase start was recorded. Instruction and response text are
 * sanitized before being stored, and `iteration` is included only when
 * provided.
 */
onJudgeStage(
  step: PieceMovement,
  phase: 3,
  phaseName: 'judge',
  entry: JudgeStageEntry,
  phaseExecutionId?: string,
  iteration?: number,
): void {
  const executionId = this.resolveCompletionPhaseExecutionId(step.name, phase, phaseExecutionId, iteration);
  const { stage, method, status, instruction, response } = entry;
  const iterationField = iteration != null ? { iteration } : {};
  const record: NdjsonPhaseJudgeStage = {
    type: 'phase_judge_stage',
    step: step.name,
    phase,
    phaseName,
    phaseExecutionId: executionId,
    stage,
    method,
    status,
    instruction: this.sanitizeText(instruction),
    response: this.sanitizeText(response),
    timestamp: new Date().toISOString(),
    ...iterationField,
  };
  this.appendRecord(record);
}
onMovementStart(
step: PieceMovement,
iteration: number,
instruction: string | undefined,
): void {
this.currentIteration = iteration;
const record: NdjsonStepStart = {
type: 'step_start',
step: step.name,
persona: step.personaDisplayName,
iteration,
timestamp: new Date().toISOString(),
...(instruction ? { instruction } : {}),
...(instruction ? { instruction: this.sanitizeText(instruction) } : {}),
};
appendNdjsonLine(this.ndjsonLogPath, record);
this.appendRecord(record);
}
onMovementComplete(
@ -146,15 +200,15 @@ export class SessionLogger {
step: step.name,
persona: response.persona,
status: response.status,
content: response.content,
instruction,
content: this.sanitizeText(response.content),
instruction: this.sanitizeText(instruction),
...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}),
...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}),
...(matchMethod ? { matchMethod } : {}),
...(response.error ? { error: response.error } : {}),
...(response.error ? { error: this.sanitizeText(response.error) } : {}),
timestamp: response.timestamp.toISOString(),
};
appendNdjsonLine(this.ndjsonLogPath, record);
this.appendRecord(record);
}
onPieceComplete(state: PieceState): void {
@ -163,16 +217,73 @@ export class SessionLogger {
iterations: state.iteration,
endTime: new Date().toISOString(),
};
appendNdjsonLine(this.ndjsonLogPath, record);
this.appendRecord(record);
}
onPieceAbort(state: PieceState, reason: string): void {
const record: NdjsonPieceAbort = {
type: 'piece_abort',
iterations: state.iteration,
reason,
reason: this.sanitizeText(reason),
endTime: new Date().toISOString(),
};
this.appendRecord(record);
}
/**
 * Returns a shallow copy of the NDJSON records appended so far, so callers
 * cannot mutate the logger's internal list.
 */
getNdjsonRecords(): NdjsonRecord[] {
  return this.ndjsonRecords.slice();
}
/**
 * Returns a shallow copy of the prompt log records collected so far, so
 * callers cannot mutate the logger's internal list.
 */
getPromptRecords(): PromptLogRecord[] {
  return this.promptRecords.slice();
}
/**
 * Builds the counter key for a phase: `step:phase` when no iteration is
 * given, `step:iteration:phase` otherwise.
 */
private buildPhaseKey(stepName: string, phase: 1 | 2 | 3, iteration?: number): string {
  const segments = iteration == null ? [stepName, phase] : [stepName, iteration, phase];
  return segments.join(':');
}
/**
 * Resolves the execution id for a phase that is starting.
 *
 * An explicitly supplied id is returned as-is. Otherwise the per-key counter
 * is advanced by one and the id is generated as `<phaseKey>:<sequence>`.
 */
private resolvePhaseExecutionId(
  stepName: string,
  phase: 1 | 2 | 3,
  phaseExecutionId: string | undefined,
  iteration?: number,
): string {
  if (!phaseExecutionId) {
    const counterKey = this.buildPhaseKey(stepName, phase, iteration);
    const sequence = (this.phaseExecutionCounters.get(counterKey) ?? 0) + 1;
    this.phaseExecutionCounters.set(counterKey, sequence);
    return `${counterKey}:${sequence}`;
  }
  return phaseExecutionId;
}
/**
 * Resolves the execution id for a phase that has already started.
 *
 * An explicitly supplied id is returned as-is. Otherwise the id is rebuilt
 * from the current counter value for the phase key without advancing it.
 *
 * @throws Error when no counter exists for the phase key, i.e. no phase start
 *   generated an id for it.
 */
private resolveCompletionPhaseExecutionId(
  stepName: string,
  phase: 1 | 2 | 3,
  phaseExecutionId: string | undefined,
  iteration?: number,
): string {
  if (!phaseExecutionId) {
    const counterKey = this.buildPhaseKey(stepName, phase, iteration);
    const latestSequence = this.phaseExecutionCounters.get(counterKey);
    if (latestSequence == null) {
      throw new Error(`Missing phase execution id on completion for ${stepName}:${phase}`);
    }
    return `${counterKey}:${latestSequence}`;
  }
  return phaseExecutionId;
}
/**
 * Keeps the record in the in-memory list (exposed via `getNdjsonRecords`)
 * and then passes it to `appendNdjsonLine` together with `ndjsonLogPath`.
 */
private appendRecord(record: NdjsonRecord): void {
this.ndjsonRecords.push(record);
appendNdjsonLine(this.ndjsonLogPath, record);
}
/**
 * Delegates to `sanitizeTextForStorage`, passing this logger's
 * `allowSensitiveData` flag.
 */
private sanitizeText(text: string): string {
return sanitizeTextForStorage(text, this.allowSensitiveData);
}
}

View File

@ -0,0 +1,55 @@
import type { NdjsonRecord, PromptLogRecord } from '../../../shared/utils/index.js';
import type {
TraceReportMode,
TraceReportParams,
TraceMovement,
TracePhase,
} from './traceReportTypes.js';
import { parseJsonl, buildTraceFromRecords, type PromptRecord } from './traceReportParser.js';
import { cloneMovementsForMode, sanitizeTraceParamsForMode } from './traceReportRedaction.js';
import { assertTraceParams, renderTraceReportMarkdown } from './traceReportRenderer.js';
export type {
TraceReportMode,
TraceReportParams,
TraceMovement,
TracePhase,
};
export { assertTraceParams, renderTraceReportMarkdown };
/**
 * Renders the trace report from the on-disk NDJSON session log.
 *
 * @param params - Report header parameters.
 * @param ndjsonLogPath - Path of the NDJSON session log.
 * @param promptLogPath - Path of the prompt log; when omitted no prompt records are used.
 * @param mode - Report mode; `'off'` produces no report.
 * @returns The markdown report, or `undefined` when `mode` is `'off'`.
 * @throws Error when the session log yields no records.
 */
export function renderTraceReportFromLogs(
  params: TraceReportParams,
  ndjsonLogPath: string,
  promptLogPath: string | undefined,
  mode: TraceReportMode,
): string | undefined {
  if (mode === 'off') {
    return undefined;
  }

  const sessionRecords = parseJsonl<NdjsonRecord>(ndjsonLogPath);
  if (sessionRecords.length === 0) {
    throw new Error(`No session records found for trace report: ${ndjsonLogPath}`);
  }

  let promptRecords: PromptRecord[] = [];
  if (promptLogPath) {
    promptRecords = parseJsonl<PromptRecord>(promptLogPath);
  }
  return renderTraceReportFromRecords(params, sessionRecords, promptRecords, mode);
}
/**
 * Renders the trace report from already-loaded records.
 *
 * Builds the trace, applies the mode-dependent sanitizing to the parameters and
 * movements, and renders the markdown.
 *
 * @returns The markdown report, or `undefined` when `mode` is `'off'`.
 * @throws Error when `records` is empty.
 */
export function renderTraceReportFromRecords(
  params: TraceReportParams,
  records: NdjsonRecord[],
  promptRecords: PromptRecord[] | PromptLogRecord[],
  mode: TraceReportMode,
): string | undefined {
  if (mode === 'off') {
    return undefined;
  }
  if (records.length === 0) {
    throw new Error('No session records found for trace report from records');
  }

  const { traceStartedAt, movements } = buildTraceFromRecords(
    records,
    promptRecords as PromptRecord[],
    params.endTime,
  );
  return renderTraceReportMarkdown(
    sanitizeTraceParamsForMode(params, mode),
    traceStartedAt,
    cloneMovementsForMode(movements, mode),
  );
}

View File

@ -0,0 +1,260 @@
import { existsSync, readFileSync } from 'node:fs';
import type { NdjsonRecord, PromptLogRecord } from '../../../shared/utils/index.js';
import {
buildPhaseExecutionId,
parsePhaseExecutionId,
} from '../../../shared/utils/phaseExecutionId.js';
import type {
TraceMovement,
TracePhase,
} from './traceReportTypes.js';
/** Prompt log record as consumed by the trace parser; `timestamp` is declared explicitly here. */
interface PromptRecord extends PromptLogRecord {
timestamp: string;
}
/** Result of `buildTraceFromRecords`. */
interface BuildTraceResult {
// Start time from the first `piece_start` record, or the supplied default end time when none was seen.
traceStartedAt: string;
// Movements sorted by start time, then by iteration.
movements: TraceMovement[];
}
/**
 * Reads a JSON Lines file and parses every non-blank line.
 *
 * Lines are trimmed before parsing and blank lines are skipped. The parsed
 * values are cast to `T` without validation.
 *
 * @param path - File to read.
 * @returns Parsed values in file order; an empty array when the file does not exist.
 * @throws SyntaxError (from `JSON.parse`) on the first line that is not valid JSON.
 */
export function parseJsonl<T>(path: string): T[] {
  if (!existsSync(path)) {
    return [];
  }
  const parsed: T[] = [];
  for (const rawLine of readFileSync(path, 'utf-8').split('\n')) {
    const line = rawLine.trim();
    if (line.length === 0) {
      continue;
    }
    parsed.push(JSON.parse(line) as T);
  }
  return parsed;
}
/** Builds the map key that identifies one movement: `step:iteration`. */
function movementKey(step: string, iteration: number): string {
  return step + ':' + String(iteration);
}
/**
 * Generates the next phase execution id for a step/iteration/phase.
 *
 * The per-key counter in `counters` is advanced first, and the new value is
 * used as the id's sequence number.
 */
function createPhaseExecutionId(
  step: string,
  iteration: number,
  phase: 1 | 2 | 3,
  counters: Map<string, number>,
): string {
  const counterKey = `${step}:${iteration}:${phase}`;
  const sequence = (counters.get(counterKey) ?? 0) + 1;
  counters.set(counterKey, sequence);
  return buildPhaseExecutionId({ step, iteration, phase, sequence });
}
/**
 * Extracts the step and iteration from a phase execution id.
 *
 * @returns The two fields, or `undefined` when the id cannot be parsed.
 */
function parsePhaseExecutionKey(
  phaseExecutionId: string,
): { step: string; iteration: number } | undefined {
  const parsed = parsePhaseExecutionId(phaseExecutionId);
  return parsed ? { step: parsed.step, iteration: parsed.iteration } : undefined;
}
/**
 * Returns the movement registered for `step`/`iteration`, creating and
 * registering a new one when none exists yet.
 *
 * @param movementsByKey - Registry of movements, keyed by `movementKey`.
 * @param timestamp - Used as `startedAt` only when a new movement is created.
 * @param fallbackPersona - Used as `persona` only when a new movement is created.
 */
function ensureMovement(
  movementsByKey: Map<string, TraceMovement>,
  step: string,
  iteration: number,
  timestamp: string,
  fallbackPersona: string,
): TraceMovement {
  const key = movementKey(step, iteration);
  let movement = movementsByKey.get(key);
  if (!movement) {
    movement = {
      step,
      persona: fallbackPersona,
      iteration,
      startedAt: timestamp,
      phases: [],
    };
    movementsByKey.set(key, movement);
  }
  return movement;
}
/**
 * Raises the per-key counter to at least the sequence number carried by an
 * explicit phase execution id, so that ids generated or looked up later for
 * the same step/iteration/phase line up with it. Ids that cannot be parsed
 * are ignored.
 */
function syncPhaseExecutionCounter(
  phaseExecutionId: string,
  counters: Map<string, number>,
): void {
  const parsed = parsePhaseExecutionId(phaseExecutionId);
  if (!parsed) {
    return;
  }
  const key = `${parsed.step}:${parsed.iteration}:${parsed.phase}`;
  if (parsed.sequence > (counters.get(key) ?? 0)) {
    counters.set(key, parsed.sequence);
  }
}

/**
 * Returns the execution id of the most recently started phase for the given
 * step/iteration/phase WITHOUT advancing the counter, or `undefined` when no
 * such phase has been started.
 */
function latestPhaseExecutionId(
  step: string,
  iteration: number,
  phase: 1 | 2 | 3,
  counters: Map<string, number>,
): string | undefined {
  const sequence = counters.get(`${step}:${iteration}:${phase}`);
  if (sequence == null) {
    return undefined;
  }
  return buildPhaseExecutionId({ step, iteration, phase, sequence });
}

/**
 * Rebuilds the movement/phase structure of a run from its log records.
 *
 * Records are processed in order:
 * - `piece_start` supplies the trace start time (first one wins).
 * - `step_start` / `step_complete` create or update a movement per step and iteration.
 * - `phase_start` adds a phase to its movement; `phase_complete` fills in the
 *   response, status, error and completion time of the matching phase.
 * - `phase_judge_stage` appends a judge stage to the phase it references and is
 *   silently skipped when that phase is unknown.
 *
 * Phases are matched by `phaseExecutionId`. When a record carries no id, one is
 * derived from a per step/iteration/phase counter: a start advances the counter,
 * a completion reuses the current value so that it resolves to the phase that
 * was started last.
 *
 * @param records - Session log records, in log order.
 * @param promptRecords - Prompt log records; looked up by `phaseExecutionId`.
 * @param defaultEndTime - Used as `traceStartedAt` when no `piece_start` record exists.
 * @returns The trace start time and the movements sorted by start time, then iteration.
 * @throws Error when an iteration cannot be determined for a record, or when a
 *   `phase_complete` has no matching `phase_start`.
 */
export function buildTraceFromRecords(
  records: NdjsonRecord[],
  promptRecords: PromptRecord[],
  defaultEndTime: string,
): BuildTraceResult {
  const promptByExecutionId = new Map<string, PromptRecord>();
  for (const prompt of promptRecords) {
    if (prompt.phaseExecutionId) {
      promptByExecutionId.set(prompt.phaseExecutionId, prompt);
    }
  }

  const movementsByKey = new Map<string, TraceMovement>();
  const phasesByExecutionId = new Map<string, { movement: TraceMovement; index: number }>();
  const phaseExecutionCounters = new Map<string, number>();
  const latestIterationByStep = new Map<string, number>();
  let traceStartedAt = '';

  for (const record of records) {
    if (!traceStartedAt && record.type === 'piece_start') {
      traceStartedAt = record.startTime;
      continue;
    }

    if (record.type === 'step_start') {
      latestIterationByStep.set(record.step, record.iteration);
      const movement = ensureMovement(
        movementsByKey,
        record.step,
        record.iteration,
        record.timestamp,
        record.persona,
      );
      movement.persona = record.persona;
      movement.instruction = record.instruction;
      continue;
    }

    if (record.type === 'step_complete') {
      // step_complete is attributed to the latest started iteration of the step.
      const iteration = latestIterationByStep.get(record.step);
      if (iteration == null) {
        throw new Error(`Missing iteration for step_complete: ${record.step}`);
      }
      const movement = ensureMovement(
        movementsByKey,
        record.step,
        iteration,
        record.timestamp,
        record.persona,
      );
      movement.completedAt = record.timestamp;
      movement.result = {
        status: record.status,
        content: record.content,
        error: record.error,
        matchedRuleIndex: record.matchedRuleIndex,
        matchedRuleMethod: record.matchedRuleMethod,
        matchMethod: record.matchMethod,
      };
      continue;
    }

    if (record.type === 'phase_start') {
      const iteration = record.iteration ?? latestIterationByStep.get(record.step);
      if (iteration == null) {
        throw new Error(`Missing iteration for phase_start: ${record.step}:${record.phase}`);
      }
      const movement = ensureMovement(
        movementsByKey,
        record.step,
        iteration,
        record.timestamp,
        record.step,
      );
      let resolvedExecutionId: string;
      if (record.phaseExecutionId) {
        resolvedExecutionId = record.phaseExecutionId;
        // Keep the counter in step with explicit ids so a later record without
        // an id resolves to (or continues after) this execution.
        syncPhaseExecutionCounter(resolvedExecutionId, phaseExecutionCounters);
      } else {
        resolvedExecutionId = createPhaseExecutionId(
          record.step,
          iteration,
          record.phase,
          phaseExecutionCounters,
        );
      }
      const prompt = promptByExecutionId.get(resolvedExecutionId);
      const phase: TracePhase = {
        phaseExecutionId: resolvedExecutionId,
        phase: record.phase,
        phaseName: record.phaseName,
        instruction: record.instruction ?? record.userInstruction ?? prompt?.userInstruction ?? '',
        systemPrompt: record.systemPrompt ?? prompt?.systemPrompt ?? '',
        userInstruction: record.userInstruction ?? prompt?.userInstruction ?? record.instruction ?? '',
        startedAt: record.timestamp,
      };
      movement.phases.push(phase);
      phasesByExecutionId.set(resolvedExecutionId, {
        movement,
        index: movement.phases.length - 1,
      });
      continue;
    }

    if (record.type === 'phase_complete') {
      const iterationFromId = record.phaseExecutionId
        ? parsePhaseExecutionKey(record.phaseExecutionId)?.iteration
        : undefined;
      const iteration =
        record.iteration
        ?? iterationFromId
        ?? latestIterationByStep.get(record.step);
      if (iteration == null) {
        throw new Error(`Missing iteration for phase_complete: ${record.step}:${record.phase}`);
      }
      // A completion must resolve to the phase that was STARTED last. Generating
      // a fresh id here would advance the counter and never match the start.
      const resolvedExecutionId =
        record.phaseExecutionId
        ?? latestPhaseExecutionId(record.step, iteration, record.phase, phaseExecutionCounters);
      if (!resolvedExecutionId) {
        throw new Error(
          `Missing phase_start before phase_complete: ${record.step}:${iteration}:${record.phase}`,
        );
      }
      const phaseRef = phasesByExecutionId.get(resolvedExecutionId);
      if (!phaseRef) {
        throw new Error(`Missing phase_start before phase_complete: ${resolvedExecutionId}`);
      }
      const existing = phaseRef.movement.phases[phaseRef.index];
      if (!existing) {
        throw new Error(`Missing phase state for completion: ${resolvedExecutionId}`);
      }
      const prompt = promptByExecutionId.get(resolvedExecutionId);
      phaseRef.movement.phases[phaseRef.index] = {
        ...existing,
        instruction: existing.instruction || prompt?.userInstruction || '',
        systemPrompt: prompt?.systemPrompt ?? existing.systemPrompt,
        userInstruction: prompt?.userInstruction ?? existing.userInstruction,
        response: record.content,
        status: record.status,
        error: record.error,
        completedAt: record.timestamp,
      };
      continue;
    }

    if (record.type === 'phase_judge_stage') {
      // Judge stages can only be attached through an explicit id; anything that
      // cannot be matched to a known phase is dropped.
      const phaseRef = record.phaseExecutionId
        ? phasesByExecutionId.get(record.phaseExecutionId)
        : undefined;
      if (!phaseRef) {
        continue;
      }
      const existing = phaseRef.movement.phases[phaseRef.index];
      if (!existing) {
        continue;
      }
      phaseRef.movement.phases[phaseRef.index] = {
        ...existing,
        judgeStages: [
          ...(existing.judgeStages ?? []),
          {
            stage: record.stage,
            method: record.method,
            status: record.status,
            instruction: record.instruction,
            response: record.response,
          },
        ],
      };
    }
  }

  const movements = [...movementsByKey.values()].sort((a, b) => {
    const byStart = a.startedAt.localeCompare(b.startedAt);
    if (byStart !== 0) {
      return byStart;
    }
    return a.iteration - b.iteration;
  });

  return {
    traceStartedAt: traceStartedAt || defaultEndTime,
    movements,
  };
}
export type { PromptRecord };

View File

@ -0,0 +1,81 @@
import type {
TraceMovement,
TraceReportMode,
TraceReportParams,
} from './traceReportTypes.js';
/**
 * Masks credential-like substrings with `[REDACTED]`.
 *
 * The rules are applied in order:
 * 1. the value after `Authorization: Bearer`,
 * 2. the value of `key: value` / `key=value` pairs whose key is one of
 *    api key, token, password, secret, access token or refresh token,
 * 3. the same keys (api key, token, password, secret) as URL query parameters,
 * 4. bare tokens starting with `sk-`, `ghp_` or `xox[baprs]-`.
 *
 * @param text - Text to sanitize; empty input is returned unchanged.
 */
export function sanitizeSensitiveText(text: string): string {
  if (!text) return text;
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [/(Authorization\s*:\s*Bearer\s+)([^\s]+)/gi, '$1[REDACTED]'],
    [
      /(["']?(?:api[_-]?key|token|password|secret|access[_-]?token|refresh[_-]?token)["']?\s*[:=]\s*["']?)([^"',\s}\]]+)(["']?)/gi,
      '$1[REDACTED]$3',
    ],
    [/([?&](?:api[_-]?key|token|password|secret)=)([^&\s]+)/gi, '$1[REDACTED]'],
    [/\b(?:sk-[A-Za-z0-9]{8,}|ghp_[A-Za-z0-9]{8,}|xox[baprs]-[A-Za-z0-9-]{8,})\b/g, '[REDACTED]'],
  ];
  return rules.reduce(
    (current, [pattern, replacement]) => current.replace(pattern, replacement),
    text,
  );
}
/**
 * Applies the report mode to a piece of text: unchanged for empty text or
 * `'full'` mode, sanitized otherwise.
 */
function transformText(text: string, mode: TraceReportMode): string {
  const keepAsIs = !text || mode === 'full';
  return keepAsIs ? text : sanitizeSensitiveText(text);
}
/**
 * Returns copies of the movements with every free-text field passed through the
 * mode-dependent text transform.
 *
 * Covered fields: the movement instruction, the result content and error, each
 * phase's instruction, system prompt, user instruction, response and error, and
 * each judge stage's instruction and response. The input is not modified.
 */
export function cloneMovementsForMode(
  movements: TraceMovement[],
  mode: TraceReportMode,
): TraceMovement[] {
  const apply = (text: string): string => transformText(text, mode);
  const applyOptional = (text: string | undefined): string | undefined =>
    text == null ? undefined : apply(text);

  return movements.map((movement) => {
    const { result } = movement;
    const phases = movement.phases.map((phase) => ({
      ...phase,
      instruction: apply(phase.instruction),
      systemPrompt: apply(phase.systemPrompt),
      userInstruction: apply(phase.userInstruction),
      response: applyOptional(phase.response),
      error: applyOptional(phase.error),
      judgeStages: phase.judgeStages?.map((stage) => ({
        ...stage,
        instruction: apply(stage.instruction),
        response: apply(stage.response),
      })),
    }));

    return {
      ...movement,
      instruction: applyOptional(movement.instruction),
      result: result
        ? {
            ...result,
            content: apply(result.content),
            // Only overwrite `error` when it is non-empty.
            ...(result.error ? { error: apply(result.error) } : {}),
          }
        : undefined,
      phases,
    };
  });
}
/**
 * Returns the report parameters to render for the given mode.
 *
 * In `'full'` mode the input object itself is returned. Otherwise a copy is
 * returned whose `task` and non-empty `reason` have been sanitized.
 */
export function sanitizeTraceParamsForMode(
  params: TraceReportParams,
  mode: TraceReportMode,
): TraceReportParams {
  if (mode === 'full') {
    return params;
  }
  const sanitized: TraceReportParams = {
    ...params,
    task: sanitizeSensitiveText(params.task),
  };
  if (params.reason) {
    sanitized.reason = sanitizeSensitiveText(params.reason);
  }
  return sanitized;
}
/**
 * Prepares text for storage.
 *
 * @param text - Text to store.
 * @param allowFullText - When true the text is returned unchanged.
 * @returns The text unchanged when it is empty or `allowFullText` is true; the sanitized text otherwise.
 */
export function sanitizeTextForStorage(text: string, allowFullText: boolean): string {
  const keepAsIs = !text || allowFullText;
  return keepAsIs ? text : sanitizeSensitiveText(text);
}

View File

@ -0,0 +1,297 @@
import type {
TraceMovement,
TracePhase,
TraceReportParams,
} from './traceReportTypes.js';
/** Render unit for a single movement rendered on its own. */
interface MovementBlock {
kind: 'movement';
movement: TraceMovement;
}
/** Render unit for a run of movements whose steps alternate between two step names. */
interface LoopBlock {
kind: 'loop';
movements: TraceMovement[];
}
/** A unit of report output, discriminated by `kind`. */
type RenderBlock = MovementBlock | LoopBlock;
/**
 * Validates the report parameters.
 *
 * @throws Error when `tracePath`, `pieceName`, `task`, `runSlug` or `endTime` is
 *   empty (checked in that order), or when `iterations` is not a non-negative integer.
 */
export function assertTraceParams(params: TraceReportParams): void {
  const requiredFields = ['tracePath', 'pieceName', 'task', 'runSlug', 'endTime'] as const;
  for (const field of requiredFields) {
    if (!params[field]) {
      throw new Error(`${field} is required`);
    }
  }

  const { iterations } = params;
  const isValidCount = Number.isInteger(iterations) && iterations >= 0;
  if (!isValidCount) {
    throw new Error(`iterations must be a non-negative integer: ${iterations}`);
  }
}
/**
 * Validates one movement before rendering.
 *
 * @param movement - Movement to check.
 * @param index - Position of the movement, used in error messages.
 * @throws Error when `step`, `persona` or `startedAt` is empty, or when
 *   `iteration` is not a positive integer.
 */
function assertTraceMovement(movement: TraceMovement, index: number): void {
  const label = `trace movement[${index}]`;
  if (!movement.step) {
    throw new Error(`${label} missing step`);
  }
  if (!movement.persona) {
    throw new Error(`${label} missing persona`);
  }
  const { iteration } = movement;
  const isPositiveInteger = Number.isInteger(iteration) && iteration > 0;
  if (!isPositiveInteger) {
    throw new Error(`${label} has invalid iteration: ${iteration}`);
  }
  if (!movement.startedAt) {
    throw new Error(`${label} missing startedAt`);
  }
}
/**
 * Tells whether a phase should be flagged as failed: its status is `'error'`,
 * it carries a non-empty error, or any of its judge stages has status `'error'`.
 */
function hasPhaseError(phase: TracePhase): boolean {
  if (phase.status === 'error') {
    return true;
  }
  if (phase.error) {
    return true;
  }
  for (const stage of phase.judgeStages ?? []) {
    if (stage.status === 'error') {
      return true;
    }
  }
  return false;
}
/**
 * Chooses the marker shown in a movement heading.
 *
 * @returns `'❌'` when the movement result is an error, or when the run was
 *   aborted and this last movement has no result; `'⚠️'` when any phase has an
 *   error; an empty string otherwise.
 */
function movementMarker(
  movement: TraceMovement,
  runStatus: TraceReportParams['status'],
  isLastMovement: boolean,
): string {
  const { result } = movement;
  const resultFailed = result?.status === 'error' || Boolean(result?.error);
  const abortedWithoutResult = runStatus === 'aborted' && !result && isLastMovement;
  if (resultFailed || abortedWithoutResult) {
    return '❌';
  }
  return movement.phases.some(hasPhaseError) ? '⚠️' : '';
}
/**
 * Renders the markdown lines for one phase.
 *
 * The section lists the start/completion times, the system prompt and user
 * instruction (each in a collapsible block), the response when present, the
 * status (`in_progress` when unset), the error when present, and, for phase 3,
 * the judgment stages.
 *
 * @param phase - Phase to render.
 * @param runStatus - Overall run status; a completed run requires every phase
 *   to have a status and a completion time.
 * @throws Error when the phase has no instruction, or when the run is completed
 *   but the phase lacks `status` or `completedAt`.
 */
function renderPhaseSection(
  phase: TracePhase,
  runStatus: TraceReportParams['status'],
): string[] {
  const phaseLabel = `phase ${phase.phase} (${phase.phaseName})`;
  if (!phase.instruction) {
    throw new Error(`${phaseLabel} missing instruction`);
  }
  const runCompleted = runStatus === 'completed';
  if (!phase.status && runCompleted) {
    throw new Error(`${phaseLabel} missing status`);
  }
  if (!phase.completedAt && runCompleted) {
    throw new Error(`${phaseLabel} missing completedAt`);
  }

  // Collapsible block: summary line, blank, body, blank, closing tag.
  const collapsible = (summary: string, body: string): string[] => [
    `<details><summary>${summary}</summary>`,
    '',
    body,
    '',
    '</details>',
  ];

  const headingMarker = hasPhaseError(phase) ? ' ⚠️' : '';
  const lines: string[] = [`### Phase ${phase.phase}: ${phase.phaseName}${headingMarker}`, ''];
  lines.push(`- Started: ${phase.startedAt}`);
  if (phase.completedAt) {
    lines.push(`- Completed: ${phase.completedAt}`);
  }
  lines.push(
    `- System Prompt: ${phase.systemPrompt.length} chars`,
    ...collapsible('System Prompt', phase.systemPrompt),
    '',
    `- User Instruction: ${phase.userInstruction.length} chars`,
    ...collapsible('User Instruction', phase.userInstruction),
  );

  if (phase.response != null) {
    lines.push(
      '',
      `- Response: ${phase.response.length} chars`,
      ...collapsible('Response', phase.response),
    );
  }

  lines.push('', `- Status: ${phase.status ?? 'in_progress'}`);
  if (phase.error) {
    lines.push(`- Error: ${phase.error}`);
  }

  const judgeStages = phase.judgeStages;
  if (phase.phase === 3 && judgeStages && judgeStages.length > 0) {
    lines.push('', '#### Judgment Stages', '');
    for (const stage of judgeStages) {
      const stageMarker = stage.status === 'error' ? ' ⚠️' : '';
      lines.push(
        `- Stage ${stage.stage} (${stage.method})${stageMarker}: status=${stage.status}, instruction=${stage.instruction.length} chars, response=${stage.response.length} chars`,
      );
      lines.push(...collapsible('Stage Instruction', stage.instruction), '');
      lines.push(...collapsible('Stage Response', stage.response), '');
    }
  }

  lines.push('');
  return lines;
}
/**
 * Renders the markdown lines for one movement: heading, optional instruction,
 * its phases (ordered by start time, then phase number), the movement result
 * or a status fallback, and a closing horizontal rule.
 *
 * @param movement - Movement to render.
 * @param params - Report parameters; only `status` is read here.
 * @param isLastMovement - Passed to the marker logic for aborted runs.
 */
function renderMovementSection(
  movement: TraceMovement,
  params: TraceReportParams,
  isLastMovement: boolean,
): string[] {
  const marker = movementMarker(movement, params.status, isLastMovement);
  const markerSuffix = marker ? ` ${marker}` : '';
  const lines: string[] = [];
  lines.push(
    `## Iteration ${movement.iteration}: ${movement.step} (persona: ${movement.persona})${markerSuffix} - ${movement.startedAt}`,
    '',
  );

  if (movement.instruction) {
    lines.push(
      `- Movement Instruction: ${movement.instruction.length} chars`,
      '<details><summary>Instruction</summary>',
      '',
      movement.instruction,
      '',
      '</details>',
      '',
    );
  }

  const orderedPhases = [...movement.phases].sort(
    (a, b) => a.startedAt.localeCompare(b.startedAt) || a.phase - b.phase,
  );
  for (const phase of orderedPhases) {
    lines.push(...renderPhaseSection(phase, params.status));
  }

  const { result } = movement;
  if (!result) {
    // No result recorded: distinguish a finished-but-resultless movement from a running one.
    lines.push(`- Movement Status: ${movement.completedAt ? 'aborted' : 'in_progress'}`);
  } else {
    lines.push(
      `- Movement Status: ${result.status}`,
      `- Movement Response: ${result.content.length} chars`,
    );
    if (result.matchMethod) {
      lines.push(`- Match Method: ${result.matchMethod}`);
    }
    if (result.matchedRuleIndex != null) {
      lines.push(`- Matched Rule Index: ${result.matchedRuleIndex}`);
    }
    if (result.error) {
      lines.push(`- Error: ${result.error}`);
    }
    lines.push('<details><summary>Movement Response</summary>', '', result.content, '', '</details>');
  }

  lines.push('', '---', '');
  return lines;
}
/**
 * Groups sorted movements into render blocks.
 *
 * A run of at least four movements whose steps strictly alternate between two
 * different step names (A, B, A, B, ...) becomes one `loop` block that extends
 * for as long as the alternation holds. Every other movement becomes its own
 * `movement` block.
 */
function buildRenderBlocks(sorted: TraceMovement[]): RenderBlock[] {
  // Returns the exclusive end index of the alternating run starting at `start`,
  // or undefined when no such run of length >= 4 starts there.
  const alternatingRunEnd = (start: number): number | undefined => {
    if (start + 3 >= sorted.length) {
      return undefined;
    }
    const evenStep = sorted[start]!.step;
    const oddStep = sorted[start + 1]!.step;
    const alternates =
      evenStep !== oddStep
      && sorted[start + 2]!.step === evenStep
      && sorted[start + 3]!.step === oddStep;
    if (!alternates) {
      return undefined;
    }
    let end = start + 4;
    while (end < sorted.length) {
      const expected = (end - start) % 2 === 0 ? evenStep : oddStep;
      if (sorted[end]!.step !== expected) {
        break;
      }
      end += 1;
    }
    return end;
  };

  const blocks: RenderBlock[] = [];
  let cursor = 0;
  while (cursor < sorted.length) {
    const runEnd = alternatingRunEnd(cursor);
    if (runEnd === undefined) {
      blocks.push({ kind: 'movement', movement: sorted[cursor]! });
      cursor += 1;
    } else {
      blocks.push({ kind: 'loop', movements: sorted.slice(cursor, runEnd) });
      cursor = runEnd;
    }
  }
  return blocks;
}
/**
 * Renders a loop block: a summary heading naming the two alternating steps and
 * the cycle count, followed by a collapsible section containing every movement
 * of the loop.
 *
 * @param block - Loop block; expected to hold at least two movements.
 * @param params - Report parameters forwarded to the movement renderer.
 */
function renderLoopBlock(block: LoopBlock, params: TraceReportParams): string[] {
  const first = block.movements[0]!;
  const second = block.movements[1]!;
  const last = block.movements[block.movements.length - 1]!;
  const cycleCount = Math.floor(block.movements.length / 2);
  const lines: string[] = [
    // The two step names are separated explicitly; without a separator they
    // run together into one unreadable word.
    `## Iteration ${first.iteration}-${last.iteration}: ${first.step} ↔ ${second.step} loop (${cycleCount} cycles) ⚠️`,
    '',
    `<details><summary>Loop details (${block.movements.length} movements)</summary>`,
    '',
  ];
  block.movements.forEach((movement, movementIndex) => {
    const movementLines = renderMovementSection(
      movement,
      params,
      movementIndex === block.movements.length - 1,
    );
    // Non-empty lines are prefixed; empty lines stay empty.
    lines.push(...movementLines.map((line) => (line ? ` ${line}` : line)));
  });
  lines.push('</details>', '', '---', '');
  return lines;
}
/**
 * Renders the complete trace report as markdown.
 *
 * The report starts with a header (task, run, times, status, iterations and
 * optional reason), followed by the movements sorted by start time and then
 * iteration, with alternating runs collapsed into loop blocks.
 *
 * @param params - Report parameters; validated first.
 * @param traceStartedAt - Start time shown in the header; must be non-empty.
 * @param movements - Movements to render; the array is not modified.
 * @throws Error when the parameters, the start time or any movement is invalid.
 */
export function renderTraceReportMarkdown(
  params: TraceReportParams,
  traceStartedAt: string,
  movements: TraceMovement[],
): string {
  assertTraceParams(params);
  if (!traceStartedAt) {
    throw new Error('traceStartedAt is required');
  }

  const statusLabel = params.status === 'completed' ? '✅ completed' : '❌ aborted';
  const output: string[] = [
    `# Execution Trace: ${params.pieceName}`,
    '',
    `- Task: ${params.task}`,
    `- Run: ${params.runSlug}`,
    `- Started: ${traceStartedAt}`,
    `- Ended: ${params.endTime}`,
    `- Status: ${statusLabel}`,
    `- Iterations: ${params.iterations}`,
  ];
  if (params.reason) {
    output.push(`- Reason: ${params.reason}`);
  }
  output.push('', '---', '');

  const ordered = [...movements].sort(
    (a, b) => a.startedAt.localeCompare(b.startedAt) || a.iteration - b.iteration,
  );
  ordered.forEach((movement, position) => assertTraceMovement(movement, position));

  const blocks = buildRenderBlocks(ordered);
  const lastBlockIndex = blocks.length - 1;
  blocks.forEach((block, blockIndex) => {
    const rendered =
      block.kind === 'loop'
        ? renderLoopBlock(block, params)
        : renderMovementSection(block.movement, params, blockIndex === lastBlockIndex);
    output.push(...rendered);
  });
  return output.join('\n');
}

View File

@ -0,0 +1,48 @@
import type { PhaseName } from '../../../core/piece/index.js';
import type { JudgeStageEntry } from '../../../core/piece/types.js';
/**
 * Trace report mode: `'off'` produces no report, `'full'` keeps text as-is,
 * and `'redacted'` passes text through the sensitive-text sanitizer.
 */
export type TraceReportMode = 'off' | 'redacted' | 'full';
/** Header-level parameters of a trace report. */
export interface TraceReportParams {
// Required (non-empty); the writer uses it as the output path of the report.
tracePath: string;
// Shown in the report title.
pieceName: string;
// Task text; sanitized unless the mode is 'full'.
task: string;
runSlug: string;
// Overall run status; a completed run requires every phase to have a status and completion time.
status: 'completed' | 'aborted';
// Must be a non-negative integer.
iterations: number;
// Shown as the end time; also the fallback start time when no piece_start record exists.
endTime: string;
// Optional reason line; sanitized unless the mode is 'full'.
reason?: string;
}
/** One phase execution within a movement, as shown in the trace report. */
export interface TracePhase {
// Identifier used to pair phase_start, phase_complete, judge-stage and prompt records.
phaseExecutionId: string;
phase: 1 | 2 | 3;
phaseName: PhaseName;
// Must be non-empty for the phase to be rendered.
instruction: string;
systemPrompt: string;
userInstruction: string;
// Set from the phase_complete record's content.
response?: string;
// Unset until the phase completes; rendered as 'in_progress' when missing.
status?: string;
error?: string;
startedAt: string;
completedAt?: string;
// Judge stages attached to this phase; rendered only for phase 3.
judgeStages?: JudgeStageEntry[];
}
/** One execution of a step in a given iteration, with its phases and result. */
export interface TraceMovement {
step: string;
persona: string;
// Must be a positive integer for the movement to be rendered.
iteration: number;
instruction?: string;
startedAt: string;
completedAt?: string;
phases: TracePhase[];
// Filled from the step_complete record; absent while the movement has not completed.
result?: {
status: string;
content: string;
error?: string;
matchedRuleIndex?: number;
matchedRuleMethod?: string;
matchMethod?: string;
};
}

View File

@ -0,0 +1,81 @@
import { writeFileAtomic } from '../../../infra/config/index.js';
import type { SessionLogger } from './sessionLogger.js';
import type { TraceReportMode } from './traceReport.js';
import {
assertTraceParams,
renderTraceReportFromLogs,
renderTraceReportFromRecords,
} from './traceReport.js';
/** Fixed, per-run inputs of the trace report writer. */
interface TraceReportWriterParams {
// Source of in-memory records, used when the on-disk log yields no records.
sessionLogger: SessionLogger;
// NDJSON session log read first when rendering.
ndjsonLogPath: string;
// Destination file of the rendered report.
tracePath: string;
pieceName: string;
task: string;
runSlug: string;
// Optional prompt log read alongside the session log.
promptLogPath?: string;
mode: TraceReportMode;
// Receives an info message when a repeated write is skipped.
logger: {
info: (message: string, data?: unknown) => void;
};
}
/** Per-call inputs describing how the run ended. */
interface WriteTraceReportInput {
status: 'completed' | 'aborted';
iterations: number;
endTime: string;
reason?: string;
}
/**
 * Creates a function that writes the trace report at most once.
 *
 * The first call validates the parameters, renders the report from the on-disk
 * logs and writes it to `params.tracePath`. When the on-disk log yields no
 * records, the report is rendered from the session logger's in-memory records
 * instead. Nothing is written when rendering yields no markdown. Later calls
 * only log that the write was skipped.
 *
 * Note: the "written" flag is set before rendering, so a call that throws also
 * prevents later attempts.
 *
 * @param params - Fixed inputs for the run.
 * @returns The writer function.
 */
export function createTraceReportWriter(params: TraceReportWriterParams): (input: WriteTraceReportInput) => void {
  let hasWritten = false;

  return (input: WriteTraceReportInput): void => {
    const { status, iterations, reason, endTime } = input;
    if (hasWritten) {
      params.logger.info('Trace report write skipped because it has already been written', {
        status,
        iterations,
      });
      return;
    }
    hasWritten = true;

    const traceParams = {
      tracePath: params.tracePath,
      pieceName: params.pieceName,
      task: params.task,
      runSlug: params.runSlug,
      status,
      iterations,
      reason,
      endTime,
    } as const;
    assertTraceParams(traceParams);

    const renderFromMemory = (): string | undefined =>
      renderTraceReportFromRecords(
        traceParams,
        params.sessionLogger.getNdjsonRecords(),
        params.sessionLogger.getPromptRecords(),
        params.mode,
      );

    let markdown: string | undefined;
    try {
      markdown = renderTraceReportFromLogs(
        traceParams,
        params.ndjsonLogPath,
        params.promptLogPath,
        params.mode,
      );
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      // Only the "empty log" failure falls back to in-memory records.
      if (message.startsWith('No session records found for trace report:')) {
        markdown = renderFromMemory();
      } else {
        throw error;
      }
    }

    if (markdown) {
      writeFileAtomic(params.tracePath, markdown);
    }
  };
}

View File

@ -11,6 +11,7 @@ export type {
NdjsonPieceAbort,
NdjsonPhaseStart,
NdjsonPhaseComplete,
NdjsonPhaseJudgeStage,
NdjsonInteractiveStart,
NdjsonInteractiveEnd,
NdjsonRecord,

View File

@ -21,6 +21,7 @@ export type {
NdjsonPieceAbort,
NdjsonPhaseStart,
NdjsonPhaseComplete,
NdjsonPhaseJudgeStage,
NdjsonInteractiveStart,
NdjsonInteractiveEnd,
NdjsonRecord,

View File

@ -128,6 +128,11 @@ export class DebugLogger {
return this.debugLogFile;
}
/** Returns the path of the current debug prompts log file, or null when there is none. */
getPromptsLogFile(): string | null {
  const promptsLogFile = this.debugPromptsLogFile;
  return promptsLogFile;
}
/** Format log message with timestamp and level */
private static formatLogMessage(level: string, component: string, message: string, data?: unknown): string {
const timestamp = new Date().toISOString();
@ -223,6 +228,10 @@ export function getDebugLogFile(): string | null {
return DebugLogger.getInstance().getLogFile();
}
/** Returns the debug prompts log file path reported by the shared `DebugLogger` instance. */
export function getDebugPromptsLogFile(): string | null {
  const logger = DebugLogger.getInstance();
  return logger.getPromptsLogFile();
}
/** Writes a DEBUG-level entry through the shared `DebugLogger` instance. */
export function debugLog(component: string, message: string, data?: unknown): void {
  const logger = DebugLogger.getInstance();
  logger.writeLog('DEBUG', component, message, data);
}

View File

@ -0,0 +1,50 @@
/** Components of a phase execution id of the form `step:iteration:phase:sequence`. */
export interface PhaseExecutionIdParts {
  /** Step name; non-empty. May itself contain `:`. */
  step: string;
  /** Positive integer. */
  iteration: number;
  phase: 1 | 2 | 3;
  /** Positive integer distinguishing repeated executions of the same phase. */
  sequence: number;
}

/**
 * Builds a phase execution id of the form `step:iteration:phase:sequence`.
 *
 * @throws Error when `step` is empty, `iteration` or `sequence` is not a
 *   positive integer, or `phase` is not 1, 2 or 3.
 */
export function buildPhaseExecutionId(parts: PhaseExecutionIdParts): string {
  if (!parts.step) {
    throw new Error('phaseExecutionId requires step');
  }
  if (!Number.isInteger(parts.iteration) || parts.iteration <= 0) {
    throw new Error(`phaseExecutionId requires positive iteration: ${parts.iteration}`);
  }
  if (parts.phase !== 1 && parts.phase !== 2 && parts.phase !== 3) {
    throw new Error(`phaseExecutionId requires phase 1|2|3: ${parts.phase}`);
  }
  if (!Number.isInteger(parts.sequence) || parts.sequence <= 0) {
    throw new Error(`phaseExecutionId requires positive sequence: ${parts.sequence}`);
  }
  return `${parts.step}:${parts.iteration}:${parts.phase}:${parts.sequence}`;
}

/**
 * Parses an id produced by `buildPhaseExecutionId`.
 *
 * The three numeric fields are read from the END of the id and everything
 * before them is the step, so step names containing `:` round-trip correctly.
 *
 * @returns The parsed parts, or `undefined` when the id has fewer than four
 *   segments, the step is empty, or a numeric field is out of range.
 */
export function parsePhaseExecutionId(
  phaseExecutionId: string,
): PhaseExecutionIdParts | undefined {
  const segments = phaseExecutionId.split(':');
  if (segments.length < 4) {
    return undefined;
  }
  const [iterationStr, phaseStr, sequenceStr] = segments.slice(-3);
  const step = segments.slice(0, -3).join(':');
  const iteration = Number(iterationStr);
  const phase = Number(phaseStr);
  const sequence = Number(sequenceStr);
  if (!step || !Number.isInteger(iteration) || iteration <= 0) {
    return undefined;
  }
  if (phase !== 1 && phase !== 2 && phase !== 3) {
    return undefined;
  }
  if (!Number.isInteger(sequence) || sequence <= 0) {
    return undefined;
  }
  return {
    step,
    iteration,
    phase,
    sequence,
  };
}

View File

@ -79,23 +79,44 @@ export interface NdjsonPieceAbort {
/** Log record written when a phase of a step starts. */
export interface NdjsonPhaseStart {
type: 'phase_start';
step: string;
// Optional; the trace parser falls back to the step's latest started iteration when absent.
iteration?: number;
phase: 1 | 2 | 3;
phaseName: 'execute' | 'report' | 'judge';
// Optional; the trace parser generates an id when absent.
phaseExecutionId?: string;
timestamp: string;
instruction?: string;
systemPrompt?: string;
userInstruction?: string;
}
/** Log record written when a phase of a step completes. */
export interface NdjsonPhaseComplete {
type: 'phase_complete';
step: string;
// Optional; the trace parser derives it from the execution id or the step's latest iteration when absent.
iteration?: number;
phase: 1 | 2 | 3;
phaseName: 'execute' | 'report' | 'judge';
// Pairs this record with its phase_start.
phaseExecutionId?: string;
status: string;
// Phase response text; becomes the phase's response in the trace.
content?: string;
timestamp: string;
error?: string;
}
/** Log record for one internal stage of the judge phase (phase 3). */
export interface NdjsonPhaseJudgeStage {
type: 'phase_judge_stage';
step: string;
iteration?: number;
phase: 3;
phaseName: 'judge';
// Links the stage to its phase; the trace parser skips the record when it matches no known phase.
phaseExecutionId?: string;
stage: 1 | 2 | 3;
method: 'structured_output' | 'phase3_tag' | 'ai_judge';
status: 'done' | 'error' | 'skipped';
instruction: string;
response: string;
timestamp: string;
}
export interface NdjsonInteractiveStart {
type: 'interactive_start';
timestamp: string;
@ -116,6 +137,7 @@ export type NdjsonRecord =
| NdjsonPieceAbort
| NdjsonPhaseStart
| NdjsonPhaseComplete
| NdjsonPhaseJudgeStage
| NdjsonInteractiveStart
| NdjsonInteractiveEnd;
@ -124,7 +146,10 @@ export interface PromptLogRecord {
movement: string;
phase: 1 | 2 | 3;
iteration: number;
phaseExecutionId?: string;
prompt: string;
systemPrompt: string;
userInstruction: string;
response: string;
timestamp: string;
}