takt: add-trace-report-generation (#467)
This commit is contained in:
parent
dbc22c76fc
commit
8403a7c892
@ -43,6 +43,13 @@ function doneResponse(content: string, structuredOutput?: Record<string, unknown
|
||||
}
|
||||
|
||||
// Base options spread into the judgeStatus calls made by the tests in this file.
const judgeOptions = { cwd: '/repo', movementName: 'review' };
|
||||
/**
 * Shape of one entry handed to the `onJudgeStage` callback in these tests.
 * Declared locally so the option cast passed to `judgeStatus` can type the callback.
 */
type JudgeStageLog = {
  /** Judge stage number. */
  stage: 1 | 2 | 3;
  /** Detection method reported for the stage. */
  method: 'structured_output' | 'phase3_tag' | 'ai_judge';
  /** Outcome reported for the stage. */
  status: 'done' | 'error' | 'skipped';
  /** Instruction text reported for the stage. */
  instruction: string;
  /** Response text reported for the stage. */
  response: string;
};
|
||||
|
||||
describe('agent-usecases', () => {
|
||||
beforeEach(() => {
|
||||
@ -173,6 +180,75 @@ describe('agent-usecases', () => {
|
||||
expect(runAgent).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
// judgeStatus reports its internal stage logs through onJudgeStage in order:
// the first runAgent response yields no match, the second carries a tag,
// so exactly two entries (stage 1, then stage 2) are expected.
it('judgeStatus は Phase 3 の内部ステージログを順序どおりに通知する', async () => {
  const onJudgeStage = vi.fn();
  // Stage 1: structured output fails
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match'));
  // Stage 2: tag detection succeeds
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('[REVIEW:2]'));

  await judgeStatus(
    'structured',
    'tag',
    [
      { condition: 'a', next: 'one' },
      { condition: 'b', next: 'two' },
    ],
    {
      ...judgeOptions,
      onJudgeStage,
    } as typeof judgeOptions & { onJudgeStage: (entry: JudgeStageLog) => void },
  );

  expect(onJudgeStage).toHaveBeenCalledTimes(2);
  // Stage 1 is logged with the structured instruction and the raw agent response.
  expect(onJudgeStage).toHaveBeenNthCalledWith(1, expect.objectContaining({
    stage: 1,
    method: 'structured_output',
    status: 'done',
    instruction: 'structured',
    response: 'no match',
  }));
  // Stage 2 is logged with the tag instruction and the tagged response.
  expect(onJudgeStage).toHaveBeenNthCalledWith(2, expect.objectContaining({
    stage: 2,
    method: 'phase3_tag',
    status: 'done',
    instruction: 'tag',
    response: '[REVIEW:2]',
  }));
});
|
||||
|
||||
// Even when every stage fails and judgeStatus rejects, onJudgeStage must have
// been notified three times, the last entry being stage 3 (ai_judge).
it('judgeStatus は全ステージ失敗時にも Stage 3 までログ通知する', async () => {
  const onJudgeStage = vi.fn();
  // Stage 1: structured output fails
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match'));
  // Stage 2: tag detection fails
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no tag'));
  // Stage 3: evaluateCondition fails
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('still no match'));
  vi.mocked(detectJudgeIndex).mockReturnValue(-1);

  await expect(
    judgeStatus(
      'structured',
      'tag',
      [
        { condition: 'a', next: 'one' },
        { condition: 'b', next: 'two' },
      ],
      {
        ...judgeOptions,
        onJudgeStage,
      } as typeof judgeOptions & { onJudgeStage: (entry: JudgeStageLog) => void },
    ),
  ).rejects.toThrow('Status not found for movement "review"');

  expect(onJudgeStage).toHaveBeenCalledTimes(3);
  expect(onJudgeStage).toHaveBeenLastCalledWith(expect.objectContaining({
    stage: 3,
    method: 'ai_judge',
  }));
});
|
||||
|
||||
it('judgeStatus は全ての判定に失敗したらエラー', async () => {
|
||||
// Stage 1: structured output fails
|
||||
vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match'));
|
||||
@ -232,6 +308,27 @@ describe('agent-usecases', () => {
|
||||
.rejects.toThrow('Team leader failed: bad output');
|
||||
});
|
||||
|
||||
// decomposeTask must forward the caller's onPromptResolved callback to runAgent.
it('decomposeTask は onPromptResolved を runAgent に伝搬する', async () => {
  vi.mocked(runAgent).mockResolvedValue(doneResponse('x', {
    parts: [
      { id: 'p1', title: 'Part 1', instruction: 'Do 1', timeout_ms: null },
    ],
  }));
  const onPromptResolved = vi.fn();

  await decomposeTask('instruction', 2, {
    cwd: '/repo',
    persona: 'team-leader',
    onPromptResolved,
  });

  // Only the callback identity matters here; the instruction text is not pinned.
  expect(runAgent).toHaveBeenCalledWith(
    'team-leader',
    expect.any(String),
    expect.objectContaining({ onPromptResolved }),
  );
});
|
||||
|
||||
it('requestMoreParts は構造化出力をパースして返す', async () => {
|
||||
vi.mocked(runAgent).mockResolvedValue(doneResponse('x', {
|
||||
done: false,
|
||||
|
||||
@ -26,4 +26,33 @@ describe('config module file-size boundary', () => {
|
||||
const lineCount = getLineCount('../features/tasks/execute/pieceExecution.ts');
|
||||
expect(lineCount).toBeLessThanOrEqual(300);
|
||||
});
|
||||
|
||||
// Size guard for sessionLogger.ts. The check is inclusive: exactly 300 lines passes.
it('keeps sessionLogger.ts under 300 lines', () => {
  const lineCount = getLineCount('../features/tasks/execute/sessionLogger.ts');
  expect(lineCount).toBeLessThanOrEqual(300);
});
|
||||
|
||||
// Size guard for the two traceReport split modules (renderer and parser).
// Each is checked independently; the limit is inclusive (300 lines passes).
it('keeps traceReport renderer/parser split modules under 300 lines', () => {
  const rendererLineCount = getLineCount('../features/tasks/execute/traceReportRenderer.ts');
  const parserLineCount = getLineCount('../features/tasks/execute/traceReportParser.ts');
  expect(rendererLineCount).toBeLessThanOrEqual(300);
  expect(parserLineCount).toBeLessThanOrEqual(300);
});
|
||||
|
||||
// Size guard for the traceReport.ts facade. The limit is inclusive (120 lines passes).
it('keeps traceReport.ts as thin facade under 120 lines', () => {
  const lineCount = getLineCount('../features/tasks/execute/traceReport.ts');
  expect(lineCount).toBeLessThanOrEqual(120);
});
|
||||
|
||||
// Size guard for the agent-usecases.ts facade. The limit is inclusive (120 lines passes).
it('keeps agent-usecases.ts as thin facade under 120 lines', () => {
  const lineCount = getLineCount('../agents/agent-usecases.ts');
  expect(lineCount).toBeLessThanOrEqual(120);
});
|
||||
|
||||
// Size guard for the split agent use-case modules (judge-status and decompose-task).
// Each is checked independently; the limit is inclusive (300 lines passes).
it('keeps split agent usecases under 300 lines each', () => {
  const judgeLineCount = getLineCount('../agents/judge-status-usecase.ts');
  const decomposeLineCount = getLineCount('../agents/decompose-task-usecase.ts');
  expect(judgeLineCount).toBeLessThanOrEqual(300);
  expect(decomposeLineCount).toBeLessThanOrEqual(300);
});
|
||||
});
|
||||
|
||||
@ -100,6 +100,19 @@ function createEngineOptions(tmpDir: string): PieceEngineOptions {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Queues one one-shot `runAgent` mock implementation per response, in call order.
 *
 * Each queued implementation first invokes `options.onPromptResolved` (when the
 * caller supplied one) with the persona as `systemPrompt` and the instruction as
 * `userInstruction`, then resolves with the queued response.
 *
 * @param responses Responses to return from successive `runAgent` calls.
 */
function mockRunAgentWithPrompt(...responses: ReturnType<typeof makeResponse>[]): void {
  const mock = vi.mocked(runAgent);
  for (const response of responses) {
    mock.mockImplementationOnce(async (persona, instruction, options) => {
      options?.onPromptResolved?.({
        // A non-string persona is reported as an empty system prompt.
        systemPrompt: typeof persona === 'string' ? persona : '',
        userInstruction: instruction,
      });
      return response;
    });
  }
}
|
||||
|
||||
describe('ArpeggioRunner integration', () => {
|
||||
let engine: PieceEngine | undefined;
|
||||
|
||||
@ -122,10 +135,11 @@ describe('ArpeggioRunner integration', () => {
|
||||
|
||||
// Mock agent to return batch-specific responses
|
||||
const mockAgent = vi.mocked(runAgent);
|
||||
mockAgent
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'Processed Alice' }))
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'Processed Bob' }))
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'Processed Charlie' }));
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({ content: 'Processed Alice' }),
|
||||
makeResponse({ content: 'Processed Bob' }),
|
||||
makeResponse({ content: 'Processed Charlie' }),
|
||||
);
|
||||
|
||||
// Mock rule detection for the merged result
|
||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
||||
@ -163,9 +177,10 @@ describe('ArpeggioRunner integration', () => {
|
||||
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
||||
|
||||
const mockAgent = vi.mocked(runAgent);
|
||||
mockAgent
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'Batch 0 result' }))
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'Batch 1 result' }));
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({ content: 'Batch 0 result' }),
|
||||
makeResponse({ content: 'Batch 1 result' }),
|
||||
);
|
||||
|
||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
||||
index: 0,
|
||||
@ -189,13 +204,12 @@ describe('ArpeggioRunner integration', () => {
|
||||
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
||||
|
||||
const mockAgent = vi.mocked(runAgent);
|
||||
// First batch succeeds
|
||||
mockAgent.mockResolvedValueOnce(makeResponse({ content: 'OK' }));
|
||||
// Second batch fails twice (initial + 1 retry)
|
||||
mockAgent.mockResolvedValueOnce(makeResponse({ status: 'error', error: 'fail1' }));
|
||||
mockAgent.mockResolvedValueOnce(makeResponse({ status: 'error', error: 'fail2' }));
|
||||
// Third batch succeeds
|
||||
mockAgent.mockResolvedValueOnce(makeResponse({ content: 'OK' }));
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({ content: 'OK' }),
|
||||
makeResponse({ status: 'error', error: 'fail1' }),
|
||||
makeResponse({ status: 'error', error: 'fail2' }),
|
||||
makeResponse({ content: 'OK' }),
|
||||
);
|
||||
|
||||
engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
|
||||
const state = await engine.run();
|
||||
@ -210,10 +224,11 @@ describe('ArpeggioRunner integration', () => {
|
||||
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
||||
|
||||
const mockAgent = vi.mocked(runAgent);
|
||||
mockAgent
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'Result A' }))
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'Result B' }))
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'Result C' }));
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({ content: 'Result A' }),
|
||||
makeResponse({ content: 'Result B' }),
|
||||
makeResponse({ content: 'Result C' }),
|
||||
);
|
||||
|
||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
||||
index: 0,
|
||||
@ -234,10 +249,11 @@ describe('ArpeggioRunner integration', () => {
|
||||
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
||||
|
||||
const mockAgent = vi.mocked(runAgent);
|
||||
mockAgent
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'A' }))
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'B' }))
|
||||
.mockResolvedValueOnce(makeResponse({ content: 'C' }));
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({ content: 'A' }),
|
||||
makeResponse({ content: 'B' }),
|
||||
makeResponse({ content: 'C' }),
|
||||
);
|
||||
|
||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
||||
index: 0,
|
||||
@ -251,4 +267,90 @@ describe('ArpeggioRunner integration', () => {
|
||||
expect(mockAgent).toHaveBeenCalledTimes(3);
|
||||
});
|
||||
|
||||
// The instruction carried by phase:start for each arpeggio batch must be the
// prompt reported through onPromptResolved, not an '[Arpeggio batch' placeholder.
it('should record resolved prompt in phase:start for arpeggio batches', async () => {
  const { tmpDir, csvPath, templatePath } = createArpeggioTestDir();
  const arpeggioConfig = createArpeggioConfig(csvPath, templatePath, { concurrency: 2 });
  const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
  const phaseStarts: string[] = [];

  mockRunAgentWithPrompt(
    makeResponse({ content: 'A' }),
    makeResponse({ content: 'B' }),
    makeResponse({ content: 'C' }),
  );
  vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });

  engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
  engine.on('phase:start', (step, phase, phaseName, instruction) => {
    // Collect only the phase-1 "execute" starts of the "process" step.
    if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute') return;
    phaseStarts.push(instruction);
  });

  const state = await engine.run();

  expect(state.status).toBe('completed');
  expect(phaseStarts).toHaveLength(3);
  expect(phaseStarts.every((instruction) => !instruction.startsWith('[Arpeggio batch'))).toBe(true);
  expect(phaseStarts.some((instruction) => instruction.includes('Process '))).toBe(true);
});
|
||||
|
||||
// With concurrent batches finishing out of order, each phase:complete must carry
// the same phaseExecutionId as the phase:start whose instruction produced it.
it('should keep phaseExecutionId bindings correct when completion order is reversed', async () => {
  const { tmpDir, csvPath, templatePath } = createArpeggioTestDir();
  const arpeggioConfig = createArpeggioConfig(csvPath, templatePath, { concurrency: 2 });
  const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
  const phaseStartsByExecutionId = new Map<string, string>();
  const phaseCompletions: Array<{ phaseExecutionId?: string; content: string }> = [];

  vi.mocked(runAgent).mockImplementation(async (persona, instruction, options) => {
    options?.onPromptResolved?.({
      systemPrompt: typeof persona === 'string' ? persona : '',
      userInstruction: instruction,
    });
    // Alice is delayed longer than Bob so that Bob's batch resolves first.
    if (instruction.includes('Alice')) {
      await new Promise((resolve) => setTimeout(resolve, 40));
      return makeResponse({ content: 'Result Alice' });
    }
    if (instruction.includes('Bob')) {
      await new Promise((resolve) => setTimeout(resolve, 5));
      return makeResponse({ content: 'Result Bob' });
    }
    return makeResponse({ content: 'Result Charlie' });
  });
  vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });

  engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
  engine.on('phase:start', (step, phase, phaseName, instruction, _promptParts, phaseExecutionId) => {
    // Starts without an execution id cannot be correlated, so they are ignored.
    if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute' || !phaseExecutionId) return;
    phaseStartsByExecutionId.set(phaseExecutionId, instruction);
  });
  engine.on('phase:complete', (step, phase, phaseName, content, _status, _error, phaseExecutionId) => {
    if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute') return;
    phaseCompletions.push({ phaseExecutionId, content });
  });

  const state = await engine.run();

  expect(state.status).toBe('completed');
  expect(phaseCompletions).toHaveLength(3);
  // Every completion carries a distinct execution id.
  expect(new Set(phaseCompletions.map((entry) => entry.phaseExecutionId)).size).toBe(3);
  expect(phaseCompletions.map((entry) => entry.content).sort()).toEqual([
    'Result Alice',
    'Result Bob',
    'Result Charlie',
  ]);
  // Each completion's id must map back to the start instruction for the same row.
  for (const completion of phaseCompletions) {
    const instruction = completion.phaseExecutionId
      ? phaseStartsByExecutionId.get(completion.phaseExecutionId)
      : undefined;
    expect(instruction).toBeDefined();
    if (completion.content === 'Result Alice') {
      expect(instruction).toContain('Alice');
    } else if (completion.content === 'Result Bob') {
      expect(instruction).toContain('Bob');
    } else {
      expect(instruction).toContain('Charlie');
    }
  }
});
|
||||
|
||||
});
|
||||
|
||||
@ -167,9 +167,13 @@ describe('PieceEngine Integration: Error Handling', () => {
|
||||
const engine = new PieceEngine(config, tmpDir, 'test task', { projectCwd: tmpDir });
|
||||
|
||||
for (let i = 0; i < 5; i++) {
|
||||
vi.mocked(runAgent).mockResolvedValueOnce(
|
||||
makeResponse({ content: `iteration ${i}` })
|
||||
);
|
||||
vi.mocked(runAgent).mockImplementationOnce(async (persona, task, options) => {
|
||||
options?.onPromptResolved?.({
|
||||
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||
userInstruction: task,
|
||||
});
|
||||
return makeResponse({ content: `iteration ${i}` });
|
||||
});
|
||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce(
|
||||
{ index: 0, method: 'phase1_tag' }
|
||||
);
|
||||
|
||||
@ -544,11 +544,16 @@ describe('PieceEngine Integration: Happy Path', () => {
|
||||
|
||||
expect(phaseStartFn).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ name: 'plan' }),
|
||||
1, 'execute', expect.any(String)
|
||||
1, 'execute', expect.any(String), expect.objectContaining({
|
||||
systemPrompt: expect.any(String),
|
||||
userInstruction: expect.any(String),
|
||||
}),
|
||||
undefined,
|
||||
1,
|
||||
);
|
||||
expect(phaseCompleteFn).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ name: 'plan' }),
|
||||
1, 'execute', expect.any(String), 'done', undefined
|
||||
1, 'execute', expect.any(String), 'done', undefined, undefined, 1,
|
||||
);
|
||||
});
|
||||
|
||||
|
||||
@ -122,13 +122,21 @@ describe('PieceEngine Integration: Parallel Movement Partial Failure', () => {
|
||||
// arch-review fails (exit code 1)
|
||||
mock.mockRejectedValueOnce(new Error('Claude Code process exited with code 1'));
|
||||
// security-review succeeds
|
||||
mock.mockResolvedValueOnce(
|
||||
makeResponse({ persona: 'security-review', content: 'Security review passed' }),
|
||||
);
|
||||
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||
options?.onPromptResolved?.({
|
||||
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||
userInstruction: task,
|
||||
});
|
||||
return makeResponse({ persona: 'security-review', content: 'Security review passed' });
|
||||
});
|
||||
// done step
|
||||
mock.mockResolvedValueOnce(
|
||||
makeResponse({ persona: 'done', content: 'Completed' }),
|
||||
);
|
||||
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||
options?.onPromptResolved?.({
|
||||
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||
userInstruction: task,
|
||||
});
|
||||
return makeResponse({ persona: 'done', content: 'Completed' });
|
||||
});
|
||||
|
||||
mockDetectMatchedRuleSequence([
|
||||
// security-review sub-movement rule match (arch-review has no match — it failed)
|
||||
@ -179,12 +187,20 @@ describe('PieceEngine Integration: Parallel Movement Partial Failure', () => {
|
||||
|
||||
const mock = vi.mocked(runAgent);
|
||||
mock.mockRejectedValueOnce(new Error('Session resume failed'));
|
||||
mock.mockResolvedValueOnce(
|
||||
makeResponse({ persona: 'security-review', content: 'OK' }),
|
||||
);
|
||||
mock.mockResolvedValueOnce(
|
||||
makeResponse({ persona: 'done', content: 'Done' }),
|
||||
);
|
||||
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||
options?.onPromptResolved?.({
|
||||
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||
userInstruction: task,
|
||||
});
|
||||
return makeResponse({ persona: 'security-review', content: 'OK' });
|
||||
});
|
||||
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||
options?.onPromptResolved?.({
|
||||
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||
userInstruction: task,
|
||||
});
|
||||
return makeResponse({ persona: 'done', content: 'Done' });
|
||||
});
|
||||
|
||||
mockDetectMatchedRuleSequence([
|
||||
{ index: 0, method: 'phase1_tag' },
|
||||
|
||||
@ -216,11 +216,15 @@ describe('PieceEngine Integration: Parallel Movement Aggregation', () => {
|
||||
['../personas/supervise.md', makeResponse({ persona: 'supervise', content: 'All passed' })],
|
||||
]);
|
||||
|
||||
vi.mocked(runAgent).mockImplementation(async (persona, _task, options) => {
|
||||
vi.mocked(runAgent).mockImplementation(async (persona, task, options) => {
|
||||
const response = responsesByPersona.get(persona ?? '');
|
||||
if (!response) {
|
||||
throw new Error(`Unexpected persona: ${persona}`);
|
||||
}
|
||||
options.onPromptResolved?.({
|
||||
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||
userInstruction: task,
|
||||
});
|
||||
|
||||
if (persona === '../personas/arch-review.md') {
|
||||
options.onStream?.({ type: 'text', data: { text: 'arch stream line\n' } });
|
||||
|
||||
@ -49,6 +49,19 @@ function buildTeamLeaderConfig(): PieceConfig {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Queues one one-shot `runAgent` mock implementation per response, in call order.
 *
 * Each queued implementation first invokes `options.onPromptResolved` (when the
 * caller supplied one) with the persona as `systemPrompt` and the instruction as
 * `userInstruction`, then resolves with the queued response.
 *
 * @param responses Responses to return from successive `runAgent` calls.
 */
function mockRunAgentWithPrompt(...responses: ReturnType<typeof makeResponse>[]): void {
  const mock = vi.mocked(runAgent);
  for (const response of responses) {
    mock.mockImplementationOnce(async (persona, instruction, options) => {
      options?.onPromptResolved?.({
        // A non-string persona is reported as an empty system prompt.
        systemPrompt: typeof persona === 'string' ? persona : '',
        userInstruction: instruction,
      });
      return response;
    });
  }
}
|
||||
|
||||
describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||
let tmpDir: string;
|
||||
|
||||
@ -68,21 +81,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||
const config = buildTeamLeaderConfig();
|
||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||
|
||||
vi.mocked(runAgent)
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
content: [
|
||||
'```json',
|
||||
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
||||
'```',
|
||||
].join('\n'),
|
||||
}))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' }))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' }))
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
}),
|
||||
makeResponse({ persona: 'coder', content: 'API done' }),
|
||||
makeResponse({ persona: 'coder', content: 'Tests done' }),
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
structuredOutput: { done: true, reasoning: 'enough', parts: [] },
|
||||
}));
|
||||
}),
|
||||
);
|
||||
|
||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
||||
|
||||
@ -103,21 +117,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||
const config = buildTeamLeaderConfig();
|
||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||
|
||||
vi.mocked(runAgent)
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
content: [
|
||||
'```json',
|
||||
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
||||
'```',
|
||||
].join('\n'),
|
||||
}))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'api failed' }))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }))
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
}),
|
||||
makeResponse({ persona: 'coder', status: 'error', error: 'api failed' }),
|
||||
makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }),
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
|
||||
}));
|
||||
}),
|
||||
);
|
||||
|
||||
const state = await engine.run();
|
||||
|
||||
@ -128,21 +143,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||
const config = buildTeamLeaderConfig();
|
||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||
|
||||
vi.mocked(runAgent)
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
content: [
|
||||
'```json',
|
||||
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
||||
'```',
|
||||
].join('\n'),
|
||||
}))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' }))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }))
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
}),
|
||||
makeResponse({ persona: 'coder', content: 'API done' }),
|
||||
makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }),
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
|
||||
}));
|
||||
}),
|
||||
);
|
||||
|
||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
||||
|
||||
@ -161,21 +177,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||
const config = buildTeamLeaderConfig();
|
||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||
|
||||
vi.mocked(runAgent)
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
content: [
|
||||
'```json',
|
||||
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
||||
'```',
|
||||
].join('\n'),
|
||||
}))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', content: 'api failed from content' }))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' }))
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
}),
|
||||
makeResponse({ persona: 'coder', status: 'error', content: 'api failed from content' }),
|
||||
makeResponse({ persona: 'coder', content: 'Tests done' }),
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
|
||||
}));
|
||||
}),
|
||||
);
|
||||
|
||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
||||
|
||||
@ -191,8 +208,8 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||
const config = buildTeamLeaderConfig();
|
||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||
|
||||
vi.mocked(runAgent)
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
mockRunAgentWithPrompt(
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
structuredOutput: {
|
||||
parts: [
|
||||
@ -200,10 +217,10 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||
{ id: 'part-2', title: 'Test', instruction: 'Add tests', timeout_ms: null },
|
||||
],
|
||||
},
|
||||
}))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' }))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' }))
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
}),
|
||||
makeResponse({ persona: 'coder', content: 'API done' }),
|
||||
makeResponse({ persona: 'coder', content: 'Tests done' }),
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
structuredOutput: {
|
||||
done: false,
|
||||
@ -212,16 +229,17 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||
{ id: 'part-3', title: 'Docs', instruction: 'Write docs', timeout_ms: null },
|
||||
],
|
||||
},
|
||||
}))
|
||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Docs done' }))
|
||||
.mockResolvedValueOnce(makeResponse({
|
||||
}),
|
||||
makeResponse({ persona: 'coder', content: 'Docs done' }),
|
||||
makeResponse({
|
||||
persona: 'team-leader',
|
||||
structuredOutput: {
|
||||
done: true,
|
||||
reasoning: 'Enough',
|
||||
parts: [],
|
||||
},
|
||||
}));
|
||||
}),
|
||||
);
|
||||
|
||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
||||
|
||||
@ -235,4 +253,35 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||
expect(output!.content).toContain('Docs done');
|
||||
});
|
||||
|
||||
// phase:start for the team-leader movement must record the instruction actually
// used for the decomposition run (the decomposition-only planning prompt).
it('team leader の phase:start には分解実行時の実 instruction を記録する', async () => {
  const config = buildTeamLeaderConfig();
  const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
  const phaseStarts: string[] = [];
  engine.on('phase:start', (step, phase, phaseName, instruction) => {
    // Collect only the phase-1 "execute" starts of the "implement" step.
    if (step.name !== 'implement' || phase !== 1 || phaseName !== 'execute') return;
    phaseStarts.push(instruction);
  });

  mockRunAgentWithPrompt(
    makeResponse({
      persona: 'team-leader',
      structuredOutput: {
        parts: [{ id: 'part-1', title: 'API', instruction: 'Implement API', timeout_ms: null }],
      },
    }),
    makeResponse({ persona: 'coder', content: 'API done' }),
    makeResponse({
      persona: 'team-leader',
      structuredOutput: { done: true, reasoning: 'enough', parts: [] },
    }),
  );
  vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });

  const state = await engine.run();

  expect(state.status).toBe('completed');
  expect(phaseStarts.length).toBeGreaterThan(0);
  expect(phaseStarts[0]).toContain('This is decomposition-only planning. Do not execute the task.');
});
|
||||
|
||||
});
|
||||
|
||||
@ -136,7 +136,13 @@ export function buildDefaultPieceConfig(overrides: Partial<PieceConfig> = {}): P
|
||||
/**
 * Queues one one-shot `runAgent` mock implementation per response, in call order.
 *
 * Each queued implementation first invokes `options.onPromptResolved` (when the
 * caller supplied one) with the persona as `systemPrompt` and the task as
 * `userInstruction`, then resolves with the queued response.
 *
 * Each response is queued exactly once, so the n-th `runAgent` call receives the
 * n-th response.
 *
 * @param responses Responses to return from successive `runAgent` calls.
 */
export function mockRunAgentSequence(responses: AgentResponse[]): void {
  const mock = vi.mocked(runAgent);
  for (const response of responses) {
    mock.mockImplementationOnce(async (persona, task, options) => {
      options?.onPromptResolved?.({
        // A non-string persona is reported as an empty system prompt.
        systemPrompt: typeof persona === 'string' ? persona : '',
        userInstruction: task,
      });
      return response;
    });
  }
}
|
||||
|
||||
|
||||
@ -103,7 +103,13 @@ describe('IT: config provider_options reflection', () => {
|
||||
delete process.env.TAKT_PROVIDER_OPTIONS_CODEX_NETWORK_ACCESS;
|
||||
invalidateGlobalConfigCache();
|
||||
|
||||
vi.mocked(runAgent).mockResolvedValue(makeDoneResponse());
|
||||
vi.mocked(runAgent).mockImplementation(async (persona, task, options) => {
|
||||
options?.onPromptResolved?.({
|
||||
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||
userInstruction: task,
|
||||
});
|
||||
return makeDoneResponse();
|
||||
});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
@ -203,4 +209,3 @@ describe('IT: config provider_options reflection', () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@ -83,7 +83,13 @@ describe('IT: provider block reflection', () => {
|
||||
// Per-test setup: reset all mocks, remember the current TAKT_CONFIG_DIR, and make
// every runAgent call report its prompt via onPromptResolved before resolving
// with a fresh done response.
beforeEach(() => {
  vi.clearAllMocks();
  originalConfigDir = process.env.TAKT_CONFIG_DIR;
  vi.mocked(runAgent).mockImplementation(async (persona, task, options) => {
    options?.onPromptResolved?.({
      // A non-string persona is reported as an empty system prompt.
      systemPrompt: typeof persona === 'string' ? persona : '',
      userInstruction: task,
    });
    return makeDoneResponse();
  });
});
|
||||
|
||||
afterEach(() => {
|
||||
|
||||
@ -11,6 +11,7 @@ vi.mock('../agents/runner.js', () => ({
|
||||
}));
|
||||
|
||||
import { runAgent } from '../agents/runner.js';
|
||||
import type { AgentResponse } from '../core/models/types.js';
|
||||
|
||||
function createStep(fileName: string): PieceMovement {
|
||||
return {
|
||||
@ -51,6 +52,19 @@ function createContext(
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Queues one one-shot `runAgent` mock implementation per response, in call order.
 *
 * Each queued implementation first invokes `options.onPromptResolved` (when the
 * caller supplied one) with the persona as `systemPrompt` and the task as
 * `userInstruction`, then resolves with the queued response.
 *
 * @param responses Responses to return from successive `runAgent` calls.
 */
function queueRunAgentResponses(responses: AgentResponse[]): void {
  const runAgentMock = vi.mocked(runAgent);
  for (const response of responses) {
    runAgentMock.mockImplementationOnce(async (persona, task, options) => {
      options?.onPromptResolved?.({
        // A non-string persona is reported as an empty system prompt.
        systemPrompt: typeof persona === 'string' ? persona : '',
        userInstruction: task,
      });
      return response;
    });
  }
}
|
||||
|
||||
describe('runReportPhase report history behavior', () => {
|
||||
let tmpRoot: string;
|
||||
|
||||
@ -71,22 +85,22 @@ describe('runReportPhase report history behavior', () => {
|
||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||
const step = createStep('05-architect-review.md');
|
||||
const ctx = createContext(reportDir);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
runAgentMock
|
||||
.mockResolvedValueOnce({
|
||||
queueRunAgentResponses([
|
||||
{
|
||||
persona: 'reviewers',
|
||||
status: 'done',
|
||||
content: 'First review result',
|
||||
timestamp: new Date('2026-02-10T06:11:43Z'),
|
||||
sessionId: 'session-2',
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
},
|
||||
{
|
||||
persona: 'reviewers',
|
||||
status: 'done',
|
||||
content: 'Second review result',
|
||||
timestamp: new Date('2026-02-10T06:14:37Z'),
|
||||
sessionId: 'session-3',
|
||||
});
|
||||
},
|
||||
]);
|
||||
|
||||
// When
|
||||
await runReportPhase(step, 1, ctx);
|
||||
@ -113,29 +127,29 @@ describe('runReportPhase report history behavior', () => {
|
||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||
const step = createStep('06-qa-review.md');
|
||||
const ctx = createContext(reportDir);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
runAgentMock
|
||||
.mockResolvedValueOnce({
|
||||
queueRunAgentResponses([
|
||||
{
|
||||
persona: 'reviewers',
|
||||
status: 'done',
|
||||
content: 'v1',
|
||||
timestamp: new Date('2026-02-10T06:11:43Z'),
|
||||
sessionId: 'session-2',
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
},
|
||||
{
|
||||
persona: 'reviewers',
|
||||
status: 'done',
|
||||
content: 'v2',
|
||||
timestamp: new Date('2026-02-10T06:11:43Z'),
|
||||
sessionId: 'session-3',
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
},
|
||||
{
|
||||
persona: 'reviewers',
|
||||
status: 'done',
|
||||
content: 'v3',
|
||||
timestamp: new Date('2026-02-10T06:11:43Z'),
|
||||
sessionId: 'session-4',
|
||||
});
|
||||
},
|
||||
]);
|
||||
|
||||
// When
|
||||
await runReportPhase(step, 1, ctx);
|
||||
@ -158,14 +172,13 @@ describe('runReportPhase report history behavior', () => {
|
||||
const ctx = createContext(reportDir, (overrides) => {
|
||||
capturedOverrides.push(overrides);
|
||||
});
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
runAgentMock.mockResolvedValueOnce({
|
||||
queueRunAgentResponses([{
|
||||
persona: 'reviewers',
|
||||
status: 'done',
|
||||
content: 'Permission-based report execution',
|
||||
timestamp: new Date('2026-02-10T06:21:17Z'),
|
||||
sessionId: 'session-2',
|
||||
});
|
||||
}]);
|
||||
|
||||
// When
|
||||
await runReportPhase(step, 1, ctx);
|
||||
|
||||
@ -139,6 +139,7 @@ vi.mock('../shared/utils/index.js', () => ({
|
||||
preventSleep: vi.fn(),
|
||||
isDebugEnabled: vi.fn().mockReturnValue(false),
|
||||
writePromptLog: vi.fn(),
|
||||
getDebugPromptsLogFile: vi.fn().mockReturnValue(null),
|
||||
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
|
||||
isValidReportDirName: vi.fn().mockReturnValue(true),
|
||||
playWarningSound: vi.fn(),
|
||||
|
||||
@ -31,12 +31,57 @@ const { mockIsDebugEnabled, mockWritePromptLog, MockPieceEngine } = vi.hoisted((
|
||||
const step = this.config.movements[0]!;
|
||||
const timestamp = new Date('2026-02-07T00:00:00.000Z');
|
||||
const shouldAbort = this.task === 'abort-task';
|
||||
|
||||
const shouldAbortBeforeComplete = this.task === 'abort-before-complete-task';
|
||||
const shouldDuplicatePhase = this.task === 'duplicate-phase-task';
|
||||
const shouldEmitSensitive = this.task === 'sensitive-content-task';
|
||||
const shouldRepeatMovement = this.task === 'repeat-movement-task';
|
||||
const shouldReversePhaseCompletion = this.task === 'reverse-phase-complete-task';
|
||||
const providerInfo = { provider: undefined, model: undefined };
|
||||
this.emit('movement:start', step, 1, 'movement instruction', providerInfo);
|
||||
this.emit('phase:start', step, 1, 'execute', 'phase prompt');
|
||||
this.emit('phase:complete', step, 1, 'execute', 'phase response', 'done');
|
||||
if (shouldReversePhaseCompletion) {
|
||||
this.emit('phase:start', step, 1, 'execute', 'phase prompt first', {
|
||||
systemPrompt: '../agents/coder.md',
|
||||
userInstruction: 'phase prompt first',
|
||||
}, 'implement:1:1:1', 1);
|
||||
this.emit('phase:start', step, 1, 'execute', 'phase prompt second', {
|
||||
systemPrompt: '../agents/coder.md',
|
||||
userInstruction: 'phase prompt second',
|
||||
}, 'implement:1:1:2', 1);
|
||||
} else {
|
||||
this.emit('phase:start', step, 1, 'execute', shouldEmitSensitive ? 'token=plain-secret' : 'phase prompt', {
|
||||
systemPrompt: shouldEmitSensitive ? 'Authorization: Bearer super-secret-token' : '../agents/coder.md',
|
||||
userInstruction: shouldEmitSensitive ? 'api_key=plain-secret' : 'phase prompt',
|
||||
});
|
||||
}
|
||||
this.emit('phase:start', step, 3, 'judge', 'phase3 prompt', {
|
||||
systemPrompt: 'conductor',
|
||||
userInstruction: 'phase3 prompt',
|
||||
});
|
||||
this.emit('phase:judge_stage', step, 3, 'judge', {
|
||||
stage: 1,
|
||||
method: 'structured_output',
|
||||
status: 'done',
|
||||
instruction: 'judge stage prompt',
|
||||
response: 'judge stage response',
|
||||
});
|
||||
this.emit('phase:complete', step, 3, 'judge', '[IMPLEMENT:1]', 'done');
|
||||
if (shouldAbortBeforeComplete) {
|
||||
this.emit('piece:abort', { status: 'aborted', iteration: 1 }, 'user_interrupted');
|
||||
return { status: 'aborted', iteration: 1 };
|
||||
}
|
||||
if (shouldReversePhaseCompletion) {
|
||||
this.emit('phase:complete', step, 1, 'execute', 'phase response second', 'done', undefined, 'implement:1:1:2', 1);
|
||||
this.emit('phase:complete', step, 1, 'execute', 'phase response first', 'done', undefined, 'implement:1:1:1', 1);
|
||||
} else {
|
||||
this.emit('phase:complete', step, 1, 'execute', shouldEmitSensitive ? 'password=plain-secret' : 'phase response', 'done');
|
||||
}
|
||||
if (shouldDuplicatePhase) {
|
||||
this.emit('phase:start', step, 1, 'execute', 'phase prompt second', {
|
||||
systemPrompt: '../agents/coder.md',
|
||||
userInstruction: 'phase prompt second',
|
||||
});
|
||||
this.emit('phase:complete', step, 1, 'execute', 'phase response second', 'done');
|
||||
}
|
||||
this.emit(
|
||||
'movement:complete',
|
||||
step,
|
||||
@ -154,6 +199,7 @@ vi.mock('../shared/utils/index.js', () => ({
|
||||
preventSleep: vi.fn(),
|
||||
isDebugEnabled: mockIsDebugEnabled,
|
||||
writePromptLog: mockWritePromptLog,
|
||||
getDebugPromptsLogFile: vi.fn().mockReturnValue(null),
|
||||
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
|
||||
isValidReportDirName: vi.fn().mockImplementation((value: string) => /^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(value)),
|
||||
}));
|
||||
@ -173,6 +219,7 @@ vi.mock('../shared/exitCodes.js', () => ({
|
||||
|
||||
import { executePiece } from '../features/tasks/execute/pieceExecution.js';
|
||||
import { ensureDir, writeFileAtomic } from '../infra/config/index.js';
|
||||
import { appendNdjsonLine } from '../infra/fs/index.js';
|
||||
|
||||
describe('executePiece debug prompts logging', () => {
|
||||
beforeEach(() => {
|
||||
@ -204,15 +251,16 @@ describe('executePiece debug prompts logging', () => {
|
||||
projectCwd: '/tmp/project',
|
||||
});
|
||||
|
||||
expect(mockWritePromptLog).toHaveBeenCalledTimes(1);
|
||||
const record = mockWritePromptLog.mock.calls[0]?.[0] as {
|
||||
expect(mockWritePromptLog).toHaveBeenCalledTimes(2);
|
||||
const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array<{
|
||||
movement: string;
|
||||
phase: number;
|
||||
iteration: number;
|
||||
prompt: string;
|
||||
response: string;
|
||||
timestamp: string;
|
||||
};
|
||||
}>;
|
||||
const record = records.find((entry) => entry.phase === 1)!;
|
||||
expect(record.movement).toBe('implement');
|
||||
expect(record.phase).toBe(1);
|
||||
expect(record.iteration).toBe(1);
|
||||
@ -221,6 +269,54 @@ describe('executePiece debug prompts logging', () => {
|
||||
expect(record.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/);
|
||||
});
|
||||
|
||||
// With debug enabled, the phase-1 prompt record must expose `systemPrompt` and
// `userInstruction` as separate fields.
it('should separate system prompt and user instruction in debug prompt records', async () => {
  type PromptRecord = Record<string, unknown> & { phase: number };
  mockIsDebugEnabled.mockReturnValue(true);

  await executePiece(makeConfig(), 'task', '/tmp/project', { projectCwd: '/tmp/project' });

  // Exactly two prompt-log writes are expected; only the phase-1 record is inspected.
  expect(mockWritePromptLog).toHaveBeenCalledTimes(2);
  const loggedRecords = mockWritePromptLog.mock.calls.map((args) => args[0]) as PromptRecord[];
  const executeRecord = loggedRecords.find(({ phase }) => phase === 1)!;

  expect(executeRecord).toHaveProperty('systemPrompt');
  expect(executeRecord).toHaveProperty('userInstruction');
  expect(executeRecord.systemPrompt).toBe('../agents/coder.md');
  expect(executeRecord.userInstruction).toBe('phase prompt');
});
|
||||
|
||||
// The trace.md written for a normal run must contain the iteration heading, the
// phase heading, and the judge-stage section with its stage status line.
it('should include phase and judge stage details in trace markdown', async () => {
  await executePiece(makeConfig(), 'task', '/tmp/project', {
    projectCwd: '/tmp/project',
    reportDirName: 'test-report-dir',
  });

  // Locate the first atomic write whose target path ends with /trace.md.
  const traceWrite = vi
    .mocked(writeFileAtomic)
    .mock.calls.find(([filePath]) => String(filePath).endsWith('/trace.md'));
  expect(traceWrite).toBeDefined();

  const traceMarkdown = String(traceWrite?.[1]);
  const expectedFragments = [
    '## Iteration 1: implement',
    '### Phase 1: execute',
    '#### Judgment Stages',
    'Stage 1 (structured_output): status=done',
  ];
  for (const fragment of expectedFragments) {
    expect(traceMarkdown).toContain(fragment);
  }
});
|
||||
|
||||
// When the piece aborts before the movement completes, trace.md must still be
// written, reporting the aborted run and the movement as still in progress.
it('should render trace markdown even when piece aborts before movement completion', async () => {
  await executePiece(makeConfig(), 'abort-before-complete-task', '/tmp/project', {
    projectCwd: '/tmp/project',
    reportDirName: 'test-report-dir',
  });

  const traceWrite = vi
    .mocked(writeFileAtomic)
    .mock.calls.find(([filePath]) => String(filePath).endsWith('/trace.md'));
  expect(traceWrite).toBeDefined();

  const traceMarkdown = String(traceWrite?.[1]);
  for (const fragment of ['- Status: ❌ aborted', '- Movement Status: in_progress']) {
    expect(traceMarkdown).toContain(fragment);
  }
});
|
||||
|
||||
it('should not write prompt log record when debug is disabled', async () => {
|
||||
mockIsDebugEnabled.mockReturnValue(false);
|
||||
|
||||
@ -231,6 +327,24 @@ describe('executePiece debug prompts logging', () => {
|
||||
expect(mockWritePromptLog).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// A second start/complete pair for the same movement and phase must yield its own
// debug prompt record, with responses logged in order.
it('should handle repeated phase starts for same movement and phase without missing debug prompt', async () => {
  type LoggedPhase = { phase: number; response: string };
  mockIsDebugEnabled.mockReturnValue(true);

  await executePiece(makeConfig(), 'duplicate-phase-task', '/tmp/project', { projectCwd: '/tmp/project' });

  expect(mockWritePromptLog).toHaveBeenCalledTimes(3);
  const loggedPhases = mockWritePromptLog.mock.calls.map((args) => args[0]) as LoggedPhase[];

  // Collect the phase-1 responses in the order they were logged.
  const phase1Responses: string[] = [];
  for (const entry of loggedPhases) {
    if (entry.phase === 1) {
      phase1Responses.push(entry.response);
    }
  }
  expect(phase1Responses).toEqual(['phase response', 'phase response second']);
});
|
||||
|
||||
it('should update movement prefix context on each movement:start event', async () => {
|
||||
const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
|
||||
|
||||
@ -277,11 +391,13 @@ describe('executePiece debug prompts logging', () => {
|
||||
reportDirName: 'test-report-dir',
|
||||
});
|
||||
|
||||
const calls = vi.mocked(writeFileAtomic).mock.calls;
|
||||
expect(calls).toHaveLength(2);
|
||||
const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
|
||||
(call) => String(call[0]).endsWith('/meta.json')
|
||||
);
|
||||
expect(metaCalls).toHaveLength(2);
|
||||
|
||||
const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string };
|
||||
const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string };
|
||||
const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string };
|
||||
const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string };
|
||||
expect(firstMeta.status).toBe('running');
|
||||
expect(firstMeta.endTime).toBeUndefined();
|
||||
expect(secondMeta.status).toBe('completed');
|
||||
@ -294,11 +410,13 @@ describe('executePiece debug prompts logging', () => {
|
||||
reportDirName: 'test-report-dir',
|
||||
});
|
||||
|
||||
const calls = vi.mocked(writeFileAtomic).mock.calls;
|
||||
expect(calls).toHaveLength(2);
|
||||
const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
|
||||
(call) => String(call[0]).endsWith('/meta.json')
|
||||
);
|
||||
expect(metaCalls).toHaveLength(2);
|
||||
|
||||
const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string };
|
||||
const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string };
|
||||
const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string };
|
||||
const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string };
|
||||
expect(firstMeta.status).toBe('running');
|
||||
expect(firstMeta.endTime).toBeUndefined();
|
||||
expect(secondMeta.status).toBe('aborted');
|
||||
@ -313,14 +431,83 @@ describe('executePiece debug prompts logging', () => {
|
||||
})
|
||||
).rejects.toThrow('mock constructor failure');
|
||||
|
||||
const calls = vi.mocked(writeFileAtomic).mock.calls;
|
||||
expect(calls).toHaveLength(2);
|
||||
const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
|
||||
(call) => String(call[0]).endsWith('/meta.json')
|
||||
);
|
||||
expect(metaCalls).toHaveLength(2);
|
||||
|
||||
const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string };
|
||||
const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string };
|
||||
const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string };
|
||||
const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string };
|
||||
expect(firstMeta.status).toBe('running');
|
||||
expect(firstMeta.endTime).toBeUndefined();
|
||||
expect(secondMeta.status).toBe('aborted');
|
||||
expect(secondMeta.endTime).toMatch(/^\d{4}-\d{2}-\d{2}T/);
|
||||
});
|
||||
|
||||
// A completed run must produce at least one atomic write targeting trace.md.
it('should write trace.md on piece completion', async () => {
  const runOptions = { projectCwd: '/tmp/project', reportDirName: 'test-report-dir' };
  await executePiece(makeConfig(), 'task', '/tmp/project', runOptions);

  const traceWrites = vi
    .mocked(writeFileAtomic)
    .mock.calls.filter(([filePath]) => String(filePath).endsWith('/trace.md'));
  expect(traceWrites.length).toBeGreaterThan(0);
});
|
||||
|
||||
// An aborted run must also produce at least one atomic write targeting trace.md.
it('should write trace.md on piece abort', async () => {
  const runOptions = { projectCwd: '/tmp/project', reportDirName: 'test-report-dir' };
  await executePiece(makeConfig(), 'abort-task', '/tmp/project', runOptions);

  const traceWrites = vi
    .mocked(writeFileAtomic)
    .mock.calls.filter(([filePath]) => String(filePath).endsWith('/trace.md'));
  expect(traceWrites.length).toBeGreaterThan(0);
});
|
||||
|
||||
// Runs two pieces that carry secret-looking values (in the task text, the interactive
// metadata, and the emitted phase content) and checks that none of the raw secrets
// reach the records passed to appendNdjsonLine, while the redaction marker does.
it('should sanitize sensitive fields before writing session NDJSON when trace mode is default', async () => {
  // First run: secrets in the task string and in the interactive metadata.
  await executePiece(makeConfig(), 'token=plain-secret', '/tmp/project', {
    projectCwd: '/tmp/project',
    reportDirName: 'test-report-dir',
    interactiveMetadata: {
      confirmed: true,
      task: 'api_key=plain-secret',
    },
  });
  // Second run: the mocked engine emits secret-bearing prompts and responses.
  await executePiece(makeConfig(), 'sensitive-content-task', '/tmp/project', {
    projectCwd: '/tmp/project',
    reportDirName: 'test-report-dir-2',
  });

  const writtenRecords = vi.mocked(appendNdjsonLine).mock.calls.map((args) => args[1]);
  const serialized = JSON.stringify(writtenRecords);

  expect(serialized).toContain('[REDACTED]');
  for (const secret of ['plain-secret', 'super-secret-token']) {
    expect(serialized).not.toContain(secret);
  }
});
|
||||
|
||||
// Two executions of the same phase complete in reverse order; the trace must still
// pair each prompt with its own response and list the first execution before the second.
it('should keep phaseExecutionId bindings consistent in trace when completions arrive in reverse order', async () => {
  await executePiece(makeConfig(), 'reverse-phase-complete-task', '/tmp/project', {
    projectCwd: '/tmp/project',
    reportDirName: 'test-report-dir',
  });

  const traceWrite = vi
    .mocked(writeFileAtomic)
    .mock.calls.find(([filePath]) => String(filePath).endsWith('/trace.md'));
  expect(traceWrite).toBeDefined();

  const traceMarkdown = String(traceWrite?.[1]);
  const positionOf = (needle: string): number => traceMarkdown.indexOf(needle);
  const firstPromptAt = positionOf('phase prompt first');
  const firstResponseAt = positionOf('phase response first');
  const secondPromptAt = positionOf('phase prompt second');
  const secondResponseAt = positionOf('phase response second');

  // Expected order: prompt 1 < response 1 < prompt 2 < response 2.
  expect(firstPromptAt).toBeGreaterThan(-1);
  expect(firstResponseAt).toBeGreaterThan(firstPromptAt);
  expect(secondPromptAt).toBeGreaterThan(firstResponseAt);
  expect(secondResponseAt).toBeGreaterThan(secondPromptAt);
});
|
||||
});
|
||||
|
||||
@ -136,6 +136,7 @@ vi.mock('../shared/utils/index.js', () => ({
|
||||
preventSleep: vi.fn(),
|
||||
isDebugEnabled: vi.fn().mockReturnValue(false),
|
||||
writePromptLog: vi.fn(),
|
||||
getDebugPromptsLogFile: vi.fn().mockReturnValue(null),
|
||||
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
|
||||
isValidReportDirName: vi.fn().mockReturnValue(true),
|
||||
playWarningSound: vi.fn(),
|
||||
|
||||
@ -10,6 +10,7 @@ vi.mock('../agents/runner.js', () => ({
|
||||
}));
|
||||
|
||||
import { runAgent } from '../agents/runner.js';
|
||||
import type { AgentResponse } from '../core/models/types.js';
|
||||
|
||||
function createStep(fileName: string): PieceMovement {
|
||||
return {
|
||||
@ -50,6 +51,19 @@ function createContext(reportDir: string, lastResponse = 'Phase 1 result'): Phas
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Registers the given responses on the mocked `runAgent`, one one-shot
 * implementation per response, in array order.
 *
 * Before resolving with its response, each implementation reports the prompt
 * parts through `options.onPromptResolved` if that callback was supplied:
 * `systemPrompt` is the persona when it is a string (otherwise ''), and
 * `userInstruction` is the task.
 */
function queueRunAgentResponses(responses: AgentResponse[]): void {
  const mockedRunAgent = vi.mocked(runAgent);
  responses.forEach((nextResponse) => {
    mockedRunAgent.mockImplementationOnce(async (persona, task, options) => {
      const notifyPromptResolved = options?.onPromptResolved;
      if (notifyPromptResolved) {
        notifyPromptResolved({
          systemPrompt: typeof persona === 'string' ? persona : '',
          userInstruction: task,
        });
      }
      return nextResponse;
    });
  });
}
|
||||
|
||||
describe('runReportPhase retry with new session', () => {
|
||||
let tmpRoot: string;
|
||||
|
||||
@ -69,22 +83,23 @@ describe('runReportPhase retry with new session', () => {
|
||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||
const step = createStep('02-coder.md');
|
||||
const ctx = createContext(reportDir, 'Implemented feature X');
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
runAgentMock
|
||||
.mockResolvedValueOnce({
|
||||
queueRunAgentResponses([
|
||||
{
|
||||
persona: 'coder',
|
||||
status: 'done',
|
||||
content: ' ',
|
||||
timestamp: new Date('2026-02-11T00:00:00Z'),
|
||||
sessionId: 'session-resume-2',
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
},
|
||||
{
|
||||
persona: 'coder',
|
||||
status: 'done',
|
||||
content: '# Report\nRecovered output',
|
||||
timestamp: new Date('2026-02-11T00:00:01Z'),
|
||||
sessionId: 'session-fresh-1',
|
||||
});
|
||||
},
|
||||
]);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
|
||||
// When
|
||||
await runReportPhase(step, 1, ctx);
|
||||
@ -107,21 +122,22 @@ describe('runReportPhase retry with new session', () => {
|
||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||
const step = createStep('03-review.md');
|
||||
const ctx = createContext(reportDir);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
runAgentMock
|
||||
.mockResolvedValueOnce({
|
||||
queueRunAgentResponses([
|
||||
{
|
||||
persona: 'coder',
|
||||
status: 'error',
|
||||
content: 'Tool use is not allowed in this phase',
|
||||
timestamp: new Date('2026-02-11T00:01:00Z'),
|
||||
error: 'Tool use is not allowed in this phase',
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
},
|
||||
{
|
||||
persona: 'coder',
|
||||
status: 'done',
|
||||
content: 'Recovered report',
|
||||
timestamp: new Date('2026-02-11T00:01:01Z'),
|
||||
});
|
||||
},
|
||||
]);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
|
||||
// When
|
||||
await runReportPhase(step, 1, ctx);
|
||||
@ -137,20 +153,21 @@ describe('runReportPhase retry with new session', () => {
|
||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||
const step = createStep('04-qa.md');
|
||||
const ctx = createContext(reportDir);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
runAgentMock
|
||||
.mockResolvedValueOnce({
|
||||
queueRunAgentResponses([
|
||||
{
|
||||
persona: 'coder',
|
||||
status: 'done',
|
||||
content: ' ',
|
||||
timestamp: new Date('2026-02-11T00:02:00Z'),
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
},
|
||||
{
|
||||
persona: 'coder',
|
||||
status: 'done',
|
||||
content: '\n\n',
|
||||
timestamp: new Date('2026-02-11T00:02:01Z'),
|
||||
});
|
||||
},
|
||||
]);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
|
||||
// When / Then
|
||||
await expect(runReportPhase(step, 1, ctx)).rejects.toThrow('Report phase failed for 04-qa.md: Report output is empty');
|
||||
@ -162,14 +179,14 @@ describe('runReportPhase retry with new session', () => {
|
||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||
const step = createStep('05-ok.md');
|
||||
const ctx = createContext(reportDir);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
runAgentMock.mockResolvedValueOnce({
|
||||
queueRunAgentResponses([{
|
||||
persona: 'coder',
|
||||
status: 'done',
|
||||
content: 'Single-pass success',
|
||||
timestamp: new Date('2026-02-11T00:03:00Z'),
|
||||
sessionId: 'session-resume-2',
|
||||
});
|
||||
}]);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
|
||||
// When
|
||||
await runReportPhase(step, 1, ctx);
|
||||
@ -185,13 +202,13 @@ describe('runReportPhase retry with new session', () => {
|
||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||
const step = createStep('06-blocked.md');
|
||||
const ctx = createContext(reportDir);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
runAgentMock.mockResolvedValueOnce({
|
||||
queueRunAgentResponses([{
|
||||
persona: 'coder',
|
||||
status: 'blocked',
|
||||
content: 'Need permission',
|
||||
timestamp: new Date('2026-02-11T00:04:00Z'),
|
||||
});
|
||||
}]);
|
||||
const runAgentMock = vi.mocked(runAgent);
|
||||
|
||||
// When
|
||||
const result = await runReportPhase(step, 1, ctx);
|
||||
|
||||
124
src/__tests__/status-judgment-phase.test.ts
Normal file
124
src/__tests__/status-judgment-phase.test.ts
Normal file
@ -0,0 +1,124 @@
|
||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||
import type { PieceMovement } from '../core/models/types.js';
|
||||
import { runStatusJudgmentPhase } from '../core/piece/status-judgment-phase.js';
|
||||
|
||||
// Creates the `judgeStatus` mock inside `vi.hoisted` so the mock function already
// exists when the `vi.mock` factory for '../agents/agent-usecases.js' references it.
const { mockJudgeStatus } = vi.hoisted(() => ({
|
||||
mockJudgeStatus: vi.fn(),
|
||||
}));
|
||||
|
||||
// Replaces the agent-usecases module with a stub that exposes only `judgeStatus`,
// backed by the `mockJudgeStatus` mock so each test can script its behaviour.
vi.mock('../agents/agent-usecases.js', () => ({
|
||||
judgeStatus: mockJudgeStatus,
|
||||
}));
|
||||
|
||||
// Tests for runStatusJudgmentPhase: callback forwarding through the phase runner
// context and the fail-fast path when `iteration` is absent.
describe('runStatusJudgmentPhase', () => {
  /** One judge-stage log entry as handed to `onJudgeStage` by the mocked judgeStatus. */
  type JudgeStageEntry = {
    stage: 1 | 2 | 3;
    method: 'structured_output' | 'phase3_tag' | 'ai_judge';
    status: 'done' | 'error' | 'skipped';
    instruction: string;
    response: string;
  };

  /** The subset of judgeStatus options that the mocked implementation drives. */
  type JudgeStatusOptions = {
    onJudgeStage?: (entry: JudgeStageEntry) => void;
    onStructuredPromptResolved?: (promptParts: { systemPrompt: string; userInstruction: string }) => void;
  };

  /** Builds the two-rule "review" movement used by both tests. */
  const createReviewStep = (): PieceMovement => ({
    name: 'review',
    persona: 'reviewer',
    personaDisplayName: 'reviewer',
    instructionTemplate: 'Review',
    passPreviousResponse: true,
    rules: [
      { condition: 'needs_fix', next: 'fix' },
      { condition: 'approved', next: 'COMPLETE' },
    ],
  });

  beforeEach(() => {
    vi.clearAllMocks();
  });

  it('should pass judge stage callbacks through PhaseRunnerContext', async () => {
    // The mocked judgeStatus reports the resolved prompt, emits one stage entry,
    // and then selects rule index 1 via structured output.
    mockJudgeStatus.mockImplementation(
      async (_structured: string, _tag: string, _rules: unknown[], options: JudgeStatusOptions) => {
        options.onStructuredPromptResolved?.({
          systemPrompt: 'conductor-system',
          userInstruction: 'structured prompt',
        });
        options.onJudgeStage?.({
          stage: 1,
          method: 'structured_output',
          status: 'done',
          instruction: 'structured prompt',
          response: '{"step":2}',
        });
        return { ruleIndex: 1, method: 'structured_output' as const };
      },
    );

    const step = createReviewStep();
    const onPhaseStart = vi.fn();
    const onPhaseComplete = vi.fn();
    const onJudgeStage = vi.fn();

    const result = await runStatusJudgmentPhase(step, {
      cwd: '/tmp/project',
      reportDir: '/tmp/project/.takt/reports',
      lastResponse: 'response body',
      iteration: 4,
      getSessionId: vi.fn(),
      buildResumeOptions: vi.fn(),
      buildNewSessionReportOptions: vi.fn(),
      updatePersonaSession: vi.fn(),
      onPhaseStart,
      onPhaseComplete,
      onJudgeStage,
    });

    expect(result).toEqual({
      tag: '[REVIEW:2]',
      ruleIndex: 1,
      method: 'structured_output',
    });

    // Every callback is expected to carry the same phase execution id and iteration.
    const resolvedPromptParts = {
      systemPrompt: 'conductor-system',
      userInstruction: 'structured prompt',
    };
    expect(onPhaseStart).toHaveBeenCalledWith(
      step,
      3,
      'judge',
      expect.any(String),
      resolvedPromptParts,
      'review:4:3:1',
      4,
    );
    expect(onJudgeStage).toHaveBeenCalledWith(
      step,
      3,
      'judge',
      expect.objectContaining({ stage: 1, method: 'structured_output' }),
      'review:4:3:1',
      4,
    );
    expect(onPhaseComplete).toHaveBeenCalledWith(step, 3, 'judge', '[REVIEW:2]', 'done', undefined, 'review:4:3:1', 4);
  });

  it('should fail fast when iteration is missing', async () => {
    mockJudgeStatus.mockResolvedValue({ ruleIndex: 0, method: 'structured_output' });

    const step = createReviewStep();

    // The context deliberately omits `iteration`.
    const judgment = runStatusJudgmentPhase(step, {
      cwd: '/tmp/project',
      reportDir: '/tmp/project/.takt/reports',
      lastResponse: 'response body',
      getSessionId: vi.fn(),
      buildResumeOptions: vi.fn(),
      buildNewSessionReportOptions: vi.fn(),
      updatePersonaSession: vi.fn(),
    });

    await expect(judgment).rejects.toThrow('Status judgment requires iteration for movement "review"');
  });
});
|
||||
236
src/__tests__/traceReport.test.ts
Normal file
236
src/__tests__/traceReport.test.ts
Normal file
@ -0,0 +1,236 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { mkdtempSync, rmSync, writeFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { renderTraceReportMarkdown, renderTraceReportFromLogs } from '../features/tasks/execute/traceReport.js';
|
||||
|
||||
describe('traceReport', () => {
|
||||
// Renders an aborted run whose only movement never completed and whose single judge
// phase holds one errored stage; checks the status lines, headings and <details> sections.
it('should render judge stage details and tolerate aborted incomplete movement', () => {
  const markdown = renderTraceReportMarkdown(
    {
      tracePath: '/tmp/trace.md',
      pieceName: 'test-piece',
      task: 'test task',
      runSlug: 'run-1',
      status: 'aborted',
      iterations: 1,
      endTime: '2026-03-04T12:00:00.000Z',
      reason: 'user_interrupted',
    },
    '2026-03-04T11:59:00.000Z',
    [
      {
        step: 'ai_fix',
        persona: 'coder',
        iteration: 1,
        startedAt: '2026-03-04T11:59:01.000Z',
        phases: [
          {
            phaseExecutionId: 'ai_fix:3:1',
            phase: 3,
            phaseName: 'judge',
            instruction: 'judge prompt',
            systemPrompt: 'conductor',
            userInstruction: 'judge prompt',
            startedAt: '2026-03-04T11:59:02.000Z',
            judgeStages: [
              {
                stage: 1,
                method: 'structured_output',
                status: 'error',
                instruction: 'stage1 prompt',
                response: '',
              },
            ],
          },
        ],
      },
    ],
  );

  const expectedFragments = [
    '- Status: ❌ aborted',
    '- Movement Status: in_progress',
    '## Iteration 1: ai_fix (persona: coder)',
    '<details><summary>System Prompt</summary>',
    '<details><summary>User Instruction</summary>',
    '- Stage 1 (structured_output)',
    '<details><summary>Stage Instruction</summary>',
    '<details><summary>Stage Response</summary>',
  ];
  for (const fragment of expectedFragments) {
    expect(markdown).toContain(fragment);
  }
});
|
||||
|
||||
// Writes session and prompt NDJSON logs in which the "reviewers" movement (iteration 2)
// appears before the "plan" movement (iteration 1), then checks that the rendered
// report lists "plan" first. The temporary log directory is removed afterwards so
// repeated test runs do not leave files behind in the OS temp dir.
it('should render movements in timestamp order from NDJSON logs', () => {
  const dir = mkdtempSync(join(tmpdir(), 'trace-report-'));
  try {
    const sessionPath = join(dir, 'session.jsonl');
    const promptPath = join(dir, 'prompts.jsonl');
    // Session log: the later movement is deliberately written first.
    writeFileSync(sessionPath, [
      JSON.stringify({ type: 'piece_start', task: 'task', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }),
      JSON.stringify({ type: 'step_start', step: 'reviewers', persona: 'reviewer', iteration: 2, timestamp: '2026-03-04T11:59:05.000Z' }),
      JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }),
      JSON.stringify({ type: 'phase_start', step: 'reviewers', iteration: 2, phase: 1, phaseName: 'execute', phaseExecutionId: 'reviewers:2:1:1', instruction: 'r', timestamp: '2026-03-04T11:59:06.000Z' }),
      JSON.stringify({ type: 'phase_complete', step: 'reviewers', iteration: 2, phase: 1, phaseName: 'execute', phaseExecutionId: 'reviewers:2:1:1', status: 'done', content: 'r-ok', timestamp: '2026-03-04T11:59:07.000Z' }),
      JSON.stringify({ type: 'step_complete', step: 'reviewers', persona: 'reviewer', status: 'done', content: 'r-ok', instruction: 'inst', timestamp: '2026-03-04T11:59:08.000Z' }),
      JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: 'p', timestamp: '2026-03-04T11:59:02.000Z' }),
      JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'p-ok', timestamp: '2026-03-04T11:59:03.000Z' }),
      JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: 'p-ok', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }),
      JSON.stringify({ type: 'piece_complete', iterations: 2, endTime: '2026-03-04T12:00:00.000Z' }),
      '',
    ].join('\n'));
    writeFileSync(promptPath, [
      JSON.stringify({ movement: 'plan', phase: 1, iteration: 1, phaseExecutionId: 'plan:1:1:1', prompt: 'p', systemPrompt: 'ps', userInstruction: 'pu', response: 'p-ok', timestamp: '2026-03-04T11:59:03.000Z' }),
      JSON.stringify({ movement: 'reviewers', phase: 1, iteration: 2, phaseExecutionId: 'reviewers:2:1:1', prompt: 'r', systemPrompt: 'rs', userInstruction: 'ru', response: 'r-ok', timestamp: '2026-03-04T11:59:07.000Z' }),
      '',
    ].join('\n'));

    const markdown = renderTraceReportFromLogs(
      {
        tracePath: join(dir, 'trace.md'),
        pieceName: 'piece',
        task: 'task',
        runSlug: 'run-1',
        status: 'completed',
        iterations: 2,
        endTime: '2026-03-04T12:00:00.000Z',
      },
      sessionPath,
      promptPath,
      'full',
    );

    expect(markdown).toBeDefined();
    const planIndex = markdown!.indexOf('## Iteration 1: plan');
    const reviewersIndex = markdown!.indexOf('## Iteration 2: reviewers');
    expect(planIndex).toBeGreaterThan(-1);
    expect(reviewersIndex).toBeGreaterThan(planIndex);
  } finally {
    // Clean up even when an assertion above throws.
    rmSync(dir, { recursive: true, force: true });
  }
});
|
||||
|
||||
// A run reported as "completed" must not contain a phase without a status:
// the phase below has startedAt/completedAt but no `status`, so rendering must throw.
it('should fail fast when completed trace has missing phase status', () => {
  expect(() => renderTraceReportMarkdown(
    {
      tracePath: '/tmp/trace.md',
      pieceName: 'test-piece',
      task: 'test task',
      runSlug: 'run-1',
      status: 'completed',
      iterations: 1,
      endTime: '2026-03-04T12:00:00.000Z',
    },
    '2026-03-04T11:59:00.000Z',
    [
      {
        step: 'plan',
        persona: 'planner',
        iteration: 1,
        startedAt: '2026-03-04T11:59:01.000Z',
        phases: [
          {
            phaseExecutionId: 'plan:1:1',
            phase: 1,
            phaseName: 'execute',
            instruction: 'instr',
            systemPrompt: 'system',
            userInstruction: 'user',
            startedAt: '2026-03-04T11:59:02.000Z',
            completedAt: '2026-03-04T11:59:03.000Z',
          },
        ],
      },
    ],
  )).toThrow('missing status');
});
|
||||
|
||||
// Writes a session log whose task, prompts, phase output and abort reason all
// carry `key=value` style secrets, renders it in 'redacted' mode, and checks
// that the keys survive while the secret values do not.
it('should mask sensitive task and reason in redacted mode', () => {
  const dir = mkdtempSync(join(tmpdir(), 'trace-report-redact-'));
  const sessionPath = join(dir, 'session.jsonl');
  writeFileSync(sessionPath, [
    JSON.stringify({ type: 'piece_start', task: 'token=topsecret', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }),
    JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }),
    JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: 'api_key=abc123', systemPrompt: 'Authorization: Bearer abc123', userInstruction: 'user token=abc123', timestamp: '2026-03-04T11:59:02.000Z' }),
    JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'password=hunter2', timestamp: '2026-03-04T11:59:03.000Z' }),
    JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: 'secret=my-secret', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }),
    '',
  ].join('\n'));

  const markdown = renderTraceReportFromLogs(
    {
      tracePath: join(dir, 'trace.md'),
      pieceName: 'piece',
      task: 'token=topsecret',
      runSlug: 'run-1',
      status: 'aborted',
      iterations: 1,
      endTime: '2026-03-04T12:00:00.000Z',
      reason: 'api_key=super-secret',
    },
    sessionPath,
    undefined, // no prompt log for this scenario
    'redacted',
  );

  expect(markdown).toContain('token=[REDACTED]');
  expect(markdown).toContain('api_key=[REDACTED]');
  expect(markdown).not.toContain('topsecret');
  expect(markdown).not.toContain('super-secret');
  expect(markdown).not.toContain('hunter2');
});
|
||||
|
||||
// Same shape as the key=value redaction test, but the secrets are embedded as
// quoted JSON properties, a Bearer header, and bare provider-style tokens
// (sk-…, ghp_…, xoxb-…).
it('should mask quoted JSON secrets and common token formats in redacted mode', () => {
  const dir = mkdtempSync(join(tmpdir(), 'trace-report-redact-json-'));
  const sessionPath = join(dir, 'session.jsonl');
  writeFileSync(sessionPath, [
    JSON.stringify({ type: 'piece_start', task: '{"api_key":"abc123"}', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }),
    JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }),
    JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: '{"token":"xyz987"}', systemPrompt: 'Authorization: Bearer sk-abcdef12345678', userInstruction: 'ghp_abcdef1234567890', timestamp: '2026-03-04T11:59:02.000Z' }),
    JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'xoxb-1234abcd-5678efgh', timestamp: '2026-03-04T11:59:03.000Z' }),
    JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: '{"password":"plain"}', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }),
    '',
  ].join('\n'));

  const markdown = renderTraceReportFromLogs(
    {
      tracePath: join(dir, 'trace.md'),
      pieceName: 'piece',
      task: '{"api_key":"abc123"}',
      runSlug: 'run-1',
      status: 'aborted',
      iterations: 1,
      endTime: '2026-03-04T12:00:00.000Z',
      reason: '{"secret":"plain"}',
    },
    sessionPath,
    undefined, // no prompt log for this scenario
    'redacted',
  );

  expect(markdown).toContain('"api_key":"[REDACTED]"');
  expect(markdown).toContain('"secret":"[REDACTED]"');
  expect(markdown).toContain('Authorization: Bearer [REDACTED]');
  expect(markdown).not.toContain('abc123');
  expect(markdown).not.toContain('xyz987');
  expect(markdown).not.toContain('ghp_abcdef1234567890');
  expect(markdown).not.toContain('xoxb-1234abcd-5678efgh');
});
|
||||
|
||||
// Four iterations alternating reviewers -> fix -> reviewers -> fix should be
// summarized as a "reviewers ↔ fix loop" with the detail inside a <details> block.
it('should fold alternating loop iterations into a details block', () => {
  const markdown = renderTraceReportMarkdown(
    {
      tracePath: '/tmp/trace.md',
      pieceName: 'test-piece',
      task: 'test task',
      runSlug: 'run-1',
      status: 'completed',
      iterations: 4,
      endTime: '2026-03-04T12:00:00.000Z',
    },
    '2026-03-04T11:59:00.000Z',
    [
      { step: 'reviewers', persona: 'reviewer', iteration: 1, startedAt: '2026-03-04T11:59:01.000Z', phases: [], result: { status: 'done', content: 'ok' } },
      { step: 'fix', persona: 'coder', iteration: 2, startedAt: '2026-03-04T11:59:02.000Z', phases: [], result: { status: 'done', content: 'ok' } },
      { step: 'reviewers', persona: 'reviewer', iteration: 3, startedAt: '2026-03-04T11:59:03.000Z', phases: [], result: { status: 'done', content: 'ok' } },
      { step: 'fix', persona: 'coder', iteration: 4, startedAt: '2026-03-04T11:59:04.000Z', phases: [], result: { status: 'done', content: 'ok' } },
    ],
  );

  expect(markdown).toContain('reviewers ↔ fix loop');
  expect(markdown).toContain('<details><summary>Loop details');
});
|
||||
});
|
||||
@ -1,172 +1,19 @@
|
||||
import type { AgentResponse, PartDefinition, PieceRule, RuleMatchMethod, Language } from '../core/models/types.js';
|
||||
import { runAgent, type RunAgentOptions, type StreamCallback } from './runner.js';
|
||||
import { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js';
|
||||
import { parseParts } from '../core/piece/engine/task-decomposer.js';
|
||||
import { loadJudgmentSchema, loadEvaluationSchema, loadDecompositionSchema, loadMorePartsSchema } from '../infra/resources/schema-loader.js';
|
||||
import { detectRuleIndex } from '../shared/utils/ruleIndex.js';
|
||||
import { ensureUniquePartIds, parsePartDefinitionEntry } from '../core/piece/part-definition-validator.js';
|
||||
import type { AgentResponse } from '../core/models/types.js';
|
||||
import { runAgent, type RunAgentOptions } from './runner.js';
|
||||
|
||||
/** Options accepted by `judgeStatus`. */
export interface JudgeStatusOptions {
  /** Working directory forwarded to every agent run. */
  cwd: string;
  /** Movement name; passed to tag detection and used in the "Status not found" error. */
  movementName: string;
  /** Language forwarded to the conductor agent runs. */
  language?: Language;
  /** Only when exactly `true` are rules flagged `interactiveOnly` eligible to match. */
  interactive?: boolean;
  /** Stream callback forwarded to the conductor agent runs. */
  onStream?: StreamCallback;
}
|
||||
|
||||
/** Outcome of `judgeStatus`. */
export interface JudgeStatusResult {
  /** Zero-based index of the matched rule within the `rules` array given to `judgeStatus`. */
  ruleIndex: number;
  /** Which detection stage produced the match. */
  method: RuleMatchMethod;
}
|
||||
|
||||
/** Options accepted by `evaluateCondition`. */
export interface EvaluateConditionOptions {
  /** Working directory forwarded to the judge agent run. */
  cwd: string;
}
|
||||
|
||||
/** Options shared by `decomposeTask` and `requestMoreParts`; all are forwarded to the agent run. */
export interface DecomposeTaskOptions {
  /** Working directory for the agent run. */
  cwd: string;
  /** Persona used as the agent identity for the run. */
  persona?: string;
  /** Path associated with the persona; forwarded unchanged. */
  personaPath?: string;
  /** Selects the prompt language; `'ja'` yields the Japanese prompt, anything else the English one. */
  language?: Language;
  /** Model override forwarded to the agent run. */
  model?: string;
  /** Provider override forwarded to the agent run. */
  provider?: 'claude' | 'codex' | 'opencode' | 'cursor' | 'copilot' | 'mock';
  /** Stream callback forwarded to the agent run. */
  onStream?: StreamCallback;
}
|
||||
|
||||
/** Validated structured answer returned by `requestMoreParts`. */
export interface MorePartsResponse {
  /** The agent's `done` flag; the prompt asks for `true` when no additional work is required. */
  done: boolean;
  /** The agent's free-text `reasoning` field. */
  reasoning: string;
  /** Additional parts proposed by the agent (may be empty). */
  parts: PartDefinition[];
}
|
||||
|
||||
/**
 * Validates the raw `parts` value from a structured decomposition response and
 * converts each entry into a part definition.
 *
 * @param raw - Untrusted `parts` value taken from the agent's structured output.
 * @param maxParts - Maximum number of entries accepted.
 * @returns One definition per entry, in input order.
 * @throws Error when `raw` is not an array, is empty, or holds more than `maxParts` entries.
 *   NOTE(review): `parsePartDefinitionEntry` / `ensureUniquePartIds` presumably throw on
 *   malformed entries and duplicate ids — confirm in part-definition-validator.
 */
function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] {
  if (!Array.isArray(raw)) {
    throw new Error('Structured output "parts" must be an array');
  }
  if (raw.length === 0) {
    throw new Error('Structured output "parts" must not be empty');
  }
  if (raw.length > maxParts) {
    throw new Error(`Structured output produced too many parts: ${raw.length} > ${maxParts}`);
  }

  const parts: PartDefinition[] = raw.map((entry, index) => parsePartDefinitionEntry(entry, index));
  ensureUniquePartIds(parts);

  return parts;
}
|
||||
|
||||
/**
 * Validates the structured output of a "more parts?" request and converts it
 * into a `MorePartsResponse`.
 *
 * Unlike the initial decomposition, an empty `parts` array is accepted here.
 *
 * @param raw - Untrusted structured output from the agent.
 * @param maxAdditionalParts - Maximum number of entries accepted in `parts`.
 * @throws Error when `raw` is not a plain object, when `done` is not a boolean,
 *   when `reasoning` is not a string, when `parts` is not an array, or when
 *   `parts` has more than `maxAdditionalParts` entries.
 */
function toMorePartsResponse(raw: unknown, maxAdditionalParts: number): MorePartsResponse {
  if (typeof raw !== 'object' || raw == null || Array.isArray(raw)) {
    throw new Error('Structured output must be an object');
  }

  const payload = raw as Record<string, unknown>;
  if (typeof payload.done !== 'boolean') {
    throw new Error('Structured output "done" must be a boolean');
  }
  if (typeof payload.reasoning !== 'string') {
    throw new Error('Structured output "reasoning" must be a string');
  }
  if (!Array.isArray(payload.parts)) {
    throw new Error('Structured output "parts" must be an array');
  }
  if (payload.parts.length > maxAdditionalParts) {
    throw new Error(`Structured output produced too many parts: ${payload.parts.length} > ${maxAdditionalParts}`);
  }

  const parts: PartDefinition[] = payload.parts.map((entry, index) => parsePartDefinitionEntry(entry, index));
  ensureUniquePartIds(parts);

  return {
    done: payload.done,
    reasoning: payload.reasoning,
    parts,
  };
}
|
||||
|
||||
/**
 * Caps a part's result text so it can be embedded in a follow-up prompt.
 *
 * Text of at most 2000 UTF-16 code units is returned unchanged. Longer text is
 * cut to that budget and followed by a "...[truncated]" marker on its own line.
 * When the cut would land between the two halves of a surrogate pair, it is
 * moved back by one unit so the result never ends in a lone high surrogate.
 *
 * @param content - Full text of a part result.
 * @returns The original text, or its truncated form with the marker appended.
 */
function summarizePartContent(content: string): string {
  const maxLength = 2000;
  if (content.length <= maxLength) {
    return content;
  }

  // content.length > maxLength here, so both code units exist.
  const lastKept = content.charCodeAt(maxLength - 1);
  const firstDropped = content.charCodeAt(maxLength);
  const splitsSurrogatePair =
    lastKept >= 0xd800 && lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  const cut = splitsSurrogatePair ? maxLength - 1 : maxLength;

  return `${content.slice(0, cut)}\n...[truncated]`;
}
|
||||
|
||||
/**
 * Builds the decomposition-only prompt sent to the agent.
 *
 * @param instruction - The original task text, appended verbatim under the task heading.
 * @param maxParts - Upper bound on the number of parts the agent is asked to produce.
 * @param language - `'ja'` selects the Japanese prompt; any other value (or none) the English one.
 * @returns The prompt as newline-joined lines.
 */
function buildDecomposePrompt(instruction: string, maxParts: number, language?: Language): string {
  if (language === 'ja') {
    return [
      '以下はタスク分解専用の指示です。タスクを実行せず、分解だけを行ってください。',
      '- ツールは使用しない',
      `- パート数は 1 以上 ${maxParts} 以下`,
      '- パートは互いに独立させる',
      '',
      '## 元タスク',
      instruction,
    ].join('\n');
  }

  return [
    'This is decomposition-only planning. Do not execute the task.',
    '- Do not use any tool',
    `- Produce between 1 and ${maxParts} independent parts`,
    '- Keep each part self-contained',
    '',
    '## Original Task',
    instruction,
  ].join('\n');
}
|
||||
|
||||
/**
 * Builds the prompt asking the agent whether more parts are needed after a batch has run.
 *
 * Each completed part is rendered as a `### id: title (status)` heading followed by its
 * content (passed through `summarizePartContent`); parts are separated by a blank line.
 *
 * @param originalInstruction - The original task text, embedded verbatim.
 * @param allResults - Results of the parts executed so far.
 * @param existingIds - Part ids the agent is told not to reuse.
 * @param maxAdditionalParts - Maximum number of new parts the agent may propose.
 * @param language - `'ja'` selects the Japanese prompt; any other value (or none) the English one.
 * @returns The prompt as newline-joined lines.
 */
function buildMorePartsPrompt(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  language?: Language,
): string {
  const resultBlock = allResults.map((result) => [
    `### ${result.id}: ${result.title} (${result.status})`,
    summarizePartContent(result.content),
  ].join('\n')).join('\n\n');

  if (language === 'ja') {
    return [
      '以下の実行結果を見て、追加のサブタスクが必要か判断してください。',
      '- ツールは使用しない',
      '',
      '## 元タスク',
      originalInstruction,
      '',
      '## 完了済みパート',
      // An empty block (no results yet) is shown as an explicit placeholder.
      resultBlock || '(なし)',
      '',
      '## 判断ルール',
      '- 追加作業が不要なら done=true にする',
      '- 追加作業が必要なら parts に新しいパートを入れる',
      '- 不足が複数ある場合は、可能な限り一括で複数パートを返す',
      `- 既存IDは再利用しない: ${existingIds.join(', ') || '(なし)'}`,
      `- 追加できる最大数: ${maxAdditionalParts}`,
    ].join('\n');
  }

  return [
    'Review completed part results and decide whether additional parts are needed.',
    '- Do not use any tool',
    '',
    '## Original Task',
    originalInstruction,
    '',
    '## Completed Parts',
    // An empty block (no results yet) is shown as an explicit placeholder.
    resultBlock || '(none)',
    '',
    '## Decision Rules',
    '- Set done=true when no additional work is required',
    '- If more work is needed, provide new parts in "parts"',
    '- If multiple missing tasks are known, return multiple new parts in one batch when possible',
    `- Do not reuse existing IDs: ${existingIds.join(', ') || '(none)'}`,
    `- Maximum additional parts: ${maxAdditionalParts}`,
  ].join('\n');
}
|
||||
export {
|
||||
evaluateCondition,
|
||||
judgeStatus,
|
||||
type EvaluateConditionOptions,
|
||||
type JudgeStatusOptions,
|
||||
type JudgeStatusResult,
|
||||
} from './judge-status-usecase.js';
|
||||
export {
|
||||
decomposeTask,
|
||||
requestMoreParts,
|
||||
type DecomposeTaskOptions,
|
||||
type MorePartsResponse,
|
||||
} from './decompose-task-usecase.js';
|
||||
|
||||
export async function executeAgent(
|
||||
persona: string | undefined,
|
||||
@ -175,175 +22,6 @@ export async function executeAgent(
|
||||
): Promise<AgentResponse> {
|
||||
return runAgent(persona, instruction, options);
|
||||
}
|
||||
|
||||
export const generateReport = executeAgent;
|
||||
export const executePart = executeAgent;
|
||||
|
||||
/**
 * Asks a judge agent which of `conditions` matches `agentOutput`.
 *
 * The agent is run for a single turn in readonly mode with the evaluation schema.
 *
 * @param agentOutput - Text to be judged.
 * @param conditions - Candidate conditions offered to the judge.
 * @param options - Working directory for the agent run.
 * @returns `-1` when the agent run did not finish with status `done`. Otherwise the
 *   zero-based position within `conditions` when the structured `matched_index`
 *   (treated as 1-based) is an in-range integer, else whatever
 *   `detectJudgeIndex(response.content)` yields.
 *   NOTE(review): the range of `detectJudgeIndex` results is not visible here — callers
 *   should keep bounds-checking the return value.
 */
export async function evaluateCondition(
  agentOutput: string,
  conditions: Array<{ index: number; text: string }>,
  options: EvaluateConditionOptions,
): Promise<number> {
  const prompt = buildJudgePrompt(agentOutput, conditions);
  const response = await runAgent(undefined, prompt, {
    cwd: options.cwd,
    maxTurns: 1,
    permissionMode: 'readonly',
    outputSchema: loadEvaluationSchema(),
  });

  if (response.status !== 'done') {
    return -1;
  }

  const matchedIndex = response.structuredOutput?.matched_index;
  if (typeof matchedIndex === 'number' && Number.isInteger(matchedIndex)) {
    const zeroBased = matchedIndex - 1;
    if (zeroBased >= 0 && zeroBased < conditions.length) {
      return zeroBased;
    }
  }

  // Structured output missing or out of range: fall back to parsing the text reply.
  return detectJudgeIndex(response.content);
}
|
||||
|
||||
/**
 * Determines which rule a movement's outcome matches, trying up to three stages in order:
 * structured output, tag detection, then an AI judge over the rule conditions.
 *
 * A single rule is selected automatically without running any agent. Rules flagged
 * `interactiveOnly` are only eligible when `options.interactive` is exactly `true`.
 *
 * @param structuredInstruction - Instruction for the Stage 1 structured-output run; it is
 *   also the text handed to the Stage 3 judge.
 * @param tagInstruction - Instruction for the Stage 2 tag-detection run.
 * @param rules - Candidate rules; must not be empty.
 * @param options - Working directory, movement name and optional run settings.
 * @returns The zero-based index of the matched rule and the stage that matched it.
 * @throws Error when `rules` is empty, or when no stage yields an eligible rule.
 */
export async function judgeStatus(
  structuredInstruction: string,
  tagInstruction: string,
  rules: PieceRule[],
  options: JudgeStatusOptions,
): Promise<JudgeStatusResult> {
  if (rules.length === 0) {
    throw new Error('judgeStatus requires at least one rule');
  }

  if (rules.length === 1) {
    return { ruleIndex: 0, method: 'auto_select' };
  }

  const interactiveEnabled = options.interactive === true;

  const isValidRuleIndex = (index: number): boolean => {
    if (index < 0 || index >= rules.length) return false;
    const rule = rules[index];
    return !(rule?.interactiveOnly && !interactiveEnabled);
  };

  const agentOptions = {
    cwd: options.cwd,
    maxTurns: 3,
    permissionMode: 'readonly' as const,
    language: options.language,
    onStream: options.onStream,
  };

  // Stage 1: Structured output
  const structuredResponse = await runAgent('conductor', structuredInstruction, {
    ...agentOptions,
    outputSchema: loadJudgmentSchema(),
  });

  if (structuredResponse.status === 'done') {
    const stepNumber = structuredResponse.structuredOutput?.step;
    if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) {
      // The structured `step` is 1-based.
      const ruleIndex = stepNumber - 1;
      if (isValidRuleIndex(ruleIndex)) {
        return { ruleIndex, method: 'structured_output' };
      }
    }
  }

  // Stage 2: Tag detection (dedicated call, no outputSchema)
  const tagResponse = await runAgent('conductor', tagInstruction, agentOptions);

  if (tagResponse.status === 'done') {
    const tagRuleIndex = detectRuleIndex(tagResponse.content, options.movementName);
    if (isValidRuleIndex(tagRuleIndex)) {
      return { ruleIndex: tagRuleIndex, method: 'phase3_tag' };
    }
  }

  // Stage 3: AI judge
  const conditions = rules
    .map((rule, index) => ({ rule, index }))
    .filter(({ rule }) => interactiveEnabled || !rule.interactiveOnly)
    .map(({ index, rule }) => ({ index, text: rule.condition }));

  if (conditions.length > 0) {
    const fallbackIndex = await evaluateCondition(structuredInstruction, conditions, { cwd: options.cwd });
    if (fallbackIndex >= 0 && fallbackIndex < conditions.length) {
      // Map the position in the filtered list back to the index in `rules`.
      const originalIndex = conditions[fallbackIndex]?.index;
      if (originalIndex !== undefined) {
        return { ruleIndex: originalIndex, method: 'ai_judge' };
      }
    }
  }

  throw new Error(`Status not found for movement "${options.movementName}"`);
}
|
||||
|
||||
/**
 * Asks an agent to split `instruction` into independent parts.
 *
 * The agent runs with no allowed tools, in readonly mode, for at most 4 turns, with the
 * decomposition schema for `maxParts`.
 *
 * @param instruction - The task to decompose.
 * @param maxParts - Maximum number of parts accepted.
 * @param options - Agent identity and run settings.
 * @returns Parts from the structured output when a `parts` value is present; otherwise
 *   parts parsed from the text reply via `parseParts`.
 * @throws Error `Team leader failed: …` when the run does not finish with status `done`,
 *   and whatever structured-output validation throws.
 */
export async function decomposeTask(
  instruction: string,
  maxParts: number,
  options: DecomposeTaskOptions,
): Promise<PartDefinition[]> {
  const response = await runAgent(options.persona, buildDecomposePrompt(instruction, maxParts, options.language), {
    cwd: options.cwd,
    personaPath: options.personaPath,
    language: options.language,
    model: options.model,
    provider: options.provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadDecompositionSchema(maxParts),
    onStream: options.onStream,
  });

  if (response.status !== 'done') {
    // Prefer the most specific failure description available.
    const detail = response.error || response.content || response.status;
    throw new Error(`Team leader failed: ${detail}`);
  }

  const parts = response.structuredOutput?.parts;
  if (parts != null) {
    return toPartDefinitions(parts, maxParts);
  }

  return parseParts(response.content, maxParts);
}
|
||||
|
||||
/**
 * Shows the agent the results so far and asks whether additional parts are needed.
 *
 * The agent runs with no allowed tools, in readonly mode, for at most 4 turns, with the
 * more-parts schema for `maxAdditionalParts`. Only the structured output is used; there is
 * no text fallback.
 *
 * @param originalInstruction - The original task text.
 * @param allResults - Results of the parts executed so far.
 * @param existingIds - Part ids that must not be reused.
 * @param maxAdditionalParts - Maximum number of new parts accepted.
 * @param options - Agent identity and run settings.
 * @throws Error `Team leader feedback failed: …` when the run does not finish with status
 *   `done`, and whatever structured-output validation throws.
 */
export async function requestMoreParts(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  options: DecomposeTaskOptions,
): Promise<MorePartsResponse> {
  const prompt = buildMorePartsPrompt(
    originalInstruction,
    allResults,
    existingIds,
    maxAdditionalParts,
    options.language,
  );

  const response = await runAgent(options.persona, prompt, {
    cwd: options.cwd,
    personaPath: options.personaPath,
    language: options.language,
    model: options.model,
    provider: options.provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadMorePartsSchema(maxAdditionalParts),
    onStream: options.onStream,
  });

  if (response.status !== 'done') {
    // Prefer the most specific failure description available.
    const detail = response.error || response.content || response.status;
    throw new Error(`Team leader feedback failed: ${detail}`);
  }

  return toMorePartsResponse(response.structuredOutput, maxAdditionalParts);
}
|
||||
|
||||
222
src/agents/decompose-task-usecase.ts
Normal file
222
src/agents/decompose-task-usecase.ts
Normal file
@ -0,0 +1,222 @@
|
||||
import type { Language, PartDefinition } from '../core/models/types.js';
|
||||
import { runAgent, type StreamCallback } from './runner.js';
|
||||
import { parseParts } from '../core/piece/engine/task-decomposer.js';
|
||||
import { loadDecompositionSchema, loadMorePartsSchema } from '../infra/resources/schema-loader.js';
|
||||
import { ensureUniquePartIds, parsePartDefinitionEntry } from '../core/piece/part-definition-validator.js';
|
||||
|
||||
/** Options shared by `decomposeTask` and `requestMoreParts`. */
export interface DecomposeTaskOptions {
  /** Working directory for the agent run. */
  cwd: string;
  /** Persona used as the agent identity for the run. */
  persona?: string;
  /** Path associated with the persona; forwarded unchanged. */
  personaPath?: string;
  /** Selects the prompt language; `'ja'` yields the Japanese prompt, anything else the English one. */
  language?: Language;
  /** Model override forwarded to the agent run. */
  model?: string;
  /** Provider override forwarded to the agent run. */
  provider?: 'claude' | 'codex' | 'opencode' | 'cursor' | 'copilot' | 'mock';
  /** Stream callback forwarded to the agent run. */
  onStream?: StreamCallback;
  /**
   * Receives the resolved system prompt and user instruction of the agent run.
   * Forwarded by `decomposeTask` only; `requestMoreParts` does not pass it on.
   */
  onPromptResolved?: (promptParts: {
    systemPrompt: string;
    userInstruction: string;
  }) => void;
}
|
||||
|
||||
/** Validated structured answer returned by `requestMoreParts`. */
export interface MorePartsResponse {
  /** The agent's `done` flag; the prompt asks for `true` when no additional work is required. */
  done: boolean;
  /** The agent's free-text `reasoning` field. */
  reasoning: string;
  /** Additional parts proposed by the agent (may be empty). */
  parts: PartDefinition[];
}
|
||||
|
||||
/**
 * Validates the raw `parts` value from a structured decomposition response and
 * converts each entry into a part definition.
 *
 * @param raw - Untrusted `parts` value taken from the agent's structured output.
 * @param maxParts - Maximum number of entries accepted.
 * @returns One definition per entry, in input order.
 * @throws Error when `raw` is not an array, is empty, or holds more than `maxParts` entries.
 *   NOTE(review): `parsePartDefinitionEntry` / `ensureUniquePartIds` presumably throw on
 *   malformed entries and duplicate ids — confirm in part-definition-validator.
 */
function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] {
  if (!Array.isArray(raw)) {
    throw new Error('Structured output "parts" must be an array');
  }
  if (raw.length === 0) {
    throw new Error('Structured output "parts" must not be empty');
  }
  if (raw.length > maxParts) {
    throw new Error(`Structured output produced too many parts: ${raw.length} > ${maxParts}`);
  }

  const parts: PartDefinition[] = raw.map((entry, index) => parsePartDefinitionEntry(entry, index));
  ensureUniquePartIds(parts);

  return parts;
}
|
||||
|
||||
/**
 * Validates the structured output of a "more parts?" request and converts it
 * into a `MorePartsResponse`.
 *
 * Unlike the initial decomposition, an empty `parts` array is accepted here.
 *
 * @param raw - Untrusted structured output from the agent.
 * @param maxAdditionalParts - Maximum number of entries accepted in `parts`.
 * @throws Error when `raw` is not a plain object, when `done` is not a boolean,
 *   when `reasoning` is not a string, when `parts` is not an array, or when
 *   `parts` has more than `maxAdditionalParts` entries.
 */
function toMorePartsResponse(raw: unknown, maxAdditionalParts: number): MorePartsResponse {
  if (typeof raw !== 'object' || raw == null || Array.isArray(raw)) {
    throw new Error('Structured output must be an object');
  }

  const payload = raw as Record<string, unknown>;
  if (typeof payload.done !== 'boolean') {
    throw new Error('Structured output "done" must be a boolean');
  }
  if (typeof payload.reasoning !== 'string') {
    throw new Error('Structured output "reasoning" must be a string');
  }
  if (!Array.isArray(payload.parts)) {
    throw new Error('Structured output "parts" must be an array');
  }
  if (payload.parts.length > maxAdditionalParts) {
    throw new Error(`Structured output produced too many parts: ${payload.parts.length} > ${maxAdditionalParts}`);
  }

  const parts: PartDefinition[] = payload.parts.map((entry, index) => parsePartDefinitionEntry(entry, index));
  ensureUniquePartIds(parts);

  return {
    done: payload.done,
    reasoning: payload.reasoning,
    parts,
  };
}
|
||||
|
||||
/**
 * Caps a part's result text so it can be embedded in a follow-up prompt.
 *
 * Text of at most 2000 UTF-16 code units is returned unchanged. Longer text is
 * cut to that budget and followed by a "...[truncated]" marker on its own line.
 * When the cut would land between the two halves of a surrogate pair, it is
 * moved back by one unit so the result never ends in a lone high surrogate.
 *
 * @param content - Full text of a part result.
 * @returns The original text, or its truncated form with the marker appended.
 */
function summarizePartContent(content: string): string {
  const maxLength = 2000;
  if (content.length <= maxLength) {
    return content;
  }

  // content.length > maxLength here, so both code units exist.
  const lastKept = content.charCodeAt(maxLength - 1);
  const firstDropped = content.charCodeAt(maxLength);
  const splitsSurrogatePair =
    lastKept >= 0xd800 && lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  const cut = splitsSurrogatePair ? maxLength - 1 : maxLength;

  return `${content.slice(0, cut)}\n...[truncated]`;
}
|
||||
|
||||
/**
 * Builds the decomposition-only prompt sent to the agent.
 *
 * @param instruction - The original task text, appended verbatim under the task heading.
 * @param maxParts - Upper bound on the number of parts the agent is asked to produce.
 * @param language - `'ja'` selects the Japanese prompt; any other value (or none) the English one.
 * @returns The prompt as newline-joined lines.
 */
function buildDecomposePrompt(instruction: string, maxParts: number, language?: Language): string {
  if (language === 'ja') {
    return [
      '以下はタスク分解専用の指示です。タスクを実行せず、分解だけを行ってください。',
      '- ツールは使用しない',
      `- パート数は 1 以上 ${maxParts} 以下`,
      '- パートは互いに独立させる',
      '',
      '## 元タスク',
      instruction,
    ].join('\n');
  }

  return [
    'This is decomposition-only planning. Do not execute the task.',
    '- Do not use any tool',
    `- Produce between 1 and ${maxParts} independent parts`,
    '- Keep each part self-contained',
    '',
    '## Original Task',
    instruction,
  ].join('\n');
}
|
||||
|
||||
/**
 * Builds the prompt asking the agent whether more parts are needed after a batch has run.
 *
 * Each completed part is rendered as a `### id: title (status)` heading followed by its
 * content (passed through `summarizePartContent`); parts are separated by a blank line.
 *
 * @param originalInstruction - The original task text, embedded verbatim.
 * @param allResults - Results of the parts executed so far.
 * @param existingIds - Part ids the agent is told not to reuse.
 * @param maxAdditionalParts - Maximum number of new parts the agent may propose.
 * @param language - `'ja'` selects the Japanese prompt; any other value (or none) the English one.
 * @returns The prompt as newline-joined lines.
 */
function buildMorePartsPrompt(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  language?: Language,
): string {
  const resultBlock = allResults.map((result) => [
    `### ${result.id}: ${result.title} (${result.status})`,
    summarizePartContent(result.content),
  ].join('\n')).join('\n\n');

  if (language === 'ja') {
    return [
      '以下の実行結果を見て、追加のサブタスクが必要か判断してください。',
      '- ツールは使用しない',
      '',
      '## 元タスク',
      originalInstruction,
      '',
      '## 完了済みパート',
      // An empty block (no results yet) is shown as an explicit placeholder.
      resultBlock || '(なし)',
      '',
      '## 判断ルール',
      '- 追加作業が不要なら done=true にする',
      '- 追加作業が必要なら parts に新しいパートを入れる',
      '- 不足が複数ある場合は、可能な限り一括で複数パートを返す',
      `- 既存IDは再利用しない: ${existingIds.join(', ') || '(なし)'}`,
      `- 追加できる最大数: ${maxAdditionalParts}`,
    ].join('\n');
  }

  return [
    'Review completed part results and decide whether additional parts are needed.',
    '- Do not use any tool',
    '',
    '## Original Task',
    originalInstruction,
    '',
    '## Completed Parts',
    // An empty block (no results yet) is shown as an explicit placeholder.
    resultBlock || '(none)',
    '',
    '## Decision Rules',
    '- Set done=true when no additional work is required',
    '- If more work is needed, provide new parts in "parts"',
    '- If multiple missing tasks are known, return multiple new parts in one batch when possible',
    `- Do not reuse existing IDs: ${existingIds.join(', ') || '(none)'}`,
    `- Maximum additional parts: ${maxAdditionalParts}`,
  ].join('\n');
}
|
||||
|
||||
/**
 * Asks an agent to split `instruction` into independent parts.
 *
 * The agent runs with no allowed tools, in readonly mode, for at most 4 turns, with the
 * decomposition schema for `maxParts`. `options.onPromptResolved` is forwarded to the run.
 *
 * @param instruction - The task to decompose.
 * @param maxParts - Maximum number of parts accepted.
 * @param options - Agent identity, run settings and optional callbacks.
 * @returns Parts from the structured output when a `parts` value is present; otherwise
 *   parts parsed from the text reply via `parseParts`.
 * @throws Error `Team leader failed: …` when the run does not finish with status `done`,
 *   and whatever structured-output validation throws.
 */
export async function decomposeTask(
  instruction: string,
  maxParts: number,
  options: DecomposeTaskOptions,
): Promise<PartDefinition[]> {
  const response = await runAgent(options.persona, buildDecomposePrompt(instruction, maxParts, options.language), {
    cwd: options.cwd,
    personaPath: options.personaPath,
    language: options.language,
    model: options.model,
    provider: options.provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadDecompositionSchema(maxParts),
    onStream: options.onStream,
    onPromptResolved: options.onPromptResolved,
  });

  if (response.status !== 'done') {
    // Prefer the most specific failure description available.
    const detail = response.error || response.content || response.status;
    throw new Error(`Team leader failed: ${detail}`);
  }

  const parts = response.structuredOutput?.parts;
  if (parts != null) {
    return toPartDefinitions(parts, maxParts);
  }

  return parseParts(response.content, maxParts);
}
|
||||
|
||||
/**
 * Shows the agent the results so far and asks whether additional parts are needed.
 *
 * The agent runs with no allowed tools, in readonly mode, for at most 4 turns, with the
 * more-parts schema for `maxAdditionalParts`. Only the structured output is used; there is
 * no text fallback. `options.onPromptResolved` is not forwarded by this function.
 *
 * @param originalInstruction - The original task text.
 * @param allResults - Results of the parts executed so far.
 * @param existingIds - Part ids that must not be reused.
 * @param maxAdditionalParts - Maximum number of new parts accepted.
 * @param options - Agent identity and run settings.
 * @throws Error `Team leader feedback failed: …` when the run does not finish with status
 *   `done`, and whatever structured-output validation throws.
 */
export async function requestMoreParts(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  options: DecomposeTaskOptions,
): Promise<MorePartsResponse> {
  const prompt = buildMorePartsPrompt(
    originalInstruction,
    allResults,
    existingIds,
    maxAdditionalParts,
    options.language,
  );

  const response = await runAgent(options.persona, prompt, {
    cwd: options.cwd,
    personaPath: options.personaPath,
    language: options.language,
    model: options.model,
    provider: options.provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadMorePartsSchema(maxAdditionalParts),
    onStream: options.onStream,
  });

  if (response.status !== 'done') {
    // Prefer the most specific failure description available.
    const detail = response.error || response.content || response.status;
    throw new Error(`Team leader feedback failed: ${detail}`);
  }

  return toMorePartsResponse(response.structuredOutput, maxAdditionalParts);
}
|
||||
184
src/agents/judge-status-usecase.ts
Normal file
184
src/agents/judge-status-usecase.ts
Normal file
@ -0,0 +1,184 @@
|
||||
import type { PieceRule, RuleMatchMethod, Language } from '../core/models/types.js';
|
||||
import { runAgent, type StreamCallback } from './runner.js';
|
||||
import { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js';
|
||||
import { loadJudgmentSchema, loadEvaluationSchema } from '../infra/resources/schema-loader.js';
|
||||
import { detectRuleIndex } from '../shared/utils/ruleIndex.js';
|
||||
|
||||
/** Options accepted by `judgeStatus`. */
export interface JudgeStatusOptions {
  /** Working directory forwarded to every agent run. */
  cwd: string;
  /** Movement name; passed to tag detection and used in error messages. */
  movementName: string;
  /** Language forwarded to the conductor agent runs. */
  language?: Language;
  /** Only when exactly `true` are rules flagged `interactiveOnly` eligible to match. */
  interactive?: boolean;
  /** Stream callback forwarded to the conductor agent runs. */
  onStream?: StreamCallback;
  /**
   * Called once for each stage that actually ran, in stage order, before that stage's
   * result is interpreted. `status` reflects whether the agent run finished with `done`,
   * not whether the stage produced a match.
   */
  onJudgeStage?: (entry: {
    stage: 1 | 2 | 3;
    method: 'structured_output' | 'phase3_tag' | 'ai_judge';
    status: 'done' | 'error' | 'skipped';
    instruction: string;
    response: string;
  }) => void;
  /** Forwarded as `onPromptResolved` to the Stage 1 (structured output) agent run only. */
  onStructuredPromptResolved?: (promptParts: {
    systemPrompt: string;
    userInstruction: string;
  }) => void;
}
|
||||
|
||||
/** Outcome of `judgeStatus`. */
export interface JudgeStatusResult {
  /** Zero-based index of the matched rule within the `rules` array given to `judgeStatus`. */
  ruleIndex: number;
  /** Which detection stage produced the match. */
  method: RuleMatchMethod;
}
|
||||
|
||||
/** Options accepted by `evaluateCondition`. */
export interface EvaluateConditionOptions {
  /** Working directory forwarded to the judge agent run. */
  cwd: string;
  /**
   * Called once after the judge agent returns and before its answer is interpreted.
   * `instruction` is the judge prompt that was sent; `status` is `'done'` when the run
   * finished with status `done` and `'error'` otherwise; `response` is the text reply.
   */
  onJudgeResponse?: (entry: {
    instruction: string;
    status: 'done' | 'error';
    response: string;
  }) => void;
}
|
||||
|
||||
/**
 * Asks a judge agent which of `conditions` matches `agentOutput`.
 *
 * The agent is run for a single turn in readonly mode with the evaluation schema.
 * `options.onJudgeResponse`, when given, is notified with the prompt and reply as soon as
 * the run returns, regardless of its status.
 *
 * @param agentOutput - Text to be judged.
 * @param conditions - Candidate conditions offered to the judge.
 * @param options - Working directory and optional response callback.
 * @returns `-1` when the agent run did not finish with status `done`. Otherwise the
 *   zero-based position within `conditions` when the structured `matched_index`
 *   (treated as 1-based) is an in-range integer, else whatever
 *   `detectJudgeIndex(response.content)` yields.
 *   NOTE(review): the range of `detectJudgeIndex` results is not visible here — callers
 *   should keep bounds-checking the return value.
 */
export async function evaluateCondition(
  agentOutput: string,
  conditions: Array<{ index: number; text: string }>,
  options: EvaluateConditionOptions,
): Promise<number> {
  const prompt = buildJudgePrompt(agentOutput, conditions);
  const response = await runAgent(undefined, prompt, {
    cwd: options.cwd,
    maxTurns: 1,
    permissionMode: 'readonly',
    outputSchema: loadEvaluationSchema(),
  });

  // Report the raw exchange first so failed runs are visible to the caller too.
  options.onJudgeResponse?.({
    instruction: prompt,
    status: response.status === 'done' ? 'done' : 'error',
    response: response.content,
  });

  if (response.status !== 'done') {
    return -1;
  }

  const matchedIndex = response.structuredOutput?.matched_index;
  if (typeof matchedIndex === 'number' && Number.isInteger(matchedIndex)) {
    const zeroBased = matchedIndex - 1;
    if (zeroBased >= 0 && zeroBased < conditions.length) {
      return zeroBased;
    }
  }

  // Structured output missing or out of range: fall back to parsing the text reply.
  return detectJudgeIndex(response.content);
}
|
||||
|
||||
/**
 * Determines which rule a movement's outcome matches, trying up to three stages in order:
 * structured output, tag detection, then an AI judge over the rule conditions.
 *
 * A single rule is selected automatically without running any agent. Rules flagged
 * `interactiveOnly` are only eligible when `options.interactive` is exactly `true`.
 * `options.onJudgeStage` is notified after each stage's agent run returns and before its
 * result is interpreted, so a stage that ran but did not match is still reported.
 *
 * @param structuredInstruction - Instruction for the Stage 1 structured-output run; it is
 *   also the text handed to the Stage 3 judge.
 * @param tagInstruction - Instruction for the Stage 2 tag-detection run.
 * @param rules - Candidate rules; must not be empty.
 * @param options - Working directory, movement name, optional run settings and callbacks.
 * @returns The zero-based index of the matched rule and the stage that matched it.
 * @throws Error when `rules` is empty, when Stage 3 ran without reporting its exchange,
 *   or when no stage yields an eligible rule.
 */
export async function judgeStatus(
  structuredInstruction: string,
  tagInstruction: string,
  rules: PieceRule[],
  options: JudgeStatusOptions,
): Promise<JudgeStatusResult> {
  if (rules.length === 0) {
    throw new Error('judgeStatus requires at least one rule');
  }

  if (rules.length === 1) {
    return { ruleIndex: 0, method: 'auto_select' };
  }

  const interactiveEnabled = options.interactive === true;

  const isValidRuleIndex = (index: number): boolean => {
    if (index < 0 || index >= rules.length) return false;
    const rule = rules[index];
    return !(rule?.interactiveOnly && !interactiveEnabled);
  };

  const agentOptions = {
    cwd: options.cwd,
    maxTurns: 3,
    permissionMode: 'readonly' as const,
    language: options.language,
    onStream: options.onStream,
  };

  // Stage 1: structured output.
  const structuredResponse = await runAgent('conductor', structuredInstruction, {
    ...agentOptions,
    outputSchema: loadJudgmentSchema(),
    onPromptResolved: options.onStructuredPromptResolved,
  });

  options.onJudgeStage?.({
    stage: 1,
    method: 'structured_output',
    status: structuredResponse.status === 'done' ? 'done' : 'error',
    instruction: structuredInstruction,
    response: structuredResponse.content,
  });

  if (structuredResponse.status === 'done') {
    const stepNumber = structuredResponse.structuredOutput?.step;
    if (typeof stepNumber === 'number' && Number.isInteger(stepNumber)) {
      // The structured `step` is 1-based.
      const ruleIndex = stepNumber - 1;
      if (isValidRuleIndex(ruleIndex)) {
        return { ruleIndex, method: 'structured_output' };
      }
    }
  }

  // Stage 2: tag detection (dedicated call, no outputSchema).
  const tagResponse = await runAgent('conductor', tagInstruction, agentOptions);

  options.onJudgeStage?.({
    stage: 2,
    method: 'phase3_tag',
    status: tagResponse.status === 'done' ? 'done' : 'error',
    instruction: tagInstruction,
    response: tagResponse.content,
  });

  if (tagResponse.status === 'done') {
    const tagRuleIndex = detectRuleIndex(tagResponse.content, options.movementName);
    if (isValidRuleIndex(tagRuleIndex)) {
      return { ruleIndex: tagRuleIndex, method: 'phase3_tag' };
    }
  }

  // Stage 3: AI judge over the conditions of the eligible rules.
  const conditions = rules
    .map((rule, index) => ({ rule, index }))
    .filter(({ rule }) => interactiveEnabled || !rule.interactiveOnly)
    .map(({ index, rule }) => ({ index, text: rule.condition }));

  if (conditions.length > 0) {
    // Filled in by the onJudgeResponse callback while evaluateCondition runs.
    let stage3Status: 'done' | 'error' | 'skipped' = 'skipped';
    let stage3Instruction = '';
    let stage3Response = '';
    const fallbackIndex = await evaluateCondition(structuredInstruction, conditions, {
      cwd: options.cwd,
      onJudgeResponse: (entry) => {
        stage3Status = entry.status;
        stage3Instruction = entry.instruction;
        stage3Response = entry.response;
      },
    });

    // The callback never fired (or reported no prompt): refuse to log a fabricated stage.
    if (stage3Status === 'skipped' || stage3Instruction === '') {
      throw new Error(`AI judge response missing for movement "${options.movementName}"`);
    }

    options.onJudgeStage?.({
      stage: 3,
      method: 'ai_judge',
      status: stage3Status,
      instruction: stage3Instruction,
      response: stage3Response,
    });

    if (fallbackIndex >= 0 && fallbackIndex < conditions.length) {
      // Map the position in the filtered list back to the index in `rules`.
      const originalIndex = conditions[fallbackIndex]?.index;
      if (originalIndex !== undefined) {
        return { ruleIndex: originalIndex, method: 'ai_judge' };
      }
    }
  }

  throw new Error(`Status not found for movement "${options.movementName}"`);
}
|
||||
@ -158,11 +158,18 @@ export class AgentRunner {
|
||||
const providerType = resolved.provider;
|
||||
const provider = getProvider(providerType);
|
||||
|
||||
const resolvedSystemPrompt = agentConfig.claudeAgent || agentConfig.claudeSkill
|
||||
? undefined
|
||||
: loadAgentPrompt(agentConfig, options.cwd);
|
||||
|
||||
options.onPromptResolved?.({
|
||||
systemPrompt: resolvedSystemPrompt ?? '',
|
||||
userInstruction: task,
|
||||
});
|
||||
|
||||
const agent = provider.setup({
|
||||
name: agentConfig.name,
|
||||
systemPrompt: agentConfig.claudeAgent || agentConfig.claudeSkill
|
||||
? undefined
|
||||
: loadAgentPrompt(agentConfig, options.cwd),
|
||||
systemPrompt: resolvedSystemPrompt,
|
||||
claudeAgent: agentConfig.claudeAgent,
|
||||
claudeSkill: agentConfig.claudeSkill,
|
||||
});
|
||||
@ -223,6 +230,10 @@ export class AgentRunner {
|
||||
}
|
||||
|
||||
const systemPrompt = loadTemplate('perform_agent_system_prompt', language, templateVars);
|
||||
options.onPromptResolved?.({
|
||||
systemPrompt,
|
||||
userInstruction: task,
|
||||
});
|
||||
const agent = provider.setup({ name: personaName, systemPrompt });
|
||||
return agent.call(task, callOptions);
|
||||
}
|
||||
@ -236,11 +247,19 @@ export class AgentRunner {
|
||||
return this.runCustom(agentConfig, task, options);
|
||||
}
|
||||
|
||||
options.onPromptResolved?.({
|
||||
systemPrompt: personaSpec,
|
||||
userInstruction: task,
|
||||
});
|
||||
const agent = provider.setup({ name: personaName, systemPrompt: personaSpec });
|
||||
return agent.call(task, callOptions);
|
||||
}
|
||||
|
||||
// 3. No persona specified — run with instruction_template only (no system prompt)
|
||||
options.onPromptResolved?.({
|
||||
systemPrompt: '',
|
||||
userInstruction: task,
|
||||
});
|
||||
const agent = provider.setup({ name: personaName });
|
||||
return agent.call(task, callOptions);
|
||||
}
|
||||
|
||||
@ -46,4 +46,8 @@ export interface RunAgentOptions {
|
||||
currentPosition: string;
|
||||
};
|
||||
outputSchema?: Record<string, unknown>;
|
||||
onPromptResolved?: (promptParts: {
|
||||
systemPrompt: string;
|
||||
userInstruction: string;
|
||||
}) => void;
|
||||
}
|
||||
|
||||
@ -22,7 +22,7 @@ import { incrementMovementIteration } from './state-manager.js';
|
||||
import { createLogger } from '../../../shared/utils/index.js';
|
||||
import type { OptionsBuilder } from './OptionsBuilder.js';
|
||||
import type { MovementExecutor } from './MovementExecutor.js';
|
||||
import type { PhaseName } from '../types.js';
|
||||
import type { PhaseName, PhasePromptParts } from '../types.js';
|
||||
|
||||
const log = createLogger('arpeggio-runner');
|
||||
|
||||
@ -37,8 +37,25 @@ export interface ArpeggioRunnerDeps {
|
||||
conditions: Array<{ index: number; text: string }>,
|
||||
options: { cwd: string }
|
||||
) => Promise<number>;
|
||||
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
||||
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
||||
readonly onPhaseStart?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
instruction: string,
|
||||
promptParts: PhasePromptParts,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
readonly onPhaseComplete?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
content: string,
|
||||
status: string,
|
||||
error?: string,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -185,6 +202,8 @@ export class ArpeggioRunner {
|
||||
batches,
|
||||
template,
|
||||
step,
|
||||
movementIteration,
|
||||
state.iteration,
|
||||
agentOptions,
|
||||
arpeggioConfig,
|
||||
semaphore,
|
||||
@ -244,6 +263,8 @@ export class ArpeggioRunner {
|
||||
batches: readonly DataBatch[],
|
||||
template: string,
|
||||
step: PieceMovement,
|
||||
movementIteration: number,
|
||||
iteration: number,
|
||||
agentOptions: RunAgentOptions,
|
||||
config: ArpeggioMovementConfig,
|
||||
semaphore: Semaphore,
|
||||
@ -251,20 +272,34 @@ export class ArpeggioRunner {
|
||||
const promises = batches.map(async (batch) => {
|
||||
await semaphore.acquire();
|
||||
try {
|
||||
this.deps.onPhaseStart?.(step, 1, 'execute', `[Arpeggio batch ${batch.batchIndex + 1}/${batch.totalBatches}]`);
|
||||
let didEmitPhaseStart = false;
|
||||
const phaseExecutionId = `${step.name}:1:${movementIteration}:${batch.batchIndex}`;
|
||||
const batchAgentOptions: RunAgentOptions = {
|
||||
...agentOptions,
|
||||
onPromptResolved: (promptParts) => {
|
||||
if (didEmitPhaseStart) return;
|
||||
this.deps.onPhaseStart?.(step, 1, 'execute', promptParts.userInstruction, promptParts, phaseExecutionId, iteration);
|
||||
didEmitPhaseStart = true;
|
||||
},
|
||||
};
|
||||
const result = await executeBatchWithRetry(
|
||||
batch,
|
||||
template,
|
||||
step.persona,
|
||||
agentOptions,
|
||||
batchAgentOptions,
|
||||
config.maxRetries,
|
||||
config.retryDelayMs,
|
||||
);
|
||||
if (!didEmitPhaseStart) {
|
||||
throw new Error(`Missing prompt parts for phase start: ${step.name}:1`);
|
||||
}
|
||||
this.deps.onPhaseComplete?.(
|
||||
step, 1, 'execute',
|
||||
result.content,
|
||||
result.success ? 'done' : 'error',
|
||||
result.error,
|
||||
phaseExecutionId,
|
||||
iteration,
|
||||
);
|
||||
return result;
|
||||
} finally {
|
||||
|
||||
@ -14,7 +14,7 @@ import type {
|
||||
AgentResponse,
|
||||
Language,
|
||||
} from '../../models/types.js';
|
||||
import type { PhaseName } from '../types.js';
|
||||
import type { PhaseName, PhasePromptParts, JudgeStageEntry } from '../types.js';
|
||||
import { executeAgent } from '../../../agents/agent-usecases.js';
|
||||
import { InstructionBuilder } from '../instruction/InstructionBuilder.js';
|
||||
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
|
||||
@ -45,8 +45,33 @@ export interface MovementExecutorDeps {
|
||||
conditions: Array<{ index: number; text: string }>,
|
||||
options: { cwd: string }
|
||||
) => Promise<number>;
|
||||
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
||||
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
||||
readonly onPhaseStart?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
instruction: string,
|
||||
promptParts: PhasePromptParts,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
readonly onPhaseComplete?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
content: string,
|
||||
status: string,
|
||||
error?: string,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
readonly onJudgeStage?: (
|
||||
step: PieceMovement,
|
||||
phase: 3,
|
||||
phaseName: 'judge',
|
||||
entry: JudgeStageEntry,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
}
|
||||
|
||||
export class MovementExecutor {
|
||||
@ -197,6 +222,8 @@ export class MovementExecutor {
|
||||
updatePersonaSession,
|
||||
this.deps.onPhaseStart,
|
||||
this.deps.onPhaseComplete,
|
||||
this.deps.onJudgeStage,
|
||||
state.iteration,
|
||||
);
|
||||
|
||||
// Phase 2: report output (resume same session, Write only)
|
||||
@ -276,11 +303,21 @@ export class MovementExecutor {
|
||||
});
|
||||
|
||||
// Phase 1: main execution (Write excluded if movement has report)
|
||||
this.deps.onPhaseStart?.(step, 1, 'execute', instruction);
|
||||
const agentOptions = this.deps.optionsBuilder.buildAgentOptions(step);
|
||||
let didEmitPhaseStart = false;
|
||||
const baseAgentOptions = this.deps.optionsBuilder.buildAgentOptions(step);
|
||||
const agentOptions = {
|
||||
...baseAgentOptions,
|
||||
onPromptResolved: (promptParts: PhasePromptParts) => {
|
||||
this.deps.onPhaseStart?.(step, 1, 'execute', instruction, promptParts, undefined, state.iteration);
|
||||
didEmitPhaseStart = true;
|
||||
},
|
||||
};
|
||||
let response = await executeAgent(step.persona, instruction, agentOptions);
|
||||
if (!didEmitPhaseStart) {
|
||||
throw new Error(`Missing prompt parts for phase start: ${step.name}:1`);
|
||||
}
|
||||
updatePersonaSession(sessionKey, response.sessionId);
|
||||
this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error);
|
||||
this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error, undefined, state.iteration);
|
||||
|
||||
// Provider failures should abort immediately.
|
||||
if (response.status === 'error') {
|
||||
|
||||
@ -3,7 +3,7 @@ import type { PieceMovement, PieceState, Language } from '../../models/types.js'
|
||||
import type { MovementProviderOptions } from '../../models/piece-types.js';
|
||||
import type { RunAgentOptions } from '../../../agents/runner.js';
|
||||
import type { PhaseRunnerContext } from '../phase-runner.js';
|
||||
import type { PieceEngineOptions, PhaseName, MovementProviderInfo } from '../types.js';
|
||||
import type { PieceEngineOptions, PhaseName, MovementProviderInfo, PhasePromptParts, JudgeStageEntry } from '../types.js';
|
||||
import { buildSessionKey } from '../session-key.js';
|
||||
import { resolveMovementProviderModel } from '../provider-resolution.js';
|
||||
|
||||
@ -158,8 +158,34 @@ export class OptionsBuilder {
|
||||
state: PieceState,
|
||||
lastResponse: string | undefined,
|
||||
updatePersonaSession: (persona: string, sessionId: string | undefined) => void,
|
||||
onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void,
|
||||
onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void,
|
||||
onPhaseStart?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
instruction: string,
|
||||
promptParts: PhasePromptParts,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void,
|
||||
onPhaseComplete?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
content: string,
|
||||
status: string,
|
||||
error?: string,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void,
|
||||
onJudgeStage?: (
|
||||
step: PieceMovement,
|
||||
phase: 3,
|
||||
phaseName: 'judge',
|
||||
entry: JudgeStageEntry,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void,
|
||||
iteration?: number,
|
||||
): PhaseRunnerContext {
|
||||
return {
|
||||
cwd: this.getCwd(),
|
||||
@ -174,6 +200,8 @@ export class OptionsBuilder {
|
||||
updatePersonaSession,
|
||||
onPhaseStart,
|
||||
onPhaseComplete,
|
||||
onJudgeStage,
|
||||
iteration,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@ -19,7 +19,7 @@ import { createLogger, getErrorMessage } from '../../../shared/utils/index.js';
|
||||
import { buildSessionKey } from '../session-key.js';
|
||||
import type { OptionsBuilder } from './OptionsBuilder.js';
|
||||
import type { MovementExecutor } from './MovementExecutor.js';
|
||||
import type { PieceEngineOptions, PhaseName } from '../types.js';
|
||||
import type { PieceEngineOptions, PhaseName, PhasePromptParts, JudgeStageEntry } from '../types.js';
|
||||
import type { ParallelLoggerOptions } from './parallel-logger.js';
|
||||
|
||||
const log = createLogger('parallel-runner');
|
||||
@ -37,8 +37,33 @@ export interface ParallelRunnerDeps {
|
||||
conditions: Array<{ index: number; text: string }>,
|
||||
options: { cwd: string }
|
||||
) => Promise<number>;
|
||||
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
||||
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
||||
readonly onPhaseStart?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
instruction: string,
|
||||
promptParts: PhasePromptParts,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
readonly onPhaseComplete?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
content: string,
|
||||
status: string,
|
||||
error?: string,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
readonly onJudgeStage?: (
|
||||
step: PieceMovement,
|
||||
phase: 3,
|
||||
phaseName: 'judge',
|
||||
entry: JudgeStageEntry,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
}
|
||||
|
||||
export class ParallelRunner {
|
||||
@ -86,6 +111,7 @@ export class ParallelRunner {
|
||||
subMovements.map(async (subMovement, index) => {
|
||||
const subIteration = incrementMovementIteration(state, subMovement.name);
|
||||
const subInstruction = this.deps.movementExecutor.buildInstruction(subMovement, subIteration, state, task, maxMovements);
|
||||
const parentIteration = state.iteration;
|
||||
|
||||
// Session key uses buildSessionKey (persona:provider) — same as normal movements.
|
||||
// This ensures sessions are shared across movements with the same persona+provider,
|
||||
@ -94,19 +120,33 @@ export class ParallelRunner {
|
||||
|
||||
// Phase 1: main execution (Write excluded if sub-movement has report)
|
||||
const baseOptions = this.deps.optionsBuilder.buildAgentOptions(subMovement);
|
||||
let didEmitPhaseStart = false;
|
||||
|
||||
// Override onStream with parallel logger's prefixed handler (immutable)
|
||||
const agentOptions = parallelLogger
|
||||
? { ...baseOptions, onStream: parallelLogger.createStreamHandler(subMovement.name, index) }
|
||||
: baseOptions;
|
||||
|
||||
this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction);
|
||||
: { ...baseOptions };
|
||||
agentOptions.onPromptResolved = (promptParts: PhasePromptParts) => {
|
||||
this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction, promptParts, undefined, parentIteration);
|
||||
didEmitPhaseStart = true;
|
||||
};
|
||||
const subResponse = await executeAgent(subMovement.persona, subInstruction, agentOptions);
|
||||
if (!didEmitPhaseStart) {
|
||||
throw new Error(`Missing prompt parts for phase start: ${subMovement.name}:1`);
|
||||
}
|
||||
updatePersonaSession(subSessionKey, subResponse.sessionId);
|
||||
this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error);
|
||||
this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error, undefined, parentIteration);
|
||||
|
||||
// Phase 2/3 context — no overrides needed, phase-runner uses buildSessionKey internally
|
||||
const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(state, subResponse.content, updatePersonaSession, this.deps.onPhaseStart, this.deps.onPhaseComplete);
|
||||
const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(
|
||||
state,
|
||||
subResponse.content,
|
||||
updatePersonaSession,
|
||||
this.deps.onPhaseStart,
|
||||
this.deps.onPhaseComplete,
|
||||
this.deps.onJudgeStage,
|
||||
parentIteration,
|
||||
);
|
||||
|
||||
// Phase 2: report output for sub-movement
|
||||
if (subMovement.outputContracts && subMovement.outputContracts.length > 0) {
|
||||
|
||||
@ -128,11 +128,26 @@ export class PieceEngine extends EventEmitter {
|
||||
getRetryNote: () => this.options.retryNote,
|
||||
detectRuleIndex: this.detectRuleIndex,
|
||||
callAiJudge: this.callAiJudge,
|
||||
onPhaseStart: (step, phase, phaseName, instruction) => {
|
||||
this.emit('phase:start', step, phase, phaseName, instruction);
|
||||
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||
},
|
||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
|
||||
},
|
||||
onJudgeStage: (step, phase, phaseName, entry, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:judge_stage', step, phase, phaseName, entry);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:judge_stage', step, phase, phaseName, entry, phaseExecutionId, iteration);
|
||||
},
|
||||
});
|
||||
|
||||
@ -145,11 +160,26 @@ export class PieceEngine extends EventEmitter {
|
||||
getInteractive: () => this.options.interactive === true,
|
||||
detectRuleIndex: this.detectRuleIndex,
|
||||
callAiJudge: this.callAiJudge,
|
||||
onPhaseStart: (step, phase, phaseName, instruction) => {
|
||||
this.emit('phase:start', step, phase, phaseName, instruction);
|
||||
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||
},
|
||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
|
||||
},
|
||||
onJudgeStage: (step, phase, phaseName, entry, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:judge_stage', step, phase, phaseName, entry);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:judge_stage', step, phase, phaseName, entry, phaseExecutionId, iteration);
|
||||
},
|
||||
});
|
||||
|
||||
@ -160,11 +190,19 @@ export class PieceEngine extends EventEmitter {
|
||||
getInteractive: () => this.options.interactive === true,
|
||||
detectRuleIndex: this.detectRuleIndex,
|
||||
callAiJudge: this.callAiJudge,
|
||||
onPhaseStart: (step, phase, phaseName, instruction) => {
|
||||
this.emit('phase:start', step, phase, phaseName, instruction);
|
||||
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||
},
|
||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
|
||||
},
|
||||
});
|
||||
|
||||
@ -176,11 +214,19 @@ export class PieceEngine extends EventEmitter {
|
||||
getInteractive: () => this.options.interactive === true,
|
||||
detectRuleIndex: this.detectRuleIndex,
|
||||
callAiJudge: this.callAiJudge,
|
||||
onPhaseStart: (step, phase, phaseName, instruction) => {
|
||||
this.emit('phase:start', step, phase, phaseName, instruction);
|
||||
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||
},
|
||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
|
||||
if (phaseExecutionId == null && iteration == null) {
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||
return;
|
||||
}
|
||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ import { createPartMovement, resolvePartErrorDetail, summarizeParts } from './te
|
||||
import { buildTeamLeaderParallelLoggerOptions, emitTeamLeaderProgressHint } from './team-leader-streaming.js';
|
||||
import type { OptionsBuilder } from './OptionsBuilder.js';
|
||||
import type { MovementExecutor } from './MovementExecutor.js';
|
||||
import type { PieceEngineOptions, PhaseName } from '../types.js';
|
||||
import type { PieceEngineOptions, PhaseName, PhasePromptParts } from '../types.js';
|
||||
|
||||
const log = createLogger('team-leader-runner');
|
||||
const MAX_TOTAL_PARTS = 20;
|
||||
@ -34,8 +34,25 @@ export interface TeamLeaderRunnerDeps {
|
||||
conditions: Array<{ index: number; text: string }>,
|
||||
options: { cwd: string }
|
||||
) => Promise<number>;
|
||||
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
||||
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
||||
readonly onPhaseStart?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
instruction: string,
|
||||
promptParts: PhasePromptParts,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
readonly onPhaseComplete?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
content: string,
|
||||
status: string,
|
||||
error?: string,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
}
|
||||
|
||||
export class TeamLeaderRunner {
|
||||
@ -54,6 +71,7 @@ export class TeamLeaderRunner {
|
||||
throw new Error(`Movement "${step.name}" has no teamLeader configuration`);
|
||||
}
|
||||
const teamLeaderConfig = step.teamLeader;
|
||||
const parentIteration = state.iteration;
|
||||
|
||||
const movementIteration = incrementMovementIteration(state, step.name);
|
||||
const leaderStep: PieceMovement = {
|
||||
@ -72,7 +90,7 @@ export class TeamLeaderRunner {
|
||||
);
|
||||
|
||||
emitTeamLeaderProgressHint(this.deps.engineOptions, 'decompose');
|
||||
this.deps.onPhaseStart?.(leaderStep, 1, 'execute', instruction);
|
||||
let didEmitPhaseStart = false;
|
||||
const parts = await decomposeTask(instruction, teamLeaderConfig.maxParts, {
|
||||
cwd: this.deps.getCwd(),
|
||||
persona: leaderStep.persona,
|
||||
@ -80,14 +98,21 @@ export class TeamLeaderRunner {
|
||||
model: leaderModel,
|
||||
provider: leaderProvider,
|
||||
onStream: this.deps.engineOptions.onStream,
|
||||
onPromptResolved: (promptParts) => {
|
||||
this.deps.onPhaseStart?.(leaderStep, 1, 'execute', promptParts.userInstruction, promptParts, undefined, parentIteration);
|
||||
didEmitPhaseStart = true;
|
||||
},
|
||||
});
|
||||
if (!didEmitPhaseStart) {
|
||||
throw new Error(`Missing prompt parts for phase start: ${leaderStep.name}:1`);
|
||||
}
|
||||
const leaderResponse: AgentResponse = {
|
||||
persona: leaderStep.persona ?? leaderStep.name,
|
||||
status: 'done',
|
||||
content: JSON.stringify({ parts }, null, 2),
|
||||
timestamp: new Date(),
|
||||
};
|
||||
this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error);
|
||||
this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error, undefined, parentIteration);
|
||||
log.debug('Team leader decomposed parts', {
|
||||
movement: step.name,
|
||||
partCount: parts.length,
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||
import { dirname, resolve, sep } from 'node:path';
|
||||
import type { PieceMovement, Language, AgentResponse } from '../models/types.js';
|
||||
import type { PhaseName } from './types.js';
|
||||
import type { PhaseName, PhasePromptParts, JudgeStageEntry } from './types.js';
|
||||
import type { RunAgentOptions } from '../../agents/runner.js';
|
||||
import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js';
|
||||
import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js';
|
||||
@ -33,6 +33,8 @@ export interface PhaseRunnerContext {
|
||||
interactive?: boolean;
|
||||
/** Last response from Phase 1 */
|
||||
lastResponse?: string;
|
||||
/** Parent piece iteration for sub-movement phase events */
|
||||
iteration?: number;
|
||||
/** Get persona session ID */
|
||||
getSessionId: (persona: string) => string | undefined;
|
||||
/** Build resume options for a movement */
|
||||
@ -44,9 +46,35 @@ export interface PhaseRunnerContext {
|
||||
/** Stream callback for provider event logging (passed to judgeStatus) */
|
||||
onStream?: import('../../agents/types.js').StreamCallback;
|
||||
/** Callback for phase lifecycle logging */
|
||||
onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
||||
onPhaseStart?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
instruction: string,
|
||||
promptParts: PhasePromptParts,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
/** Callback for phase completion logging */
|
||||
onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
||||
onPhaseComplete?: (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
content: string,
|
||||
status: string,
|
||||
error?: string,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
/** Callback for Phase 3 internal stage logging */
|
||||
onJudgeStage?: (
|
||||
step: PieceMovement,
|
||||
phase: 3,
|
||||
phaseName: 'judge',
|
||||
entry: JudgeStageEntry,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -207,35 +235,45 @@ async function runSingleReportAttempt(
|
||||
options: RunAgentOptions,
|
||||
ctx: PhaseRunnerContext,
|
||||
): Promise<ReportAttemptResult> {
|
||||
ctx.onPhaseStart?.(step, 2, 'report', instruction);
|
||||
let didEmitPhaseStart = false;
|
||||
const callOptions: RunAgentOptions = {
|
||||
...options,
|
||||
onPromptResolved: (promptParts) => {
|
||||
ctx.onPhaseStart?.(step, 2, 'report', instruction, promptParts, undefined, ctx.iteration);
|
||||
didEmitPhaseStart = true;
|
||||
},
|
||||
};
|
||||
|
||||
let response: AgentResponse;
|
||||
try {
|
||||
response = await executeAgent(step.persona, instruction, options);
|
||||
response = await executeAgent(step.persona, instruction, callOptions);
|
||||
if (!didEmitPhaseStart) {
|
||||
throw new Error(`Missing prompt parts for phase start: ${step.name}:2`);
|
||||
}
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg);
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg, undefined, ctx.iteration);
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (response.status === 'blocked') {
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status);
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, undefined, undefined, ctx.iteration);
|
||||
return { kind: 'blocked', response };
|
||||
}
|
||||
|
||||
if (response.status !== 'done') {
|
||||
const errorMessage = response.error || response.content || 'Unknown error';
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, errorMessage);
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, errorMessage, undefined, ctx.iteration);
|
||||
return { kind: 'retryable_failure', errorMessage };
|
||||
}
|
||||
|
||||
const trimmedContent = response.content.trim();
|
||||
if (trimmedContent.length === 0) {
|
||||
const errorMessage = 'Report output is empty';
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, 'error', errorMessage);
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, 'error', errorMessage, undefined, ctx.iteration);
|
||||
return { kind: 'retryable_failure', errorMessage };
|
||||
}
|
||||
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status);
|
||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, undefined, undefined, ctx.iteration);
|
||||
return { kind: 'success', content: trimmedContent, response };
|
||||
}
|
||||
|
||||
@ -6,6 +6,7 @@ import { StatusJudgmentBuilder, type StatusJudgmentContext } from './instruction
|
||||
import { getJudgmentReportFiles } from './evaluation/rule-utils.js';
|
||||
import { createLogger } from '../../shared/utils/index.js';
|
||||
import type { PhaseRunnerContext } from './phase-runner.js';
|
||||
import { buildPhaseExecutionId } from '../../shared/utils/phaseExecutionId.js';
|
||||
|
||||
const log = createLogger('phase-runner');
|
||||
|
||||
@ -85,8 +86,29 @@ export async function runStatusJudgmentPhase(
|
||||
const tagInstruction = new StatusJudgmentBuilder(step, {
|
||||
...baseContext,
|
||||
}).build();
|
||||
if (!ctx.iteration || !Number.isInteger(ctx.iteration) || ctx.iteration <= 0) {
|
||||
throw new Error(`Status judgment requires iteration for movement "${step.name}"`);
|
||||
}
|
||||
const phaseExecutionId = buildPhaseExecutionId({
|
||||
step: step.name,
|
||||
iteration: ctx.iteration,
|
||||
phase: 3,
|
||||
sequence: 1,
|
||||
});
|
||||
|
||||
let didEmitPhaseStart = false;
|
||||
const emitPhaseStart = (promptParts: { systemPrompt: string; userInstruction: string }): void => {
|
||||
ctx.onPhaseStart?.(step, 3, 'judge', structuredInstruction, promptParts, phaseExecutionId, ctx.iteration);
|
||||
didEmitPhaseStart = true;
|
||||
};
|
||||
|
||||
if (step.rules.length === 1) {
|
||||
emitPhaseStart({
|
||||
systemPrompt: '',
|
||||
userInstruction: structuredInstruction,
|
||||
});
|
||||
}
|
||||
|
||||
ctx.onPhaseStart?.(step, 3, 'judge', structuredInstruction);
|
||||
try {
|
||||
const result = await judgeStatus(structuredInstruction, tagInstruction, step.rules, {
|
||||
cwd: ctx.cwd,
|
||||
@ -94,13 +116,24 @@ export async function runStatusJudgmentPhase(
|
||||
language: ctx.language,
|
||||
interactive: ctx.interactive,
|
||||
onStream: ctx.onStream,
|
||||
onStructuredPromptResolved: (promptParts) => {
|
||||
if (!didEmitPhaseStart) {
|
||||
emitPhaseStart(promptParts);
|
||||
}
|
||||
},
|
||||
onJudgeStage: (entry) => {
|
||||
ctx.onJudgeStage?.(step, 3, 'judge', entry, phaseExecutionId, ctx.iteration);
|
||||
},
|
||||
});
|
||||
if (!didEmitPhaseStart) {
|
||||
throw new Error(`Missing prompt parts for phase start: ${step.name}:3`);
|
||||
}
|
||||
const tag = `[${step.name.toUpperCase()}:${result.ruleIndex + 1}]`;
|
||||
ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done');
|
||||
ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done', undefined, phaseExecutionId, ctx.iteration);
|
||||
return { tag, ruleIndex: result.ruleIndex, method: result.method };
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg);
|
||||
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg, phaseExecutionId, ctx.iteration);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
@ -78,6 +78,19 @@ export type AiJudgeCaller = (
|
||||
|
||||
export type PhaseName = 'execute' | 'report' | 'judge';
|
||||
|
||||
/**
 * Prompt text that accompanies a `phase:start` event, split into the two
 * parts that are logged separately.
 */
export interface PhasePromptParts {
  /** System prompt text for the phase; may be an empty string. */
  systemPrompt: string;
  /** User-facing instruction text for the phase. */
  userInstruction: string;
}
|
||||
|
||||
/**
 * One log entry describing a single internal stage of the Phase 3 status
 * judgment, as delivered through the `phase:judge_stage` event.
 */
export interface JudgeStageEntry {
  /** Ordinal of the judgment stage that produced this entry. */
  stage: 1 | 2 | 3;
  /**
   * Detection method used by the stage.
   * NOTE(review): presumably stage 1 = structured_output, 2 = phase3_tag,
   * 3 = ai_judge — confirm against `judgeStatus`.
   */
  method: 'structured_output' | 'phase3_tag' | 'ai_judge';
  /** Outcome reported for the stage. */
  status: 'done' | 'error' | 'skipped';
  /** Instruction text that was used for the stage. */
  instruction: string;
  /** Response text that was received for the stage. */
  response: string;
}
|
||||
|
||||
/** Provider and model info resolved for a movement */
|
||||
export interface MovementProviderInfo {
|
||||
provider: ProviderType | undefined;
|
||||
@ -91,8 +104,33 @@ export interface PieceEvents {
|
||||
'movement:report': (step: PieceMovement, filePath: string, fileName: string) => void;
|
||||
'movement:blocked': (step: PieceMovement, response: AgentResponse) => void;
|
||||
'movement:user_input': (step: PieceMovement, userInput: string) => void;
|
||||
'phase:start': (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
||||
'phase:complete': (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
||||
'phase:start': (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
instruction: string,
|
||||
promptParts: PhasePromptParts,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
'phase:complete': (
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
content: string,
|
||||
status: string,
|
||||
error?: string,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
'phase:judge_stage': (
|
||||
step: PieceMovement,
|
||||
phase: 3,
|
||||
phaseName: 'judge',
|
||||
entry: JudgeStageEntry,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
) => void;
|
||||
'piece:complete': (state: PieceState) => void;
|
||||
'piece:abort': (state: PieceState, reason: string) => void;
|
||||
'iteration:limit': (iteration: number, maxMovements: number) => void;
|
||||
|
||||
@ -11,7 +11,7 @@ import { isQuietMode } from '../../../shared/context.js';
|
||||
import { StreamDisplay } from '../../../shared/ui/index.js';
|
||||
import { TaskPrefixWriter } from '../../../shared/ui/TaskPrefixWriter.js';
|
||||
import { generateSessionId, createSessionLog, finalizeSessionLog, initNdjsonLog } from '../../../infra/fs/index.js';
|
||||
import { createLogger, notifySuccess, notifyError, preventSleep, generateReportDir, isValidReportDirName } from '../../../shared/utils/index.js';
|
||||
import { createLogger, notifySuccess, notifyError, preventSleep, generateReportDir, isValidReportDirName, getDebugPromptsLogFile } from '../../../shared/utils/index.js';
|
||||
import { createProviderEventLogger, isProviderEventsEnabled } from '../../../shared/utils/providerEventLogger.js';
|
||||
import { getLabel } from '../../../shared/i18n/index.js';
|
||||
import { buildRunPaths } from '../../../core/piece/run/run-paths.js';
|
||||
@ -25,9 +25,9 @@ import { createOutputFns, createPrefixedStreamHandler } from './outputFns.js';
|
||||
import { RunMetaManager } from './runMeta.js';
|
||||
import { createIterationLimitHandler, createUserInputHandler } from './iterationLimitHandler.js';
|
||||
import { assertTaskPrefixPair, truncate, formatElapsedTime } from './pieceExecutionUtils.js';
|
||||
|
||||
import { createTraceReportWriter } from './traceReportWriter.js';
|
||||
import { sanitizeTextForStorage } from './traceReportRedaction.js';
|
||||
export type { PieceExecutionResult, PieceExecutionOptions };
|
||||
|
||||
const log = createLogger('piece');
|
||||
|
||||
export async function executePiece(
|
||||
@ -39,12 +39,10 @@ export async function executePiece(
|
||||
const { headerPrefix = 'Running Piece:', interactiveUserInput = false } = options;
|
||||
const projectCwd = options.projectCwd;
|
||||
assertTaskPrefixPair(options.taskPrefix, options.taskColorIndex);
|
||||
|
||||
const prefixWriter = options.taskPrefix != null
|
||||
? new TaskPrefixWriter({ taskName: options.taskPrefix, colorIndex: options.taskColorIndex!, displayLabel: options.taskDisplayLabel })
|
||||
: undefined;
|
||||
const out = createOutputFns(prefixWriter);
|
||||
|
||||
const isRetry = Boolean(options.startMovement || options.retryNote);
|
||||
log.debug('Session mode', { isRetry, isWorktree: cwd !== projectCwd });
|
||||
out.header(`${headerPrefix} ${pieceConfig.name}`);
|
||||
@ -52,18 +50,9 @@ export async function executePiece(
|
||||
const pieceSessionId = generateSessionId();
|
||||
const runSlug = options.reportDirName ?? generateReportDir(task);
|
||||
if (!isValidReportDirName(runSlug)) throw new Error(`Invalid reportDirName: ${runSlug}`);
|
||||
|
||||
const runPaths = buildRunPaths(cwd, runSlug);
|
||||
const runMetaManager = new RunMetaManager(runPaths, task, pieceConfig.name);
|
||||
|
||||
let sessionLog = createSessionLog(task, projectCwd, pieceConfig.name);
|
||||
const ndjsonLogPath = initNdjsonLog(pieceSessionId, task, pieceConfig.name, { logsDir: runPaths.logsAbs });
|
||||
const sessionLogger = new SessionLogger(ndjsonLogPath);
|
||||
|
||||
if (options.interactiveMetadata) {
|
||||
sessionLogger.writeInteractiveMetadata(options.interactiveMetadata);
|
||||
}
|
||||
|
||||
const displayRef: { current: StreamDisplay | null } = { current: null };
|
||||
const streamHandler = prefixWriter
|
||||
? createPrefixedStreamHandler(prefixWriter)
|
||||
@ -71,12 +60,23 @@ export async function executePiece(
|
||||
if (!displayRef.current || event.type === 'result') return;
|
||||
displayRef.current.createHandler()(event);
|
||||
};
|
||||
|
||||
const isWorktree = cwd !== projectCwd;
|
||||
const globalConfig = resolvePieceConfigValues(
|
||||
projectCwd,
|
||||
['notificationSound', 'notificationSoundEvents', 'provider', 'runtime', 'preventSleep', 'model', 'logging', 'analytics'],
|
||||
);
|
||||
const traceReportMode = globalConfig.logging?.trace === true ? 'full' : 'redacted';
|
||||
const allowSensitiveData = traceReportMode === 'full';
|
||||
const ndjsonLogPath = initNdjsonLog(
|
||||
pieceSessionId,
|
||||
sanitizeTextForStorage(task, allowSensitiveData),
|
||||
pieceConfig.name,
|
||||
{ logsDir: runPaths.logsAbs },
|
||||
);
|
||||
const sessionLogger = new SessionLogger(ndjsonLogPath, allowSensitiveData);
|
||||
if (options.interactiveMetadata) {
|
||||
sessionLogger.writeInteractiveMetadata(options.interactiveMetadata);
|
||||
}
|
||||
const shouldNotify = globalConfig.notificationSound !== false;
|
||||
const nse = globalConfig.notificationSoundEvents;
|
||||
const shouldNotifyIterationLimit = shouldNotify && nse?.iterationLimit !== false;
|
||||
@ -98,10 +98,8 @@ export async function executePiece(
|
||||
movement: options.startMovement ?? pieceConfig.initialMovement,
|
||||
enabled: isProviderEventsEnabled(globalConfig),
|
||||
});
|
||||
|
||||
initAnalyticsWriter(globalConfig.analytics?.enabled === true, globalConfig.analytics?.eventsPath ?? join(getGlobalConfigDir(), 'analytics', 'events'));
|
||||
if (globalConfig.preventSleep) preventSleep();
|
||||
|
||||
const analyticsEmitter = new AnalyticsEmitter(runSlug, currentProvider, configuredModel ?? '(default)');
|
||||
const savedSessions = isRetry
|
||||
? (isWorktree ? loadWorktreeSessions(projectCwd, cwd, currentProvider) : loadPersonaSessions(projectCwd, currentProvider))
|
||||
@ -128,12 +126,22 @@ export async function executePiece(
|
||||
let exceededInfo: ExceededInfo | undefined;
|
||||
let lastMovementContent: string | undefined;
|
||||
let lastMovementName: string | undefined;
|
||||
const writeTraceReportOnce = createTraceReportWriter({
|
||||
sessionLogger,
|
||||
ndjsonLogPath,
|
||||
tracePath: join(runPaths.runRootAbs, 'trace.md'),
|
||||
pieceName: pieceConfig.name,
|
||||
task,
|
||||
runSlug,
|
||||
promptLogPath: getDebugPromptsLogFile() ?? undefined,
|
||||
mode: traceReportMode,
|
||||
logger: log,
|
||||
});
|
||||
let currentIteration = 0;
|
||||
const movementIterations = new Map<string, number>();
|
||||
let engine: PieceEngine | null = null;
|
||||
const runAbortController = new AbortController();
|
||||
const abortHandler = new AbortHandler({ externalSignal: options.abortSignal, internalController: runAbortController, getEngine: () => engine });
|
||||
|
||||
try {
|
||||
engine = new PieceEngine(effectivePieceConfig, cwd, task, {
|
||||
abortSignal: runAbortController.signal,
|
||||
@ -161,20 +169,21 @@ export async function executePiece(
|
||||
taskColorIndex: options.taskColorIndex,
|
||||
initialIteration: options.initialIterationOverride,
|
||||
});
|
||||
|
||||
abortHandler.install();
|
||||
|
||||
engine.on('phase:start', (step, phase, phaseName, instruction) => {
|
||||
engine.on('phase:start', (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||
log.debug('Phase starting', { step: step.name, phase, phaseName });
|
||||
sessionLogger.onPhaseStart(step, phase, phaseName, instruction);
|
||||
sessionLogger.onPhaseStart(step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||
});
|
||||
|
||||
engine.on('phase:complete', (step, phase, phaseName, content, phaseStatus, phaseError) => {
|
||||
engine.on('phase:complete', (step, phase, phaseName, content, phaseStatus, phaseError, phaseExecutionId, iteration) => {
|
||||
log.debug('Phase completed', { step: step.name, phase, phaseName, status: phaseStatus });
|
||||
sessionLogger.setIteration(currentIteration);
|
||||
sessionLogger.onPhaseComplete(step, phase, phaseName, content, phaseStatus, phaseError);
|
||||
sessionLogger.onPhaseComplete(step, phase, phaseName, content, phaseStatus, phaseError, phaseExecutionId, iteration);
|
||||
});
|
||||
|
||||
engine.on('phase:judge_stage', (step, phase, phaseName, entry, phaseExecutionId, iteration) => {
|
||||
sessionLogger.onJudgeStage(step, phase, phaseName, entry, phaseExecutionId, iteration);
|
||||
});
|
||||
engine.on('movement:start', (step, iteration, instruction, providerInfo) => {
|
||||
log.debug('Movement starting', { step: step.name, persona: step.personaDisplayName, iteration });
|
||||
currentIteration = iteration;
|
||||
@ -234,6 +243,11 @@ export async function executePiece(
|
||||
sessionLog = finalizeSessionLog(sessionLog, 'completed');
|
||||
sessionLogger.onPieceComplete(state);
|
||||
runMetaManager.finalize('completed', state.iteration);
|
||||
writeTraceReportOnce({
|
||||
status: 'completed',
|
||||
iterations: state.iteration,
|
||||
endTime: new Date().toISOString(),
|
||||
});
|
||||
try {
|
||||
saveSessionState(projectCwd, { status: 'success', taskResult: truncate(lastMovementContent ?? '', 1000), timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState);
|
||||
} catch (error) { log.error('Failed to save session state', { error }); }
|
||||
@ -252,6 +266,12 @@ export async function executePiece(
|
||||
sessionLog = finalizeSessionLog(sessionLog, 'aborted');
|
||||
sessionLogger.onPieceAbort(state, reason);
|
||||
runMetaManager.finalize('aborted', state.iteration);
|
||||
writeTraceReportOnce({
|
||||
status: 'aborted',
|
||||
iterations: state.iteration,
|
||||
reason,
|
||||
endTime: new Date().toISOString(),
|
||||
});
|
||||
try {
|
||||
saveSessionState(projectCwd, { status: reason === 'user_interrupted' ? 'user_stopped' : 'error', errorMessage: reason, timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState);
|
||||
} catch (error) { log.error('Failed to save session state', { error }); }
|
||||
|
||||
@ -2,7 +2,6 @@
|
||||
* Session logger — NDJSON ログ書き出し専用モジュール
|
||||
*
|
||||
* PieceEngine のイベントを受け取り、NDJSON セッションログへ追記する責務を担う。
|
||||
* analytics や UI 出力は担当しない。
|
||||
*/
|
||||
|
||||
import {
|
||||
@ -13,14 +12,16 @@ import {
|
||||
type NdjsonPieceAbort,
|
||||
type NdjsonPhaseStart,
|
||||
type NdjsonPhaseComplete,
|
||||
type NdjsonPhaseJudgeStage,
|
||||
type NdjsonInteractiveStart,
|
||||
type NdjsonInteractiveEnd,
|
||||
} from '../../../infra/fs/index.js';
|
||||
import type { InteractiveMetadata } from './types.js';
|
||||
import { isDebugEnabled, writePromptLog } from '../../../shared/utils/index.js';
|
||||
import type { PromptLogRecord } from '../../../shared/utils/index.js';
|
||||
import type { PromptLogRecord, NdjsonRecord } from '../../../shared/utils/index.js';
|
||||
import type { PieceMovement, AgentResponse, PieceState } from '../../../core/models/index.js';
|
||||
import type { PhaseName } from '../../../core/piece/index.js';
|
||||
import type { JudgeStageEntry, PhasePromptParts } from '../../../core/piece/types.js';
|
||||
import { sanitizeTextForStorage } from './traceReportRedaction.js';
|
||||
|
||||
function toJudgmentMatchMethod(
|
||||
matchedRuleMethod: string | undefined,
|
||||
@ -34,29 +35,30 @@ function toJudgmentMatchMethod(
|
||||
|
||||
export class SessionLogger {
|
||||
private readonly ndjsonLogPath: string;
|
||||
/** phase 開始時のプロンプトを一時保持(デバッグ用) */
|
||||
private readonly phasePrompts = new Map<string, string>();
|
||||
/** 現在のピース全体のイテレーション数 */
|
||||
private readonly allowSensitiveData: boolean;
|
||||
private readonly phasePromptsByExecutionId = new Map<string, PhasePromptParts>();
|
||||
private readonly phaseExecutionCounters = new Map<string, number>();
|
||||
private readonly ndjsonRecords: NdjsonRecord[] = [];
|
||||
private readonly promptRecords: PromptLogRecord[] = [];
|
||||
private currentIteration = 0;
|
||||
|
||||
constructor(ndjsonLogPath: string) {
|
||||
constructor(ndjsonLogPath: string, allowSensitiveData: boolean) {
|
||||
this.ndjsonLogPath = ndjsonLogPath;
|
||||
this.allowSensitiveData = allowSensitiveData;
|
||||
}
|
||||
|
||||
/** インタラクティブモードのメタデータ(interactive_start / interactive_end)を NDJSON へ記録する */
|
||||
writeInteractiveMetadata(meta: InteractiveMetadata): void {
|
||||
const startRecord: NdjsonInteractiveStart = { type: 'interactive_start', timestamp: new Date().toISOString() };
|
||||
appendNdjsonLine(this.ndjsonLogPath, startRecord);
|
||||
this.appendRecord(startRecord);
|
||||
const endRecord: NdjsonInteractiveEnd = {
|
||||
type: 'interactive_end',
|
||||
confirmed: meta.confirmed,
|
||||
...(meta.task ? { task: meta.task } : {}),
|
||||
...(meta.task ? { task: this.sanitizeText(meta.task) } : {}),
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
appendNdjsonLine(this.ndjsonLogPath, endRecord);
|
||||
this.appendRecord(endRecord);
|
||||
}
|
||||
|
||||
/**
 * Updates the piece-wide iteration number remembered by this logger.
 *
 * @param iteration - Iteration number to store in `currentIteration`.
 */
setIteration(iteration: number): void {
  this.currentIteration = iteration;
}
|
||||
@ -64,75 +66,127 @@ export class SessionLogger {
|
||||
onPhaseStart(
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
phaseName: 'execute' | 'report' | 'judge',
|
||||
instruction: string,
|
||||
promptParts: PhasePromptParts,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
): void {
|
||||
if (!instruction) {
|
||||
throw new Error(`Missing phase instruction for ${step.name}:${phase}`);
|
||||
}
|
||||
const resolvedPhaseExecutionId = this.resolvePhaseExecutionId(step.name, phase, phaseExecutionId, iteration);
|
||||
const record: NdjsonPhaseStart = {
|
||||
type: 'phase_start',
|
||||
step: step.name,
|
||||
phase,
|
||||
phaseName,
|
||||
phaseExecutionId: resolvedPhaseExecutionId,
|
||||
timestamp: new Date().toISOString(),
|
||||
...(instruction ? { instruction } : {}),
|
||||
instruction: this.sanitizeText(instruction),
|
||||
systemPrompt: this.sanitizeText(promptParts.systemPrompt),
|
||||
userInstruction: this.sanitizeText(promptParts.userInstruction),
|
||||
...(iteration != null ? { iteration } : {}),
|
||||
};
|
||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
||||
this.appendRecord(record);
|
||||
|
||||
if (isDebugEnabled()) {
|
||||
this.phasePrompts.set(`${step.name}:${phase}`, instruction);
|
||||
this.phasePromptsByExecutionId.set(resolvedPhaseExecutionId, promptParts);
|
||||
}
|
||||
}
|
||||
|
||||
onPhaseComplete(
|
||||
step: PieceMovement,
|
||||
phase: 1 | 2 | 3,
|
||||
phaseName: PhaseName,
|
||||
phaseName: 'execute' | 'report' | 'judge',
|
||||
content: string,
|
||||
phaseStatus: string,
|
||||
phaseError: string | undefined,
|
||||
phaseExecutionId?: string,
|
||||
iteration?: number,
|
||||
): void {
|
||||
if (!phaseStatus) {
|
||||
throw new Error(`Missing phase status for ${step.name}:${phase}`);
|
||||
}
|
||||
const resolvedPhaseExecutionId = this.resolveCompletionPhaseExecutionId(step.name, phase, phaseExecutionId, iteration);
|
||||
const completedAt = new Date().toISOString();
|
||||
const record: NdjsonPhaseComplete = {
|
||||
type: 'phase_complete',
|
||||
step: step.name,
|
||||
phase,
|
||||
phaseName,
|
||||
phaseExecutionId: resolvedPhaseExecutionId,
|
||||
status: phaseStatus,
|
||||
content,
|
||||
timestamp: new Date().toISOString(),
|
||||
...(phaseError ? { error: phaseError } : {}),
|
||||
content: this.sanitizeText(content),
|
||||
timestamp: completedAt,
|
||||
...(phaseError ? { error: this.sanitizeText(phaseError) } : {}),
|
||||
...(iteration != null ? { iteration } : {}),
|
||||
};
|
||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
||||
this.appendRecord(record);
|
||||
|
||||
const promptKey = `${step.name}:${phase}`;
|
||||
const prompt = this.phasePrompts.get(promptKey);
|
||||
this.phasePrompts.delete(promptKey);
|
||||
|
||||
if (isDebugEnabled() && prompt) {
|
||||
const prompt = this.phasePromptsByExecutionId.get(resolvedPhaseExecutionId);
|
||||
if (isDebugEnabled()) {
|
||||
if (!prompt) {
|
||||
throw new Error(`Missing debug prompt for ${step.name}:${phase}:${resolvedPhaseExecutionId}`);
|
||||
}
|
||||
const promptRecord: PromptLogRecord = {
|
||||
movement: step.name,
|
||||
phase,
|
||||
iteration: this.currentIteration,
|
||||
prompt,
|
||||
response: content,
|
||||
timestamp: new Date().toISOString(),
|
||||
iteration: iteration ?? this.currentIteration,
|
||||
phaseExecutionId: resolvedPhaseExecutionId,
|
||||
prompt: this.sanitizeText(prompt.userInstruction),
|
||||
systemPrompt: this.sanitizeText(prompt.systemPrompt),
|
||||
userInstruction: this.sanitizeText(prompt.userInstruction),
|
||||
response: this.sanitizeText(content),
|
||||
timestamp: completedAt,
|
||||
};
|
||||
writePromptLog(promptRecord);
|
||||
this.promptRecords.push(promptRecord);
|
||||
this.phasePromptsByExecutionId.delete(resolvedPhaseExecutionId);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Appends a `phase_judge_stage` NDJSON record for one internal stage of the
 * Phase 3 status judgment.
 *
 * The instruction and response texts go through `sanitizeText` before they
 * are stored. The `iteration` key is only present when an iteration is given.
 *
 * @param step - Movement the judgment belongs to; only `step.name` is read.
 * @param phase - Always `3`.
 * @param phaseName - Always `'judge'`.
 * @param entry - Stage details reported by the judge.
 * @param phaseExecutionId - Explicit execution id; when omitted it is resolved
 *   from the counters kept by this logger.
 * @param iteration - Iteration the phase ran in, if known.
 * @throws Error when no execution id is given and none was registered for
 *   this step/phase (raised by `resolveCompletionPhaseExecutionId`).
 */
onJudgeStage(
  step: PieceMovement,
  phase: 3,
  phaseName: 'judge',
  entry: JudgeStageEntry,
  phaseExecutionId?: string,
  iteration?: number,
): void {
  const executionId = this.resolveCompletionPhaseExecutionId(step.name, phase, phaseExecutionId, iteration);
  const { stage, method, status } = entry;
  const iterationField = iteration != null ? { iteration } : {};

  const judgeStageRecord: NdjsonPhaseJudgeStage = {
    type: 'phase_judge_stage',
    step: step.name,
    phase,
    phaseName,
    phaseExecutionId: executionId,
    stage,
    method,
    status,
    instruction: this.sanitizeText(entry.instruction),
    response: this.sanitizeText(entry.response),
    timestamp: new Date().toISOString(),
    ...iterationField,
  };

  this.appendRecord(judgeStageRecord);
}
|
||||
|
||||
onMovementStart(
|
||||
step: PieceMovement,
|
||||
iteration: number,
|
||||
instruction: string | undefined,
|
||||
): void {
|
||||
this.currentIteration = iteration;
|
||||
const record: NdjsonStepStart = {
|
||||
type: 'step_start',
|
||||
step: step.name,
|
||||
persona: step.personaDisplayName,
|
||||
iteration,
|
||||
timestamp: new Date().toISOString(),
|
||||
...(instruction ? { instruction } : {}),
|
||||
...(instruction ? { instruction: this.sanitizeText(instruction) } : {}),
|
||||
};
|
||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
||||
this.appendRecord(record);
|
||||
}
|
||||
|
||||
onMovementComplete(
|
||||
@ -146,15 +200,15 @@ export class SessionLogger {
|
||||
step: step.name,
|
||||
persona: response.persona,
|
||||
status: response.status,
|
||||
content: response.content,
|
||||
instruction,
|
||||
content: this.sanitizeText(response.content),
|
||||
instruction: this.sanitizeText(instruction),
|
||||
...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}),
|
||||
...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}),
|
||||
...(matchMethod ? { matchMethod } : {}),
|
||||
...(response.error ? { error: response.error } : {}),
|
||||
...(response.error ? { error: this.sanitizeText(response.error) } : {}),
|
||||
timestamp: response.timestamp.toISOString(),
|
||||
};
|
||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
||||
this.appendRecord(record);
|
||||
}
|
||||
|
||||
onPieceComplete(state: PieceState): void {
|
||||
@ -163,16 +217,73 @@ export class SessionLogger {
|
||||
iterations: state.iteration,
|
||||
endTime: new Date().toISOString(),
|
||||
};
|
||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
||||
this.appendRecord(record);
|
||||
}
|
||||
|
||||
onPieceAbort(state: PieceState, reason: string): void {
|
||||
const record: NdjsonPieceAbort = {
|
||||
type: 'piece_abort',
|
||||
iterations: state.iteration,
|
||||
reason,
|
||||
reason: this.sanitizeText(reason),
|
||||
endTime: new Date().toISOString(),
|
||||
};
|
||||
this.appendRecord(record);
|
||||
}
|
||||
|
||||
/**
 * Returns a shallow copy of the NDJSON records collected in memory, so that
 * callers cannot mutate the logger's own array.
 */
getNdjsonRecords(): NdjsonRecord[] {
  return this.ndjsonRecords.slice();
}
|
||||
|
||||
/**
 * Returns a shallow copy of the prompt log records collected in memory, so
 * that callers cannot mutate the logger's own array.
 */
getPromptRecords(): PromptLogRecord[] {
  return this.promptRecords.slice();
}
|
||||
|
||||
/**
 * Builds the counter key for a phase: `step:phase` when no iteration is
 * known, otherwise `step:iteration:phase`.
 */
private buildPhaseKey(stepName: string, phase: 1 | 2 | 3, iteration?: number): string {
  return iteration == null
    ? `${stepName}:${phase}`
    : `${stepName}:${iteration}:${phase}`;
}
|
||||
|
||||
/**
 * Determines the execution id to use when a phase starts.
 *
 * An explicitly supplied id is returned untouched and does not advance any
 * counter. Otherwise the per-key counter is incremented and the id is
 * `<phase key>:<sequence>`.
 *
 * @param stepName - Name of the movement.
 * @param phase - Phase number.
 * @param phaseExecutionId - Id provided by the caller, if any.
 * @param iteration - Iteration used to build the counter key, if known.
 * @returns The resolved execution id.
 */
private resolvePhaseExecutionId(
  stepName: string,
  phase: 1 | 2 | 3,
  phaseExecutionId: string | undefined,
  iteration?: number,
): string {
  if (phaseExecutionId) return phaseExecutionId;

  const counterKey = this.buildPhaseKey(stepName, phase, iteration);
  const sequence = (this.phaseExecutionCounters.get(counterKey) ?? 0) + 1;
  this.phaseExecutionCounters.set(counterKey, sequence);
  return `${counterKey}:${sequence}`;
}
|
||||
|
||||
/**
 * Determines the execution id for an event that follows a phase start
 * (completion or judge stage).
 *
 * An explicitly supplied id wins. Otherwise the most recent sequence number
 * registered for the phase key is reused without incrementing it.
 *
 * @param stepName - Name of the movement.
 * @param phase - Phase number.
 * @param phaseExecutionId - Id provided by the caller, if any.
 * @param iteration - Iteration used to build the counter key, if known.
 * @returns The resolved execution id.
 * @throws Error when no id was supplied and no counter exists for the key.
 */
private resolveCompletionPhaseExecutionId(
  stepName: string,
  phase: 1 | 2 | 3,
  phaseExecutionId: string | undefined,
  iteration?: number,
): string {
  if (!phaseExecutionId) {
    const counterKey = this.buildPhaseKey(stepName, phase, iteration);
    const sequence = this.phaseExecutionCounters.get(counterKey);
    if (sequence == null) {
      throw new Error(`Missing phase execution id on completion for ${stepName}:${phase}`);
    }
    return `${counterKey}:${sequence}`;
  }
  return phaseExecutionId;
}
|
||||
|
||||
/**
 * Stores the record in the in-memory list and then appends it to the NDJSON
 * log file at `ndjsonLogPath`.
 */
private appendRecord(record: NdjsonRecord): void {
  // Keep the in-memory copy first, then write the same record to disk.
  this.ndjsonRecords.push(record);
  appendNdjsonLine(this.ndjsonLogPath, record);
}
|
||||
|
||||
/**
 * Passes the text through `sanitizeTextForStorage` using this logger's
 * `allowSensitiveData` setting.
 */
private sanitizeText(text: string): string {
  const { allowSensitiveData } = this;
  return sanitizeTextForStorage(text, allowSensitiveData);
}
|
||||
}
|
||||
|
||||
55
src/features/tasks/execute/traceReport.ts
Normal file
55
src/features/tasks/execute/traceReport.ts
Normal file
@ -0,0 +1,55 @@
|
||||
import type { NdjsonRecord, PromptLogRecord } from '../../../shared/utils/index.js';
|
||||
import type {
|
||||
TraceReportMode,
|
||||
TraceReportParams,
|
||||
TraceMovement,
|
||||
TracePhase,
|
||||
} from './traceReportTypes.js';
|
||||
import { parseJsonl, buildTraceFromRecords, type PromptRecord } from './traceReportParser.js';
|
||||
import { cloneMovementsForMode, sanitizeTraceParamsForMode } from './traceReportRedaction.js';
|
||||
import { assertTraceParams, renderTraceReportMarkdown } from './traceReportRenderer.js';
|
||||
|
||||
export type {
|
||||
TraceReportMode,
|
||||
TraceReportParams,
|
||||
TraceMovement,
|
||||
TracePhase,
|
||||
};
|
||||
|
||||
export { assertTraceParams, renderTraceReportMarkdown };
|
||||
|
||||
/**
 * Renders a trace report from log files on disk.
 *
 * @param params - Report parameters forwarded to the renderer.
 * @param ndjsonLogPath - Path of the NDJSON session log to read.
 * @param promptLogPath - Optional path of the prompt log; when falsy no prompt
 *   records are loaded.
 * @param mode - Trace report mode; `'off'` disables rendering.
 * @returns The rendered report, or `undefined` when `mode` is `'off'`.
 * @throws Error when the session log yields no records.
 */
export function renderTraceReportFromLogs(
  params: TraceReportParams,
  ndjsonLogPath: string,
  promptLogPath: string | undefined,
  mode: TraceReportMode,
): string | undefined {
  // Bail out before touching the file system when reports are disabled.
  if (mode === 'off') return undefined;

  const sessionRecords = parseJsonl<NdjsonRecord>(ndjsonLogPath);
  if (sessionRecords.length === 0) {
    throw new Error(`No session records found for trace report: ${ndjsonLogPath}`);
  }

  let promptRecords: PromptRecord[] = [];
  if (promptLogPath) {
    promptRecords = parseJsonl<PromptRecord>(promptLogPath);
  }

  return renderTraceReportFromRecords(params, sessionRecords, promptRecords, mode);
}
|
||||
|
||||
/**
 * Renders a trace report from records that are already in memory.
 *
 * @param params - Report parameters; `params.endTime` is passed to the trace
 *   builder, and the whole object is sanitized for `mode` before rendering.
 * @param records - Session records to build the trace from.
 * @param promptRecords - Prompt records matched against the session records.
 * @param mode - Trace report mode; `'off'` disables rendering.
 * @returns The rendered report, or `undefined` when `mode` is `'off'`.
 * @throws Error when `records` is empty.
 */
export function renderTraceReportFromRecords(
  params: TraceReportParams,
  records: NdjsonRecord[],
  promptRecords: PromptRecord[] | PromptLogRecord[],
  mode: TraceReportMode,
): string | undefined {
  if (mode === 'off') return undefined;
  if (records.length === 0) {
    throw new Error('No session records found for trace report from records');
  }

  const { traceStartedAt, movements } = buildTraceFromRecords(
    records,
    promptRecords as PromptRecord[],
    params.endTime,
  );

  // Both the parameters and the movements are adjusted for the mode before rendering.
  return renderTraceReportMarkdown(
    sanitizeTraceParamsForMode(params, mode),
    traceStartedAt,
    cloneMovementsForMode(movements, mode),
  );
}
|
||||
260
src/features/tasks/execute/traceReportParser.ts
Normal file
260
src/features/tasks/execute/traceReportParser.ts
Normal file
@ -0,0 +1,260 @@
|
||||
import { existsSync, readFileSync } from 'node:fs';
|
||||
import type { NdjsonRecord, PromptLogRecord } from '../../../shared/utils/index.js';
|
||||
import {
|
||||
buildPhaseExecutionId,
|
||||
parsePhaseExecutionId,
|
||||
} from '../../../shared/utils/phaseExecutionId.js';
|
||||
import type {
|
||||
TraceMovement,
|
||||
TracePhase,
|
||||
} from './traceReportTypes.js';
|
||||
|
||||
/**
 * Prompt log record as consumed by the trace parser: a `PromptLogRecord`
 * whose `timestamp` is declared as a string.
 */
interface PromptRecord extends PromptLogRecord {
  /** Timestamp of the prompt record. NOTE(review): presumably ISO 8601 — confirm against the writer. */
  timestamp: string;
}
|
||||
|
||||
/** Result returned by `buildTraceFromRecords`. */
interface BuildTraceResult {
  /** Start time taken from the `piece_start` record; empty string if none was seen. */
  traceStartedAt: string;
  /** Movements reconstructed from the session records. */
  movements: TraceMovement[];
}
|
||||
|
||||
/**
 * Reads a JSON Lines file and parses every non-blank line.
 *
 * Lines are trimmed before parsing and lines that are empty after trimming
 * are skipped. The parsed values are cast to `T` without validation, so the
 * caller is responsible for the shape of the data.
 *
 * @param path - Path of the JSONL file.
 * @returns The parsed values in file order, or an empty array when the file
 *   does not exist.
 * @throws SyntaxError when a line is not valid JSON. The message names the
 *   file and the 1-based line number so a truncated or corrupt log can be
 *   located.
 */
export function parseJsonl<T>(path: string): T[] {
  if (!existsSync(path)) {
    return [];
  }

  const parsed: T[] = [];
  const rawLines = readFileSync(path, 'utf-8').split('\n');
  for (const [index, rawLine] of rawLines.entries()) {
    const line = rawLine.trim();
    if (line.length === 0) {
      continue;
    }
    try {
      parsed.push(JSON.parse(line) as T);
    } catch (error) {
      // Re-throw with file/line context; a bare JSON.parse message is not actionable.
      const detail = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`Invalid JSONL at ${path}:${index + 1}: ${detail}`);
    }
  }
  return parsed;
}
|
||||
|
||||
/** Builds the map key for a movement as `step:iteration`. */
function movementKey(step: string, iteration: number): string {
  return step + ':' + String(iteration);
}
|
||||
|
||||
/**
 * Allocates the next execution id for a step/iteration/phase combination.
 *
 * The counter stored under `step:iteration:phase` is incremented in
 * `counters` (starting from 1), and the new value is used as the `sequence`
 * passed to `buildPhaseExecutionId`.
 *
 * @param step - Movement name.
 * @param iteration - Iteration the phase ran in.
 * @param phase - Phase number.
 * @param counters - Mutable per-key sequence counters; updated in place.
 * @returns The id produced by `buildPhaseExecutionId`.
 */
function createPhaseExecutionId(
  step: string,
  iteration: number,
  phase: 1 | 2 | 3,
  counters: Map<string, number>,
): string {
  const counterKey = `${step}:${iteration}:${phase}`;
  const sequence = (counters.get(counterKey) ?? 0) + 1;
  counters.set(counterKey, sequence);
  return buildPhaseExecutionId({ step, iteration, phase, sequence });
}
|
||||
|
||||
/**
 * Extracts the step name and iteration from a phase execution id.
 *
 * @param phaseExecutionId - Id to parse with `parsePhaseExecutionId`.
 * @returns `{ step, iteration }`, or `undefined` when the id cannot be parsed.
 */
function parsePhaseExecutionKey(
  phaseExecutionId: string,
): { step: string; iteration: number } | undefined {
  const parsed = parsePhaseExecutionId(phaseExecutionId);
  return parsed ? { step: parsed.step, iteration: parsed.iteration } : undefined;
}
|
||||
|
||||
/**
 * Returns the movement registered for `step`/`iteration`, creating and
 * registering a new one when none exists yet.
 *
 * A newly created movement starts with an empty `phases` list, `timestamp`
 * as its `startedAt`, and `fallbackPersona` as its persona. An existing
 * movement is returned unchanged.
 *
 * @param movementsByKey - Registry of movements; updated in place on creation.
 * @param step - Movement name.
 * @param iteration - Iteration of the movement.
 * @param timestamp - Start time used only when a movement is created.
 * @param fallbackPersona - Persona used only when a movement is created.
 * @returns The existing or newly created movement.
 */
function ensureMovement(
  movementsByKey: Map<string, TraceMovement>,
  step: string,
  iteration: number,
  timestamp: string,
  fallbackPersona: string,
): TraceMovement {
  const key = movementKey(step, iteration);
  let movement = movementsByKey.get(key);
  if (!movement) {
    movement = {
      step,
      persona: fallbackPersona,
      iteration,
      startedAt: timestamp,
      phases: [],
    };
    movementsByKey.set(key, movement);
  }
  return movement;
}
|
||||
|
||||
/**
 * Rebuilds the movement / phase structure of a run from its NDJSON session records.
 *
 * Records are processed in the order given:
 * - the first `piece_start` supplies the trace start time;
 * - `step_start` creates (or updates) the movement for that step and iteration
 *   and remembers the iteration as the latest one for the step;
 * - `step_complete` attaches the result to the movement of the step's latest iteration;
 * - `phase_start` appends a phase to its movement;
 * - `phase_complete` fills in the response, status, error and completion time of
 *   a previously started phase;
 * - `phase_judge_stage` appends a judge-stage entry to a previously started phase
 *   and is silently ignored when that phase cannot be found.
 *
 * Phase records that carry no `phaseExecutionId` get a synthesized id. Starts
 * and completions use separate sequence counters so that the N-th id-less
 * completion of a (step, iteration, phase) resolves to the N-th id-less start.
 * With a single shared counter the completion would be assigned the next
 * sequence number and could never match its start.
 *
 * @param records - Session records in chronological order.
 * @param promptRecords - Prompt records; those with a `phaseExecutionId` are used
 *   to fill in prompts for the phase with the same id.
 * @param defaultEndTime - Used as `traceStartedAt` when no `piece_start` start time was seen.
 * @returns The trace start time and the movements, sorted by `startedAt` and then by iteration.
 * @throws Error when a `step_complete`, `phase_start` or `phase_complete` record
 *   has no resolvable iteration, or when a `phase_complete` has no matching `phase_start`.
 */
export function buildTraceFromRecords(
  records: NdjsonRecord[],
  promptRecords: PromptRecord[],
  defaultEndTime: string,
): BuildTraceResult {
  const promptByExecutionId = new Map<string, PromptRecord>();
  for (const prompt of promptRecords) {
    if (prompt.phaseExecutionId) {
      promptByExecutionId.set(prompt.phaseExecutionId, prompt);
    }
  }

  const movementsByKey = new Map<string, TraceMovement>();
  const phasesByExecutionId = new Map<string, { movement: TraceMovement; index: number }>();
  // Independent counters: a completion must resolve to the same synthesized id
  // as the start it belongs to, not to the next unused sequence number.
  const phaseStartCounters = new Map<string, number>();
  const phaseCompleteCounters = new Map<string, number>();
  const latestIterationByStep = new Map<string, number>();

  let traceStartedAt = '';

  for (const record of records) {
    if (!traceStartedAt && record.type === 'piece_start') {
      traceStartedAt = record.startTime;
      continue;
    }

    if (record.type === 'step_start') {
      latestIterationByStep.set(record.step, record.iteration);
      const movement = ensureMovement(
        movementsByKey,
        record.step,
        record.iteration,
        record.timestamp,
        record.persona,
      );
      // The movement may have been created earlier by a phase record with a
      // fallback persona; the step_start record is authoritative.
      movement.persona = record.persona;
      movement.instruction = record.instruction;
      continue;
    }

    if (record.type === 'step_complete') {
      const iteration = latestIterationByStep.get(record.step);
      if (iteration == null) {
        throw new Error(`Missing iteration for step_complete: ${record.step}`);
      }
      const movement = ensureMovement(
        movementsByKey,
        record.step,
        iteration,
        record.timestamp,
        record.persona,
      );
      movement.completedAt = record.timestamp;
      movement.result = {
        status: record.status,
        content: record.content,
        error: record.error,
        matchedRuleIndex: record.matchedRuleIndex,
        matchedRuleMethod: record.matchedRuleMethod,
        matchMethod: record.matchMethod,
      };
      continue;
    }

    if (record.type === 'phase_start') {
      const iteration = record.iteration ?? latestIterationByStep.get(record.step);
      if (iteration == null) {
        throw new Error(`Missing iteration for phase_start: ${record.step}:${record.phase}`);
      }
      const movement = ensureMovement(
        movementsByKey,
        record.step,
        iteration,
        record.timestamp,
        // No persona on phase records: fall back to the step name.
        record.step,
      );
      const resolvedExecutionId =
        record.phaseExecutionId
        ?? createPhaseExecutionId(record.step, iteration, record.phase, phaseStartCounters);
      const prompt = promptByExecutionId.get(resolvedExecutionId);
      const phase: TracePhase = {
        phaseExecutionId: resolvedExecutionId,
        phase: record.phase,
        phaseName: record.phaseName,
        instruction: record.instruction ?? record.userInstruction ?? prompt?.userInstruction ?? '',
        systemPrompt: record.systemPrompt ?? prompt?.systemPrompt ?? '',
        userInstruction: record.userInstruction ?? prompt?.userInstruction ?? record.instruction ?? '',
        startedAt: record.timestamp,
      };
      movement.phases.push(phase);
      phasesByExecutionId.set(resolvedExecutionId, {
        movement,
        index: movement.phases.length - 1,
      });
      continue;
    }

    if (record.type === 'phase_complete') {
      const iterationFromId = record.phaseExecutionId
        ? parsePhaseExecutionKey(record.phaseExecutionId)?.iteration
        : undefined;
      const iteration =
        record.iteration
        ?? iterationFromId
        ?? latestIterationByStep.get(record.step);
      if (iteration == null) {
        throw new Error(`Missing iteration for phase_complete: ${record.step}:${record.phase}`);
      }
      const resolvedExecutionId =
        record.phaseExecutionId
        ?? createPhaseExecutionId(record.step, iteration, record.phase, phaseCompleteCounters);
      const phaseRef = phasesByExecutionId.get(resolvedExecutionId);
      if (!phaseRef) {
        throw new Error(`Missing phase_start before phase_complete: ${resolvedExecutionId}`);
      }
      const existing = phaseRef.movement.phases[phaseRef.index];
      if (!existing) {
        throw new Error(`Missing phase state for completion: ${resolvedExecutionId}`);
      }
      const prompt = promptByExecutionId.get(resolvedExecutionId);
      phaseRef.movement.phases[phaseRef.index] = {
        ...existing,
        instruction: existing.instruction || prompt?.userInstruction || '',
        // At completion time the prompt record, when present, wins over the start record.
        systemPrompt: prompt?.systemPrompt ?? existing.systemPrompt,
        userInstruction: prompt?.userInstruction ?? existing.userInstruction,
        response: record.content,
        status: record.status,
        error: record.error,
        completedAt: record.timestamp,
      };
      continue;
    }

    if (record.type === 'phase_judge_stage') {
      const phaseRef = record.phaseExecutionId
        ? phasesByExecutionId.get(record.phaseExecutionId)
        : undefined;
      if (!phaseRef) {
        // Judge stages that cannot be tied to a started phase are dropped.
        continue;
      }
      const existing = phaseRef.movement.phases[phaseRef.index];
      if (!existing) {
        continue;
      }
      phaseRef.movement.phases[phaseRef.index] = {
        ...existing,
        judgeStages: [
          ...(existing.judgeStages ?? []),
          {
            stage: record.stage,
            method: record.method,
            status: record.status,
            instruction: record.instruction,
            response: record.response,
          },
        ],
      };
    }
  }

  const movements = [...movementsByKey.values()].sort((a, b) => {
    const byStart = a.startedAt.localeCompare(b.startedAt);
    if (byStart !== 0) {
      return byStart;
    }
    return a.iteration - b.iteration;
  });

  return {
    traceStartedAt: traceStartedAt || defaultEndTime,
    movements,
  };
}
|
||||
|
||||
export type { PromptRecord };
|
||||
81
src/features/tasks/execute/traceReportRedaction.ts
Normal file
81
src/features/tasks/execute/traceReportRedaction.ts
Normal file
@ -0,0 +1,81 @@
|
||||
import type {
|
||||
TraceMovement,
|
||||
TraceReportMode,
|
||||
TraceReportParams,
|
||||
} from './traceReportTypes.js';
|
||||
|
||||
/**
 * Replaces credential-looking substrings in `text` with `[REDACTED]`.
 *
 * Four patterns are applied in order:
 * 1. `Authorization: Bearer <token>` headers;
 * 2. `key: value` / `key=value` pairs (optionally quoted) whose key is one of
 *    api key, token, password, secret, access token or refresh token;
 * 3. URL query parameters named api key, token, password or secret;
 * 4. bare tokens with the prefixes `sk-`, `ghp_` or `xox[baprs]-`.
 *
 * The function is idempotent: a key/value pair whose value is already
 * `[REDACTED]` is left untouched. Without that guard a second pass matched
 * `[REDACTED` as the value and produced `[REDACTED]]`.
 *
 * @param text - Text to sanitize; empty input is returned as-is.
 * @returns The text with matching secrets replaced by `[REDACTED]`.
 */
export function sanitizeSensitiveText(text: string): string {
  if (!text) return text;
  return text
    .replace(/(Authorization\s*:\s*Bearer\s+)([^\s]+)/gi, '$1[REDACTED]')
    .replace(
      // (?!\[REDACTED\]) skips values that an earlier sanitization pass already replaced.
      /(["']?(?:api[_-]?key|token|password|secret|access[_-]?token|refresh[_-]?token)["']?\s*[:=]\s*["']?)(?!\[REDACTED\])([^"',\s}\]]+)(["']?)/gi,
      '$1[REDACTED]$3',
    )
    .replace(/([?&](?:api[_-]?key|token|password|secret)=)([^&\s]+)/gi, '$1[REDACTED]')
    .replace(/\b(?:sk-[A-Za-z0-9]{8,}|ghp_[A-Za-z0-9]{8,}|xox[baprs]-[A-Za-z0-9-]{8,})\b/g, '[REDACTED]');
}
|
||||
|
||||
/**
 * Applies the trace-report mode to a single piece of text.
 *
 * Empty text and `'full'` mode pass the text through unchanged; every other
 * mode returns the text sanitized by `sanitizeSensitiveText`.
 *
 * @param text - Text to transform.
 * @param mode - Trace report mode.
 * @returns The original or the sanitized text.
 */
function transformText(text: string, mode: TraceReportMode): string {
  const keepAsIs = !text || mode === 'full';
  return keepAsIs ? text : sanitizeSensitiveText(text);
}
|
||||
|
||||
/**
 * Produces copies of the movements with every free-text field passed through
 * `transformText` for the given mode.
 *
 * New movement, result, phase and judge-stage objects are created, so the
 * input objects are not modified. The fields transformed are: movement
 * instruction, result content and error, phase instruction / system prompt /
 * user instruction / response / error, and judge-stage instruction and response.
 *
 * @param movements - Movements to copy.
 * @param mode - Trace report mode deciding how text is transformed.
 * @returns The transformed copies, in the same order.
 */
export function cloneMovementsForMode(
  movements: TraceMovement[],
  mode: TraceReportMode,
): TraceMovement[] {
  const convert = (value: string): string => transformText(value, mode);
  // Absent optional text stays absent (normalised to undefined).
  const convertOptional = (value: string | undefined): string | undefined =>
    value == null ? undefined : convert(value);

  return movements.map((movement) => {
    const { result } = movement;
    const convertedResult = result
      ? {
          ...result,
          content: convert(result.content),
          // Only override `error` when there is a non-empty error to transform.
          ...(result.error ? { error: convert(result.error) } : {}),
        }
      : undefined;

    const convertedPhases = movement.phases.map((phase) => ({
      ...phase,
      instruction: convert(phase.instruction),
      systemPrompt: convert(phase.systemPrompt),
      userInstruction: convert(phase.userInstruction),
      response: convertOptional(phase.response),
      error: convertOptional(phase.error),
      judgeStages: phase.judgeStages?.map((stage) => ({
        ...stage,
        instruction: convert(stage.instruction),
        response: convert(stage.response),
      })),
    }));

    return {
      ...movement,
      instruction: convertOptional(movement.instruction),
      result: convertedResult,
      phases: convertedPhases,
    };
  });
}
|
||||
|
||||
/**
 * Returns trace report parameters suitable for the given mode.
 *
 * In `'full'` mode the same `params` object is returned. In every other mode a
 * copy is returned whose `task` (and `reason`, when non-empty) has been passed
 * through `sanitizeSensitiveText`.
 *
 * @param params - Parameters of the trace report.
 * @param mode - Trace report mode.
 * @returns `params` itself or a sanitized copy.
 */
export function sanitizeTraceParamsForMode(
  params: TraceReportParams,
  mode: TraceReportMode,
): TraceReportParams {
  if (mode === 'full') {
    return params;
  }
  const sanitized: TraceReportParams = {
    ...params,
    task: sanitizeSensitiveText(params.task),
  };
  if (params.reason) {
    sanitized.reason = sanitizeSensitiveText(params.reason);
  }
  return sanitized;
}
|
||||
|
||||
/**
 * Sanitizes text unless full text is explicitly allowed.
 *
 * @param text - Text to process; empty text is returned as-is.
 * @param allowFullText - When true the text is returned unchanged.
 * @returns The original text, or the result of `sanitizeSensitiveText`.
 */
export function sanitizeTextForStorage(text: string, allowFullText: boolean): string {
  return !text || allowFullText ? text : sanitizeSensitiveText(text);
}
|
||||
297
src/features/tasks/execute/traceReportRenderer.ts
Normal file
297
src/features/tasks/execute/traceReportRenderer.ts
Normal file
@ -0,0 +1,297 @@
|
||||
import type {
|
||||
TraceMovement,
|
||||
TracePhase,
|
||||
TraceReportParams,
|
||||
} from './traceReportTypes.js';
|
||||
|
||||
/** A render unit holding a single movement. */
interface MovementBlock {
  kind: 'movement';
  movement: TraceMovement;
}

/** A render unit holding several consecutive movements that are rendered together as a loop. */
interface LoopBlock {
  kind: 'loop';
  movements: TraceMovement[];
}

/** Unit of rendering, discriminated by `kind`. */
type RenderBlock = MovementBlock | LoopBlock;
|
||||
|
||||
/**
 * Validates the parameters of a trace report.
 *
 * @param params - Parameters to validate.
 * @throws Error `"<field> is required"` for the first empty field among
 *   `tracePath`, `pieceName`, `task`, `runSlug` and `endTime` (checked in that order).
 * @throws Error when `iterations` is not a non-negative integer.
 */
export function assertTraceParams(params: TraceReportParams): void {
  const requiredFields = ['tracePath', 'pieceName', 'task', 'runSlug', 'endTime'] as const;
  for (const field of requiredFields) {
    if (!params[field]) {
      throw new Error(`${field} is required`);
    }
  }

  const { iterations } = params;
  const iterationsValid = Number.isInteger(iterations) && iterations >= 0;
  if (!iterationsValid) {
    throw new Error(`iterations must be a non-negative integer: ${iterations}`);
  }
}
|
||||
|
||||
/**
 * Validates one movement before it is rendered.
 *
 * @param movement - Movement to validate.
 * @param index - Position of the movement, used only in error messages.
 * @throws Error when `step`, `persona` or `startedAt` is empty, or when
 *   `iteration` is not a positive integer.
 */
function assertTraceMovement(movement: TraceMovement, index: number): void {
  const label = `trace movement[${index}]`;
  if (!movement.step) {
    throw new Error(`${label} missing step`);
  }
  if (!movement.persona) {
    throw new Error(`${label} missing persona`);
  }
  const iterationValid = Number.isInteger(movement.iteration) && movement.iteration > 0;
  if (!iterationValid) {
    throw new Error(`${label} has invalid iteration: ${movement.iteration}`);
  }
  if (!movement.startedAt) {
    throw new Error(`${label} missing startedAt`);
  }
}
|
||||
|
||||
/**
 * Tells whether a phase, or any of its judge stages, ended in error.
 *
 * @param phase - Phase to inspect.
 * @returns `true` when the phase status is `'error'`, the phase has a non-empty
 *   `error`, or at least one judge stage has status `'error'`.
 */
function hasPhaseError(phase: TracePhase): boolean {
  const phaseFailed = phase.status === 'error' || Boolean(phase.error);
  if (phaseFailed) {
    return true;
  }
  for (const stage of phase.judgeStages ?? []) {
    if (stage.status === 'error') {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Chooses the status marker shown in a movement heading.
 *
 * @param movement - Movement being rendered.
 * @param runStatus - Final status of the whole run.
 * @param isLastMovement - Whether the caller considers this the last movement.
 * @returns `'❌'` when the movement result is an error, or when the run was
 *   aborted and this last movement has no result; `'⚠️'` when any phase has an
 *   error (see `hasPhaseError`); otherwise an empty string.
 */
function movementMarker(
  movement: TraceMovement,
  runStatus: TraceReportParams['status'],
  isLastMovement: boolean,
): string {
  const { result } = movement;
  const resultFailed = result?.status === 'error' || Boolean(result?.error);
  const abortedWithoutResult = runStatus === 'aborted' && !result && isLastMovement;
  if (resultFailed || abortedWithoutResult) {
    return '❌';
  }
  return movement.phases.some(hasPhaseError) ? '⚠️' : '';
}
|
||||
|
||||
/**
 * Renders one phase as markdown lines.
 *
 * The section contains a heading (with a warning marker when the phase has an
 * error), start / completion times, collapsible system prompt and user
 * instruction, an optional collapsible response, the status (`in_progress`
 * when unset), an optional error line and, for phase 3 with judge stages, a
 * "Judgment Stages" list. The section ends with an empty line.
 *
 * @param phase - Phase to render.
 * @param runStatus - Final status of the run; a completed run requires every
 *   phase to have a status and a completion time.
 * @returns The markdown lines of the section.
 * @throws Error when the phase has no instruction, or when the run is
 *   completed but the phase lacks `status` or `completedAt`.
 */
function renderPhaseSection(
  phase: TracePhase,
  runStatus: TraceReportParams['status'],
): string[] {
  const label = `phase ${phase.phase} (${phase.phaseName})`;
  if (!phase.instruction) {
    throw new Error(`${label} missing instruction`);
  }
  if (runStatus === 'completed') {
    if (!phase.status) {
      throw new Error(`${label} missing status`);
    }
    if (!phase.completedAt) {
      throw new Error(`${label} missing completedAt`);
    }
  }

  // Collapsible block: summary line, blank, body, blank, closing tag.
  const collapsible = (summary: string, body: string): string[] => [
    `<details><summary>${summary}</summary>`,
    '',
    body,
    '',
    '</details>',
  ];

  const headingMarker = hasPhaseError(phase) ? ' ⚠️' : '';
  const out: string[] = [`### Phase ${phase.phase}: ${phase.phaseName}${headingMarker}`, ''];

  out.push(`- Started: ${phase.startedAt}`);
  if (phase.completedAt) {
    out.push(`- Completed: ${phase.completedAt}`);
  }

  out.push(
    `- System Prompt: ${phase.systemPrompt.length} chars`,
    ...collapsible('System Prompt', phase.systemPrompt),
    '',
  );
  out.push(
    `- User Instruction: ${phase.userInstruction.length} chars`,
    ...collapsible('User Instruction', phase.userInstruction),
  );

  if (phase.response != null) {
    out.push(
      '',
      `- Response: ${phase.response.length} chars`,
      ...collapsible('Response', phase.response),
    );
  }

  out.push('', `- Status: ${phase.status ?? 'in_progress'}`);
  if (phase.error) {
    out.push(`- Error: ${phase.error}`);
  }

  // Judge stages are only reported for phase 3.
  const stages = phase.phase === 3 ? phase.judgeStages ?? [] : [];
  if (stages.length > 0) {
    out.push('', '#### Judgment Stages', '');
    for (const stage of stages) {
      const stageMarker = stage.status === 'error' ? ' ⚠️' : '';
      out.push(
        `- Stage ${stage.stage} (${stage.method})${stageMarker}: status=${stage.status}, instruction=${stage.instruction.length} chars, response=${stage.response.length} chars`,
      );
      out.push(...collapsible('Stage Instruction', stage.instruction), '');
      out.push(...collapsible('Stage Response', stage.response), '');
    }
  }

  out.push('');
  return out;
}
|
||||
|
||||
/**
 * Renders one movement as markdown lines.
 *
 * Output order: heading (iteration, step, persona, optional marker, start
 * time), optional collapsible movement instruction, every phase sorted by
 * start time and then phase number, the movement result (or a status line
 * when there is no result), and a closing `---` separator.
 *
 * @param movement - Movement to render.
 * @param params - Trace report parameters; only `status` is read here.
 * @param isLastMovement - Forwarded to `movementMarker`.
 * @returns The markdown lines of the section.
 */
function renderMovementSection(
  movement: TraceMovement,
  params: TraceReportParams,
  isLastMovement: boolean,
): string[] {
  const marker = movementMarker(movement, params.status, isLastMovement);
  const heading = `## Iteration ${movement.iteration}: ${movement.step} (persona: ${movement.persona})${marker ? ` ${marker}` : ''} - ${movement.startedAt}`;
  const out: string[] = [heading, ''];

  if (movement.instruction) {
    out.push(`- Movement Instruction: ${movement.instruction.length} chars`);
    out.push('<details><summary>Instruction</summary>', '', movement.instruction, '', '</details>', '');
  }

  // Sort a copy so the movement's own phase order is left untouched.
  const orderedPhases = movement.phases
    .slice()
    .sort((left, right) => left.startedAt.localeCompare(right.startedAt) || left.phase - right.phase);
  for (const phase of orderedPhases) {
    out.push(...renderPhaseSection(phase, params.status));
  }

  const { result } = movement;
  if (!result) {
    out.push(`- Movement Status: ${movement.completedAt ? 'aborted' : 'in_progress'}`);
  } else {
    out.push(`- Movement Status: ${result.status}`);
    out.push(`- Movement Response: ${result.content.length} chars`);
    if (result.matchMethod) {
      out.push(`- Match Method: ${result.matchMethod}`);
    }
    if (result.matchedRuleIndex != null) {
      out.push(`- Matched Rule Index: ${result.matchedRuleIndex}`);
    }
    if (result.error) {
      out.push(`- Error: ${result.error}`);
    }
    out.push('<details><summary>Movement Response</summary>', '', result.content, '', '</details>');
  }

  out.push('', '---', '');
  return out;
}
|
||||
|
||||
/**
 * Groups movements into render blocks, collapsing alternating A/B/A/B…
 * sequences into a single loop block.
 *
 * A loop starts at a position where at least four movements follow the
 * pattern A, B, A, B with A different from B; it extends for as long as the
 * alternation continues. Everything else becomes a single-movement block.
 *
 * @param sorted - Movements in render order.
 * @returns The blocks, covering every movement exactly once and in order.
 */
function buildRenderBlocks(sorted: TraceMovement[]): RenderBlock[] {
  const total = sorted.length;
  const stepAt = (position: number): string => sorted[position]!.step;

  // Exclusive end of the alternating run starting at `start`, or `start`
  // itself when no loop (at least four alternating movements) begins there.
  const alternatingRunEnd = (start: number): number => {
    if (start + 3 >= total || stepAt(start) === stepAt(start + 1)) {
      return start;
    }
    let end = start + 2;
    // In an alternation every element equals the one two positions earlier.
    while (end < total && stepAt(end) === stepAt(end - 2)) {
      end += 1;
    }
    return end - start >= 4 ? end : start;
  };

  const blocks: RenderBlock[] = [];
  let cursor = 0;
  while (cursor < total) {
    const runEnd = alternatingRunEnd(cursor);
    if (runEnd > cursor) {
      blocks.push({ kind: 'loop', movements: sorted.slice(cursor, runEnd) });
      cursor = runEnd;
    } else {
      blocks.push({ kind: 'movement', movement: sorted[cursor]! });
      cursor += 1;
    }
  }
  return blocks;
}
|
||||
|
||||
/**
 * Renders a loop block as a collapsible markdown section.
 *
 * The heading shows the iteration range, the two alternating step names and
 * the number of cycles (half the movement count, rounded down). Each movement
 * is rendered with `renderMovementSection`; its non-empty lines are prefixed
 * before being placed inside the `<details>` element.
 *
 * @param block - Loop block; the code reads its first two movements, so it is
 *   expected to hold at least two.
 * @param params - Trace report parameters forwarded to the movement renderer.
 * @returns The markdown lines of the loop section, ending with a `---` separator.
 */
function renderLoopBlock(block: LoopBlock, params: TraceReportParams): string[] {
  const { movements } = block;
  const total = movements.length;
  const first = movements[0]!;
  const second = movements[1]!;
  const last = movements[total - 1]!;
  const cycleCount = Math.floor(total / 2);

  const out: string[] = [
    `## Iteration ${first.iteration}-${last.iteration}: ${first.step} ↔ ${second.step} loop (${cycleCount} cycles) ⚠️`,
    '',
    `<details><summary>Loop details (${total} movements)</summary>`,
    '',
  ];

  for (const [position, movement] of movements.entries()) {
    // The last movement of the block is passed as "last movement" to the renderer.
    const rendered = renderMovementSection(movement, params, position === total - 1);
    for (const line of rendered) {
      out.push(line ? ` ${line}` : line);
    }
  }

  out.push('</details>', '', '---', '');
  return out;
}
|
||||
|
||||
/**
 * Renders the complete trace report as a markdown document.
 *
 * The document starts with a header (piece name, task, run, start and end
 * time, status, iteration count and optional reason), followed by the
 * movements sorted by start time and then iteration. Alternating movement
 * sequences are collapsed into loop sections (see `buildRenderBlocks`).
 *
 * @param params - Trace report parameters; validated with `assertTraceParams`.
 * @param traceStartedAt - Start time shown in the header; must be non-empty.
 * @param movements - Movements to render; the array itself is not reordered.
 * @returns The markdown text, lines joined with `\n`.
 * @throws Error when the parameters, `traceStartedAt` or any movement is invalid.
 */
export function renderTraceReportMarkdown(
  params: TraceReportParams,
  traceStartedAt: string,
  movements: TraceMovement[],
): string {
  assertTraceParams(params);
  if (!traceStartedAt) {
    throw new Error('traceStartedAt is required');
  }

  const out: string[] = [
    `# Execution Trace: ${params.pieceName}`,
    '',
    `- Task: ${params.task}`,
    `- Run: ${params.runSlug}`,
    `- Started: ${traceStartedAt}`,
    `- Ended: ${params.endTime}`,
    `- Status: ${params.status === 'completed' ? '✅ completed' : '❌ aborted'}`,
    `- Iterations: ${params.iterations}`,
  ];
  if (params.reason) {
    out.push(`- Reason: ${params.reason}`);
  }
  out.push('', '---', '');

  const ordered = movements
    .slice()
    .sort((left, right) => left.startedAt.localeCompare(right.startedAt) || left.iteration - right.iteration);
  for (const [position, movement] of ordered.entries()) {
    assertTraceMovement(movement, position);
  }

  const blocks = buildRenderBlocks(ordered);
  const lastBlockIndex = blocks.length - 1;
  for (const [blockIndex, block] of blocks.entries()) {
    if (block.kind === 'loop') {
      out.push(...renderLoopBlock(block, params));
    } else {
      out.push(...renderMovementSection(block.movement, params, blockIndex === lastBlockIndex));
    }
  }

  return out.join('\n');
}
|
||||
48
src/features/tasks/execute/traceReportTypes.ts
Normal file
48
src/features/tasks/execute/traceReportTypes.ts
Normal file
@ -0,0 +1,48 @@
|
||||
import type { PhaseName } from '../../../core/piece/index.js';
|
||||
import type { JudgeStageEntry } from '../../../core/piece/types.js';
|
||||
|
||||
/**
 * How much text a trace report exposes.
 *
 * NOTE(review): in the redaction helpers `'full'` passes text through
 * unchanged and every other value sanitizes it; how `'off'` is handled by the
 * report generator itself is not visible from this declaration — confirm there.
 */
export type TraceReportMode = 'off' | 'redacted' | 'full';
|
||||
|
||||
/** Run-level information shown in the header of a trace report. */
export interface TraceReportParams {
  /** Path of the trace report file; must be non-empty. */
  tracePath: string;
  /** Name of the piece, shown in the report title. */
  pieceName: string;
  /** Task text shown in the header. */
  task: string;
  /** Run identifier shown in the header. */
  runSlug: string;
  /** Final status of the run. */
  status: 'completed' | 'aborted';
  /** Iteration count shown in the header; must be a non-negative integer. */
  iterations: number;
  /** End time shown in the header; must be non-empty. */
  endTime: string;
  /** Optional reason line; only rendered when non-empty. */
  reason?: string;
}
|
||||
|
||||
/** One phase execution inside a movement, as reconstructed from session records. */
export interface TracePhase {
  /** Id tying together the start, completion, judge-stage and prompt records of this execution. */
  phaseExecutionId: string;
  /** Phase number. */
  phase: 1 | 2 | 3;
  /** Phase name shown in the phase heading. */
  phaseName: PhaseName;
  /** Instruction text; the renderer rejects a phase whose instruction is empty. */
  instruction: string;
  /** System prompt text (may be an empty string). */
  systemPrompt: string;
  /** User instruction text (may be an empty string). */
  userInstruction: string;
  /** Response text; absent until a completion record supplies it. */
  response?: string;
  /** Completion status; absent while the phase has not completed. */
  status?: string;
  /** Error message, when one was recorded. */
  error?: string;
  /** Timestamp of the phase start record. */
  startedAt: string;
  /** Timestamp of the phase completion record. */
  completedAt?: string;
  /** Judge-stage entries; only rendered for phase 3. */
  judgeStages?: JudgeStageEntry[];
}
|
||||
|
||||
/** One execution of a step (a step name at a given iteration) with its phases and result. */
export interface TraceMovement {
  /** Step name. */
  step: string;
  /** Persona shown in the movement heading. */
  persona: string;
  /** Iteration number; the renderer requires a positive integer. */
  iteration: number;
  /** Movement instruction, rendered in a collapsible block when non-empty. */
  instruction?: string;
  /** Timestamp at which the movement was first seen. */
  startedAt: string;
  /** Timestamp of the step completion record. */
  completedAt?: string;
  /** Phase executions belonging to this movement. */
  phases: TracePhase[];
  /** Outcome of the step; absent when no completion was recorded. */
  result?: {
    status: string;
    content: string;
    error?: string;
    matchedRuleIndex?: number;
    matchedRuleMethod?: string;
    matchMethod?: string;
  };
}
|
||||
81
src/features/tasks/execute/traceReportWriter.ts
Normal file
81
src/features/tasks/execute/traceReportWriter.ts
Normal file
@ -0,0 +1,81 @@
|
||||
import { writeFileAtomic } from '../../../infra/config/index.js';
|
||||
import type { SessionLogger } from './sessionLogger.js';
|
||||
import type { TraceReportMode } from './traceReport.js';
|
||||
import {
|
||||
assertTraceParams,
|
||||
renderTraceReportFromLogs,
|
||||
renderTraceReportFromRecords,
|
||||
} from './traceReport.js';
|
||||
|
||||
/** Fixed, per-run inputs of the trace report writer. */
interface TraceReportWriterParams {
  /** Source of in-memory NDJSON and prompt records, used as a fallback when the log yields no records. */
  sessionLogger: SessionLogger;
  /** Path of the NDJSON session log the report is rendered from. */
  ndjsonLogPath: string;
  /** Path the markdown report is written to. */
  tracePath: string;
  /** Name of the piece. */
  pieceName: string;
  /** Task text. */
  task: string;
  /** Run identifier. */
  runSlug: string;
  /** Optional path of the prompt log. */
  promptLogPath?: string;
  /** Trace report mode passed to the renderers. */
  mode: TraceReportMode;
  /** Logger used to report a skipped duplicate write. */
  logger: {
    info: (message: string, data?: unknown) => void;
  };
}
|
||||
|
||||
/** Outcome of the run, supplied when the report is written. */
interface WriteTraceReportInput {
  /** Final status of the run. */
  status: 'completed' | 'aborted';
  /** Iteration count of the run. */
  iterations: number;
  /** End time of the run. */
  endTime: string;
  /** Optional reason text. */
  reason?: string;
}
|
||||
|
||||
/**
 * Creates a function that writes the trace report at most once.
 *
 * The returned writer:
 * 1. logs and returns when it has been called before (the "written" flag is
 *    set at the start of the first call, before validation and rendering);
 * 2. validates the combined parameters with `assertTraceParams`;
 * 3. renders the report from the log files, falling back to the session
 *    logger's in-memory records only when rendering fails with a
 *    "No session records found for trace report:" error;
 * 4. writes the markdown to `params.tracePath` unless the renderer returned
 *    nothing.
 *
 * @param params - Fixed per-run inputs.
 * @returns The writer function.
 * @throws Error (from the returned writer) when the parameters are invalid or
 *   rendering fails for any reason other than missing session records.
 */
export function createTraceReportWriter(params: TraceReportWriterParams): (input: WriteTraceReportInput) => void {
  const missingRecordsPrefix = 'No session records found for trace report:';
  let alreadyInvoked = false;

  return (input: WriteTraceReportInput): void => {
    const { status, iterations, reason, endTime } = input;

    if (alreadyInvoked) {
      params.logger.info('Trace report write skipped because it has already been written', {
        status,
        iterations,
      });
      return;
    }
    alreadyInvoked = true;

    const traceParams = {
      tracePath: params.tracePath,
      pieceName: params.pieceName,
      task: params.task,
      runSlug: params.runSlug,
      status,
      iterations,
      reason,
      endTime,
    } as const;
    assertTraceParams(traceParams);

    let markdown: string | undefined;
    try {
      markdown = renderTraceReportFromLogs(
        traceParams,
        params.ndjsonLogPath,
        params.promptLogPath,
        params.mode,
      );
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const canFallBack = message.startsWith(missingRecordsPrefix);
      if (!canFallBack) {
        throw error;
      }
      // The log had no records: render from what the session logger holds in memory.
      const { sessionLogger } = params;
      markdown = renderTraceReportFromRecords(
        traceParams,
        sessionLogger.getNdjsonRecords(),
        sessionLogger.getPromptRecords(),
        params.mode,
      );
    }

    if (markdown) {
      writeFileAtomic(params.tracePath, markdown);
    }
  };
}
|
||||
@ -11,6 +11,7 @@ export type {
|
||||
NdjsonPieceAbort,
|
||||
NdjsonPhaseStart,
|
||||
NdjsonPhaseComplete,
|
||||
NdjsonPhaseJudgeStage,
|
||||
NdjsonInteractiveStart,
|
||||
NdjsonInteractiveEnd,
|
||||
NdjsonRecord,
|
||||
|
||||
@ -21,6 +21,7 @@ export type {
|
||||
NdjsonPieceAbort,
|
||||
NdjsonPhaseStart,
|
||||
NdjsonPhaseComplete,
|
||||
NdjsonPhaseJudgeStage,
|
||||
NdjsonInteractiveStart,
|
||||
NdjsonInteractiveEnd,
|
||||
NdjsonRecord,
|
||||
|
||||
@ -128,6 +128,11 @@ export class DebugLogger {
|
||||
return this.debugLogFile;
|
||||
}
|
||||
|
||||
/**
 * Get the current debug prompts log file path.
 *
 * @returns The value of `debugPromptsLogFile`; the declared return type allows `null`.
 */
|
||||
getPromptsLogFile(): string | null {
|
||||
return this.debugPromptsLogFile;
|
||||
}
|
||||
|
||||
/** Format log message with timestamp and level */
|
||||
private static formatLogMessage(level: string, component: string, message: string, data?: unknown): string {
|
||||
const timestamp = new Date().toISOString();
|
||||
@ -223,6 +228,10 @@ export function getDebugLogFile(): string | null {
|
||||
return DebugLogger.getInstance().getLogFile();
|
||||
}
|
||||
|
||||
/**
 * Returns the prompts log file path reported by the shared `DebugLogger` instance.
 *
 * @returns The result of `getPromptsLogFile()` on `DebugLogger.getInstance()`.
 */
export function getDebugPromptsLogFile(): string | null {
  const logger = DebugLogger.getInstance();
  return logger.getPromptsLogFile();
}
|
||||
|
||||
/**
 * Writes a `DEBUG`-level entry through the shared `DebugLogger` instance.
 *
 * @param component - Component name passed to the logger.
 * @param message - Log message.
 * @param data - Optional extra data passed to the logger.
 */
export function debugLog(component: string, message: string, data?: unknown): void {
  const logger = DebugLogger.getInstance();
  logger.writeLog('DEBUG', component, message, data);
}
|
||||
|
||||
50
src/shared/utils/phaseExecutionId.ts
Normal file
50
src/shared/utils/phaseExecutionId.ts
Normal file
@ -0,0 +1,50 @@
|
||||
/** Components encoded in a phase execution id. */
export interface PhaseExecutionIdParts {
  /** Step name; must be non-empty and may itself contain `:`. */
  step: string;
  /** Iteration number; a positive integer. */
  iteration: number;
  /** Phase number. */
  phase: 1 | 2 | 3;
  /** Sequence number of this execution; a positive integer. */
  sequence: number;
}

/**
 * Builds a phase execution id of the form `step:iteration:phase:sequence`.
 *
 * @param parts - Components of the id.
 * @returns The encoded id.
 * @throws Error when the step is empty, when iteration or sequence is not a
 *   positive integer, or when phase is not 1, 2 or 3.
 */
export function buildPhaseExecutionId(parts: PhaseExecutionIdParts): string {
  if (!parts.step) {
    throw new Error('phaseExecutionId requires step');
  }
  if (!Number.isInteger(parts.iteration) || parts.iteration <= 0) {
    throw new Error(`phaseExecutionId requires positive iteration: ${parts.iteration}`);
  }
  if (parts.phase !== 1 && parts.phase !== 2 && parts.phase !== 3) {
    throw new Error(`phaseExecutionId requires phase 1|2|3: ${parts.phase}`);
  }
  if (!Number.isInteger(parts.sequence) || parts.sequence <= 0) {
    throw new Error(`phaseExecutionId requires positive sequence: ${parts.sequence}`);
  }
  return `${parts.step}:${parts.iteration}:${parts.phase}:${parts.sequence}`;
}

/**
 * Parses an id produced by `buildPhaseExecutionId`.
 *
 * The three numeric fields are taken from the right-hand end of the id and
 * everything before them is the step name. This keeps ids round-trippable
 * when the step name contains `:`, which `buildPhaseExecutionId` accepts.
 *
 * @param phaseExecutionId - Id to parse.
 * @returns The decoded parts, or `undefined` when the id has fewer than four
 *   segments, an empty step, a non-positive or non-integer iteration or
 *   sequence, or a phase other than 1, 2 or 3.
 */
export function parsePhaseExecutionId(
  phaseExecutionId: string,
): PhaseExecutionIdParts | undefined {
  const segments = phaseExecutionId.split(':');
  if (segments.length < 4) {
    return undefined;
  }
  // The last three segments are always numeric; the rest is the step name.
  const [iterationStr, phaseStr, sequenceStr] = segments.slice(-3);
  const step = segments.slice(0, -3).join(':');
  const iteration = Number(iterationStr);
  const phase = Number(phaseStr);
  const sequence = Number(sequenceStr);
  if (!step || !Number.isInteger(iteration) || iteration <= 0) {
    return undefined;
  }
  if (phase !== 1 && phase !== 2 && phase !== 3) {
    return undefined;
  }
  if (!Number.isInteger(sequence) || sequence <= 0) {
    return undefined;
  }
  return {
    step,
    iteration,
    phase,
    sequence,
  };
}
|
||||
@ -79,23 +79,44 @@ export interface NdjsonPieceAbort {
|
||||
/** NDJSON record emitted when a phase of a step starts. */
export interface NdjsonPhaseStart {
  type: 'phase_start';
  /** Step the phase belongs to. */
  step: string;
  /** Iteration of the step; optional. */
  iteration?: number;
  /** Phase number. */
  phase: 1 | 2 | 3;
  /** Phase name. */
  phaseName: 'execute' | 'report' | 'judge';
  /** Id of this phase execution; optional. */
  phaseExecutionId?: string;
  /** Time the record was emitted. */
  timestamp: string;
  /** Instruction text; optional. */
  instruction?: string;
  /** System prompt text; optional. */
  systemPrompt?: string;
  /** User instruction text; optional. */
  userInstruction?: string;
}
|
||||
|
||||
/** NDJSON record emitted when a phase of a step completes. */
export interface NdjsonPhaseComplete {
  type: 'phase_complete';
  /** Step the phase belongs to. */
  step: string;
  /** Iteration of the step; optional. */
  iteration?: number;
  /** Phase number. */
  phase: 1 | 2 | 3;
  /** Phase name. */
  phaseName: 'execute' | 'report' | 'judge';
  /** Id of the phase execution being completed; optional. */
  phaseExecutionId?: string;
  /** Completion status. */
  status: string;
  /** Response content; optional. */
  content?: string;
  /** Time the record was emitted. */
  timestamp: string;
  /** Error message; optional. */
  error?: string;
}
|
||||
|
||||
/** NDJSON record describing one judgment stage inside the judge phase (phase 3). */
export interface NdjsonPhaseJudgeStage {
  type: 'phase_judge_stage';
  /** Step the judge phase belongs to. */
  step: string;
  /** Iteration of the step; optional. */
  iteration?: number;
  /** Always 3: judge stages only occur in the judge phase. */
  phase: 3;
  phaseName: 'judge';
  /** Id of the phase execution this stage belongs to; optional. */
  phaseExecutionId?: string;
  /** Stage number. */
  stage: 1 | 2 | 3;
  /** Judgment method used by the stage. */
  method: 'structured_output' | 'phase3_tag' | 'ai_judge';
  /** Outcome of the stage. */
  status: 'done' | 'error' | 'skipped';
  /** Instruction given to the stage. */
  instruction: string;
  /** Response produced by the stage. */
  response: string;
  /** Time the record was emitted. */
  timestamp: string;
}
|
||||
|
||||
export interface NdjsonInteractiveStart {
|
||||
type: 'interactive_start';
|
||||
timestamp: string;
|
||||
@ -116,6 +137,7 @@ export type NdjsonRecord =
|
||||
| NdjsonPieceAbort
|
||||
| NdjsonPhaseStart
|
||||
| NdjsonPhaseComplete
|
||||
| NdjsonPhaseJudgeStage
|
||||
| NdjsonInteractiveStart
|
||||
| NdjsonInteractiveEnd;
|
||||
|
||||
@ -124,7 +146,10 @@ export interface PromptLogRecord {
|
||||
movement: string;
|
||||
phase: 1 | 2 | 3;
|
||||
iteration: number;
|
||||
phaseExecutionId?: string;
|
||||
prompt: string;
|
||||
systemPrompt: string;
|
||||
userInstruction: string;
|
||||
response: string;
|
||||
timestamp: string;
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user