takt: add-trace-report-generation (#467)
This commit is contained in:
parent
dbc22c76fc
commit
8403a7c892
@ -43,6 +43,13 @@ function doneResponse(content: string, structuredOutput?: Record<string, unknown
|
|||||||
}
|
}
|
||||||
|
|
||||||
const judgeOptions = { cwd: '/repo', movementName: 'review' };
|
const judgeOptions = { cwd: '/repo', movementName: 'review' };
|
||||||
|
/**
 * Shape of the entry the tests expect `judgeStatus` to pass to the
 * `onJudgeStage` callback for each internal judging stage.
 *
 * The pairing of stage numbers and methods used by the tests in this file is
 * 1 = 'structured_output', 2 = 'phase3_tag', 3 = 'ai_judge'.
 */
type JudgeStageLog = {
  /** Ordinal of the judging stage. */
  stage: 1 | 2 | 3;
  /** Detection method attempted by the stage. */
  method: 'structured_output' | 'phase3_tag' | 'ai_judge';
  /** Outcome reported for the stage. */
  status: 'done' | 'error' | 'skipped';
  /** Instruction text that was sent for the stage. */
  instruction: string;
  /** Raw response text received for the stage. */
  response: string;
};
|
||||||
|
|
||||||
describe('agent-usecases', () => {
|
describe('agent-usecases', () => {
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
@ -173,6 +180,75 @@ describe('agent-usecases', () => {
|
|||||||
expect(runAgent).toHaveBeenCalledTimes(3);
|
expect(runAgent).toHaveBeenCalledTimes(3);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Checks that judgeStatus reports its internal Phase 3 stages to onJudgeStage
// in order: stage 1 (structured output) first, then stage 2 (tag detection),
// and nothing further once stage 2 has produced a tag.
it('judgeStatus は Phase 3 の内部ステージログを順序どおりに通知する', async () => {
  const onJudgeStage = vi.fn();
  // Stage 1: structured output fails
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match'));
  // Stage 2: tag detection succeeds
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('[REVIEW:2]'));

  await judgeStatus(
    'structured',
    'tag',
    [
      { condition: 'a', next: 'one' },
      { condition: 'b', next: 'two' },
    ],
    // The cast widens the shared options with the callback under test.
    {
      ...judgeOptions,
      onJudgeStage,
    } as typeof judgeOptions & { onJudgeStage: (entry: JudgeStageLog) => void },
  );

  // Only two stages ran, so exactly two log entries are expected.
  expect(onJudgeStage).toHaveBeenCalledTimes(2);
  // Stage 1 is logged with the agent's raw response and status 'done'
  // (the agent call itself completed, even though it yielded no match).
  expect(onJudgeStage).toHaveBeenNthCalledWith(1, expect.objectContaining({
    stage: 1,
    method: 'structured_output',
    status: 'done',
    instruction: 'structured',
    response: 'no match',
  }));
  expect(onJudgeStage).toHaveBeenNthCalledWith(2, expect.objectContaining({
    stage: 2,
    method: 'phase3_tag',
    status: 'done',
    instruction: 'tag',
    response: '[REVIEW:2]',
  }));
});
|
||||||
|
|
||||||
|
// Checks that when every judging stage fails, judgeStatus still notifies
// onJudgeStage for all three stages before rejecting.
it('judgeStatus は全ステージ失敗時にも Stage 3 までログ通知する', async () => {
  const onJudgeStage = vi.fn();
  // Stage 1: structured output fails
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match'));
  // Stage 2: tag detection fails
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no tag'));
  // Stage 3: evaluateCondition fails
  vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('still no match'));
  // -1 makes the judge-index detection report "no rule matched".
  vi.mocked(detectJudgeIndex).mockReturnValue(-1);

  await expect(
    judgeStatus(
      'structured',
      'tag',
      [
        { condition: 'a', next: 'one' },
        { condition: 'b', next: 'two' },
      ],
      // The cast widens the shared options with the callback under test.
      {
        ...judgeOptions,
        onJudgeStage,
      } as typeof judgeOptions & { onJudgeStage: (entry: JudgeStageLog) => void },
    ),
  ).rejects.toThrow('Status not found for movement "review"');

  // One entry per stage; the final entry must describe the AI-judge stage.
  expect(onJudgeStage).toHaveBeenCalledTimes(3);
  expect(onJudgeStage).toHaveBeenLastCalledWith(expect.objectContaining({
    stage: 3,
    method: 'ai_judge',
  }));
});
|
||||||
|
|
||||||
it('judgeStatus は全ての判定に失敗したらエラー', async () => {
|
it('judgeStatus は全ての判定に失敗したらエラー', async () => {
|
||||||
// Stage 1: structured output fails
|
// Stage 1: structured output fails
|
||||||
vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match'));
|
vi.mocked(runAgent).mockResolvedValueOnce(doneResponse('no match'));
|
||||||
@ -232,6 +308,27 @@ describe('agent-usecases', () => {
|
|||||||
.rejects.toThrow('Team leader failed: bad output');
|
.rejects.toThrow('Team leader failed: bad output');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Checks that decomposeTask forwards the caller's onPromptResolved callback
// to runAgent as part of the options object.
it('decomposeTask は onPromptResolved を runAgent に伝搬する', async () => {
  // Structured output containing a single part, so decomposition can complete.
  vi.mocked(runAgent).mockResolvedValue(doneResponse('x', {
    parts: [
      { id: 'p1', title: 'Part 1', instruction: 'Do 1', timeout_ms: null },
    ],
  }));
  const onPromptResolved = vi.fn();

  await decomposeTask('instruction', 2, {
    cwd: '/repo',
    persona: 'team-leader',
    onPromptResolved,
  });

  // The exact callback instance must appear in the options given to runAgent.
  expect(runAgent).toHaveBeenCalledWith(
    'team-leader',
    expect.any(String),
    expect.objectContaining({ onPromptResolved }),
  );
});
|
||||||
|
|
||||||
it('requestMoreParts は構造化出力をパースして返す', async () => {
|
it('requestMoreParts は構造化出力をパースして返す', async () => {
|
||||||
vi.mocked(runAgent).mockResolvedValue(doneResponse('x', {
|
vi.mocked(runAgent).mockResolvedValue(doneResponse('x', {
|
||||||
done: false,
|
done: false,
|
||||||
|
|||||||
@ -26,4 +26,33 @@ describe('config module file-size boundary', () => {
|
|||||||
const lineCount = getLineCount('../features/tasks/execute/pieceExecution.ts');
|
const lineCount = getLineCount('../features/tasks/execute/pieceExecution.ts');
|
||||||
expect(lineCount).toBeLessThanOrEqual(300);
|
expect(lineCount).toBeLessThanOrEqual(300);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Size guard for sessionLogger.ts. Note the assertion is inclusive:
// a file of exactly 300 lines still passes.
it('keeps sessionLogger.ts under 300 lines', () => {
  const lineCount = getLineCount('../features/tasks/execute/sessionLogger.ts');
  expect(lineCount).toBeLessThanOrEqual(300);
});
|
||||||
|
|
||||||
|
// Size guard for the two modules traceReport was split into; each one is
// checked independently against the inclusive 300-line limit.
it('keeps traceReport renderer/parser split modules under 300 lines', () => {
  const rendererLineCount = getLineCount('../features/tasks/execute/traceReportRenderer.ts');
  const parserLineCount = getLineCount('../features/tasks/execute/traceReportParser.ts');
  expect(rendererLineCount).toBeLessThanOrEqual(300);
  expect(parserLineCount).toBeLessThanOrEqual(300);
});
|
||||||
|
|
||||||
|
// Size guard for the traceReport.ts facade: a tighter, inclusive 120-line
// limit than the 300 lines allowed for the split modules.
it('keeps traceReport.ts as thin facade under 120 lines', () => {
  const lineCount = getLineCount('../features/tasks/execute/traceReport.ts');
  expect(lineCount).toBeLessThanOrEqual(120);
});
|
||||||
|
|
||||||
|
// Size guard for the agent-usecases.ts facade (inclusive 120-line limit).
it('keeps agent-usecases.ts as thin facade under 120 lines', () => {
  const lineCount = getLineCount('../agents/agent-usecases.ts');
  expect(lineCount).toBeLessThanOrEqual(120);
});
|
||||||
|
|
||||||
|
// Size guard for the use-case modules split out of agent-usecases.ts; each
// one is checked independently against the inclusive 300-line limit.
it('keeps split agent usecases under 300 lines each', () => {
  const judgeLineCount = getLineCount('../agents/judge-status-usecase.ts');
  const decomposeLineCount = getLineCount('../agents/decompose-task-usecase.ts');
  expect(judgeLineCount).toBeLessThanOrEqual(300);
  expect(decomposeLineCount).toBeLessThanOrEqual(300);
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -100,6 +100,19 @@ function createEngineOptions(tmpDir: string): PieceEngineOptions {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Queues one-shot `runAgent` mock implementations, one per given response,
 * consumed in the order supplied.
 *
 * Unlike a plain `mockResolvedValueOnce`, each queued implementation first
 * invokes `options.onPromptResolved` (when provided) so that code relying on
 * that callback observes a resolved prompt during the test.
 *
 * @param responses - Responses to return from successive `runAgent` calls.
 */
function mockRunAgentWithPrompt(...responses: ReturnType<typeof makeResponse>[]): void {
  const mock = vi.mocked(runAgent);
  for (const response of responses) {
    mock.mockImplementationOnce(async (persona, instruction, options) => {
      // Report the persona as the system prompt when it is a string, and the
      // instruction verbatim as the user instruction.
      options?.onPromptResolved?.({
        systemPrompt: typeof persona === 'string' ? persona : '',
        userInstruction: instruction,
      });
      return response;
    });
  }
}
|
||||||
|
|
||||||
describe('ArpeggioRunner integration', () => {
|
describe('ArpeggioRunner integration', () => {
|
||||||
let engine: PieceEngine | undefined;
|
let engine: PieceEngine | undefined;
|
||||||
|
|
||||||
@ -122,10 +135,11 @@ describe('ArpeggioRunner integration', () => {
|
|||||||
|
|
||||||
// Mock agent to return batch-specific responses
|
// Mock agent to return batch-specific responses
|
||||||
const mockAgent = vi.mocked(runAgent);
|
const mockAgent = vi.mocked(runAgent);
|
||||||
mockAgent
|
mockRunAgentWithPrompt(
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'Processed Alice' }))
|
makeResponse({ content: 'Processed Alice' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'Processed Bob' }))
|
makeResponse({ content: 'Processed Bob' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'Processed Charlie' }));
|
makeResponse({ content: 'Processed Charlie' }),
|
||||||
|
);
|
||||||
|
|
||||||
// Mock rule detection for the merged result
|
// Mock rule detection for the merged result
|
||||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
||||||
@ -163,9 +177,10 @@ describe('ArpeggioRunner integration', () => {
|
|||||||
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
||||||
|
|
||||||
const mockAgent = vi.mocked(runAgent);
|
const mockAgent = vi.mocked(runAgent);
|
||||||
mockAgent
|
mockRunAgentWithPrompt(
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'Batch 0 result' }))
|
makeResponse({ content: 'Batch 0 result' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'Batch 1 result' }));
|
makeResponse({ content: 'Batch 1 result' }),
|
||||||
|
);
|
||||||
|
|
||||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
||||||
index: 0,
|
index: 0,
|
||||||
@ -189,13 +204,12 @@ describe('ArpeggioRunner integration', () => {
|
|||||||
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
||||||
|
|
||||||
const mockAgent = vi.mocked(runAgent);
|
const mockAgent = vi.mocked(runAgent);
|
||||||
// First batch succeeds
|
mockRunAgentWithPrompt(
|
||||||
mockAgent.mockResolvedValueOnce(makeResponse({ content: 'OK' }));
|
makeResponse({ content: 'OK' }),
|
||||||
// Second batch fails twice (initial + 1 retry)
|
makeResponse({ status: 'error', error: 'fail1' }),
|
||||||
mockAgent.mockResolvedValueOnce(makeResponse({ status: 'error', error: 'fail1' }));
|
makeResponse({ status: 'error', error: 'fail2' }),
|
||||||
mockAgent.mockResolvedValueOnce(makeResponse({ status: 'error', error: 'fail2' }));
|
makeResponse({ content: 'OK' }),
|
||||||
// Third batch succeeds
|
);
|
||||||
mockAgent.mockResolvedValueOnce(makeResponse({ content: 'OK' }));
|
|
||||||
|
|
||||||
engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
|
engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
|
||||||
const state = await engine.run();
|
const state = await engine.run();
|
||||||
@ -210,10 +224,11 @@ describe('ArpeggioRunner integration', () => {
|
|||||||
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
||||||
|
|
||||||
const mockAgent = vi.mocked(runAgent);
|
const mockAgent = vi.mocked(runAgent);
|
||||||
mockAgent
|
mockRunAgentWithPrompt(
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'Result A' }))
|
makeResponse({ content: 'Result A' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'Result B' }))
|
makeResponse({ content: 'Result B' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'Result C' }));
|
makeResponse({ content: 'Result C' }),
|
||||||
|
);
|
||||||
|
|
||||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
||||||
index: 0,
|
index: 0,
|
||||||
@ -234,10 +249,11 @@ describe('ArpeggioRunner integration', () => {
|
|||||||
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
|
||||||
|
|
||||||
const mockAgent = vi.mocked(runAgent);
|
const mockAgent = vi.mocked(runAgent);
|
||||||
mockAgent
|
mockRunAgentWithPrompt(
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'A' }))
|
makeResponse({ content: 'A' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'B' }))
|
makeResponse({ content: 'B' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ content: 'C' }));
|
makeResponse({ content: 'C' }),
|
||||||
|
);
|
||||||
|
|
||||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce({
|
||||||
index: 0,
|
index: 0,
|
||||||
@ -251,4 +267,90 @@ describe('ArpeggioRunner integration', () => {
|
|||||||
expect(mockAgent).toHaveBeenCalledTimes(3);
|
expect(mockAgent).toHaveBeenCalledTimes(3);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Checks that each arpeggio batch emits a phase:start event whose instruction
// is the resolved prompt text rather than a string starting with
// '[Arpeggio batch'.
it('should record resolved prompt in phase:start for arpeggio batches', async () => {
  const { tmpDir, csvPath, templatePath } = createArpeggioTestDir();
  const arpeggioConfig = createArpeggioConfig(csvPath, templatePath, { concurrency: 2 });
  const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
  const phaseStarts: string[] = [];

  // One queued response per expected batch; each mock also fires onPromptResolved.
  mockRunAgentWithPrompt(
    makeResponse({ content: 'A' }),
    makeResponse({ content: 'B' }),
    makeResponse({ content: 'C' }),
  );
  vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });

  engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
  engine.on('phase:start', (step, phase, phaseName, instruction) => {
    // Only collect phase-1 "execute" events of the 'process' step.
    if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute') return;
    phaseStarts.push(instruction);
  });

  const state = await engine.run();

  expect(state.status).toBe('completed');
  // toHaveLength matches the sibling test in this suite and gives a clearer failure message.
  expect(phaseStarts).toHaveLength(3);
  expect(phaseStarts.every((instruction) => !instruction.startsWith('[Arpeggio batch'))).toBe(true);
  expect(phaseStarts.some((instruction) => instruction.includes('Process '))).toBe(true);
});
|
||||||
|
|
||||||
|
// Checks that phase:start and phase:complete events stay paired through
// phaseExecutionId even when agent calls finish out of order.
it('should keep phaseExecutionId bindings correct when completion order is reversed', async () => {
  const { tmpDir, csvPath, templatePath } = createArpeggioTestDir();
  const arpeggioConfig = createArpeggioConfig(csvPath, templatePath, { concurrency: 2 });
  const config = buildArpeggioPieceConfig(arpeggioConfig, tmpDir);
  const phaseStartsByExecutionId = new Map<string, string>();
  const phaseCompletions: Array<{ phaseExecutionId?: string; content: string }> = [];

  vi.mocked(runAgent).mockImplementation(async (persona, instruction, options) => {
    options?.onPromptResolved?.({
      systemPrompt: typeof persona === 'string' ? persona : '',
      userInstruction: instruction,
    });
    // Alice's call is delayed longer than Bob's (40 ms vs 5 ms), so Bob's call
    // resolves first whenever the two run concurrently.
    if (instruction.includes('Alice')) {
      await new Promise((resolve) => setTimeout(resolve, 40));
      return makeResponse({ content: 'Result Alice' });
    }
    if (instruction.includes('Bob')) {
      await new Promise((resolve) => setTimeout(resolve, 5));
      return makeResponse({ content: 'Result Bob' });
    }
    return makeResponse({ content: 'Result Charlie' });
  });
  vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });

  engine = new PieceEngine(config, tmpDir, 'test task', createEngineOptions(tmpDir));
  engine.on('phase:start', (step, phase, phaseName, instruction, _promptParts, phaseExecutionId) => {
    // Record the instruction under its execution id; events without an id are ignored.
    if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute' || !phaseExecutionId) return;
    phaseStartsByExecutionId.set(phaseExecutionId, instruction);
  });
  engine.on('phase:complete', (step, phase, phaseName, content, _status, _error, phaseExecutionId) => {
    if (step.name !== 'process' || phase !== 1 || phaseName !== 'execute') return;
    phaseCompletions.push({ phaseExecutionId, content });
  });

  const state = await engine.run();

  expect(state.status).toBe('completed');
  expect(phaseCompletions).toHaveLength(3);
  // Every completion must carry its own distinct execution id.
  expect(new Set(phaseCompletions.map((entry) => entry.phaseExecutionId)).size).toBe(3);
  expect(phaseCompletions.map((entry) => entry.content).sort()).toEqual([
    'Result Alice',
    'Result Bob',
    'Result Charlie',
  ]);
  // Each completion's id must lead back to the start event for the same row.
  for (const completion of phaseCompletions) {
    const instruction = completion.phaseExecutionId
      ? phaseStartsByExecutionId.get(completion.phaseExecutionId)
      : undefined;
    expect(instruction).toBeDefined();
    if (completion.content === 'Result Alice') {
      expect(instruction).toContain('Alice');
    } else if (completion.content === 'Result Bob') {
      expect(instruction).toContain('Bob');
    } else {
      // The content set was asserted above, so the remaining case is Charlie.
      expect(instruction).toContain('Charlie');
    }
  }
});
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|||||||
@ -167,9 +167,13 @@ describe('PieceEngine Integration: Error Handling', () => {
|
|||||||
const engine = new PieceEngine(config, tmpDir, 'test task', { projectCwd: tmpDir });
|
const engine = new PieceEngine(config, tmpDir, 'test task', { projectCwd: tmpDir });
|
||||||
|
|
||||||
for (let i = 0; i < 5; i++) {
|
for (let i = 0; i < 5; i++) {
|
||||||
vi.mocked(runAgent).mockResolvedValueOnce(
|
vi.mocked(runAgent).mockImplementationOnce(async (persona, task, options) => {
|
||||||
makeResponse({ content: `iteration ${i}` })
|
options?.onPromptResolved?.({
|
||||||
);
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return makeResponse({ content: `iteration ${i}` });
|
||||||
|
});
|
||||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce(
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce(
|
||||||
{ index: 0, method: 'phase1_tag' }
|
{ index: 0, method: 'phase1_tag' }
|
||||||
);
|
);
|
||||||
|
|||||||
@ -544,11 +544,16 @@ describe('PieceEngine Integration: Happy Path', () => {
|
|||||||
|
|
||||||
expect(phaseStartFn).toHaveBeenCalledWith(
|
expect(phaseStartFn).toHaveBeenCalledWith(
|
||||||
expect.objectContaining({ name: 'plan' }),
|
expect.objectContaining({ name: 'plan' }),
|
||||||
1, 'execute', expect.any(String)
|
1, 'execute', expect.any(String), expect.objectContaining({
|
||||||
|
systemPrompt: expect.any(String),
|
||||||
|
userInstruction: expect.any(String),
|
||||||
|
}),
|
||||||
|
undefined,
|
||||||
|
1,
|
||||||
);
|
);
|
||||||
expect(phaseCompleteFn).toHaveBeenCalledWith(
|
expect(phaseCompleteFn).toHaveBeenCalledWith(
|
||||||
expect.objectContaining({ name: 'plan' }),
|
expect.objectContaining({ name: 'plan' }),
|
||||||
1, 'execute', expect.any(String), 'done', undefined
|
1, 'execute', expect.any(String), 'done', undefined, undefined, 1,
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@ -122,13 +122,21 @@ describe('PieceEngine Integration: Parallel Movement Partial Failure', () => {
|
|||||||
// arch-review fails (exit code 1)
|
// arch-review fails (exit code 1)
|
||||||
mock.mockRejectedValueOnce(new Error('Claude Code process exited with code 1'));
|
mock.mockRejectedValueOnce(new Error('Claude Code process exited with code 1'));
|
||||||
// security-review succeeds
|
// security-review succeeds
|
||||||
mock.mockResolvedValueOnce(
|
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
makeResponse({ persona: 'security-review', content: 'Security review passed' }),
|
options?.onPromptResolved?.({
|
||||||
);
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return makeResponse({ persona: 'security-review', content: 'Security review passed' });
|
||||||
|
});
|
||||||
// done step
|
// done step
|
||||||
mock.mockResolvedValueOnce(
|
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
makeResponse({ persona: 'done', content: 'Completed' }),
|
options?.onPromptResolved?.({
|
||||||
);
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return makeResponse({ persona: 'done', content: 'Completed' });
|
||||||
|
});
|
||||||
|
|
||||||
mockDetectMatchedRuleSequence([
|
mockDetectMatchedRuleSequence([
|
||||||
// security-review sub-movement rule match (arch-review has no match — it failed)
|
// security-review sub-movement rule match (arch-review has no match — it failed)
|
||||||
@ -179,12 +187,20 @@ describe('PieceEngine Integration: Parallel Movement Partial Failure', () => {
|
|||||||
|
|
||||||
const mock = vi.mocked(runAgent);
|
const mock = vi.mocked(runAgent);
|
||||||
mock.mockRejectedValueOnce(new Error('Session resume failed'));
|
mock.mockRejectedValueOnce(new Error('Session resume failed'));
|
||||||
mock.mockResolvedValueOnce(
|
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
makeResponse({ persona: 'security-review', content: 'OK' }),
|
options?.onPromptResolved?.({
|
||||||
);
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
mock.mockResolvedValueOnce(
|
userInstruction: task,
|
||||||
makeResponse({ persona: 'done', content: 'Done' }),
|
});
|
||||||
);
|
return makeResponse({ persona: 'security-review', content: 'OK' });
|
||||||
|
});
|
||||||
|
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
|
options?.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return makeResponse({ persona: 'done', content: 'Done' });
|
||||||
|
});
|
||||||
|
|
||||||
mockDetectMatchedRuleSequence([
|
mockDetectMatchedRuleSequence([
|
||||||
{ index: 0, method: 'phase1_tag' },
|
{ index: 0, method: 'phase1_tag' },
|
||||||
|
|||||||
@ -216,11 +216,15 @@ describe('PieceEngine Integration: Parallel Movement Aggregation', () => {
|
|||||||
['../personas/supervise.md', makeResponse({ persona: 'supervise', content: 'All passed' })],
|
['../personas/supervise.md', makeResponse({ persona: 'supervise', content: 'All passed' })],
|
||||||
]);
|
]);
|
||||||
|
|
||||||
vi.mocked(runAgent).mockImplementation(async (persona, _task, options) => {
|
vi.mocked(runAgent).mockImplementation(async (persona, task, options) => {
|
||||||
const response = responsesByPersona.get(persona ?? '');
|
const response = responsesByPersona.get(persona ?? '');
|
||||||
if (!response) {
|
if (!response) {
|
||||||
throw new Error(`Unexpected persona: ${persona}`);
|
throw new Error(`Unexpected persona: ${persona}`);
|
||||||
}
|
}
|
||||||
|
options.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
|
||||||
if (persona === '../personas/arch-review.md') {
|
if (persona === '../personas/arch-review.md') {
|
||||||
options.onStream?.({ type: 'text', data: { text: 'arch stream line\n' } });
|
options.onStream?.({ type: 'text', data: { text: 'arch stream line\n' } });
|
||||||
|
|||||||
@ -49,6 +49,19 @@ function buildTeamLeaderConfig(): PieceConfig {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Queues one-shot `runAgent` mock implementations, one per given response,
 * consumed in the order supplied.
 *
 * Each queued implementation calls `options.onPromptResolved` (when provided)
 * before returning its response, so code that depends on that callback sees a
 * resolved prompt during the test.
 *
 * @param responses - Responses to return from successive `runAgent` calls.
 */
function mockRunAgentWithPrompt(...responses: ReturnType<typeof makeResponse>[]): void {
  const mock = vi.mocked(runAgent);
  for (const response of responses) {
    mock.mockImplementationOnce(async (persona, instruction, options) => {
      // Report the persona as the system prompt when it is a string, and the
      // instruction verbatim as the user instruction.
      options?.onPromptResolved?.({
        systemPrompt: typeof persona === 'string' ? persona : '',
        userInstruction: instruction,
      });
      return response;
    });
  }
}
|
||||||
|
|
||||||
describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
||||||
let tmpDir: string;
|
let tmpDir: string;
|
||||||
|
|
||||||
@ -68,21 +81,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
|||||||
const config = buildTeamLeaderConfig();
|
const config = buildTeamLeaderConfig();
|
||||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||||
|
|
||||||
vi.mocked(runAgent)
|
mockRunAgentWithPrompt(
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
content: [
|
content: [
|
||||||
'```json',
|
'```json',
|
||||||
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
||||||
'```',
|
'```',
|
||||||
].join('\n'),
|
].join('\n'),
|
||||||
}))
|
}),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' }))
|
makeResponse({ persona: 'coder', content: 'API done' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' }))
|
makeResponse({ persona: 'coder', content: 'Tests done' }),
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
structuredOutput: { done: true, reasoning: 'enough', parts: [] },
|
structuredOutput: { done: true, reasoning: 'enough', parts: [] },
|
||||||
}));
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
||||||
|
|
||||||
@ -103,21 +117,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
|||||||
const config = buildTeamLeaderConfig();
|
const config = buildTeamLeaderConfig();
|
||||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||||
|
|
||||||
vi.mocked(runAgent)
|
mockRunAgentWithPrompt(
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
content: [
|
content: [
|
||||||
'```json',
|
'```json',
|
||||||
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
||||||
'```',
|
'```',
|
||||||
].join('\n'),
|
].join('\n'),
|
||||||
}))
|
}),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'api failed' }))
|
makeResponse({ persona: 'coder', status: 'error', error: 'api failed' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }))
|
makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }),
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
|
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
|
||||||
}));
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
const state = await engine.run();
|
const state = await engine.run();
|
||||||
|
|
||||||
@ -128,21 +143,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
|||||||
const config = buildTeamLeaderConfig();
|
const config = buildTeamLeaderConfig();
|
||||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||||
|
|
||||||
vi.mocked(runAgent)
|
mockRunAgentWithPrompt(
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
content: [
|
content: [
|
||||||
'```json',
|
'```json',
|
||||||
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
||||||
'```',
|
'```',
|
||||||
].join('\n'),
|
].join('\n'),
|
||||||
}))
|
}),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' }))
|
makeResponse({ persona: 'coder', content: 'API done' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }))
|
makeResponse({ persona: 'coder', status: 'error', error: 'test failed' }),
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
|
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
|
||||||
}));
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
||||||
|
|
||||||
@ -161,21 +177,22 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
|||||||
const config = buildTeamLeaderConfig();
|
const config = buildTeamLeaderConfig();
|
||||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||||
|
|
||||||
vi.mocked(runAgent)
|
mockRunAgentWithPrompt(
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
content: [
|
content: [
|
||||||
'```json',
|
'```json',
|
||||||
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
'[{"id":"part-1","title":"API","instruction":"Implement API"},{"id":"part-2","title":"Test","instruction":"Add tests"}]',
|
||||||
'```',
|
'```',
|
||||||
].join('\n'),
|
].join('\n'),
|
||||||
}))
|
}),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', status: 'error', content: 'api failed from content' }))
|
makeResponse({ persona: 'coder', status: 'error', content: 'api failed from content' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' }))
|
makeResponse({ persona: 'coder', content: 'Tests done' }),
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
|
structuredOutput: { done: true, reasoning: 'stop', parts: [] },
|
||||||
}));
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
||||||
|
|
||||||
@ -191,8 +208,8 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
|||||||
const config = buildTeamLeaderConfig();
|
const config = buildTeamLeaderConfig();
|
||||||
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||||
|
|
||||||
vi.mocked(runAgent)
|
mockRunAgentWithPrompt(
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
structuredOutput: {
|
structuredOutput: {
|
||||||
parts: [
|
parts: [
|
||||||
@ -200,10 +217,10 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
|||||||
{ id: 'part-2', title: 'Test', instruction: 'Add tests', timeout_ms: null },
|
{ id: 'part-2', title: 'Test', instruction: 'Add tests', timeout_ms: null },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}))
|
}),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'API done' }))
|
makeResponse({ persona: 'coder', content: 'API done' }),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Tests done' }))
|
makeResponse({ persona: 'coder', content: 'Tests done' }),
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
structuredOutput: {
|
structuredOutput: {
|
||||||
done: false,
|
done: false,
|
||||||
@ -212,16 +229,17 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
|||||||
{ id: 'part-3', title: 'Docs', instruction: 'Write docs', timeout_ms: null },
|
{ id: 'part-3', title: 'Docs', instruction: 'Write docs', timeout_ms: null },
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
}))
|
}),
|
||||||
.mockResolvedValueOnce(makeResponse({ persona: 'coder', content: 'Docs done' }))
|
makeResponse({ persona: 'coder', content: 'Docs done' }),
|
||||||
.mockResolvedValueOnce(makeResponse({
|
makeResponse({
|
||||||
persona: 'team-leader',
|
persona: 'team-leader',
|
||||||
structuredOutput: {
|
structuredOutput: {
|
||||||
done: true,
|
done: true,
|
||||||
reasoning: 'Enough',
|
reasoning: 'Enough',
|
||||||
parts: [],
|
parts: [],
|
||||||
},
|
},
|
||||||
}));
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
||||||
|
|
||||||
@ -235,4 +253,35 @@ describe('PieceEngine Integration: TeamLeaderRunner', () => {
|
|||||||
expect(output!.content).toContain('Docs done');
|
expect(output!.content).toContain('Docs done');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('team leader の phase:start には分解実行時の実 instruction を記録する', async () => {
|
||||||
|
const config = buildTeamLeaderConfig();
|
||||||
|
const engine = new PieceEngine(config, tmpDir, 'implement feature', { projectCwd: tmpDir });
|
||||||
|
const phaseStarts: string[] = [];
|
||||||
|
engine.on('phase:start', (step, phase, phaseName, instruction) => {
|
||||||
|
if (step.name !== 'implement' || phase !== 1 || phaseName !== 'execute') return;
|
||||||
|
phaseStarts.push(instruction);
|
||||||
|
});
|
||||||
|
|
||||||
|
mockRunAgentWithPrompt(
|
||||||
|
makeResponse({
|
||||||
|
persona: 'team-leader',
|
||||||
|
structuredOutput: {
|
||||||
|
parts: [{ id: 'part-1', title: 'API', instruction: 'Implement API', timeout_ms: null }],
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
makeResponse({ persona: 'coder', content: 'API done' }),
|
||||||
|
makeResponse({
|
||||||
|
persona: 'team-leader',
|
||||||
|
structuredOutput: { done: true, reasoning: 'enough', parts: [] },
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
vi.mocked(detectMatchedRule).mockResolvedValueOnce({ index: 0, method: 'phase1_tag' });
|
||||||
|
|
||||||
|
const state = await engine.run();
|
||||||
|
|
||||||
|
expect(state.status).toBe('completed');
|
||||||
|
expect(phaseStarts.length).toBeGreaterThan(0);
|
||||||
|
expect(phaseStarts[0]).toContain('This is decomposition-only planning. Do not execute the task.');
|
||||||
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|||||||
@ -136,7 +136,13 @@ export function buildDefaultPieceConfig(overrides: Partial<PieceConfig> = {}): P
|
|||||||
export function mockRunAgentSequence(responses: AgentResponse[]): void {
|
export function mockRunAgentSequence(responses: AgentResponse[]): void {
|
||||||
const mock = vi.mocked(runAgent);
|
const mock = vi.mocked(runAgent);
|
||||||
for (const response of responses) {
|
for (const response of responses) {
|
||||||
mock.mockResolvedValueOnce(response);
|
mock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
|
options?.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return response;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -103,7 +103,13 @@ describe('IT: config provider_options reflection', () => {
|
|||||||
delete process.env.TAKT_PROVIDER_OPTIONS_CODEX_NETWORK_ACCESS;
|
delete process.env.TAKT_PROVIDER_OPTIONS_CODEX_NETWORK_ACCESS;
|
||||||
invalidateGlobalConfigCache();
|
invalidateGlobalConfigCache();
|
||||||
|
|
||||||
vi.mocked(runAgent).mockResolvedValue(makeDoneResponse());
|
vi.mocked(runAgent).mockImplementation(async (persona, task, options) => {
|
||||||
|
options?.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return makeDoneResponse();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
@ -203,4 +209,3 @@ describe('IT: config provider_options reflection', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@ -83,7 +83,13 @@ describe('IT: provider block reflection', () => {
|
|||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
vi.clearAllMocks();
|
vi.clearAllMocks();
|
||||||
originalConfigDir = process.env.TAKT_CONFIG_DIR;
|
originalConfigDir = process.env.TAKT_CONFIG_DIR;
|
||||||
vi.mocked(runAgent).mockResolvedValue(makeDoneResponse());
|
vi.mocked(runAgent).mockImplementation(async (persona, task, options) => {
|
||||||
|
options?.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return makeDoneResponse();
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
afterEach(() => {
|
afterEach(() => {
|
||||||
|
|||||||
@ -11,6 +11,7 @@ vi.mock('../agents/runner.js', () => ({
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
import { runAgent } from '../agents/runner.js';
|
import { runAgent } from '../agents/runner.js';
|
||||||
|
import type { AgentResponse } from '../core/models/types.js';
|
||||||
|
|
||||||
function createStep(fileName: string): PieceMovement {
|
function createStep(fileName: string): PieceMovement {
|
||||||
return {
|
return {
|
||||||
@ -51,6 +52,19 @@ function createContext(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function queueRunAgentResponses(responses: AgentResponse[]): void {
|
||||||
|
const runAgentMock = vi.mocked(runAgent);
|
||||||
|
for (const response of responses) {
|
||||||
|
runAgentMock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
|
options?.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return response;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
describe('runReportPhase report history behavior', () => {
|
describe('runReportPhase report history behavior', () => {
|
||||||
let tmpRoot: string;
|
let tmpRoot: string;
|
||||||
|
|
||||||
@ -71,22 +85,22 @@ describe('runReportPhase report history behavior', () => {
|
|||||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||||
const step = createStep('05-architect-review.md');
|
const step = createStep('05-architect-review.md');
|
||||||
const ctx = createContext(reportDir);
|
const ctx = createContext(reportDir);
|
||||||
const runAgentMock = vi.mocked(runAgent);
|
queueRunAgentResponses([
|
||||||
runAgentMock
|
{
|
||||||
.mockResolvedValueOnce({
|
|
||||||
persona: 'reviewers',
|
persona: 'reviewers',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: 'First review result',
|
content: 'First review result',
|
||||||
timestamp: new Date('2026-02-10T06:11:43Z'),
|
timestamp: new Date('2026-02-10T06:11:43Z'),
|
||||||
sessionId: 'session-2',
|
sessionId: 'session-2',
|
||||||
})
|
},
|
||||||
.mockResolvedValueOnce({
|
{
|
||||||
persona: 'reviewers',
|
persona: 'reviewers',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: 'Second review result',
|
content: 'Second review result',
|
||||||
timestamp: new Date('2026-02-10T06:14:37Z'),
|
timestamp: new Date('2026-02-10T06:14:37Z'),
|
||||||
sessionId: 'session-3',
|
sessionId: 'session-3',
|
||||||
});
|
},
|
||||||
|
]);
|
||||||
|
|
||||||
// When
|
// When
|
||||||
await runReportPhase(step, 1, ctx);
|
await runReportPhase(step, 1, ctx);
|
||||||
@ -113,29 +127,29 @@ describe('runReportPhase report history behavior', () => {
|
|||||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||||
const step = createStep('06-qa-review.md');
|
const step = createStep('06-qa-review.md');
|
||||||
const ctx = createContext(reportDir);
|
const ctx = createContext(reportDir);
|
||||||
const runAgentMock = vi.mocked(runAgent);
|
queueRunAgentResponses([
|
||||||
runAgentMock
|
{
|
||||||
.mockResolvedValueOnce({
|
|
||||||
persona: 'reviewers',
|
persona: 'reviewers',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: 'v1',
|
content: 'v1',
|
||||||
timestamp: new Date('2026-02-10T06:11:43Z'),
|
timestamp: new Date('2026-02-10T06:11:43Z'),
|
||||||
sessionId: 'session-2',
|
sessionId: 'session-2',
|
||||||
})
|
},
|
||||||
.mockResolvedValueOnce({
|
{
|
||||||
persona: 'reviewers',
|
persona: 'reviewers',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: 'v2',
|
content: 'v2',
|
||||||
timestamp: new Date('2026-02-10T06:11:43Z'),
|
timestamp: new Date('2026-02-10T06:11:43Z'),
|
||||||
sessionId: 'session-3',
|
sessionId: 'session-3',
|
||||||
})
|
},
|
||||||
.mockResolvedValueOnce({
|
{
|
||||||
persona: 'reviewers',
|
persona: 'reviewers',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: 'v3',
|
content: 'v3',
|
||||||
timestamp: new Date('2026-02-10T06:11:43Z'),
|
timestamp: new Date('2026-02-10T06:11:43Z'),
|
||||||
sessionId: 'session-4',
|
sessionId: 'session-4',
|
||||||
});
|
},
|
||||||
|
]);
|
||||||
|
|
||||||
// When
|
// When
|
||||||
await runReportPhase(step, 1, ctx);
|
await runReportPhase(step, 1, ctx);
|
||||||
@ -158,14 +172,13 @@ describe('runReportPhase report history behavior', () => {
|
|||||||
const ctx = createContext(reportDir, (overrides) => {
|
const ctx = createContext(reportDir, (overrides) => {
|
||||||
capturedOverrides.push(overrides);
|
capturedOverrides.push(overrides);
|
||||||
});
|
});
|
||||||
const runAgentMock = vi.mocked(runAgent);
|
queueRunAgentResponses([{
|
||||||
runAgentMock.mockResolvedValueOnce({
|
|
||||||
persona: 'reviewers',
|
persona: 'reviewers',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: 'Permission-based report execution',
|
content: 'Permission-based report execution',
|
||||||
timestamp: new Date('2026-02-10T06:21:17Z'),
|
timestamp: new Date('2026-02-10T06:21:17Z'),
|
||||||
sessionId: 'session-2',
|
sessionId: 'session-2',
|
||||||
});
|
}]);
|
||||||
|
|
||||||
// When
|
// When
|
||||||
await runReportPhase(step, 1, ctx);
|
await runReportPhase(step, 1, ctx);
|
||||||
|
|||||||
@ -139,6 +139,7 @@ vi.mock('../shared/utils/index.js', () => ({
|
|||||||
preventSleep: vi.fn(),
|
preventSleep: vi.fn(),
|
||||||
isDebugEnabled: vi.fn().mockReturnValue(false),
|
isDebugEnabled: vi.fn().mockReturnValue(false),
|
||||||
writePromptLog: vi.fn(),
|
writePromptLog: vi.fn(),
|
||||||
|
getDebugPromptsLogFile: vi.fn().mockReturnValue(null),
|
||||||
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
|
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
|
||||||
isValidReportDirName: vi.fn().mockReturnValue(true),
|
isValidReportDirName: vi.fn().mockReturnValue(true),
|
||||||
playWarningSound: vi.fn(),
|
playWarningSound: vi.fn(),
|
||||||
|
|||||||
@ -31,12 +31,57 @@ const { mockIsDebugEnabled, mockWritePromptLog, MockPieceEngine } = vi.hoisted((
|
|||||||
const step = this.config.movements[0]!;
|
const step = this.config.movements[0]!;
|
||||||
const timestamp = new Date('2026-02-07T00:00:00.000Z');
|
const timestamp = new Date('2026-02-07T00:00:00.000Z');
|
||||||
const shouldAbort = this.task === 'abort-task';
|
const shouldAbort = this.task === 'abort-task';
|
||||||
|
const shouldAbortBeforeComplete = this.task === 'abort-before-complete-task';
|
||||||
|
const shouldDuplicatePhase = this.task === 'duplicate-phase-task';
|
||||||
|
const shouldEmitSensitive = this.task === 'sensitive-content-task';
|
||||||
const shouldRepeatMovement = this.task === 'repeat-movement-task';
|
const shouldRepeatMovement = this.task === 'repeat-movement-task';
|
||||||
|
const shouldReversePhaseCompletion = this.task === 'reverse-phase-complete-task';
|
||||||
const providerInfo = { provider: undefined, model: undefined };
|
const providerInfo = { provider: undefined, model: undefined };
|
||||||
this.emit('movement:start', step, 1, 'movement instruction', providerInfo);
|
this.emit('movement:start', step, 1, 'movement instruction', providerInfo);
|
||||||
this.emit('phase:start', step, 1, 'execute', 'phase prompt');
|
if (shouldReversePhaseCompletion) {
|
||||||
this.emit('phase:complete', step, 1, 'execute', 'phase response', 'done');
|
this.emit('phase:start', step, 1, 'execute', 'phase prompt first', {
|
||||||
|
systemPrompt: '../agents/coder.md',
|
||||||
|
userInstruction: 'phase prompt first',
|
||||||
|
}, 'implement:1:1:1', 1);
|
||||||
|
this.emit('phase:start', step, 1, 'execute', 'phase prompt second', {
|
||||||
|
systemPrompt: '../agents/coder.md',
|
||||||
|
userInstruction: 'phase prompt second',
|
||||||
|
}, 'implement:1:1:2', 1);
|
||||||
|
} else {
|
||||||
|
this.emit('phase:start', step, 1, 'execute', shouldEmitSensitive ? 'token=plain-secret' : 'phase prompt', {
|
||||||
|
systemPrompt: shouldEmitSensitive ? 'Authorization: Bearer super-secret-token' : '../agents/coder.md',
|
||||||
|
userInstruction: shouldEmitSensitive ? 'api_key=plain-secret' : 'phase prompt',
|
||||||
|
});
|
||||||
|
}
|
||||||
|
this.emit('phase:start', step, 3, 'judge', 'phase3 prompt', {
|
||||||
|
systemPrompt: 'conductor',
|
||||||
|
userInstruction: 'phase3 prompt',
|
||||||
|
});
|
||||||
|
this.emit('phase:judge_stage', step, 3, 'judge', {
|
||||||
|
stage: 1,
|
||||||
|
method: 'structured_output',
|
||||||
|
status: 'done',
|
||||||
|
instruction: 'judge stage prompt',
|
||||||
|
response: 'judge stage response',
|
||||||
|
});
|
||||||
|
this.emit('phase:complete', step, 3, 'judge', '[IMPLEMENT:1]', 'done');
|
||||||
|
if (shouldAbortBeforeComplete) {
|
||||||
|
this.emit('piece:abort', { status: 'aborted', iteration: 1 }, 'user_interrupted');
|
||||||
|
return { status: 'aborted', iteration: 1 };
|
||||||
|
}
|
||||||
|
if (shouldReversePhaseCompletion) {
|
||||||
|
this.emit('phase:complete', step, 1, 'execute', 'phase response second', 'done', undefined, 'implement:1:1:2', 1);
|
||||||
|
this.emit('phase:complete', step, 1, 'execute', 'phase response first', 'done', undefined, 'implement:1:1:1', 1);
|
||||||
|
} else {
|
||||||
|
this.emit('phase:complete', step, 1, 'execute', shouldEmitSensitive ? 'password=plain-secret' : 'phase response', 'done');
|
||||||
|
}
|
||||||
|
if (shouldDuplicatePhase) {
|
||||||
|
this.emit('phase:start', step, 1, 'execute', 'phase prompt second', {
|
||||||
|
systemPrompt: '../agents/coder.md',
|
||||||
|
userInstruction: 'phase prompt second',
|
||||||
|
});
|
||||||
|
this.emit('phase:complete', step, 1, 'execute', 'phase response second', 'done');
|
||||||
|
}
|
||||||
this.emit(
|
this.emit(
|
||||||
'movement:complete',
|
'movement:complete',
|
||||||
step,
|
step,
|
||||||
@ -154,6 +199,7 @@ vi.mock('../shared/utils/index.js', () => ({
|
|||||||
preventSleep: vi.fn(),
|
preventSleep: vi.fn(),
|
||||||
isDebugEnabled: mockIsDebugEnabled,
|
isDebugEnabled: mockIsDebugEnabled,
|
||||||
writePromptLog: mockWritePromptLog,
|
writePromptLog: mockWritePromptLog,
|
||||||
|
getDebugPromptsLogFile: vi.fn().mockReturnValue(null),
|
||||||
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
|
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
|
||||||
isValidReportDirName: vi.fn().mockImplementation((value: string) => /^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(value)),
|
isValidReportDirName: vi.fn().mockImplementation((value: string) => /^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(value)),
|
||||||
}));
|
}));
|
||||||
@ -173,6 +219,7 @@ vi.mock('../shared/exitCodes.js', () => ({
|
|||||||
|
|
||||||
import { executePiece } from '../features/tasks/execute/pieceExecution.js';
|
import { executePiece } from '../features/tasks/execute/pieceExecution.js';
|
||||||
import { ensureDir, writeFileAtomic } from '../infra/config/index.js';
|
import { ensureDir, writeFileAtomic } from '../infra/config/index.js';
|
||||||
|
import { appendNdjsonLine } from '../infra/fs/index.js';
|
||||||
|
|
||||||
describe('executePiece debug prompts logging', () => {
|
describe('executePiece debug prompts logging', () => {
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
@ -204,15 +251,16 @@ describe('executePiece debug prompts logging', () => {
|
|||||||
projectCwd: '/tmp/project',
|
projectCwd: '/tmp/project',
|
||||||
});
|
});
|
||||||
|
|
||||||
expect(mockWritePromptLog).toHaveBeenCalledTimes(1);
|
expect(mockWritePromptLog).toHaveBeenCalledTimes(2);
|
||||||
const record = mockWritePromptLog.mock.calls[0]?.[0] as {
|
const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array<{
|
||||||
movement: string;
|
movement: string;
|
||||||
phase: number;
|
phase: number;
|
||||||
iteration: number;
|
iteration: number;
|
||||||
prompt: string;
|
prompt: string;
|
||||||
response: string;
|
response: string;
|
||||||
timestamp: string;
|
timestamp: string;
|
||||||
};
|
}>;
|
||||||
|
const record = records.find((entry) => entry.phase === 1)!;
|
||||||
expect(record.movement).toBe('implement');
|
expect(record.movement).toBe('implement');
|
||||||
expect(record.phase).toBe(1);
|
expect(record.phase).toBe(1);
|
||||||
expect(record.iteration).toBe(1);
|
expect(record.iteration).toBe(1);
|
||||||
@ -221,6 +269,54 @@ describe('executePiece debug prompts logging', () => {
|
|||||||
expect(record.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/);
|
expect(record.timestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should separate system prompt and user instruction in debug prompt records', async () => {
|
||||||
|
mockIsDebugEnabled.mockReturnValue(true);
|
||||||
|
|
||||||
|
await executePiece(makeConfig(), 'task', '/tmp/project', {
|
||||||
|
projectCwd: '/tmp/project',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockWritePromptLog).toHaveBeenCalledTimes(2);
|
||||||
|
const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array<Record<string, unknown> & { phase: number }>;
|
||||||
|
const record = records.find((entry) => entry.phase === 1)!;
|
||||||
|
expect(record).toHaveProperty('systemPrompt');
|
||||||
|
expect(record).toHaveProperty('userInstruction');
|
||||||
|
expect(record.systemPrompt).toBe('../agents/coder.md');
|
||||||
|
expect(record.userInstruction).toBe('phase prompt');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should include phase and judge stage details in trace markdown', async () => {
|
||||||
|
await executePiece(makeConfig(), 'task', '/tmp/project', {
|
||||||
|
projectCwd: '/tmp/project',
|
||||||
|
reportDirName: 'test-report-dir',
|
||||||
|
});
|
||||||
|
|
||||||
|
const traceCall = vi.mocked(writeFileAtomic).mock.calls.find(
|
||||||
|
(call) => String(call[0]).endsWith('/trace.md')
|
||||||
|
);
|
||||||
|
expect(traceCall).toBeDefined();
|
||||||
|
const traceContent = String(traceCall?.[1]);
|
||||||
|
expect(traceContent).toContain('## Iteration 1: implement');
|
||||||
|
expect(traceContent).toContain('### Phase 1: execute');
|
||||||
|
expect(traceContent).toContain('#### Judgment Stages');
|
||||||
|
expect(traceContent).toContain('Stage 1 (structured_output): status=done');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should render trace markdown even when piece aborts before movement completion', async () => {
|
||||||
|
await executePiece(makeConfig(), 'abort-before-complete-task', '/tmp/project', {
|
||||||
|
projectCwd: '/tmp/project',
|
||||||
|
reportDirName: 'test-report-dir',
|
||||||
|
});
|
||||||
|
|
||||||
|
const traceCall = vi.mocked(writeFileAtomic).mock.calls.find(
|
||||||
|
(call) => String(call[0]).endsWith('/trace.md')
|
||||||
|
);
|
||||||
|
expect(traceCall).toBeDefined();
|
||||||
|
const traceContent = String(traceCall?.[1]);
|
||||||
|
expect(traceContent).toContain('- Status: ❌ aborted');
|
||||||
|
expect(traceContent).toContain('- Movement Status: in_progress');
|
||||||
|
});
|
||||||
|
|
||||||
it('should not write prompt log record when debug is disabled', async () => {
|
it('should not write prompt log record when debug is disabled', async () => {
|
||||||
mockIsDebugEnabled.mockReturnValue(false);
|
mockIsDebugEnabled.mockReturnValue(false);
|
||||||
|
|
||||||
@ -231,6 +327,24 @@ describe('executePiece debug prompts logging', () => {
|
|||||||
expect(mockWritePromptLog).not.toHaveBeenCalled();
|
expect(mockWritePromptLog).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should handle repeated phase starts for same movement and phase without missing debug prompt', async () => {
|
||||||
|
mockIsDebugEnabled.mockReturnValue(true);
|
||||||
|
|
||||||
|
await executePiece(makeConfig(), 'duplicate-phase-task', '/tmp/project', {
|
||||||
|
projectCwd: '/tmp/project',
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(mockWritePromptLog).toHaveBeenCalledTimes(3);
|
||||||
|
const records = mockWritePromptLog.mock.calls.map((call) => call[0]) as Array<{
|
||||||
|
phase: number;
|
||||||
|
response: string;
|
||||||
|
}>;
|
||||||
|
const phase1Responses = records
|
||||||
|
.filter((record) => record.phase === 1)
|
||||||
|
.map((record) => record.response);
|
||||||
|
expect(phase1Responses).toEqual(['phase response', 'phase response second']);
|
||||||
|
});
|
||||||
|
|
||||||
it('should update movement prefix context on each movement:start event', async () => {
|
it('should update movement prefix context on each movement:start event', async () => {
|
||||||
const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
|
const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
|
||||||
|
|
||||||
@ -277,11 +391,13 @@ describe('executePiece debug prompts logging', () => {
|
|||||||
reportDirName: 'test-report-dir',
|
reportDirName: 'test-report-dir',
|
||||||
});
|
});
|
||||||
|
|
||||||
const calls = vi.mocked(writeFileAtomic).mock.calls;
|
const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
|
||||||
expect(calls).toHaveLength(2);
|
(call) => String(call[0]).endsWith('/meta.json')
|
||||||
|
);
|
||||||
|
expect(metaCalls).toHaveLength(2);
|
||||||
|
|
||||||
const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string };
|
const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string };
|
||||||
const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string };
|
const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string };
|
||||||
expect(firstMeta.status).toBe('running');
|
expect(firstMeta.status).toBe('running');
|
||||||
expect(firstMeta.endTime).toBeUndefined();
|
expect(firstMeta.endTime).toBeUndefined();
|
||||||
expect(secondMeta.status).toBe('completed');
|
expect(secondMeta.status).toBe('completed');
|
||||||
@ -294,11 +410,13 @@ describe('executePiece debug prompts logging', () => {
|
|||||||
reportDirName: 'test-report-dir',
|
reportDirName: 'test-report-dir',
|
||||||
});
|
});
|
||||||
|
|
||||||
const calls = vi.mocked(writeFileAtomic).mock.calls;
|
const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
|
||||||
expect(calls).toHaveLength(2);
|
(call) => String(call[0]).endsWith('/meta.json')
|
||||||
|
);
|
||||||
|
expect(metaCalls).toHaveLength(2);
|
||||||
|
|
||||||
const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string };
|
const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string };
|
||||||
const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string };
|
const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string };
|
||||||
expect(firstMeta.status).toBe('running');
|
expect(firstMeta.status).toBe('running');
|
||||||
expect(firstMeta.endTime).toBeUndefined();
|
expect(firstMeta.endTime).toBeUndefined();
|
||||||
expect(secondMeta.status).toBe('aborted');
|
expect(secondMeta.status).toBe('aborted');
|
||||||
@ -313,14 +431,83 @@ describe('executePiece debug prompts logging', () => {
|
|||||||
})
|
})
|
||||||
).rejects.toThrow('mock constructor failure');
|
).rejects.toThrow('mock constructor failure');
|
||||||
|
|
||||||
const calls = vi.mocked(writeFileAtomic).mock.calls;
|
const metaCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
|
||||||
expect(calls).toHaveLength(2);
|
(call) => String(call[0]).endsWith('/meta.json')
|
||||||
|
);
|
||||||
|
expect(metaCalls).toHaveLength(2);
|
||||||
|
|
||||||
const firstMeta = JSON.parse(String(calls[0]![1])) as { status: string; endTime?: string };
|
const firstMeta = JSON.parse(String(metaCalls[0]![1])) as { status: string; endTime?: string };
|
||||||
const secondMeta = JSON.parse(String(calls[1]![1])) as { status: string; endTime?: string };
|
const secondMeta = JSON.parse(String(metaCalls[1]![1])) as { status: string; endTime?: string };
|
||||||
expect(firstMeta.status).toBe('running');
|
expect(firstMeta.status).toBe('running');
|
||||||
expect(firstMeta.endTime).toBeUndefined();
|
expect(firstMeta.endTime).toBeUndefined();
|
||||||
expect(secondMeta.status).toBe('aborted');
|
expect(secondMeta.status).toBe('aborted');
|
||||||
expect(secondMeta.endTime).toMatch(/^\d{4}-\d{2}-\d{2}T/);
|
expect(secondMeta.endTime).toMatch(/^\d{4}-\d{2}-\d{2}T/);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('should write trace.md on piece completion', async () => {
|
||||||
|
await executePiece(makeConfig(), 'task', '/tmp/project', {
|
||||||
|
projectCwd: '/tmp/project',
|
||||||
|
reportDirName: 'test-report-dir',
|
||||||
|
});
|
||||||
|
|
||||||
|
const traceCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
|
||||||
|
(call) => String(call[0]).endsWith('/trace.md')
|
||||||
|
);
|
||||||
|
expect(traceCalls.length).toBeGreaterThan(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should write trace.md on piece abort', async () => {
|
||||||
|
await executePiece(makeConfig(), 'abort-task', '/tmp/project', {
|
||||||
|
projectCwd: '/tmp/project',
|
||||||
|
reportDirName: 'test-report-dir',
|
||||||
|
});
|
||||||
|
|
||||||
|
const traceCalls = vi.mocked(writeFileAtomic).mock.calls.filter(
|
||||||
|
(call) => String(call[0]).endsWith('/trace.md')
|
||||||
|
);
|
||||||
|
expect(traceCalls.length).toBeGreaterThan(0);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should sanitize sensitive fields before writing session NDJSON when trace mode is default', async () => {
|
||||||
|
await executePiece(makeConfig(), 'token=plain-secret', '/tmp/project', {
|
||||||
|
projectCwd: '/tmp/project',
|
||||||
|
reportDirName: 'test-report-dir',
|
||||||
|
interactiveMetadata: {
|
||||||
|
confirmed: true,
|
||||||
|
task: 'api_key=plain-secret',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
await executePiece(makeConfig(), 'sensitive-content-task', '/tmp/project', {
|
||||||
|
projectCwd: '/tmp/project',
|
||||||
|
reportDirName: 'test-report-dir-2',
|
||||||
|
});
|
||||||
|
|
||||||
|
const records = vi.mocked(appendNdjsonLine).mock.calls.map((call) => call[1]);
|
||||||
|
const recordText = JSON.stringify(records);
|
||||||
|
expect(recordText).toContain('[REDACTED]');
|
||||||
|
expect(recordText).not.toContain('plain-secret');
|
||||||
|
expect(recordText).not.toContain('super-secret-token');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should keep phaseExecutionId bindings consistent in trace when completions arrive in reverse order', async () => {
|
||||||
|
await executePiece(makeConfig(), 'reverse-phase-complete-task', '/tmp/project', {
|
||||||
|
projectCwd: '/tmp/project',
|
||||||
|
reportDirName: 'test-report-dir',
|
||||||
|
});
|
||||||
|
|
||||||
|
const traceCall = vi.mocked(writeFileAtomic).mock.calls.find(
|
||||||
|
(call) => String(call[0]).endsWith('/trace.md')
|
||||||
|
);
|
||||||
|
expect(traceCall).toBeDefined();
|
||||||
|
const traceContent = String(traceCall?.[1]);
|
||||||
|
const firstPromptIndex = traceContent.indexOf('phase prompt first');
|
||||||
|
const firstResponseIndex = traceContent.indexOf('phase response first');
|
||||||
|
const secondPromptIndex = traceContent.indexOf('phase prompt second');
|
||||||
|
const secondResponseIndex = traceContent.indexOf('phase response second');
|
||||||
|
|
||||||
|
expect(firstPromptIndex).toBeGreaterThan(-1);
|
||||||
|
expect(firstResponseIndex).toBeGreaterThan(firstPromptIndex);
|
||||||
|
expect(secondPromptIndex).toBeGreaterThan(firstResponseIndex);
|
||||||
|
expect(secondResponseIndex).toBeGreaterThan(secondPromptIndex);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@ -136,6 +136,7 @@ vi.mock('../shared/utils/index.js', () => ({
|
|||||||
preventSleep: vi.fn(),
|
preventSleep: vi.fn(),
|
||||||
isDebugEnabled: vi.fn().mockReturnValue(false),
|
isDebugEnabled: vi.fn().mockReturnValue(false),
|
||||||
writePromptLog: vi.fn(),
|
writePromptLog: vi.fn(),
|
||||||
|
getDebugPromptsLogFile: vi.fn().mockReturnValue(null),
|
||||||
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
|
generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
|
||||||
isValidReportDirName: vi.fn().mockReturnValue(true),
|
isValidReportDirName: vi.fn().mockReturnValue(true),
|
||||||
playWarningSound: vi.fn(),
|
playWarningSound: vi.fn(),
|
||||||
|
|||||||
@ -10,6 +10,7 @@ vi.mock('../agents/runner.js', () => ({
|
|||||||
}));
|
}));
|
||||||
|
|
||||||
import { runAgent } from '../agents/runner.js';
|
import { runAgent } from '../agents/runner.js';
|
||||||
|
import type { AgentResponse } from '../core/models/types.js';
|
||||||
|
|
||||||
function createStep(fileName: string): PieceMovement {
|
function createStep(fileName: string): PieceMovement {
|
||||||
return {
|
return {
|
||||||
@ -50,6 +51,19 @@ function createContext(reportDir: string, lastResponse = 'Phase 1 result'): Phas
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function queueRunAgentResponses(responses: AgentResponse[]): void {
|
||||||
|
const runAgentMock = vi.mocked(runAgent);
|
||||||
|
for (const response of responses) {
|
||||||
|
runAgentMock.mockImplementationOnce(async (persona, task, options) => {
|
||||||
|
options?.onPromptResolved?.({
|
||||||
|
systemPrompt: typeof persona === 'string' ? persona : '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
return response;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
describe('runReportPhase retry with new session', () => {
|
describe('runReportPhase retry with new session', () => {
|
||||||
let tmpRoot: string;
|
let tmpRoot: string;
|
||||||
|
|
||||||
@ -69,22 +83,23 @@ describe('runReportPhase retry with new session', () => {
|
|||||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||||
const step = createStep('02-coder.md');
|
const step = createStep('02-coder.md');
|
||||||
const ctx = createContext(reportDir, 'Implemented feature X');
|
const ctx = createContext(reportDir, 'Implemented feature X');
|
||||||
const runAgentMock = vi.mocked(runAgent);
|
queueRunAgentResponses([
|
||||||
runAgentMock
|
{
|
||||||
.mockResolvedValueOnce({
|
|
||||||
persona: 'coder',
|
persona: 'coder',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: ' ',
|
content: ' ',
|
||||||
timestamp: new Date('2026-02-11T00:00:00Z'),
|
timestamp: new Date('2026-02-11T00:00:00Z'),
|
||||||
sessionId: 'session-resume-2',
|
sessionId: 'session-resume-2',
|
||||||
})
|
},
|
||||||
.mockResolvedValueOnce({
|
{
|
||||||
persona: 'coder',
|
persona: 'coder',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: '# Report\nRecovered output',
|
content: '# Report\nRecovered output',
|
||||||
timestamp: new Date('2026-02-11T00:00:01Z'),
|
timestamp: new Date('2026-02-11T00:00:01Z'),
|
||||||
sessionId: 'session-fresh-1',
|
sessionId: 'session-fresh-1',
|
||||||
});
|
},
|
||||||
|
]);
|
||||||
|
const runAgentMock = vi.mocked(runAgent);
|
||||||
|
|
||||||
// When
|
// When
|
||||||
await runReportPhase(step, 1, ctx);
|
await runReportPhase(step, 1, ctx);
|
||||||
@ -107,21 +122,22 @@ describe('runReportPhase retry with new session', () => {
|
|||||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||||
const step = createStep('03-review.md');
|
const step = createStep('03-review.md');
|
||||||
const ctx = createContext(reportDir);
|
const ctx = createContext(reportDir);
|
||||||
const runAgentMock = vi.mocked(runAgent);
|
queueRunAgentResponses([
|
||||||
runAgentMock
|
{
|
||||||
.mockResolvedValueOnce({
|
|
||||||
persona: 'coder',
|
persona: 'coder',
|
||||||
status: 'error',
|
status: 'error',
|
||||||
content: 'Tool use is not allowed in this phase',
|
content: 'Tool use is not allowed in this phase',
|
||||||
timestamp: new Date('2026-02-11T00:01:00Z'),
|
timestamp: new Date('2026-02-11T00:01:00Z'),
|
||||||
error: 'Tool use is not allowed in this phase',
|
error: 'Tool use is not allowed in this phase',
|
||||||
})
|
},
|
||||||
.mockResolvedValueOnce({
|
{
|
||||||
persona: 'coder',
|
persona: 'coder',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: 'Recovered report',
|
content: 'Recovered report',
|
||||||
timestamp: new Date('2026-02-11T00:01:01Z'),
|
timestamp: new Date('2026-02-11T00:01:01Z'),
|
||||||
});
|
},
|
||||||
|
]);
|
||||||
|
const runAgentMock = vi.mocked(runAgent);
|
||||||
|
|
||||||
// When
|
// When
|
||||||
await runReportPhase(step, 1, ctx);
|
await runReportPhase(step, 1, ctx);
|
||||||
@ -137,20 +153,21 @@ describe('runReportPhase retry with new session', () => {
|
|||||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||||
const step = createStep('04-qa.md');
|
const step = createStep('04-qa.md');
|
||||||
const ctx = createContext(reportDir);
|
const ctx = createContext(reportDir);
|
||||||
const runAgentMock = vi.mocked(runAgent);
|
queueRunAgentResponses([
|
||||||
runAgentMock
|
{
|
||||||
.mockResolvedValueOnce({
|
|
||||||
persona: 'coder',
|
persona: 'coder',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: ' ',
|
content: ' ',
|
||||||
timestamp: new Date('2026-02-11T00:02:00Z'),
|
timestamp: new Date('2026-02-11T00:02:00Z'),
|
||||||
})
|
},
|
||||||
.mockResolvedValueOnce({
|
{
|
||||||
persona: 'coder',
|
persona: 'coder',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: '\n\n',
|
content: '\n\n',
|
||||||
timestamp: new Date('2026-02-11T00:02:01Z'),
|
timestamp: new Date('2026-02-11T00:02:01Z'),
|
||||||
});
|
},
|
||||||
|
]);
|
||||||
|
const runAgentMock = vi.mocked(runAgent);
|
||||||
|
|
||||||
// When / Then
|
// When / Then
|
||||||
await expect(runReportPhase(step, 1, ctx)).rejects.toThrow('Report phase failed for 04-qa.md: Report output is empty');
|
await expect(runReportPhase(step, 1, ctx)).rejects.toThrow('Report phase failed for 04-qa.md: Report output is empty');
|
||||||
@ -162,14 +179,14 @@ describe('runReportPhase retry with new session', () => {
|
|||||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||||
const step = createStep('05-ok.md');
|
const step = createStep('05-ok.md');
|
||||||
const ctx = createContext(reportDir);
|
const ctx = createContext(reportDir);
|
||||||
const runAgentMock = vi.mocked(runAgent);
|
queueRunAgentResponses([{
|
||||||
runAgentMock.mockResolvedValueOnce({
|
|
||||||
persona: 'coder',
|
persona: 'coder',
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: 'Single-pass success',
|
content: 'Single-pass success',
|
||||||
timestamp: new Date('2026-02-11T00:03:00Z'),
|
timestamp: new Date('2026-02-11T00:03:00Z'),
|
||||||
sessionId: 'session-resume-2',
|
sessionId: 'session-resume-2',
|
||||||
});
|
}]);
|
||||||
|
const runAgentMock = vi.mocked(runAgent);
|
||||||
|
|
||||||
// When
|
// When
|
||||||
await runReportPhase(step, 1, ctx);
|
await runReportPhase(step, 1, ctx);
|
||||||
@ -185,13 +202,13 @@ describe('runReportPhase retry with new session', () => {
|
|||||||
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
const reportDir = join(tmpRoot, '.takt', 'runs', 'sample-run', 'reports');
|
||||||
const step = createStep('06-blocked.md');
|
const step = createStep('06-blocked.md');
|
||||||
const ctx = createContext(reportDir);
|
const ctx = createContext(reportDir);
|
||||||
const runAgentMock = vi.mocked(runAgent);
|
queueRunAgentResponses([{
|
||||||
runAgentMock.mockResolvedValueOnce({
|
|
||||||
persona: 'coder',
|
persona: 'coder',
|
||||||
status: 'blocked',
|
status: 'blocked',
|
||||||
content: 'Need permission',
|
content: 'Need permission',
|
||||||
timestamp: new Date('2026-02-11T00:04:00Z'),
|
timestamp: new Date('2026-02-11T00:04:00Z'),
|
||||||
});
|
}]);
|
||||||
|
const runAgentMock = vi.mocked(runAgent);
|
||||||
|
|
||||||
// When
|
// When
|
||||||
const result = await runReportPhase(step, 1, ctx);
|
const result = await runReportPhase(step, 1, ctx);
|
||||||
|
|||||||
124
src/__tests__/status-judgment-phase.test.ts
Normal file
124
src/__tests__/status-judgment-phase.test.ts
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||||
|
import type { PieceMovement } from '../core/models/types.js';
|
||||||
|
import { runStatusJudgmentPhase } from '../core/piece/status-judgment-phase.js';
|
||||||
|
|
||||||
|
const { mockJudgeStatus } = vi.hoisted(() => ({
|
||||||
|
mockJudgeStatus: vi.fn(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
vi.mock('../agents/agent-usecases.js', () => ({
|
||||||
|
judgeStatus: mockJudgeStatus,
|
||||||
|
}));
|
||||||
|
|
||||||
|
describe('runStatusJudgmentPhase', () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
vi.clearAllMocks();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should pass judge stage callbacks through PhaseRunnerContext', async () => {
|
||||||
|
mockJudgeStatus.mockImplementation(
|
||||||
|
async (_structured: string, _tag: string, _rules: unknown[], options: { onJudgeStage?: (entry: {
|
||||||
|
stage: 1 | 2 | 3;
|
||||||
|
method: 'structured_output' | 'phase3_tag' | 'ai_judge';
|
||||||
|
status: 'done' | 'error' | 'skipped';
|
||||||
|
instruction: string;
|
||||||
|
response: string;
|
||||||
|
}) => void; onStructuredPromptResolved?: (promptParts: { systemPrompt: string; userInstruction: string }) => void }) => {
|
||||||
|
options.onStructuredPromptResolved?.({
|
||||||
|
systemPrompt: 'conductor-system',
|
||||||
|
userInstruction: 'structured prompt',
|
||||||
|
});
|
||||||
|
options.onJudgeStage?.({
|
||||||
|
stage: 1,
|
||||||
|
method: 'structured_output',
|
||||||
|
status: 'done',
|
||||||
|
instruction: 'structured prompt',
|
||||||
|
response: '{"step":2}',
|
||||||
|
});
|
||||||
|
return { ruleIndex: 1, method: 'structured_output' as const };
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
const step: PieceMovement = {
|
||||||
|
name: 'review',
|
||||||
|
persona: 'reviewer',
|
||||||
|
personaDisplayName: 'reviewer',
|
||||||
|
instructionTemplate: 'Review',
|
||||||
|
passPreviousResponse: true,
|
||||||
|
rules: [
|
||||||
|
{ condition: 'needs_fix', next: 'fix' },
|
||||||
|
{ condition: 'approved', next: 'COMPLETE' },
|
||||||
|
],
|
||||||
|
};
|
||||||
|
const onPhaseStart = vi.fn();
|
||||||
|
const onPhaseComplete = vi.fn();
|
||||||
|
const onJudgeStage = vi.fn();
|
||||||
|
|
||||||
|
const result = await runStatusJudgmentPhase(step, {
|
||||||
|
cwd: '/tmp/project',
|
||||||
|
reportDir: '/tmp/project/.takt/reports',
|
||||||
|
lastResponse: 'response body',
|
||||||
|
iteration: 4,
|
||||||
|
getSessionId: vi.fn(),
|
||||||
|
buildResumeOptions: vi.fn(),
|
||||||
|
buildNewSessionReportOptions: vi.fn(),
|
||||||
|
updatePersonaSession: vi.fn(),
|
||||||
|
onPhaseStart,
|
||||||
|
onPhaseComplete,
|
||||||
|
onJudgeStage,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result).toEqual({
|
||||||
|
tag: '[REVIEW:2]',
|
||||||
|
ruleIndex: 1,
|
||||||
|
method: 'structured_output',
|
||||||
|
});
|
||||||
|
expect(onPhaseStart).toHaveBeenCalledWith(
|
||||||
|
step,
|
||||||
|
3,
|
||||||
|
'judge',
|
||||||
|
expect.any(String),
|
||||||
|
{
|
||||||
|
systemPrompt: 'conductor-system',
|
||||||
|
userInstruction: 'structured prompt',
|
||||||
|
},
|
||||||
|
'review:4:3:1',
|
||||||
|
4,
|
||||||
|
);
|
||||||
|
expect(onJudgeStage).toHaveBeenCalledWith(
|
||||||
|
step,
|
||||||
|
3,
|
||||||
|
'judge',
|
||||||
|
expect.objectContaining({ stage: 1, method: 'structured_output' }),
|
||||||
|
'review:4:3:1',
|
||||||
|
4,
|
||||||
|
);
|
||||||
|
expect(onPhaseComplete).toHaveBeenCalledWith(step, 3, 'judge', '[REVIEW:2]', 'done', undefined, 'review:4:3:1', 4);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should fail fast when iteration is missing', async () => {
|
||||||
|
mockJudgeStatus.mockResolvedValue({ ruleIndex: 0, method: 'structured_output' });
|
||||||
|
|
||||||
|
const step: PieceMovement = {
|
||||||
|
name: 'review',
|
||||||
|
persona: 'reviewer',
|
||||||
|
personaDisplayName: 'reviewer',
|
||||||
|
instructionTemplate: 'Review',
|
||||||
|
passPreviousResponse: true,
|
||||||
|
rules: [
|
||||||
|
{ condition: 'needs_fix', next: 'fix' },
|
||||||
|
{ condition: 'approved', next: 'COMPLETE' },
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
await expect(runStatusJudgmentPhase(step, {
|
||||||
|
cwd: '/tmp/project',
|
||||||
|
reportDir: '/tmp/project/.takt/reports',
|
||||||
|
lastResponse: 'response body',
|
||||||
|
getSessionId: vi.fn(),
|
||||||
|
buildResumeOptions: vi.fn(),
|
||||||
|
buildNewSessionReportOptions: vi.fn(),
|
||||||
|
updatePersonaSession: vi.fn(),
|
||||||
|
})).rejects.toThrow('Status judgment requires iteration for movement "review"');
|
||||||
|
});
|
||||||
|
});
|
||||||
236
src/__tests__/traceReport.test.ts
Normal file
236
src/__tests__/traceReport.test.ts
Normal file
@ -0,0 +1,236 @@
|
|||||||
|
import { describe, it, expect } from 'vitest';
|
||||||
|
import { mkdtempSync, writeFileSync } from 'node:fs';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { renderTraceReportMarkdown, renderTraceReportFromLogs } from '../features/tasks/execute/traceReport.js';
|
||||||
|
|
||||||
|
describe('traceReport', () => {
|
||||||
|
it('should render judge stage details and tolerate aborted incomplete movement', () => {
|
||||||
|
const markdown = renderTraceReportMarkdown(
|
||||||
|
{
|
||||||
|
tracePath: '/tmp/trace.md',
|
||||||
|
pieceName: 'test-piece',
|
||||||
|
task: 'test task',
|
||||||
|
runSlug: 'run-1',
|
||||||
|
status: 'aborted',
|
||||||
|
iterations: 1,
|
||||||
|
endTime: '2026-03-04T12:00:00.000Z',
|
||||||
|
reason: 'user_interrupted',
|
||||||
|
},
|
||||||
|
'2026-03-04T11:59:00.000Z',
|
||||||
|
[
|
||||||
|
{
|
||||||
|
step: 'ai_fix',
|
||||||
|
persona: 'coder',
|
||||||
|
iteration: 1,
|
||||||
|
startedAt: '2026-03-04T11:59:01.000Z',
|
||||||
|
phases: [
|
||||||
|
{
|
||||||
|
phaseExecutionId: 'ai_fix:3:1',
|
||||||
|
phase: 3,
|
||||||
|
phaseName: 'judge',
|
||||||
|
instruction: 'judge prompt',
|
||||||
|
systemPrompt: 'conductor',
|
||||||
|
userInstruction: 'judge prompt',
|
||||||
|
startedAt: '2026-03-04T11:59:02.000Z',
|
||||||
|
judgeStages: [
|
||||||
|
{
|
||||||
|
stage: 1,
|
||||||
|
method: 'structured_output',
|
||||||
|
status: 'error',
|
||||||
|
instruction: 'stage1 prompt',
|
||||||
|
response: '',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(markdown).toContain('- Status: ❌ aborted');
|
||||||
|
expect(markdown).toContain('- Movement Status: in_progress');
|
||||||
|
expect(markdown).toContain('## Iteration 1: ai_fix (persona: coder)');
|
||||||
|
expect(markdown).toContain('<details><summary>System Prompt</summary>');
|
||||||
|
expect(markdown).toContain('<details><summary>User Instruction</summary>');
|
||||||
|
expect(markdown).toContain('- Stage 1 (structured_output)');
|
||||||
|
expect(markdown).toContain('<details><summary>Stage Instruction</summary>');
|
||||||
|
expect(markdown).toContain('<details><summary>Stage Response</summary>');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should render movements in timestamp order from NDJSON logs', () => {
|
||||||
|
const dir = mkdtempSync(join(tmpdir(), 'trace-report-'));
|
||||||
|
const sessionPath = join(dir, 'session.jsonl');
|
||||||
|
const promptPath = join(dir, 'prompts.jsonl');
|
||||||
|
writeFileSync(sessionPath, [
|
||||||
|
JSON.stringify({ type: 'piece_start', task: 'task', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }),
|
||||||
|
JSON.stringify({ type: 'step_start', step: 'reviewers', persona: 'reviewer', iteration: 2, timestamp: '2026-03-04T11:59:05.000Z' }),
|
||||||
|
JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }),
|
||||||
|
JSON.stringify({ type: 'phase_start', step: 'reviewers', iteration: 2, phase: 1, phaseName: 'execute', phaseExecutionId: 'reviewers:2:1:1', instruction: 'r', timestamp: '2026-03-04T11:59:06.000Z' }),
|
||||||
|
JSON.stringify({ type: 'phase_complete', step: 'reviewers', iteration: 2, phase: 1, phaseName: 'execute', phaseExecutionId: 'reviewers:2:1:1', status: 'done', content: 'r-ok', timestamp: '2026-03-04T11:59:07.000Z' }),
|
||||||
|
JSON.stringify({ type: 'step_complete', step: 'reviewers', persona: 'reviewer', status: 'done', content: 'r-ok', instruction: 'inst', timestamp: '2026-03-04T11:59:08.000Z' }),
|
||||||
|
JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: 'p', timestamp: '2026-03-04T11:59:02.000Z' }),
|
||||||
|
JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'p-ok', timestamp: '2026-03-04T11:59:03.000Z' }),
|
||||||
|
JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: 'p-ok', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }),
|
||||||
|
JSON.stringify({ type: 'piece_complete', iterations: 2, endTime: '2026-03-04T12:00:00.000Z' }),
|
||||||
|
'',
|
||||||
|
].join('\n'));
|
||||||
|
writeFileSync(promptPath, [
|
||||||
|
JSON.stringify({ movement: 'plan', phase: 1, iteration: 1, phaseExecutionId: 'plan:1:1:1', prompt: 'p', systemPrompt: 'ps', userInstruction: 'pu', response: 'p-ok', timestamp: '2026-03-04T11:59:03.000Z' }),
|
||||||
|
JSON.stringify({ movement: 'reviewers', phase: 1, iteration: 2, phaseExecutionId: 'reviewers:2:1:1', prompt: 'r', systemPrompt: 'rs', userInstruction: 'ru', response: 'r-ok', timestamp: '2026-03-04T11:59:07.000Z' }),
|
||||||
|
'',
|
||||||
|
].join('\n'));
|
||||||
|
|
||||||
|
const markdown = renderTraceReportFromLogs(
|
||||||
|
{
|
||||||
|
tracePath: join(dir, 'trace.md'),
|
||||||
|
pieceName: 'piece',
|
||||||
|
task: 'task',
|
||||||
|
runSlug: 'run-1',
|
||||||
|
status: 'completed',
|
||||||
|
iterations: 2,
|
||||||
|
endTime: '2026-03-04T12:00:00.000Z',
|
||||||
|
},
|
||||||
|
sessionPath,
|
||||||
|
promptPath,
|
||||||
|
'full',
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(markdown).toBeDefined();
|
||||||
|
const planIndex = markdown!.indexOf('## Iteration 1: plan');
|
||||||
|
const reviewersIndex = markdown!.indexOf('## Iteration 2: reviewers');
|
||||||
|
expect(planIndex).toBeGreaterThan(-1);
|
||||||
|
expect(reviewersIndex).toBeGreaterThan(planIndex);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should fail fast when completed trace has missing phase status', () => {
|
||||||
|
expect(() => renderTraceReportMarkdown(
|
||||||
|
{
|
||||||
|
tracePath: '/tmp/trace.md',
|
||||||
|
pieceName: 'test-piece',
|
||||||
|
task: 'test task',
|
||||||
|
runSlug: 'run-1',
|
||||||
|
status: 'completed',
|
||||||
|
iterations: 1,
|
||||||
|
endTime: '2026-03-04T12:00:00.000Z',
|
||||||
|
},
|
||||||
|
'2026-03-04T11:59:00.000Z',
|
||||||
|
[
|
||||||
|
{
|
||||||
|
step: 'plan',
|
||||||
|
persona: 'planner',
|
||||||
|
iteration: 1,
|
||||||
|
startedAt: '2026-03-04T11:59:01.000Z',
|
||||||
|
phases: [
|
||||||
|
{
|
||||||
|
phaseExecutionId: 'plan:1:1',
|
||||||
|
phase: 1,
|
||||||
|
phaseName: 'execute',
|
||||||
|
instruction: 'instr',
|
||||||
|
systemPrompt: 'system',
|
||||||
|
userInstruction: 'user',
|
||||||
|
startedAt: '2026-03-04T11:59:02.000Z',
|
||||||
|
completedAt: '2026-03-04T11:59:03.000Z',
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)).toThrow('missing status');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should mask sensitive task and reason in redacted mode', () => {
|
||||||
|
const dir = mkdtempSync(join(tmpdir(), 'trace-report-redact-'));
|
||||||
|
const sessionPath = join(dir, 'session.jsonl');
|
||||||
|
writeFileSync(sessionPath, [
|
||||||
|
JSON.stringify({ type: 'piece_start', task: 'token=topsecret', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }),
|
||||||
|
JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }),
|
||||||
|
JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: 'api_key=abc123', systemPrompt: 'Authorization: Bearer abc123', userInstruction: 'user token=abc123', timestamp: '2026-03-04T11:59:02.000Z' }),
|
||||||
|
JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'password=hunter2', timestamp: '2026-03-04T11:59:03.000Z' }),
|
||||||
|
JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: 'secret=my-secret', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }),
|
||||||
|
'',
|
||||||
|
].join('\n'));
|
||||||
|
|
||||||
|
const markdown = renderTraceReportFromLogs(
|
||||||
|
{
|
||||||
|
tracePath: join(dir, 'trace.md'),
|
||||||
|
pieceName: 'piece',
|
||||||
|
task: 'token=topsecret',
|
||||||
|
runSlug: 'run-1',
|
||||||
|
status: 'aborted',
|
||||||
|
iterations: 1,
|
||||||
|
endTime: '2026-03-04T12:00:00.000Z',
|
||||||
|
reason: 'api_key=super-secret',
|
||||||
|
},
|
||||||
|
sessionPath,
|
||||||
|
undefined,
|
||||||
|
'redacted',
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(markdown).toContain('token=[REDACTED]');
|
||||||
|
expect(markdown).toContain('api_key=[REDACTED]');
|
||||||
|
expect(markdown).not.toContain('topsecret');
|
||||||
|
expect(markdown).not.toContain('super-secret');
|
||||||
|
expect(markdown).not.toContain('hunter2');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should mask quoted JSON secrets and common token formats in redacted mode', () => {
|
||||||
|
const dir = mkdtempSync(join(tmpdir(), 'trace-report-redact-json-'));
|
||||||
|
const sessionPath = join(dir, 'session.jsonl');
|
||||||
|
writeFileSync(sessionPath, [
|
||||||
|
JSON.stringify({ type: 'piece_start', task: '{"api_key":"abc123"}', pieceName: 'piece', startTime: '2026-03-04T11:59:00.000Z' }),
|
||||||
|
JSON.stringify({ type: 'step_start', step: 'plan', persona: 'planner', iteration: 1, timestamp: '2026-03-04T11:59:01.000Z' }),
|
||||||
|
JSON.stringify({ type: 'phase_start', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', instruction: '{"token":"xyz987"}', systemPrompt: 'Authorization: Bearer sk-abcdef12345678', userInstruction: 'ghp_abcdef1234567890', timestamp: '2026-03-04T11:59:02.000Z' }),
|
||||||
|
JSON.stringify({ type: 'phase_complete', step: 'plan', iteration: 1, phase: 1, phaseName: 'execute', phaseExecutionId: 'plan:1:1:1', status: 'done', content: 'xoxb-1234abcd-5678efgh', timestamp: '2026-03-04T11:59:03.000Z' }),
|
||||||
|
JSON.stringify({ type: 'step_complete', step: 'plan', persona: 'planner', status: 'done', content: '{"password":"plain"}', instruction: 'inst', timestamp: '2026-03-04T11:59:04.000Z' }),
|
||||||
|
'',
|
||||||
|
].join('\n'));
|
||||||
|
|
||||||
|
const markdown = renderTraceReportFromLogs(
|
||||||
|
{
|
||||||
|
tracePath: join(dir, 'trace.md'),
|
||||||
|
pieceName: 'piece',
|
||||||
|
task: '{"api_key":"abc123"}',
|
||||||
|
runSlug: 'run-1',
|
||||||
|
status: 'aborted',
|
||||||
|
iterations: 1,
|
||||||
|
endTime: '2026-03-04T12:00:00.000Z',
|
||||||
|
reason: '{"secret":"plain"}',
|
||||||
|
},
|
||||||
|
sessionPath,
|
||||||
|
undefined,
|
||||||
|
'redacted',
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(markdown).toContain('"api_key":"[REDACTED]"');
|
||||||
|
expect(markdown).toContain('"secret":"[REDACTED]"');
|
||||||
|
expect(markdown).toContain('Authorization: Bearer [REDACTED]');
|
||||||
|
expect(markdown).not.toContain('abc123');
|
||||||
|
expect(markdown).not.toContain('xyz987');
|
||||||
|
expect(markdown).not.toContain('ghp_abcdef1234567890');
|
||||||
|
expect(markdown).not.toContain('xoxb-1234abcd-5678efgh');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should fold alternating loop iterations into a details block', () => {
|
||||||
|
const markdown = renderTraceReportMarkdown(
|
||||||
|
{
|
||||||
|
tracePath: '/tmp/trace.md',
|
||||||
|
pieceName: 'test-piece',
|
||||||
|
task: 'test task',
|
||||||
|
runSlug: 'run-1',
|
||||||
|
status: 'completed',
|
||||||
|
iterations: 4,
|
||||||
|
endTime: '2026-03-04T12:00:00.000Z',
|
||||||
|
},
|
||||||
|
'2026-03-04T11:59:00.000Z',
|
||||||
|
[
|
||||||
|
{ step: 'reviewers', persona: 'reviewer', iteration: 1, startedAt: '2026-03-04T11:59:01.000Z', phases: [], result: { status: 'done', content: 'ok' } },
|
||||||
|
{ step: 'fix', persona: 'coder', iteration: 2, startedAt: '2026-03-04T11:59:02.000Z', phases: [], result: { status: 'done', content: 'ok' } },
|
||||||
|
{ step: 'reviewers', persona: 'reviewer', iteration: 3, startedAt: '2026-03-04T11:59:03.000Z', phases: [], result: { status: 'done', content: 'ok' } },
|
||||||
|
{ step: 'fix', persona: 'coder', iteration: 4, startedAt: '2026-03-04T11:59:04.000Z', phases: [], result: { status: 'done', content: 'ok' } },
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(markdown).toContain('reviewers ↔ fix loop');
|
||||||
|
expect(markdown).toContain('<details><summary>Loop details');
|
||||||
|
});
|
||||||
|
});
|
||||||
@ -1,172 +1,19 @@
|
|||||||
import type { AgentResponse, PartDefinition, PieceRule, RuleMatchMethod, Language } from '../core/models/types.js';
|
import type { AgentResponse } from '../core/models/types.js';
|
||||||
import { runAgent, type RunAgentOptions, type StreamCallback } from './runner.js';
|
import { runAgent, type RunAgentOptions } from './runner.js';
|
||||||
import { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js';
|
|
||||||
import { parseParts } from '../core/piece/engine/task-decomposer.js';
|
|
||||||
import { loadJudgmentSchema, loadEvaluationSchema, loadDecompositionSchema, loadMorePartsSchema } from '../infra/resources/schema-loader.js';
|
|
||||||
import { detectRuleIndex } from '../shared/utils/ruleIndex.js';
|
|
||||||
import { ensureUniquePartIds, parsePartDefinitionEntry } from '../core/piece/part-definition-validator.js';
|
|
||||||
|
|
||||||
export interface JudgeStatusOptions {
|
export {
|
||||||
cwd: string;
|
evaluateCondition,
|
||||||
movementName: string;
|
judgeStatus,
|
||||||
language?: Language;
|
type EvaluateConditionOptions,
|
||||||
interactive?: boolean;
|
type JudgeStatusOptions,
|
||||||
onStream?: StreamCallback;
|
type JudgeStatusResult,
|
||||||
}
|
} from './judge-status-usecase.js';
|
||||||
|
export {
|
||||||
export interface JudgeStatusResult {
|
decomposeTask,
|
||||||
ruleIndex: number;
|
requestMoreParts,
|
||||||
method: RuleMatchMethod;
|
type DecomposeTaskOptions,
|
||||||
}
|
type MorePartsResponse,
|
||||||
|
} from './decompose-task-usecase.js';
|
||||||
export interface EvaluateConditionOptions {
|
|
||||||
cwd: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface DecomposeTaskOptions {
|
|
||||||
cwd: string;
|
|
||||||
persona?: string;
|
|
||||||
personaPath?: string;
|
|
||||||
language?: Language;
|
|
||||||
model?: string;
|
|
||||||
provider?: 'claude' | 'codex' | 'opencode' | 'cursor' | 'copilot' | 'mock';
|
|
||||||
onStream?: StreamCallback;
|
|
||||||
}
|
|
||||||
|
|
||||||
export interface MorePartsResponse {
|
|
||||||
done: boolean;
|
|
||||||
reasoning: string;
|
|
||||||
parts: PartDefinition[];
|
|
||||||
}
|
|
||||||
|
|
||||||
function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] {
|
|
||||||
if (!Array.isArray(raw)) {
|
|
||||||
throw new Error('Structured output "parts" must be an array');
|
|
||||||
}
|
|
||||||
if (raw.length === 0) {
|
|
||||||
throw new Error('Structured output "parts" must not be empty');
|
|
||||||
}
|
|
||||||
if (raw.length > maxParts) {
|
|
||||||
throw new Error(`Structured output produced too many parts: ${raw.length} > ${maxParts}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const parts: PartDefinition[] = raw.map((entry, index) => parsePartDefinitionEntry(entry, index));
|
|
||||||
ensureUniquePartIds(parts);
|
|
||||||
|
|
||||||
return parts;
|
|
||||||
}
|
|
||||||
|
|
||||||
function toMorePartsResponse(raw: unknown, maxAdditionalParts: number): MorePartsResponse {
|
|
||||||
if (typeof raw !== 'object' || raw == null || Array.isArray(raw)) {
|
|
||||||
throw new Error('Structured output must be an object');
|
|
||||||
}
|
|
||||||
|
|
||||||
const payload = raw as Record<string, unknown>;
|
|
||||||
if (typeof payload.done !== 'boolean') {
|
|
||||||
throw new Error('Structured output "done" must be a boolean');
|
|
||||||
}
|
|
||||||
if (typeof payload.reasoning !== 'string') {
|
|
||||||
throw new Error('Structured output "reasoning" must be a string');
|
|
||||||
}
|
|
||||||
if (!Array.isArray(payload.parts)) {
|
|
||||||
throw new Error('Structured output "parts" must be an array');
|
|
||||||
}
|
|
||||||
if (payload.parts.length > maxAdditionalParts) {
|
|
||||||
throw new Error(`Structured output produced too many parts: ${payload.parts.length} > ${maxAdditionalParts}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
const parts: PartDefinition[] = payload.parts.map((entry, index) => parsePartDefinitionEntry(entry, index));
|
|
||||||
ensureUniquePartIds(parts);
|
|
||||||
|
|
||||||
return {
|
|
||||||
done: payload.done,
|
|
||||||
reasoning: payload.reasoning,
|
|
||||||
parts,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
function summarizePartContent(content: string): string {
|
|
||||||
const maxLength = 2000;
|
|
||||||
if (content.length <= maxLength) {
|
|
||||||
return content;
|
|
||||||
}
|
|
||||||
return `${content.slice(0, maxLength)}\n...[truncated]`;
|
|
||||||
}
|
|
||||||
|
|
||||||
function buildDecomposePrompt(instruction: string, maxParts: number, language?: Language): string {
|
|
||||||
if (language === 'ja') {
|
|
||||||
return [
|
|
||||||
'以下はタスク分解専用の指示です。タスクを実行せず、分解だけを行ってください。',
|
|
||||||
'- ツールは使用しない',
|
|
||||||
`- パート数は 1 以上 ${maxParts} 以下`,
|
|
||||||
'- パートは互いに独立させる',
|
|
||||||
'',
|
|
||||||
'## 元タスク',
|
|
||||||
instruction,
|
|
||||||
].join('\n');
|
|
||||||
}
|
|
||||||
|
|
||||||
return [
|
|
||||||
'This is decomposition-only planning. Do not execute the task.',
|
|
||||||
'- Do not use any tool',
|
|
||||||
`- Produce between 1 and ${maxParts} independent parts`,
|
|
||||||
'- Keep each part self-contained',
|
|
||||||
'',
|
|
||||||
'## Original Task',
|
|
||||||
instruction,
|
|
||||||
].join('\n');
|
|
||||||
}
|
|
||||||
|
|
||||||
function buildMorePartsPrompt(
|
|
||||||
originalInstruction: string,
|
|
||||||
allResults: Array<{ id: string; title: string; status: string; content: string }>,
|
|
||||||
existingIds: string[],
|
|
||||||
maxAdditionalParts: number,
|
|
||||||
language?: Language,
|
|
||||||
): string {
|
|
||||||
const resultBlock = allResults.map((result) => [
|
|
||||||
`### ${result.id}: ${result.title} (${result.status})`,
|
|
||||||
summarizePartContent(result.content),
|
|
||||||
].join('\n')).join('\n\n');
|
|
||||||
|
|
||||||
if (language === 'ja') {
|
|
||||||
return [
|
|
||||||
'以下の実行結果を見て、追加のサブタスクが必要か判断してください。',
|
|
||||||
'- ツールは使用しない',
|
|
||||||
'',
|
|
||||||
'## 元タスク',
|
|
||||||
originalInstruction,
|
|
||||||
'',
|
|
||||||
'## 完了済みパート',
|
|
||||||
resultBlock || '(なし)',
|
|
||||||
'',
|
|
||||||
'## 判断ルール',
|
|
||||||
'- 追加作業が不要なら done=true にする',
|
|
||||||
'- 追加作業が必要なら parts に新しいパートを入れる',
|
|
||||||
'- 不足が複数ある場合は、可能な限り一括で複数パートを返す',
|
|
||||||
`- 既存IDは再利用しない: ${existingIds.join(', ') || '(なし)'}`,
|
|
||||||
`- 追加できる最大数: ${maxAdditionalParts}`,
|
|
||||||
].join('\n');
|
|
||||||
}
|
|
||||||
|
|
||||||
return [
|
|
||||||
'Review completed part results and decide whether additional parts are needed.',
|
|
||||||
'- Do not use any tool',
|
|
||||||
'',
|
|
||||||
'## Original Task',
|
|
||||||
originalInstruction,
|
|
||||||
'',
|
|
||||||
'## Completed Parts',
|
|
||||||
resultBlock || '(none)',
|
|
||||||
'',
|
|
||||||
'## Decision Rules',
|
|
||||||
'- Set done=true when no additional work is required',
|
|
||||||
'- If more work is needed, provide new parts in "parts"',
|
|
||||||
'- If multiple missing tasks are known, return multiple new parts in one batch when possible',
|
|
||||||
`- Do not reuse existing IDs: ${existingIds.join(', ') || '(none)'}`,
|
|
||||||
`- Maximum additional parts: ${maxAdditionalParts}`,
|
|
||||||
].join('\n');
|
|
||||||
}
|
|
||||||
|
|
||||||
export async function executeAgent(
|
export async function executeAgent(
|
||||||
persona: string | undefined,
|
persona: string | undefined,
|
||||||
@ -175,175 +22,6 @@ export async function executeAgent(
|
|||||||
): Promise<AgentResponse> {
|
): Promise<AgentResponse> {
|
||||||
return runAgent(persona, instruction, options);
|
return runAgent(persona, instruction, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
export const generateReport = executeAgent;
|
export const generateReport = executeAgent;
|
||||||
export const executePart = executeAgent;
|
export const executePart = executeAgent;
|
||||||
|
|
||||||
/**
 * Runs a single-turn, read-only judge agent to decide which condition the
 * given output satisfies.
 *
 * @param agentOutput - Text passed to `buildJudgePrompt` as the material to judge.
 * @param conditions - Candidate conditions; the returned value is a position in this array.
 * @param options - Supplies the working directory used for the judge run.
 * @returns The zero-based position inside `conditions` when the structured
 *   output carries a usable one-based `matched_index`; `-1` when the agent did
 *   not finish with status `done`; otherwise whatever `detectJudgeIndex`
 *   extracts from the raw response content.
 */
export async function evaluateCondition(
  agentOutput: string,
  conditions: Array<{ index: number; text: string }>,
  options: EvaluateConditionOptions,
): Promise<number> {
  const judgePrompt = buildJudgePrompt(agentOutput, conditions);
  const judgeResponse = await runAgent(undefined, judgePrompt, {
    cwd: options.cwd,
    maxTurns: 1,
    permissionMode: 'readonly',
    outputSchema: loadEvaluationSchema(),
  });

  if (judgeResponse.status !== 'done') return -1;

  // The schema reports a one-based index; accept it only when it is an
  // integer that points inside `conditions`.
  const reported = judgeResponse.structuredOutput?.matched_index;
  if (
    typeof reported === 'number' &&
    Number.isInteger(reported) &&
    reported >= 1 &&
    reported <= conditions.length
  ) {
    return reported - 1;
  }

  // No usable structured answer: fall back to parsing the free-text reply.
  return detectJudgeIndex(judgeResponse.content);
}
|
|
||||||
|
|
||||||
/**
 * Determines which rule applies by trying up to three strategies in order:
 * structured output, tag detection, and finally an AI judge.
 *
 * @param structuredInstruction - Instruction for the structured-output run; also
 *   handed to the AI judge as the text to evaluate.
 * @param tagInstruction - Instruction for the dedicated tag-detection run.
 * @param rules - Candidate rules. One rule is selected automatically without any agent call.
 * @param options - Working directory, movement name, language, stream callback,
 *   and whether interactive-only rules may be selected.
 * @returns The selected rule index and the method that produced it.
 * @throws Error when `rules` is empty, or when no stage yields a selectable rule.
 */
export async function judgeStatus(
  structuredInstruction: string,
  tagInstruction: string,
  rules: PieceRule[],
  options: JudgeStatusOptions,
): Promise<JudgeStatusResult> {
  switch (rules.length) {
    case 0:
      throw new Error('judgeStatus requires at least one rule');
    case 1:
      return { ruleIndex: 0, method: 'auto_select' };
    default:
      break;
  }

  const allowInteractive = options.interactive === true;

  // A rule is selectable when its index is in range and it is not an
  // interactive-only rule while interactive mode is off.
  const isSelectable = (index: number): boolean => {
    const outOfRange = index < 0 || index >= rules.length;
    if (outOfRange) return false;
    return allowInteractive || !rules[index]?.interactiveOnly;
  };

  const sharedAgentOptions = {
    cwd: options.cwd,
    maxTurns: 3,
    permissionMode: 'readonly' as const,
    language: options.language,
    onStream: options.onStream,
  };

  // Stage 1: Structured output
  const stage1 = await runAgent('conductor', structuredInstruction, {
    ...sharedAgentOptions,
    outputSchema: loadJudgmentSchema(),
  });
  const step = stage1.status === 'done' ? stage1.structuredOutput?.step : undefined;
  if (typeof step === 'number' && Number.isInteger(step) && isSelectable(step - 1)) {
    // `step` is one-based; rule indices are zero-based.
    return { ruleIndex: step - 1, method: 'structured_output' };
  }

  // Stage 2: Tag detection (dedicated call, no outputSchema)
  const stage2 = await runAgent('conductor', tagInstruction, sharedAgentOptions);
  if (stage2.status === 'done') {
    const taggedIndex = detectRuleIndex(stage2.content, options.movementName);
    if (isSelectable(taggedIndex)) {
      return { ruleIndex: taggedIndex, method: 'phase3_tag' };
    }
  }

  // Stage 3: AI judge, restricted to the rules that may be selected in this mode.
  const candidates: Array<{ index: number; text: string }> = [];
  rules.forEach((rule, index) => {
    if (allowInteractive || !rule.interactiveOnly) {
      candidates.push({ index, text: rule.condition });
    }
  });

  if (candidates.length > 0) {
    const picked = await evaluateCondition(structuredInstruction, candidates, { cwd: options.cwd });
    // `picked` is a position in `candidates`; map it back to the original rule index.
    const matched = picked >= 0 && picked < candidates.length ? candidates[picked] : undefined;
    if (matched?.index !== undefined) {
      return { ruleIndex: matched.index, method: 'ai_judge' };
    }
  }

  throw new Error(`Status not found for movement "${options.movementName}"`);
}
|
|
||||||
|
|
||||||
/**
 * Asks the configured persona to split an instruction into parts.
 *
 * The agent runs read-only with an empty tool allow-list and a decomposition
 * output schema bounded by `maxParts`.
 *
 * @param instruction - The original task text to decompose.
 * @param maxParts - Upper bound on the number of parts.
 * @param options - Persona, provider, model, language, cwd and stream callback.
 * @returns Parts taken from the structured output when it has a `parts` value,
 *   otherwise parts parsed from the raw response content.
 * @throws Error prefixed with "Team leader failed:" when the agent does not finish with status `done`.
 */
export async function decomposeTask(
  instruction: string,
  maxParts: number,
  options: DecomposeTaskOptions,
): Promise<PartDefinition[]> {
  const prompt = buildDecomposePrompt(instruction, maxParts, options.language);
  const { cwd, personaPath, language, model, provider, onStream } = options;

  const response = await runAgent(options.persona, prompt, {
    cwd,
    personaPath,
    language,
    model,
    provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadDecompositionSchema(maxParts),
    onStream,
  });

  if (response.status !== 'done') {
    // Report the most specific failure text available.
    const detail = response.error || response.content || response.status;
    throw new Error(`Team leader failed: ${detail}`);
  }

  const structuredParts = response.structuredOutput?.parts;
  return structuredParts == null
    ? parseParts(response.content, maxParts)
    : toPartDefinitions(structuredParts, maxParts);
}
|
|
||||||
|
|
||||||
/**
 * Shows the persona the results of completed parts and asks whether further
 * parts are required.
 *
 * @param originalInstruction - The task that was originally decomposed.
 * @param allResults - Completed part results to include in the prompt.
 * @param existingIds - Part IDs the agent is told not to reuse.
 * @param maxAdditionalParts - Upper bound on newly proposed parts.
 * @param options - Persona, provider, model, language, cwd and stream callback.
 * @returns The validated decision taken from the agent's structured output.
 * @throws Error prefixed with "Team leader feedback failed:" when the agent
 *   does not finish with status `done`.
 */
export async function requestMoreParts(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  options: DecomposeTaskOptions,
): Promise<MorePartsResponse> {
  const feedbackPrompt = buildMorePartsPrompt(
    originalInstruction,
    allResults,
    existingIds,
    maxAdditionalParts,
    options.language,
  );
  const { cwd, personaPath, language, model, provider, onStream } = options;

  const response = await runAgent(options.persona, feedbackPrompt, {
    cwd,
    personaPath,
    language,
    model,
    provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadMorePartsSchema(maxAdditionalParts),
    onStream,
  });

  if (response.status === 'done') {
    return toMorePartsResponse(response.structuredOutput, maxAdditionalParts);
  }

  // Report the most specific failure text available.
  const detail = response.error || response.content || response.status;
  throw new Error(`Team leader feedback failed: ${detail}`);
}
|
|
||||||
|
|||||||
222
src/agents/decompose-task-usecase.ts
Normal file
222
src/agents/decompose-task-usecase.ts
Normal file
@ -0,0 +1,222 @@
|
|||||||
|
import type { Language, PartDefinition } from '../core/models/types.js';
|
||||||
|
import { runAgent, type StreamCallback } from './runner.js';
|
||||||
|
import { parseParts } from '../core/piece/engine/task-decomposer.js';
|
||||||
|
import { loadDecompositionSchema, loadMorePartsSchema } from '../infra/resources/schema-loader.js';
|
||||||
|
import { ensureUniquePartIds, parsePartDefinitionEntry } from '../core/piece/part-definition-validator.js';
|
||||||
|
|
||||||
|
/** Payload handed to `onPromptResolved`: the two prompt texts for a run. */
interface DecomposePromptParts {
  systemPrompt: string;
  userInstruction: string;
}

/** Options accepted by the task-decomposition use cases in this module. */
export interface DecomposeTaskOptions {
  /** Working directory passed to the agent run. */
  cwd: string;
  /** Persona used for the agent run, if any. */
  persona?: string;
  /** Path associated with the persona, forwarded to the agent run. */
  personaPath?: string;
  /** Prompt language; `'ja'` selects the Japanese prompt text. */
  language?: Language;
  /** Model identifier forwarded to the agent run. */
  model?: string;
  /** Provider forwarded to the agent run. */
  provider?: 'claude' | 'codex' | 'opencode' | 'cursor' | 'copilot' | 'mock';
  /** Stream callback forwarded to the agent run. */
  onStream?: StreamCallback;
  /** Callback receiving the system prompt and user instruction of a run. */
  onPromptResolved?: (promptParts: DecomposePromptParts) => void;
}
|
||||||
|
|
||||||
|
/** Validated answer to a "are more parts needed?" request. */
export interface MorePartsResponse {
  /** Value of the structured output's boolean `done` field. */
  done: boolean;
  /** Value of the structured output's string `reasoning` field. */
  reasoning: string;
  /** Newly proposed parts parsed from the structured output's `parts` array. */
  parts: PartDefinition[];
}
|
||||||
|
|
||||||
|
/**
 * Validates the structured-output `parts` value and converts it to part definitions.
 *
 * @param raw - Untrusted `parts` value taken from the agent's structured output.
 * @param maxParts - Maximum number of entries allowed.
 * @returns One definition per entry, in input order.
 * @throws Error when `raw` is not an array, is empty, or holds more than
 *   `maxParts` entries. Entry parsing and the ID uniqueness check are
 *   delegated to `parsePartDefinitionEntry` and `ensureUniquePartIds`.
 */
function toPartDefinitions(raw: unknown, maxParts: number): PartDefinition[] {
  if (!Array.isArray(raw)) throw new Error('Structured output "parts" must be an array');

  const entryCount = raw.length;
  if (entryCount === 0) throw new Error('Structured output "parts" must not be empty');
  if (entryCount > maxParts) {
    throw new Error(`Structured output produced too many parts: ${entryCount} > ${maxParts}`);
  }

  const definitions: PartDefinition[] = raw.map((entry, position) =>
    parsePartDefinitionEntry(entry, position),
  );
  ensureUniquePartIds(definitions);
  return definitions;
}
|
||||||
|
|
||||||
|
/**
 * Validates the structured output of a "more parts" request.
 *
 * Unlike `toPartDefinitions`, an empty `parts` array is accepted here.
 *
 * @param raw - Untrusted structured output from the agent.
 * @param maxAdditionalParts - Maximum number of new parts allowed.
 * @returns The `done` flag, the reasoning text and the parsed new parts.
 * @throws Error when `raw` is not a plain object, when `done`, `reasoning` or
 *   `parts` has the wrong type, or when `parts` exceeds `maxAdditionalParts`.
 *   Entry parsing and the ID uniqueness check are delegated to
 *   `parsePartDefinitionEntry` and `ensureUniquePartIds`.
 */
function toMorePartsResponse(raw: unknown, maxAdditionalParts: number): MorePartsResponse {
  const isPlainObject = typeof raw === 'object' && raw != null && !Array.isArray(raw);
  if (!isPlainObject) throw new Error('Structured output must be an object');

  const { done, reasoning, parts: rawParts } = raw as Record<string, unknown>;

  if (typeof done !== 'boolean') throw new Error('Structured output "done" must be a boolean');
  if (typeof reasoning !== 'string') throw new Error('Structured output "reasoning" must be a string');
  if (!Array.isArray(rawParts)) throw new Error('Structured output "parts" must be an array');
  if (rawParts.length > maxAdditionalParts) {
    throw new Error(`Structured output produced too many parts: ${rawParts.length} > ${maxAdditionalParts}`);
  }

  const newParts: PartDefinition[] = rawParts.map((entry, position) =>
    parsePartDefinitionEntry(entry, position),
  );
  ensureUniquePartIds(newParts);

  return { done, reasoning, parts: newParts };
}
|
||||||
|
|
||||||
|
/**
 * Caps a part's content at 2000 UTF-16 code units so it can be embedded in a prompt.
 *
 * @param content - Full text produced by a part.
 * @returns `content` unchanged when it is at most 2000 code units long;
 *   otherwise a prefix followed by a newline and `...[truncated]`. The prefix
 *   is one code unit shorter than the limit when the cut would otherwise fall
 *   between the two halves of a surrogate pair.
 */
function summarizePartContent(content: string): string {
  const maxLength = 2000;
  if (content.length <= maxLength) {
    return content;
  }

  // Never end the prefix on a lone high surrogate (0xD800-0xDBFF): cutting a
  // supplementary character in half leaves ill-formed text in the prompt.
  const lastUnit = content.charCodeAt(maxLength - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  const cut = endsOnHighSurrogate ? maxLength - 1 : maxLength;

  return `${content.slice(0, cut)}\n...[truncated]`;
}
|
||||||
|
|
||||||
|
/**
 * Builds the decomposition-only prompt sent to the persona.
 *
 * @param instruction - The original task text, appended under the task heading.
 * @param maxParts - Upper bound on the number of parts, stated in the prompt.
 * @param language - `'ja'` selects the Japanese wording; anything else yields English.
 * @returns The prompt lines joined with newlines.
 */
function buildDecomposePrompt(instruction: string, maxParts: number, language?: Language): string {
  const useJapanese = language === 'ja';

  const guidance = useJapanese
    ? [
        '以下はタスク分解専用の指示です。タスクを実行せず、分解だけを行ってください。',
        '- ツールは使用しない',
        `- パート数は 1 以上 ${maxParts} 以下`,
        '- パートは互いに独立させる',
      ]
    : [
        'This is decomposition-only planning. Do not execute the task.',
        '- Do not use any tool',
        `- Produce between 1 and ${maxParts} independent parts`,
        '- Keep each part self-contained',
      ];
  const taskHeading = useJapanese ? '## 元タスク' : '## Original Task';

  // Guidance, a blank separator line, then the heading and the task itself.
  return [...guidance, '', taskHeading, instruction].join('\n');
}
|
||||||
|
|
||||||
|
/**
 * Builds the prompt that asks whether additional parts are needed after reviewing
 * the completed ones.
 *
 * @param originalInstruction - The task that was originally decomposed.
 * @param allResults - Completed part results; each content is passed through
 *   `summarizePartContent` before being embedded.
 * @param existingIds - IDs listed in the prompt as not to be reused.
 * @param maxAdditionalParts - Upper bound stated in the prompt.
 * @param language - `'ja'` selects the Japanese wording; anything else yields English.
 * @returns The prompt lines joined with newlines.
 */
function buildMorePartsPrompt(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  language?: Language,
): string {
  // One "### id: title (status)" section per result, separated by blank lines.
  const completedSections = allResults
    .map((result) => `### ${result.id}: ${result.title} (${result.status})\n${summarizePartContent(result.content)}`)
    .join('\n\n');

  const wording = language === 'ja'
    ? {
        intro: '以下の実行結果を見て、追加のサブタスクが必要か判断してください。',
        noTools: '- ツールは使用しない',
        taskHeading: '## 元タスク',
        partsHeading: '## 完了済みパート',
        none: '(なし)',
        rulesHeading: '## 判断ルール',
        rules: [
          '- 追加作業が不要なら done=true にする',
          '- 追加作業が必要なら parts に新しいパートを入れる',
          '- 不足が複数ある場合は、可能な限り一括で複数パートを返す',
        ],
        idsPrefix: '- 既存IDは再利用しない: ',
        maxPrefix: '- 追加できる最大数: ',
      }
    : {
        intro: 'Review completed part results and decide whether additional parts are needed.',
        noTools: '- Do not use any tool',
        taskHeading: '## Original Task',
        partsHeading: '## Completed Parts',
        none: '(none)',
        rulesHeading: '## Decision Rules',
        rules: [
          '- Set done=true when no additional work is required',
          '- If more work is needed, provide new parts in "parts"',
          '- If multiple missing tasks are known, return multiple new parts in one batch when possible',
        ],
        idsPrefix: '- Do not reuse existing IDs: ',
        maxPrefix: '- Maximum additional parts: ',
      };

  // Empty lists are rendered with the language's "none" placeholder.
  const reservedIds = existingIds.join(', ') || wording.none;

  return [
    wording.intro,
    wording.noTools,
    '',
    wording.taskHeading,
    originalInstruction,
    '',
    wording.partsHeading,
    completedSections || wording.none,
    '',
    wording.rulesHeading,
    ...wording.rules,
    `${wording.idsPrefix}${reservedIds}`,
    `${wording.maxPrefix}${maxAdditionalParts}`,
  ].join('\n');
}
|
||||||
|
|
||||||
|
/**
 * Asks the configured persona to split an instruction into parts.
 *
 * The agent runs read-only with an empty tool allow-list and a decomposition
 * output schema bounded by `maxParts`. `options.onPromptResolved` is forwarded
 * to the agent run.
 *
 * @param instruction - The original task text to decompose.
 * @param maxParts - Upper bound on the number of parts.
 * @param options - Persona, provider, model, language, cwd and callbacks.
 * @returns Parts taken from the structured output when it has a `parts` value,
 *   otherwise parts parsed from the raw response content.
 * @throws Error prefixed with "Team leader failed:" when the agent does not finish with status `done`.
 */
export async function decomposeTask(
  instruction: string,
  maxParts: number,
  options: DecomposeTaskOptions,
): Promise<PartDefinition[]> {
  const prompt = buildDecomposePrompt(instruction, maxParts, options.language);
  const { cwd, personaPath, language, model, provider, onStream, onPromptResolved } = options;

  const response = await runAgent(options.persona, prompt, {
    cwd,
    personaPath,
    language,
    model,
    provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadDecompositionSchema(maxParts),
    onStream,
    onPromptResolved,
  });

  if (response.status !== 'done') {
    // Report the most specific failure text available.
    const detail = response.error || response.content || response.status;
    throw new Error(`Team leader failed: ${detail}`);
  }

  const structuredParts = response.structuredOutput?.parts;
  return structuredParts == null
    ? parseParts(response.content, maxParts)
    : toPartDefinitions(structuredParts, maxParts);
}
|
||||||
|
|
||||||
|
/**
 * Shows the persona the results of completed parts and asks whether further
 * parts are required.
 *
 * Note: `options.onPromptResolved` is not passed to the agent run here, unlike
 * in `decomposeTask`.
 *
 * @param originalInstruction - The task that was originally decomposed.
 * @param allResults - Completed part results to include in the prompt.
 * @param existingIds - Part IDs the agent is told not to reuse.
 * @param maxAdditionalParts - Upper bound on newly proposed parts.
 * @param options - Persona, provider, model, language, cwd and stream callback.
 * @returns The validated decision taken from the agent's structured output.
 * @throws Error prefixed with "Team leader feedback failed:" when the agent
 *   does not finish with status `done`.
 */
export async function requestMoreParts(
  originalInstruction: string,
  allResults: Array<{ id: string; title: string; status: string; content: string }>,
  existingIds: string[],
  maxAdditionalParts: number,
  options: DecomposeTaskOptions,
): Promise<MorePartsResponse> {
  const feedbackPrompt = buildMorePartsPrompt(
    originalInstruction,
    allResults,
    existingIds,
    maxAdditionalParts,
    options.language,
  );
  const { cwd, personaPath, language, model, provider, onStream } = options;

  const response = await runAgent(options.persona, feedbackPrompt, {
    cwd,
    personaPath,
    language,
    model,
    provider,
    allowedTools: [],
    permissionMode: 'readonly',
    maxTurns: 4,
    outputSchema: loadMorePartsSchema(maxAdditionalParts),
    onStream,
  });

  if (response.status === 'done') {
    return toMorePartsResponse(response.structuredOutput, maxAdditionalParts);
  }

  // Report the most specific failure text available.
  const detail = response.error || response.content || response.status;
  throw new Error(`Team leader feedback failed: ${detail}`);
}
|
||||||
184
src/agents/judge-status-usecase.ts
Normal file
184
src/agents/judge-status-usecase.ts
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
import type { PieceRule, RuleMatchMethod, Language } from '../core/models/types.js';
|
||||||
|
import { runAgent, type StreamCallback } from './runner.js';
|
||||||
|
import { detectJudgeIndex, buildJudgePrompt } from './judge-utils.js';
|
||||||
|
import { loadJudgmentSchema, loadEvaluationSchema } from '../infra/resources/schema-loader.js';
|
||||||
|
import { detectRuleIndex } from '../shared/utils/ruleIndex.js';
|
||||||
|
|
||||||
|
/** One notification describing the outcome of a single judging stage. */
interface JudgeStageNotification {
  /** Which of the three stages produced this entry. */
  stage: 1 | 2 | 3;
  /** Strategy used by that stage. */
  method: 'structured_output' | 'phase3_tag' | 'ai_judge';
  /** Outcome of the stage's agent run. */
  status: 'done' | 'error' | 'skipped';
  /** Instruction text sent for the stage. */
  instruction: string;
  /** Response content returned for the stage. */
  response: string;
}

/** The two prompt texts resolved for the structured-output run. */
interface StructuredPromptParts {
  systemPrompt: string;
  userInstruction: string;
}

/** Options accepted by `judgeStatus`. */
export interface JudgeStatusOptions {
  /** Working directory passed to every agent run. */
  cwd: string;
  /** Movement name, used for tag detection and in error messages. */
  movementName: string;
  /** Language forwarded to the agent runs. */
  language?: Language;
  /** When exactly `true`, interactive-only rules may be selected. */
  interactive?: boolean;
  /** Stream callback forwarded to the agent runs. */
  onStream?: StreamCallback;
  /** Called once per executed stage with that stage's instruction and response. */
  onJudgeStage?: (entry: JudgeStageNotification) => void;
  /** Forwarded as `onPromptResolved` to the structured-output run. */
  onStructuredPromptResolved?: (promptParts: StructuredPromptParts) => void;
}
|
||||||
|
|
||||||
|
/** Outcome of `judgeStatus`: the selected rule and how it was determined. */
export interface JudgeStatusResult {
  /** Zero-based index into the `rules` array passed to `judgeStatus`. */
  ruleIndex: number;
  /** Strategy that produced `ruleIndex`. */
  method: RuleMatchMethod;
}
|
||||||
|
|
||||||
|
/** What `evaluateCondition` reports about its single judge run. */
interface JudgeResponseNotification {
  /** Prompt that was sent to the judge agent. */
  instruction: string;
  /** `'done'` when the run finished with status `done`, otherwise `'error'`. */
  status: 'done' | 'error';
  /** Raw response content from the judge agent. */
  response: string;
}

/** Options accepted by `evaluateCondition`. */
export interface EvaluateConditionOptions {
  /** Working directory passed to the judge agent run. */
  cwd: string;
  /** Called once after the judge run, before its result is interpreted. */
  onJudgeResponse?: (entry: JudgeResponseNotification) => void;
}
|
||||||
|
|
||||||
|
/**
 * Runs a single-turn, read-only judge agent to decide which condition the
 * given output satisfies, and reports the raw exchange to `onJudgeResponse`.
 *
 * @param agentOutput - Text passed to `buildJudgePrompt` as the material to judge.
 * @param conditions - Candidate conditions; the returned value is a position in this array.
 * @param options - Working directory plus an optional callback that receives the
 *   prompt, a done/error status and the response content.
 * @returns The zero-based position inside `conditions` when the structured
 *   output carries a usable one-based `matched_index`; `-1` when the agent did
 *   not finish with status `done`; otherwise whatever `detectJudgeIndex`
 *   extracts from the raw response content.
 */
export async function evaluateCondition(
  agentOutput: string,
  conditions: Array<{ index: number; text: string }>,
  options: EvaluateConditionOptions,
): Promise<number> {
  const judgePrompt = buildJudgePrompt(agentOutput, conditions);
  const judgeResponse = await runAgent(undefined, judgePrompt, {
    cwd: options.cwd,
    maxTurns: 1,
    permissionMode: 'readonly',
    outputSchema: loadEvaluationSchema(),
  });
  const finished = judgeResponse.status === 'done';

  // Always report the exchange, including failed runs, before interpreting it.
  options.onJudgeResponse?.({
    instruction: judgePrompt,
    status: finished ? 'done' : 'error',
    response: judgeResponse.content,
  });

  if (!finished) return -1;

  // The schema reports a one-based index; accept it only when it is an
  // integer that points inside `conditions`.
  const reported = judgeResponse.structuredOutput?.matched_index;
  if (
    typeof reported === 'number' &&
    Number.isInteger(reported) &&
    reported >= 1 &&
    reported <= conditions.length
  ) {
    return reported - 1;
  }

  // No usable structured answer: fall back to parsing the free-text reply.
  return detectJudgeIndex(judgeResponse.content);
}
|
||||||
|
|
||||||
|
/**
 * Determines which rule applies by trying up to three strategies in order —
 * structured output, tag detection, then an AI judge — and notifies
 * `options.onJudgeStage` after each stage that actually runs.
 *
 * @param structuredInstruction - Instruction for the structured-output run; also
 *   handed to the AI judge as the text to evaluate.
 * @param tagInstruction - Instruction for the dedicated tag-detection run.
 * @param rules - Candidate rules. One rule is selected automatically without any agent call.
 * @param options - Working directory, movement name, language, callbacks, and
 *   whether interactive-only rules may be selected.
 * @returns The selected rule index and the method that produced it.
 * @throws Error when `rules` is empty, when the AI judge stage ran but reported
 *   no response, or when no stage yields a selectable rule.
 */
export async function judgeStatus(
  structuredInstruction: string,
  tagInstruction: string,
  rules: PieceRule[],
  options: JudgeStatusOptions,
): Promise<JudgeStatusResult> {
  switch (rules.length) {
    case 0:
      throw new Error('judgeStatus requires at least one rule');
    case 1:
      return { ruleIndex: 0, method: 'auto_select' };
    default:
      break;
  }

  const allowInteractive = options.interactive === true;

  // A rule is selectable when its index is in range and it is not an
  // interactive-only rule while interactive mode is off.
  const isSelectable = (index: number): boolean => {
    const outOfRange = index < 0 || index >= rules.length;
    if (outOfRange) return false;
    return allowInteractive || !rules[index]?.interactiveOnly;
  };

  const sharedAgentOptions = {
    cwd: options.cwd,
    maxTurns: 3,
    permissionMode: 'readonly' as const,
    language: options.language,
    onStream: options.onStream,
  };

  // Stage 1: structured output.
  const stage1 = await runAgent('conductor', structuredInstruction, {
    ...sharedAgentOptions,
    outputSchema: loadJudgmentSchema(),
    onPromptResolved: options.onStructuredPromptResolved,
  });
  const stage1Done = stage1.status === 'done';
  options.onJudgeStage?.({
    stage: 1,
    method: 'structured_output',
    status: stage1Done ? 'done' : 'error',
    instruction: structuredInstruction,
    response: stage1.content,
  });

  const step = stage1Done ? stage1.structuredOutput?.step : undefined;
  if (typeof step === 'number' && Number.isInteger(step) && isSelectable(step - 1)) {
    // `step` is one-based; rule indices are zero-based.
    return { ruleIndex: step - 1, method: 'structured_output' };
  }

  // Stage 2: tag detection (dedicated call, no output schema).
  const stage2 = await runAgent('conductor', tagInstruction, sharedAgentOptions);
  const stage2Done = stage2.status === 'done';
  options.onJudgeStage?.({
    stage: 2,
    method: 'phase3_tag',
    status: stage2Done ? 'done' : 'error',
    instruction: tagInstruction,
    response: stage2.content,
  });

  if (stage2Done) {
    const taggedIndex = detectRuleIndex(stage2.content, options.movementName);
    if (isSelectable(taggedIndex)) {
      return { ruleIndex: taggedIndex, method: 'phase3_tag' };
    }
  }

  // Stage 3: AI judge, restricted to the rules that may be selected in this mode.
  const candidates: Array<{ index: number; text: string }> = [];
  rules.forEach((rule, index) => {
    if (allowInteractive || !rule.interactiveOnly) {
      candidates.push({ index, text: rule.condition });
    }
  });

  if (candidates.length > 0) {
    // Holds what evaluateCondition reports about its judge run (last report wins).
    const reported: {
      entry?: { status: 'done' | 'error'; instruction: string; response: string };
    } = {};

    const picked = await evaluateCondition(structuredInstruction, candidates, {
      cwd: options.cwd,
      onJudgeResponse: (entry) => {
        reported.entry = {
          status: entry.status,
          instruction: entry.instruction,
          response: entry.response,
        };
      },
    });

    const judged = reported.entry;
    if (judged === undefined || judged.instruction === '') {
      throw new Error(`AI judge response missing for movement "${options.movementName}"`);
    }

    options.onJudgeStage?.({
      stage: 3,
      method: 'ai_judge',
      status: judged.status,
      instruction: judged.instruction,
      response: judged.response,
    });

    // `picked` is a position in `candidates`; map it back to the original rule index.
    const matched = picked >= 0 && picked < candidates.length ? candidates[picked] : undefined;
    if (matched?.index !== undefined) {
      return { ruleIndex: matched.index, method: 'ai_judge' };
    }
  }

  throw new Error(`Status not found for movement "${options.movementName}"`);
}
|
||||||
@ -158,11 +158,18 @@ export class AgentRunner {
|
|||||||
const providerType = resolved.provider;
|
const providerType = resolved.provider;
|
||||||
const provider = getProvider(providerType);
|
const provider = getProvider(providerType);
|
||||||
|
|
||||||
|
const resolvedSystemPrompt = agentConfig.claudeAgent || agentConfig.claudeSkill
|
||||||
|
? undefined
|
||||||
|
: loadAgentPrompt(agentConfig, options.cwd);
|
||||||
|
|
||||||
|
options.onPromptResolved?.({
|
||||||
|
systemPrompt: resolvedSystemPrompt ?? '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
|
|
||||||
const agent = provider.setup({
|
const agent = provider.setup({
|
||||||
name: agentConfig.name,
|
name: agentConfig.name,
|
||||||
systemPrompt: agentConfig.claudeAgent || agentConfig.claudeSkill
|
systemPrompt: resolvedSystemPrompt,
|
||||||
? undefined
|
|
||||||
: loadAgentPrompt(agentConfig, options.cwd),
|
|
||||||
claudeAgent: agentConfig.claudeAgent,
|
claudeAgent: agentConfig.claudeAgent,
|
||||||
claudeSkill: agentConfig.claudeSkill,
|
claudeSkill: agentConfig.claudeSkill,
|
||||||
});
|
});
|
||||||
@ -223,6 +230,10 @@ export class AgentRunner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const systemPrompt = loadTemplate('perform_agent_system_prompt', language, templateVars);
|
const systemPrompt = loadTemplate('perform_agent_system_prompt', language, templateVars);
|
||||||
|
options.onPromptResolved?.({
|
||||||
|
systemPrompt,
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
const agent = provider.setup({ name: personaName, systemPrompt });
|
const agent = provider.setup({ name: personaName, systemPrompt });
|
||||||
return agent.call(task, callOptions);
|
return agent.call(task, callOptions);
|
||||||
}
|
}
|
||||||
@ -236,11 +247,19 @@ export class AgentRunner {
|
|||||||
return this.runCustom(agentConfig, task, options);
|
return this.runCustom(agentConfig, task, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
options.onPromptResolved?.({
|
||||||
|
systemPrompt: personaSpec,
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
const agent = provider.setup({ name: personaName, systemPrompt: personaSpec });
|
const agent = provider.setup({ name: personaName, systemPrompt: personaSpec });
|
||||||
return agent.call(task, callOptions);
|
return agent.call(task, callOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. No persona specified — run with instruction_template only (no system prompt)
|
// 3. No persona specified — run with instruction_template only (no system prompt)
|
||||||
|
options.onPromptResolved?.({
|
||||||
|
systemPrompt: '',
|
||||||
|
userInstruction: task,
|
||||||
|
});
|
||||||
const agent = provider.setup({ name: personaName });
|
const agent = provider.setup({ name: personaName });
|
||||||
return agent.call(task, callOptions);
|
return agent.call(task, callOptions);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -46,4 +46,8 @@ export interface RunAgentOptions {
|
|||||||
currentPosition: string;
|
currentPosition: string;
|
||||||
};
|
};
|
||||||
outputSchema?: Record<string, unknown>;
|
outputSchema?: Record<string, unknown>;
|
||||||
|
onPromptResolved?: (promptParts: {
|
||||||
|
systemPrompt: string;
|
||||||
|
userInstruction: string;
|
||||||
|
}) => void;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -22,7 +22,7 @@ import { incrementMovementIteration } from './state-manager.js';
|
|||||||
import { createLogger } from '../../../shared/utils/index.js';
|
import { createLogger } from '../../../shared/utils/index.js';
|
||||||
import type { OptionsBuilder } from './OptionsBuilder.js';
|
import type { OptionsBuilder } from './OptionsBuilder.js';
|
||||||
import type { MovementExecutor } from './MovementExecutor.js';
|
import type { MovementExecutor } from './MovementExecutor.js';
|
||||||
import type { PhaseName } from '../types.js';
|
import type { PhaseName, PhasePromptParts } from '../types.js';
|
||||||
|
|
||||||
const log = createLogger('arpeggio-runner');
|
const log = createLogger('arpeggio-runner');
|
||||||
|
|
||||||
@ -37,8 +37,25 @@ export interface ArpeggioRunnerDeps {
|
|||||||
conditions: Array<{ index: number; text: string }>,
|
conditions: Array<{ index: number; text: string }>,
|
||||||
options: { cwd: string }
|
options: { cwd: string }
|
||||||
) => Promise<number>;
|
) => Promise<number>;
|
||||||
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
readonly onPhaseStart?: (
|
||||||
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
instruction: string,
|
||||||
|
promptParts: PhasePromptParts,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
|
readonly onPhaseComplete?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
content: string,
|
||||||
|
status: string,
|
||||||
|
error?: string,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -185,6 +202,8 @@ export class ArpeggioRunner {
|
|||||||
batches,
|
batches,
|
||||||
template,
|
template,
|
||||||
step,
|
step,
|
||||||
|
movementIteration,
|
||||||
|
state.iteration,
|
||||||
agentOptions,
|
agentOptions,
|
||||||
arpeggioConfig,
|
arpeggioConfig,
|
||||||
semaphore,
|
semaphore,
|
||||||
@ -244,6 +263,8 @@ export class ArpeggioRunner {
|
|||||||
batches: readonly DataBatch[],
|
batches: readonly DataBatch[],
|
||||||
template: string,
|
template: string,
|
||||||
step: PieceMovement,
|
step: PieceMovement,
|
||||||
|
movementIteration: number,
|
||||||
|
iteration: number,
|
||||||
agentOptions: RunAgentOptions,
|
agentOptions: RunAgentOptions,
|
||||||
config: ArpeggioMovementConfig,
|
config: ArpeggioMovementConfig,
|
||||||
semaphore: Semaphore,
|
semaphore: Semaphore,
|
||||||
@ -251,20 +272,34 @@ export class ArpeggioRunner {
|
|||||||
const promises = batches.map(async (batch) => {
|
const promises = batches.map(async (batch) => {
|
||||||
await semaphore.acquire();
|
await semaphore.acquire();
|
||||||
try {
|
try {
|
||||||
this.deps.onPhaseStart?.(step, 1, 'execute', `[Arpeggio batch ${batch.batchIndex + 1}/${batch.totalBatches}]`);
|
let didEmitPhaseStart = false;
|
||||||
|
const phaseExecutionId = `${step.name}:1:${movementIteration}:${batch.batchIndex}`;
|
||||||
|
const batchAgentOptions: RunAgentOptions = {
|
||||||
|
...agentOptions,
|
||||||
|
onPromptResolved: (promptParts) => {
|
||||||
|
if (didEmitPhaseStart) return;
|
||||||
|
this.deps.onPhaseStart?.(step, 1, 'execute', promptParts.userInstruction, promptParts, phaseExecutionId, iteration);
|
||||||
|
didEmitPhaseStart = true;
|
||||||
|
},
|
||||||
|
};
|
||||||
const result = await executeBatchWithRetry(
|
const result = await executeBatchWithRetry(
|
||||||
batch,
|
batch,
|
||||||
template,
|
template,
|
||||||
step.persona,
|
step.persona,
|
||||||
agentOptions,
|
batchAgentOptions,
|
||||||
config.maxRetries,
|
config.maxRetries,
|
||||||
config.retryDelayMs,
|
config.retryDelayMs,
|
||||||
);
|
);
|
||||||
|
if (!didEmitPhaseStart) {
|
||||||
|
throw new Error(`Missing prompt parts for phase start: ${step.name}:1`);
|
||||||
|
}
|
||||||
this.deps.onPhaseComplete?.(
|
this.deps.onPhaseComplete?.(
|
||||||
step, 1, 'execute',
|
step, 1, 'execute',
|
||||||
result.content,
|
result.content,
|
||||||
result.success ? 'done' : 'error',
|
result.success ? 'done' : 'error',
|
||||||
result.error,
|
result.error,
|
||||||
|
phaseExecutionId,
|
||||||
|
iteration,
|
||||||
);
|
);
|
||||||
return result;
|
return result;
|
||||||
} finally {
|
} finally {
|
||||||
|
|||||||
@ -14,7 +14,7 @@ import type {
|
|||||||
AgentResponse,
|
AgentResponse,
|
||||||
Language,
|
Language,
|
||||||
} from '../../models/types.js';
|
} from '../../models/types.js';
|
||||||
import type { PhaseName } from '../types.js';
|
import type { PhaseName, PhasePromptParts, JudgeStageEntry } from '../types.js';
|
||||||
import { executeAgent } from '../../../agents/agent-usecases.js';
|
import { executeAgent } from '../../../agents/agent-usecases.js';
|
||||||
import { InstructionBuilder } from '../instruction/InstructionBuilder.js';
|
import { InstructionBuilder } from '../instruction/InstructionBuilder.js';
|
||||||
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
|
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../phase-runner.js';
|
||||||
@ -45,8 +45,33 @@ export interface MovementExecutorDeps {
|
|||||||
conditions: Array<{ index: number; text: string }>,
|
conditions: Array<{ index: number; text: string }>,
|
||||||
options: { cwd: string }
|
options: { cwd: string }
|
||||||
) => Promise<number>;
|
) => Promise<number>;
|
||||||
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
readonly onPhaseStart?: (
|
||||||
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
instruction: string,
|
||||||
|
promptParts: PhasePromptParts,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
|
readonly onPhaseComplete?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
content: string,
|
||||||
|
status: string,
|
||||||
|
error?: string,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
|
readonly onJudgeStage?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 3,
|
||||||
|
phaseName: 'judge',
|
||||||
|
entry: JudgeStageEntry,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
export class MovementExecutor {
|
export class MovementExecutor {
|
||||||
@ -197,6 +222,8 @@ export class MovementExecutor {
|
|||||||
updatePersonaSession,
|
updatePersonaSession,
|
||||||
this.deps.onPhaseStart,
|
this.deps.onPhaseStart,
|
||||||
this.deps.onPhaseComplete,
|
this.deps.onPhaseComplete,
|
||||||
|
this.deps.onJudgeStage,
|
||||||
|
state.iteration,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Phase 2: report output (resume same session, Write only)
|
// Phase 2: report output (resume same session, Write only)
|
||||||
@ -276,11 +303,21 @@ export class MovementExecutor {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Phase 1: main execution (Write excluded if movement has report)
|
// Phase 1: main execution (Write excluded if movement has report)
|
||||||
this.deps.onPhaseStart?.(step, 1, 'execute', instruction);
|
let didEmitPhaseStart = false;
|
||||||
const agentOptions = this.deps.optionsBuilder.buildAgentOptions(step);
|
const baseAgentOptions = this.deps.optionsBuilder.buildAgentOptions(step);
|
||||||
|
const agentOptions = {
|
||||||
|
...baseAgentOptions,
|
||||||
|
onPromptResolved: (promptParts: PhasePromptParts) => {
|
||||||
|
this.deps.onPhaseStart?.(step, 1, 'execute', instruction, promptParts, undefined, state.iteration);
|
||||||
|
didEmitPhaseStart = true;
|
||||||
|
},
|
||||||
|
};
|
||||||
let response = await executeAgent(step.persona, instruction, agentOptions);
|
let response = await executeAgent(step.persona, instruction, agentOptions);
|
||||||
|
if (!didEmitPhaseStart) {
|
||||||
|
throw new Error(`Missing prompt parts for phase start: ${step.name}:1`);
|
||||||
|
}
|
||||||
updatePersonaSession(sessionKey, response.sessionId);
|
updatePersonaSession(sessionKey, response.sessionId);
|
||||||
this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error);
|
this.deps.onPhaseComplete?.(step, 1, 'execute', response.content, response.status, response.error, undefined, state.iteration);
|
||||||
|
|
||||||
// Provider failures should abort immediately.
|
// Provider failures should abort immediately.
|
||||||
if (response.status === 'error') {
|
if (response.status === 'error') {
|
||||||
|
|||||||
@ -3,7 +3,7 @@ import type { PieceMovement, PieceState, Language } from '../../models/types.js'
|
|||||||
import type { MovementProviderOptions } from '../../models/piece-types.js';
|
import type { MovementProviderOptions } from '../../models/piece-types.js';
|
||||||
import type { RunAgentOptions } from '../../../agents/runner.js';
|
import type { RunAgentOptions } from '../../../agents/runner.js';
|
||||||
import type { PhaseRunnerContext } from '../phase-runner.js';
|
import type { PhaseRunnerContext } from '../phase-runner.js';
|
||||||
import type { PieceEngineOptions, PhaseName, MovementProviderInfo } from '../types.js';
|
import type { PieceEngineOptions, PhaseName, MovementProviderInfo, PhasePromptParts, JudgeStageEntry } from '../types.js';
|
||||||
import { buildSessionKey } from '../session-key.js';
|
import { buildSessionKey } from '../session-key.js';
|
||||||
import { resolveMovementProviderModel } from '../provider-resolution.js';
|
import { resolveMovementProviderModel } from '../provider-resolution.js';
|
||||||
|
|
||||||
@ -158,8 +158,34 @@ export class OptionsBuilder {
|
|||||||
state: PieceState,
|
state: PieceState,
|
||||||
lastResponse: string | undefined,
|
lastResponse: string | undefined,
|
||||||
updatePersonaSession: (persona: string, sessionId: string | undefined) => void,
|
updatePersonaSession: (persona: string, sessionId: string | undefined) => void,
|
||||||
onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void,
|
onPhaseStart?: (
|
||||||
onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void,
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
instruction: string,
|
||||||
|
promptParts: PhasePromptParts,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void,
|
||||||
|
onPhaseComplete?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
content: string,
|
||||||
|
status: string,
|
||||||
|
error?: string,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void,
|
||||||
|
onJudgeStage?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 3,
|
||||||
|
phaseName: 'judge',
|
||||||
|
entry: JudgeStageEntry,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void,
|
||||||
|
iteration?: number,
|
||||||
): PhaseRunnerContext {
|
): PhaseRunnerContext {
|
||||||
return {
|
return {
|
||||||
cwd: this.getCwd(),
|
cwd: this.getCwd(),
|
||||||
@ -174,6 +200,8 @@ export class OptionsBuilder {
|
|||||||
updatePersonaSession,
|
updatePersonaSession,
|
||||||
onPhaseStart,
|
onPhaseStart,
|
||||||
onPhaseComplete,
|
onPhaseComplete,
|
||||||
|
onJudgeStage,
|
||||||
|
iteration,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -19,7 +19,7 @@ import { createLogger, getErrorMessage } from '../../../shared/utils/index.js';
|
|||||||
import { buildSessionKey } from '../session-key.js';
|
import { buildSessionKey } from '../session-key.js';
|
||||||
import type { OptionsBuilder } from './OptionsBuilder.js';
|
import type { OptionsBuilder } from './OptionsBuilder.js';
|
||||||
import type { MovementExecutor } from './MovementExecutor.js';
|
import type { MovementExecutor } from './MovementExecutor.js';
|
||||||
import type { PieceEngineOptions, PhaseName } from '../types.js';
|
import type { PieceEngineOptions, PhaseName, PhasePromptParts, JudgeStageEntry } from '../types.js';
|
||||||
import type { ParallelLoggerOptions } from './parallel-logger.js';
|
import type { ParallelLoggerOptions } from './parallel-logger.js';
|
||||||
|
|
||||||
const log = createLogger('parallel-runner');
|
const log = createLogger('parallel-runner');
|
||||||
@ -37,8 +37,33 @@ export interface ParallelRunnerDeps {
|
|||||||
conditions: Array<{ index: number; text: string }>,
|
conditions: Array<{ index: number; text: string }>,
|
||||||
options: { cwd: string }
|
options: { cwd: string }
|
||||||
) => Promise<number>;
|
) => Promise<number>;
|
||||||
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
readonly onPhaseStart?: (
|
||||||
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
instruction: string,
|
||||||
|
promptParts: PhasePromptParts,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
|
readonly onPhaseComplete?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
content: string,
|
||||||
|
status: string,
|
||||||
|
error?: string,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
|
readonly onJudgeStage?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 3,
|
||||||
|
phaseName: 'judge',
|
||||||
|
entry: JudgeStageEntry,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
export class ParallelRunner {
|
export class ParallelRunner {
|
||||||
@ -86,6 +111,7 @@ export class ParallelRunner {
|
|||||||
subMovements.map(async (subMovement, index) => {
|
subMovements.map(async (subMovement, index) => {
|
||||||
const subIteration = incrementMovementIteration(state, subMovement.name);
|
const subIteration = incrementMovementIteration(state, subMovement.name);
|
||||||
const subInstruction = this.deps.movementExecutor.buildInstruction(subMovement, subIteration, state, task, maxMovements);
|
const subInstruction = this.deps.movementExecutor.buildInstruction(subMovement, subIteration, state, task, maxMovements);
|
||||||
|
const parentIteration = state.iteration;
|
||||||
|
|
||||||
// Session key uses buildSessionKey (persona:provider) — same as normal movements.
|
// Session key uses buildSessionKey (persona:provider) — same as normal movements.
|
||||||
// This ensures sessions are shared across movements with the same persona+provider,
|
// This ensures sessions are shared across movements with the same persona+provider,
|
||||||
@ -94,19 +120,33 @@ export class ParallelRunner {
|
|||||||
|
|
||||||
// Phase 1: main execution (Write excluded if sub-movement has report)
|
// Phase 1: main execution (Write excluded if sub-movement has report)
|
||||||
const baseOptions = this.deps.optionsBuilder.buildAgentOptions(subMovement);
|
const baseOptions = this.deps.optionsBuilder.buildAgentOptions(subMovement);
|
||||||
|
let didEmitPhaseStart = false;
|
||||||
|
|
||||||
// Override onStream with parallel logger's prefixed handler (immutable)
|
// Override onStream with parallel logger's prefixed handler (immutable)
|
||||||
const agentOptions = parallelLogger
|
const agentOptions = parallelLogger
|
||||||
? { ...baseOptions, onStream: parallelLogger.createStreamHandler(subMovement.name, index) }
|
? { ...baseOptions, onStream: parallelLogger.createStreamHandler(subMovement.name, index) }
|
||||||
: baseOptions;
|
: { ...baseOptions };
|
||||||
|
agentOptions.onPromptResolved = (promptParts: PhasePromptParts) => {
|
||||||
this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction);
|
this.deps.onPhaseStart?.(subMovement, 1, 'execute', subInstruction, promptParts, undefined, parentIteration);
|
||||||
|
didEmitPhaseStart = true;
|
||||||
|
};
|
||||||
const subResponse = await executeAgent(subMovement.persona, subInstruction, agentOptions);
|
const subResponse = await executeAgent(subMovement.persona, subInstruction, agentOptions);
|
||||||
|
if (!didEmitPhaseStart) {
|
||||||
|
throw new Error(`Missing prompt parts for phase start: ${subMovement.name}:1`);
|
||||||
|
}
|
||||||
updatePersonaSession(subSessionKey, subResponse.sessionId);
|
updatePersonaSession(subSessionKey, subResponse.sessionId);
|
||||||
this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error);
|
this.deps.onPhaseComplete?.(subMovement, 1, 'execute', subResponse.content, subResponse.status, subResponse.error, undefined, parentIteration);
|
||||||
|
|
||||||
// Phase 2/3 context — no overrides needed, phase-runner uses buildSessionKey internally
|
// Phase 2/3 context — no overrides needed, phase-runner uses buildSessionKey internally
|
||||||
const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(state, subResponse.content, updatePersonaSession, this.deps.onPhaseStart, this.deps.onPhaseComplete);
|
const phaseCtx = this.deps.optionsBuilder.buildPhaseRunnerContext(
|
||||||
|
state,
|
||||||
|
subResponse.content,
|
||||||
|
updatePersonaSession,
|
||||||
|
this.deps.onPhaseStart,
|
||||||
|
this.deps.onPhaseComplete,
|
||||||
|
this.deps.onJudgeStage,
|
||||||
|
parentIteration,
|
||||||
|
);
|
||||||
|
|
||||||
// Phase 2: report output for sub-movement
|
// Phase 2: report output for sub-movement
|
||||||
if (subMovement.outputContracts && subMovement.outputContracts.length > 0) {
|
if (subMovement.outputContracts && subMovement.outputContracts.length > 0) {
|
||||||
|
|||||||
@ -128,11 +128,26 @@ export class PieceEngine extends EventEmitter {
|
|||||||
getRetryNote: () => this.options.retryNote,
|
getRetryNote: () => this.options.retryNote,
|
||||||
detectRuleIndex: this.detectRuleIndex,
|
detectRuleIndex: this.detectRuleIndex,
|
||||||
callAiJudge: this.callAiJudge,
|
callAiJudge: this.callAiJudge,
|
||||||
onPhaseStart: (step, phase, phaseName, instruction) => {
|
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||||
this.emit('phase:start', step, phase, phaseName, instruction);
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
|
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||||
},
|
},
|
||||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
|
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
|
||||||
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
|
||||||
|
},
|
||||||
|
onJudgeStage: (step, phase, phaseName, entry, phaseExecutionId, iteration) => {
|
||||||
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
|
this.emit('phase:judge_stage', step, phase, phaseName, entry);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:judge_stage', step, phase, phaseName, entry, phaseExecutionId, iteration);
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -145,11 +160,26 @@ export class PieceEngine extends EventEmitter {
|
|||||||
getInteractive: () => this.options.interactive === true,
|
getInteractive: () => this.options.interactive === true,
|
||||||
detectRuleIndex: this.detectRuleIndex,
|
detectRuleIndex: this.detectRuleIndex,
|
||||||
callAiJudge: this.callAiJudge,
|
callAiJudge: this.callAiJudge,
|
||||||
onPhaseStart: (step, phase, phaseName, instruction) => {
|
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||||
this.emit('phase:start', step, phase, phaseName, instruction);
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
|
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||||
},
|
},
|
||||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
|
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
|
||||||
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
|
||||||
|
},
|
||||||
|
onJudgeStage: (step, phase, phaseName, entry, phaseExecutionId, iteration) => {
|
||||||
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
|
this.emit('phase:judge_stage', step, phase, phaseName, entry);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:judge_stage', step, phase, phaseName, entry, phaseExecutionId, iteration);
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -160,11 +190,19 @@ export class PieceEngine extends EventEmitter {
|
|||||||
getInteractive: () => this.options.interactive === true,
|
getInteractive: () => this.options.interactive === true,
|
||||||
detectRuleIndex: this.detectRuleIndex,
|
detectRuleIndex: this.detectRuleIndex,
|
||||||
callAiJudge: this.callAiJudge,
|
callAiJudge: this.callAiJudge,
|
||||||
onPhaseStart: (step, phase, phaseName, instruction) => {
|
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||||
this.emit('phase:start', step, phase, phaseName, instruction);
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
|
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||||
},
|
},
|
||||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
|
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
|
||||||
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -176,11 +214,19 @@ export class PieceEngine extends EventEmitter {
|
|||||||
getInteractive: () => this.options.interactive === true,
|
getInteractive: () => this.options.interactive === true,
|
||||||
detectRuleIndex: this.detectRuleIndex,
|
detectRuleIndex: this.detectRuleIndex,
|
||||||
callAiJudge: this.callAiJudge,
|
callAiJudge: this.callAiJudge,
|
||||||
onPhaseStart: (step, phase, phaseName, instruction) => {
|
onPhaseStart: (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||||
this.emit('phase:start', step, phase, phaseName, instruction);
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
|
this.emit('phase:start', step, phase, phaseName, instruction, promptParts);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:start', step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||||
},
|
},
|
||||||
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error) => {
|
onPhaseComplete: (step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration) => {
|
||||||
|
if (phaseExecutionId == null && iteration == null) {
|
||||||
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
this.emit('phase:complete', step, phase, phaseName, content, phaseStatus, error, phaseExecutionId, iteration);
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@ -17,7 +17,7 @@ import { createPartMovement, resolvePartErrorDetail, summarizeParts } from './te
|
|||||||
import { buildTeamLeaderParallelLoggerOptions, emitTeamLeaderProgressHint } from './team-leader-streaming.js';
|
import { buildTeamLeaderParallelLoggerOptions, emitTeamLeaderProgressHint } from './team-leader-streaming.js';
|
||||||
import type { OptionsBuilder } from './OptionsBuilder.js';
|
import type { OptionsBuilder } from './OptionsBuilder.js';
|
||||||
import type { MovementExecutor } from './MovementExecutor.js';
|
import type { MovementExecutor } from './MovementExecutor.js';
|
||||||
import type { PieceEngineOptions, PhaseName } from '../types.js';
|
import type { PieceEngineOptions, PhaseName, PhasePromptParts } from '../types.js';
|
||||||
|
|
||||||
const log = createLogger('team-leader-runner');
|
const log = createLogger('team-leader-runner');
|
||||||
const MAX_TOTAL_PARTS = 20;
|
const MAX_TOTAL_PARTS = 20;
|
||||||
@ -34,8 +34,25 @@ export interface TeamLeaderRunnerDeps {
|
|||||||
conditions: Array<{ index: number; text: string }>,
|
conditions: Array<{ index: number; text: string }>,
|
||||||
options: { cwd: string }
|
options: { cwd: string }
|
||||||
) => Promise<number>;
|
) => Promise<number>;
|
||||||
readonly onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
readonly onPhaseStart?: (
|
||||||
readonly onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
instruction: string,
|
||||||
|
promptParts: PhasePromptParts,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
|
readonly onPhaseComplete?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
content: string,
|
||||||
|
status: string,
|
||||||
|
error?: string,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
export class TeamLeaderRunner {
|
export class TeamLeaderRunner {
|
||||||
@ -54,6 +71,7 @@ export class TeamLeaderRunner {
|
|||||||
throw new Error(`Movement "${step.name}" has no teamLeader configuration`);
|
throw new Error(`Movement "${step.name}" has no teamLeader configuration`);
|
||||||
}
|
}
|
||||||
const teamLeaderConfig = step.teamLeader;
|
const teamLeaderConfig = step.teamLeader;
|
||||||
|
const parentIteration = state.iteration;
|
||||||
|
|
||||||
const movementIteration = incrementMovementIteration(state, step.name);
|
const movementIteration = incrementMovementIteration(state, step.name);
|
||||||
const leaderStep: PieceMovement = {
|
const leaderStep: PieceMovement = {
|
||||||
@ -72,7 +90,7 @@ export class TeamLeaderRunner {
|
|||||||
);
|
);
|
||||||
|
|
||||||
emitTeamLeaderProgressHint(this.deps.engineOptions, 'decompose');
|
emitTeamLeaderProgressHint(this.deps.engineOptions, 'decompose');
|
||||||
this.deps.onPhaseStart?.(leaderStep, 1, 'execute', instruction);
|
let didEmitPhaseStart = false;
|
||||||
const parts = await decomposeTask(instruction, teamLeaderConfig.maxParts, {
|
const parts = await decomposeTask(instruction, teamLeaderConfig.maxParts, {
|
||||||
cwd: this.deps.getCwd(),
|
cwd: this.deps.getCwd(),
|
||||||
persona: leaderStep.persona,
|
persona: leaderStep.persona,
|
||||||
@ -80,14 +98,21 @@ export class TeamLeaderRunner {
|
|||||||
model: leaderModel,
|
model: leaderModel,
|
||||||
provider: leaderProvider,
|
provider: leaderProvider,
|
||||||
onStream: this.deps.engineOptions.onStream,
|
onStream: this.deps.engineOptions.onStream,
|
||||||
|
onPromptResolved: (promptParts) => {
|
||||||
|
this.deps.onPhaseStart?.(leaderStep, 1, 'execute', promptParts.userInstruction, promptParts, undefined, parentIteration);
|
||||||
|
didEmitPhaseStart = true;
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
if (!didEmitPhaseStart) {
|
||||||
|
throw new Error(`Missing prompt parts for phase start: ${leaderStep.name}:1`);
|
||||||
|
}
|
||||||
const leaderResponse: AgentResponse = {
|
const leaderResponse: AgentResponse = {
|
||||||
persona: leaderStep.persona ?? leaderStep.name,
|
persona: leaderStep.persona ?? leaderStep.name,
|
||||||
status: 'done',
|
status: 'done',
|
||||||
content: JSON.stringify({ parts }, null, 2),
|
content: JSON.stringify({ parts }, null, 2),
|
||||||
timestamp: new Date(),
|
timestamp: new Date(),
|
||||||
};
|
};
|
||||||
this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error);
|
this.deps.onPhaseComplete?.(leaderStep, 1, 'execute', leaderResponse.content, leaderResponse.status, leaderResponse.error, undefined, parentIteration);
|
||||||
log.debug('Team leader decomposed parts', {
|
log.debug('Team leader decomposed parts', {
|
||||||
movement: step.name,
|
movement: step.name,
|
||||||
partCount: parts.length,
|
partCount: parts.length,
|
||||||
|
|||||||
@ -8,7 +8,7 @@
|
|||||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
||||||
import { dirname, resolve, sep } from 'node:path';
|
import { dirname, resolve, sep } from 'node:path';
|
||||||
import type { PieceMovement, Language, AgentResponse } from '../models/types.js';
|
import type { PieceMovement, Language, AgentResponse } from '../models/types.js';
|
||||||
import type { PhaseName } from './types.js';
|
import type { PhaseName, PhasePromptParts, JudgeStageEntry } from './types.js';
|
||||||
import type { RunAgentOptions } from '../../agents/runner.js';
|
import type { RunAgentOptions } from '../../agents/runner.js';
|
||||||
import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js';
|
import { ReportInstructionBuilder } from './instruction/ReportInstructionBuilder.js';
|
||||||
import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js';
|
import { hasTagBasedRules, getReportFiles } from './evaluation/rule-utils.js';
|
||||||
@ -33,6 +33,8 @@ export interface PhaseRunnerContext {
|
|||||||
interactive?: boolean;
|
interactive?: boolean;
|
||||||
/** Last response from Phase 1 */
|
/** Last response from Phase 1 */
|
||||||
lastResponse?: string;
|
lastResponse?: string;
|
||||||
|
/** Parent piece iteration for sub-movement phase events */
|
||||||
|
iteration?: number;
|
||||||
/** Get persona session ID */
|
/** Get persona session ID */
|
||||||
getSessionId: (persona: string) => string | undefined;
|
getSessionId: (persona: string) => string | undefined;
|
||||||
/** Build resume options for a movement */
|
/** Build resume options for a movement */
|
||||||
@ -44,9 +46,35 @@ export interface PhaseRunnerContext {
|
|||||||
/** Stream callback for provider event logging (passed to judgeStatus) */
|
/** Stream callback for provider event logging (passed to judgeStatus) */
|
||||||
onStream?: import('../../agents/types.js').StreamCallback;
|
onStream?: import('../../agents/types.js').StreamCallback;
|
||||||
/** Callback for phase lifecycle logging */
|
/** Callback for phase lifecycle logging */
|
||||||
onPhaseStart?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
onPhaseStart?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
instruction: string,
|
||||||
|
promptParts: PhasePromptParts,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
/** Callback for phase completion logging */
|
/** Callback for phase completion logging */
|
||||||
onPhaseComplete?: (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
onPhaseComplete?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
content: string,
|
||||||
|
status: string,
|
||||||
|
error?: string,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
|
/** Callback for Phase 3 internal stage logging */
|
||||||
|
onJudgeStage?: (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 3,
|
||||||
|
phaseName: 'judge',
|
||||||
|
entry: JudgeStageEntry,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -207,35 +235,45 @@ async function runSingleReportAttempt(
|
|||||||
options: RunAgentOptions,
|
options: RunAgentOptions,
|
||||||
ctx: PhaseRunnerContext,
|
ctx: PhaseRunnerContext,
|
||||||
): Promise<ReportAttemptResult> {
|
): Promise<ReportAttemptResult> {
|
||||||
ctx.onPhaseStart?.(step, 2, 'report', instruction);
|
let didEmitPhaseStart = false;
|
||||||
|
const callOptions: RunAgentOptions = {
|
||||||
|
...options,
|
||||||
|
onPromptResolved: (promptParts) => {
|
||||||
|
ctx.onPhaseStart?.(step, 2, 'report', instruction, promptParts, undefined, ctx.iteration);
|
||||||
|
didEmitPhaseStart = true;
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
let response: AgentResponse;
|
let response: AgentResponse;
|
||||||
try {
|
try {
|
||||||
response = await executeAgent(step.persona, instruction, options);
|
response = await executeAgent(step.persona, instruction, callOptions);
|
||||||
|
if (!didEmitPhaseStart) {
|
||||||
|
throw new Error(`Missing prompt parts for phase start: ${step.name}:2`);
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||||
ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg);
|
ctx.onPhaseComplete?.(step, 2, 'report', '', 'error', errorMsg, undefined, ctx.iteration);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (response.status === 'blocked') {
|
if (response.status === 'blocked') {
|
||||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status);
|
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, undefined, undefined, ctx.iteration);
|
||||||
return { kind: 'blocked', response };
|
return { kind: 'blocked', response };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (response.status !== 'done') {
|
if (response.status !== 'done') {
|
||||||
const errorMessage = response.error || response.content || 'Unknown error';
|
const errorMessage = response.error || response.content || 'Unknown error';
|
||||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, errorMessage);
|
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, errorMessage, undefined, ctx.iteration);
|
||||||
return { kind: 'retryable_failure', errorMessage };
|
return { kind: 'retryable_failure', errorMessage };
|
||||||
}
|
}
|
||||||
|
|
||||||
const trimmedContent = response.content.trim();
|
const trimmedContent = response.content.trim();
|
||||||
if (trimmedContent.length === 0) {
|
if (trimmedContent.length === 0) {
|
||||||
const errorMessage = 'Report output is empty';
|
const errorMessage = 'Report output is empty';
|
||||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, 'error', errorMessage);
|
ctx.onPhaseComplete?.(step, 2, 'report', response.content, 'error', errorMessage, undefined, ctx.iteration);
|
||||||
return { kind: 'retryable_failure', errorMessage };
|
return { kind: 'retryable_failure', errorMessage };
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status);
|
ctx.onPhaseComplete?.(step, 2, 'report', response.content, response.status, undefined, undefined, ctx.iteration);
|
||||||
return { kind: 'success', content: trimmedContent, response };
|
return { kind: 'success', content: trimmedContent, response };
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import { StatusJudgmentBuilder, type StatusJudgmentContext } from './instruction
|
|||||||
import { getJudgmentReportFiles } from './evaluation/rule-utils.js';
|
import { getJudgmentReportFiles } from './evaluation/rule-utils.js';
|
||||||
import { createLogger } from '../../shared/utils/index.js';
|
import { createLogger } from '../../shared/utils/index.js';
|
||||||
import type { PhaseRunnerContext } from './phase-runner.js';
|
import type { PhaseRunnerContext } from './phase-runner.js';
|
||||||
|
import { buildPhaseExecutionId } from '../../shared/utils/phaseExecutionId.js';
|
||||||
|
|
||||||
const log = createLogger('phase-runner');
|
const log = createLogger('phase-runner');
|
||||||
|
|
||||||
@ -85,8 +86,29 @@ export async function runStatusJudgmentPhase(
|
|||||||
const tagInstruction = new StatusJudgmentBuilder(step, {
|
const tagInstruction = new StatusJudgmentBuilder(step, {
|
||||||
...baseContext,
|
...baseContext,
|
||||||
}).build();
|
}).build();
|
||||||
|
if (!ctx.iteration || !Number.isInteger(ctx.iteration) || ctx.iteration <= 0) {
|
||||||
|
throw new Error(`Status judgment requires iteration for movement "${step.name}"`);
|
||||||
|
}
|
||||||
|
const phaseExecutionId = buildPhaseExecutionId({
|
||||||
|
step: step.name,
|
||||||
|
iteration: ctx.iteration,
|
||||||
|
phase: 3,
|
||||||
|
sequence: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
let didEmitPhaseStart = false;
|
||||||
|
const emitPhaseStart = (promptParts: { systemPrompt: string; userInstruction: string }): void => {
|
||||||
|
ctx.onPhaseStart?.(step, 3, 'judge', structuredInstruction, promptParts, phaseExecutionId, ctx.iteration);
|
||||||
|
didEmitPhaseStart = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
if (step.rules.length === 1) {
|
||||||
|
emitPhaseStart({
|
||||||
|
systemPrompt: '',
|
||||||
|
userInstruction: structuredInstruction,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
ctx.onPhaseStart?.(step, 3, 'judge', structuredInstruction);
|
|
||||||
try {
|
try {
|
||||||
const result = await judgeStatus(structuredInstruction, tagInstruction, step.rules, {
|
const result = await judgeStatus(structuredInstruction, tagInstruction, step.rules, {
|
||||||
cwd: ctx.cwd,
|
cwd: ctx.cwd,
|
||||||
@ -94,13 +116,24 @@ export async function runStatusJudgmentPhase(
|
|||||||
language: ctx.language,
|
language: ctx.language,
|
||||||
interactive: ctx.interactive,
|
interactive: ctx.interactive,
|
||||||
onStream: ctx.onStream,
|
onStream: ctx.onStream,
|
||||||
|
onStructuredPromptResolved: (promptParts) => {
|
||||||
|
if (!didEmitPhaseStart) {
|
||||||
|
emitPhaseStart(promptParts);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
onJudgeStage: (entry) => {
|
||||||
|
ctx.onJudgeStage?.(step, 3, 'judge', entry, phaseExecutionId, ctx.iteration);
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
if (!didEmitPhaseStart) {
|
||||||
|
throw new Error(`Missing prompt parts for phase start: ${step.name}:3`);
|
||||||
|
}
|
||||||
const tag = `[${step.name.toUpperCase()}:${result.ruleIndex + 1}]`;
|
const tag = `[${step.name.toUpperCase()}:${result.ruleIndex + 1}]`;
|
||||||
ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done');
|
ctx.onPhaseComplete?.(step, 3, 'judge', tag, 'done', undefined, phaseExecutionId, ctx.iteration);
|
||||||
return { tag, ruleIndex: result.ruleIndex, method: result.method };
|
return { tag, ruleIndex: result.ruleIndex, method: result.method };
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||||
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg);
|
ctx.onPhaseComplete?.(step, 3, 'judge', '', 'error', errorMsg, phaseExecutionId, ctx.iteration);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -78,6 +78,19 @@ export type AiJudgeCaller = (
|
|||||||
|
|
||||||
export type PhaseName = 'execute' | 'report' | 'judge';
|
export type PhaseName = 'execute' | 'report' | 'judge';
|
||||||
|
|
||||||
|
export interface PhasePromptParts {
|
||||||
|
systemPrompt: string;
|
||||||
|
userInstruction: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface JudgeStageEntry {
|
||||||
|
stage: 1 | 2 | 3;
|
||||||
|
method: 'structured_output' | 'phase3_tag' | 'ai_judge';
|
||||||
|
status: 'done' | 'error' | 'skipped';
|
||||||
|
instruction: string;
|
||||||
|
response: string;
|
||||||
|
}
|
||||||
|
|
||||||
/** Provider and model info resolved for a movement */
|
/** Provider and model info resolved for a movement */
|
||||||
export interface MovementProviderInfo {
|
export interface MovementProviderInfo {
|
||||||
provider: ProviderType | undefined;
|
provider: ProviderType | undefined;
|
||||||
@ -91,8 +104,33 @@ export interface PieceEvents {
|
|||||||
'movement:report': (step: PieceMovement, filePath: string, fileName: string) => void;
|
'movement:report': (step: PieceMovement, filePath: string, fileName: string) => void;
|
||||||
'movement:blocked': (step: PieceMovement, response: AgentResponse) => void;
|
'movement:blocked': (step: PieceMovement, response: AgentResponse) => void;
|
||||||
'movement:user_input': (step: PieceMovement, userInput: string) => void;
|
'movement:user_input': (step: PieceMovement, userInput: string) => void;
|
||||||
'phase:start': (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, instruction: string) => void;
|
'phase:start': (
|
||||||
'phase:complete': (step: PieceMovement, phase: 1 | 2 | 3, phaseName: PhaseName, content: string, status: string, error?: string) => void;
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
instruction: string,
|
||||||
|
promptParts: PhasePromptParts,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
|
'phase:complete': (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 1 | 2 | 3,
|
||||||
|
phaseName: PhaseName,
|
||||||
|
content: string,
|
||||||
|
status: string,
|
||||||
|
error?: string,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
|
'phase:judge_stage': (
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 3,
|
||||||
|
phaseName: 'judge',
|
||||||
|
entry: JudgeStageEntry,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
) => void;
|
||||||
'piece:complete': (state: PieceState) => void;
|
'piece:complete': (state: PieceState) => void;
|
||||||
'piece:abort': (state: PieceState, reason: string) => void;
|
'piece:abort': (state: PieceState, reason: string) => void;
|
||||||
'iteration:limit': (iteration: number, maxMovements: number) => void;
|
'iteration:limit': (iteration: number, maxMovements: number) => void;
|
||||||
|
|||||||
@ -11,7 +11,7 @@ import { isQuietMode } from '../../../shared/context.js';
|
|||||||
import { StreamDisplay } from '../../../shared/ui/index.js';
|
import { StreamDisplay } from '../../../shared/ui/index.js';
|
||||||
import { TaskPrefixWriter } from '../../../shared/ui/TaskPrefixWriter.js';
|
import { TaskPrefixWriter } from '../../../shared/ui/TaskPrefixWriter.js';
|
||||||
import { generateSessionId, createSessionLog, finalizeSessionLog, initNdjsonLog } from '../../../infra/fs/index.js';
|
import { generateSessionId, createSessionLog, finalizeSessionLog, initNdjsonLog } from '../../../infra/fs/index.js';
|
||||||
import { createLogger, notifySuccess, notifyError, preventSleep, generateReportDir, isValidReportDirName } from '../../../shared/utils/index.js';
|
import { createLogger, notifySuccess, notifyError, preventSleep, generateReportDir, isValidReportDirName, getDebugPromptsLogFile } from '../../../shared/utils/index.js';
|
||||||
import { createProviderEventLogger, isProviderEventsEnabled } from '../../../shared/utils/providerEventLogger.js';
|
import { createProviderEventLogger, isProviderEventsEnabled } from '../../../shared/utils/providerEventLogger.js';
|
||||||
import { getLabel } from '../../../shared/i18n/index.js';
|
import { getLabel } from '../../../shared/i18n/index.js';
|
||||||
import { buildRunPaths } from '../../../core/piece/run/run-paths.js';
|
import { buildRunPaths } from '../../../core/piece/run/run-paths.js';
|
||||||
@ -25,9 +25,9 @@ import { createOutputFns, createPrefixedStreamHandler } from './outputFns.js';
|
|||||||
import { RunMetaManager } from './runMeta.js';
|
import { RunMetaManager } from './runMeta.js';
|
||||||
import { createIterationLimitHandler, createUserInputHandler } from './iterationLimitHandler.js';
|
import { createIterationLimitHandler, createUserInputHandler } from './iterationLimitHandler.js';
|
||||||
import { assertTaskPrefixPair, truncate, formatElapsedTime } from './pieceExecutionUtils.js';
|
import { assertTaskPrefixPair, truncate, formatElapsedTime } from './pieceExecutionUtils.js';
|
||||||
|
import { createTraceReportWriter } from './traceReportWriter.js';
|
||||||
|
import { sanitizeTextForStorage } from './traceReportRedaction.js';
|
||||||
export type { PieceExecutionResult, PieceExecutionOptions };
|
export type { PieceExecutionResult, PieceExecutionOptions };
|
||||||
|
|
||||||
const log = createLogger('piece');
|
const log = createLogger('piece');
|
||||||
|
|
||||||
export async function executePiece(
|
export async function executePiece(
|
||||||
@ -39,12 +39,10 @@ export async function executePiece(
|
|||||||
const { headerPrefix = 'Running Piece:', interactiveUserInput = false } = options;
|
const { headerPrefix = 'Running Piece:', interactiveUserInput = false } = options;
|
||||||
const projectCwd = options.projectCwd;
|
const projectCwd = options.projectCwd;
|
||||||
assertTaskPrefixPair(options.taskPrefix, options.taskColorIndex);
|
assertTaskPrefixPair(options.taskPrefix, options.taskColorIndex);
|
||||||
|
|
||||||
const prefixWriter = options.taskPrefix != null
|
const prefixWriter = options.taskPrefix != null
|
||||||
? new TaskPrefixWriter({ taskName: options.taskPrefix, colorIndex: options.taskColorIndex!, displayLabel: options.taskDisplayLabel })
|
? new TaskPrefixWriter({ taskName: options.taskPrefix, colorIndex: options.taskColorIndex!, displayLabel: options.taskDisplayLabel })
|
||||||
: undefined;
|
: undefined;
|
||||||
const out = createOutputFns(prefixWriter);
|
const out = createOutputFns(prefixWriter);
|
||||||
|
|
||||||
const isRetry = Boolean(options.startMovement || options.retryNote);
|
const isRetry = Boolean(options.startMovement || options.retryNote);
|
||||||
log.debug('Session mode', { isRetry, isWorktree: cwd !== projectCwd });
|
log.debug('Session mode', { isRetry, isWorktree: cwd !== projectCwd });
|
||||||
out.header(`${headerPrefix} ${pieceConfig.name}`);
|
out.header(`${headerPrefix} ${pieceConfig.name}`);
|
||||||
@ -52,18 +50,9 @@ export async function executePiece(
|
|||||||
const pieceSessionId = generateSessionId();
|
const pieceSessionId = generateSessionId();
|
||||||
const runSlug = options.reportDirName ?? generateReportDir(task);
|
const runSlug = options.reportDirName ?? generateReportDir(task);
|
||||||
if (!isValidReportDirName(runSlug)) throw new Error(`Invalid reportDirName: ${runSlug}`);
|
if (!isValidReportDirName(runSlug)) throw new Error(`Invalid reportDirName: ${runSlug}`);
|
||||||
|
|
||||||
const runPaths = buildRunPaths(cwd, runSlug);
|
const runPaths = buildRunPaths(cwd, runSlug);
|
||||||
const runMetaManager = new RunMetaManager(runPaths, task, pieceConfig.name);
|
const runMetaManager = new RunMetaManager(runPaths, task, pieceConfig.name);
|
||||||
|
|
||||||
let sessionLog = createSessionLog(task, projectCwd, pieceConfig.name);
|
let sessionLog = createSessionLog(task, projectCwd, pieceConfig.name);
|
||||||
const ndjsonLogPath = initNdjsonLog(pieceSessionId, task, pieceConfig.name, { logsDir: runPaths.logsAbs });
|
|
||||||
const sessionLogger = new SessionLogger(ndjsonLogPath);
|
|
||||||
|
|
||||||
if (options.interactiveMetadata) {
|
|
||||||
sessionLogger.writeInteractiveMetadata(options.interactiveMetadata);
|
|
||||||
}
|
|
||||||
|
|
||||||
const displayRef: { current: StreamDisplay | null } = { current: null };
|
const displayRef: { current: StreamDisplay | null } = { current: null };
|
||||||
const streamHandler = prefixWriter
|
const streamHandler = prefixWriter
|
||||||
? createPrefixedStreamHandler(prefixWriter)
|
? createPrefixedStreamHandler(prefixWriter)
|
||||||
@ -71,12 +60,23 @@ export async function executePiece(
|
|||||||
if (!displayRef.current || event.type === 'result') return;
|
if (!displayRef.current || event.type === 'result') return;
|
||||||
displayRef.current.createHandler()(event);
|
displayRef.current.createHandler()(event);
|
||||||
};
|
};
|
||||||
|
|
||||||
const isWorktree = cwd !== projectCwd;
|
const isWorktree = cwd !== projectCwd;
|
||||||
const globalConfig = resolvePieceConfigValues(
|
const globalConfig = resolvePieceConfigValues(
|
||||||
projectCwd,
|
projectCwd,
|
||||||
['notificationSound', 'notificationSoundEvents', 'provider', 'runtime', 'preventSleep', 'model', 'logging', 'analytics'],
|
['notificationSound', 'notificationSoundEvents', 'provider', 'runtime', 'preventSleep', 'model', 'logging', 'analytics'],
|
||||||
);
|
);
|
||||||
|
const traceReportMode = globalConfig.logging?.trace === true ? 'full' : 'redacted';
|
||||||
|
const allowSensitiveData = traceReportMode === 'full';
|
||||||
|
const ndjsonLogPath = initNdjsonLog(
|
||||||
|
pieceSessionId,
|
||||||
|
sanitizeTextForStorage(task, allowSensitiveData),
|
||||||
|
pieceConfig.name,
|
||||||
|
{ logsDir: runPaths.logsAbs },
|
||||||
|
);
|
||||||
|
const sessionLogger = new SessionLogger(ndjsonLogPath, allowSensitiveData);
|
||||||
|
if (options.interactiveMetadata) {
|
||||||
|
sessionLogger.writeInteractiveMetadata(options.interactiveMetadata);
|
||||||
|
}
|
||||||
const shouldNotify = globalConfig.notificationSound !== false;
|
const shouldNotify = globalConfig.notificationSound !== false;
|
||||||
const nse = globalConfig.notificationSoundEvents;
|
const nse = globalConfig.notificationSoundEvents;
|
||||||
const shouldNotifyIterationLimit = shouldNotify && nse?.iterationLimit !== false;
|
const shouldNotifyIterationLimit = shouldNotify && nse?.iterationLimit !== false;
|
||||||
@ -98,10 +98,8 @@ export async function executePiece(
|
|||||||
movement: options.startMovement ?? pieceConfig.initialMovement,
|
movement: options.startMovement ?? pieceConfig.initialMovement,
|
||||||
enabled: isProviderEventsEnabled(globalConfig),
|
enabled: isProviderEventsEnabled(globalConfig),
|
||||||
});
|
});
|
||||||
|
|
||||||
initAnalyticsWriter(globalConfig.analytics?.enabled === true, globalConfig.analytics?.eventsPath ?? join(getGlobalConfigDir(), 'analytics', 'events'));
|
initAnalyticsWriter(globalConfig.analytics?.enabled === true, globalConfig.analytics?.eventsPath ?? join(getGlobalConfigDir(), 'analytics', 'events'));
|
||||||
if (globalConfig.preventSleep) preventSleep();
|
if (globalConfig.preventSleep) preventSleep();
|
||||||
|
|
||||||
const analyticsEmitter = new AnalyticsEmitter(runSlug, currentProvider, configuredModel ?? '(default)');
|
const analyticsEmitter = new AnalyticsEmitter(runSlug, currentProvider, configuredModel ?? '(default)');
|
||||||
const savedSessions = isRetry
|
const savedSessions = isRetry
|
||||||
? (isWorktree ? loadWorktreeSessions(projectCwd, cwd, currentProvider) : loadPersonaSessions(projectCwd, currentProvider))
|
? (isWorktree ? loadWorktreeSessions(projectCwd, cwd, currentProvider) : loadPersonaSessions(projectCwd, currentProvider))
|
||||||
@ -128,12 +126,22 @@ export async function executePiece(
|
|||||||
let exceededInfo: ExceededInfo | undefined;
|
let exceededInfo: ExceededInfo | undefined;
|
||||||
let lastMovementContent: string | undefined;
|
let lastMovementContent: string | undefined;
|
||||||
let lastMovementName: string | undefined;
|
let lastMovementName: string | undefined;
|
||||||
|
const writeTraceReportOnce = createTraceReportWriter({
|
||||||
|
sessionLogger,
|
||||||
|
ndjsonLogPath,
|
||||||
|
tracePath: join(runPaths.runRootAbs, 'trace.md'),
|
||||||
|
pieceName: pieceConfig.name,
|
||||||
|
task,
|
||||||
|
runSlug,
|
||||||
|
promptLogPath: getDebugPromptsLogFile() ?? undefined,
|
||||||
|
mode: traceReportMode,
|
||||||
|
logger: log,
|
||||||
|
});
|
||||||
let currentIteration = 0;
|
let currentIteration = 0;
|
||||||
const movementIterations = new Map<string, number>();
|
const movementIterations = new Map<string, number>();
|
||||||
let engine: PieceEngine | null = null;
|
let engine: PieceEngine | null = null;
|
||||||
const runAbortController = new AbortController();
|
const runAbortController = new AbortController();
|
||||||
const abortHandler = new AbortHandler({ externalSignal: options.abortSignal, internalController: runAbortController, getEngine: () => engine });
|
const abortHandler = new AbortHandler({ externalSignal: options.abortSignal, internalController: runAbortController, getEngine: () => engine });
|
||||||
|
|
||||||
try {
|
try {
|
||||||
engine = new PieceEngine(effectivePieceConfig, cwd, task, {
|
engine = new PieceEngine(effectivePieceConfig, cwd, task, {
|
||||||
abortSignal: runAbortController.signal,
|
abortSignal: runAbortController.signal,
|
||||||
@ -161,20 +169,21 @@ export async function executePiece(
|
|||||||
taskColorIndex: options.taskColorIndex,
|
taskColorIndex: options.taskColorIndex,
|
||||||
initialIteration: options.initialIterationOverride,
|
initialIteration: options.initialIterationOverride,
|
||||||
});
|
});
|
||||||
|
|
||||||
abortHandler.install();
|
abortHandler.install();
|
||||||
|
engine.on('phase:start', (step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration) => {
|
||||||
engine.on('phase:start', (step, phase, phaseName, instruction) => {
|
|
||||||
log.debug('Phase starting', { step: step.name, phase, phaseName });
|
log.debug('Phase starting', { step: step.name, phase, phaseName });
|
||||||
sessionLogger.onPhaseStart(step, phase, phaseName, instruction);
|
sessionLogger.onPhaseStart(step, phase, phaseName, instruction, promptParts, phaseExecutionId, iteration);
|
||||||
});
|
});
|
||||||
|
|
||||||
engine.on('phase:complete', (step, phase, phaseName, content, phaseStatus, phaseError) => {
|
engine.on('phase:complete', (step, phase, phaseName, content, phaseStatus, phaseError, phaseExecutionId, iteration) => {
|
||||||
log.debug('Phase completed', { step: step.name, phase, phaseName, status: phaseStatus });
|
log.debug('Phase completed', { step: step.name, phase, phaseName, status: phaseStatus });
|
||||||
sessionLogger.setIteration(currentIteration);
|
sessionLogger.setIteration(currentIteration);
|
||||||
sessionLogger.onPhaseComplete(step, phase, phaseName, content, phaseStatus, phaseError);
|
sessionLogger.onPhaseComplete(step, phase, phaseName, content, phaseStatus, phaseError, phaseExecutionId, iteration);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
engine.on('phase:judge_stage', (step, phase, phaseName, entry, phaseExecutionId, iteration) => {
|
||||||
|
sessionLogger.onJudgeStage(step, phase, phaseName, entry, phaseExecutionId, iteration);
|
||||||
|
});
|
||||||
engine.on('movement:start', (step, iteration, instruction, providerInfo) => {
|
engine.on('movement:start', (step, iteration, instruction, providerInfo) => {
|
||||||
log.debug('Movement starting', { step: step.name, persona: step.personaDisplayName, iteration });
|
log.debug('Movement starting', { step: step.name, persona: step.personaDisplayName, iteration });
|
||||||
currentIteration = iteration;
|
currentIteration = iteration;
|
||||||
@ -234,6 +243,11 @@ export async function executePiece(
|
|||||||
sessionLog = finalizeSessionLog(sessionLog, 'completed');
|
sessionLog = finalizeSessionLog(sessionLog, 'completed');
|
||||||
sessionLogger.onPieceComplete(state);
|
sessionLogger.onPieceComplete(state);
|
||||||
runMetaManager.finalize('completed', state.iteration);
|
runMetaManager.finalize('completed', state.iteration);
|
||||||
|
writeTraceReportOnce({
|
||||||
|
status: 'completed',
|
||||||
|
iterations: state.iteration,
|
||||||
|
endTime: new Date().toISOString(),
|
||||||
|
});
|
||||||
try {
|
try {
|
||||||
saveSessionState(projectCwd, { status: 'success', taskResult: truncate(lastMovementContent ?? '', 1000), timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState);
|
saveSessionState(projectCwd, { status: 'success', taskResult: truncate(lastMovementContent ?? '', 1000), timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState);
|
||||||
} catch (error) { log.error('Failed to save session state', { error }); }
|
} catch (error) { log.error('Failed to save session state', { error }); }
|
||||||
@ -252,6 +266,12 @@ export async function executePiece(
|
|||||||
sessionLog = finalizeSessionLog(sessionLog, 'aborted');
|
sessionLog = finalizeSessionLog(sessionLog, 'aborted');
|
||||||
sessionLogger.onPieceAbort(state, reason);
|
sessionLogger.onPieceAbort(state, reason);
|
||||||
runMetaManager.finalize('aborted', state.iteration);
|
runMetaManager.finalize('aborted', state.iteration);
|
||||||
|
writeTraceReportOnce({
|
||||||
|
status: 'aborted',
|
||||||
|
iterations: state.iteration,
|
||||||
|
reason,
|
||||||
|
endTime: new Date().toISOString(),
|
||||||
|
});
|
||||||
try {
|
try {
|
||||||
saveSessionState(projectCwd, { status: reason === 'user_interrupted' ? 'user_stopped' : 'error', errorMessage: reason, timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState);
|
saveSessionState(projectCwd, { status: reason === 'user_interrupted' ? 'user_stopped' : 'error', errorMessage: reason, timestamp: new Date().toISOString(), pieceName: pieceConfig.name, taskContent: truncate(task, 200), lastMovement: lastMovementName } satisfies SessionState);
|
||||||
} catch (error) { log.error('Failed to save session state', { error }); }
|
} catch (error) { log.error('Failed to save session state', { error }); }
|
||||||
|
|||||||
@ -2,7 +2,6 @@
|
|||||||
* Session logger — NDJSON ログ書き出し専用モジュール
|
* Session logger — NDJSON ログ書き出し専用モジュール
|
||||||
*
|
*
|
||||||
* PieceEngine のイベントを受け取り、NDJSON セッションログへ追記する責務を担う。
|
* PieceEngine のイベントを受け取り、NDJSON セッションログへ追記する責務を担う。
|
||||||
* analytics や UI 出力は担当しない。
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import {
|
import {
|
||||||
@ -13,14 +12,16 @@ import {
|
|||||||
type NdjsonPieceAbort,
|
type NdjsonPieceAbort,
|
||||||
type NdjsonPhaseStart,
|
type NdjsonPhaseStart,
|
||||||
type NdjsonPhaseComplete,
|
type NdjsonPhaseComplete,
|
||||||
|
type NdjsonPhaseJudgeStage,
|
||||||
type NdjsonInteractiveStart,
|
type NdjsonInteractiveStart,
|
||||||
type NdjsonInteractiveEnd,
|
type NdjsonInteractiveEnd,
|
||||||
} from '../../../infra/fs/index.js';
|
} from '../../../infra/fs/index.js';
|
||||||
import type { InteractiveMetadata } from './types.js';
|
import type { InteractiveMetadata } from './types.js';
|
||||||
import { isDebugEnabled, writePromptLog } from '../../../shared/utils/index.js';
|
import { isDebugEnabled, writePromptLog } from '../../../shared/utils/index.js';
|
||||||
import type { PromptLogRecord } from '../../../shared/utils/index.js';
|
import type { PromptLogRecord, NdjsonRecord } from '../../../shared/utils/index.js';
|
||||||
import type { PieceMovement, AgentResponse, PieceState } from '../../../core/models/index.js';
|
import type { PieceMovement, AgentResponse, PieceState } from '../../../core/models/index.js';
|
||||||
import type { PhaseName } from '../../../core/piece/index.js';
|
import type { JudgeStageEntry, PhasePromptParts } from '../../../core/piece/types.js';
|
||||||
|
import { sanitizeTextForStorage } from './traceReportRedaction.js';
|
||||||
|
|
||||||
function toJudgmentMatchMethod(
|
function toJudgmentMatchMethod(
|
||||||
matchedRuleMethod: string | undefined,
|
matchedRuleMethod: string | undefined,
|
||||||
@ -34,29 +35,30 @@ function toJudgmentMatchMethod(
|
|||||||
|
|
||||||
export class SessionLogger {
|
export class SessionLogger {
|
||||||
private readonly ndjsonLogPath: string;
|
private readonly ndjsonLogPath: string;
|
||||||
/** phase 開始時のプロンプトを一時保持(デバッグ用) */
|
private readonly allowSensitiveData: boolean;
|
||||||
private readonly phasePrompts = new Map<string, string>();
|
private readonly phasePromptsByExecutionId = new Map<string, PhasePromptParts>();
|
||||||
/** 現在のピース全体のイテレーション数 */
|
private readonly phaseExecutionCounters = new Map<string, number>();
|
||||||
|
private readonly ndjsonRecords: NdjsonRecord[] = [];
|
||||||
|
private readonly promptRecords: PromptLogRecord[] = [];
|
||||||
private currentIteration = 0;
|
private currentIteration = 0;
|
||||||
|
|
||||||
constructor(ndjsonLogPath: string) {
|
constructor(ndjsonLogPath: string, allowSensitiveData: boolean) {
|
||||||
this.ndjsonLogPath = ndjsonLogPath;
|
this.ndjsonLogPath = ndjsonLogPath;
|
||||||
|
this.allowSensitiveData = allowSensitiveData;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** インタラクティブモードのメタデータ(interactive_start / interactive_end)を NDJSON へ記録する */
|
|
||||||
writeInteractiveMetadata(meta: InteractiveMetadata): void {
|
writeInteractiveMetadata(meta: InteractiveMetadata): void {
|
||||||
const startRecord: NdjsonInteractiveStart = { type: 'interactive_start', timestamp: new Date().toISOString() };
|
const startRecord: NdjsonInteractiveStart = { type: 'interactive_start', timestamp: new Date().toISOString() };
|
||||||
appendNdjsonLine(this.ndjsonLogPath, startRecord);
|
this.appendRecord(startRecord);
|
||||||
const endRecord: NdjsonInteractiveEnd = {
|
const endRecord: NdjsonInteractiveEnd = {
|
||||||
type: 'interactive_end',
|
type: 'interactive_end',
|
||||||
confirmed: meta.confirmed,
|
confirmed: meta.confirmed,
|
||||||
...(meta.task ? { task: meta.task } : {}),
|
...(meta.task ? { task: this.sanitizeText(meta.task) } : {}),
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
};
|
};
|
||||||
appendNdjsonLine(this.ndjsonLogPath, endRecord);
|
this.appendRecord(endRecord);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** 現在のイテレーション番号を更新する(movement:start で呼ぶ) */
|
|
||||||
setIteration(iteration: number): void {
|
setIteration(iteration: number): void {
|
||||||
this.currentIteration = iteration;
|
this.currentIteration = iteration;
|
||||||
}
|
}
|
||||||
@ -64,75 +66,127 @@ export class SessionLogger {
|
|||||||
onPhaseStart(
|
onPhaseStart(
|
||||||
step: PieceMovement,
|
step: PieceMovement,
|
||||||
phase: 1 | 2 | 3,
|
phase: 1 | 2 | 3,
|
||||||
phaseName: PhaseName,
|
phaseName: 'execute' | 'report' | 'judge',
|
||||||
instruction: string,
|
instruction: string,
|
||||||
|
promptParts: PhasePromptParts,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
): void {
|
): void {
|
||||||
|
if (!instruction) {
|
||||||
|
throw new Error(`Missing phase instruction for ${step.name}:${phase}`);
|
||||||
|
}
|
||||||
|
const resolvedPhaseExecutionId = this.resolvePhaseExecutionId(step.name, phase, phaseExecutionId, iteration);
|
||||||
const record: NdjsonPhaseStart = {
|
const record: NdjsonPhaseStart = {
|
||||||
type: 'phase_start',
|
type: 'phase_start',
|
||||||
step: step.name,
|
step: step.name,
|
||||||
phase,
|
phase,
|
||||||
phaseName,
|
phaseName,
|
||||||
|
phaseExecutionId: resolvedPhaseExecutionId,
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
...(instruction ? { instruction } : {}),
|
instruction: this.sanitizeText(instruction),
|
||||||
|
systemPrompt: this.sanitizeText(promptParts.systemPrompt),
|
||||||
|
userInstruction: this.sanitizeText(promptParts.userInstruction),
|
||||||
|
...(iteration != null ? { iteration } : {}),
|
||||||
};
|
};
|
||||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
this.appendRecord(record);
|
||||||
|
|
||||||
if (isDebugEnabled()) {
|
if (isDebugEnabled()) {
|
||||||
this.phasePrompts.set(`${step.name}:${phase}`, instruction);
|
this.phasePromptsByExecutionId.set(resolvedPhaseExecutionId, promptParts);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
onPhaseComplete(
|
onPhaseComplete(
|
||||||
step: PieceMovement,
|
step: PieceMovement,
|
||||||
phase: 1 | 2 | 3,
|
phase: 1 | 2 | 3,
|
||||||
phaseName: PhaseName,
|
phaseName: 'execute' | 'report' | 'judge',
|
||||||
content: string,
|
content: string,
|
||||||
phaseStatus: string,
|
phaseStatus: string,
|
||||||
phaseError: string | undefined,
|
phaseError: string | undefined,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
): void {
|
): void {
|
||||||
|
if (!phaseStatus) {
|
||||||
|
throw new Error(`Missing phase status for ${step.name}:${phase}`);
|
||||||
|
}
|
||||||
|
const resolvedPhaseExecutionId = this.resolveCompletionPhaseExecutionId(step.name, phase, phaseExecutionId, iteration);
|
||||||
|
const completedAt = new Date().toISOString();
|
||||||
const record: NdjsonPhaseComplete = {
|
const record: NdjsonPhaseComplete = {
|
||||||
type: 'phase_complete',
|
type: 'phase_complete',
|
||||||
step: step.name,
|
step: step.name,
|
||||||
phase,
|
phase,
|
||||||
phaseName,
|
phaseName,
|
||||||
|
phaseExecutionId: resolvedPhaseExecutionId,
|
||||||
status: phaseStatus,
|
status: phaseStatus,
|
||||||
content,
|
content: this.sanitizeText(content),
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: completedAt,
|
||||||
...(phaseError ? { error: phaseError } : {}),
|
...(phaseError ? { error: this.sanitizeText(phaseError) } : {}),
|
||||||
|
...(iteration != null ? { iteration } : {}),
|
||||||
};
|
};
|
||||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
this.appendRecord(record);
|
||||||
|
|
||||||
const promptKey = `${step.name}:${phase}`;
|
const prompt = this.phasePromptsByExecutionId.get(resolvedPhaseExecutionId);
|
||||||
const prompt = this.phasePrompts.get(promptKey);
|
if (isDebugEnabled()) {
|
||||||
this.phasePrompts.delete(promptKey);
|
if (!prompt) {
|
||||||
|
throw new Error(`Missing debug prompt for ${step.name}:${phase}:${resolvedPhaseExecutionId}`);
|
||||||
if (isDebugEnabled() && prompt) {
|
}
|
||||||
const promptRecord: PromptLogRecord = {
|
const promptRecord: PromptLogRecord = {
|
||||||
movement: step.name,
|
movement: step.name,
|
||||||
phase,
|
phase,
|
||||||
iteration: this.currentIteration,
|
iteration: iteration ?? this.currentIteration,
|
||||||
prompt,
|
phaseExecutionId: resolvedPhaseExecutionId,
|
||||||
response: content,
|
prompt: this.sanitizeText(prompt.userInstruction),
|
||||||
timestamp: new Date().toISOString(),
|
systemPrompt: this.sanitizeText(prompt.systemPrompt),
|
||||||
|
userInstruction: this.sanitizeText(prompt.userInstruction),
|
||||||
|
response: this.sanitizeText(content),
|
||||||
|
timestamp: completedAt,
|
||||||
};
|
};
|
||||||
writePromptLog(promptRecord);
|
writePromptLog(promptRecord);
|
||||||
|
this.promptRecords.push(promptRecord);
|
||||||
|
this.phasePromptsByExecutionId.delete(resolvedPhaseExecutionId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
onJudgeStage(
|
||||||
|
step: PieceMovement,
|
||||||
|
phase: 3,
|
||||||
|
phaseName: 'judge',
|
||||||
|
entry: JudgeStageEntry,
|
||||||
|
phaseExecutionId?: string,
|
||||||
|
iteration?: number,
|
||||||
|
): void {
|
||||||
|
const resolvedPhaseExecutionId = this.resolveCompletionPhaseExecutionId(step.name, phase, phaseExecutionId, iteration);
|
||||||
|
const record: NdjsonPhaseJudgeStage = {
|
||||||
|
type: 'phase_judge_stage',
|
||||||
|
step: step.name,
|
||||||
|
phase,
|
||||||
|
phaseName,
|
||||||
|
phaseExecutionId: resolvedPhaseExecutionId,
|
||||||
|
stage: entry.stage,
|
||||||
|
method: entry.method,
|
||||||
|
status: entry.status,
|
||||||
|
instruction: this.sanitizeText(entry.instruction),
|
||||||
|
response: this.sanitizeText(entry.response),
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
...(iteration != null ? { iteration } : {}),
|
||||||
|
};
|
||||||
|
this.appendRecord(record);
|
||||||
|
}
|
||||||
|
|
||||||
onMovementStart(
|
onMovementStart(
|
||||||
step: PieceMovement,
|
step: PieceMovement,
|
||||||
iteration: number,
|
iteration: number,
|
||||||
instruction: string | undefined,
|
instruction: string | undefined,
|
||||||
): void {
|
): void {
|
||||||
|
this.currentIteration = iteration;
|
||||||
const record: NdjsonStepStart = {
|
const record: NdjsonStepStart = {
|
||||||
type: 'step_start',
|
type: 'step_start',
|
||||||
step: step.name,
|
step: step.name,
|
||||||
persona: step.personaDisplayName,
|
persona: step.personaDisplayName,
|
||||||
iteration,
|
iteration,
|
||||||
timestamp: new Date().toISOString(),
|
timestamp: new Date().toISOString(),
|
||||||
...(instruction ? { instruction } : {}),
|
...(instruction ? { instruction: this.sanitizeText(instruction) } : {}),
|
||||||
};
|
};
|
||||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
this.appendRecord(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
onMovementComplete(
|
onMovementComplete(
|
||||||
@ -146,15 +200,15 @@ export class SessionLogger {
|
|||||||
step: step.name,
|
step: step.name,
|
||||||
persona: response.persona,
|
persona: response.persona,
|
||||||
status: response.status,
|
status: response.status,
|
||||||
content: response.content,
|
content: this.sanitizeText(response.content),
|
||||||
instruction,
|
instruction: this.sanitizeText(instruction),
|
||||||
...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}),
|
...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}),
|
||||||
...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}),
|
...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}),
|
||||||
...(matchMethod ? { matchMethod } : {}),
|
...(matchMethod ? { matchMethod } : {}),
|
||||||
...(response.error ? { error: response.error } : {}),
|
...(response.error ? { error: this.sanitizeText(response.error) } : {}),
|
||||||
timestamp: response.timestamp.toISOString(),
|
timestamp: response.timestamp.toISOString(),
|
||||||
};
|
};
|
||||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
this.appendRecord(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
onPieceComplete(state: PieceState): void {
|
onPieceComplete(state: PieceState): void {
|
||||||
@ -163,16 +217,73 @@ export class SessionLogger {
|
|||||||
iterations: state.iteration,
|
iterations: state.iteration,
|
||||||
endTime: new Date().toISOString(),
|
endTime: new Date().toISOString(),
|
||||||
};
|
};
|
||||||
appendNdjsonLine(this.ndjsonLogPath, record);
|
this.appendRecord(record);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Logs a `piece_abort` record with the iteration count reached and the
 * sanitized abort reason.
 *
 * @param state - Piece state at abort time; only `state.iteration` is read.
 * @param reason - Human-readable abort reason (sanitized before storage).
 */
onPieceAbort(state: PieceState, reason: string): void {
  const sanitizedReason = this.sanitizeText(reason);
  const abortRecord: NdjsonPieceAbort = {
    type: 'piece_abort',
    iterations: state.iteration,
    reason: sanitizedReason,
    endTime: new Date().toISOString(),
  };
  this.appendRecord(abortRecord);
}
|
||||||
|
|
||||||
|
/** Returns a shallow copy of every NDJSON record appended so far. */
getNdjsonRecords(): NdjsonRecord[] {
  // Copy so callers cannot mutate the logger's internal list.
  return Array.from(this.ndjsonRecords);
}
|
||||||
|
|
||||||
|
/** Returns a shallow copy of the prompt log records held by this logger. */
getPromptRecords(): PromptLogRecord[] {
  // Copy so callers cannot mutate the logger's internal list.
  return Array.from(this.promptRecords);
}
|
||||||
|
|
||||||
|
/**
 * Builds the counter key for a phase: `step:phase` when no iteration is
 * given, otherwise `step:iteration:phase`.
 */
private buildPhaseKey(stepName: string, phase: 1 | 2 | 3, iteration?: number): string {
  const segments: Array<string | number> =
    iteration == null ? [stepName, phase] : [stepName, iteration, phase];
  return segments.join(':');
}
|
||||||
|
|
||||||
|
/**
 * Resolves the execution id for a phase that is starting.
 *
 * An explicit, non-empty `phaseExecutionId` is returned unchanged and the
 * counter is left untouched. Otherwise the per-key counter is advanced and
 * the id `<key>:<sequence>` is returned.
 */
private resolvePhaseExecutionId(
  stepName: string,
  phase: 1 | 2 | 3,
  phaseExecutionId: string | undefined,
  iteration?: number,
): string {
  if (phaseExecutionId) {
    return phaseExecutionId;
  }

  const counterKey = this.buildPhaseKey(stepName, phase, iteration);
  const sequence = (this.phaseExecutionCounters.get(counterKey) ?? 0) + 1;
  this.phaseExecutionCounters.set(counterKey, sequence);
  return `${counterKey}:${sequence}`;
}
|
||||||
|
|
||||||
|
/**
 * Resolves the execution id for a phase that is completing (or reporting a
 * judge stage), without advancing the counter.
 *
 * An explicit, non-empty `phaseExecutionId` is returned unchanged. Otherwise
 * the id of the latest started execution for the key is returned.
 *
 * @throws Error when no execution was ever started for the key.
 */
private resolveCompletionPhaseExecutionId(
  stepName: string,
  phase: 1 | 2 | 3,
  phaseExecutionId: string | undefined,
  iteration?: number,
): string {
  if (phaseExecutionId) {
    return phaseExecutionId;
  }

  const counterKey = this.buildPhaseKey(stepName, phase, iteration);
  const latestSequence = this.phaseExecutionCounters.get(counterKey);
  if (latestSequence != null) {
    return `${counterKey}:${latestSequence}`;
  }
  throw new Error(`Missing phase execution id on completion for ${stepName}:${phase}`);
}
|
||||||
|
|
||||||
|
/**
 * Single write path for NDJSON records: keeps the record in memory and
 * appends it as one line to the NDJSON log file.
 */
private appendRecord(record: NdjsonRecord): void {
  // In-memory copy first, then the on-disk append.
  this.ndjsonRecords.push(record);
  const logPath = this.ndjsonLogPath;
  appendNdjsonLine(logPath, record);
}
|
||||||
|
|
||||||
|
/**
 * Prepares text for storage in the log: delegates to `sanitizeTextForStorage`,
 * passing this logger's `allowSensitiveData` flag.
 */
private sanitizeText(text: string): string {
  const keepFullText = this.allowSensitiveData;
  return sanitizeTextForStorage(text, keepFullText);
}
|
||||||
}
|
}
|
||||||
|
|||||||
55
src/features/tasks/execute/traceReport.ts
Normal file
55
src/features/tasks/execute/traceReport.ts
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
import type { NdjsonRecord, PromptLogRecord } from '../../../shared/utils/index.js';
|
||||||
|
import type {
|
||||||
|
TraceReportMode,
|
||||||
|
TraceReportParams,
|
||||||
|
TraceMovement,
|
||||||
|
TracePhase,
|
||||||
|
} from './traceReportTypes.js';
|
||||||
|
import { parseJsonl, buildTraceFromRecords, type PromptRecord } from './traceReportParser.js';
|
||||||
|
import { cloneMovementsForMode, sanitizeTraceParamsForMode } from './traceReportRedaction.js';
|
||||||
|
import { assertTraceParams, renderTraceReportMarkdown } from './traceReportRenderer.js';
|
||||||
|
|
||||||
|
export type {
|
||||||
|
TraceReportMode,
|
||||||
|
TraceReportParams,
|
||||||
|
TraceMovement,
|
||||||
|
TracePhase,
|
||||||
|
};
|
||||||
|
|
||||||
|
export { assertTraceParams, renderTraceReportMarkdown };
|
||||||
|
|
||||||
|
/**
 * Renders the trace report by reading the session NDJSON log (and, when a
 * path is given, the prompt log) from disk.
 *
 * @param params - Report header parameters.
 * @param ndjsonLogPath - Path of the session NDJSON log.
 * @param promptLogPath - Path of the prompt log, or `undefined` to use no prompt records.
 * @param mode - `'off'` disables rendering; other modes are forwarded.
 * @returns The Markdown report, or `undefined` when `mode` is `'off'`.
 * @throws Error when the session log yields no records.
 */
export function renderTraceReportFromLogs(
  params: TraceReportParams,
  ndjsonLogPath: string,
  promptLogPath: string | undefined,
  mode: TraceReportMode,
): string | undefined {
  if (mode === 'off') {
    return undefined;
  }

  const sessionRecords = parseJsonl<NdjsonRecord>(ndjsonLogPath);
  if (sessionRecords.length === 0) {
    throw new Error(`No session records found for trace report: ${ndjsonLogPath}`);
  }

  let promptRecords: PromptRecord[] = [];
  if (promptLogPath) {
    promptRecords = parseJsonl<PromptRecord>(promptLogPath);
  }
  return renderTraceReportFromRecords(params, sessionRecords, promptRecords, mode);
}
|
||||||
|
|
||||||
|
/**
 * Renders the trace report from already-loaded records.
 *
 * Builds the movement trace, applies mode-dependent redaction to both the
 * header parameters and the movements, then renders Markdown.
 *
 * @returns The Markdown report, or `undefined` when `mode` is `'off'`.
 * @throws Error when `records` is empty.
 */
export function renderTraceReportFromRecords(
  params: TraceReportParams,
  records: NdjsonRecord[],
  promptRecords: PromptRecord[] | PromptLogRecord[],
  mode: TraceReportMode,
): string | undefined {
  if (mode === 'off') {
    return undefined;
  }
  if (records.length === 0) {
    throw new Error('No session records found for trace report from records');
  }

  const { traceStartedAt, movements } = buildTraceFromRecords(
    records,
    promptRecords as PromptRecord[],
    params.endTime,
  );
  const headerParams = sanitizeTraceParamsForMode(params, mode);
  const reportMovements = cloneMovementsForMode(movements, mode);
  return renderTraceReportMarkdown(headerParams, traceStartedAt, reportMovements);
}
|
||||||
260
src/features/tasks/execute/traceReportParser.ts
Normal file
260
src/features/tasks/execute/traceReportParser.ts
Normal file
@ -0,0 +1,260 @@
|
|||||||
|
import { existsSync, readFileSync } from 'node:fs';
|
||||||
|
import type { NdjsonRecord, PromptLogRecord } from '../../../shared/utils/index.js';
|
||||||
|
import {
|
||||||
|
buildPhaseExecutionId,
|
||||||
|
parsePhaseExecutionId,
|
||||||
|
} from '../../../shared/utils/phaseExecutionId.js';
|
||||||
|
import type {
|
||||||
|
TraceMovement,
|
||||||
|
TracePhase,
|
||||||
|
} from './traceReportTypes.js';
|
||||||
|
|
||||||
|
/** A prompt log record as read back from the prompt log, carrying a timestamp. */
interface PromptRecord extends PromptLogRecord {
  /** Timestamp string stored with the prompt record. */
  timestamp: string;
}
|
||||||
|
|
||||||
|
/** Result of folding session records into a renderable trace. */
interface BuildTraceResult {
  /** Start time taken from the first `piece_start` record, or the fallback end time. */
  traceStartedAt: string;
  /** Movements ordered by start time, then iteration. */
  movements: TraceMovement[];
}
|
||||||
|
|
||||||
|
/**
 * Reads a JSON Lines file and parses every non-blank line.
 *
 * Lines are trimmed before parsing, so CRLF endings and indentation are tolerated.
 * The parsed values are cast to `T` without validation.
 *
 * @param path - File to read.
 * @returns Parsed values in file order; an empty array when the file does not exist.
 * @throws SyntaxError when a line is not valid JSON. The message names the
 *   file and the 1-based line number so a corrupt log can be located.
 */
export function parseJsonl<T>(path: string): T[] {
  if (!existsSync(path)) {
    return [];
  }

  const parsed: T[] = [];
  const rawLines = readFileSync(path, 'utf-8').split('\n');
  for (const [index, rawLine] of rawLines.entries()) {
    const line = rawLine.trim();
    if (line.length === 0) {
      continue;
    }
    try {
      parsed.push(JSON.parse(line) as T);
    } catch (error: unknown) {
      // Re-throw with location context; a bare JSON.parse message is not actionable.
      const detail = error instanceof Error ? error.message : String(error);
      throw new SyntaxError(`Invalid JSON in ${path}:${index + 1}: ${detail}`);
    }
  }
  return parsed;
}
|
||||||
|
|
||||||
|
/** Map key identifying one movement run: `<step>:<iteration>`. */
function movementKey(step: string, iteration: number): string {
  return [step, iteration].join(':');
}
|
||||||
|
|
||||||
|
/**
 * Allocates the next execution id for a step/iteration/phase.
 *
 * Advances the counter stored under `<step>:<iteration>:<phase>` and builds
 * the id from the new sequence number.
 *
 * @param counters - Per-key sequence counters; mutated by this call.
 */
function createPhaseExecutionId(
  step: string,
  iteration: number,
  phase: 1 | 2 | 3,
  counters: Map<string, number>,
): string {
  const counterKey = `${step}:${iteration}:${phase}`;
  const sequence = (counters.get(counterKey) ?? 0) + 1;
  counters.set(counterKey, sequence);
  return buildPhaseExecutionId({ step, iteration, phase, sequence });
}
|
||||||
|
|
||||||
|
/**
 * Extracts the step and iteration from a phase execution id.
 *
 * @returns `undefined` when `parsePhaseExecutionId` cannot parse the id.
 */
function parsePhaseExecutionKey(
  phaseExecutionId: string,
): { step: string; iteration: number } | undefined {
  const parts = parsePhaseExecutionId(phaseExecutionId);
  return parts ? { step: parts.step, iteration: parts.iteration } : undefined;
}
|
||||||
|
|
||||||
|
/**
 * Returns the movement for `step`/`iteration`, creating and registering it on
 * first use.
 *
 * `timestamp` and `fallbackPersona` are only used when a new movement is
 * created; an existing movement is returned untouched.
 */
function ensureMovement(
  movementsByKey: Map<string, TraceMovement>,
  step: string,
  iteration: number,
  timestamp: string,
  fallbackPersona: string,
): TraceMovement {
  const key = movementKey(step, iteration);
  const known = movementsByKey.get(key);
  if (known) {
    return known;
  }

  const created: TraceMovement = {
    step,
    persona: fallbackPersona,
    iteration,
    startedAt: timestamp,
    phases: [],
  };
  movementsByKey.set(key, created);
  return created;
}
|
||||||
|
|
||||||
|
/**
 * Returns the execution id of the most recently started execution for a
 * step/iteration/phase WITHOUT advancing the counter.
 *
 * The counter key format must stay in sync with `createPhaseExecutionId`.
 * When nothing was started yet, sequence 1 is used so the caller's lookup
 * fails with its usual "missing phase_start" error.
 */
function latestPhaseExecutionId(
  step: string,
  iteration: number,
  phase: 1 | 2 | 3,
  counters: Map<string, number>,
): string {
  const sequence = counters.get(`${step}:${iteration}:${phase}`) ?? 1;
  return buildPhaseExecutionId({ step, iteration, phase, sequence });
}

/**
 * Folds session NDJSON records (plus optional prompt records) into movements
 * with their phases, ready for rendering.
 *
 * Record handling:
 * - `piece_start`: the first one supplies the trace start time.
 * - `step_start` / `step_complete`: create or complete a movement; a
 *   completion is attributed to the latest started iteration of its step.
 * - `phase_start`: appends a phase; an id is generated when the record has none.
 * - `phase_complete`: fills in the matching phase. When the record has no id,
 *   the id of the latest started execution is used; the counter is not
 *   advanced, which mirrors how the session logger resolves completions.
 * - `phase_judge_stage`: appended to the matching phase; silently ignored
 *   when the phase is unknown.
 *
 * @param records - Session records in log order.
 * @param promptRecords - Prompt records, matched to phases by `phaseExecutionId`.
 * @param defaultEndTime - Used as `traceStartedAt` when no `piece_start` record exists.
 * @returns The trace start time and the movements sorted by start time, then iteration.
 * @throws Error when a step/phase record cannot be attributed to an iteration,
 *   or a `phase_complete` has no preceding `phase_start`.
 */
export function buildTraceFromRecords(
  records: NdjsonRecord[],
  promptRecords: PromptRecord[],
  defaultEndTime: string,
): BuildTraceResult {
  const promptByExecutionId = new Map<string, PromptRecord>();
  for (const prompt of promptRecords) {
    if (prompt.phaseExecutionId) {
      promptByExecutionId.set(prompt.phaseExecutionId, prompt);
    }
  }

  const movementsByKey = new Map<string, TraceMovement>();
  const phasesByExecutionId = new Map<string, { movement: TraceMovement; index: number }>();
  const phaseExecutionCounters = new Map<string, number>();
  const latestIterationByStep = new Map<string, number>();

  let traceStartedAt = '';

  for (const record of records) {
    if (!traceStartedAt && record.type === 'piece_start') {
      traceStartedAt = record.startTime;
      continue;
    }

    if (record.type === 'step_start') {
      latestIterationByStep.set(record.step, record.iteration);
      const movement = ensureMovement(
        movementsByKey,
        record.step,
        record.iteration,
        record.timestamp,
        record.persona,
      );
      // A phase_start may have created the movement first with a placeholder persona.
      movement.persona = record.persona;
      movement.instruction = record.instruction;
      continue;
    }

    if (record.type === 'step_complete') {
      const iteration = latestIterationByStep.get(record.step);
      if (iteration == null) {
        throw new Error(`Missing iteration for step_complete: ${record.step}`);
      }
      const movement = ensureMovement(
        movementsByKey,
        record.step,
        iteration,
        record.timestamp,
        record.persona,
      );
      movement.completedAt = record.timestamp;
      movement.result = {
        status: record.status,
        content: record.content,
        error: record.error,
        matchedRuleIndex: record.matchedRuleIndex,
        matchedRuleMethod: record.matchedRuleMethod,
        matchMethod: record.matchMethod,
      };
      continue;
    }

    if (record.type === 'phase_start') {
      const iteration = record.iteration ?? latestIterationByStep.get(record.step);
      if (iteration == null) {
        throw new Error(`Missing iteration for phase_start: ${record.step}:${record.phase}`);
      }
      const movement = ensureMovement(
        movementsByKey,
        record.step,
        iteration,
        record.timestamp,
        // No persona on phase records; the step name stands in until step_start sets it.
        record.step,
      );
      const resolvedExecutionId =
        record.phaseExecutionId
        ?? createPhaseExecutionId(record.step, iteration, record.phase, phaseExecutionCounters);
      const prompt = promptByExecutionId.get(resolvedExecutionId);
      const phase: TracePhase = {
        phaseExecutionId: resolvedExecutionId,
        phase: record.phase,
        phaseName: record.phaseName,
        instruction: record.instruction ?? record.userInstruction ?? prompt?.userInstruction ?? '',
        systemPrompt: record.systemPrompt ?? prompt?.systemPrompt ?? '',
        userInstruction: record.userInstruction ?? prompt?.userInstruction ?? record.instruction ?? '',
        startedAt: record.timestamp,
      };
      movement.phases.push(phase);
      phasesByExecutionId.set(resolvedExecutionId, {
        movement,
        index: movement.phases.length - 1,
      });
      continue;
    }

    if (record.type === 'phase_complete') {
      const iterationFromId = record.phaseExecutionId
        ? parsePhaseExecutionKey(record.phaseExecutionId)?.iteration
        : undefined;
      const iteration =
        record.iteration
        ?? iterationFromId
        ?? latestIterationByStep.get(record.step);
      if (iteration == null) {
        throw new Error(`Missing iteration for phase_complete: ${record.step}:${record.phase}`);
      }
      // A completion refers to an execution that already started, so reuse the
      // latest sequence instead of allocating a new one (which could never match).
      const resolvedExecutionId =
        record.phaseExecutionId
        ?? latestPhaseExecutionId(record.step, iteration, record.phase, phaseExecutionCounters);
      const phaseRef = phasesByExecutionId.get(resolvedExecutionId);
      if (!phaseRef) {
        throw new Error(`Missing phase_start before phase_complete: ${resolvedExecutionId}`);
      }
      const existing = phaseRef.movement.phases[phaseRef.index];
      if (!existing) {
        throw new Error(`Missing phase state for completion: ${resolvedExecutionId}`);
      }
      const prompt = promptByExecutionId.get(resolvedExecutionId);
      phaseRef.movement.phases[phaseRef.index] = {
        ...existing,
        instruction: existing.instruction || prompt?.userInstruction || '',
        systemPrompt: prompt?.systemPrompt ?? existing.systemPrompt,
        userInstruction: prompt?.userInstruction ?? existing.userInstruction,
        response: record.content,
        status: record.status,
        error: record.error,
        completedAt: record.timestamp,
      };
      continue;
    }

    if (record.type === 'phase_judge_stage') {
      const phaseRef = record.phaseExecutionId
        ? phasesByExecutionId.get(record.phaseExecutionId)
        : undefined;
      if (!phaseRef) {
        // Best effort: a judge stage without a known phase is dropped.
        continue;
      }
      const existing = phaseRef.movement.phases[phaseRef.index];
      if (!existing) {
        continue;
      }
      phaseRef.movement.phases[phaseRef.index] = {
        ...existing,
        judgeStages: [
          ...(existing.judgeStages ?? []),
          {
            stage: record.stage,
            method: record.method,
            status: record.status,
            instruction: record.instruction,
            response: record.response,
          },
        ],
      };
    }
  }

  const movements = [...movementsByKey.values()].sort((a, b) => {
    const byStart = a.startedAt.localeCompare(b.startedAt);
    if (byStart !== 0) {
      return byStart;
    }
    return a.iteration - b.iteration;
  });

  return {
    traceStartedAt: traceStartedAt || defaultEndTime,
    movements,
  };
}
|
||||||
|
|
||||||
|
export type { PromptRecord };
|
||||||
81
src/features/tasks/execute/traceReportRedaction.ts
Normal file
81
src/features/tasks/execute/traceReportRedaction.ts
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
import type {
|
||||||
|
TraceMovement,
|
||||||
|
TraceReportMode,
|
||||||
|
TraceReportParams,
|
||||||
|
} from './traceReportTypes.js';
|
||||||
|
|
||||||
|
/**
 * Masks common credential patterns in free text with `[REDACTED]`.
 *
 * Patterns handled, in order:
 * 1. `Authorization: Bearer <token>` headers.
 * 2. `key: value` / `key=value` pairs for api key, token, password, secret,
 *    access token and refresh token (optionally quoted, e.g. in JSON).
 * 3. The same keys as URL query parameters.
 * 4. Bare tokens with the prefixes `sk-`, `ghp_`, or `xox[baprs]-`.
 *
 * The function is idempotent: text can be sanitized when stored and again
 * when rendered without changing further. The lookahead in pattern 2 provides
 * this. Without it, an already-masked value matched only up to its closing
 * bracket and came out as `[REDACTED]]`.
 *
 * @param text - Text to sanitize; empty input is returned unchanged.
 * @returns The text with matched secrets replaced by `[REDACTED]`.
 */
export function sanitizeSensitiveText(text: string): string {
  if (!text) return text;
  return text
    .replace(/(Authorization\s*:\s*Bearer\s+)([^\s]+)/gi, '$1[REDACTED]')
    .replace(
      /(["']?(?:api[_-]?key|token|password|secret|access[_-]?token|refresh[_-]?token)["']?\s*[:=]\s*["']?)(?!\[REDACTED\])([^"',\s}\]]+)(["']?)/gi,
      '$1[REDACTED]$3',
    )
    .replace(/([?&](?:api[_-]?key|token|password|secret)=)([^&\s]+)/gi, '$1[REDACTED]')
    .replace(/\b(?:sk-[A-Za-z0-9]{8,}|ghp_[A-Za-z0-9]{8,}|xox[baprs]-[A-Za-z0-9-]{8,})\b/g, '[REDACTED]');
}
|
||||||
|
|
||||||
|
/**
 * Applies the report mode to one piece of text: unchanged for empty text or
 * `'full'` mode, sanitized otherwise.
 */
function transformText(text: string, mode: TraceReportMode): string {
  const keepAsIs = !text || mode === 'full';
  return keepAsIs ? text : sanitizeSensitiveText(text);
}
|
||||||
|
|
||||||
|
/**
 * Produces copies of the movements with every text field passed through the
 * mode's text transform.
 *
 * Covers movement instructions and results, phase instruction/prompt/response/
 * error text, and judge stage instruction/response text. The input movements
 * are not mutated.
 */
export function cloneMovementsForMode(
  movements: TraceMovement[],
  mode: TraceReportMode,
): TraceMovement[] {
  const redact = (value: string): string => transformText(value, mode);
  const redactOptional = (value: string | undefined): string | undefined =>
    value == null ? undefined : redact(value);

  return movements.map((movement) => {
    const { result } = movement;

    const redactedResult = result
      ? {
          ...result,
          content: redact(result.content),
          // Only emit `error` when there is a non-empty one to transform.
          ...(result.error ? { error: redact(result.error) } : {}),
        }
      : undefined;

    const redactedPhases = movement.phases.map((phase) => ({
      ...phase,
      instruction: redact(phase.instruction),
      systemPrompt: redact(phase.systemPrompt),
      userInstruction: redact(phase.userInstruction),
      response: redactOptional(phase.response),
      error: redactOptional(phase.error),
      judgeStages: phase.judgeStages?.map((stage) => ({
        ...stage,
        instruction: redact(stage.instruction),
        response: redact(stage.response),
      })),
    }));

    return {
      ...movement,
      instruction: redactOptional(movement.instruction),
      result: redactedResult,
      phases: redactedPhases,
    };
  });
}
|
||||||
|
|
||||||
|
/**
 * Returns the report parameters to render for the given mode.
 *
 * `'full'` returns the same object untouched. Any other mode returns a copy
 * with `task` sanitized, and `reason` sanitized when it is non-empty.
 */
export function sanitizeTraceParamsForMode(
  params: TraceReportParams,
  mode: TraceReportMode,
): TraceReportParams {
  if (mode === 'full') {
    return params;
  }

  const reasonField = params.reason
    ? { reason: sanitizeSensitiveText(params.reason) }
    : {};
  return {
    ...params,
    task: sanitizeSensitiveText(params.task),
    ...reasonField,
  };
}
|
||||||
|
|
||||||
|
/**
 * Prepares text for persistence in logs.
 *
 * @param text - Text about to be stored.
 * @param allowFullText - When `true`, the text is stored verbatim.
 * @returns The original text when it is empty or full text is allowed,
 *   otherwise the sanitized text.
 */
export function sanitizeTextForStorage(text: string, allowFullText: boolean): string {
  const storeVerbatim = !text || allowFullText;
  return storeVerbatim ? text : sanitizeSensitiveText(text);
}
|
||||||
297
src/features/tasks/execute/traceReportRenderer.ts
Normal file
297
src/features/tasks/execute/traceReportRenderer.ts
Normal file
@ -0,0 +1,297 @@
|
|||||||
|
import type {
|
||||||
|
TraceMovement,
|
||||||
|
TracePhase,
|
||||||
|
TraceReportParams,
|
||||||
|
} from './traceReportTypes.js';
|
||||||
|
|
||||||
|
/** A single movement rendered as its own section. */
interface MovementBlock {
  kind: 'movement';
  movement: TraceMovement;
}

/** A run of movements alternating between two steps, rendered collapsed. */
interface LoopBlock {
  kind: 'loop';
  /** The movements of the run, in order. */
  movements: TraceMovement[];
}

/** Unit of report output, discriminated by `kind`. */
type RenderBlock = MovementBlock | LoopBlock;
|
||||||
|
|
||||||
|
/**
 * Validates the report parameters.
 *
 * @throws Error when `tracePath`, `pieceName`, `task`, `runSlug` or `endTime`
 *   is empty (checked in that order), or when `iterations` is not a
 *   non-negative integer.
 */
export function assertTraceParams(params: TraceReportParams): void {
  const requiredTextFields = ['tracePath', 'pieceName', 'task', 'runSlug', 'endTime'] as const;
  for (const field of requiredTextFields) {
    if (!params[field]) {
      throw new Error(`${field} is required`);
    }
  }

  const { iterations } = params;
  const isValidCount = Number.isInteger(iterations) && iterations >= 0;
  if (!isValidCount) {
    throw new Error(`iterations must be a non-negative integer: ${iterations}`);
  }
}
|
||||||
|
|
||||||
|
/**
 * Validates one movement before rendering.
 *
 * @param index - Position of the movement in the sorted list; used in error messages.
 * @throws Error when the step, persona or start time is missing, or the
 *   iteration is not a positive integer.
 */
function assertTraceMovement(movement: TraceMovement, index: number): void {
  const fail = (problem: string): never => {
    throw new Error(`trace movement[${index}] ${problem}`);
  };

  if (!movement.step) fail('missing step');
  if (!movement.persona) fail('missing persona');
  const iterationIsValid = Number.isInteger(movement.iteration) && movement.iteration > 0;
  if (!iterationIsValid) fail(`has invalid iteration: ${movement.iteration}`);
  if (!movement.startedAt) fail('missing startedAt');
}
|
||||||
|
|
||||||
|
/**
 * Tells whether a phase should be flagged: its status is `'error'`, it has
 * an error message, or any of its judge stages has status `'error'`.
 */
function hasPhaseError(phase: TracePhase): boolean {
  const phaseFailed = phase.status === 'error' || Boolean(phase.error);
  if (phaseFailed) {
    return true;
  }
  const stages = phase.judgeStages ?? [];
  return stages.some((stage) => stage.status === 'error');
}
|
||||||
|
|
||||||
|
/**
 * Chooses the status marker shown in a movement heading.
 *
 * - `❌` when the movement result is an error, or when the run was aborted
 *   and this last movement never produced a result.
 * - `⚠️` when any phase of the movement is flagged by `hasPhaseError`.
 * - empty string otherwise.
 */
function movementMarker(
  movement: TraceMovement,
  runStatus: TraceReportParams['status'],
  isLastMovement: boolean,
): string {
  const { result } = movement;
  const resultFailed = result?.status === 'error' || Boolean(result?.error);
  const abortedMidMovement = runStatus === 'aborted' && !result && isLastMovement;

  if (resultFailed || abortedMidMovement) {
    return '❌';
  }
  return movement.phases.some(hasPhaseError) ? '⚠️' : '';
}
|
||||||
|
|
||||||
|
/**
 * Renders one phase as Markdown lines: heading, timestamps, collapsible
 * system prompt / user instruction / response, status, error, and (for
 * phase 3) the judgment stages.
 *
 * @param runStatus - Overall run status; a completed run requires every phase
 *   to have a status and a completion time.
 * @throws Error when the phase has no instruction, or when the run completed
 *   but the phase lacks `status` or `completedAt`.
 */
function renderPhaseSection(
  phase: TracePhase,
  runStatus: TraceReportParams['status'],
): string[] {
  const phaseLabel = `phase ${phase.phase} (${phase.phaseName})`;
  if (!phase.instruction) {
    throw new Error(`${phaseLabel} missing instruction`);
  }
  const runCompleted = runStatus === 'completed';
  if (runCompleted && !phase.status) {
    throw new Error(`${phaseLabel} missing status`);
  }
  if (runCompleted && !phase.completedAt) {
    throw new Error(`${phaseLabel} missing completedAt`);
  }

  // A collapsed <details> block with blank lines around the body.
  const collapsible = (summary: string, body: string): string[] => [
    `<details><summary>${summary}</summary>`,
    '',
    body,
    '',
    '</details>',
  ];

  const headingMarker = hasPhaseError(phase) ? ' ⚠️' : '';
  const out: string[] = [
    `### Phase ${phase.phase}: ${phase.phaseName}${headingMarker}`,
    '',
    `- Started: ${phase.startedAt}`,
  ];
  if (phase.completedAt) {
    out.push(`- Completed: ${phase.completedAt}`);
  }
  out.push(
    `- System Prompt: ${phase.systemPrompt.length} chars`,
    ...collapsible('System Prompt', phase.systemPrompt),
    '',
    `- User Instruction: ${phase.userInstruction.length} chars`,
    ...collapsible('User Instruction', phase.userInstruction),
  );

  if (phase.response != null) {
    out.push(
      '',
      `- Response: ${phase.response.length} chars`,
      ...collapsible('Response', phase.response),
    );
  }

  out.push('', `- Status: ${phase.status ?? 'in_progress'}`);
  if (phase.error) {
    out.push(`- Error: ${phase.error}`);
  }

  // Judgment stages are only rendered for phase 3.
  const stages = phase.phase === 3 ? phase.judgeStages ?? [] : [];
  if (stages.length > 0) {
    out.push('', '#### Judgment Stages', '');
    for (const stage of stages) {
      const stageMarker = stage.status === 'error' ? ' ⚠️' : '';
      out.push(
        `- Stage ${stage.stage} (${stage.method})${stageMarker}: status=${stage.status}, instruction=${stage.instruction.length} chars, response=${stage.response.length} chars`,
        ...collapsible('Stage Instruction', stage.instruction),
        '',
        ...collapsible('Stage Response', stage.response),
        '',
      );
    }
  }

  out.push('');
  return out;
}
|
||||||
|
|
||||||
|
/**
 * Renders one movement as Markdown lines: heading with marker, optional
 * collapsible instruction, its phases (ordered by start time, then phase
 * number), the movement result, and a trailing horizontal rule.
 *
 * @param isLastMovement - Passed to `movementMarker` to flag an aborted run's
 *   unfinished final movement.
 */
function renderMovementSection(
  movement: TraceMovement,
  params: TraceReportParams,
  isLastMovement: boolean,
): string[] {
  const marker = movementMarker(movement, params.status, isLastMovement);
  const heading =
    `## Iteration ${movement.iteration}: ${movement.step} (persona: ${movement.persona})`
    + `${marker ? ` ${marker}` : ''} - ${movement.startedAt}`;
  const out: string[] = [heading, ''];

  if (movement.instruction) {
    out.push(
      `- Movement Instruction: ${movement.instruction.length} chars`,
      '<details><summary>Instruction</summary>',
      '',
      movement.instruction,
      '',
      '</details>',
      '',
    );
  }

  // Sort a copy so the movement's own phase order is left alone.
  const orderedPhases = [...movement.phases].sort(
    (left, right) => left.startedAt.localeCompare(right.startedAt) || left.phase - right.phase,
  );
  for (const phase of orderedPhases) {
    out.push(...renderPhaseSection(phase, params.status));
  }

  const { result } = movement;
  if (!result) {
    out.push(`- Movement Status: ${movement.completedAt ? 'aborted' : 'in_progress'}`);
  } else {
    out.push(
      `- Movement Status: ${result.status}`,
      `- Movement Response: ${result.content.length} chars`,
    );
    if (result.matchMethod) {
      out.push(`- Match Method: ${result.matchMethod}`);
    }
    if (result.matchedRuleIndex != null) {
      out.push(`- Matched Rule Index: ${result.matchedRuleIndex}`);
    }
    if (result.error) {
      out.push(`- Error: ${result.error}`);
    }
    out.push('<details><summary>Movement Response</summary>', '', result.content, '', '</details>');
  }

  out.push('', '---', '');
  return out;
}
|
||||||
|
|
||||||
|
/**
 * Groups sorted movements into render blocks.
 *
 * Four or more consecutive movements that alternate between two different
 * steps (A, B, A, B, ...) become one loop block that extends for as long as
 * the alternation continues. Every other movement becomes its own block.
 */
function buildRenderBlocks(sorted: TraceMovement[]): RenderBlock[] {
  const blocks: RenderBlock[] = [];
  let cursor = 0;

  while (cursor < sorted.length) {
    // Step name at an offset from the cursor (undefined past the end).
    const stepAt = (offset: number): string | undefined => sorted[cursor + offset]?.step;

    const startsLoop =
      cursor + 3 < sorted.length
      && stepAt(0) !== stepAt(1)
      && stepAt(0) === stepAt(2)
      && stepAt(1) === stepAt(3);

    if (!startsLoop) {
      blocks.push({ kind: 'movement', movement: sorted[cursor]! });
      cursor += 1;
      continue;
    }

    // Extend while the steps keep alternating: even offsets match the first
    // step, odd offsets the second.
    let end = cursor + 4;
    while (end < sorted.length && sorted[end]!.step === stepAt((end - cursor) % 2)) {
      end += 1;
    }
    blocks.push({ kind: 'loop', movements: sorted.slice(cursor, end) });
    cursor = end;
  }

  return blocks;
}
|
||||||
|
|
||||||
|
/**
 * Renders a loop block: a summary heading with the iteration range, the two
 * alternating steps and the cycle count, followed by a collapsed section
 * holding every movement of the loop (non-empty lines indented).
 *
 * The final movement of the block is rendered with `isLastMovement = true`.
 */
function renderLoopBlock(block: LoopBlock, params: TraceReportParams): string[] {
  const { movements } = block;
  const lastIndex = movements.length - 1;
  const first = movements[0]!;
  const second = movements[1]!;
  const last = movements[lastIndex]!;
  const cycleCount = Math.floor(movements.length / 2);

  const out: string[] = [
    `## Iteration ${first.iteration}-${last.iteration}: ${first.step} ↔ ${second.step} loop (${cycleCount} cycles) ⚠️`,
    '',
    `<details><summary>Loop details (${movements.length} movements)</summary>`,
    '',
  ];

  for (const [position, movement] of movements.entries()) {
    const section = renderMovementSection(movement, params, position === lastIndex);
    for (const line of section) {
      // Blank lines stay blank; everything else is indented.
      out.push(line ? ` ${line}` : line);
    }
  }

  out.push('</details>', '', '---', '');
  return out;
}
|
||||||
|
|
||||||
|
/**
 * Renders the complete execution trace as a Markdown document.
 *
 * Emits a header (task, run, start/end, status, iterations, optional reason)
 * followed by the movements in start-time order, with alternating-step runs
 * collapsed into loop blocks.
 *
 * @param params - Report parameters; validated with `assertTraceParams`.
 * @param traceStartedAt - Start time shown in the header; must be non-empty.
 * @param movements - Movements to render; not mutated.
 * @throws Error when the parameters, `traceStartedAt`, or a movement is invalid.
 */
export function renderTraceReportMarkdown(
  params: TraceReportParams,
  traceStartedAt: string,
  movements: TraceMovement[],
): string {
  assertTraceParams(params);
  if (!traceStartedAt) {
    throw new Error('traceStartedAt is required');
  }

  const statusLabel = params.status === 'completed' ? '✅ completed' : '❌ aborted';
  const out: string[] = [
    `# Execution Trace: ${params.pieceName}`,
    '',
    `- Task: ${params.task}`,
    `- Run: ${params.runSlug}`,
    `- Started: ${traceStartedAt}`,
    `- Ended: ${params.endTime}`,
    `- Status: ${statusLabel}`,
    `- Iterations: ${params.iterations}`,
  ];
  if (params.reason) {
    out.push(`- Reason: ${params.reason}`);
  }
  out.push('', '---', '');

  const ordered = [...movements].sort(
    (left, right) =>
      left.startedAt.localeCompare(right.startedAt) || left.iteration - right.iteration,
  );
  for (const [position, movement] of ordered.entries()) {
    assertTraceMovement(movement, position);
  }

  const blocks = buildRenderBlocks(ordered);
  const finalBlockIndex = blocks.length - 1;
  for (const [blockIndex, block] of blocks.entries()) {
    const rendered =
      block.kind === 'loop'
        ? renderLoopBlock(block, params)
        : renderMovementSection(block.movement, params, blockIndex === finalBlockIndex);
    out.push(...rendered);
  }

  return out.join('\n');
}
|
||||||
48
src/features/tasks/execute/traceReportTypes.ts
Normal file
48
src/features/tasks/execute/traceReportTypes.ts
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
import type { PhaseName } from '../../../core/piece/index.js';
|
||||||
|
import type { JudgeStageEntry } from '../../../core/piece/types.js';
|
||||||
|
|
||||||
|
/**
 * Trace report mode selector: `'off'`, `'redacted'` or `'full'`.
 * NOTE(review): presumably controls whether a report is produced and how much
 * prompt/response content it contains — confirm against the renderer.
 */
export type TraceReportMode = 'off' | 'redacted' | 'full';
|
||||||
|
|
||||||
|
/** Run-level metadata used to render and write a trace report. */
export interface TraceReportParams {
  /** Destination path of the trace report file. */
  tracePath: string;
  /** Piece name; shown in the report title. */
  pieceName: string;
  /** Task text; shown in the report header. */
  task: string;
  /** Run slug; shown in the report header. */
  runSlug: string;
  /** Final run status; selects the status label in the header. */
  status: 'completed' | 'aborted';
  /** Iteration count; shown in the report header. */
  iterations: number;
  /** End timestamp; shown in the report header. */
  endTime: string;
  /** Optional reason; the header includes it only when it is non-empty. */
  reason?: string;
}
|
||||||
|
|
||||||
|
/** One phase execution recorded inside a traced movement. */
export interface TracePhase {
  /** Identifier of this phase execution. */
  phaseExecutionId: string;
  /** Numeric phase: 1, 2 or 3. */
  phase: 1 | 2 | 3;
  /** Phase name as defined by the piece core (`PhaseName`). */
  phaseName: PhaseName;
  /** Instruction text recorded for the phase. */
  instruction: string;
  /** System prompt recorded for the phase. */
  systemPrompt: string;
  /** User instruction recorded for the phase. */
  userInstruction: string;
  /** Response text, when one was recorded. */
  response?: string;
  /** Status string, when one was recorded. */
  status?: string;
  /** Error text, when one was recorded. */
  error?: string;
  /** Start timestamp of the phase. */
  startedAt: string;
  /** Completion timestamp, when the phase completed. */
  completedAt?: string;
  /** Judge stage entries attached to the phase, if any. */
  judgeStages?: JudgeStageEntry[];
}
|
||||||
|
|
||||||
|
/** One movement (step execution) of a traced run, with its phases and result. */
export interface TraceMovement {
  /** Step name of the movement. */
  step: string;
  /** Persona recorded for the movement. */
  persona: string;
  /** Iteration number; used as the tie-breaker when movements are sorted. */
  iteration: number;
  /** Instruction text, when one was recorded. */
  instruction?: string;
  /** Start timestamp; primary sort key when movements are rendered. */
  startedAt: string;
  /** Completion timestamp, when the movement completed. */
  completedAt?: string;
  /** Phase executions recorded for this movement. */
  phases: TracePhase[];
  /** Outcome of the movement, when one was recorded. */
  result?: {
    status: string;
    content: string;
    error?: string;
    matchedRuleIndex?: number;
    matchedRuleMethod?: string;
    matchMethod?: string;
  };
}
|
||||||
81
src/features/tasks/execute/traceReportWriter.ts
Normal file
81
src/features/tasks/execute/traceReportWriter.ts
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
import { writeFileAtomic } from '../../../infra/config/index.js';
|
||||||
|
import type { SessionLogger } from './sessionLogger.js';
|
||||||
|
import type { TraceReportMode } from './traceReport.js';
|
||||||
|
import {
|
||||||
|
assertTraceParams,
|
||||||
|
renderTraceReportFromLogs,
|
||||||
|
renderTraceReportFromRecords,
|
||||||
|
} from './traceReport.js';
|
||||||
|
|
||||||
|
/** Fixed, per-run configuration captured by the trace report writer. */
interface TraceReportWriterParams {
  /** Source of in-memory NDJSON and prompt records for the fallback render. */
  sessionLogger: SessionLogger;
  /** Path of the NDJSON session log handed to the log-based renderer. */
  ndjsonLogPath: string;
  /** Path the rendered Markdown is written to. */
  tracePath: string;
  /** Piece name forwarded into the trace parameters. */
  pieceName: string;
  /** Task text forwarded into the trace parameters. */
  task: string;
  /** Run slug forwarded into the trace parameters. */
  runSlug: string;
  /** Optional prompt log path handed to the log-based renderer. */
  promptLogPath?: string;
  /** Trace report mode handed to both renderers. */
  mode: TraceReportMode;
  /** Logger used to report a skipped (repeated) write. */
  logger: {
    info: (message: string, data?: unknown) => void;
  };
}
|
||||||
|
|
||||||
|
/** Final run outcome supplied when the trace report is written. */
interface WriteTraceReportInput {
  /** Final run status. */
  status: 'completed' | 'aborted';
  /** Iteration count of the run. */
  iterations: number;
  /** End timestamp of the run. */
  endTime: string;
  /** Optional reason forwarded into the trace parameters. */
  reason?: string;
}
|
||||||
|
|
||||||
|
/**
 * Creates a write-once function that renders the trace report and stores it
 * at `params.tracePath`.
 *
 * The returned function renders from the on-disk logs first. If that fails
 * with an error whose message starts with
 * `No session records found for trace report:`, it renders from the records
 * held by `params.sessionLogger` instead; any other error is rethrown.
 *
 * Only the first invocation attempts a write. The "written" flag is set
 * before rendering starts, so later invocations only log an info message and
 * return, even if the first attempt threw.
 *
 * @param params - Per-run configuration captured by the returned function.
 * @returns A function that takes the final run outcome and writes the report.
 */
export function createTraceReportWriter(params: TraceReportWriterParams): (input: WriteTraceReportInput) => void {
  let alreadyInvoked = false;

  return (input: WriteTraceReportInput): void => {
    if (alreadyInvoked) {
      params.logger.info('Trace report write skipped because it has already been written', {
        status: input.status,
        iterations: input.iterations,
      });
      return;
    }
    alreadyInvoked = true;

    const { tracePath, pieceName, task, runSlug } = params;
    const { status, iterations, reason, endTime } = input;
    const traceParams = {
      tracePath,
      pieceName,
      task,
      runSlug,
      status,
      iterations,
      reason,
      endTime,
    } as const;
    assertTraceParams(traceParams);

    let markdown: string | undefined;
    try {
      markdown = renderTraceReportFromLogs(traceParams, params.ndjsonLogPath, params.promptLogPath, params.mode);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      const isMissingRecords = message.startsWith('No session records found for trace report:');
      if (!isMissingRecords) {
        throw error;
      }
      // The log files yielded no records: fall back to the in-memory records.
      markdown = renderTraceReportFromRecords(
        traceParams,
        params.sessionLogger.getNdjsonRecords(),
        params.sessionLogger.getPromptRecords(),
        params.mode,
      );
    }

    if (markdown) {
      writeFileAtomic(params.tracePath, markdown);
    }
  };
}
|
||||||
@ -11,6 +11,7 @@ export type {
|
|||||||
NdjsonPieceAbort,
|
NdjsonPieceAbort,
|
||||||
NdjsonPhaseStart,
|
NdjsonPhaseStart,
|
||||||
NdjsonPhaseComplete,
|
NdjsonPhaseComplete,
|
||||||
|
NdjsonPhaseJudgeStage,
|
||||||
NdjsonInteractiveStart,
|
NdjsonInteractiveStart,
|
||||||
NdjsonInteractiveEnd,
|
NdjsonInteractiveEnd,
|
||||||
NdjsonRecord,
|
NdjsonRecord,
|
||||||
|
|||||||
@ -21,6 +21,7 @@ export type {
|
|||||||
NdjsonPieceAbort,
|
NdjsonPieceAbort,
|
||||||
NdjsonPhaseStart,
|
NdjsonPhaseStart,
|
||||||
NdjsonPhaseComplete,
|
NdjsonPhaseComplete,
|
||||||
|
NdjsonPhaseJudgeStage,
|
||||||
NdjsonInteractiveStart,
|
NdjsonInteractiveStart,
|
||||||
NdjsonInteractiveEnd,
|
NdjsonInteractiveEnd,
|
||||||
NdjsonRecord,
|
NdjsonRecord,
|
||||||
|
|||||||
@ -128,6 +128,11 @@ export class DebugLogger {
|
|||||||
return this.debugLogFile;
|
return this.debugLogFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the current debug prompts log file path (the `debugPromptsLogFile` field); the declared return type allows `null`. */
|
||||||
|
getPromptsLogFile(): string | null {
|
||||||
|
return this.debugPromptsLogFile;
|
||||||
|
}
|
||||||
|
|
||||||
/** Format log message with timestamp and level */
|
/** Format log message with timestamp and level */
|
||||||
private static formatLogMessage(level: string, component: string, message: string, data?: unknown): string {
|
private static formatLogMessage(level: string, component: string, message: string, data?: unknown): string {
|
||||||
const timestamp = new Date().toISOString();
|
const timestamp = new Date().toISOString();
|
||||||
@ -223,6 +228,10 @@ export function getDebugLogFile(): string | null {
|
|||||||
return DebugLogger.getInstance().getLogFile();
|
return DebugLogger.getInstance().getLogFile();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the debug prompts log file path reported by the `DebugLogger` singleton; the declared return type allows `null`. */
export function getDebugPromptsLogFile(): string | null {
|
||||||
|
return DebugLogger.getInstance().getPromptsLogFile();
|
||||||
|
}
|
||||||
|
|
||||||
/** Forwards a message to the `DebugLogger` singleton's `writeLog` with level `'DEBUG'`, passing through the component, message and optional data. */
export function debugLog(component: string, message: string, data?: unknown): void {
|
export function debugLog(component: string, message: string, data?: unknown): void {
|
||||||
DebugLogger.getInstance().writeLog('DEBUG', component, message, data);
|
DebugLogger.getInstance().writeLog('DEBUG', component, message, data);
|
||||||
}
|
}
|
||||||
|
|||||||
50
src/shared/utils/phaseExecutionId.ts
Normal file
50
src/shared/utils/phaseExecutionId.ts
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/** Components of a phase execution id (`step:iteration:phase:sequence`). */
export interface PhaseExecutionIdParts {
  /** Step name; must be non-empty. */
  step: string;
  /** Iteration number; a positive integer. */
  iteration: number;
  /** Phase number: 1, 2 or 3. */
  phase: 1 | 2 | 3;
  /** Sequence number; a positive integer. */
  sequence: number;
}
|
||||||
|
|
||||||
|
/**
 * Builds a phase execution id of the form `step:iteration:phase:sequence`.
 *
 * @param parts - Components of the id.
 * @returns The colon-joined id.
 * @throws Error when `step` is empty, `iteration` or `sequence` is not a
 *   positive integer, or `phase` is not 1, 2 or 3. Checks run in that field
 *   order (step, iteration, phase, sequence).
 */
export function buildPhaseExecutionId(parts: PhaseExecutionIdParts): string {
  const { step, iteration, sequence } = parts;
  // Widened to number so out-of-range values from untyped callers are still checked at runtime.
  const phase: number = parts.phase;
  const isPositiveInteger = (value: number): boolean => Number.isInteger(value) && value > 0;

  if (!step) {
    throw new Error('phaseExecutionId requires step');
  }
  if (!isPositiveInteger(iteration)) {
    throw new Error(`phaseExecutionId requires positive iteration: ${iteration}`);
  }
  if (![1, 2, 3].includes(phase)) {
    throw new Error(`phaseExecutionId requires phase 1|2|3: ${phase}`);
  }
  if (!isPositiveInteger(sequence)) {
    throw new Error(`phaseExecutionId requires positive sequence: ${sequence}`);
  }

  return [step, iteration, phase, sequence].join(':');
}
|
||||||
|
|
||||||
|
/**
 * Parses a phase execution id of the form `step:iteration:phase:sequence`.
 *
 * The last three colon-separated segments are read as iteration, phase and
 * sequence; everything before them is the step. Parsing from the right keeps
 * ids round-trippable when the step itself contains `:`, which
 * `buildPhaseExecutionId` does not forbid.
 *
 * Numeric segments must be canonical positive decimal integers (digits only,
 * no sign, whitespace, leading zero, exponent or hex prefix), i.e. exactly
 * what interpolating a positive integer into the id produces.
 *
 * @param phaseExecutionId - The id text to parse.
 * @returns The parsed parts, or `undefined` when the id is malformed
 *   (too few segments, empty step, non-canonical or non-positive numbers,
 *   or a phase other than 1, 2 or 3).
 */
export function parsePhaseExecutionId(
  phaseExecutionId: string,
): PhaseExecutionIdParts | undefined {
  const segments = phaseExecutionId.split(':');
  if (segments.length < 4) {
    return undefined;
  }

  // Strict decimal parsing: Number() alone would accept ' 1', '1e1', '0x1', '01'.
  const parsePositiveInteger = (text: string | undefined): number | undefined => {
    if (text === undefined || !/^[1-9]\d*$/.test(text)) {
      return undefined;
    }
    const value = Number(text);
    return Number.isSafeInteger(value) ? value : undefined;
  };

  const [iterationStr, phaseStr, sequenceStr] = segments.slice(-3);
  // Re-join the leading segments so a step containing ':' is preserved intact.
  const step = segments.slice(0, -3).join(':');
  if (!step) {
    return undefined;
  }

  const iteration = parsePositiveInteger(iterationStr);
  const phase = parsePositiveInteger(phaseStr);
  const sequence = parsePositiveInteger(sequenceStr);
  if (iteration === undefined || phase === undefined || sequence === undefined) {
    return undefined;
  }
  if (phase !== 1 && phase !== 2 && phase !== 3) {
    return undefined;
  }

  return {
    step,
    iteration,
    // Safe: the guard above restricts phase to 1, 2 or 3.
    phase: phase as 1 | 2 | 3,
    sequence,
  };
}
|
||||||
@ -79,23 +79,44 @@ export interface NdjsonPieceAbort {
|
|||||||
/** NDJSON record with `type: 'phase_start'`, describing the start of a phase. */
export interface NdjsonPhaseStart {
  type: 'phase_start';
  /** Step name the phase belongs to. */
  step: string;
  /** Iteration number, when recorded. */
  iteration?: number;
  /** Numeric phase: 1, 2 or 3. */
  phase: 1 | 2 | 3;
  /** Phase name. */
  phaseName: 'execute' | 'report' | 'judge';
  /** Identifier of this phase execution, when recorded. */
  phaseExecutionId?: string;
  /** Timestamp of the record. */
  timestamp: string;
  /** Instruction text, when recorded. */
  instruction?: string;
  /** System prompt, when recorded. */
  systemPrompt?: string;
  /** User instruction, when recorded. */
  userInstruction?: string;
}
|
||||||
|
|
||||||
/** NDJSON record with `type: 'phase_complete'`, describing the end of a phase. */
export interface NdjsonPhaseComplete {
  type: 'phase_complete';
  /** Step name the phase belongs to. */
  step: string;
  /** Iteration number, when recorded. */
  iteration?: number;
  /** Numeric phase: 1, 2 or 3. */
  phase: 1 | 2 | 3;
  /** Phase name. */
  phaseName: 'execute' | 'report' | 'judge';
  /** Identifier of this phase execution, when recorded. */
  phaseExecutionId?: string;
  /** Status string of the completed phase. */
  status: string;
  /** Content text, when recorded. */
  content?: string;
  /** Timestamp of the record. */
  timestamp: string;
  /** Error text, when recorded. */
  error?: string;
}
|
||||||
|
|
||||||
|
/**
 * NDJSON record with `type: 'phase_judge_stage'`, describing one internal
 * stage of the judge phase (phase 3).
 */
export interface NdjsonPhaseJudgeStage {
  type: 'phase_judge_stage';
  /** Step name the judge phase belongs to. */
  step: string;
  /** Iteration number, when recorded. */
  iteration?: number;
  /** Always 3: judge stages occur only in phase 3. */
  phase: 3;
  /** Always `'judge'`. */
  phaseName: 'judge';
  /** Identifier of the phase execution, when recorded. */
  phaseExecutionId?: string;
  /** Stage number within the judge phase: 1, 2 or 3. */
  stage: 1 | 2 | 3;
  /** Judging method used by this stage. */
  method: 'structured_output' | 'phase3_tag' | 'ai_judge';
  /** Outcome of the stage. */
  status: 'done' | 'error' | 'skipped';
  /** Instruction text recorded for the stage. */
  instruction: string;
  /** Response text recorded for the stage. */
  response: string;
  /** Timestamp of the record. */
  timestamp: string;
}
|
||||||
|
|
||||||
export interface NdjsonInteractiveStart {
|
export interface NdjsonInteractiveStart {
|
||||||
type: 'interactive_start';
|
type: 'interactive_start';
|
||||||
timestamp: string;
|
timestamp: string;
|
||||||
@ -116,6 +137,7 @@ export type NdjsonRecord =
|
|||||||
| NdjsonPieceAbort
|
| NdjsonPieceAbort
|
||||||
| NdjsonPhaseStart
|
| NdjsonPhaseStart
|
||||||
| NdjsonPhaseComplete
|
| NdjsonPhaseComplete
|
||||||
|
| NdjsonPhaseJudgeStage
|
||||||
| NdjsonInteractiveStart
|
| NdjsonInteractiveStart
|
||||||
| NdjsonInteractiveEnd;
|
| NdjsonInteractiveEnd;
|
||||||
|
|
||||||
@ -124,7 +146,10 @@ export interface PromptLogRecord {
|
|||||||
movement: string;
|
movement: string;
|
||||||
phase: 1 | 2 | 3;
|
phase: 1 | 2 | 3;
|
||||||
iteration: number;
|
iteration: number;
|
||||||
|
phaseExecutionId?: string;
|
||||||
prompt: string;
|
prompt: string;
|
||||||
|
systemPrompt: string;
|
||||||
|
userInstruction: string;
|
||||||
response: string;
|
response: string;
|
||||||
timestamp: string;
|
timestamp: string;
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user