diff --git a/src/__tests__/ai-judge.test.ts b/src/__tests__/ai-judge.test.ts new file mode 100644 index 0000000..d86e68b --- /dev/null +++ b/src/__tests__/ai-judge.test.ts @@ -0,0 +1,53 @@ +/** + * Tests for AI judge (ai() condition evaluation) + */ + +import { describe, it, expect } from 'vitest'; +import { detectJudgeIndex, buildJudgePrompt } from '../claude/client.js'; + +describe('detectJudgeIndex', () => { + it('should detect [JUDGE:1] as index 0', () => { + expect(detectJudgeIndex('[JUDGE:1]')).toBe(0); + }); + + it('should detect [JUDGE:3] as index 2', () => { + expect(detectJudgeIndex('Some output [JUDGE:3] more text')).toBe(2); + }); + + it('should return -1 for no match', () => { + expect(detectJudgeIndex('No judge tag here')).toBe(-1); + }); + + it('should return -1 for [JUDGE:0]', () => { + expect(detectJudgeIndex('[JUDGE:0]')).toBe(-1); + }); + + it('should be case-insensitive', () => { + expect(detectJudgeIndex('[judge:2]')).toBe(1); + }); +}); + +describe('buildJudgePrompt', () => { + it('should build a well-structured judge prompt', () => { + const agentOutput = 'Code implementation complete.\n\nAll tests pass.'; + const conditions = [ + { index: 0, text: 'No issues found' }, + { index: 1, text: 'Issues detected that need fixing' }, + ]; + + const prompt = buildJudgePrompt(agentOutput, conditions); + + expect(prompt).toContain('# Judge Task'); + expect(prompt).toContain('Code implementation complete.'); + expect(prompt).toContain('All tests pass.'); + expect(prompt).toContain('| 1 | No issues found |'); + expect(prompt).toContain('| 2 | Issues detected that need fixing |'); + expect(prompt).toContain('[JUDGE:N]'); + }); + + it('should handle single condition', () => { + const prompt = buildJudgePrompt('output', [{ index: 0, text: 'Always true' }]); + + expect(prompt).toContain('| 1 | Always true |'); + }); +}); diff --git a/src/__tests__/instructionBuilder.test.ts b/src/__tests__/instructionBuilder.test.ts index 77e9eaa..5c54322 100644 --- a/src/__tests__/instructionBuilder.test.ts +++ b/src/__tests__/instructionBuilder.test.ts @@ -5,12 +5,15 @@ import { describe, it, expect } from 'vitest'; import { buildInstruction, + buildReportInstruction, + buildStatusJudgmentInstruction, buildExecutionMetadata, renderExecutionMetadata, - renderStatusRulesHeader, generateStatusRulesFromRules, isReportObjectConfig, type InstructionContext, + type ReportInstructionContext, + type StatusJudgmentContext, } from '../workflow/instruction-builder.js'; import type { WorkflowStep, WorkflowRule } from '../models/types.js'; @@ -294,30 +297,6 @@ describe('instruction-builder', () => { }); }); - describe('renderStatusRulesHeader', () => { - it('should render Japanese header when language is ja', () => { - const header = renderStatusRulesHeader('ja'); - - expect(header).toContain('# ⚠️ 必須: ステータス出力ルール ⚠️'); - expect(header).toContain('このタグがないとワークフローが停止します'); - expect(header).toContain('最終出力には必ず以下のルールに従ったステータスタグを含めてください'); - }); - - it('should render English header when language is en', () => { - const header = renderStatusRulesHeader('en'); - - expect(header).toContain('# ⚠️ Required: Status Output Rules ⚠️'); - expect(header).toContain('The workflow will stop without this tag'); - expect(header).toContain('Your final output MUST include a status tag'); - }); - - it('should end with trailing empty line', () => { - const header = renderStatusRulesHeader('en'); - - expect(header).toMatch(/\n$/); - }); - }); - describe('generateStatusRulesFromRules', () => { const rules: WorkflowRule[] = [ { condition: '要件が明確で実装可能', next: 'implement' }, @@ -383,8 +362,8 @@ describe('instruction-builder', () => { }); }); - describe('buildInstruction with rules', () => { - it('should auto-generate status rules from rules', () => { + describe('buildInstruction with rules (Phase 1 — status rules injection)', () => { + it('should include status rules when tag-based rules exist', () => { const step = createMinimalStep('Do work'); step.name = 'plan'; step.rules = [ @@ -395,12 +374,9 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - // Should contain status header - expect(result).toContain('⚠️ Required: Status Output Rules ⚠️'); - // Should contain auto-generated criteria table - expect(result).toContain('## Decision Criteria'); - expect(result).toContain('`[PLAN:1]`'); - expect(result).toContain('`[PLAN:2]`'); + expect(result).toContain('Decision Criteria'); + expect(result).toContain('[PLAN:1]'); + expect(result).toContain('[PLAN:2]'); }); it('should not add status rules when rules do not exist', () => { @@ -409,7 +385,6 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('⚠️ Required'); expect(result).not.toContain('Decision Criteria'); }); @@ -420,7 +395,6 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).not.toContain('⚠️ Required'); expect(result).not.toContain('Decision Criteria'); }); }); @@ -444,7 +418,7 @@ describe('instruction-builder', () => { expect(result).toContain('- Step: implement'); }); - it('should include single report file when report is a string', () => { + it('should NOT include report info even when step has report (phase separation)', () => { const step = createMinimalStep('Do work'); step.name = 'plan'; step.report = '00-plan.md'; @@ -455,14 +429,13 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('- Report Directory: 20260129-test/'); - expect(result).toContain('- Report File: 20260129-test/00-plan.md'); - expect(result).not.toContain('Report Files:'); + expect(result).toContain('## Workflow Context'); + expect(result).not.toContain('Report Directory'); + expect(result).not.toContain('Report File'); }); - it('should include multiple report files when report is ReportConfig[]', () => { + it('should NOT include report info for ReportConfig[] (phase separation)', () => { const step = createMinimalStep('Do work'); - step.name = 'implement'; step.report = [ { label: 'Scope', path: '01-scope.md' }, { label: 'Decisions', path: '02-decisions.md' }, @@ -474,16 +447,12 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('- Report Directory: 20260129-test/'); - expect(result).toContain('- Report Files:'); - expect(result).toContain(' - Scope: 20260129-test/01-scope.md'); - expect(result).toContain(' - Decisions: 20260129-test/02-decisions.md'); - expect(result).not.toContain('Report File:'); + expect(result).not.toContain('Report Directory'); + expect(result).not.toContain('Report Files'); }); - it('should include report file when report is ReportObjectConfig', () => { + it('should NOT include report info for ReportObjectConfig (phase separation)', () => { const step = createMinimalStep('Do work'); - step.name = 'plan'; step.report = { name: '00-plan.md' }; const context = createMinimalContext({ reportDir: '20260129-test', @@ -492,33 +461,6 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('- Report Directory: 20260129-test/'); - expect(result).toContain('- Report File: 20260129-test/00-plan.md'); - expect(result).not.toContain('Report Files:'); - }); - - it('should NOT include report info when reportDir is undefined', () => { - const step = createMinimalStep('Do work'); - step.report = '00-plan.md'; - const context = createMinimalContext({ language: 'en' }); - - const result = buildInstruction(step, context); - - expect(result).toContain('## Workflow Context'); - expect(result).not.toContain('Report Directory'); - expect(result).not.toContain('Report File'); - }); - - it('should NOT include report info when step has no report', () => { - const step = createMinimalStep('Do work'); - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('## Workflow Context'); expect(result).not.toContain('Report Directory'); expect(result).not.toContain('Report File'); }); @@ -534,99 +476,10 @@ describe('instruction-builder', () => { expect(result).toContain('- Step Iteration: 3(このステップの実行回数)'); }); - - it('should NOT include .takt/reports/ prefix in report paths', () => { - const step = createMinimalStep('Do work'); - step.report = '00-plan.md'; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).not.toContain('.takt/reports/'); - }); }); - describe('ReportObjectConfig order/format injection', () => { - it('should inject order before instruction_template', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - order: '**Output:** Write to {report:00-plan.md}', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - const orderIdx = result.indexOf('**Output:** Write to 20260129-test/00-plan.md'); - const instructionsIdx = result.indexOf('## Instructions'); - expect(orderIdx).toBeGreaterThan(-1); - expect(instructionsIdx).toBeGreaterThan(orderIdx); - }); - - it('should inject format after instruction_template', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - format: '**Format:**\n```markdown\n# Plan\n```', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - const instructionsIdx = result.indexOf('## Instructions'); - const formatIdx = result.indexOf('**Format:**'); - expect(formatIdx).toBeGreaterThan(instructionsIdx); - }); - - it('should inject both order before and format after instruction_template', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - order: '**Output:** Write to {report:00-plan.md}', - format: '**Format:**\n```markdown\n# Plan\n```', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - const orderIdx = result.indexOf('**Output:** Write to 20260129-test/00-plan.md'); - const instructionsIdx = result.indexOf('## Instructions'); - const formatIdx = result.indexOf('**Format:**'); - expect(orderIdx).toBeGreaterThan(-1); - expect(instructionsIdx).toBeGreaterThan(orderIdx); - expect(formatIdx).toBeGreaterThan(instructionsIdx); - }); - - it('should replace {report:filename} in order text', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - order: 'Output to {report:00-plan.md} file.', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('Output to 20260129-test/00-plan.md file.'); - expect(result).not.toContain('{report:00-plan.md}'); - }); - - it('should auto-inject report output instruction when report is a simple string', () => { + describe('buildInstruction report-free (phase separation)', () => { + it('should NOT include report output instruction in buildInstruction', () => { const step = createMinimalStep('Do work'); step.report = '00-plan.md'; const context = createMinimalContext({ @@ -636,20 +489,14 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - // Auto-generated report output instruction should be injected before ## Instructions - expect(result).toContain('**Report output:** Output to the `Report File` specified above.'); - expect(result).toContain('- If file does not exist: Create new file'); - const reportIdx = result.indexOf('**Report output:**'); - const instructionsIdx = result.indexOf('## Instructions'); - expect(reportIdx).toBeGreaterThan(-1); - expect(instructionsIdx).toBeGreaterThan(reportIdx); + expect(result).not.toContain('**Report output:**'); + expect(result).not.toContain('Report File'); + expect(result).not.toContain('Report Directory'); }); - it('should auto-inject report output instruction when report is ReportConfig[]', () => { + it('should NOT include report format in buildInstruction', () => { const step = createMinimalStep('Do work'); - step.report = [ - { label: 'Scope', path: '01-scope.md' }, - ]; + step.report = { name: '00-plan.md', format: '**Format:**\n# Plan' }; const context = createMinimalContext({ reportDir: '20260129-test', language: 'en', @@ -657,84 +504,10 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - // Auto-generated multi-file report output instruction - expect(result).toContain('**Report output:** Output to the `Report Files` specified above.'); - expect(result).toContain('- If file does not exist: Create new file'); + expect(result).not.toContain('**Format:**'); }); - it('should replace {report:filename} in instruction_template too', () => { - const step = createMinimalStep('Write to {report:00-plan.md}'); - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('Write to 20260129-test/00-plan.md'); - expect(result).not.toContain('{report:00-plan.md}'); - }); - - it('should replace {step_iteration} in order/format text', () => { - const step = createMinimalStep('Do work'); - step.report = { - name: '00-plan.md', - order: 'Append ## Iteration {step_iteration} section', - }; - const context = createMinimalContext({ - reportDir: '20260129-test', - stepIteration: 3, - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('Append ## Iteration 3 section'); - }); - - it('should auto-inject Japanese report output instruction for ja language', () => { - const step = createMinimalStep('作業する'); - step.report = { name: '00-plan.md' }; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'ja', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('**レポート出力:** `Report File` に出力してください。'); - expect(result).toContain('- ファイルが存在しない場合: 新規作成'); - expect(result).toContain('- ファイルが存在する場合: `## Iteration 1` セクションを追記'); - }); - - it('should auto-inject Japanese multi-file report output instruction', () => { - const step = createMinimalStep('作業する'); - step.report = [{ label: 'Scope', path: '01-scope.md' }]; - const context = createMinimalContext({ - reportDir: '20260129-test', - language: 'ja', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('**レポート出力:** Report Files に出力してください。'); - }); - - it('should replace {step_iteration} in auto-generated report output instruction', () => { - const step = createMinimalStep('Do work'); - step.report = '00-plan.md'; - const context = createMinimalContext({ - reportDir: '20260129-test', - stepIteration: 5, - language: 'en', - }); - - const result = buildInstruction(step, context); - - expect(result).toContain('Append with `## Iteration 5` section'); - }); - - it('should prefer explicit order over auto-generated report instruction', () => { + it('should NOT include report order in buildInstruction', () => { const step = createMinimalStep('Do work'); step.report = { name: '00-plan.md', @@ -747,13 +520,11 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('Custom order instruction'); - expect(result).not.toContain('**Report output:**'); + expect(result).not.toContain('Custom order instruction'); }); - it('should auto-inject report output for ReportObjectConfig without order', () => { - const step = createMinimalStep('Do work'); - step.report = { name: '00-plan.md', format: '# Plan' }; + it('should still replace {report:filename} in instruction_template', () => { + const step = createMinimalStep('Write to {report:00-plan.md}'); const context = createMinimalContext({ reportDir: '20260129-test', language: 'en', @@ -761,20 +532,195 @@ describe('instruction-builder', () => { const result = buildInstruction(step, context); - expect(result).toContain('**Report output:** Output to the `Report File` specified above.'); + expect(result).toContain('Write to 20260129-test/00-plan.md'); + expect(result).not.toContain('{report:00-plan.md}'); }); + }); - it('should NOT inject report output when no reportDir', () => { + describe('buildReportInstruction (phase 2)', () => { + function createReportContext(overrides: Partial = {}): ReportInstructionContext { + return { + cwd: '/project', + reportDir: '20260129-test', + stepIteration: 1, + language: 'en', + ...overrides, + }; + } + + it('should include execution context with working directory', () => { const step = createMinimalStep('Do work'); step.report = '00-plan.md'; - const context = createMinimalContext({ - language: 'en', - }); + const ctx = createReportContext({ cwd: '/my/project' }); - const result = buildInstruction(step, context); + const result = buildReportInstruction(step, ctx); + expect(result).toContain('Working Directory: /my/project'); + }); + + it('should include no-source-edit rule in execution rules', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('Do NOT modify project source files'); + }); + + it('should include no-commit and no-cd rules', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('Do NOT run git commit'); + expect(result).toContain('Do NOT use `cd`'); + }); + + it('should include report directory and file for string report', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext({ reportDir: '20260130-test' }); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('- Report Directory: 20260130-test/'); + expect(result).toContain('- Report File: 20260130-test/00-plan.md'); + }); + + it('should include report files for ReportConfig[] report', () => { + const step = createMinimalStep('Do work'); + step.report = [ + { label: 'Scope', path: '01-scope.md' }, + { label: 'Decisions', path: '02-decisions.md' }, + ]; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('- Report Directory: 20260129-test/'); + expect(result).toContain('- Report Files:'); + expect(result).toContain(' - Scope: 20260129-test/01-scope.md'); + expect(result).toContain(' - Decisions: 20260129-test/02-decisions.md'); + }); + + it('should include report file for ReportObjectConfig report', () => { + const step = createMinimalStep('Do work'); + step.report = { name: '00-plan.md' }; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('- Report File: 20260129-test/00-plan.md'); + }); + + it('should include auto-generated report output instruction', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('**Report output:** Output to the `Report File` specified above.'); + expect(result).toContain('- If file does not exist: Create new file'); + expect(result).toContain('Append with `## Iteration 1` section'); + }); + + it('should include explicit order instead of auto-generated', () => { + const step = createMinimalStep('Do work'); + step.report = { + name: '00-plan.md', + order: 'Output to {report:00-plan.md} file.', + }; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('Output to 20260129-test/00-plan.md file.'); expect(result).not.toContain('**Report output:**'); }); + + it('should include format from ReportObjectConfig', () => { + const step = createMinimalStep('Do work'); + step.report = { + name: '00-plan.md', + format: '**Format:**\n```markdown\n# Plan\n```', + }; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('**Format:**'); + expect(result).toContain('# Plan'); + }); + + it('should replace {step_iteration} in report output instruction', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext({ stepIteration: 5 }); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('Append with `## Iteration 5` section'); + }); + + it('should include instruction body text', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('## Instructions'); + expect(result).toContain('Output the results of your previous work as a report'); + }); + + it('should NOT include user request, previous response, or status rules', () => { + const step = createMinimalStep('Do work'); + step.report = '00-plan.md'; + step.rules = [ + { condition: 'Done', next: 'COMPLETE' }, + ]; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).not.toContain('User Request'); + expect(result).not.toContain('Previous Response'); + expect(result).not.toContain('Additional User Inputs'); + expect(result).not.toContain('Status Output Rules'); + }); + + it('should render Japanese report instruction', () => { + const step = createMinimalStep('作業する'); + step.report = { name: '00-plan.md' }; + const ctx = createReportContext({ language: 'ja' }); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('前のステップの作業結果をレポートとして出力してください'); + expect(result).toContain('プロジェクトのソースファイルを変更しないでください'); + expect(result).toContain('**レポート出力:** `Report File` に出力してください。'); + }); + + it('should throw error when step has no report config', () => { + const step = createMinimalStep('Do work'); + const ctx = createReportContext(); + + expect(() => buildReportInstruction(step, ctx)).toThrow('no report config'); + }); + + it('should include multi-file report output instruction for ReportConfig[]', () => { + const step = createMinimalStep('Do work'); + step.report = [{ label: 'Scope', path: '01-scope.md' }]; + const ctx = createReportContext(); + + const result = buildReportInstruction(step, ctx); + + expect(result).toContain('**Report output:** Output to the `Report Files` specified above.'); + }); }); describe('auto-injected User Request and Additional User Inputs sections', () => { @@ -910,6 +856,95 @@ describe('instruction-builder', () => { }); }); + describe('status rules injection — skip when all rules are ai()/aggregate', () => { + it('should NOT include status rules when all rules are ai() conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'ai("No issues")', next: 'COMPLETE', isAiCondition: true, aiConditionText: 'No issues' }, + { condition: 'ai("Issues found")', next: 'fix', isAiCondition: true, aiConditionText: 'Issues found' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Decision Criteria'); + expect(result).not.toContain('[TEST-STEP:'); + }); + + it('should include status rules with mixed regular and ai() conditions', () => { + const step = createMinimalStep('Do work'); + step.name = 'review'; + step.rules = [ + { condition: 'Error occurred', next: 'ABORT' }, + { condition: 'ai("Issues found")', next: 'fix', isAiCondition: true, aiConditionText: 'Issues found' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).toContain('Decision Criteria'); + expect(result).toContain('[REVIEW:1]'); + }); + + it('should include status rules with regular conditions only', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Done', next: 'COMPLETE' }, + { condition: 'Blocked', next: 'ABORT' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).toContain('Decision Criteria'); + expect(result).toContain('[PLAN:1]'); + expect(result).toContain('[PLAN:2]'); + }); + + it('should NOT include status rules when all rules are aggregate conditions', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, + { condition: 'any("rejected")', next: 'fix', isAggregateCondition: true, aggregateType: 'any' as const, aggregateConditionText: 'rejected' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Decision Criteria'); + }); + + it('should NOT include status rules when all rules are ai() + aggregate', () => { + const step = createMinimalStep('Do work'); + step.rules = [ + { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, + { condition: 'any("rejected")', next: 'fix', isAggregateCondition: true, aggregateType: 'any' as const, aggregateConditionText: 'rejected' }, + { condition: 'ai("Judgment needed")', next: 'manual', isAiCondition: true, aiConditionText: 'Judgment needed' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).not.toContain('Decision Criteria'); + }); + + it('should include status rules with mixed aggregate and regular conditions', () => { + const step = createMinimalStep('Do work'); + step.name = 'supervise'; + step.rules = [ + { condition: 'all("approved")', next: 'COMPLETE', isAggregateCondition: true, aggregateType: 'all' as const, aggregateConditionText: 'approved' }, + { condition: 'Error occurred', next: 'ABORT' }, + ]; + const context = createMinimalContext({ language: 'en' }); + + const result = buildInstruction(step, context); + + expect(result).toContain('Decision Criteria'); + expect(result).toContain('[SUPERVISE:1]'); + }); + }); + describe('isReportObjectConfig', () => { it('should return true for ReportObjectConfig', () => { expect(isReportObjectConfig({ name: '00-plan.md' })).toBe(true); @@ -927,4 +962,117 @@ describe('instruction-builder', () => { expect(isReportObjectConfig([{ label: 'Scope', path: '01-scope.md' }])).toBe(false); }); }); + + describe('buildStatusJudgmentInstruction (Phase 3)', () => { + function createJudgmentContext(overrides: Partial = {}): StatusJudgmentContext { + return { + language: 'en', + ...overrides, + }; + } + + it('should include header instruction (en)', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Clear requirements', next: 'implement' }, + { condition: 'Unclear', next: 'ABORT' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('Review your work results and determine the status'); + expect(result).toContain('Do NOT perform any additional work'); + }); + + it('should include header instruction (ja)', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: '要件が明確', next: 'implement' }, + { condition: '不明確', next: 'ABORT' }, + ]; + const ctx = createJudgmentContext({ language: 'ja' }); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('作業結果を振り返り、ステータスを判定してください'); + expect(result).toContain('追加の作業は行わないでください'); + }); + + it('should include criteria table with tags', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Clear requirements', next: 'implement' }, + { condition: 'Unclear', next: 'ABORT' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('## Decision Criteria'); + expect(result).toContain('`[PLAN:1]`'); + expect(result).toContain('`[PLAN:2]`'); + }); + + it('should include output format section', () => { + const step = createMinimalStep('Do work'); + step.name = 'review'; + step.rules = [ + { condition: 'Approved', next: 'COMPLETE' }, + { condition: 'Rejected', next: 'fix' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('## Output Format'); + expect(result).toContain('`[REVIEW:1]` — Approved'); + expect(result).toContain('`[REVIEW:2]` — Rejected'); + }); + + it('should throw error when step has no rules', () => { + const step = createMinimalStep('Do work'); + const ctx = createJudgmentContext(); + + expect(() => buildStatusJudgmentInstruction(step, ctx)).toThrow('no rules'); + }); + + it('should throw error when step has empty rules', () => { + const step = createMinimalStep('Do work'); + step.rules = []; + const ctx = createJudgmentContext(); + + expect(() => buildStatusJudgmentInstruction(step, ctx)).toThrow('no rules'); + }); + + it('should default language to en', () => { + const step = createMinimalStep('Do work'); + step.name = 'test'; + step.rules = [{ condition: 'Done', next: 'COMPLETE' }]; + const ctx: StatusJudgmentContext = {}; + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('Review your work results'); + expect(result).toContain('## Decision Criteria'); + }); + + it('should include appendix template when rules have appendix', () => { + const step = createMinimalStep('Do work'); + step.name = 'plan'; + step.rules = [ + { condition: 'Done', next: 'COMPLETE' }, + { condition: 'Blocked', next: 'ABORT', appendix: '確認事項:\n- {質問1}' }, + ]; + const ctx = createJudgmentContext(); + + const result = buildStatusJudgmentInstruction(step, ctx); + + expect(result).toContain('Appendix Template'); + expect(result).toContain('確認事項:'); + }); + }); }); diff --git a/src/__tests__/parallel-and-loader.test.ts b/src/__tests__/parallel-and-loader.test.ts new file mode 100644 index 0000000..b8a53e3 --- /dev/null +++ b/src/__tests__/parallel-and-loader.test.ts @@ -0,0 +1,521 @@ +/** + * Tests for parallel step execution and ai() condition loader + * + * Covers: + * - Schema validation for parallel sub-steps + * - Workflow loader normalization of ai() conditions and parallel steps + * - Engine parallel step aggregation logic + */ + +import { describe, it, expect } from 'vitest'; +import { WorkflowConfigRawSchema, ParallelSubStepRawSchema, WorkflowStepRawSchema } from '../models/schemas.js'; + +describe('ParallelSubStepRawSchema', () => { + it('should validate a valid parallel sub-step', () => { + const raw = { + name: 'arch-review', + agent: '~/.takt/agents/default/reviewer.md', + instruction_template: 'Review architecture', + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); + + it('should reject a sub-step without agent', () => { + const raw = { + name: 'no-agent-step', + instruction_template: 'Do something', + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(false); + }); + + it('should accept optional fields', () => { + const raw = { + name: 'full-sub-step', + agent: '~/.takt/agents/default/coder.md', + agent_name: 'Coder', + allowed_tools: ['Read', 'Grep'], + model: 'haiku', + edit: false, + instruction_template: 'Do work', + report: '01-report.md', + pass_previous_response: false, + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.agent_name).toBe('Coder'); + expect(result.data.allowed_tools).toEqual(['Read', 'Grep']); + expect(result.data.edit).toBe(false); + } + }); + + it('should accept rules on sub-steps', () => { + const raw = { + name: 'reviewed', + agent: '~/.takt/agents/default/reviewer.md', + instruction_template: 'Review', + rules: [ + { condition: 'No issues', next: 'COMPLETE' }, + { condition: 'Issues found', next: 'fix' }, + ], + }; + + const result = ParallelSubStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.rules).toHaveLength(2); + } + }); +}); + +describe('WorkflowStepRawSchema with parallel', () => { + it('should accept a step with parallel sub-steps (no agent)', () => { + const raw = { + name: 'parallel-review', + parallel: [ + { name: 'arch-review', agent: 'reviewer.md', instruction_template: 'Review arch' }, + { name: 'sec-review', agent: 'security.md', instruction_template: 'Review security' }, + ], + rules: [ + { condition: 'All pass', next: 'COMPLETE' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); + + it('should reject a step with neither agent nor parallel', () => { + const raw = { + name: 'orphan-step', + instruction_template: 'Do something', + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(false); + }); + + it('should accept a step with agent (no parallel)', () => { + const raw = { + name: 'normal-step', + agent: 'coder.md', + instruction_template: 'Code something', + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); + + it('should reject a step with empty parallel array', () => { + const raw = { + name: 'empty-parallel', + parallel: [], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(false); + }); +}); + +describe('WorkflowConfigRawSchema with parallel steps', () => { + it('should validate a workflow with parallel step', () => { + const raw = { + name: 'test-parallel-workflow', + steps: [ + { + name: 'plan', + agent: 'planner.md', + rules: [{ condition: 'Plan complete', next: 'review' }], + }, + { + name: 'review', + parallel: [ + { name: 'arch-review', agent: 'arch-reviewer.md', instruction_template: 'Review architecture' }, + { name: 'sec-review', agent: 'sec-reviewer.md', instruction_template: 'Review security' }, + ], + rules: [ + { condition: 'All approved', next: 'COMPLETE' }, + { condition: 'Issues found', next: 'plan' }, + ], + }, + ], + initial_step: 'plan', + max_iterations: 10, + }; + + const result = WorkflowConfigRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.steps).toHaveLength(2); + expect(result.data.steps[1].parallel).toHaveLength(2); + } + }); + + it('should validate a workflow mixing normal and parallel steps', () => { + const raw = { + name: 'mixed-workflow', + steps: [ + { name: 'plan', agent: 'planner.md', rules: [{ condition: 'Done', next: 'implement' }] }, + { name: 'implement', agent: 'coder.md', rules: [{ condition: 'Done', next: 'review' }] }, + { + name: 'review', + parallel: [ + { name: 'arch', agent: 'arch.md' }, + { name: 'sec', agent: 'sec.md' }, + ], + rules: [{ condition: 'All pass', next: 'COMPLETE' }], + }, + ], + initial_step: 'plan', + }; + + const result = WorkflowConfigRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.steps[0].agent).toBe('planner.md'); + expect(result.data.steps[2].parallel).toHaveLength(2); + } + }); +}); + +describe('ai() condition in WorkflowRuleSchema', () => { + it('should accept ai() condition as a string', () => { + const raw = { + name: 'test-step', + agent: 'agent.md', + rules: [ + { condition: 'ai("All reviews approved")', next: 'COMPLETE' }, + { condition: 'ai("Issues detected")', next: 'fix' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.rules?.[0].condition).toBe('ai("All reviews approved")'); + expect(result.data.rules?.[1].condition).toBe('ai("Issues detected")'); + } + }); + + it('should accept mixed regular and ai() conditions', () => { + const raw = { + name: 'mixed-rules', + agent: 'agent.md', + rules: [ + { condition: 'Regular condition', next: 'step-a' }, + { condition: 'ai("AI evaluated condition")', next: 'step-b' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); +}); + +describe('ai() condition regex parsing', () => { + // Test the regex pattern used in workflowLoader.ts + const AI_CONDITION_REGEX = /^ai\("(.+)"\)$/; + + it('should match simple ai() condition', () => { + const match = 'ai("No issues found")'.match(AI_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('No issues found'); + }); + + it('should match ai() with Japanese text', () => { + const match = 'ai("全てのレビューが承認している場合")'.match(AI_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('全てのレビューが承認している場合'); + }); + + it('should not match regular condition text', () => { + const match = 'No issues found'.match(AI_CONDITION_REGEX); + expect(match).toBeNull(); + }); + + it('should not match partial ai() pattern', () => { + expect('ai(missing quotes)'.match(AI_CONDITION_REGEX)).toBeNull(); + expect('ai("")'.match(AI_CONDITION_REGEX)).toBeNull(); // .+ requires at least 1 char + expect('not ai("text")'.match(AI_CONDITION_REGEX)).toBeNull(); // must start with ai( + expect('ai("text") extra'.match(AI_CONDITION_REGEX)).toBeNull(); // must end with ) + }); + + it('should match ai() with special characters in text', () => { + const match = 'ai("Issues found (critical/high severity)")'.match(AI_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('Issues found (critical/high severity)'); + }); +}); + +describe('all()/any() aggregate condition regex parsing', () => { + const AGGREGATE_CONDITION_REGEX = /^(all|any)\("(.+)"\)$/; + + it('should match all() condition', () => { + const match = 'all("approved")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('all'); + expect(match![2]).toBe('approved'); + }); + + it('should match any() condition', () => { + const match = 'any("rejected")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('any'); + expect(match![2]).toBe('rejected'); + }); + + it('should match with Japanese text', () => { + const match = 'all("承認済み")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![1]).toBe('all'); + expect(match![2]).toBe('承認済み'); + }); + + it('should not match regular condition text', () => { + expect('approved'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + }); + + it('should not match ai() condition', () => { + expect('ai("something")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + }); + + it('should not match invalid patterns', () => { + expect('all(missing quotes)'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('all("")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('not all("text")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('all("text") extra'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + expect('ALL("text")'.match(AGGREGATE_CONDITION_REGEX)).toBeNull(); + }); + + it('should match with special characters in text', () => { + const match = 'any("issues found (critical)")'.match(AGGREGATE_CONDITION_REGEX); + expect(match).not.toBeNull(); + expect(match![2]).toBe('issues found (critical)'); + }); +}); + +describe('all()/any() condition in WorkflowStepRawSchema', () => { + it('should accept all() condition as a string', () => { + const raw = { + name: 'parallel-review', + parallel: [ + { name: 'arch-review', agent: 'reviewer.md', instruction_template: 'Review' }, + ], + rules: [ + { condition: 'all("approved")', next: 'COMPLETE' }, + { condition: 'any("rejected")', next: 'fix' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + if (result.success) { + expect(result.data.rules?.[0].condition).toBe('all("approved")'); + expect(result.data.rules?.[1].condition).toBe('any("rejected")'); + } + }); + + it('should accept mixed regular, ai(), and all()/any() conditions', () => { + const raw = { + name: 'mixed-rules', + parallel: [ + { name: 'sub', agent: 'agent.md' }, + ], + rules: [ + { condition: 'all("approved")', next: 'COMPLETE' }, + { condition: 'any("rejected")', next: 'fix' }, + { condition: 'ai("Difficult judgment")', next: 'manual-review' }, + ], + }; + + const result = WorkflowStepRawSchema.safeParse(raw); + expect(result.success).toBe(true); + }); +}); + +describe('aggregate condition evaluation logic', () => { + // Simulate the evaluation logic from engine.ts + type SubResult = { name: string; matchedRuleIndex?: number; rules?: { condition: string }[] }; + + function evaluateAggregate( + aggregateType: 'all' | 'any', + targetCondition: string, + subSteps: SubResult[], + ): boolean { + if (subSteps.length === 0) return false; + + if (aggregateType === 'all') { + return subSteps.every((sub) => { + if (sub.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[sub.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + } + // 'any' + return subSteps.some((sub) => { + if (sub.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[sub.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + } + + const rules = [ + { condition: 'approved' }, + { condition: 'rejected' }, + ]; + + it('all(): true when all sub-steps match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(true); + }); + + it('all(): false when some sub-steps do not match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(false); + }); + + it('all(): false when sub-step has no matched rule', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: undefined, rules }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(false); + }); + + it('all(): false when sub-step has no rules', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules: undefined }, + ]; + expect(evaluateAggregate('all', 'approved', subs)).toBe(false); + }); + + it('all(): false with zero sub-steps', () => { + expect(evaluateAggregate('all', 'approved', [])).toBe(false); + }); + + it('any(): true when one sub-step matches', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(true); + }); + + it('any(): true when all sub-steps match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 1, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(true); + }); + + it('any(): false when no sub-steps match', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(false); + }); + + it('any(): false with zero sub-steps', () => { + expect(evaluateAggregate('any', 'rejected', [])).toBe(false); + }); + + it('any(): skips sub-steps without matched rule (does not count as match)', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: undefined, rules }, + { name: 'b', matchedRuleIndex: 1, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(true); + }); + + it('any(): false when only unmatched sub-steps exist', () => { + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: undefined, rules }, + { name: 'b', matchedRuleIndex: undefined, rules }, + ]; + expect(evaluateAggregate('any', 'rejected', subs)).toBe(false); + }); + + it('evaluation priority: first matching aggregate rule wins', () => { + const parentRules = [ + { type: 'all' as const, condition: 'approved' }, + { type: 'any' as const, condition: 'rejected' }, + ]; + const subs: SubResult[] = [ + { name: 'a', matchedRuleIndex: 0, rules }, + { name: 'b', matchedRuleIndex: 0, rules }, + ]; + + // Find the first matching rule + let matchedIndex = -1; + for (let i = 0; i < parentRules.length; i++) { + const r = parentRules[i]!; + if (evaluateAggregate(r.type, r.condition, subs)) { + matchedIndex = i; + break; + } + } + + expect(matchedIndex).toBe(0); // all("approved") matches first + }); +}); + +describe('parallel step aggregation format', () => { + it('should aggregate sub-step outputs in the expected format', () => { + // Mirror the aggregation logic from engine.ts + const subResults = [ + { name: 'arch-review', content: 'Architecture looks good.\n## Result: APPROVE' }, + { name: 'sec-review', content: 'No security issues.\n## Result: APPROVE' }, + ]; + + const aggregatedContent = subResults + .map((r) => `## ${r.name}\n${r.content}`) + .join('\n\n---\n\n'); + + expect(aggregatedContent).toContain('## arch-review'); + expect(aggregatedContent).toContain('Architecture looks good.'); + expect(aggregatedContent).toContain('---'); + expect(aggregatedContent).toContain('## sec-review'); + expect(aggregatedContent).toContain('No security issues.'); + }); + + it('should handle single sub-step', () => { + const subResults = [ + { name: 'only-step', content: 'Single result' }, + ]; + + const aggregatedContent = subResults + .map((r) => `## ${r.name}\n${r.content}`) + .join('\n\n---\n\n'); + + expect(aggregatedContent).toBe('## only-step\nSingle result'); + expect(aggregatedContent).not.toContain('---'); + }); + + it('should handle empty content from sub-steps', () => { + const subResults = [ + { name: 'step-a', content: '' }, + { name: 'step-b', content: 'Has content' }, + ]; + + const aggregatedContent = subResults + .map((r) => `## ${r.name}\n${r.content}`) + .join('\n\n---\n\n'); + + expect(aggregatedContent).toContain('## step-a\n'); + expect(aggregatedContent).toContain('## step-b\nHas content'); + }); +}); diff --git a/src/__tests__/parallel-logger.test.ts b/src/__tests__/parallel-logger.test.ts new file mode 100644 index 0000000..893954f --- /dev/null +++ b/src/__tests__/parallel-logger.test.ts @@ -0,0 +1,417 @@ +/** + * Tests for parallel-logger module + */ + +import { describe, it, expect, beforeEach } from 'vitest'; +import { ParallelLogger } from '../workflow/parallel-logger.js'; +import type { StreamEvent } from '../claude/types.js'; + +describe('ParallelLogger', () => { + let output: string[]; + let writeFn: (text: string) => void; + + beforeEach(() => { + output = []; + writeFn = (text: string) => output.push(text); + }); + + describe('buildPrefix', () => { + it('should build colored prefix with padding', () => { + const logger = new ParallelLogger({ + subStepNames: ['arch-review', 'sec'], + writeFn, + }); + + // arch-review is longest (11 chars), sec gets padding + const prefix = logger.buildPrefix('sec', 1); + // yellow color for index 1 + expect(prefix).toContain('[sec]'); + expect(prefix).toContain('\x1b[33m'); // yellow + expect(prefix).toContain('\x1b[0m'); // reset + // 11 - 3 = 8 spaces of padding + expect(prefix).toMatch(/\x1b\[0m {8} $/); + }); + + it('should cycle colors for index >= 4', () => { + const logger = new ParallelLogger({ + subStepNames: ['a', 'b', 'c', 'd', 'e'], + writeFn, + }); + + const prefix0 = logger.buildPrefix('a', 0); + const prefix4 = logger.buildPrefix('e', 4); + // Both should use cyan (\x1b[36m) + expect(prefix0).toContain('\x1b[36m'); + expect(prefix4).toContain('\x1b[36m'); + }); + + it('should assign correct colors in order', () => { + const logger = new ParallelLogger({ + subStepNames: ['a', 'b', 'c', 'd'], + writeFn, + }); + + expect(logger.buildPrefix('a', 0)).toContain('\x1b[36m'); // cyan + expect(logger.buildPrefix('b', 1)).toContain('\x1b[33m'); // yellow + expect(logger.buildPrefix('c', 2)).toContain('\x1b[35m'); // magenta + expect(logger.buildPrefix('d', 3)).toContain('\x1b[32m'); // green + }); + + it('should have no extra padding for longest name', () => { + const logger = new ParallelLogger({ + subStepNames: ['long-name', 'short'], + writeFn, + }); + + const prefix = logger.buildPrefix('long-name', 0); + // No padding needed (0 spaces) + expect(prefix).toMatch(/\x1b\[0m $/); + }); + }); + + describe('text event line buffering', () => { + it('should buffer partial line and output on newline', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + // Partial text (no newline) + handler({ type: 'text', data: { text: 'Hello' } } as StreamEvent); + expect(output).toHaveLength(0); + + // Complete the line + handler({ type: 'text', data: { text: ' World\n' } } as StreamEvent); + expect(output).toHaveLength(1); + expect(output[0]).toContain('[step-a]'); + expect(output[0]).toContain('Hello World'); + expect(output[0]).toMatch(/\n$/); + }); + + it('should handle multiple lines in single text event', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + handler({ type: 'text', data: { text: 'Line 1\nLine 2\n' } } as StreamEvent); + expect(output).toHaveLength(2); + expect(output[0]).toContain('Line 1'); + expect(output[1]).toContain('Line 2'); + }); + + it('should output empty line without prefix', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + handler({ type: 'text', data: { text: 'Hello\n\nWorld\n' } } as StreamEvent); + expect(output).toHaveLength(3); + expect(output[0]).toContain('Hello'); + expect(output[1]).toBe('\n'); // empty line without prefix + expect(output[2]).toContain('World'); + }); + + it('should keep trailing partial in buffer', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + handler({ type: 'text', data: { text: 'Complete\nPartial' } } as StreamEvent); + expect(output).toHaveLength(1); + expect(output[0]).toContain('Complete'); + + // Flush remaining + logger.flush(); + expect(output).toHaveLength(2); + expect(output[1]).toContain('Partial'); + }); + }); + + describe('block events (tool_use, tool_result, tool_output, thinking)', () => { + it('should prefix tool_use events', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + handler({ + type: 'tool_use', + data: { tool: 'Read', input: {}, id: '1' }, + } as StreamEvent); + + expect(output).toHaveLength(1); + expect(output[0]).toContain('[sub-a]'); + expect(output[0]).toContain('[tool] Read'); + }); + + it('should prefix tool_result events', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + handler({ + type: 'tool_result', + data: { content: 'File content here', isError: false }, + } as StreamEvent); + + expect(output).toHaveLength(1); + expect(output[0]).toContain('File content here'); + }); + + it('should prefix multi-line tool output', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + handler({ + type: 'tool_output', + data: { tool: 'Bash', output: 'line1\nline2' }, + } as StreamEvent); + + expect(output).toHaveLength(2); + expect(output[0]).toContain('line1'); + expect(output[1]).toContain('line2'); + }); + + it('should prefix thinking events', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + handler({ + type: 'thinking', + data: { thinking: 'Considering options...' }, + } as StreamEvent); + + expect(output).toHaveLength(1); + expect(output[0]).toContain('Considering options...'); + }); + }); + + describe('delegated events (init, result, error)', () => { + it('should delegate init event to parent callback', () => { + const parentEvents: StreamEvent[] = []; + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + parentOnStream: (event) => parentEvents.push(event), + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + const initEvent: StreamEvent = { + type: 'init', + data: { model: 'claude-3', sessionId: 'sess-1' }, + }; + handler(initEvent); + + expect(parentEvents).toHaveLength(1); + expect(parentEvents[0]).toBe(initEvent); + expect(output).toHaveLength(0); // Not written to stdout + }); + + it('should delegate result event to parent callback', () => { + const parentEvents: StreamEvent[] = []; + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + parentOnStream: (event) => parentEvents.push(event), + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + const resultEvent: StreamEvent = { + type: 'result', + data: { result: 'done', sessionId: 'sess-1', success: true }, + }; + handler(resultEvent); + + expect(parentEvents).toHaveLength(1); + expect(parentEvents[0]).toBe(resultEvent); + }); + + it('should delegate error event to parent callback', () => { + const parentEvents: StreamEvent[] = []; + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + parentOnStream: (event) => parentEvents.push(event), + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + const errorEvent: StreamEvent = { + type: 'error', + data: { message: 'Something went wrong' }, + }; + handler(errorEvent); + + expect(parentEvents).toHaveLength(1); + expect(parentEvents[0]).toBe(errorEvent); + }); + + it('should not crash when no parent callback for delegated events', () => { + const logger = new ParallelLogger({ + subStepNames: ['sub-a'], + writeFn, + }); + const handler = logger.createStreamHandler('sub-a', 0); + + // Should not throw + handler({ type: 'init', data: { model: 'claude-3', sessionId: 'sess-1' } } as StreamEvent); + handler({ type: 'result', data: { result: 'done', sessionId: 'sess-1', success: true } } as StreamEvent); + handler({ type: 'error', data: { message: 'err' } } as StreamEvent); + + expect(output).toHaveLength(0); + }); + }); + + describe('flush', () => { + it('should output remaining buffered content', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a', 'step-b'], + writeFn, + }); + const handlerA = logger.createStreamHandler('step-a', 0); + const handlerB = logger.createStreamHandler('step-b', 1); + + handlerA({ type: 'text', data: { text: 'partial-a' } } as StreamEvent); + handlerB({ type: 'text', data: { text: 'partial-b' } } as StreamEvent); + + expect(output).toHaveLength(0); + + logger.flush(); + + expect(output).toHaveLength(2); + expect(output[0]).toContain('partial-a'); + expect(output[1]).toContain('partial-b'); + }); + + it('should not output empty buffers', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a', 'step-b'], + writeFn, + }); + const handlerA = logger.createStreamHandler('step-a', 0); + + handlerA({ type: 'text', data: { text: 'content\n' } } as StreamEvent); + output.length = 0; // Clear previous output + + logger.flush(); + expect(output).toHaveLength(0); // Nothing to flush + }); + }); + + describe('printSummary', () => { + it('should print completion summary', () => { + const logger = new ParallelLogger({ + subStepNames: ['arch-review', 'security-review'], + writeFn, + }); + + logger.printSummary('parallel-review', [ + { name: 'arch-review', condition: 'approved' }, + { name: 'security-review', condition: 'rejected' }, + ]); + + const fullOutput = output.join(''); + expect(fullOutput).toContain('parallel-review results'); + expect(fullOutput).toContain('arch-review:'); + expect(fullOutput).toContain('approved'); + expect(fullOutput).toContain('security-review:'); + expect(fullOutput).toContain('rejected'); + // Header and footer contain ─ + expect(fullOutput).toContain('─'); + }); + + it('should show (no result) for undefined condition', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + + logger.printSummary('parallel-step', [ + { name: 'step-a', condition: undefined }, + ]); + + const fullOutput = output.join(''); + expect(fullOutput).toContain('(no result)'); + }); + + it('should right-pad sub-step names to align results', () => { + const logger = new ParallelLogger({ + subStepNames: ['short', 'very-long-name'], + writeFn, + }); + + logger.printSummary('test', [ + { name: 'short', condition: 'done' }, + { name: 'very-long-name', condition: 'done' }, + ]); + + // Find the result lines (indented with 2 spaces) + const resultLines = output.filter((l) => l.startsWith(' ')); + expect(resultLines).toHaveLength(2); + + // Both 'done' values should be at the same column + const doneIndex0 = resultLines[0]!.indexOf('done'); + const doneIndex1 = resultLines[1]!.indexOf('done'); + expect(doneIndex0).toBe(doneIndex1); + }); + + it('should flush remaining buffers before printing summary', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a'], + writeFn, + }); + const handler = logger.createStreamHandler('step-a', 0); + + // Leave partial content in buffer + handler({ type: 'text', data: { text: 'trailing content' } } as StreamEvent); + + logger.printSummary('test', [ + { name: 'step-a', condition: 'done' }, + ]); + + // First output should be the flushed buffer + expect(output[0]).toContain('trailing content'); + // Then the summary + const fullOutput = output.join(''); + expect(fullOutput).toContain('test results'); + }); + }); + + describe('interleaved output from multiple sub-steps', () => { + it('should correctly interleave prefixed output', () => { + const logger = new ParallelLogger({ + subStepNames: ['step-a', 'step-b'], + writeFn, + }); + const handlerA = logger.createStreamHandler('step-a', 0); + const handlerB = logger.createStreamHandler('step-b', 1); + + handlerA({ type: 'text', data: { text: 'A output\n' } } as StreamEvent); + handlerB({ type: 'text', data: { text: 'B output\n' } } as StreamEvent); + handlerA({ type: 'text', data: { text: 'A second\n' } } as StreamEvent); + + expect(output).toHaveLength(3); + expect(output[0]).toContain('[step-a]'); + expect(output[0]).toContain('A output'); + expect(output[1]).toContain('[step-b]'); + expect(output[1]).toContain('B output'); + expect(output[2]).toContain('[step-a]'); + expect(output[2]).toContain('A second'); + }); + }); +}); diff --git a/src/agents/runner.ts b/src/agents/runner.ts index 62c4c40..531898d 100644 --- a/src/agents/runner.ts +++ b/src/agents/runner.ts @@ -31,6 +31,8 @@ export interface RunAgentOptions { agentPath?: string; /** Allowed tools for this agent run */ allowedTools?: string[]; + /** Maximum number of agentic turns */ + maxTurns?: number; /** Permission mode for tool execution (from workflow step) */ permissionMode?: PermissionMode; onStream?: StreamCallback; @@ -82,6 +84,7 @@ export async function runCustomAgent( cwd: options.cwd, sessionId: options.sessionId, allowedTools, + maxTurns: options.maxTurns, model: resolveModel(options.cwd, options, agentConfig), permissionMode: options.permissionMode, onStream: options.onStream, @@ -98,6 +101,7 @@ export async function runCustomAgent( cwd: options.cwd, sessionId: options.sessionId, allowedTools, + maxTurns: options.maxTurns, model: resolveModel(options.cwd, options, agentConfig), permissionMode: options.permissionMode, onStream: options.onStream, @@ -118,6 +122,7 @@ export async function runCustomAgent( cwd: options.cwd, sessionId: options.sessionId, allowedTools, + maxTurns: options.maxTurns, model: resolveModel(options.cwd, options, agentConfig), permissionMode: options.permissionMode, onStream: options.onStream, @@ -195,6 +200,7 @@ export async function runAgent( cwd: options.cwd, sessionId: options.sessionId, allowedTools: options.allowedTools, + maxTurns: options.maxTurns, model: resolveModel(options.cwd, options), systemPrompt, permissionMode: options.permissionMode, diff --git a/src/claude/client.ts b/src/claude/client.ts index 75849f3..5aa6bbc 100644 --- a/src/claude/client.ts +++ b/src/claude/client.ts @@ -165,6 +165,80 @@ export async function callClaudeCustom( }; } +/** + * Detect judge rule index from [JUDGE:N] tag pattern. + * Returns 0-based rule index, or -1 if no match. + */ +export function detectJudgeIndex(content: string): number { + const regex = /\[JUDGE:(\d+)\]/i; + const match = content.match(regex); + if (match?.[1]) { + const index = Number.parseInt(match[1], 10) - 1; + return index >= 0 ? index : -1; + } + return -1; +} + +/** + * Build the prompt for the AI judge that evaluates agent output against ai() conditions. + */ +export function buildJudgePrompt( + agentOutput: string, + aiConditions: { index: number; text: string }[], +): string { + const conditionList = aiConditions + .map((c) => `| ${c.index + 1} | ${c.text} |`) + .join('\n'); + + return [ + '# Judge Task', + '', + 'You are a judge evaluating an agent\'s output against a set of conditions.', + 'Read the agent output below, then determine which condition best matches.', + '', + '## Agent Output', + '```', + agentOutput, + '```', + '', + '## Conditions', + '| # | Condition |', + '|---|-----------|', + conditionList, + '', + '## Instructions', + 'Output ONLY the tag `[JUDGE:N]` where N is the number of the best matching condition.', + 'Do not output anything else.', + ].join('\n'); +} + +/** + * Call AI judge to evaluate agent output against ai() conditions. + * Uses a lightweight model (haiku) for cost efficiency. + * Returns 0-based index of the matched ai() condition, or -1 if no match. + */ +export async function callAiJudge( + agentOutput: string, + aiConditions: { index: number; text: string }[], + options: { cwd: string }, +): Promise { + const prompt = buildJudgePrompt(agentOutput, aiConditions); + + const spawnOptions: ClaudeSpawnOptions = { + cwd: options.cwd, + model: 'haiku', + maxTurns: 1, + }; + + const result = await executeClaudeCli(prompt, spawnOptions); + if (!result.success) { + log.error('AI judge call failed', { error: result.error }); + return -1; + } + + return detectJudgeIndex(result.content); +} + /** Call a Claude Code built-in agent (using claude --agent flag if available) */ export async function callClaudeAgent( claudeAgentName: string, diff --git a/src/commands/workflowExecution.ts b/src/commands/workflowExecution.ts index 0c7be9f..b727db2 100644 --- a/src/commands/workflowExecution.ts +++ b/src/commands/workflowExecution.ts @@ -190,6 +190,7 @@ export async function executeWorkflow( step: step.name, status: response.status, matchedRuleIndex: response.matchedRuleIndex, + matchedRuleMethod: response.matchedRuleMethod, contentLength: response.content.length, sessionId: response.sessionId, error: response.error, @@ -203,7 +204,8 @@ export async function executeWorkflow( if (response.matchedRuleIndex != null && step.rules) { const rule = step.rules[response.matchedRuleIndex]; if (rule) { - status('Status', rule.condition); + const methodLabel = response.matchedRuleMethod ? ` (${response.matchedRuleMethod})` : ''; + status('Status', `${rule.condition}${methodLabel}`); } else { status('Status', response.status); } diff --git a/src/config/workflowLoader.ts b/src/config/workflowLoader.ts index 93c9f9a..a7bc5c1 100644 --- a/src/config/workflowLoader.ts +++ b/src/config/workflowLoader.ts @@ -120,6 +120,81 @@ function normalizeReport( ); } +/** Regex to detect ai("...") condition expressions */ +const AI_CONDITION_REGEX = /^ai\("(.+)"\)$/; + +/** Regex to detect all("...")/any("...") aggregate condition expressions */ +const AGGREGATE_CONDITION_REGEX = /^(all|any)\("(.+)"\)$/; + +/** + * Parse a rule's condition for ai() and all()/any() expressions. + * - `ai("text")` → sets isAiCondition and aiConditionText + * - `all("text")` / `any("text")` → sets isAggregateCondition, aggregateType, aggregateConditionText + */ +function normalizeRule(r: { condition: string; next: string; appendix?: string }): WorkflowRule { + const aiMatch = r.condition.match(AI_CONDITION_REGEX); + if (aiMatch?.[1]) { + return { + condition: r.condition, + next: r.next, + appendix: r.appendix, + isAiCondition: true, + aiConditionText: aiMatch[1], + }; + } + + const aggMatch = r.condition.match(AGGREGATE_CONDITION_REGEX); + if (aggMatch?.[1] && aggMatch[2]) { + return { + condition: r.condition, + next: r.next, + appendix: r.appendix, + isAggregateCondition: true, + aggregateType: aggMatch[1] as 'all' | 'any', + aggregateConditionText: aggMatch[2], + }; + } + + return { + condition: r.condition, + next: r.next, + appendix: r.appendix, + }; +} + +// eslint-disable-next-line @typescript-eslint/no-explicit-any +type RawStep = any; + +/** + * Normalize a raw step into internal WorkflowStep format. + */ +function normalizeStepFromRaw(step: RawStep, workflowDir: string): WorkflowStep { + const rules: WorkflowRule[] | undefined = step.rules?.map(normalizeRule); + const agentSpec: string = step.agent ?? ''; + + const result: WorkflowStep = { + name: step.name, + agent: agentSpec, + agentDisplayName: step.agent_name || (agentSpec ? extractAgentDisplayName(agentSpec) : step.name), + agentPath: agentSpec ? resolveAgentPathForWorkflow(agentSpec, workflowDir) : undefined, + allowedTools: step.allowed_tools, + provider: step.provider, + model: step.model, + permissionMode: step.permission_mode, + edit: step.edit, + instructionTemplate: resolveContentPath(step.instruction_template, workflowDir) || step.instruction || '{task}', + rules, + report: normalizeReport(step.report, workflowDir), + passPreviousResponse: step.pass_previous_response ?? true, + }; + + if (step.parallel && step.parallel.length > 0) { + result.parallel = step.parallel.map((sub: RawStep) => normalizeStepFromRaw(sub, workflowDir)); + } + + return result; +} + /** * Convert raw YAML workflow config to internal format. * Agent paths are resolved relative to the workflow directory. @@ -127,29 +202,9 @@ function normalizeReport( function normalizeWorkflowConfig(raw: unknown, workflowDir: string): WorkflowConfig { const parsed = WorkflowConfigRawSchema.parse(raw); - const steps: WorkflowStep[] = parsed.steps.map((step) => { - const rules: WorkflowRule[] | undefined = step.rules?.map((r) => ({ - condition: r.condition, - next: r.next, - appendix: r.appendix, - })); - - return { - name: step.name, - agent: step.agent, - agentDisplayName: step.agent_name || extractAgentDisplayName(step.agent), - agentPath: resolveAgentPathForWorkflow(step.agent, workflowDir), - allowedTools: step.allowed_tools, - provider: step.provider, - model: step.model, - permissionMode: step.permission_mode, - edit: step.edit, - instructionTemplate: resolveContentPath(step.instruction_template, workflowDir) || step.instruction || '{task}', - rules, - report: normalizeReport(step.report, workflowDir), - passPreviousResponse: step.pass_previous_response, - }; - }); + const steps: WorkflowStep[] = parsed.steps.map((step) => + normalizeStepFromRaw(step, workflowDir), + ); return { name: parsed.name, diff --git a/src/models/index.ts b/src/models/index.ts index 5b3e865..b43dd20 100644 --- a/src/models/index.ts +++ b/src/models/index.ts @@ -2,6 +2,7 @@ export type { AgentType, Status, + RuleMatchMethod, ReportConfig, ReportObjectConfig, AgentResponse, diff --git a/src/models/schemas.ts b/src/models/schemas.ts index e1d7aab..502adc5 100644 --- a/src/models/schemas.ts +++ b/src/models/schemas.ts @@ -13,7 +13,6 @@ export const AgentTypeSchema = z.enum(['coder', 'architect', 'supervisor', 'cust /** Status schema */ export const StatusSchema = z.enum([ 'pending', - 'in_progress', 'done', 'blocked', 'approved', @@ -81,10 +80,28 @@ export const WorkflowRuleSchema = z.object({ appendix: z.string().optional(), }); +/** Sub-step schema for parallel execution (agent is required) */ +export const ParallelSubStepRawSchema = z.object({ + name: z.string().min(1), + agent: z.string().min(1), + agent_name: z.string().optional(), + allowed_tools: z.array(z.string()).optional(), + provider: z.enum(['claude', 'codex', 'mock']).optional(), + model: z.string().optional(), + permission_mode: PermissionModeSchema.optional(), + edit: z.boolean().optional(), + instruction: z.string().optional(), + instruction_template: z.string().optional(), + rules: z.array(WorkflowRuleSchema).optional(), + report: ReportFieldSchema.optional(), + pass_previous_response: z.boolean().optional().default(true), +}); + /** Workflow step schema - raw YAML format */ export const WorkflowStepRawSchema = z.object({ name: z.string().min(1), - agent: z.string().min(1), + /** Agent is required for normal steps, optional for parallel container steps */ + agent: z.string().optional(), /** Display name for the agent (shown in output). Falls back to agent basename if not specified */ agent_name: z.string().optional(), allowed_tools: z.array(z.string()).optional(), @@ -101,7 +118,12 @@ export const WorkflowStepRawSchema = z.object({ /** Report file(s) for this step */ report: ReportFieldSchema.optional(), pass_previous_response: z.boolean().optional().default(true), -}); + /** Sub-steps to execute in parallel */ + parallel: z.array(ParallelSubStepRawSchema).optional(), +}).refine( + (data) => data.agent || (data.parallel && data.parallel.length > 0), + { message: 'Step must have either an agent or parallel sub-steps' }, +); /** Workflow configuration schema - raw YAML format */ export const WorkflowConfigRawSchema = z.object({ diff --git a/src/models/types.ts b/src/models/types.ts index 8bce74c..fc0bd83 100644 --- a/src/models/types.ts +++ b/src/models/types.ts @@ -8,7 +8,6 @@ export type AgentType = 'coder' | 'architect' | 'supervisor' | 'custom'; /** Execution status for agents and workflows */ export type Status = | 'pending' - | 'in_progress' | 'done' | 'blocked' | 'approved' @@ -18,6 +17,14 @@ export type Status = | 'interrupted' | 'answer'; +/** How a rule match was detected */ +export type RuleMatchMethod = + | 'aggregate' + | 'phase3_tag' + | 'phase1_tag' + | 'ai_judge' + | 'ai_judge_fallback'; + /** Response from an agent execution */ export interface AgentResponse { agent: string; @@ -29,6 +36,8 @@ export interface AgentResponse { error?: string; /** Matched rule index (0-based) when rules-based detection was used */ matchedRuleIndex?: number; + /** How the rule match was detected */ + matchedRuleMethod?: RuleMatchMethod; } /** Session state for workflow execution */ @@ -48,6 +57,16 @@ export interface WorkflowRule { next: string; /** Template for additional AI output */ appendix?: string; + /** Whether this condition uses ai() expression (set by loader) */ + isAiCondition?: boolean; + /** The condition text inside ai("...") for AI judge evaluation (set by loader) */ + aiConditionText?: string; + /** Whether this condition uses all()/any() aggregate expression (set by loader) */ + isAggregateCondition?: boolean; + /** Aggregate type: 'all' requires all sub-steps match, 'any' requires at least one (set by loader) */ + aggregateType?: 'all' | 'any'; + /** The condition text inside all("...")/any("...") to match against sub-step results (set by loader) */ + aggregateConditionText?: string; } /** Report file configuration for a workflow step (label: path pair) */ @@ -96,6 +115,8 @@ export interface WorkflowStep { /** Report file configuration. Single string, array of label:path, or object with order/format. */ report?: string | ReportConfig[] | ReportObjectConfig; passPreviousResponse: boolean; + /** Sub-steps to execute in parallel. When set, this step runs all sub-steps concurrently. */ + parallel?: WorkflowStep[]; } /** Loop detection configuration */ diff --git a/src/providers/claude.ts b/src/providers/claude.ts index e60ef74..9c8aec3 100644 --- a/src/providers/claude.ts +++ b/src/providers/claude.ts @@ -14,6 +14,7 @@ export class ClaudeProvider implements Provider { sessionId: options.sessionId, allowedTools: options.allowedTools, model: options.model, + maxTurns: options.maxTurns, systemPrompt: options.systemPrompt, permissionMode: options.permissionMode, onStream: options.onStream, @@ -31,6 +32,7 @@ export class ClaudeProvider implements Provider { sessionId: options.sessionId, allowedTools: options.allowedTools, model: options.model, + maxTurns: options.maxTurns, permissionMode: options.permissionMode, onStream: options.onStream, onPermissionRequest: options.onPermissionRequest, diff --git a/src/providers/index.ts b/src/providers/index.ts index c8d9c7f..5b013d0 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -18,6 +18,8 @@ export interface ProviderCallOptions { model?: string; systemPrompt?: string; allowedTools?: string[]; + /** Maximum number of agentic turns */ + maxTurns?: number; /** Permission mode for tool execution (from workflow step) */ permissionMode?: PermissionMode; onStream?: StreamCallback; diff --git a/src/utils/session.ts b/src/utils/session.ts index fa88f27..1ab50da 100644 --- a/src/utils/session.ts +++ b/src/utils/session.ts @@ -24,6 +24,10 @@ export interface SessionLog { timestamp: string; content: string; error?: string; + /** Matched rule index (0-based) when rules-based detection was used */ + matchedRuleIndex?: number; + /** How the rule match was detected */ + matchedRuleMethod?: string; }>; } @@ -88,6 +92,8 @@ export function addToSessionLog( timestamp: response.timestamp.toISOString(), content: response.content, ...(response.error ? { error: response.error } : {}), + ...(response.matchedRuleIndex != null ? { matchedRuleIndex: response.matchedRuleIndex } : {}), + ...(response.matchedRuleMethod ? { matchedRuleMethod: response.matchedRuleMethod } : {}), }); log.iterations++; } diff --git a/src/workflow/engine.ts b/src/workflow/engine.ts index 6e311e8..e06fcd4 100644 --- a/src/workflow/engine.ts +++ b/src/workflow/engine.ts @@ -10,15 +10,17 @@ import type { WorkflowState, WorkflowStep, AgentResponse, + RuleMatchMethod, } from '../models/types.js'; import { runAgent, type RunAgentOptions } from '../agents/runner.js'; import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js'; import type { WorkflowEngineOptions } from './types.js'; import { determineNextStepByRules } from './transitions.js'; -import { detectRuleIndex } from '../claude/client.js'; -import { buildInstruction as buildInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; +import { detectRuleIndex, callAiJudge } from '../claude/client.js'; +import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; import { LoopDetector } from './loop-detector.js'; import { handleBlocked } from './blocked-handler.js'; +import { ParallelLogger } from './parallel-logger.js'; import { createInitialState, addUserInput, @@ -196,24 +198,26 @@ export class WorkflowEngine extends EventEmitter { } } - /** Run a single step */ + /** Run a single step (delegates to runParallelStep if step has parallel sub-steps) */ private async runStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { - const stepIteration = incrementStepIteration(this.state, step.name); - const instruction = this.buildInstruction(step, stepIteration); - const sessionId = this.state.agentSessions.get(step.agent); - log.debug('Running step', { - step: step.name, - agent: step.agent, - stepIteration, - iteration: this.state.iteration, - sessionId: sessionId ?? 'new', - }); + if (step.parallel && step.parallel.length > 0) { + return this.runParallelStep(step); + } + return this.runNormalStep(step); + } - const agentOptions: RunAgentOptions = { + /** Build RunAgentOptions from a step's configuration */ + private buildAgentOptions(step: WorkflowStep): RunAgentOptions { + // Phase 1: exclude Write from allowedTools when step has report config + const allowedTools = step.report + ? step.allowedTools?.filter((t) => t !== 'Write') + : step.allowedTools; + + return { cwd: this.cwd, - sessionId, + sessionId: this.state.agentSessions.get(step.agent), agentPath: step.agentPath, - allowedTools: step.allowedTools, + allowedTools, provider: step.provider, model: step.model, permissionMode: step.permissionMode, @@ -222,30 +226,440 @@ export class WorkflowEngine extends EventEmitter { onAskUserQuestion: this.options.onAskUserQuestion, bypassPermissions: this.options.bypassPermissions, }; + } - let response = await runAgent(step.agent, instruction, agentOptions); + /** + * Build RunAgentOptions for session-resume phases (Phase 2, Phase 3). + * Shares common fields with the original step's agent config. + */ + private buildResumeOptions(step: WorkflowStep, sessionId: string, overrides: Pick): RunAgentOptions { + return { + cwd: this.cwd, + sessionId, + agentPath: step.agentPath, + allowedTools: overrides.allowedTools, + maxTurns: overrides.maxTurns, + provider: step.provider, + model: step.model, + permissionMode: step.permissionMode, + onStream: this.options.onStream, + onPermissionRequest: this.options.onPermissionRequest, + onAskUserQuestion: this.options.onAskUserQuestion, + bypassPermissions: this.options.bypassPermissions, + }; + } - if (response.sessionId) { - const previousSessionId = this.state.agentSessions.get(step.agent); - this.state.agentSessions.set(step.agent, response.sessionId); + /** Update agent session and notify via callback if session changed */ + private updateAgentSession(agent: string, sessionId: string | undefined): void { + if (!sessionId) return; - if (this.options.onSessionUpdate && response.sessionId !== previousSessionId) { - this.options.onSessionUpdate(step.agent, response.sessionId); + const previousSessionId = this.state.agentSessions.get(agent); + this.state.agentSessions.set(agent, sessionId); + + if (this.options.onSessionUpdate && sessionId !== previousSessionId) { + this.options.onSessionUpdate(agent, sessionId); + } + } + + /** + * Detect matched rule for a step's response. + * Evaluation order (first match wins): + * 1. Aggregate conditions: all()/any() — evaluate sub-step results + * 2. Tag detection from Phase 3 output + * 3. Tag detection from Phase 1 output (fallback) + * 4. ai() condition evaluation via AI judge + * 5. All-conditions AI judge (final fallback) + * + * Returns undefined for steps without rules. + * Throws if rules exist but no rule matched (Fail Fast). + * + * @param step - The workflow step + * @param agentContent - Phase 1 output (main execution) + * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection + */ + private async detectMatchedRule(step: WorkflowStep, agentContent: string, tagContent: string): Promise<{ index: number; method: RuleMatchMethod } | undefined> { + if (!step.rules || step.rules.length === 0) return undefined; + + // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps + const aggIndex = this.evaluateAggregateConditions(step); + if (aggIndex >= 0) { + return { index: aggIndex, method: 'aggregate' }; + } + + // 2. Tag detection from Phase 3 output + if (tagContent) { + const ruleIndex = detectRuleIndex(tagContent, step.name); + if (ruleIndex >= 0 && ruleIndex < step.rules.length) { + return { index: ruleIndex, method: 'phase3_tag' }; } } - if (step.rules && step.rules.length > 0) { - const ruleIndex = detectRuleIndex(response.content, step.name); + // 3. Tag detection from Phase 1 output (fallback) + if (agentContent) { + const ruleIndex = detectRuleIndex(agentContent, step.name); if (ruleIndex >= 0 && ruleIndex < step.rules.length) { - response = { ...response, matchedRuleIndex: ruleIndex }; + return { index: ruleIndex, method: 'phase1_tag' }; } } + // 4. AI judge for ai() conditions only + const aiRuleIndex = await this.evaluateAiConditions(step, agentContent); + if (aiRuleIndex >= 0) { + return { index: aiRuleIndex, method: 'ai_judge' }; + } + + // 5. AI judge for all conditions (final fallback) + const fallbackIndex = await this.evaluateAllConditionsViaAiJudge(step, agentContent); + if (fallbackIndex >= 0) { + return { index: fallbackIndex, method: 'ai_judge_fallback' }; + } + + throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`); + } + + /** + * Evaluate aggregate conditions (all()/any()) against sub-step results. + * Returns the 0-based rule index in the step's rules array, or -1 if no match. + * + * For each aggregate rule, checks the matched condition text of sub-steps: + * - all("X"): true when ALL sub-steps have matched condition === X + * - any("X"): true when at least ONE sub-step has matched condition === X + * + * Edge cases per spec: + * - Sub-step with no matched rule: all() → false, any() → skip that sub-step + * - No sub-steps (0 件): both → false + * - Non-parallel step: both → false + */ + private evaluateAggregateConditions(step: WorkflowStep): number { + if (!step.rules || !step.parallel || step.parallel.length === 0) return -1; + + for (let i = 0; i < step.rules.length; i++) { + const rule = step.rules[i]!; + if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) { + continue; + } + + const subSteps = step.parallel; + const targetCondition = rule.aggregateConditionText; + + if (rule.aggregateType === 'all') { + const allMatch = subSteps.every((sub) => { + const output = this.state.stepOutputs.get(sub.name); + if (!output || output.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[output.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + if (allMatch) { + log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); + return i; + } + } else { + // 'any' + const anyMatch = subSteps.some((sub) => { + const output = this.state.stepOutputs.get(sub.name); + if (!output || output.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[output.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + if (anyMatch) { + log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); + return i; + } + } + } + + return -1; + } + + /** Run a normal (non-parallel) step */ + private async runNormalStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { + const stepIteration = incrementStepIteration(this.state, step.name); + const instruction = this.buildInstruction(step, stepIteration); + log.debug('Running step', { + step: step.name, + agent: step.agent, + stepIteration, + iteration: this.state.iteration, + sessionId: this.state.agentSessions.get(step.agent) ?? 'new', + }); + + // Phase 1: main execution (Write excluded if step has report) + const agentOptions = this.buildAgentOptions(step); + let response = await runAgent(step.agent, instruction, agentOptions); + this.updateAgentSession(step.agent, response.sessionId); + + // Phase 2: report output (resume same session, Write only) + if (step.report) { + await this.runReportPhase(step, stepIteration); + } + + // Phase 3: status judgment (resume session, no tools, output status tag) + let tagContent = ''; + if (this.needsStatusJudgmentPhase(step)) { + tagContent = await this.runStatusJudgmentPhase(step); + } + + const match = await this.detectMatchedRule(step, response.content, tagContent); + if (match) { + log.debug('Rule matched', { step: step.name, ruleIndex: match.index, method: match.method }); + response = { ...response, matchedRuleIndex: match.index, matchedRuleMethod: match.method }; + } + this.state.stepOutputs.set(step.name, response); this.emitStepReports(step); return { response, instruction }; } + /** + * Phase 2: Report output. + * Resumes the agent session with Write-only tools to output reports. + * The response is discarded — only sessionId is updated. + */ + private async runReportPhase(step: WorkflowStep, stepIteration: number): Promise { + const sessionId = this.state.agentSessions.get(step.agent); + if (!sessionId) { + throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); + } + + log.debug('Running report phase', { step: step.name, sessionId }); + + const reportInstruction = buildReportInstructionFromTemplate(step, { + cwd: this.cwd, + reportDir: this.reportDir, + stepIteration, + language: this.language, + }); + + const reportOptions = this.buildResumeOptions(step, sessionId, { + allowedTools: ['Write'], + maxTurns: 3, + }); + + const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions); + + // Update session (phase 2 may update it) + this.updateAgentSession(step.agent, reportResponse.sessionId); + + log.debug('Report phase complete', { step: step.name, status: reportResponse.status }); + } + + /** + * Check if a step needs Phase 3 (status judgment). + * Returns true when at least one rule requires tag-based detection + * (i.e., not all rules are ai() or aggregate conditions). + */ + private needsStatusJudgmentPhase(step: WorkflowStep): boolean { + if (!step.rules || step.rules.length === 0) return false; + const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); + return !allNonTagConditions; + } + + /** + * Phase 3: Status judgment. + * Resumes the agent session with no tools to ask the agent to output a status tag. + * Returns the Phase 3 response content (containing the status tag). + */ + private async runStatusJudgmentPhase(step: WorkflowStep): Promise { + const sessionId = this.state.agentSessions.get(step.agent); + if (!sessionId) { + throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); + } + + log.debug('Running status judgment phase', { step: step.name, sessionId }); + + const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, { + language: this.language, + }); + + const judgmentOptions = this.buildResumeOptions(step, sessionId, { + allowedTools: [], + maxTurns: 3, + }); + + const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions); + + // Update session (phase 3 may update it) + this.updateAgentSession(step.agent, judgmentResponse.sessionId); + + log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status }); + return judgmentResponse.content; + } + + /** + * Run a parallel step: execute all sub-steps concurrently, then aggregate results. + * The aggregated output becomes the parent step's response for rules evaluation. + * + * When onStream is provided, uses ParallelLogger to prefix each sub-step's + * output with `[name]` for readable interleaved display. + */ + private async runParallelStep(step: WorkflowStep): Promise<{ response: AgentResponse; instruction: string }> { + const subSteps = step.parallel!; + const stepIteration = incrementStepIteration(this.state, step.name); + log.debug('Running parallel step', { + step: step.name, + subSteps: subSteps.map(s => s.name), + stepIteration, + }); + + // Create parallel logger for prefixed output (only when streaming is enabled) + const parallelLogger = this.options.onStream + ? new ParallelLogger({ + subStepNames: subSteps.map((s) => s.name), + parentOnStream: this.options.onStream, + }) + : undefined; + + // Run all sub-steps concurrently + const subResults = await Promise.all( + subSteps.map(async (subStep, index) => { + const subIteration = incrementStepIteration(this.state, subStep.name); + const subInstruction = this.buildInstruction(subStep, subIteration); + + // Phase 1: main execution (Write excluded if sub-step has report) + const agentOptions = this.buildAgentOptions(subStep); + + // Override onStream with parallel logger's prefixed handler + if (parallelLogger) { + agentOptions.onStream = parallelLogger.createStreamHandler(subStep.name, index); + } + + const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions); + this.updateAgentSession(subStep.agent, subResponse.sessionId); + + // Phase 2: report output for sub-step + if (subStep.report) { + await this.runReportPhase(subStep, subIteration); + } + + // Phase 3: status judgment for sub-step + let subTagContent = ''; + if (this.needsStatusJudgmentPhase(subStep)) { + subTagContent = await this.runStatusJudgmentPhase(subStep); + } + + const match = await this.detectMatchedRule(subStep, subResponse.content, subTagContent); + const finalResponse = match + ? { ...subResponse, matchedRuleIndex: match.index, matchedRuleMethod: match.method } + : subResponse; + + this.state.stepOutputs.set(subStep.name, finalResponse); + this.emitStepReports(subStep); + + return { subStep, response: finalResponse, instruction: subInstruction }; + }), + ); + + // Print completion summary + if (parallelLogger) { + parallelLogger.printSummary( + step.name, + subResults.map((r) => ({ + name: r.subStep.name, + condition: r.response.matchedRuleIndex != null && r.subStep.rules + ? r.subStep.rules[r.response.matchedRuleIndex]?.condition + : undefined, + })), + ); + } + + // Aggregate sub-step outputs into parent step's response + const aggregatedContent = subResults + .map((r) => `## ${r.subStep.name}\n${r.response.content}`) + .join('\n\n---\n\n'); + + const aggregatedInstruction = subResults + .map((r) => r.instruction) + .join('\n\n'); + + // Parent step uses aggregate conditions, so tagContent is empty + const match = await this.detectMatchedRule(step, aggregatedContent, ''); + + const aggregatedResponse: AgentResponse = { + agent: step.name, + status: 'done', + content: aggregatedContent, + timestamp: new Date(), + ...(match && { matchedRuleIndex: match.index, matchedRuleMethod: match.method }), + }; + + this.state.stepOutputs.set(step.name, aggregatedResponse); + this.emitStepReports(step); + return { response: aggregatedResponse, instruction: aggregatedInstruction }; + } + + /** + * Evaluate ai() conditions via AI judge. + * Collects all ai() rules, calls the judge, and maps the result back to the original rule index. + * Returns the 0-based rule index in the step's rules array, or -1 if no match. + */ + private async evaluateAiConditions(step: WorkflowStep, agentOutput: string): Promise { + if (!step.rules) return -1; + + const aiConditions: { index: number; text: string }[] = []; + for (let i = 0; i < step.rules.length; i++) { + const rule = step.rules[i]!; + if (rule.isAiCondition && rule.aiConditionText) { + aiConditions.push({ index: i, text: rule.aiConditionText }); + } + } + + if (aiConditions.length === 0) return -1; + + log.debug('Evaluating ai() conditions via judge', { + step: step.name, + conditionCount: aiConditions.length, + }); + + // Remap: judge returns 0-based index within aiConditions array + const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text })); + const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd: this.cwd }); + + if (judgeResult >= 0 && judgeResult < aiConditions.length) { + const matched = aiConditions[judgeResult]!; + log.debug('AI judge matched condition', { + step: step.name, + judgeResult, + originalRuleIndex: matched.index, + condition: matched.text, + }); + return matched.index; + } + + log.debug('AI judge did not match any condition', { step: step.name }); + return -1; + } + + /** + * Final fallback: evaluate ALL rule conditions via AI judge. + * Unlike evaluateAiConditions (which only handles ai() flagged rules), + * this sends every rule's condition text to the judge. + * Returns the 0-based rule index, or -1 if no match. + */ + private async evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string): Promise { + if (!step.rules || step.rules.length === 0) return -1; + + const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition })); + + log.debug('Evaluating all conditions via AI judge (final fallback)', { + step: step.name, + conditionCount: conditions.length, + }); + + const judgeResult = await callAiJudge(agentOutput, conditions, { cwd: this.cwd }); + + if (judgeResult >= 0 && judgeResult < conditions.length) { + log.debug('AI judge (fallback) matched condition', { + step: step.name, + ruleIndex: judgeResult, + condition: conditions[judgeResult]!.text, + }); + return judgeResult; + } + + log.debug('AI judge (fallback) did not match any condition', { step: step.name }); + return -1; + } + /** * Determine next step for a completed step using rules-based routing. */ diff --git a/src/workflow/instruction-builder.ts b/src/workflow/instruction-builder.ts index cb952c5..acdd061 100644 --- a/src/workflow/instruction-builder.ts +++ b/src/workflow/instruction-builder.ts @@ -3,9 +3,12 @@ * * Builds the instruction string for agent execution by: * 1. Auto-injecting standard sections (Execution Context, Workflow Context, - * User Request, Previous Response, Additional User Inputs, Instructions header) + * User Request, Previous Response, Additional User Inputs, Instructions header, + * Status Output Rules) * 2. Replacing template placeholders with actual values - * 3. Appending auto-generated status rules from workflow rules + * + * Status rules are injected into Phase 1 for tag-based detection, + * and also used in Phase 3 (buildStatusJudgmentInstruction) as a dedicated follow-up. */ import type { WorkflowStep, WorkflowRule, AgentResponse, Language, ReportConfig, ReportObjectConfig } from '../models/types.js'; @@ -60,29 +63,6 @@ export function buildExecutionMetadata(context: InstructionContext, edit?: boole }; } -/** Localized strings for status rules header */ -const STATUS_RULES_HEADER_STRINGS = { - en: { - heading: '# ⚠️ Required: Status Output Rules ⚠️', - warning: '**The workflow will stop without this tag.**', - instruction: 'Your final output MUST include a status tag following the rules below.', - }, - ja: { - heading: '# ⚠️ 必須: ステータス出力ルール ⚠️', - warning: '**このタグがないとワークフローが停止します。**', - instruction: '最終出力には必ず以下のルールに従ったステータスタグを含めてください。', - }, -} as const; - -/** - * Render status rules header. - * Prepended to auto-generated status rules from workflow rules. - */ -export function renderStatusRulesHeader(language: Language): string { - const strings = STATUS_RULES_HEADER_STRINGS[language]; - return [strings.heading, '', strings.warning, strings.instruction, ''].join('\n'); -} - /** Localized strings for rules-based status prompt */ const RULES_PROMPT_STRINGS = { en: { @@ -323,22 +303,31 @@ function renderWorkflowContext( `- ${s.step}: ${step.name}`, ]; - // Report info (only if step has report config AND reportDir is available) - if (step.report && context.reportDir) { - lines.push(`- ${s.reportDirectory}: ${context.reportDir}/`); + return lines.join('\n'); +} - if (typeof step.report === 'string') { - // Single file (string form) - lines.push(`- ${s.reportFile}: ${context.reportDir}/${step.report}`); - } else if (isReportObjectConfig(step.report)) { - // Object form (name + order + format) - lines.push(`- ${s.reportFile}: ${context.reportDir}/${step.report.name}`); - } else { - // Multiple files (ReportConfig[] form) - lines.push(`- ${s.reportFiles}:`); - for (const file of step.report as ReportConfig[]) { - lines.push(` - ${file.label}: ${context.reportDir}/${file.path}`); - } +/** + * Render report info for the Workflow Context section. + * Used only by buildReportInstruction() (phase 2). + */ +function renderReportContext( + report: string | ReportConfig[] | ReportObjectConfig, + reportDir: string, + language: Language, +): string { + const s = SECTION_STRINGS[language]; + const lines: string[] = [ + `- ${s.reportDirectory}: ${reportDir}/`, + ]; + + if (typeof report === 'string') { + lines.push(`- ${s.reportFile}: ${reportDir}/${report}`); + } else if (isReportObjectConfig(report)) { + lines.push(`- ${s.reportFile}: ${reportDir}/${report.name}`); + } else { + lines.push(`- ${s.reportFiles}:`); + for (const file of report) { + lines.push(` - ${file.label}: ${reportDir}/${file.path}`); } } @@ -419,7 +408,7 @@ function replaceTemplatePlaceholders( * 4. Previous Response — if passPreviousResponse and has content, unless template contains {previous_response} * 5. Additional User Inputs — unless template contains {user_inputs} * 6. Instructions header + instruction_template content — always - * 7. Status Output Rules — if rules exist + * 7. Status Output Rules — when step has tag-based rules (not all ai()/aggregate) * * Template placeholders ({task}, {previous_response}, etc.) are still replaced * within the instruction_template body for backward compatibility. @@ -466,20 +455,7 @@ export function buildInstruction( sections.push(`${s.additionalUserInputs}\n${escapeTemplateChars(userInputsStr)}`); } - // 6a. Report output instruction (auto-generated from step.report) - // If ReportObjectConfig has an explicit `order:`, use that (backward compat). - // Otherwise, auto-generate from the report declaration. - if (step.report && isReportObjectConfig(step.report) && step.report.order) { - const processedOrder = replaceTemplatePlaceholders(step.report.order.trimEnd(), step, context); - sections.push(processedOrder); - } else { - const reportInstruction = renderReportOutputInstruction(step, context, language); - if (reportInstruction) { - sections.push(reportInstruction); - } - } - - // 6b. Instructions header + instruction_template content + // 6. Instructions header + instruction_template content const processedTemplate = replaceTemplatePlaceholders( step.instructionTemplate, step, @@ -487,18 +463,182 @@ export function buildInstruction( ); sections.push(`${s.instructions}\n${processedTemplate}`); - // 6c. Report format (appended after instruction_template, from ReportObjectConfig) - if (step.report && isReportObjectConfig(step.report) && step.report.format) { - const processedFormat = replaceTemplatePlaceholders(step.report.format.trimEnd(), step, context); - sections.push(processedFormat); - } - - // 7. Status rules (auto-generated from rules) + // 7. Status Output Rules (for tag-based detection in Phase 1) + // Skip if all rules are ai() or aggregate conditions (no tags needed) if (step.rules && step.rules.length > 0) { - const statusHeader = renderStatusRulesHeader(language); - const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); - sections.push(`${statusHeader}\n${generatedPrompt}`); + const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); + if (!allNonTagConditions) { + const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules, language); + sections.push(statusRulesPrompt); + } } return sections.join('\n\n'); } + +/** Localized strings for report phase execution rules */ +const REPORT_PHASE_STRINGS = { + en: { + noSourceEdit: '**Do NOT modify project source files.** Only output report files.', + instructionBody: 'Output the results of your previous work as a report.', + }, + ja: { + noSourceEdit: '**プロジェクトのソースファイルを変更しないでください。** レポートファイルのみ出力してください。', + instructionBody: '前のステップの作業結果をレポートとして出力してください。', + }, +} as const; + +/** + * Context for building report phase instruction. + */ +export interface ReportInstructionContext { + /** Working directory */ + cwd: string; + /** Report directory path */ + reportDir: string; + /** Step iteration (for {step_iteration} replacement) */ + stepIteration: number; + /** Language */ + language?: Language; +} + +/** + * Build instruction for phase 2 (report output). + * + * Separate from buildInstruction() — only includes: + * - Execution Context (cwd + rules) + * - Workflow Context (report info only) + * - Report output instruction + format + * + * Does NOT include: User Request, Previous Response, User Inputs, + * Status rules, instruction_template. + */ +export function buildReportInstruction( + step: WorkflowStep, + context: ReportInstructionContext, +): string { + if (!step.report) { + throw new Error(`buildReportInstruction called for step "${step.name}" which has no report config`); + } + + const language = context.language ?? 'en'; + const s = SECTION_STRINGS[language]; + const r = REPORT_PHASE_STRINGS[language]; + const m = METADATA_STRINGS[language]; + const sections: string[] = []; + + // 1. Execution Context + const execLines = [ + m.heading, + `- ${m.workingDirectory}: ${context.cwd}`, + '', + m.rulesHeading, + `- ${m.noCommit}`, + `- ${m.noCd}`, + `- ${r.noSourceEdit}`, + ]; + if (m.note) { + execLines.push(''); + execLines.push(m.note); + } + execLines.push(''); + sections.push(execLines.join('\n')); + + // 2. Workflow Context (report info only) + const workflowLines = [ + s.workflowContext, + renderReportContext(step.report, context.reportDir, language), + ]; + sections.push(workflowLines.join('\n')); + + // 3. Instructions + report output instruction + format + const instrParts: string[] = [ + `${s.instructions}`, + r.instructionBody, + ]; + + // Report output instruction (auto-generated or explicit order) + const reportContext: InstructionContext = { + task: '', + iteration: 0, + maxIterations: 0, + stepIteration: context.stepIteration, + cwd: context.cwd, + userInputs: [], + reportDir: context.reportDir, + language, + }; + + if (isReportObjectConfig(step.report) && step.report.order) { + const processedOrder = replaceTemplatePlaceholders(step.report.order.trimEnd(), step, reportContext); + instrParts.push(''); + instrParts.push(processedOrder); + } else { + const reportInstruction = renderReportOutputInstruction(step, reportContext, language); + if (reportInstruction) { + instrParts.push(''); + instrParts.push(reportInstruction); + } + } + + // Report format + if (isReportObjectConfig(step.report) && step.report.format) { + const processedFormat = replaceTemplatePlaceholders(step.report.format.trimEnd(), step, reportContext); + instrParts.push(''); + instrParts.push(processedFormat); + } + + sections.push(instrParts.join('\n')); + + return sections.join('\n\n'); +} + +/** Localized strings for status judgment phase (Phase 3) */ +const STATUS_JUDGMENT_STRINGS = { + en: { + header: 'Review your work results and determine the status. Do NOT perform any additional work.', + }, + ja: { + header: '作業結果を振り返り、ステータスを判定してください。追加の作業は行わないでください。', + }, +} as const; + +/** + * Context for building status judgment instruction (Phase 3). + */ +export interface StatusJudgmentContext { + /** Language */ + language?: Language; +} + +/** + * Build instruction for Phase 3 (status judgment). + * + * Resumes the agent session and asks it to evaluate its work + * and output the appropriate status tag. No tools are allowed. + * + * Includes: + * - Header instruction (review and determine status) + * - Status rules (criteria table + output format) from generateStatusRulesFromRules() + */ +export function buildStatusJudgmentInstruction( + step: WorkflowStep, + context: StatusJudgmentContext, +): string { + if (!step.rules || step.rules.length === 0) { + throw new Error(`buildStatusJudgmentInstruction called for step "${step.name}" which has no rules`); + } + + const language = context.language ?? 'en'; + const s = STATUS_JUDGMENT_STRINGS[language]; + const sections: string[] = []; + + // Header + sections.push(s.header); + + // Status rules (criteria table + output format) + const generatedPrompt = generateStatusRulesFromRules(step.name, step.rules, language); + sections.push(generatedPrompt); + + return sections.join('\n\n'); +} diff --git a/src/workflow/parallel-logger.ts b/src/workflow/parallel-logger.ts new file mode 100644 index 0000000..31d14a3 --- /dev/null +++ b/src/workflow/parallel-logger.ts @@ -0,0 +1,206 @@ +/** + * Parallel step log display + * + * Provides prefixed, color-coded interleaved output for parallel sub-steps. + * Each sub-step's stream output gets a `[name]` prefix with right-padding + * aligned to the longest sub-step name. + */ + +import type { StreamCallback, StreamEvent } from '../claude/types.js'; + +/** ANSI color codes for sub-step prefixes (cycled in order) */ +const COLORS = ['\x1b[36m', '\x1b[33m', '\x1b[35m', '\x1b[32m'] as const; // cyan, yellow, magenta, green +const RESET = '\x1b[0m'; + +export interface ParallelLoggerOptions { + /** Sub-step names (used to calculate prefix width) */ + subStepNames: string[]; + /** Parent onStream callback to delegate non-prefixed events */ + parentOnStream?: StreamCallback; + /** Override process.stdout.write for testing */ + writeFn?: (text: string) => void; +} + +/** + * Logger for parallel step execution. + * + * Creates per-sub-step StreamCallback wrappers that: + * - Buffer partial lines until newline + * - Prepend colored `[name]` prefix to each complete line + * - Delegate init/result/error events to the parent callback + */ +export class ParallelLogger { + private readonly maxNameLength: number; + private readonly lineBuffers: Map = new Map(); + private readonly parentOnStream?: StreamCallback; + private readonly writeFn: (text: string) => void; + + constructor(options: ParallelLoggerOptions) { + this.maxNameLength = Math.max(...options.subStepNames.map((n) => n.length)); + this.parentOnStream = options.parentOnStream; + this.writeFn = options.writeFn ?? ((text: string) => process.stdout.write(text)); + + for (const name of options.subStepNames) { + this.lineBuffers.set(name, ''); + } + } + + /** + * Build the colored prefix string for a sub-step. + * Format: `\x1b[COLORm[name]\x1b[0m` + padding spaces + */ + buildPrefix(name: string, index: number): string { + const color = COLORS[index % COLORS.length]!; + const padding = ' '.repeat(this.maxNameLength - name.length); + return `${color}[${name}]${RESET}${padding} `; + } + + /** + * Create a StreamCallback wrapper for a specific sub-step. + * + * - `text`: buffered line-by-line with prefix + * - `tool_use`, `tool_result`, `tool_output`, `thinking`: prefixed per-line, no buffering + * - `init`, `result`, `error`: delegated to parent callback (no prefix) + */ + createStreamHandler(subStepName: string, index: number): StreamCallback { + const prefix = this.buildPrefix(subStepName, index); + + return (event: StreamEvent) => { + switch (event.type) { + case 'text': + this.handleTextEvent(subStepName, prefix, event.data.text); + break; + + case 'tool_use': + case 'tool_result': + case 'tool_output': + case 'thinking': + this.handleBlockEvent(prefix, event); + break; + + case 'init': + case 'result': + case 'error': + // Delegate to parent without prefix + this.parentOnStream?.(event); + break; + } + }; + } + + /** + * Handle text event with line buffering. + * Buffer until newline, then output prefixed complete lines. + * Empty lines get no prefix per spec. + */ + private handleTextEvent(name: string, prefix: string, text: string): void { + const buffer = this.lineBuffers.get(name) ?? ''; + const combined = buffer + text; + const parts = combined.split('\n'); + + // Last part is incomplete (no trailing newline) — keep in buffer + this.lineBuffers.set(name, parts.pop()!); + + // Output all complete lines + for (const line of parts) { + if (line === '') { + this.writeFn('\n'); + } else { + this.writeFn(`${prefix}${line}\n`); + } + } + } + + /** + * Handle block events (tool_use, tool_result, tool_output, thinking). + * Output with prefix, splitting multi-line content. + */ + private handleBlockEvent(prefix: string, event: StreamEvent): void { + let text: string; + switch (event.type) { + case 'tool_use': + text = `[tool] ${event.data.tool}`; + break; + case 'tool_result': + text = event.data.content; + break; + case 'tool_output': + text = event.data.output; + break; + case 'thinking': + text = event.data.thinking; + break; + default: + return; + } + + for (const line of text.split('\n')) { + if (line === '') { + this.writeFn('\n'); + } else { + this.writeFn(`${prefix}${line}\n`); + } + } + } + + /** + * Flush remaining line buffers for all sub-steps. + * Call after all sub-steps complete to output any trailing partial lines. + */ + flush(): void { + // Build prefixes for flush — need index mapping + // Since we don't store index, iterate lineBuffers in insertion order + // (Map preserves insertion order, matching subStepNames order) + let index = 0; + for (const [name, buffer] of this.lineBuffers) { + if (buffer !== '') { + const prefix = this.buildPrefix(name, index); + this.writeFn(`${prefix}${buffer}\n`); + this.lineBuffers.set(name, ''); + } + index++; + } + } + + /** + * Print completion summary after all sub-steps finish. + * + * Format: + * ``` + * ── parallel-review results ── + * arch-review: approved + * security-review: rejected + * ────────────────────────────── + * ``` + */ + printSummary( + parentStepName: string, + results: Array<{ name: string; condition: string | undefined }>, + ): void { + this.flush(); + + const maxResultNameLength = Math.max(...results.map((r) => r.name.length)); + + const resultLines = results.map((r) => { + const padding = ' '.repeat(maxResultNameLength - r.name.length); + const condition = r.condition ?? '(no result)'; + return ` ${r.name}:${padding} ${condition}`; + }); + + // Header line: ── name results ── + const headerText = ` ${parentStepName} results `; + const maxLineLength = Math.max( + headerText.length + 4, // 4 for "── " + " ──" + ...resultLines.map((l) => l.length), + ); + const sideWidth = Math.max(1, Math.floor((maxLineLength - headerText.length) / 2)); + const headerLine = `${'─'.repeat(sideWidth)}${headerText}${'─'.repeat(sideWidth)}`; + const footerLine = '─'.repeat(headerLine.length); + + this.writeFn(`${headerLine}\n`); + for (const line of resultLines) { + this.writeFn(`${line}\n`); + } + this.writeFn(`${footerLine}\n`); + } +}