From cd67a2355a5b1cae730c3be0ddd0ef38eb9bc83c Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 21:11:41 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20WorkflowEngine=E3=81=AE=E3=83=A2?= =?UTF-8?q?=E3=83=83=E3=82=AF=E3=82=A4=E3=83=B3=E3=83=86=E3=82=B0=E3=83=AC?= =?UTF-8?q?=E3=83=BC=E3=82=B7=E3=83=A7=E3=83=B3=E3=83=86=E3=82=B9=E3=83=88?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=20(#17)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit runAgentをモックし、ワークフロー全体の状態遷移を検証する インテグレーションテストを追加。 テストケース: - 正常フロー(Happy Path) - 差し戻しフロー(review reject → fix → re-review) - AI review差し戻し(ai_review → ai_fix → ai_review) - エラー: ルール未マッチ、runAgent例外 - ループ検出 - イテレーション上限 - blockedハンドリング(onUserInputあり/なし) - パラレルステップ集約(all/any条件) - rulesのnextがundefinedのケース --- src/__tests__/engine-blocked.test.ts | 143 ++++++++++ src/__tests__/engine-error.test.ts | 222 +++++++++++++++ src/__tests__/engine-happy-path.test.ts | 344 ++++++++++++++++++++++++ src/__tests__/engine-parallel.test.ts | 162 +++++++++++ src/__tests__/engine-test-helpers.ts | 175 ++++++++++++ src/__tests__/transitions.test.ts | 18 ++ 6 files changed, 1064 insertions(+) create mode 100644 src/__tests__/engine-blocked.test.ts create mode 100644 src/__tests__/engine-error.test.ts create mode 100644 src/__tests__/engine-happy-path.test.ts create mode 100644 src/__tests__/engine-parallel.test.ts create mode 100644 src/__tests__/engine-test-helpers.ts diff --git a/src/__tests__/engine-blocked.test.ts b/src/__tests__/engine-blocked.test.ts new file mode 100644 index 0000000..9ee707a --- /dev/null +++ b/src/__tests__/engine-blocked.test.ts @@ -0,0 +1,143 @@ +/** + * WorkflowEngine integration tests: blocked handling scenarios. + * + * Covers: + * - Blocked without onUserInput callback (abort) + * - Blocked with onUserInput returning null (abort) + * - Blocked with onUserInput providing input (continue) + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { existsSync, rmSync } from 'node:fs'; + +// --- Mock setup (must be before imports that use these modules) --- + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../workflow/rule-evaluator.js', () => ({ + detectMatchedRule: vi.fn(), +})); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; +import { + makeResponse, + buildDefaultWorkflowConfig, + mockRunAgentSequence, + mockDetectMatchedRuleSequence, + createTestTmpDir, + applyDefaultMocks, +} from './engine-test-helpers.js'; + +describe('WorkflowEngine Integration: Blocked Handling', () => { + let tmpDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + applyDefaultMocks(); + tmpDir = createTestTmpDir(); + }); + + afterEach(() => { + if (existsSync(tmpDir)) { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('should abort when blocked and no onUserInput callback', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', status: 'blocked', content: 'Need clarification' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + ]); + + const blockedFn = vi.fn(); + const abortFn = vi.fn(); + engine.on('step:blocked', blockedFn); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(blockedFn).toHaveBeenCalledOnce(); + expect(abortFn).toHaveBeenCalledOnce(); + }); + + it('should abort when blocked and onUserInput returns null', async () => { + const config = buildDefaultWorkflowConfig(); + const onUserInput = vi.fn().mockResolvedValue(null); + const engine = new WorkflowEngine(config, tmpDir, 'test task', { onUserInput }); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', status: 'blocked', content: 'Need info' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + ]); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(onUserInput).toHaveBeenCalledOnce(); + }); + + it('should continue when blocked and onUserInput provides input', async () => { + const config = buildDefaultWorkflowConfig(); + const onUserInput = vi.fn().mockResolvedValueOnce('User provided clarification'); + const engine = new WorkflowEngine(config, tmpDir, 'test task', { onUserInput }); + + mockRunAgentSequence([ + // First: plan is blocked + makeResponse({ agent: 'plan', status: 'blocked', content: 'Need info' }), + // Second: plan succeeds after user input + makeResponse({ agent: 'plan', content: 'Plan done with user input' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + // First plan call: blocked, rule matched but blocked handling takes over + { index: 0, method: 'phase1_tag' }, + // Second plan call: success + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const userInputFn = vi.fn(); + engine.on('step:user_input', userInputFn); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(onUserInput).toHaveBeenCalledOnce(); + expect(userInputFn).toHaveBeenCalledOnce(); + expect(state.userInputs).toContain('User provided clarification'); + }); +}); diff --git a/src/__tests__/engine-error.test.ts b/src/__tests__/engine-error.test.ts new file mode 100644 index 0000000..7e9006b --- /dev/null +++ b/src/__tests__/engine-error.test.ts @@ -0,0 +1,222 @@ +/** + * WorkflowEngine integration tests: error handling scenarios. + * + * Covers: + * - No rule matched (abort) + * - runAgent throws (abort) + * - Loop detection (abort) + * - Iteration limit (abort and extend) + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { existsSync, rmSync } from 'node:fs'; + +// --- Mock setup (must be before imports that use these modules) --- + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../workflow/rule-evaluator.js', () => ({ + detectMatchedRule: vi.fn(), +})); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; +import { runAgent } from '../agents/runner.js'; +import { detectMatchedRule } from '../workflow/rule-evaluator.js'; +import { + makeResponse, + makeStep, + makeRule, + buildDefaultWorkflowConfig, + mockRunAgentSequence, + mockDetectMatchedRuleSequence, + createTestTmpDir, + applyDefaultMocks, +} from './engine-test-helpers.js'; + +describe('WorkflowEngine Integration: Error Handling', () => { + let tmpDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + applyDefaultMocks(); + tmpDir = createTestTmpDir(); + }); + + afterEach(() => { + if (existsSync(tmpDir)) { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + // ===================================================== + // 1. No rule matched + // ===================================================== + describe('No rule matched', () => { + it('should abort when detectMatchedRule returns undefined', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Unclear output' }), + ]); + + mockDetectMatchedRuleSequence([undefined]); + + const abortFn = vi.fn(); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(abortFn).toHaveBeenCalledOnce(); + const reason = abortFn.mock.calls[0]![1] as string; + expect(reason).toContain('plan'); + }); + }); + + // ===================================================== + // 2. runAgent throws + // ===================================================== + describe('runAgent throws', () => { + it('should abort when runAgent throws an error', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + vi.mocked(runAgent).mockRejectedValueOnce(new Error('API connection failed')); + + const abortFn = vi.fn(); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(abortFn).toHaveBeenCalledOnce(); + const reason = abortFn.mock.calls[0]![1] as string; + expect(reason).toContain('API connection failed'); + }); + }); + + // ===================================================== + // 3. Loop detection + // ===================================================== + describe('Loop detection', () => { + it('should abort when loop detected with action: abort', async () => { + const config = buildDefaultWorkflowConfig({ + maxIterations: 100, + loopDetection: { maxConsecutiveSameStep: 3, action: 'abort' }, + initialStep: 'loop-step', + steps: [ + makeStep('loop-step', { + rules: [makeRule('continue', 'loop-step')], + }), + ], + }); + + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + for (let i = 0; i < 5; i++) { + vi.mocked(runAgent).mockResolvedValueOnce( + makeResponse({ content: `iteration ${i}` }) + ); + vi.mocked(detectMatchedRule).mockResolvedValueOnce( + { index: 0, method: 'phase1_tag' } + ); + } + + const abortFn = vi.fn(); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(abortFn).toHaveBeenCalledOnce(); + const reason = abortFn.mock.calls[0]![1] as string; + expect(reason).toContain('Loop detected'); + expect(reason).toContain('loop-step'); + }); + }); + + // ===================================================== + // 4. Iteration limit + // ===================================================== + describe('Iteration limit', () => { + it('should abort when max iterations reached without onIterationLimit callback', async () => { + const config = buildDefaultWorkflowConfig({ maxIterations: 2 }); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers (won't be reached) + ]); + + const limitFn = vi.fn(); + const abortFn = vi.fn(); + engine.on('iteration:limit', limitFn); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(limitFn).toHaveBeenCalledWith(2, 2); + expect(abortFn).toHaveBeenCalledOnce(); + const reason = abortFn.mock.calls[0]![1] as string; + expect(reason).toContain('Max iterations'); + }); + + it('should extend iterations when onIterationLimit provides additional iterations', async () => { + const config = buildDefaultWorkflowConfig({ maxIterations: 2 }); + + const onIterationLimit = vi.fn().mockResolvedValueOnce(10); + + const engine = new WorkflowEngine(config, tmpDir, 'test task', { + onIterationLimit, + }); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + // After hitting limit at iteration 2, onIterationLimit extends to 12 + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(onIterationLimit).toHaveBeenCalledOnce(); + }); + }); +}); diff --git a/src/__tests__/engine-happy-path.test.ts b/src/__tests__/engine-happy-path.test.ts new file mode 100644 index 0000000..8292b69 --- /dev/null +++ b/src/__tests__/engine-happy-path.test.ts @@ -0,0 +1,344 @@ +/** + * WorkflowEngine integration tests: happy path and normal flow scenarios. + * + * Covers: + * - Full happy path (plan → implement → ai_review → reviewers → supervise → COMPLETE) + * - Review reject and fix loop + * - AI review reject and fix + * - ABORT transition + * - Event emissions + * - Step output tracking + * - Config validation + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { existsSync, rmSync } from 'node:fs'; +import type { WorkflowConfig, WorkflowStep } from '../models/types.js'; + +// --- Mock setup (must be before imports that use these modules) --- + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../workflow/rule-evaluator.js', () => ({ + detectMatchedRule: vi.fn(), +})); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; +import { runAgent } from '../agents/runner.js'; +import { + makeResponse, + makeStep, + makeRule, + buildDefaultWorkflowConfig, + mockRunAgentSequence, + mockDetectMatchedRuleSequence, + createTestTmpDir, + applyDefaultMocks, +} from './engine-test-helpers.js'; + +describe('WorkflowEngine Integration: Happy Path', () => { + let tmpDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + applyDefaultMocks(); + tmpDir = createTestTmpDir(); + }); + + afterEach(() => { + if (existsSync(tmpDir)) { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + // ===================================================== + // 1. Happy Path + // ===================================================== + describe('Happy path', () => { + it('should complete: plan → implement → ai_review → reviewers(all approved) → supervise → COMPLETE', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan complete' }), + makeResponse({ agent: 'implement', content: 'Implementation done' }), + makeResponse({ agent: 'ai_review', content: 'No issues' }), + makeResponse({ agent: 'arch-review', content: 'Architecture OK' }), + makeResponse({ agent: 'security-review', content: 'Security OK' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers(all approved) → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const completeFn = vi.fn(); + engine.on('workflow:complete', completeFn); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + expect(state.iteration).toBe(5); // plan, implement, ai_review, reviewers, supervise + expect(completeFn).toHaveBeenCalledOnce(); + expect(vi.mocked(runAgent)).toHaveBeenCalledTimes(6); // 4 normal + 2 parallel sub-steps + }); + }); + + // ===================================================== + // 2. Review reject and fix loop + // ===================================================== + describe('Review reject and fix loop', () => { + it('should handle: reviewers(needs_fix) → fix → reviewers(all approved) → supervise → COMPLETE', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'No issues' }), + // Round 1 reviewers: arch approved, security needs fix + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'Vulnerability found' }), + // fix step + makeResponse({ agent: 'fix', content: 'Fixed security issue' }), + // Round 2 reviewers: both approved + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'Security OK now' }), + // supervise + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 0, method: 'phase1_tag' }, // ai_review → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 1, method: 'phase1_tag' }, // security-review → needs_fix + { index: 1, method: 'aggregate' }, // reviewers: any(needs_fix) → fix + { index: 0, method: 'phase1_tag' }, // fix → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers: all(approved) → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + // plan, implement, ai_review, reviewers(1st), fix, reviewers(2nd), supervise = 7 + expect(state.iteration).toBe(7); + }); + }); + + // ===================================================== + // 3. AI review reject and fix + // ===================================================== + describe('AI review reject and fix', () => { + it('should handle: ai_review(issues) → ai_fix → reviewers → supervise → COMPLETE', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'AI issues found' }), + makeResponse({ agent: 'ai_fix', content: 'Issues fixed' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, // plan → implement + { index: 0, method: 'phase1_tag' }, // implement → ai_review + { index: 1, method: 'phase1_tag' }, // ai_review → ai_fix (issues found) + { index: 0, method: 'phase1_tag' }, // ai_fix → reviewers + { index: 0, method: 'phase1_tag' }, // arch-review → approved + { index: 0, method: 'phase1_tag' }, // security-review → approved + { index: 0, method: 'aggregate' }, // reviewers → supervise + { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE + ]); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + // plan, implement, ai_review, ai_fix, reviewers, supervise = 6 + expect(state.iteration).toBe(6); + }); + }); + + // ===================================================== + // 4. ABORT transition + // ===================================================== + describe('ABORT transition', () => { + it('should abort when step transitions to ABORT', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Requirements unclear' }), + ]); + + // plan rule index 1 → ABORT + mockDetectMatchedRuleSequence([ + { index: 1, method: 'phase1_tag' }, + ]); + + const abortFn = vi.fn(); + engine.on('workflow:abort', abortFn); + + const state = await engine.run(); + + expect(state.status).toBe('aborted'); + expect(abortFn).toHaveBeenCalledOnce(); + }); + }); + + // ===================================================== + // 5. Event emissions + // ===================================================== + describe('Event emissions', () => { + it('should emit step:start and step:complete for each step', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan' }), + makeResponse({ agent: 'implement', content: 'Impl' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'Pass' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'aggregate' }, + { index: 0, method: 'phase1_tag' }, + ]); + + const startFn = vi.fn(); + const completeFn = vi.fn(); + engine.on('step:start', startFn); + engine.on('step:complete', completeFn); + + await engine.run(); + + // 5 steps: plan, implement, ai_review, reviewers, supervise + expect(startFn).toHaveBeenCalledTimes(5); + expect(completeFn).toHaveBeenCalledTimes(5); + + const startedSteps = startFn.mock.calls.map(call => (call[0] as WorkflowStep).name); + expect(startedSteps).toEqual(['plan', 'implement', 'ai_review', 'reviewers', 'supervise']); + }); + + it('should emit iteration:limit when max iterations reached', async () => { + const config = buildDefaultWorkflowConfig({ maxIterations: 1 }); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan' }), + ]); + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + ]); + + const limitFn = vi.fn(); + engine.on('iteration:limit', limitFn); + + await engine.run(); + + expect(limitFn).toHaveBeenCalledWith(1, 1); + }); + }); + + // ===================================================== + // 6. Step output tracking + // ===================================================== + describe('Step output tracking', () => { + it('should store outputs for all executed steps', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan output' }), + makeResponse({ agent: 'implement', content: 'Implement output' }), + makeResponse({ agent: 'ai_review', content: 'AI review output' }), + makeResponse({ agent: 'arch-review', content: 'Arch output' }), + makeResponse({ agent: 'security-review', content: 'Sec output' }), + makeResponse({ agent: 'supervise', content: 'Supervise output' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'aggregate' }, + { index: 0, method: 'phase1_tag' }, + ]); + + const state = await engine.run(); + + expect(state.stepOutputs.get('plan')!.content).toBe('Plan output'); + expect(state.stepOutputs.get('implement')!.content).toBe('Implement output'); + expect(state.stepOutputs.get('ai_review')!.content).toBe('AI review output'); + expect(state.stepOutputs.get('supervise')!.content).toBe('Supervise output'); + }); + }); + + // ===================================================== + // 7. Config validation + // ===================================================== + describe('Config validation', () => { + it('should throw when initial step does not exist', () => { + const config = buildDefaultWorkflowConfig({ initialStep: 'nonexistent' }); + + expect(() => { + new WorkflowEngine(config, tmpDir, 'test task'); + }).toThrow('Unknown step: nonexistent'); + }); + + it('should throw when rule references nonexistent step', () => { + const config: WorkflowConfig = { + name: 'test', + maxIterations: 10, + initialStep: 'step1', + steps: [ + makeStep('step1', { + rules: [makeRule('done', 'nonexistent_step')], + }), + ], + }; + + expect(() => { + new WorkflowEngine(config, tmpDir, 'test task'); + }).toThrow('nonexistent_step'); + }); + }); +}); diff --git a/src/__tests__/engine-parallel.test.ts b/src/__tests__/engine-parallel.test.ts new file mode 100644 index 0000000..7e11741 --- /dev/null +++ b/src/__tests__/engine-parallel.test.ts @@ -0,0 +1,162 @@ +/** + * WorkflowEngine integration tests: parallel step aggregation. + * + * Covers: + * - Aggregated output format (## headers and --- separators) + * - Individual sub-step output storage + * - Concurrent execution of sub-steps + */ + +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { existsSync, rmSync } from 'node:fs'; + +// --- Mock setup (must be before imports that use these modules) --- + +vi.mock('../agents/runner.js', () => ({ + runAgent: vi.fn(), +})); + +vi.mock('../workflow/rule-evaluator.js', () => ({ + detectMatchedRule: vi.fn(), +})); + +vi.mock('../workflow/phase-runner.js', () => ({ + needsStatusJudgmentPhase: vi.fn().mockReturnValue(false), + runReportPhase: vi.fn().mockResolvedValue(undefined), + runStatusJudgmentPhase: vi.fn().mockResolvedValue(''), +})); + +vi.mock('../utils/session.js', () => ({ + generateReportDir: vi.fn().mockReturnValue('test-report-dir'), +})); + +// --- Imports (after mocks) --- + +import { WorkflowEngine } from '../workflow/engine.js'; +import { runAgent } from '../agents/runner.js'; +import { + makeResponse, + buildDefaultWorkflowConfig, + mockRunAgentSequence, + mockDetectMatchedRuleSequence, + createTestTmpDir, + applyDefaultMocks, +} from './engine-test-helpers.js'; + +describe('WorkflowEngine Integration: Parallel Step Aggregation', () => { + let tmpDir: string; + + beforeEach(() => { + vi.resetAllMocks(); + applyDefaultMocks(); + tmpDir = createTestTmpDir(); + }); + + afterEach(() => { + if (existsSync(tmpDir)) { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('should aggregate sub-step outputs with ## headers and --- separators', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan done' }), + makeResponse({ agent: 'implement', content: 'Impl done' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'Architecture review content' }), + makeResponse({ agent: 'security-review', content: 'Security review content' }), + makeResponse({ agent: 'supervise', content: 'All passed' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, // arch-review + { index: 0, method: 'phase1_tag' }, // security-review + { index: 0, method: 'aggregate' }, // reviewers + { index: 0, method: 'phase1_tag' }, + ]); + + const state = await engine.run(); + + expect(state.status).toBe('completed'); + + const reviewersOutput = state.stepOutputs.get('reviewers'); + expect(reviewersOutput).toBeDefined(); + expect(reviewersOutput!.content).toContain('## arch-review'); + expect(reviewersOutput!.content).toContain('Architecture review content'); + expect(reviewersOutput!.content).toContain('---'); + expect(reviewersOutput!.content).toContain('## security-review'); + expect(reviewersOutput!.content).toContain('Security review content'); + expect(reviewersOutput!.matchedRuleMethod).toBe('aggregate'); + }); + + it('should store individual sub-step outputs in stepOutputs', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan' }), + makeResponse({ agent: 'implement', content: 'Impl' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'Arch content' }), + makeResponse({ agent: 'security-review', content: 'Sec content' }), + makeResponse({ agent: 'supervise', content: 'Pass' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'aggregate' }, + { index: 0, method: 'phase1_tag' }, + ]); + + const state = await engine.run(); + + expect(state.stepOutputs.has('arch-review')).toBe(true); + expect(state.stepOutputs.has('security-review')).toBe(true); + expect(state.stepOutputs.has('reviewers')).toBe(true); + expect(state.stepOutputs.get('arch-review')!.content).toBe('Arch content'); + expect(state.stepOutputs.get('security-review')!.content).toBe('Sec content'); + }); + + it('should execute sub-steps concurrently (both runAgent calls happen)', async () => { + const config = buildDefaultWorkflowConfig(); + const engine = new WorkflowEngine(config, tmpDir, 'test task'); + + mockRunAgentSequence([ + makeResponse({ agent: 'plan', content: 'Plan' }), + makeResponse({ agent: 'implement', content: 'Impl' }), + makeResponse({ agent: 'ai_review', content: 'OK' }), + makeResponse({ agent: 'arch-review', content: 'OK' }), + makeResponse({ agent: 'security-review', content: 'OK' }), + makeResponse({ agent: 'supervise', content: 'Pass' }), + ]); + + mockDetectMatchedRuleSequence([ + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'phase1_tag' }, + { index: 0, method: 'aggregate' }, + { index: 0, method: 'phase1_tag' }, + ]); + + await engine.run(); + + // 6 total: 4 normal + 2 parallel sub-steps + expect(vi.mocked(runAgent)).toHaveBeenCalledTimes(6); + + const calledAgents = vi.mocked(runAgent).mock.calls.map(call => call[0]); + expect(calledAgents).toContain('../agents/arch-review.md'); + expect(calledAgents).toContain('../agents/security-review.md'); + }); +}); diff --git a/src/__tests__/engine-test-helpers.ts b/src/__tests__/engine-test-helpers.ts new file mode 100644 index 0000000..9a79f11 --- /dev/null +++ b/src/__tests__/engine-test-helpers.ts @@ -0,0 +1,175 @@ +/** + * Shared helpers for WorkflowEngine integration tests. + * + * Provides mock setup, factory functions, and a default workflow config + * matching the parallel reviewers structure (plan → implement → ai_review → reviewers → supervise). + */ + +import { vi } from 'vitest'; +import { mkdirSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { randomUUID } from 'node:crypto'; +import type { WorkflowConfig, WorkflowStep, AgentResponse, WorkflowRule } from '../models/types.js'; + +// --- Mock imports (consumers must call vi.mock before importing this) --- + +import { runAgent } from '../agents/runner.js'; +import { detectMatchedRule } from '../workflow/rule-evaluator.js'; +import type { RuleMatch } from '../workflow/rule-evaluator.js'; +import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../workflow/phase-runner.js'; +import { generateReportDir } from '../utils/session.js'; + +// --- Factory functions --- + +export function makeResponse(overrides: Partial = {}): AgentResponse { + return { + agent: 'test-agent', + status: 'done', + content: 'test response', + timestamp: new Date(), + sessionId: `session-${randomUUID()}`, + ...overrides, + }; +} + +export function makeRule(condition: string, next: string, extra: Partial = {}): WorkflowRule { + return { condition, next, ...extra }; +} + +export function makeStep(name: string, overrides: Partial = {}): WorkflowStep { + return { + name, + agent: `../agents/${name}.md`, + agentDisplayName: name, + instructionTemplate: `Run ${name}`, + passPreviousResponse: true, + ...overrides, + }; +} + +/** + * Build a workflow config matching the default.yaml parallel reviewers structure: + * plan → implement → ai_review → (ai_fix↔) → reviewers(parallel) → (fix↔) → supervise + */ +export function buildDefaultWorkflowConfig(overrides: Partial = {}): WorkflowConfig { + const archReviewSubStep = makeStep('arch-review', { + rules: [ + makeRule('approved', 'COMPLETE'), + makeRule('needs_fix', 'fix'), + ], + }); + + const securityReviewSubStep = makeStep('security-review', { + rules: [ + makeRule('approved', 'COMPLETE'), + makeRule('needs_fix', 'fix'), + ], + }); + + return { + name: 'test-default', + description: 'Test workflow', + maxIterations: 30, + initialStep: 'plan', + steps: [ + makeStep('plan', { + rules: [ + makeRule('Requirements are clear', 'implement'), + makeRule('Requirements unclear', 'ABORT'), + ], + }), + makeStep('implement', { + rules: [ + makeRule('Implementation complete', 'ai_review'), + makeRule('Cannot proceed', 'plan'), + ], + }), + makeStep('ai_review', { + rules: [ + makeRule('No AI-specific issues', 'reviewers'), + makeRule('AI-specific issues found', 'ai_fix'), + ], + }), + makeStep('ai_fix', { + rules: [ + makeRule('AI issues fixed', 'reviewers'), + makeRule('Cannot proceed', 'plan'), + ], + }), + makeStep('reviewers', { + parallel: [archReviewSubStep, securityReviewSubStep], + rules: [ + makeRule('all("approved")', 'supervise', { + isAggregateCondition: true, + aggregateType: 'all', + aggregateConditionText: 'approved', + }), + makeRule('any("needs_fix")', 'fix', { + isAggregateCondition: true, + aggregateType: 'any', + aggregateConditionText: 'needs_fix', + }), + ], + }), + makeStep('fix', { + rules: [ + makeRule('Fix complete', 'reviewers'), + makeRule('Cannot proceed', 'plan'), + ], + }), + makeStep('supervise', { + rules: [ + makeRule('All checks passed', 'COMPLETE'), + makeRule('Requirements unmet', 'plan'), + ], + }), + ], + ...overrides, + }; +} + +// --- Mock sequence helpers --- + +/** + * Configure runAgent mock to return a sequence of responses. + */ +export function mockRunAgentSequence(responses: AgentResponse[]): void { + const mock = vi.mocked(runAgent); + for (const response of responses) { + mock.mockResolvedValueOnce(response); + } +} + +/** + * Configure detectMatchedRule mock to return a sequence of rule matches. + */ +export function mockDetectMatchedRuleSequence(matches: (RuleMatch | undefined)[]): void { + const mock = vi.mocked(detectMatchedRule); + for (const match of matches) { + mock.mockResolvedValueOnce(match); + } +} + +// --- Test environment setup --- + +/** + * Create a temporary directory with the required .takt/reports structure. + * Returns the tmpDir path. Caller is responsible for cleanup. + */ +export function createTestTmpDir(): string { + const tmpDir = join(tmpdir(), `takt-engine-test-${randomUUID()}`); + mkdirSync(tmpDir, { recursive: true }); + mkdirSync(join(tmpDir, '.takt', 'reports', 'test-report-dir'), { recursive: true }); + return tmpDir; +} + +/** + * Re-apply default mocks for phase-runner and session after vi.resetAllMocks(). + */ +export function applyDefaultMocks(): void { + vi.mocked(needsStatusJudgmentPhase).mockReturnValue(false); + vi.mocked(runReportPhase).mockResolvedValue(undefined); + vi.mocked(runStatusJudgmentPhase).mockResolvedValue(''); + vi.mocked(generateReportDir).mockReturnValue('test-report-dir'); +} diff --git a/src/__tests__/transitions.test.ts b/src/__tests__/transitions.test.ts index fcc8890..7ea899a 100644 --- a/src/__tests__/transitions.test.ts +++ b/src/__tests__/transitions.test.ts @@ -60,4 +60,22 @@ describe('determineNextStepByRules', () => { expect(determineNextStepByRules(step, 0)).toBe('COMPLETE'); }); + + it('should return null when rule exists but next is undefined', () => { + // Parallel sub-step rules may omit `next` (optional field) + const step: WorkflowStep = { + name: 'sub-step', + agent: 'test-agent', + agentDisplayName: 'Test Agent', + instructionTemplate: '{task}', + passPreviousResponse: false, + rules: [ + { condition: 'approved' }, + { condition: 'needs_fix' }, + ], + }; + + expect(determineNextStepByRules(step, 0)).toBeNull(); + expect(determineNextStepByRules(step, 1)).toBeNull(); + }); });