feat: WorkflowEngineのモックインテグレーションテスト追加 (#17)

runAgentをモックし、ワークフロー全体の状態遷移を検証する
インテグレーションテストを追加。

テストケース:
- 正常フロー(Happy Path)
- 差し戻しフロー(review reject → fix → re-review)
- AI review差し戻し(ai_review → ai_fix → ai_review)
- エラー: ルール未マッチ、runAgent例外
- ループ検出
- イテレーション上限
- blockedハンドリング(onUserInputあり/なし)
- パラレルステップ集約(all/any条件)
- rulesのnextがundefinedのケース
This commit is contained in:
nrslib 2026-01-30 21:11:41 +09:00
parent e657211591
commit cd67a2355a
6 changed files with 1064 additions and 0 deletions

View File

@@ -0,0 +1,143 @@
/**
* WorkflowEngine integration tests: blocked handling scenarios.
*
* Covers:
* - Blocked without onUserInput callback (abort)
* - Blocked with onUserInput returning null (abort)
* - Blocked with onUserInput providing input (continue)
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { existsSync, rmSync } from 'node:fs';
// --- Mock setup (must be before imports that use these modules) ---
// NOTE: vitest hoists vi.mock calls to the top of the module, so these
// factories run before the static imports below are evaluated.
vi.mock('../agents/runner.js', () => ({
  runAgent: vi.fn(),
}));
vi.mock('../workflow/rule-evaluator.js', () => ({
  detectMatchedRule: vi.fn(),
}));
vi.mock('../workflow/phase-runner.js', () => ({
  // Status-judgment and report phases are out of scope for these tests.
  needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
  runReportPhase: vi.fn().mockResolvedValue(undefined),
  runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
}));
vi.mock('../utils/session.js', () => ({
  // Fixed report dir name; createTestTmpDir pre-creates this directory.
  generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
}));
// --- Imports (after mocks) ---
import { WorkflowEngine } from '../workflow/engine.js';
import {
makeResponse,
buildDefaultWorkflowConfig,
mockRunAgentSequence,
mockDetectMatchedRuleSequence,
createTestTmpDir,
applyDefaultMocks,
} from './engine-test-helpers.js';
describe('WorkflowEngine Integration: Blocked Handling', () => {
  let tmpDir: string;

  beforeEach(() => {
    // resetAllMocks clears the module-scope vi.mock stubs, so the default
    // phase-runner/session behaviors must be re-applied for every test.
    vi.resetAllMocks();
    applyDefaultMocks();
    tmpDir = createTestTmpDir();
  });

  afterEach(() => {
    if (existsSync(tmpDir)) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });

  it('should abort when blocked and no onUserInput callback', async () => {
    const config = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(config, tmpDir, 'test task');
    // First step (plan) reports `blocked`; with no onUserInput option the
    // engine has no way to recover and must abort.
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', status: 'blocked', content: 'Need clarification' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
    ]);
    const blockedFn = vi.fn();
    const abortFn = vi.fn();
    engine.on('step:blocked', blockedFn);
    engine.on('workflow:abort', abortFn);
    const state = await engine.run();
    expect(state.status).toBe('aborted');
    expect(blockedFn).toHaveBeenCalledOnce();
    expect(abortFn).toHaveBeenCalledOnce();
  });

  it('should abort when blocked and onUserInput returns null', async () => {
    const config = buildDefaultWorkflowConfig();
    // A null answer from onUserInput means "no input available" — treated the
    // same as having no callback at all.
    const onUserInput = vi.fn().mockResolvedValue(null);
    const engine = new WorkflowEngine(config, tmpDir, 'test task', { onUserInput });
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', status: 'blocked', content: 'Need info' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
    ]);
    const state = await engine.run();
    expect(state.status).toBe('aborted');
    expect(onUserInput).toHaveBeenCalledOnce();
  });

  it('should continue when blocked and onUserInput provides input', async () => {
    const config = buildDefaultWorkflowConfig();
    const onUserInput = vi.fn().mockResolvedValueOnce('User provided clarification');
    const engine = new WorkflowEngine(config, tmpDir, 'test task', { onUserInput });
    mockRunAgentSequence([
      // First: plan is blocked
      makeResponse({ agent: 'plan', status: 'blocked', content: 'Need info' }),
      // Second: plan succeeds after user input
      makeResponse({ agent: 'plan', content: 'Plan done with user input' }),
      makeResponse({ agent: 'implement', content: 'Impl done' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'OK' }),
      makeResponse({ agent: 'supervise', content: 'All passed' }),
    ]);
    mockDetectMatchedRuleSequence([
      // First plan call: blocked, rule matched but blocked handling takes over
      { index: 0, method: 'phase1_tag' },
      // Second plan call: success
      { index: 0, method: 'phase1_tag' }, // plan → implement
      { index: 0, method: 'phase1_tag' }, // implement → ai_review
      { index: 0, method: 'phase1_tag' }, // ai_review → reviewers
      { index: 0, method: 'phase1_tag' }, // arch-review → approved
      { index: 0, method: 'phase1_tag' }, // security-review → approved
      { index: 0, method: 'aggregate' }, // reviewers → supervise
      { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
    ]);
    const userInputFn = vi.fn();
    engine.on('step:user_input', userInputFn);
    const state = await engine.run();
    expect(state.status).toBe('completed');
    expect(onUserInput).toHaveBeenCalledOnce();
    expect(userInputFn).toHaveBeenCalledOnce();
    // The captured user answer must be recorded on the workflow state.
    expect(state.userInputs).toContain('User provided clarification');
  });
});

View File

@@ -0,0 +1,222 @@
/**
* WorkflowEngine integration tests: error handling scenarios.
*
* Covers:
* - No rule matched (abort)
* - runAgent throws (abort)
* - Loop detection (abort)
* - Iteration limit (abort and extend)
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { existsSync, rmSync } from 'node:fs';
// --- Mock setup (must be before imports that use these modules) ---
// NOTE: vitest hoists vi.mock calls to the top of the module, so these
// factories run before the static imports below are evaluated.
vi.mock('../agents/runner.js', () => ({
  runAgent: vi.fn(),
}));
vi.mock('../workflow/rule-evaluator.js', () => ({
  detectMatchedRule: vi.fn(),
}));
vi.mock('../workflow/phase-runner.js', () => ({
  // Status-judgment and report phases are out of scope for these tests.
  needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
  runReportPhase: vi.fn().mockResolvedValue(undefined),
  runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
}));
vi.mock('../utils/session.js', () => ({
  // Fixed report dir name; createTestTmpDir pre-creates this directory.
  generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
}));
// --- Imports (after mocks) ---
import { WorkflowEngine } from '../workflow/engine.js';
import { runAgent } from '../agents/runner.js';
import { detectMatchedRule } from '../workflow/rule-evaluator.js';
import {
makeResponse,
makeStep,
makeRule,
buildDefaultWorkflowConfig,
mockRunAgentSequence,
mockDetectMatchedRuleSequence,
createTestTmpDir,
applyDefaultMocks,
} from './engine-test-helpers.js';
describe('WorkflowEngine Integration: Error Handling', () => {
  let tmpDir: string;

  beforeEach(() => {
    // resetAllMocks clears the module-scope vi.mock stubs, so the default
    // phase-runner/session behaviors must be re-applied for every test.
    vi.resetAllMocks();
    applyDefaultMocks();
    tmpDir = createTestTmpDir();
  });

  afterEach(() => {
    if (existsSync(tmpDir)) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });

  // =====================================================
  // 1. No rule matched
  // =====================================================
  describe('No rule matched', () => {
    it('should abort when detectMatchedRule returns undefined', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Unclear output' }),
      ]);
      // No rule matches the first (plan) output → the engine cannot pick a
      // next step and has to abort.
      mockDetectMatchedRuleSequence([undefined]);
      const abortFn = vi.fn();
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      expect(abortFn).toHaveBeenCalledOnce();
      // The abort reason should identify the step that failed to match.
      const reason = abortFn.mock.calls[0]![1] as string;
      expect(reason).toContain('plan');
    });
  });

  // =====================================================
  // 2. runAgent throws
  // =====================================================
  describe('runAgent throws', () => {
    it('should abort when runAgent throws an error', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      vi.mocked(runAgent).mockRejectedValueOnce(new Error('API connection failed'));
      const abortFn = vi.fn();
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      expect(abortFn).toHaveBeenCalledOnce();
      // The original error message is propagated into the abort reason.
      const reason = abortFn.mock.calls[0]![1] as string;
      expect(reason).toContain('API connection failed');
    });
  });

  // =====================================================
  // 3. Loop detection
  // =====================================================
  describe('Loop detection', () => {
    it('should abort when loop detected with action: abort', async () => {
      // Single self-referencing step: 'loop-step' always transitions to itself.
      const config = buildDefaultWorkflowConfig({
        maxIterations: 100,
        loopDetection: { maxConsecutiveSameStep: 3, action: 'abort' },
        initialStep: 'loop-step',
        steps: [
          makeStep('loop-step', {
            rules: [makeRule('continue', 'loop-step')],
          }),
        ],
      });
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      // Queue more iterations (5) than the loop threshold (3) so loop
      // detection, not mock exhaustion, is what stops the run.
      for (let i = 0; i < 5; i++) {
        vi.mocked(runAgent).mockResolvedValueOnce(
          makeResponse({ content: `iteration ${i}` })
        );
        vi.mocked(detectMatchedRule).mockResolvedValueOnce(
          { index: 0, method: 'phase1_tag' }
        );
      }
      const abortFn = vi.fn();
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      expect(abortFn).toHaveBeenCalledOnce();
      const reason = abortFn.mock.calls[0]![1] as string;
      expect(reason).toContain('Loop detected');
      expect(reason).toContain('loop-step');
    });
  });

  // =====================================================
  // 4. Iteration limit
  // =====================================================
  describe('Iteration limit', () => {
    it('should abort when max iterations reached without onIterationLimit callback', async () => {
      const config = buildDefaultWorkflowConfig({ maxIterations: 2 });
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan done' }),
        makeResponse({ agent: 'implement', content: 'Impl done' }),
        makeResponse({ agent: 'ai_review', content: 'OK' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' }, // plan → implement
        { index: 0, method: 'phase1_tag' }, // implement → ai_review
        { index: 0, method: 'phase1_tag' }, // ai_review → reviewers (won't be reached)
      ]);
      const limitFn = vi.fn();
      const abortFn = vi.fn();
      engine.on('iteration:limit', limitFn);
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      // iteration:limit is emitted as (currentIteration, maxIterations).
      expect(limitFn).toHaveBeenCalledWith(2, 2);
      expect(abortFn).toHaveBeenCalledOnce();
      const reason = abortFn.mock.calls[0]![1] as string;
      expect(reason).toContain('Max iterations');
    });

    it('should extend iterations when onIterationLimit provides additional iterations', async () => {
      const config = buildDefaultWorkflowConfig({ maxIterations: 2 });
      // Callback grants 10 extra iterations the first time the limit is hit.
      const onIterationLimit = vi.fn().mockResolvedValueOnce(10);
      const engine = new WorkflowEngine(config, tmpDir, 'test task', {
        onIterationLimit,
      });
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan done' }),
        makeResponse({ agent: 'implement', content: 'Impl done' }),
        // After hitting limit at iteration 2, onIterationLimit extends to 12
        makeResponse({ agent: 'ai_review', content: 'OK' }),
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'OK' }),
        makeResponse({ agent: 'supervise', content: 'All passed' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' }, // plan → implement
        { index: 0, method: 'phase1_tag' }, // implement → ai_review
        { index: 0, method: 'phase1_tag' }, // ai_review → reviewers
        { index: 0, method: 'phase1_tag' }, // arch-review → approved
        { index: 0, method: 'phase1_tag' }, // security-review → approved
        { index: 0, method: 'aggregate' }, // reviewers → supervise
        { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
      ]);
      const state = await engine.run();
      expect(state.status).toBe('completed');
      expect(onIterationLimit).toHaveBeenCalledOnce();
    });
  });
});

View File

@@ -0,0 +1,344 @@
/**
* WorkflowEngine integration tests: happy path and normal flow scenarios.
*
* Covers:
* - Full happy path (plan implement ai_review reviewers supervise COMPLETE)
* - Review reject and fix loop
* - AI review reject and fix
* - ABORT transition
* - Event emissions
* - Step output tracking
* - Config validation
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { existsSync, rmSync } from 'node:fs';
import type { WorkflowConfig, WorkflowStep } from '../models/types.js';
// --- Mock setup (must be before imports that use these modules) ---
// NOTE: vitest hoists vi.mock calls to the top of the module, so these
// factories run before the static imports below are evaluated.
vi.mock('../agents/runner.js', () => ({
  runAgent: vi.fn(),
}));
vi.mock('../workflow/rule-evaluator.js', () => ({
  detectMatchedRule: vi.fn(),
}));
vi.mock('../workflow/phase-runner.js', () => ({
  // Status-judgment and report phases are out of scope for these tests.
  needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
  runReportPhase: vi.fn().mockResolvedValue(undefined),
  runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
}));
vi.mock('../utils/session.js', () => ({
  // Fixed report dir name; createTestTmpDir pre-creates this directory.
  generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
}));
// --- Imports (after mocks) ---
import { WorkflowEngine } from '../workflow/engine.js';
import { runAgent } from '../agents/runner.js';
import {
makeResponse,
makeStep,
makeRule,
buildDefaultWorkflowConfig,
mockRunAgentSequence,
mockDetectMatchedRuleSequence,
createTestTmpDir,
applyDefaultMocks,
} from './engine-test-helpers.js';
describe('WorkflowEngine Integration: Happy Path', () => {
  let tmpDir: string;

  beforeEach(() => {
    // resetAllMocks clears the module-scope vi.mock stubs, so the default
    // phase-runner/session behaviors must be re-applied for every test.
    vi.resetAllMocks();
    applyDefaultMocks();
    tmpDir = createTestTmpDir();
  });

  afterEach(() => {
    if (existsSync(tmpDir)) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });

  // =====================================================
  // 1. Happy Path
  // =====================================================
  describe('Happy path', () => {
    it('should complete: plan → implement → ai_review → reviewers(all approved) → supervise → COMPLETE', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      // One response per runAgent call, in execution order; the two parallel
      // reviewer sub-steps each consume one response.
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan complete' }),
        makeResponse({ agent: 'implement', content: 'Implementation done' }),
        makeResponse({ agent: 'ai_review', content: 'No issues' }),
        makeResponse({ agent: 'arch-review', content: 'Architecture OK' }),
        makeResponse({ agent: 'security-review', content: 'Security OK' }),
        makeResponse({ agent: 'supervise', content: 'All passed' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' }, // plan → implement
        { index: 0, method: 'phase1_tag' }, // implement → ai_review
        { index: 0, method: 'phase1_tag' }, // ai_review → reviewers
        { index: 0, method: 'phase1_tag' }, // arch-review → approved
        { index: 0, method: 'phase1_tag' }, // security-review → approved
        { index: 0, method: 'aggregate' }, // reviewers(all approved) → supervise
        { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
      ]);
      const completeFn = vi.fn();
      engine.on('workflow:complete', completeFn);
      const state = await engine.run();
      expect(state.status).toBe('completed');
      expect(state.iteration).toBe(5); // plan, implement, ai_review, reviewers, supervise
      expect(completeFn).toHaveBeenCalledOnce();
      expect(vi.mocked(runAgent)).toHaveBeenCalledTimes(6); // 4 normal + 2 parallel sub-steps
    });
  });

  // =====================================================
  // 2. Review reject and fix loop
  // =====================================================
  describe('Review reject and fix loop', () => {
    it('should handle: reviewers(needs_fix) → fix → reviewers(all approved) → supervise → COMPLETE', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan done' }),
        makeResponse({ agent: 'implement', content: 'Impl done' }),
        makeResponse({ agent: 'ai_review', content: 'No issues' }),
        // Round 1 reviewers: arch approved, security needs fix
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'Vulnerability found' }),
        // fix step
        makeResponse({ agent: 'fix', content: 'Fixed security issue' }),
        // Round 2 reviewers: both approved
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'Security OK now' }),
        // supervise
        makeResponse({ agent: 'supervise', content: 'All passed' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' }, // plan → implement
        { index: 0, method: 'phase1_tag' }, // implement → ai_review
        { index: 0, method: 'phase1_tag' }, // ai_review → reviewers
        { index: 0, method: 'phase1_tag' }, // arch-review → approved
        { index: 1, method: 'phase1_tag' }, // security-review → needs_fix
        { index: 1, method: 'aggregate' }, // reviewers: any(needs_fix) → fix
        { index: 0, method: 'phase1_tag' }, // fix → reviewers
        { index: 0, method: 'phase1_tag' }, // arch-review → approved
        { index: 0, method: 'phase1_tag' }, // security-review → approved
        { index: 0, method: 'aggregate' }, // reviewers: all(approved) → supervise
        { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
      ]);
      const state = await engine.run();
      expect(state.status).toBe('completed');
      // plan, implement, ai_review, reviewers(1st), fix, reviewers(2nd), supervise = 7
      expect(state.iteration).toBe(7);
    });
  });

  // =====================================================
  // 3. AI review reject and fix
  // =====================================================
  describe('AI review reject and fix', () => {
    it('should handle: ai_review(issues) → ai_fix → reviewers → supervise → COMPLETE', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan done' }),
        makeResponse({ agent: 'implement', content: 'Impl done' }),
        makeResponse({ agent: 'ai_review', content: 'AI issues found' }),
        makeResponse({ agent: 'ai_fix', content: 'Issues fixed' }),
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'OK' }),
        makeResponse({ agent: 'supervise', content: 'All passed' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' }, // plan → implement
        { index: 0, method: 'phase1_tag' }, // implement → ai_review
        { index: 1, method: 'phase1_tag' }, // ai_review → ai_fix (issues found)
        { index: 0, method: 'phase1_tag' }, // ai_fix → reviewers
        { index: 0, method: 'phase1_tag' }, // arch-review → approved
        { index: 0, method: 'phase1_tag' }, // security-review → approved
        { index: 0, method: 'aggregate' }, // reviewers → supervise
        { index: 0, method: 'phase1_tag' }, // supervise → COMPLETE
      ]);
      const state = await engine.run();
      expect(state.status).toBe('completed');
      // plan, implement, ai_review, ai_fix, reviewers, supervise = 6
      expect(state.iteration).toBe(6);
    });
  });

  // =====================================================
  // 4. ABORT transition
  // =====================================================
  describe('ABORT transition', () => {
    it('should abort when step transitions to ABORT', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Requirements unclear' }),
      ]);
      // plan rule index 1 → ABORT
      mockDetectMatchedRuleSequence([
        { index: 1, method: 'phase1_tag' },
      ]);
      const abortFn = vi.fn();
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      expect(abortFn).toHaveBeenCalledOnce();
    });
  });

  // =====================================================
  // 5. Event emissions
  // =====================================================
  describe('Event emissions', () => {
    it('should emit step:start and step:complete for each step', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan' }),
        makeResponse({ agent: 'implement', content: 'Impl' }),
        makeResponse({ agent: 'ai_review', content: 'OK' }),
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'OK' }),
        makeResponse({ agent: 'supervise', content: 'Pass' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'aggregate' },
        { index: 0, method: 'phase1_tag' },
      ]);
      const startFn = vi.fn();
      const completeFn = vi.fn();
      engine.on('step:start', startFn);
      engine.on('step:complete', completeFn);
      await engine.run();
      // 5 steps: plan, implement, ai_review, reviewers, supervise
      // (parallel sub-steps do not emit their own step:start/step:complete)
      expect(startFn).toHaveBeenCalledTimes(5);
      expect(completeFn).toHaveBeenCalledTimes(5);
      const startedSteps = startFn.mock.calls.map(call => (call[0] as WorkflowStep).name);
      expect(startedSteps).toEqual(['plan', 'implement', 'ai_review', 'reviewers', 'supervise']);
    });

    it('should emit iteration:limit when max iterations reached', async () => {
      const config = buildDefaultWorkflowConfig({ maxIterations: 1 });
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },
      ]);
      const limitFn = vi.fn();
      engine.on('iteration:limit', limitFn);
      await engine.run();
      // Emitted as (currentIteration, maxIterations).
      expect(limitFn).toHaveBeenCalledWith(1, 1);
    });
  });

  // =====================================================
  // 6. Step output tracking
  // =====================================================
  describe('Step output tracking', () => {
    it('should store outputs for all executed steps', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan output' }),
        makeResponse({ agent: 'implement', content: 'Implement output' }),
        makeResponse({ agent: 'ai_review', content: 'AI review output' }),
        makeResponse({ agent: 'arch-review', content: 'Arch output' }),
        makeResponse({ agent: 'security-review', content: 'Sec output' }),
        makeResponse({ agent: 'supervise', content: 'Supervise output' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'aggregate' },
        { index: 0, method: 'phase1_tag' },
      ]);
      const state = await engine.run();
      // stepOutputs is keyed by step name and keeps the latest response.
      expect(state.stepOutputs.get('plan')!.content).toBe('Plan output');
      expect(state.stepOutputs.get('implement')!.content).toBe('Implement output');
      expect(state.stepOutputs.get('ai_review')!.content).toBe('AI review output');
      expect(state.stepOutputs.get('supervise')!.content).toBe('Supervise output');
    });
  });

  // =====================================================
  // 7. Config validation
  // =====================================================
  describe('Config validation', () => {
    it('should throw when initial step does not exist', () => {
      const config = buildDefaultWorkflowConfig({ initialStep: 'nonexistent' });
      // Validation happens in the constructor, before run() is ever called.
      expect(() => {
        new WorkflowEngine(config, tmpDir, 'test task');
      }).toThrow('Unknown step: nonexistent');
    });

    it('should throw when rule references nonexistent step', () => {
      const config: WorkflowConfig = {
        name: 'test',
        maxIterations: 10,
        initialStep: 'step1',
        steps: [
          makeStep('step1', {
            rules: [makeRule('done', 'nonexistent_step')],
          }),
        ],
      };
      expect(() => {
        new WorkflowEngine(config, tmpDir, 'test task');
      }).toThrow('nonexistent_step');
    });
  });
});

View File

@@ -0,0 +1,162 @@
/**
* WorkflowEngine integration tests: parallel step aggregation.
*
* Covers:
* - Aggregated output format (## headers and --- separators)
* - Individual sub-step output storage
* - Concurrent execution of sub-steps
*/
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { existsSync, rmSync } from 'node:fs';
// --- Mock setup (must be before imports that use these modules) ---
// NOTE: vitest hoists vi.mock calls to the top of the module, so these
// factories run before the static imports below are evaluated.
vi.mock('../agents/runner.js', () => ({
  runAgent: vi.fn(),
}));
vi.mock('../workflow/rule-evaluator.js', () => ({
  detectMatchedRule: vi.fn(),
}));
vi.mock('../workflow/phase-runner.js', () => ({
  // Status-judgment and report phases are out of scope for these tests.
  needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
  runReportPhase: vi.fn().mockResolvedValue(undefined),
  runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
}));
vi.mock('../utils/session.js', () => ({
  // Fixed report dir name; createTestTmpDir pre-creates this directory.
  generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
}));
// --- Imports (after mocks) ---
import { WorkflowEngine } from '../workflow/engine.js';
import { runAgent } from '../agents/runner.js';
import {
makeResponse,
buildDefaultWorkflowConfig,
mockRunAgentSequence,
mockDetectMatchedRuleSequence,
createTestTmpDir,
applyDefaultMocks,
} from './engine-test-helpers.js';
describe('WorkflowEngine Integration: Parallel Step Aggregation', () => {
  let tmpDir: string;

  beforeEach(() => {
    // resetAllMocks clears the module-scope vi.mock stubs, so the default
    // phase-runner/session behaviors must be re-applied for every test.
    vi.resetAllMocks();
    applyDefaultMocks();
    tmpDir = createTestTmpDir();
  });

  afterEach(() => {
    if (existsSync(tmpDir)) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });

  it('should aggregate sub-step outputs with ## headers and --- separators', async () => {
    const config = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(config, tmpDir, 'test task');
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', content: 'Plan done' }),
      makeResponse({ agent: 'implement', content: 'Impl done' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'Architecture review content' }),
      makeResponse({ agent: 'security-review', content: 'Security review content' }),
      makeResponse({ agent: 'supervise', content: 'All passed' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' }, // arch-review
      { index: 0, method: 'phase1_tag' }, // security-review
      { index: 0, method: 'aggregate' }, // reviewers
      { index: 0, method: 'phase1_tag' },
    ]);
    const state = await engine.run();
    expect(state.status).toBe('completed');
    // The parent 'reviewers' step stores a markdown aggregate of both
    // sub-step outputs: "## <name>" sections joined by "---" separators.
    const reviewersOutput = state.stepOutputs.get('reviewers');
    expect(reviewersOutput).toBeDefined();
    expect(reviewersOutput!.content).toContain('## arch-review');
    expect(reviewersOutput!.content).toContain('Architecture review content');
    expect(reviewersOutput!.content).toContain('---');
    expect(reviewersOutput!.content).toContain('## security-review');
    expect(reviewersOutput!.content).toContain('Security review content');
    expect(reviewersOutput!.matchedRuleMethod).toBe('aggregate');
  });

  it('should store individual sub-step outputs in stepOutputs', async () => {
    const config = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(config, tmpDir, 'test task');
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', content: 'Plan' }),
      makeResponse({ agent: 'implement', content: 'Impl' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'Arch content' }),
      makeResponse({ agent: 'security-review', content: 'Sec content' }),
      makeResponse({ agent: 'supervise', content: 'Pass' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'aggregate' },
      { index: 0, method: 'phase1_tag' },
    ]);
    const state = await engine.run();
    // Both the sub-steps and the aggregating parent get stepOutputs entries.
    expect(state.stepOutputs.has('arch-review')).toBe(true);
    expect(state.stepOutputs.has('security-review')).toBe(true);
    expect(state.stepOutputs.has('reviewers')).toBe(true);
    expect(state.stepOutputs.get('arch-review')!.content).toBe('Arch content');
    expect(state.stepOutputs.get('security-review')!.content).toBe('Sec content');
  });

  it('should execute sub-steps concurrently (both runAgent calls happen)', async () => {
    const config = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(config, tmpDir, 'test task');
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', content: 'Plan' }),
      makeResponse({ agent: 'implement', content: 'Impl' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'OK' }),
      makeResponse({ agent: 'supervise', content: 'Pass' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'aggregate' },
      { index: 0, method: 'phase1_tag' },
    ]);
    await engine.run();
    // 6 total: 4 normal + 2 parallel sub-steps
    expect(vi.mocked(runAgent)).toHaveBeenCalledTimes(6);
    // makeStep derives each agent path from the step name.
    const calledAgents = vi.mocked(runAgent).mock.calls.map(call => call[0]);
    expect(calledAgents).toContain('../agents/arch-review.md');
    expect(calledAgents).toContain('../agents/security-review.md');
  });
});

View File

@@ -0,0 +1,175 @@
/**
* Shared helpers for WorkflowEngine integration tests.
*
* Provides mock setup, factory functions, and a default workflow config
* matching the parallel reviewers structure (plan implement ai_review reviewers supervise).
*/
import { vi } from 'vitest';
import { mkdirSync } from 'node:fs';
import { join } from 'node:path';
import { tmpdir } from 'node:os';
import { randomUUID } from 'node:crypto';
import type { WorkflowConfig, WorkflowStep, AgentResponse, WorkflowRule } from '../models/types.js';
// --- Mock imports (consumers must call vi.mock before importing this) ---
import { runAgent } from '../agents/runner.js';
import { detectMatchedRule } from '../workflow/rule-evaluator.js';
import type { RuleMatch } from '../workflow/rule-evaluator.js';
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../workflow/phase-runner.js';
import { generateReportDir } from '../utils/session.js';
// --- Factory functions ---
/**
 * Build an AgentResponse for tests.
 *
 * Defaults to a successful ('done') response from 'test-agent' with a fresh
 * timestamp and a unique session id; any field can be replaced via
 * `overrides`.
 */
export function makeResponse(overrides: Partial<AgentResponse> = {}): AgentResponse {
  const base: AgentResponse = {
    agent: 'test-agent',
    status: 'done',
    content: 'test response',
    timestamp: new Date(),
    sessionId: `session-${randomUUID()}`,
  };
  return { ...base, ...overrides };
}
/**
 * Build a WorkflowRule from its condition text and next-step name.
 * `extra` can supply optional fields (aggregate flags etc.) and wins over
 * the positional arguments on key collision.
 */
export function makeRule(condition: string, next: string, extra: Partial<WorkflowRule> = {}): WorkflowRule {
  const rule: WorkflowRule = { condition, next };
  return Object.assign(rule, extra);
}
/**
 * Build a WorkflowStep named `name`.
 *
 * Agent path, display name, and instruction template are all derived from
 * the step name; `overrides` replaces any default field (e.g. `rules`,
 * `parallel`).
 */
export function makeStep(name: string, overrides: Partial<WorkflowStep> = {}): WorkflowStep {
  const defaults: WorkflowStep = {
    name,
    agent: `../agents/${name}.md`,
    agentDisplayName: name,
    instructionTemplate: `Run ${name}`,
    passPreviousResponse: true,
  };
  return { ...defaults, ...overrides };
}
/**
* Build a workflow config matching the default.yaml parallel reviewers structure:
* plan implement ai_review (ai_fix) reviewers(parallel) (fix) supervise
*/
export function buildDefaultWorkflowConfig(overrides: Partial<WorkflowConfig> = {}): WorkflowConfig {
const archReviewSubStep = makeStep('arch-review', {
rules: [
makeRule('approved', 'COMPLETE'),
makeRule('needs_fix', 'fix'),
],
});
const securityReviewSubStep = makeStep('security-review', {
rules: [
makeRule('approved', 'COMPLETE'),
makeRule('needs_fix', 'fix'),
],
});
return {
name: 'test-default',
description: 'Test workflow',
maxIterations: 30,
initialStep: 'plan',
steps: [
makeStep('plan', {
rules: [
makeRule('Requirements are clear', 'implement'),
makeRule('Requirements unclear', 'ABORT'),
],
}),
makeStep('implement', {
rules: [
makeRule('Implementation complete', 'ai_review'),
makeRule('Cannot proceed', 'plan'),
],
}),
makeStep('ai_review', {
rules: [
makeRule('No AI-specific issues', 'reviewers'),
makeRule('AI-specific issues found', 'ai_fix'),
],
}),
makeStep('ai_fix', {
rules: [
makeRule('AI issues fixed', 'reviewers'),
makeRule('Cannot proceed', 'plan'),
],
}),
makeStep('reviewers', {
parallel: [archReviewSubStep, securityReviewSubStep],
rules: [
makeRule('all("approved")', 'supervise', {
isAggregateCondition: true,
aggregateType: 'all',
aggregateConditionText: 'approved',
}),
makeRule('any("needs_fix")', 'fix', {
isAggregateCondition: true,
aggregateType: 'any',
aggregateConditionText: 'needs_fix',
}),
],
}),
makeStep('fix', {
rules: [
makeRule('Fix complete', 'reviewers'),
makeRule('Cannot proceed', 'plan'),
],
}),
makeStep('supervise', {
rules: [
makeRule('All checks passed', 'COMPLETE'),
makeRule('Requirements unmet', 'plan'),
],
}),
],
...overrides,
};
}
// --- Mock sequence helpers ---
/**
 * Queue a sequence of responses on the mocked runAgent: the i-th call to
 * runAgent resolves with `responses[i]`. Calls beyond the queue fall back
 * to the mock's default behavior.
 */
export function mockRunAgentSequence(responses: AgentResponse[]): void {
  responses.forEach((response) => {
    vi.mocked(runAgent).mockResolvedValueOnce(response);
  });
}
/**
 * Queue a sequence of rule-match results on the mocked detectMatchedRule:
 * the i-th call resolves with `matches[i]` (`undefined` simulates "no rule
 * matched").
 */
export function mockDetectMatchedRuleSequence(matches: (RuleMatch | undefined)[]): void {
  matches.forEach((match) => {
    vi.mocked(detectMatchedRule).mockResolvedValueOnce(match);
  });
}
// --- Test environment setup ---
/**
 * Create a unique temp directory containing the `.takt/reports/test-report-dir`
 * tree (the directory name the mocked generateReportDir returns).
 *
 * Returns the root tmpDir path; the caller is responsible for cleanup.
 */
export function createTestTmpDir(): string {
  const root = join(tmpdir(), `takt-engine-test-${randomUUID()}`);
  // recursive:true creates the root and every intermediate dir in one call.
  mkdirSync(join(root, '.takt', 'reports', 'test-report-dir'), { recursive: true });
  return root;
}
/**
 * Re-install the default behaviors wiped by vi.resetAllMocks():
 * phase-runner phases become inert no-ops and the report directory name is
 * pinned to the one createTestTmpDir pre-creates.
 */
export function applyDefaultMocks(): void {
  vi.mocked(generateReportDir).mockReturnValue('test-report-dir');
  vi.mocked(needsStatusJudgmentPhase).mockReturnValue(false);
  vi.mocked(runStatusJudgmentPhase).mockResolvedValue('');
  vi.mocked(runReportPhase).mockResolvedValue(undefined);
}

View File

@@ -60,4 +60,22 @@ describe('determineNextStepByRules', () => {
expect(determineNextStepByRules(step, 0)).toBe('COMPLETE');
});
it('should return null when rule exists but next is undefined', () => {
  // Parallel sub-step rules may omit `next` (optional field)
  const step: WorkflowStep = {
    name: 'sub-step',
    agent: 'test-agent',
    agentDisplayName: 'Test Agent',
    instructionTemplate: '{task}',
    passPreviousResponse: false,
    rules: [
      { condition: 'approved' },
      { condition: 'needs_fix' },
    ],
  };
  // Both indices resolve to a real rule, but with no `next` the resolver
  // must yield null rather than a step name.
  expect(determineNextStepByRules(step, 0)).toBeNull();
  expect(determineNextStepByRules(step, 1)).toBeNull();
});
});