feat: WorkflowEngineのモックインテグレーションテスト追加 (#17)

runAgentをモックし、ワークフロー全体の状態遷移を検証するインテグレーションテストを追加。テストケース: - 正常フロー（Happy Path） - 差し戻しフロー（review reject → fix → re-review） - AI review差し戻し（ai_review → ai_fix → ai_review） - エラー: ルール未マッチ、runAgent例外 - ループ検出 - イテレーション上限 - blockedハンドリング（onUserInputあり/なし） - パラレルステップ集約（all/any条件） - rulesのnextがundefinedのケース
2026-01-30 21:11:41 +09:00 · 2026-01-30 21:11:41 +09:00 · cd67a2355a
commit cd67a2355a
parent e657211591
6 changed files with 1064 additions and 0 deletions
--- a/src/tests/engine-blocked.test.ts
+++ b/src/tests/engine-blocked.test.ts
@ -0,0 +1,143 @@
 /**
 * WorkflowEngine integration tests: blocked handling scenarios.
 *
 * Covers:
 * - Blocked without onUserInput callback (abort)
 * - Blocked with onUserInput returning null (abort)
 * - Blocked with onUserInput providing input (continue)
 */
 import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
 import { existsSync, rmSync } from 'node:fs';
 // --- Mock setup (must be before imports that use these modules) ---
 vi.mock('../agents/runner.js', () => ({
  runAgent: vi.fn(),
 }));
 vi.mock('../workflow/rule-evaluator.js', () => ({
  detectMatchedRule: vi.fn(),
 }));
 vi.mock('../workflow/phase-runner.js', () => ({
  needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
  runReportPhase: vi.fn().mockResolvedValue(undefined),
  runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
 }));
 vi.mock('../utils/session.js', () => ({
  generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
 }));
 // --- Imports (after mocks) ---
 import { WorkflowEngine } from '../workflow/engine.js';
 import {
  makeResponse,
  buildDefaultWorkflowConfig,
  mockRunAgentSequence,
  mockDetectMatchedRuleSequence,
  createTestTmpDir,
  applyDefaultMocks,
 } from './engine-test-helpers.js';
 describe('WorkflowEngine Integration: Blocked Handling', () => {
  let tmpDir: string;
  beforeEach(() => {
    vi.resetAllMocks();
    applyDefaultMocks();
    tmpDir = createTestTmpDir();
  });
  afterEach(() => {
    if (existsSync(tmpDir)) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });
  it('should abort when blocked and no onUserInput callback', async () => {
    const config = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(config, tmpDir, 'test task');
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', status: 'blocked', content: 'Need clarification' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
    ]);
    const blockedFn = vi.fn();
    const abortFn = vi.fn();
    engine.on('step:blocked', blockedFn);
    engine.on('workflow:abort', abortFn);
    const state = await engine.run();
    expect(state.status).toBe('aborted');
    expect(blockedFn).toHaveBeenCalledOnce();
    expect(abortFn).toHaveBeenCalledOnce();
  });
  it('should abort when blocked and onUserInput returns null', async () => {
    const config = buildDefaultWorkflowConfig();
    const onUserInput = vi.fn().mockResolvedValue(null);
    const engine = new WorkflowEngine(config, tmpDir, 'test task', { onUserInput });
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', status: 'blocked', content: 'Need info' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
    ]);
    const state = await engine.run();
    expect(state.status).toBe('aborted');
    expect(onUserInput).toHaveBeenCalledOnce();
  });
  it('should continue when blocked and onUserInput provides input', async () => {
    const config = buildDefaultWorkflowConfig();
    const onUserInput = vi.fn().mockResolvedValueOnce('User provided clarification');
    const engine = new WorkflowEngine(config, tmpDir, 'test task', { onUserInput });
    mockRunAgentSequence([
      // First: plan is blocked
      makeResponse({ agent: 'plan', status: 'blocked', content: 'Need info' }),
      // Second: plan succeeds after user input
      makeResponse({ agent: 'plan', content: 'Plan done with user input' }),
      makeResponse({ agent: 'implement', content: 'Impl done' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'OK' }),
      makeResponse({ agent: 'supervise', content: 'All passed' }),
    ]);
    mockDetectMatchedRuleSequence([
      // First plan call: blocked, rule matched but blocked handling takes over
      { index: 0, method: 'phase1_tag' },
      // Second plan call: success
      { index: 0, method: 'phase1_tag' },  // plan → implement
      { index: 0, method: 'phase1_tag' },  // implement → ai_review
      { index: 0, method: 'phase1_tag' },  // ai_review → reviewers
      { index: 0, method: 'phase1_tag' },  // arch-review → approved
      { index: 0, method: 'phase1_tag' },  // security-review → approved
      { index: 0, method: 'aggregate' },   // reviewers → supervise
      { index: 0, method: 'phase1_tag' },  // supervise → COMPLETE
    ]);
    const userInputFn = vi.fn();
    engine.on('step:user_input', userInputFn);
    const state = await engine.run();
    expect(state.status).toBe('completed');
    expect(onUserInput).toHaveBeenCalledOnce();
    expect(userInputFn).toHaveBeenCalledOnce();
    expect(state.userInputs).toContain('User provided clarification');
  });
 });
--- a/src/tests/engine-error.test.ts
+++ b/src/tests/engine-error.test.ts
@ -0,0 +1,222 @@
 /**
 * WorkflowEngine integration tests: error handling scenarios.
 *
 * Covers:
 * - No rule matched (abort)
 * - runAgent throws (abort)
 * - Loop detection (abort)
 * - Iteration limit (abort and extend)
 */
 import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
 import { existsSync, rmSync } from 'node:fs';
 // --- Mock setup (must be before imports that use these modules) ---
 vi.mock('../agents/runner.js', () => ({
  runAgent: vi.fn(),
 }));
 vi.mock('../workflow/rule-evaluator.js', () => ({
  detectMatchedRule: vi.fn(),
 }));
 vi.mock('../workflow/phase-runner.js', () => ({
  needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
  runReportPhase: vi.fn().mockResolvedValue(undefined),
  runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
 }));
 vi.mock('../utils/session.js', () => ({
  generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
 }));
 // --- Imports (after mocks) ---
 import { WorkflowEngine } from '../workflow/engine.js';
 import { runAgent } from '../agents/runner.js';
 import { detectMatchedRule } from '../workflow/rule-evaluator.js';
 import {
  makeResponse,
  makeStep,
  makeRule,
  buildDefaultWorkflowConfig,
  mockRunAgentSequence,
  mockDetectMatchedRuleSequence,
  createTestTmpDir,
  applyDefaultMocks,
 } from './engine-test-helpers.js';
 describe('WorkflowEngine Integration: Error Handling', () => {
  let tmpDir: string;
  beforeEach(() => {
    vi.resetAllMocks();
    applyDefaultMocks();
    tmpDir = createTestTmpDir();
  });
  afterEach(() => {
    if (existsSync(tmpDir)) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });
  // =====================================================
  // 1. No rule matched
  // =====================================================
  describe('No rule matched', () => {
    it('should abort when detectMatchedRule returns undefined', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Unclear output' }),
      ]);
      mockDetectMatchedRuleSequence([undefined]);
      const abortFn = vi.fn();
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      expect(abortFn).toHaveBeenCalledOnce();
      const reason = abortFn.mock.calls[0]![1] as string;
      expect(reason).toContain('plan');
    });
  });
  // =====================================================
  // 2. runAgent throws
  // =====================================================
  describe('runAgent throws', () => {
    it('should abort when runAgent throws an error', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      vi.mocked(runAgent).mockRejectedValueOnce(new Error('API connection failed'));
      const abortFn = vi.fn();
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      expect(abortFn).toHaveBeenCalledOnce();
      const reason = abortFn.mock.calls[0]![1] as string;
      expect(reason).toContain('API connection failed');
    });
  });
  // =====================================================
  // 3. Loop detection
  // =====================================================
  describe('Loop detection', () => {
    it('should abort when loop detected with action: abort', async () => {
      const config = buildDefaultWorkflowConfig({
        maxIterations: 100,
        loopDetection: { maxConsecutiveSameStep: 3, action: 'abort' },
        initialStep: 'loop-step',
        steps: [
          makeStep('loop-step', {
            rules: [makeRule('continue', 'loop-step')],
          }),
        ],
      });
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      for (let i = 0; i < 5; i++) {
        vi.mocked(runAgent).mockResolvedValueOnce(
          makeResponse({ content: `iteration ${i}` })
        );
        vi.mocked(detectMatchedRule).mockResolvedValueOnce(
          { index: 0, method: 'phase1_tag' }
        );
      }
      const abortFn = vi.fn();
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      expect(abortFn).toHaveBeenCalledOnce();
      const reason = abortFn.mock.calls[0]![1] as string;
      expect(reason).toContain('Loop detected');
      expect(reason).toContain('loop-step');
    });
  });
  // =====================================================
  // 4. Iteration limit
  // =====================================================
  describe('Iteration limit', () => {
    it('should abort when max iterations reached without onIterationLimit callback', async () => {
      const config = buildDefaultWorkflowConfig({ maxIterations: 2 });
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan done' }),
        makeResponse({ agent: 'implement', content: 'Impl done' }),
        makeResponse({ agent: 'ai_review', content: 'OK' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },  // plan → implement
        { index: 0, method: 'phase1_tag' },  // implement → ai_review
        { index: 0, method: 'phase1_tag' },  // ai_review → reviewers (won't be reached)
      ]);
      const limitFn = vi.fn();
      const abortFn = vi.fn();
      engine.on('iteration:limit', limitFn);
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      expect(limitFn).toHaveBeenCalledWith(2, 2);
      expect(abortFn).toHaveBeenCalledOnce();
      const reason = abortFn.mock.calls[0]![1] as string;
      expect(reason).toContain('Max iterations');
    });
    it('should extend iterations when onIterationLimit provides additional iterations', async () => {
      const config = buildDefaultWorkflowConfig({ maxIterations: 2 });
      const onIterationLimit = vi.fn().mockResolvedValueOnce(10);
      const engine = new WorkflowEngine(config, tmpDir, 'test task', {
        onIterationLimit,
      });
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan done' }),
        makeResponse({ agent: 'implement', content: 'Impl done' }),
        // After hitting limit at iteration 2, onIterationLimit extends to 12
        makeResponse({ agent: 'ai_review', content: 'OK' }),
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'OK' }),
        makeResponse({ agent: 'supervise', content: 'All passed' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },  // plan → implement
        { index: 0, method: 'phase1_tag' },  // implement → ai_review
        { index: 0, method: 'phase1_tag' },  // ai_review → reviewers
        { index: 0, method: 'phase1_tag' },  // arch-review → approved
        { index: 0, method: 'phase1_tag' },  // security-review → approved
        { index: 0, method: 'aggregate' },   // reviewers → supervise
        { index: 0, method: 'phase1_tag' },  // supervise → COMPLETE
      ]);
      const state = await engine.run();
      expect(state.status).toBe('completed');
      expect(onIterationLimit).toHaveBeenCalledOnce();
    });
  });
 });
--- a/src/tests/engine-happy-path.test.ts
+++ b/src/tests/engine-happy-path.test.ts
@ -0,0 +1,344 @@
 /**
 * WorkflowEngine integration tests: happy path and normal flow scenarios.
 *
 * Covers:
 * - Full happy path (plan → implement → ai_review → reviewers → supervise → COMPLETE)
 * - Review reject and fix loop
 * - AI review reject and fix
 * - ABORT transition
 * - Event emissions
 * - Step output tracking
 * - Config validation
 */
 import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
 import { existsSync, rmSync } from 'node:fs';
 import type { WorkflowConfig, WorkflowStep } from '../models/types.js';
 // --- Mock setup (must be before imports that use these modules) ---
 vi.mock('../agents/runner.js', () => ({
  runAgent: vi.fn(),
 }));
 vi.mock('../workflow/rule-evaluator.js', () => ({
  detectMatchedRule: vi.fn(),
 }));
 vi.mock('../workflow/phase-runner.js', () => ({
  needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
  runReportPhase: vi.fn().mockResolvedValue(undefined),
  runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
 }));
 vi.mock('../utils/session.js', () => ({
  generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
 }));
 // --- Imports (after mocks) ---
 import { WorkflowEngine } from '../workflow/engine.js';
 import { runAgent } from '../agents/runner.js';
 import {
  makeResponse,
  makeStep,
  makeRule,
  buildDefaultWorkflowConfig,
  mockRunAgentSequence,
  mockDetectMatchedRuleSequence,
  createTestTmpDir,
  applyDefaultMocks,
 } from './engine-test-helpers.js';
 describe('WorkflowEngine Integration: Happy Path', () => {
  let tmpDir: string;
  beforeEach(() => {
    vi.resetAllMocks();
    applyDefaultMocks();
    tmpDir = createTestTmpDir();
  });
  afterEach(() => {
    if (existsSync(tmpDir)) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });
  // =====================================================
  // 1. Happy Path
  // =====================================================
  describe('Happy path', () => {
    it('should complete: plan → implement → ai_review → reviewers(all approved) → supervise → COMPLETE', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan complete' }),
        makeResponse({ agent: 'implement', content: 'Implementation done' }),
        makeResponse({ agent: 'ai_review', content: 'No issues' }),
        makeResponse({ agent: 'arch-review', content: 'Architecture OK' }),
        makeResponse({ agent: 'security-review', content: 'Security OK' }),
        makeResponse({ agent: 'supervise', content: 'All passed' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },  // plan → implement
        { index: 0, method: 'phase1_tag' },  // implement → ai_review
        { index: 0, method: 'phase1_tag' },  // ai_review → reviewers
        { index: 0, method: 'phase1_tag' },  // arch-review → approved
        { index: 0, method: 'phase1_tag' },  // security-review → approved
        { index: 0, method: 'aggregate' },   // reviewers(all approved) → supervise
        { index: 0, method: 'phase1_tag' },  // supervise → COMPLETE
      ]);
      const completeFn = vi.fn();
      engine.on('workflow:complete', completeFn);
      const state = await engine.run();
      expect(state.status).toBe('completed');
      expect(state.iteration).toBe(5); // plan, implement, ai_review, reviewers, supervise
      expect(completeFn).toHaveBeenCalledOnce();
      expect(vi.mocked(runAgent)).toHaveBeenCalledTimes(6); // 4 normal + 2 parallel sub-steps
    });
  });
  // =====================================================
  // 2. Review reject and fix loop
  // =====================================================
  describe('Review reject and fix loop', () => {
    it('should handle: reviewers(needs_fix) → fix → reviewers(all approved) → supervise → COMPLETE', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan done' }),
        makeResponse({ agent: 'implement', content: 'Impl done' }),
        makeResponse({ agent: 'ai_review', content: 'No issues' }),
        // Round 1 reviewers: arch approved, security needs fix
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'Vulnerability found' }),
        // fix step
        makeResponse({ agent: 'fix', content: 'Fixed security issue' }),
        // Round 2 reviewers: both approved
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'Security OK now' }),
        // supervise
        makeResponse({ agent: 'supervise', content: 'All passed' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },  // plan → implement
        { index: 0, method: 'phase1_tag' },  // implement → ai_review
        { index: 0, method: 'phase1_tag' },  // ai_review → reviewers
        { index: 0, method: 'phase1_tag' },  // arch-review → approved
        { index: 1, method: 'phase1_tag' },  // security-review → needs_fix
        { index: 1, method: 'aggregate' },   // reviewers: any(needs_fix) → fix
        { index: 0, method: 'phase1_tag' },  // fix → reviewers
        { index: 0, method: 'phase1_tag' },  // arch-review → approved
        { index: 0, method: 'phase1_tag' },  // security-review → approved
        { index: 0, method: 'aggregate' },   // reviewers: all(approved) → supervise
        { index: 0, method: 'phase1_tag' },  // supervise → COMPLETE
      ]);
      const state = await engine.run();
      expect(state.status).toBe('completed');
      // plan, implement, ai_review, reviewers(1st), fix, reviewers(2nd), supervise = 7
      expect(state.iteration).toBe(7);
    });
  });
  // =====================================================
  // 3. AI review reject and fix
  // =====================================================
  describe('AI review reject and fix', () => {
    it('should handle: ai_review(issues) → ai_fix → reviewers → supervise → COMPLETE', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan done' }),
        makeResponse({ agent: 'implement', content: 'Impl done' }),
        makeResponse({ agent: 'ai_review', content: 'AI issues found' }),
        makeResponse({ agent: 'ai_fix', content: 'Issues fixed' }),
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'OK' }),
        makeResponse({ agent: 'supervise', content: 'All passed' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },  // plan → implement
        { index: 0, method: 'phase1_tag' },  // implement → ai_review
        { index: 1, method: 'phase1_tag' },  // ai_review → ai_fix (issues found)
        { index: 0, method: 'phase1_tag' },  // ai_fix → reviewers
        { index: 0, method: 'phase1_tag' },  // arch-review → approved
        { index: 0, method: 'phase1_tag' },  // security-review → approved
        { index: 0, method: 'aggregate' },   // reviewers → supervise
        { index: 0, method: 'phase1_tag' },  // supervise → COMPLETE
      ]);
      const state = await engine.run();
      expect(state.status).toBe('completed');
      // plan, implement, ai_review, ai_fix, reviewers, supervise = 6
      expect(state.iteration).toBe(6);
    });
  });
  // =====================================================
  // 4. ABORT transition
  // =====================================================
  describe('ABORT transition', () => {
    it('should abort when step transitions to ABORT', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Requirements unclear' }),
      ]);
      // plan rule index 1 → ABORT
      mockDetectMatchedRuleSequence([
        { index: 1, method: 'phase1_tag' },
      ]);
      const abortFn = vi.fn();
      engine.on('workflow:abort', abortFn);
      const state = await engine.run();
      expect(state.status).toBe('aborted');
      expect(abortFn).toHaveBeenCalledOnce();
    });
  });
  // =====================================================
  // 5. Event emissions
  // =====================================================
  describe('Event emissions', () => {
    it('should emit step:start and step:complete for each step', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan' }),
        makeResponse({ agent: 'implement', content: 'Impl' }),
        makeResponse({ agent: 'ai_review', content: 'OK' }),
        makeResponse({ agent: 'arch-review', content: 'OK' }),
        makeResponse({ agent: 'security-review', content: 'OK' }),
        makeResponse({ agent: 'supervise', content: 'Pass' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'aggregate' },
        { index: 0, method: 'phase1_tag' },
      ]);
      const startFn = vi.fn();
      const completeFn = vi.fn();
      engine.on('step:start', startFn);
      engine.on('step:complete', completeFn);
      await engine.run();
      // 5 steps: plan, implement, ai_review, reviewers, supervise
      expect(startFn).toHaveBeenCalledTimes(5);
      expect(completeFn).toHaveBeenCalledTimes(5);
      const startedSteps = startFn.mock.calls.map(call => (call[0] as WorkflowStep).name);
      expect(startedSteps).toEqual(['plan', 'implement', 'ai_review', 'reviewers', 'supervise']);
    });
    it('should emit iteration:limit when max iterations reached', async () => {
      const config = buildDefaultWorkflowConfig({ maxIterations: 1 });
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },
      ]);
      const limitFn = vi.fn();
      engine.on('iteration:limit', limitFn);
      await engine.run();
      expect(limitFn).toHaveBeenCalledWith(1, 1);
    });
  });
  // =====================================================
  // 6. Step output tracking
  // =====================================================
  describe('Step output tracking', () => {
    it('should store outputs for all executed steps', async () => {
      const config = buildDefaultWorkflowConfig();
      const engine = new WorkflowEngine(config, tmpDir, 'test task');
      mockRunAgentSequence([
        makeResponse({ agent: 'plan', content: 'Plan output' }),
        makeResponse({ agent: 'implement', content: 'Implement output' }),
        makeResponse({ agent: 'ai_review', content: 'AI review output' }),
        makeResponse({ agent: 'arch-review', content: 'Arch output' }),
        makeResponse({ agent: 'security-review', content: 'Sec output' }),
        makeResponse({ agent: 'supervise', content: 'Supervise output' }),
      ]);
      mockDetectMatchedRuleSequence([
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'phase1_tag' },
        { index: 0, method: 'aggregate' },
        { index: 0, method: 'phase1_tag' },
      ]);
      const state = await engine.run();
      expect(state.stepOutputs.get('plan')!.content).toBe('Plan output');
      expect(state.stepOutputs.get('implement')!.content).toBe('Implement output');
      expect(state.stepOutputs.get('ai_review')!.content).toBe('AI review output');
      expect(state.stepOutputs.get('supervise')!.content).toBe('Supervise output');
    });
  });
  // =====================================================
  // 7. Config validation
  // =====================================================
  describe('Config validation', () => {
    it('should throw when initial step does not exist', () => {
      const config = buildDefaultWorkflowConfig({ initialStep: 'nonexistent' });
      expect(() => {
        new WorkflowEngine(config, tmpDir, 'test task');
      }).toThrow('Unknown step: nonexistent');
    });
    it('should throw when rule references nonexistent step', () => {
      const config: WorkflowConfig = {
        name: 'test',
        maxIterations: 10,
        initialStep: 'step1',
        steps: [
          makeStep('step1', {
            rules: [makeRule('done', 'nonexistent_step')],
          }),
        ],
      };
      expect(() => {
        new WorkflowEngine(config, tmpDir, 'test task');
      }).toThrow('nonexistent_step');
    });
  });
 });
--- a/src/tests/engine-parallel.test.ts
+++ b/src/tests/engine-parallel.test.ts
@ -0,0 +1,162 @@
 /**
 * WorkflowEngine integration tests: parallel step aggregation.
 *
 * Covers:
 * - Aggregated output format (## headers and --- separators)
 * - Individual sub-step output storage
 * - Concurrent execution of sub-steps
 */
 import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
 import { existsSync, rmSync } from 'node:fs';
 // --- Mock setup (must be before imports that use these modules) ---
 vi.mock('../agents/runner.js', () => ({
  runAgent: vi.fn(),
 }));
 vi.mock('../workflow/rule-evaluator.js', () => ({
  detectMatchedRule: vi.fn(),
 }));
 vi.mock('../workflow/phase-runner.js', () => ({
  needsStatusJudgmentPhase: vi.fn().mockReturnValue(false),
  runReportPhase: vi.fn().mockResolvedValue(undefined),
  runStatusJudgmentPhase: vi.fn().mockResolvedValue(''),
 }));
 vi.mock('../utils/session.js', () => ({
  generateReportDir: vi.fn().mockReturnValue('test-report-dir'),
 }));
 // --- Imports (after mocks) ---
 import { WorkflowEngine } from '../workflow/engine.js';
 import { runAgent } from '../agents/runner.js';
 import {
  makeResponse,
  buildDefaultWorkflowConfig,
  mockRunAgentSequence,
  mockDetectMatchedRuleSequence,
  createTestTmpDir,
  applyDefaultMocks,
 } from './engine-test-helpers.js';
 describe('WorkflowEngine Integration: Parallel Step Aggregation', () => {
  let tmpDir: string;
  beforeEach(() => {
    vi.resetAllMocks();
    applyDefaultMocks();
    tmpDir = createTestTmpDir();
  });
  afterEach(() => {
    if (existsSync(tmpDir)) {
      rmSync(tmpDir, { recursive: true, force: true });
    }
  });
  it('should aggregate sub-step outputs with ## headers and --- separators', async () => {
    const config = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(config, tmpDir, 'test task');
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', content: 'Plan done' }),
      makeResponse({ agent: 'implement', content: 'Impl done' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'Architecture review content' }),
      makeResponse({ agent: 'security-review', content: 'Security review content' }),
      makeResponse({ agent: 'supervise', content: 'All passed' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },  // arch-review
      { index: 0, method: 'phase1_tag' },  // security-review
      { index: 0, method: 'aggregate' },   // reviewers
      { index: 0, method: 'phase1_tag' },
    ]);
    const state = await engine.run();
    expect(state.status).toBe('completed');
    const reviewersOutput = state.stepOutputs.get('reviewers');
    expect(reviewersOutput).toBeDefined();
    expect(reviewersOutput!.content).toContain('## arch-review');
    expect(reviewersOutput!.content).toContain('Architecture review content');
    expect(reviewersOutput!.content).toContain('---');
    expect(reviewersOutput!.content).toContain('## security-review');
    expect(reviewersOutput!.content).toContain('Security review content');
    expect(reviewersOutput!.matchedRuleMethod).toBe('aggregate');
  });
  it('should store individual sub-step outputs in stepOutputs', async () => {
    const config = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(config, tmpDir, 'test task');
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', content: 'Plan' }),
      makeResponse({ agent: 'implement', content: 'Impl' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'Arch content' }),
      makeResponse({ agent: 'security-review', content: 'Sec content' }),
      makeResponse({ agent: 'supervise', content: 'Pass' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'aggregate' },
      { index: 0, method: 'phase1_tag' },
    ]);
    const state = await engine.run();
    expect(state.stepOutputs.has('arch-review')).toBe(true);
    expect(state.stepOutputs.has('security-review')).toBe(true);
    expect(state.stepOutputs.has('reviewers')).toBe(true);
    expect(state.stepOutputs.get('arch-review')!.content).toBe('Arch content');
    expect(state.stepOutputs.get('security-review')!.content).toBe('Sec content');
  });
  it('should execute sub-steps concurrently (both runAgent calls happen)', async () => {
    const config = buildDefaultWorkflowConfig();
    const engine = new WorkflowEngine(config, tmpDir, 'test task');
    mockRunAgentSequence([
      makeResponse({ agent: 'plan', content: 'Plan' }),
      makeResponse({ agent: 'implement', content: 'Impl' }),
      makeResponse({ agent: 'ai_review', content: 'OK' }),
      makeResponse({ agent: 'arch-review', content: 'OK' }),
      makeResponse({ agent: 'security-review', content: 'OK' }),
      makeResponse({ agent: 'supervise', content: 'Pass' }),
    ]);
    mockDetectMatchedRuleSequence([
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'phase1_tag' },
      { index: 0, method: 'aggregate' },
      { index: 0, method: 'phase1_tag' },
    ]);
    await engine.run();
    // 6 total: 4 normal + 2 parallel sub-steps
    expect(vi.mocked(runAgent)).toHaveBeenCalledTimes(6);
    const calledAgents = vi.mocked(runAgent).mock.calls.map(call => call[0]);
    expect(calledAgents).toContain('../agents/arch-review.md');
    expect(calledAgents).toContain('../agents/security-review.md');
  });
 });
--- a/src/tests/engine-test-helpers.ts
+++ b/src/tests/engine-test-helpers.ts
@ -0,0 +1,175 @@
 /**
 * Shared helpers for WorkflowEngine integration tests.
 *
 * Provides mock setup, factory functions, and a default workflow config
 * matching the parallel reviewers structure (plan → implement → ai_review → reviewers → supervise).
 */
 import { vi } from 'vitest';
 import { mkdirSync } from 'node:fs';
 import { join } from 'node:path';
 import { tmpdir } from 'node:os';
 import { randomUUID } from 'node:crypto';
 import type { WorkflowConfig, WorkflowStep, AgentResponse, WorkflowRule } from '../models/types.js';
 // --- Mock imports (consumers must call vi.mock before importing this) ---
 import { runAgent } from '../agents/runner.js';
 import { detectMatchedRule } from '../workflow/rule-evaluator.js';
 import type { RuleMatch } from '../workflow/rule-evaluator.js';
 import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from '../workflow/phase-runner.js';
 import { generateReportDir } from '../utils/session.js';
 // --- Factory functions ---
 export function makeResponse(overrides: Partial<AgentResponse> = {}): AgentResponse {
  return {
    agent: 'test-agent',
    status: 'done',
    content: 'test response',
    timestamp: new Date(),
    sessionId: `session-${randomUUID()}`,
    ...overrides,
  };
 }
 export function makeRule(condition: string, next: string, extra: Partial<WorkflowRule> = {}): WorkflowRule {
  return { condition, next, ...extra };
 }
 export function makeStep(name: string, overrides: Partial<WorkflowStep> = {}): WorkflowStep {
  return {
    name,
    agent: `../agents/${name}.md`,
    agentDisplayName: name,
    instructionTemplate: `Run ${name}`,
    passPreviousResponse: true,
    ...overrides,
  };
 }
 /**
 * Build a workflow config matching the default.yaml parallel reviewers structure:
 * plan → implement → ai_review → (ai_fix↔) → reviewers(parallel) → (fix↔) → supervise
 */
 export function buildDefaultWorkflowConfig(overrides: Partial<WorkflowConfig> = {}): WorkflowConfig {
  const archReviewSubStep = makeStep('arch-review', {
    rules: [
      makeRule('approved', 'COMPLETE'),
      makeRule('needs_fix', 'fix'),
    ],
  });
  const securityReviewSubStep = makeStep('security-review', {
    rules: [
      makeRule('approved', 'COMPLETE'),
      makeRule('needs_fix', 'fix'),
    ],
  });
  return {
    name: 'test-default',
    description: 'Test workflow',
    maxIterations: 30,
    initialStep: 'plan',
    steps: [
      makeStep('plan', {
        rules: [
          makeRule('Requirements are clear', 'implement'),
          makeRule('Requirements unclear', 'ABORT'),
        ],
      }),
      makeStep('implement', {
        rules: [
          makeRule('Implementation complete', 'ai_review'),
          makeRule('Cannot proceed', 'plan'),
        ],
      }),
      makeStep('ai_review', {
        rules: [
          makeRule('No AI-specific issues', 'reviewers'),
          makeRule('AI-specific issues found', 'ai_fix'),
        ],
      }),
      makeStep('ai_fix', {
        rules: [
          makeRule('AI issues fixed', 'reviewers'),
          makeRule('Cannot proceed', 'plan'),
        ],
      }),
      makeStep('reviewers', {
        parallel: [archReviewSubStep, securityReviewSubStep],
        rules: [
          makeRule('all("approved")', 'supervise', {
            isAggregateCondition: true,
            aggregateType: 'all',
            aggregateConditionText: 'approved',
          }),
          makeRule('any("needs_fix")', 'fix', {
            isAggregateCondition: true,
            aggregateType: 'any',
            aggregateConditionText: 'needs_fix',
          }),
        ],
      }),
      makeStep('fix', {
        rules: [
          makeRule('Fix complete', 'reviewers'),
          makeRule('Cannot proceed', 'plan'),
        ],
      }),
      makeStep('supervise', {
        rules: [
          makeRule('All checks passed', 'COMPLETE'),
          makeRule('Requirements unmet', 'plan'),
        ],
      }),
    ],
    ...overrides,
  };
 }
 // --- Mock sequence helpers ---
 /**
 * Configure runAgent mock to return a sequence of responses.
 */
 export function mockRunAgentSequence(responses: AgentResponse[]): void {
  const mock = vi.mocked(runAgent);
  for (const response of responses) {
    mock.mockResolvedValueOnce(response);
  }
 }
 /**
 * Configure detectMatchedRule mock to return a sequence of rule matches.
 */
 export function mockDetectMatchedRuleSequence(matches: (RuleMatch | undefined)[]): void {
  const mock = vi.mocked(detectMatchedRule);
  for (const match of matches) {
    mock.mockResolvedValueOnce(match);
  }
 }
 // --- Test environment setup ---
 /**
 * Create a temporary directory with the required .takt/reports structure.
 * Returns the tmpDir path. Caller is responsible for cleanup.
 */
 export function createTestTmpDir(): string {
  const tmpDir = join(tmpdir(), `takt-engine-test-${randomUUID()}`);
  mkdirSync(tmpDir, { recursive: true });
  mkdirSync(join(tmpDir, '.takt', 'reports', 'test-report-dir'), { recursive: true });
  return tmpDir;
 }
 /**
 * Re-apply default mocks for phase-runner and session after vi.resetAllMocks().
 */
 export function applyDefaultMocks(): void {
  vi.mocked(needsStatusJudgmentPhase).mockReturnValue(false);
  vi.mocked(runReportPhase).mockResolvedValue(undefined);
  vi.mocked(runStatusJudgmentPhase).mockResolvedValue('');
  vi.mocked(generateReportDir).mockReturnValue('test-report-dir');
 }
--- a/src/tests/transitions.test.ts
+++ b/src/tests/transitions.test.ts
@ -60,4 +60,22 @@ describe('determineNextStepByRules', () => {
    expect(determineNextStepByRules(step, 0)).toBe('COMPLETE');
  });
  it('should return null when rule exists but next is undefined', () => {
    // Parallel sub-step rules may omit `next` (optional field)
    const step: WorkflowStep = {
      name: 'sub-step',
      agent: 'test-agent',
      agentDisplayName: 'Test Agent',
      instructionTemplate: '{task}',
      passPreviousResponse: false,
      rules: [
        { condition: 'approved' },
        { condition: 'needs_fix' },
      ],
    };
    expect(determineNextStepByRules(step, 0)).toBeNull();
    expect(determineNextStepByRules(step, 1)).toBeNull();
  });
 });