feat: GitHub Issue番号でタスク実行・追加を可能にし、エンジンをリファクタリング (#10)

- takt "#6" や /add-task "#6" "#7" でIssue内容をタスクとして実行可能に - gh CLI経由でIssue取得、タイトル・本文・ラベル・コメントをタスクテキストに変換 - engine.tsからフェーズ実行(phase-runner)、ルール評価(rule-evaluator)、ルールユーティリティ(rule-utils)を分離
2026-01-30 18:49:55 +09:00 · 2026-01-30 18:49:55 +09:00 · 84ef8fbaf7
commit 84ef8fbaf7
parent 006c69056a
10 changed files with 788 additions and 304 deletions
--- a/src/tests/github-issue.test.ts
+++ b/src/tests/github-issue.test.ts
@ -0,0 +1,165 @@
+/**
+ * Tests for github/issue module
+ *
+ * Tests parsing and formatting functions.
+ * checkGhCli/fetchIssue/resolveIssueTask are integration functions
+ * that call `gh` CLI, so they are not unit-tested here.
+ */
+
+import { describe, it, expect } from 'vitest';
+import {
+  parseIssueNumbers,
+  isIssueReference,
+  formatIssueAsTask,
+  type GitHubIssue,
+} from '../github/issue.js';
+
+describe('parseIssueNumbers', () => {
+  it('should parse single issue reference', () => {
+    expect(parseIssueNumbers(['#6'])).toEqual([6]);
+  });
+
+  it('should parse multiple issue references', () => {
+    expect(parseIssueNumbers(['#6', '#7'])).toEqual([6, 7]);
+  });
+
+  it('should parse large issue numbers', () => {
+    expect(parseIssueNumbers(['#123'])).toEqual([123]);
+  });
+
+  it('should return empty for non-issue args', () => {
+    expect(parseIssueNumbers(['Fix bug'])).toEqual([]);
+  });
+
+  it('should return empty when mixed issue and non-issue args', () => {
+    expect(parseIssueNumbers(['#6', 'and', '#7'])).toEqual([]);
+  });
+
+  it('should return empty for empty args', () => {
+    expect(parseIssueNumbers([])).toEqual([]);
+  });
+
+  it('should not match partial issue patterns', () => {
+    expect(parseIssueNumbers(['#abc'])).toEqual([]);
+    expect(parseIssueNumbers(['#'])).toEqual([]);
+    expect(parseIssueNumbers(['##6'])).toEqual([]);
+    expect(parseIssueNumbers(['6'])).toEqual([]);
+    expect(parseIssueNumbers(['issue#6'])).toEqual([]);
+  });
+
+  it('should handle #0', () => {
+    expect(parseIssueNumbers(['#0'])).toEqual([0]);
+  });
+});
+
+describe('isIssueReference', () => {
+  it('should return true for #N patterns', () => {
+    expect(isIssueReference('#6')).toBe(true);
+    expect(isIssueReference('#123')).toBe(true);
+  });
+
+  it('should return true with whitespace trim', () => {
+    expect(isIssueReference(' #6 ')).toBe(true);
+  });
+
+  it('should return false for non-issue text', () => {
+    expect(isIssueReference('Fix bug')).toBe(false);
+    expect(isIssueReference('#abc')).toBe(false);
+    expect(isIssueReference('')).toBe(false);
+    expect(isIssueReference('#')).toBe(false);
+    expect(isIssueReference('6')).toBe(false);
+  });
+
+  it('should return false for multiple issues (single string)', () => {
+    expect(isIssueReference('#6 #7')).toBe(false);
+  });
+});
+
+describe('formatIssueAsTask', () => {
+  it('should format issue with all fields', () => {
+    const issue: GitHubIssue = {
+      number: 6,
+      title: 'Fix authentication bug',
+      body: 'The login flow is broken.',
+      labels: ['bug', 'priority:high'],
+      comments: [
+        { author: 'user1', body: 'I can reproduce this.' },
+        { author: 'user2', body: 'Fixed in PR #7.' },
+      ],
+    };
+
+    const result = formatIssueAsTask(issue);
+
+    expect(result).toContain('## GitHub Issue #6: Fix authentication bug');
+    expect(result).toContain('The login flow is broken.');
+    expect(result).toContain('### Labels');
+    expect(result).toContain('bug, priority:high');
+    expect(result).toContain('### Comments');
+    expect(result).toContain('**user1**: I can reproduce this.');
+    expect(result).toContain('**user2**: Fixed in PR #7.');
+  });
+
+  it('should format issue with no body', () => {
+    const issue: GitHubIssue = {
+      number: 10,
+      title: 'Empty issue',
+      body: '',
+      labels: [],
+      comments: [],
+    };
+
+    const result = formatIssueAsTask(issue);
+
+    expect(result).toBe('## GitHub Issue #10: Empty issue');
+    expect(result).not.toContain('### Labels');
+    expect(result).not.toContain('### Comments');
+  });
+
+  it('should format issue with labels but no comments', () => {
+    const issue: GitHubIssue = {
+      number: 5,
+      title: 'Feature request',
+      body: 'Add dark mode.',
+      labels: ['enhancement'],
+      comments: [],
+    };
+
+    const result = formatIssueAsTask(issue);
+
+    expect(result).toContain('### Labels');
+    expect(result).toContain('enhancement');
+    expect(result).not.toContain('### Comments');
+  });
+
+  it('should format issue with comments but no labels', () => {
+    const issue: GitHubIssue = {
+      number: 3,
+      title: 'Discussion',
+      body: 'Thoughts?',
+      labels: [],
+      comments: [
+        { author: 'dev', body: 'LGTM' },
+      ],
+    };
+
+    const result = formatIssueAsTask(issue);
+
+    expect(result).not.toContain('### Labels');
+    expect(result).toContain('### Comments');
+    expect(result).toContain('**dev**: LGTM');
+  });
+
+  it('should handle multiline body', () => {
+    const issue: GitHubIssue = {
+      number: 1,
+      title: 'Multi-line',
+      body: 'Line 1\nLine 2\n\nLine 4',
+      labels: [],
+      comments: [],
+    };
+
+    const result = formatIssueAsTask(issue);
+
+    expect(result).toContain('Line 1\nLine 2\n\nLine 4');
+  });
+});
--- a/src/cli.ts
+++ b/src/cli.ts
@ -42,6 +42,7 @@ import { autoCommitAndPush } from './task/autoCommit.js';
 import { summarizeTaskName } from './task/summarize.js';
 import { DEFAULT_WORKFLOW_NAME } from './constants.js';
 import { checkForUpdates } from './utils/updateNotifier.js';
+import { resolveIssueTask, isIssueReference } from './github/issue.js';

 const require = createRequire(import.meta.url);
 const { version: cliVersion } = require('../package.json') as { version: string };
@ -188,6 +189,18 @@ program

    // Task execution
    if (task) {
+      // Resolve #N issue references to task text
+      let resolvedTask: string = task;
+      if (isIssueReference(task) || task.trim().split(/\s+/).every((t: string) => isIssueReference(t))) {
+        try {
+          info('Fetching GitHub Issue...');
+          resolvedTask = resolveIssueTask(task);
+        } catch (e) {
+          error(e instanceof Error ? e.message : String(e));
+          process.exit(1);
+        }
+      }
+
      // Get available workflows and prompt user to select
      const availableWorkflows = listWorkflows();
      const currentWorkflow = getCurrentWorkflow(cwd);
@ -230,13 +243,13 @@ program
      }

      // Ask whether to create a worktree
-      const { execCwd, isWorktree } = await confirmAndCreateWorktree(cwd, task);
+      const { execCwd, isWorktree } = await confirmAndCreateWorktree(cwd, resolvedTask);

-      log.info('Starting task execution', { task, workflow: selectedWorkflow, worktree: isWorktree });
-      const taskSuccess = await executeTask(task, execCwd, selectedWorkflow, cwd);
+      log.info('Starting task execution', { task: resolvedTask, workflow: selectedWorkflow, worktree: isWorktree });
+      const taskSuccess = await executeTask(resolvedTask, execCwd, selectedWorkflow, cwd);

      if (taskSuccess && isWorktree) {
-        const commitResult = autoCommitAndPush(execCwd, task, cwd);
+        const commitResult = autoCommitAndPush(execCwd, resolvedTask, cwd);
        if (commitResult.success && commitResult.commitHash) {
          success(`Auto-committed & pushed: ${commitResult.commitHash}`);
        } else if (!commitResult.success) {
--- a/src/commands/addTask.ts
+++ b/src/commands/addTask.ts
@ -12,7 +12,9 @@ import { promptInput, promptMultilineInput, confirm, selectOption } from '../pro
 import { success, info } from '../utils/ui.js';
 import { summarizeTaskName } from '../task/summarize.js';
 import { createLogger } from '../utils/debug.js';
+import { error as errorLog } from '../utils/ui.js';
 import { listWorkflows } from '../config/workflowLoader.js';
+import { parseIssueNumbers, resolveIssueTask } from '../github/issue.js';
 import { getCurrentWorkflow } from '../config/paths.js';
 import type { TaskFileData } from '../task/schema.js';

@ -53,8 +55,19 @@ export async function addTask(cwd: string, args: string[]): Promise<void> {
  let workflow: string | undefined;

  if (args.length > 0) {
-    // Argument mode: task content provided directly
-    taskContent = args.join(' ');
+    // Check if args are issue references (#N)
+    const issueNumbers = parseIssueNumbers(args);
+    if (issueNumbers.length > 0) {
+      try {
+        info('Fetching GitHub Issue...');
+        taskContent = resolveIssueTask(args.join(' '));
+      } catch (e) {
+        errorLog(e instanceof Error ? e.message : String(e));
+        return;
+      }
+    } else {
+      taskContent = args.join(' ');
+    }
  } else {
    // Interactive mode (multiline: empty line to finish)
    const input = await promptMultilineInput('Task content');
--- a/src/commands/help.ts
+++ b/src/commands/help.ts
@ -14,6 +14,7 @@ export function showHelp(): void {
  console.log(`
 Usage:
  takt {task}             Execute task with current workflow (continues session)
+  takt "#N"               Execute GitHub Issue #N as task (quote # in shell)
  takt /run-tasks (/run)           Run all pending tasks from .takt/tasks/
  takt /watch                      Watch for tasks and auto-execute (stays resident)
  takt /add-task (/add)            Add a new task (interactive, YAML format)
@ -25,6 +26,10 @@ Usage:

 Examples:
  takt "Fix the bug in main.ts"         # Execute task (continues session)
+  takt "#6"                             # Execute Issue #6 as task
+  takt "#6 #7"                          # Execute multiple Issues as task
+  takt /add-task "#6"                   # Create task from Issue #6
+  takt /add-task "#6" "#7"              # Create task from multiple Issues
  takt /add-task "認証機能を追加する"   # Quick add task
  takt /add-task                        # Interactive task creation
  takt /clear                           # Clear sessions, start fresh
--- a/src/github/issue.ts
+++ b/src/github/issue.ts
@ -0,0 +1,183 @@
+/**
+ * GitHub Issue utilities
+ *
+ * Fetches issue content via `gh` CLI and formats it as task text
+ * for workflow execution or task creation.
+ */
+
+import { execSync } from 'node:child_process';
+import { createLogger } from '../utils/debug.js';
+
+const log = createLogger('github');
+
+/** Regex to match `#N` patterns (issue numbers) */
+const ISSUE_NUMBER_REGEX = /^#(\d+)$/;
+
+export interface GitHubIssue {
+  number: number;
+  title: string;
+  body: string;
+  labels: string[];
+  comments: Array<{ author: string; body: string }>;
+}
+
+export interface GhCliStatus {
+  available: boolean;
+  error?: string;
+}
+
+/**
+ * Check if `gh` CLI is available and authenticated.
+ */
+export function checkGhCli(): GhCliStatus {
+  try {
+    execSync('gh auth status', { stdio: 'pipe' });
+    return { available: true };
+  } catch {
+    try {
+      execSync('gh --version', { stdio: 'pipe' });
+      return {
+        available: false,
+        error: 'gh CLI is installed but not authenticated. Run `gh auth login` first.',
+      };
+    } catch {
+      return {
+        available: false,
+        error: 'gh CLI is not installed. Install it from https://cli.github.com/',
+      };
+    }
+  }
+}
+
+/**
+ * Fetch issue content via `gh issue view`.
+ * Throws on failure (issue not found, network error, etc.).
+ */
+export function fetchIssue(issueNumber: number): GitHubIssue {
+  log.debug('Fetching issue', { issueNumber });
+
+  const raw = execSync(
+    `gh issue view ${issueNumber} --json number,title,body,labels,comments`,
+    { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] },
+  );
+
+  const data = JSON.parse(raw) as {
+    number: number;
+    title: string;
+    body: string;
+    labels: Array<{ name: string }>;
+    comments: Array<{ author: { login: string }; body: string }>;
+  };
+
+  return {
+    number: data.number,
+    title: data.title,
+    body: data.body ?? '',
+    labels: data.labels.map((l) => l.name),
+    comments: data.comments.map((c) => ({
+      author: c.author.login,
+      body: c.body,
+    })),
+  };
+}
+
+/**
+ * Format a GitHub issue into task text for workflow execution.
+ *
+ * Output format:
+ * ```
+ * ## GitHub Issue #6: Fix authentication bug
+ *
+ * {body}
+ *
+ * ### Labels
+ * bug, priority:high
+ *
+ * ### Comments
+ * **user1**: Comment body...
+ * ```
+ */
+export function formatIssueAsTask(issue: GitHubIssue): string {
+  const parts: string[] = [];
+
+  parts.push(`## GitHub Issue #${issue.number}: ${issue.title}`);
+
+  if (issue.body) {
+    parts.push('');
+    parts.push(issue.body);
+  }
+
+  if (issue.labels.length > 0) {
+    parts.push('');
+    parts.push('### Labels');
+    parts.push(issue.labels.join(', '));
+  }
+
+  if (issue.comments.length > 0) {
+    parts.push('');
+    parts.push('### Comments');
+    for (const comment of issue.comments) {
+      parts.push(`**${comment.author}**: ${comment.body}`);
+    }
+  }
+
+  return parts.join('\n');
+}
+
+/**
+ * Parse `#N` patterns from argument strings.
+ * Returns issue numbers found, or empty array if none.
+ *
+ * Each argument must be exactly `#N` (no mixed text).
+ * Examples:
+ *   ['#6'] → [6]
+ *   ['#6', '#7'] → [6, 7]
+ *   ['Fix bug'] → []
+ *   ['#6', 'and', '#7'] → [] (mixed, not all are issue refs)
+ */
+export function parseIssueNumbers(args: string[]): number[] {
+  if (args.length === 0) return [];
+
+  const numbers: number[] = [];
+  for (const arg of args) {
+    const match = arg.match(ISSUE_NUMBER_REGEX);
+    if (!match?.[1]) return []; // Not all args are issue refs
+    numbers.push(Number.parseInt(match[1], 10));
+  }
+
+  return numbers;
+}
+
+/**
+ * Check if a single task string is an issue reference (`#N`).
+ */
+export function isIssueReference(task: string): boolean {
+  return ISSUE_NUMBER_REGEX.test(task.trim());
+}
+
+/**
+ * Resolve issue references in a task string.
+ * If task contains `#N` patterns (space-separated), fetches issues and returns formatted text.
+ * Otherwise returns the task string as-is.
+ *
+ * Checks gh CLI availability before fetching.
+ * Throws if gh CLI is not available or issue fetch fails.
+ */
+export function resolveIssueTask(task: string): string {
+  const tokens = task.trim().split(/\s+/);
+  const issueNumbers = parseIssueNumbers(tokens);
+
+  if (issueNumbers.length === 0) {
+    return task;
+  }
+
+  const ghStatus = checkGhCli();
+  if (!ghStatus.available) {
+    throw new Error(ghStatus.error ?? 'gh CLI is not available');
+  }
+
+  log.info('Resolving issue references', { issueNumbers });
+
+  const issues = issueNumbers.map((n) => fetchIssue(n));
+  return issues.map(formatIssueAsTask).join('\n\n---\n\n');
+}
--- a/src/workflow/engine.ts
+++ b/src/workflow/engine.ts
@ -10,17 +10,17 @@ import type {
  WorkflowState,
  WorkflowStep,
  AgentResponse,
-  RuleMatchMethod,
 } from '../models/types.js';
 import { runAgent, type RunAgentOptions } from '../agents/runner.js';
 import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js';
 import type { WorkflowEngineOptions } from './types.js';
 import { determineNextStepByRules } from './transitions.js';
-import { detectRuleIndex, callAiJudge } from '../claude/client.js';
-import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js';
+import { buildInstruction as buildInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js';
 import { LoopDetector } from './loop-detector.js';
 import { handleBlocked } from './blocked-handler.js';
 import { ParallelLogger } from './parallel-logger.js';
+import { detectMatchedRule } from './rule-evaluator.js';
+import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from './phase-runner.js';
 import {
  createInitialState,
  addUserInput,
@ -206,18 +206,11 @@ export class WorkflowEngine extends EventEmitter {
    return this.runNormalStep(step);
  }

-  /** Build RunAgentOptions from a step's configuration */
-  private buildAgentOptions(step: WorkflowStep): RunAgentOptions {
-    // Phase 1: exclude Write from allowedTools when step has report config
-    const allowedTools = step.report
-      ? step.allowedTools?.filter((t) => t !== 'Write')
-      : step.allowedTools;
-
+  /** Build common RunAgentOptions shared by all phases */
+  private buildBaseOptions(step: WorkflowStep): RunAgentOptions {
    return {
      cwd: this.cwd,
-      sessionId: this.state.agentSessions.get(step.agent),
      agentPath: step.agentPath,
-      allowedTools,
      provider: step.provider,
      model: step.model,
      permissionMode: step.permissionMode,
@ -228,24 +221,29 @@ export class WorkflowEngine extends EventEmitter {
    };
  }

+  /** Build RunAgentOptions from a step's configuration (Phase 1) */
+  private buildAgentOptions(step: WorkflowStep): RunAgentOptions {
+    // Phase 1: exclude Write from allowedTools when step has report config
+    const allowedTools = step.report
+      ? step.allowedTools?.filter((t) => t !== 'Write')
+      : step.allowedTools;
+
+    return {
+      ...this.buildBaseOptions(step),
+      sessionId: this.state.agentSessions.get(step.agent),
+      allowedTools,
+    };
+  }
+
  /**
   * Build RunAgentOptions for session-resume phases (Phase 2, Phase 3).
-   * Shares common fields with the original step's agent config.
   */
  private buildResumeOptions(step: WorkflowStep, sessionId: string, overrides: Pick<RunAgentOptions, 'allowedTools' | 'maxTurns'>): RunAgentOptions {
    return {
-      cwd: this.cwd,
+      ...this.buildBaseOptions(step),
      sessionId,
-      agentPath: step.agentPath,
      allowedTools: overrides.allowedTools,
      maxTurns: overrides.maxTurns,
-      provider: step.provider,
-      model: step.model,
-      permissionMode: step.permissionMode,
-      onStream: this.options.onStream,
-      onPermissionRequest: this.options.onPermissionRequest,
-      onAskUserQuestion: this.options.onAskUserQuestion,
-      bypassPermissions: this.options.bypassPermissions,
    };
  }

@ -261,114 +259,16 @@ export class WorkflowEngine extends EventEmitter {
    }
  }

-  /**
-   * Detect matched rule for a step's response.
-   * Evaluation order (first match wins):
-   * 1. Aggregate conditions: all()/any() — evaluate sub-step results
-   * 2. Tag detection from Phase 3 output
-   * 3. Tag detection from Phase 1 output (fallback)
-   * 4. ai() condition evaluation via AI judge
-   * 5. All-conditions AI judge (final fallback)
-   *
-   * Returns undefined for steps without rules.
-   * Throws if rules exist but no rule matched (Fail Fast).
-   *
-   * @param step - The workflow step
-   * @param agentContent - Phase 1 output (main execution)
-   * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection
-   */
-  private async detectMatchedRule(step: WorkflowStep, agentContent: string, tagContent: string): Promise<{ index: number; method: RuleMatchMethod } | undefined> {
-    if (!step.rules || step.rules.length === 0) return undefined;
-
-    // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps
-    const aggIndex = this.evaluateAggregateConditions(step);
-    if (aggIndex >= 0) {
-      return { index: aggIndex, method: 'aggregate' };
-    }
-
-    // 2. Tag detection from Phase 3 output
-    if (tagContent) {
-      const ruleIndex = detectRuleIndex(tagContent, step.name);
-      if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
-        return { index: ruleIndex, method: 'phase3_tag' };
-      }
-    }
-
-    // 3. Tag detection from Phase 1 output (fallback)
-    if (agentContent) {
-      const ruleIndex = detectRuleIndex(agentContent, step.name);
-      if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
-        return { index: ruleIndex, method: 'phase1_tag' };
-      }
-    }
-
-    // 4. AI judge for ai() conditions only
-    const aiRuleIndex = await this.evaluateAiConditions(step, agentContent);
-    if (aiRuleIndex >= 0) {
-      return { index: aiRuleIndex, method: 'ai_judge' };
-    }
-
-    // 5. AI judge for all conditions (final fallback)
-    const fallbackIndex = await this.evaluateAllConditionsViaAiJudge(step, agentContent);
-    if (fallbackIndex >= 0) {
-      return { index: fallbackIndex, method: 'ai_judge_fallback' };
-    }
-
-    throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`);
-  }
-
-  /**
-   * Evaluate aggregate conditions (all()/any()) against sub-step results.
-   * Returns the 0-based rule index in the step's rules array, or -1 if no match.
-   *
-   * For each aggregate rule, checks the matched condition text of sub-steps:
-   * - all("X"): true when ALL sub-steps have matched condition === X
-   * - any("X"): true when at least ONE sub-step has matched condition === X
-   *
-   * Edge cases per spec:
-   * - Sub-step with no matched rule: all() → false, any() → skip that sub-step
-   * - No sub-steps (0 件): both → false
-   * - Non-parallel step: both → false
-   */
-  private evaluateAggregateConditions(step: WorkflowStep): number {
-    if (!step.rules || !step.parallel || step.parallel.length === 0) return -1;
-
-    for (let i = 0; i < step.rules.length; i++) {
-      const rule = step.rules[i]!;
-      if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) {
-        continue;
-      }
-
-      const subSteps = step.parallel;
-      const targetCondition = rule.aggregateConditionText;
-
-      if (rule.aggregateType === 'all') {
-        const allMatch = subSteps.every((sub) => {
-          const output = this.state.stepOutputs.get(sub.name);
-          if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
-          const matchedRule = sub.rules[output.matchedRuleIndex];
-          return matchedRule?.condition === targetCondition;
-        });
-        if (allMatch) {
-          log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
-          return i;
-        }
-      } else {
-        // 'any'
-        const anyMatch = subSteps.some((sub) => {
-          const output = this.state.stepOutputs.get(sub.name);
-          if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
-          const matchedRule = sub.rules[output.matchedRuleIndex];
-          return matchedRule?.condition === targetCondition;
-        });
-        if (anyMatch) {
-          log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
-          return i;
-        }
-      }
-    }
-
-    return -1;
+  /** Build phase runner context for Phase 2/3 execution */
+  private buildPhaseRunnerContext() {
+    return {
+      cwd: this.cwd,
+      reportDir: this.reportDir,
+      language: this.language,
+      getSessionId: (agent: string) => this.state.agentSessions.get(agent),
+      buildResumeOptions: this.buildResumeOptions.bind(this),
+      updateAgentSession: this.updateAgentSession.bind(this),
+    };
  }

  /** Run a normal (non-parallel) step */
@ -388,18 +288,23 @@ export class WorkflowEngine extends EventEmitter {
    let response = await runAgent(step.agent, instruction, agentOptions);
    this.updateAgentSession(step.agent, response.sessionId);

+    const phaseCtx = this.buildPhaseRunnerContext();
+
    // Phase 2: report output (resume same session, Write only)
    if (step.report) {
-      await this.runReportPhase(step, stepIteration);
+      await runReportPhase(step, stepIteration, phaseCtx);
    }

    // Phase 3: status judgment (resume session, no tools, output status tag)
    let tagContent = '';
-    if (this.needsStatusJudgmentPhase(step)) {
-      tagContent = await this.runStatusJudgmentPhase(step);
+    if (needsStatusJudgmentPhase(step)) {
+      tagContent = await runStatusJudgmentPhase(step, phaseCtx);
    }

-    const match = await this.detectMatchedRule(step, response.content, tagContent);
+    const match = await detectMatchedRule(step, response.content, tagContent, {
+      state: this.state,
+      cwd: this.cwd,
+    });
    if (match) {
      log.debug('Rule matched', { step: step.name, ruleIndex: match.index, method: match.method });
      response = { ...response, matchedRuleIndex: match.index, matchedRuleMethod: match.method };
@ -410,81 +315,6 @@ export class WorkflowEngine extends EventEmitter {
    return { response, instruction };
  }

-  /**
-   * Phase 2: Report output.
-   * Resumes the agent session with Write-only tools to output reports.
-   * The response is discarded — only sessionId is updated.
-   */
-  private async runReportPhase(step: WorkflowStep, stepIteration: number): Promise<void> {
-    const sessionId = this.state.agentSessions.get(step.agent);
-    if (!sessionId) {
-      throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
-    }
-
-    log.debug('Running report phase', { step: step.name, sessionId });
-
-    const reportInstruction = buildReportInstructionFromTemplate(step, {
-      cwd: this.cwd,
-      reportDir: this.reportDir,
-      stepIteration,
-      language: this.language,
-    });
-
-    const reportOptions = this.buildResumeOptions(step, sessionId, {
-      allowedTools: ['Write'],
-      maxTurns: 3,
-    });
-
-    const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions);
-
-    // Update session (phase 2 may update it)
-    this.updateAgentSession(step.agent, reportResponse.sessionId);
-
-    log.debug('Report phase complete', { step: step.name, status: reportResponse.status });
-  }
-
-  /**
-   * Check if a step needs Phase 3 (status judgment).
-   * Returns true when at least one rule requires tag-based detection
-   * (i.e., not all rules are ai() or aggregate conditions).
-   */
-  private needsStatusJudgmentPhase(step: WorkflowStep): boolean {
-    if (!step.rules || step.rules.length === 0) return false;
-    const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition);
-    return !allNonTagConditions;
-  }
-
-  /**
-   * Phase 3: Status judgment.
-   * Resumes the agent session with no tools to ask the agent to output a status tag.
-   * Returns the Phase 3 response content (containing the status tag).
-   */
-  private async runStatusJudgmentPhase(step: WorkflowStep): Promise<string> {
-    const sessionId = this.state.agentSessions.get(step.agent);
-    if (!sessionId) {
-      throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
-    }
-
-    log.debug('Running status judgment phase', { step: step.name, sessionId });
-
-    const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, {
-      language: this.language,
-    });
-
-    const judgmentOptions = this.buildResumeOptions(step, sessionId, {
-      allowedTools: [],
-      maxTurns: 3,
-    });
-
-    const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions);
-
-    // Update session (phase 3 may update it)
-    this.updateAgentSession(step.agent, judgmentResponse.sessionId);
-
-    log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status });
-    return judgmentResponse.content;
-  }
-
  /**
   * Run a parallel step: execute all sub-steps concurrently, then aggregate results.
   * The aggregated output becomes the parent step's response for rules evaluation.
@ -509,6 +339,9 @@ export class WorkflowEngine extends EventEmitter {
        })
      : undefined;

+    const phaseCtx = this.buildPhaseRunnerContext();
+    const ruleCtx = { state: this.state, cwd: this.cwd };
+
    // Run all sub-steps concurrently
    const subResults = await Promise.all(
      subSteps.map(async (subStep, index) => {
@ -516,28 +349,28 @@ export class WorkflowEngine extends EventEmitter {
        const subInstruction = this.buildInstruction(subStep, subIteration);

        // Phase 1: main execution (Write excluded if sub-step has report)
-        const agentOptions = this.buildAgentOptions(subStep);
+        const baseOptions = this.buildAgentOptions(subStep);

-        // Override onStream with parallel logger's prefixed handler
-        if (parallelLogger) {
-          agentOptions.onStream = parallelLogger.createStreamHandler(subStep.name, index);
-        }
+        // Override onStream with parallel logger's prefixed handler (immutable)
+        const agentOptions = parallelLogger
+          ? { ...baseOptions, onStream: parallelLogger.createStreamHandler(subStep.name, index) }
+          : baseOptions;

        const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions);
        this.updateAgentSession(subStep.agent, subResponse.sessionId);

        // Phase 2: report output for sub-step
        if (subStep.report) {
-          await this.runReportPhase(subStep, subIteration);
+          await runReportPhase(subStep, subIteration, phaseCtx);
        }

        // Phase 3: status judgment for sub-step
        let subTagContent = '';
-        if (this.needsStatusJudgmentPhase(subStep)) {
-          subTagContent = await this.runStatusJudgmentPhase(subStep);
+        if (needsStatusJudgmentPhase(subStep)) {
+          subTagContent = await runStatusJudgmentPhase(subStep, phaseCtx);
        }

-        const match = await this.detectMatchedRule(subStep, subResponse.content, subTagContent);
+        const match = await detectMatchedRule(subStep, subResponse.content, subTagContent, ruleCtx);
        const finalResponse = match
          ? { ...subResponse, matchedRuleIndex: match.index, matchedRuleMethod: match.method }
          : subResponse;
@ -572,7 +405,7 @@ export class WorkflowEngine extends EventEmitter {
      .join('\n\n');

    // Parent step uses aggregate conditions, so tagContent is empty
-    const match = await this.detectMatchedRule(step, aggregatedContent, '');
+    const match = await detectMatchedRule(step, aggregatedContent, '', ruleCtx);

    const aggregatedResponse: AgentResponse = {
      agent: step.name,
@ -587,79 +420,6 @@ export class WorkflowEngine extends EventEmitter {
    return { response: aggregatedResponse, instruction: aggregatedInstruction };
  }

-  /**
-   * Evaluate ai() conditions via AI judge.
-   * Collects all ai() rules, calls the judge, and maps the result back to the original rule index.
-   * Returns the 0-based rule index in the step's rules array, or -1 if no match.
-   */
-  private async evaluateAiConditions(step: WorkflowStep, agentOutput: string): Promise<number> {
-    if (!step.rules) return -1;
-
-    const aiConditions: { index: number; text: string }[] = [];
-    for (let i = 0; i < step.rules.length; i++) {
-      const rule = step.rules[i]!;
-      if (rule.isAiCondition && rule.aiConditionText) {
-        aiConditions.push({ index: i, text: rule.aiConditionText });
-      }
-    }
-
-    if (aiConditions.length === 0) return -1;
-
-    log.debug('Evaluating ai() conditions via judge', {
-      step: step.name,
-      conditionCount: aiConditions.length,
-    });
-
-    // Remap: judge returns 0-based index within aiConditions array
-    const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text }));
-    const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd: this.cwd });
-
-    if (judgeResult >= 0 && judgeResult < aiConditions.length) {
-      const matched = aiConditions[judgeResult]!;
-      log.debug('AI judge matched condition', {
-        step: step.name,
-        judgeResult,
-        originalRuleIndex: matched.index,
-        condition: matched.text,
-      });
-      return matched.index;
-    }
-
-    log.debug('AI judge did not match any condition', { step: step.name });
-    return -1;
-  }
-
-  /**
-   * Final fallback: evaluate ALL rule conditions via AI judge.
-   * Unlike evaluateAiConditions (which only handles ai() flagged rules),
-   * this sends every rule's condition text to the judge.
-   * Returns the 0-based rule index, or -1 if no match.
-   */
-  private async evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string): Promise<number> {
-    if (!step.rules || step.rules.length === 0) return -1;
-
-    const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition }));
-
-    log.debug('Evaluating all conditions via AI judge (final fallback)', {
-      step: step.name,
-      conditionCount: conditions.length,
-    });
-
-    const judgeResult = await callAiJudge(agentOutput, conditions, { cwd: this.cwd });
-
-    if (judgeResult >= 0 && judgeResult < conditions.length) {
-      log.debug('AI judge (fallback) matched condition', {
-        step: step.name,
-        ruleIndex: judgeResult,
-        condition: conditions[judgeResult]!.text,
-      });
-      return judgeResult;
-    }
-
-    log.debug('AI judge (fallback) did not match any condition', { step: step.name });
-    return -1;
-  }
-
  /**
   * Determine next step for a completed step using rules-based routing.
   */
--- a/src/workflow/instruction-builder.ts
+++ b/src/workflow/instruction-builder.ts
@ -12,6 +12,7 @@
 */

 import type { WorkflowStep, WorkflowRule, AgentResponse, Language, ReportConfig, ReportObjectConfig } from '../models/types.js';
+import { hasTagBasedRules } from './rule-utils.js';


 /**
@ -465,12 +466,9 @@ export function buildInstruction(

  // 7. Status Output Rules (for tag-based detection in Phase 1)
  // Skip if all rules are ai() or aggregate conditions (no tags needed)
-  if (step.rules && step.rules.length > 0) {
-    const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition);
-    if (!allNonTagConditions) {
-      const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules, language);
-      sections.push(statusRulesPrompt);
-    }
+  if (hasTagBasedRules(step)) {
+    const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules!, language);
+    sections.push(statusRulesPrompt);
  }

  return sections.join('\n\n');
--- a/src/workflow/phase-runner.ts
+++ b/src/workflow/phase-runner.ts
@ -0,0 +1,111 @@
+/**
+ * Phase execution logic extracted from engine.ts.
+ *
+ * Handles Phase 2 (report output) and Phase 3 (status judgment)
+ * as session-resume operations.
+ */
+
+import type { WorkflowStep, Language } from '../models/types.js';
+import { runAgent, type RunAgentOptions } from '../agents/runner.js';
+import {
+  buildReportInstruction as buildReportInstructionFromTemplate,
+  buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate,
+} from './instruction-builder.js';
+import { hasTagBasedRules } from './rule-utils.js';
+import { createLogger } from '../utils/debug.js';
+
+const log = createLogger('phase-runner');
+
+export interface PhaseRunnerContext {
+  /** Working directory (agent work dir, may be a clone) */
+  cwd: string;
+  /** Report directory path */
+  reportDir: string;
+  /** Language for instructions */
+  language?: Language;
+  /** Get agent session ID */
+  getSessionId: (agent: string) => string | undefined;
+  /** Build resume options for a step */
+  buildResumeOptions: (step: WorkflowStep, sessionId: string, overrides: Pick<RunAgentOptions, 'allowedTools' | 'maxTurns'>) => RunAgentOptions;
+  /** Update agent session after a phase run */
+  updateAgentSession: (agent: string, sessionId: string | undefined) => void;
+}
+
+/**
+ * Check if a step needs Phase 3 (status judgment).
+ * Returns true when at least one rule requires tag-based detection.
+ */
+export function needsStatusJudgmentPhase(step: WorkflowStep): boolean {
+  return hasTagBasedRules(step);
+}
+
+/**
+ * Phase 2: Report output.
+ * Resumes the agent session with Write-only tools to output reports.
+ * The response is discarded — only sessionId is updated.
+ */
+export async function runReportPhase(
+  step: WorkflowStep,
+  stepIteration: number,
+  ctx: PhaseRunnerContext,
+): Promise<void> {
+  const sessionId = ctx.getSessionId(step.agent);
+  if (!sessionId) {
+    throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
+  }
+
+  log.debug('Running report phase', { step: step.name, sessionId });
+
+  const reportInstruction = buildReportInstructionFromTemplate(step, {
+    cwd: ctx.cwd,
+    reportDir: ctx.reportDir,
+    stepIteration,
+    language: ctx.language,
+  });
+
+  const reportOptions = ctx.buildResumeOptions(step, sessionId, {
+    allowedTools: ['Write'],
+    maxTurns: 3,
+  });
+
+  const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions);
+
+  // Update session (phase 2 may update it)
+  ctx.updateAgentSession(step.agent, reportResponse.sessionId);
+
+  log.debug('Report phase complete', { step: step.name, status: reportResponse.status });
+}
+
+/**
+ * Phase 3: Status judgment.
+ * Resumes the agent session with no tools to ask the agent to output a status tag.
+ * Returns the Phase 3 response content (containing the status tag).
+ */
+export async function runStatusJudgmentPhase(
+  step: WorkflowStep,
+  ctx: PhaseRunnerContext,
+): Promise<string> {
+  const sessionId = ctx.getSessionId(step.agent);
+  if (!sessionId) {
+    throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
+  }
+
+  log.debug('Running status judgment phase', { step: step.name, sessionId });
+
+  const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, {
+    language: ctx.language,
+  });
+
+  const judgmentOptions = ctx.buildResumeOptions(step, sessionId, {
+    allowedTools: [],
+    maxTurns: 3,
+  });
+
+  const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions);
+
+  // Update session (phase 3 may update it)
+  ctx.updateAgentSession(step.agent, judgmentResponse.sessionId);
+
+  log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status });
+  return judgmentResponse.content;
+}
--- a/src/workflow/rule-evaluator.ts
+++ b/src/workflow/rule-evaluator.ts
@ -0,0 +1,218 @@
+/**
+ * Rule evaluation logic extracted from engine.ts.
+ *
+ * Evaluates workflow step rules to determine the matched rule index.
+ * Supports tag-based detection, ai() conditions, aggregate conditions,
+ * and AI judge fallback.
+ */
+
+import type {
+  WorkflowStep,
+  WorkflowState,
+  RuleMatchMethod,
+} from '../models/types.js';
+import { detectRuleIndex, callAiJudge } from '../claude/client.js';
+import { createLogger } from '../utils/debug.js';
+
+const log = createLogger('rule-evaluator');
+
+export interface RuleMatch {
+  index: number;
+  method: RuleMatchMethod;
+}
+
+export interface RuleEvaluatorContext {
+  /** Workflow state (for accessing stepOutputs in aggregate evaluation) */
+  state: WorkflowState;
+  /** Working directory (for AI judge calls) */
+  cwd: string;
+}
+
+/**
+ * Detect matched rule for a step's response.
+ * Evaluation order (first match wins):
+ * 1. Aggregate conditions: all()/any() — evaluate sub-step results
+ * 2. Tag detection from Phase 3 output
+ * 3. Tag detection from Phase 1 output (fallback)
+ * 4. ai() condition evaluation via AI judge
+ * 5. All-conditions AI judge (final fallback)
+ *
+ * Returns undefined for steps without rules.
+ * Throws if rules exist but no rule matched (Fail Fast).
+ *
+ * @param step - The workflow step
+ * @param agentContent - Phase 1 output (main execution)
+ * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection
+ * @param ctx - Evaluation context (state, cwd)
+ */
+export async function detectMatchedRule(
+  step: WorkflowStep,
+  agentContent: string,
+  tagContent: string,
+  ctx: RuleEvaluatorContext,
+): Promise<RuleMatch | undefined> {
+  if (!step.rules || step.rules.length === 0) return undefined;
+
+  // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps
+  const aggIndex = evaluateAggregateConditions(step, ctx.state);
+  if (aggIndex >= 0) {
+    return { index: aggIndex, method: 'aggregate' };
+  }
+
+  // 2. Tag detection from Phase 3 output
+  if (tagContent) {
+    const ruleIndex = detectRuleIndex(tagContent, step.name);
+    if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
+      return { index: ruleIndex, method: 'phase3_tag' };
+    }
+  }
+
+  // 3. Tag detection from Phase 1 output (fallback)
+  if (agentContent) {
+    const ruleIndex = detectRuleIndex(agentContent, step.name);
+    if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
+      return { index: ruleIndex, method: 'phase1_tag' };
+    }
+  }
+
+  // 4. AI judge for ai() conditions only
+  const aiRuleIndex = await evaluateAiConditions(step, agentContent, ctx.cwd);
+  if (aiRuleIndex >= 0) {
+    return { index: aiRuleIndex, method: 'ai_judge' };
+  }
+
+  // 5. AI judge for all conditions (final fallback)
+  const fallbackIndex = await evaluateAllConditionsViaAiJudge(step, agentContent, ctx.cwd);
+  if (fallbackIndex >= 0) {
+    return { index: fallbackIndex, method: 'ai_judge_fallback' };
+  }
+
+  throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`);
+}
+
+/**
+ * Evaluate aggregate conditions (all()/any()) against sub-step results.
+ * Returns the 0-based rule index in the step's rules array, or -1 if no match.
+ *
+ * For each aggregate rule, checks the matched condition text of sub-steps:
+ * - all("X"): true when ALL sub-steps have matched condition === X
+ * - any("X"): true when at least ONE sub-step has matched condition === X
+ *
+ * Edge cases per spec:
+ * - Sub-step with no matched rule: all() → false, any() → skip that sub-step
+ * - No sub-steps (0 件): both → false
+ * - Non-parallel step: both → false
+ */
+export function evaluateAggregateConditions(step: WorkflowStep, state: WorkflowState): number {
+  if (!step.rules || !step.parallel || step.parallel.length === 0) return -1;
+
+  for (let i = 0; i < step.rules.length; i++) {
+    const rule = step.rules[i]!;
+    if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) {
+      continue;
+    }
+
+    const subSteps = step.parallel;
+    const targetCondition = rule.aggregateConditionText;
+
+    if (rule.aggregateType === 'all') {
+      const allMatch = subSteps.every((sub) => {
+        const output = state.stepOutputs.get(sub.name);
+        if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
+        const matchedRule = sub.rules[output.matchedRuleIndex];
+        return matchedRule?.condition === targetCondition;
+      });
+      if (allMatch) {
+        log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
+        return i;
+      }
+    } else {
+      // 'any'
+      const anyMatch = subSteps.some((sub) => {
+        const output = state.stepOutputs.get(sub.name);
+        if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
+        const matchedRule = sub.rules[output.matchedRuleIndex];
+        return matchedRule?.condition === targetCondition;
+      });
+      if (anyMatch) {
+        log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
+        return i;
+      }
+    }
+  }
+
+  return -1;
+}
+
+/**
+ * Evaluate ai() conditions via AI judge.
+ * Collects all ai() rules, calls the judge, and maps the result back to the original rule index.
+ * Returns the 0-based rule index in the step's rules array, or -1 if no match.
+ */
+export async function evaluateAiConditions(step: WorkflowStep, agentOutput: string, cwd: string): Promise<number> {
+  if (!step.rules) return -1;
+
+  const aiConditions: { index: number; text: string }[] = [];
+  for (let i = 0; i < step.rules.length; i++) {
+    const rule = step.rules[i]!;
+    if (rule.isAiCondition && rule.aiConditionText) {
+      aiConditions.push({ index: i, text: rule.aiConditionText });
+    }
+  }
+
+  if (aiConditions.length === 0) return -1;
+
+  log.debug('Evaluating ai() conditions via judge', {
+    step: step.name,
+    conditionCount: aiConditions.length,
+  });
+
+  // Remap: judge returns 0-based index within aiConditions array
+  const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text }));
+  const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd });
+
+  if (judgeResult >= 0 && judgeResult < aiConditions.length) {
+    const matched = aiConditions[judgeResult]!;
+    log.debug('AI judge matched condition', {
+      step: step.name,
+      judgeResult,
+      originalRuleIndex: matched.index,
+      condition: matched.text,
+    });
+    return matched.index;
+  }
+
+  log.debug('AI judge did not match any condition', { step: step.name });
+  return -1;
+}
+
+/**
+ * Final fallback: evaluate ALL rule conditions via AI judge.
+ * Unlike evaluateAiConditions (which only handles ai() flagged rules),
+ * this sends every rule's condition text to the judge.
+ * Returns the 0-based rule index, or -1 if no match.
+ */
+export async function evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string, cwd: string): Promise<number> {
+  if (!step.rules || step.rules.length === 0) return -1;
+
+  const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition }));
+
+  log.debug('Evaluating all conditions via AI judge (final fallback)', {
+    step: step.name,
+    conditionCount: conditions.length,
+  });
+
+  const judgeResult = await callAiJudge(agentOutput, conditions, { cwd });
+
+  if (judgeResult >= 0 && judgeResult < conditions.length) {
+    log.debug('AI judge (fallback) matched condition', {
+      step: step.name,
+      ruleIndex: judgeResult,
+      condition: conditions[judgeResult]!.text,
+    });
+    return judgeResult;
+  }
+
+  log.debug('AI judge (fallback) did not match any condition', { step: step.name });
+  return -1;
+}
--- a/src/workflow/rule-utils.ts
+++ b/src/workflow/rule-utils.ts
@ -0,0 +1,18 @@
+/**
+ * Shared rule utility functions used by both engine.ts and instruction-builder.ts.
+ */
+
+import type { WorkflowStep } from '../models/types.js';
+
+/**
+ * Check whether a step has tag-based rules (i.e., rules that require
+ * [STEP:N] tag output for detection).
+ *
+ * Returns false when all rules are ai() or aggregate conditions,
+ * meaning no tag-based status output is needed.
+ */
+export function hasTagBasedRules(step: WorkflowStep): boolean {
+  if (!step.rules || step.rules.length === 0) return false;
+  const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition);
+  return !allNonTagConditions;
+}