feat: GitHub Issue番号でタスク実行・追加を可能にし、エンジンをリファクタリング (#10)

- takt "#6" や /add-task "#6" "#7" でIssue内容をタスクとして実行可能に - gh CLI経由でIssue取得、タイトル・本文・ラベル・コメントをタスクテキストに変換 - engine.tsからフェーズ実行(phase-runner)、ルール評価(rule-evaluator)、ルールユーティリティ(rule-utils)を分離
2026-01-30 18:49:55 +09:00 · 2026-01-30 18:49:55 +09:00 · 84ef8fbaf7
commit 84ef8fbaf7
parent 006c69056a
10 changed files with 788 additions and 304 deletions
--- a/src/tests/github-issue.test.ts
+++ b/src/tests/github-issue.test.ts
@ -0,0 +1,165 @@
 /**
 * Tests for github/issue module
 *
 * Tests parsing and formatting functions.
 * checkGhCli/fetchIssue/resolveIssueTask are integration functions
 * that call `gh` CLI, so they are not unit-tested here.
 */
 import { describe, it, expect } from 'vitest';
 import {
  parseIssueNumbers,
  isIssueReference,
  formatIssueAsTask,
  type GitHubIssue,
 } from '../github/issue.js';
 describe('parseIssueNumbers', () => {
  it('should parse single issue reference', () => {
    expect(parseIssueNumbers(['#6'])).toEqual([6]);
  });
  it('should parse multiple issue references', () => {
    expect(parseIssueNumbers(['#6', '#7'])).toEqual([6, 7]);
  });
  it('should parse large issue numbers', () => {
    expect(parseIssueNumbers(['#123'])).toEqual([123]);
  });
  it('should return empty for non-issue args', () => {
    expect(parseIssueNumbers(['Fix bug'])).toEqual([]);
  });
  it('should return empty when mixed issue and non-issue args', () => {
    expect(parseIssueNumbers(['#6', 'and', '#7'])).toEqual([]);
  });
  it('should return empty for empty args', () => {
    expect(parseIssueNumbers([])).toEqual([]);
  });
  it('should not match partial issue patterns', () => {
    expect(parseIssueNumbers(['#abc'])).toEqual([]);
    expect(parseIssueNumbers(['#'])).toEqual([]);
    expect(parseIssueNumbers(['##6'])).toEqual([]);
    expect(parseIssueNumbers(['6'])).toEqual([]);
    expect(parseIssueNumbers(['issue#6'])).toEqual([]);
  });
  it('should handle #0', () => {
    expect(parseIssueNumbers(['#0'])).toEqual([0]);
  });
 });
 describe('isIssueReference', () => {
  it('should return true for #N patterns', () => {
    expect(isIssueReference('#6')).toBe(true);
    expect(isIssueReference('#123')).toBe(true);
  });
  it('should return true with whitespace trim', () => {
    expect(isIssueReference(' #6 ')).toBe(true);
  });
  it('should return false for non-issue text', () => {
    expect(isIssueReference('Fix bug')).toBe(false);
    expect(isIssueReference('#abc')).toBe(false);
    expect(isIssueReference('')).toBe(false);
    expect(isIssueReference('#')).toBe(false);
    expect(isIssueReference('6')).toBe(false);
  });
  it('should return false for multiple issues (single string)', () => {
    expect(isIssueReference('#6 #7')).toBe(false);
  });
 });
 describe('formatIssueAsTask', () => {
  it('should format issue with all fields', () => {
    const issue: GitHubIssue = {
      number: 6,
      title: 'Fix authentication bug',
      body: 'The login flow is broken.',
      labels: ['bug', 'priority:high'],
      comments: [
        { author: 'user1', body: 'I can reproduce this.' },
        { author: 'user2', body: 'Fixed in PR #7.' },
      ],
    };
    const result = formatIssueAsTask(issue);
    expect(result).toContain('## GitHub Issue #6: Fix authentication bug');
    expect(result).toContain('The login flow is broken.');
    expect(result).toContain('### Labels');
    expect(result).toContain('bug, priority:high');
    expect(result).toContain('### Comments');
    expect(result).toContain('**user1**: I can reproduce this.');
    expect(result).toContain('**user2**: Fixed in PR #7.');
  });
  it('should format issue with no body', () => {
    const issue: GitHubIssue = {
      number: 10,
      title: 'Empty issue',
      body: '',
      labels: [],
      comments: [],
    };
    const result = formatIssueAsTask(issue);
    expect(result).toBe('## GitHub Issue #10: Empty issue');
    expect(result).not.toContain('### Labels');
    expect(result).not.toContain('### Comments');
  });
  it('should format issue with labels but no comments', () => {
    const issue: GitHubIssue = {
      number: 5,
      title: 'Feature request',
      body: 'Add dark mode.',
      labels: ['enhancement'],
      comments: [],
    };
    const result = formatIssueAsTask(issue);
    expect(result).toContain('### Labels');
    expect(result).toContain('enhancement');
    expect(result).not.toContain('### Comments');
  });
  it('should format issue with comments but no labels', () => {
    const issue: GitHubIssue = {
      number: 3,
      title: 'Discussion',
      body: 'Thoughts?',
      labels: [],
      comments: [
        { author: 'dev', body: 'LGTM' },
      ],
    };
    const result = formatIssueAsTask(issue);
    expect(result).not.toContain('### Labels');
    expect(result).toContain('### Comments');
    expect(result).toContain('**dev**: LGTM');
  });
  it('should handle multiline body', () => {
    const issue: GitHubIssue = {
      number: 1,
      title: 'Multi-line',
      body: 'Line 1\nLine 2\n\nLine 4',
      labels: [],
      comments: [],
    };
    const result = formatIssueAsTask(issue);
    expect(result).toContain('Line 1\nLine 2\n\nLine 4');
  });
 });
--- a/src/cli.ts
+++ b/src/cli.ts
@ -42,6 +42,7 @@ import { autoCommitAndPush } from './task/autoCommit.js';
 import { summarizeTaskName } from './task/summarize.js';
 import { DEFAULT_WORKFLOW_NAME } from './constants.js';
 import { checkForUpdates } from './utils/updateNotifier.js';
 import { resolveIssueTask, isIssueReference } from './github/issue.js';
 const require = createRequire(import.meta.url);
 const { version: cliVersion } = require('../package.json') as { version: string };
@ -188,6 +189,18 @@ program
    // Task execution
    if (task) {
      // Resolve #N issue references to task text
      let resolvedTask: string = task;
      if (isIssueReference(task) || task.trim().split(/\s+/).every((t: string) => isIssueReference(t))) {
        try {
          info('Fetching GitHub Issue...');
          resolvedTask = resolveIssueTask(task);
        } catch (e) {
          error(e instanceof Error ? e.message : String(e));
          process.exit(1);
        }
      }
      // Get available workflows and prompt user to select
      const availableWorkflows = listWorkflows();
      const currentWorkflow = getCurrentWorkflow(cwd);
@ -230,13 +243,13 @@ program
      }
      // Ask whether to create a worktree
-      const { execCwd, isWorktree } = await confirmAndCreateWorktree(cwd, task);
+      const { execCwd, isWorktree } = await confirmAndCreateWorktree(cwd, resolvedTask);
-      log.info('Starting task execution', { task, workflow: selectedWorkflow, worktree: isWorktree });
+      log.info('Starting task execution', { task: resolvedTask, workflow: selectedWorkflow, worktree: isWorktree });
-      const taskSuccess = await executeTask(task, execCwd, selectedWorkflow, cwd);
+      const taskSuccess = await executeTask(resolvedTask, execCwd, selectedWorkflow, cwd);
      if (taskSuccess && isWorktree) {
-        const commitResult = autoCommitAndPush(execCwd, task, cwd);
+        const commitResult = autoCommitAndPush(execCwd, resolvedTask, cwd);
        if (commitResult.success && commitResult.commitHash) {
          success(`Auto-committed & pushed: ${commitResult.commitHash}`);
        } else if (!commitResult.success) {
--- a/src/commands/addTask.ts
+++ b/src/commands/addTask.ts
@ -12,7 +12,9 @@ import { promptInput, promptMultilineInput, confirm, selectOption } from '../pro
 import { success, info } from '../utils/ui.js';
 import { summarizeTaskName } from '../task/summarize.js';
 import { createLogger } from '../utils/debug.js';
 import { error as errorLog } from '../utils/ui.js';
 import { listWorkflows } from '../config/workflowLoader.js';
 import { parseIssueNumbers, resolveIssueTask } from '../github/issue.js';
 import { getCurrentWorkflow } from '../config/paths.js';
 import type { TaskFileData } from '../task/schema.js';
@ -53,8 +55,19 @@ export async function addTask(cwd: string, args: string[]): Promise<void> {
  let workflow: string | undefined;
  if (args.length > 0) {
-    // Argument mode: task content provided directly
+    // Check if args are issue references (#N)
    const issueNumbers = parseIssueNumbers(args);
    if (issueNumbers.length > 0) {
      try {
        info('Fetching GitHub Issue...');
        taskContent = resolveIssueTask(args.join(' '));
      } catch (e) {
        errorLog(e instanceof Error ? e.message : String(e));
        return;
      }
    } else {
      taskContent = args.join(' ');
    }
  } else {
    // Interactive mode (multiline: empty line to finish)
    const input = await promptMultilineInput('Task content');
--- a/src/commands/help.ts
+++ b/src/commands/help.ts
@ -14,6 +14,7 @@ export function showHelp(): void {
  console.log(`
 Usage:
  takt {task}             Execute task with current workflow (continues session)
  takt "#N"               Execute GitHub Issue #N as task (quote # in shell)
  takt /run-tasks (/run)           Run all pending tasks from .takt/tasks/
  takt /watch                      Watch for tasks and auto-execute (stays resident)
  takt /add-task (/add)            Add a new task (interactive, YAML format)
@ -25,6 +26,10 @@ Usage:
 Examples:
  takt "Fix the bug in main.ts"         # Execute task (continues session)
  takt "#6"                             # Execute Issue #6 as task
  takt "#6 #7"                          # Execute multiple Issues as task
  takt /add-task "#6"                   # Create task from Issue #6
  takt /add-task "#6" "#7"              # Create task from multiple Issues
  takt /add-task "認証機能を追加する"   # Quick add task
  takt /add-task                        # Interactive task creation
  takt /clear                           # Clear sessions, start fresh
--- a/src/github/issue.ts
+++ b/src/github/issue.ts
@ -0,0 +1,183 @@
 /**
 * GitHub Issue utilities
 *
 * Fetches issue content via `gh` CLI and formats it as task text
 * for workflow execution or task creation.
 */
 import { execSync } from 'node:child_process';
 import { createLogger } from '../utils/debug.js';
 const log = createLogger('github');
 /** Regex to match `#N` patterns (issue numbers) */
 const ISSUE_NUMBER_REGEX = /^#(\d+)$/;
 export interface GitHubIssue {
  number: number;
  title: string;
  body: string;
  labels: string[];
  comments: Array<{ author: string; body: string }>;
 }
 export interface GhCliStatus {
  available: boolean;
  error?: string;
 }
 /**
 * Check if `gh` CLI is available and authenticated.
 */
 export function checkGhCli(): GhCliStatus {
  try {
    execSync('gh auth status', { stdio: 'pipe' });
    return { available: true };
  } catch {
    try {
      execSync('gh --version', { stdio: 'pipe' });
      return {
        available: false,
        error: 'gh CLI is installed but not authenticated. Run `gh auth login` first.',
      };
    } catch {
      return {
        available: false,
        error: 'gh CLI is not installed. Install it from https://cli.github.com/',
      };
    }
  }
 }
 /**
 * Fetch issue content via `gh issue view`.
 * Throws on failure (issue not found, network error, etc.).
 */
 export function fetchIssue(issueNumber: number): GitHubIssue {
  log.debug('Fetching issue', { issueNumber });
  const raw = execSync(
    `gh issue view ${issueNumber} --json number,title,body,labels,comments`,
    { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] },
  );
  const data = JSON.parse(raw) as {
    number: number;
    title: string;
    body: string;
    labels: Array<{ name: string }>;
    comments: Array<{ author: { login: string }; body: string }>;
  };
  return {
    number: data.number,
    title: data.title,
    body: data.body ?? '',
    labels: data.labels.map((l) => l.name),
    comments: data.comments.map((c) => ({
      author: c.author.login,
      body: c.body,
    })),
  };
 }
 /**
 * Format a GitHub issue into task text for workflow execution.
 *
 * Output format:
 * ```
 * ## GitHub Issue #6: Fix authentication bug
 *
 * {body}
 *
 * ### Labels
 * bug, priority:high
 *
 * ### Comments
 * **user1**: Comment body...
 * ```
 */
 export function formatIssueAsTask(issue: GitHubIssue): string {
  const parts: string[] = [];
  parts.push(`## GitHub Issue #${issue.number}: ${issue.title}`);
  if (issue.body) {
    parts.push('');
    parts.push(issue.body);
  }
  if (issue.labels.length > 0) {
    parts.push('');
    parts.push('### Labels');
    parts.push(issue.labels.join(', '));
  }
  if (issue.comments.length > 0) {
    parts.push('');
    parts.push('### Comments');
    for (const comment of issue.comments) {
      parts.push(`**${comment.author}**: ${comment.body}`);
    }
  }
  return parts.join('\n');
 }
 /**
 * Parse `#N` patterns from argument strings.
 * Returns issue numbers found, or empty array if none.
 *
 * Each argument must be exactly `#N` (no mixed text).
 * Examples:
 *   ['#6'] → [6]
 *   ['#6', '#7'] → [6, 7]
 *   ['Fix bug'] → []
 *   ['#6', 'and', '#7'] → [] (mixed, not all are issue refs)
 */
 export function parseIssueNumbers(args: string[]): number[] {
  if (args.length === 0) return [];
  const numbers: number[] = [];
  for (const arg of args) {
    const match = arg.match(ISSUE_NUMBER_REGEX);
    if (!match?.[1]) return []; // Not all args are issue refs
    numbers.push(Number.parseInt(match[1], 10));
  }
  return numbers;
 }
 /**
 * Check if a single task string is an issue reference (`#N`).
 */
 export function isIssueReference(task: string): boolean {
  return ISSUE_NUMBER_REGEX.test(task.trim());
 }
 /**
 * Resolve issue references in a task string.
 * If task contains `#N` patterns (space-separated), fetches issues and returns formatted text.
 * Otherwise returns the task string as-is.
 *
 * Checks gh CLI availability before fetching.
 * Throws if gh CLI is not available or issue fetch fails.
 */
 export function resolveIssueTask(task: string): string {
  const tokens = task.trim().split(/\s+/);
  const issueNumbers = parseIssueNumbers(tokens);
  if (issueNumbers.length === 0) {
    return task;
  }
  const ghStatus = checkGhCli();
  if (!ghStatus.available) {
    throw new Error(ghStatus.error ?? 'gh CLI is not available');
  }
  log.info('Resolving issue references', { issueNumbers });
  const issues = issueNumbers.map((n) => fetchIssue(n));
  return issues.map(formatIssueAsTask).join('\n\n---\n\n');
 }
--- a/src/workflow/engine.ts
+++ b/src/workflow/engine.ts
@ -10,17 +10,17 @@ import type {
  WorkflowState,
  WorkflowStep,
  AgentResponse,
  RuleMatchMethod,
 } from '../models/types.js';
 import { runAgent, type RunAgentOptions } from '../agents/runner.js';
 import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js';
 import type { WorkflowEngineOptions } from './types.js';
 import { determineNextStepByRules } from './transitions.js';
-import { detectRuleIndex, callAiJudge } from '../claude/client.js';
+import { buildInstruction as buildInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js';
 import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js';
 import { LoopDetector } from './loop-detector.js';
 import { handleBlocked } from './blocked-handler.js';
 import { ParallelLogger } from './parallel-logger.js';
 import { detectMatchedRule } from './rule-evaluator.js';
 import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from './phase-runner.js';
 import {
  createInitialState,
  addUserInput,
@ -206,18 +206,11 @@ export class WorkflowEngine extends EventEmitter {
    return this.runNormalStep(step);
  }
-  /** Build RunAgentOptions from a step's configuration */
+  /** Build common RunAgentOptions shared by all phases */
-  private buildAgentOptions(step: WorkflowStep): RunAgentOptions {
+  private buildBaseOptions(step: WorkflowStep): RunAgentOptions {
    // Phase 1: exclude Write from allowedTools when step has report config
    const allowedTools = step.report
      ? step.allowedTools?.filter((t) => t !== 'Write')
      : step.allowedTools;
    return {
      cwd: this.cwd,
      sessionId: this.state.agentSessions.get(step.agent),
      agentPath: step.agentPath,
      allowedTools,
      provider: step.provider,
      model: step.model,
      permissionMode: step.permissionMode,
@ -228,24 +221,29 @@ export class WorkflowEngine extends EventEmitter {
    };
  }
  /** Build RunAgentOptions from a step's configuration (Phase 1) */
  private buildAgentOptions(step: WorkflowStep): RunAgentOptions {
    // Phase 1: exclude Write from allowedTools when step has report config
    const allowedTools = step.report
      ? step.allowedTools?.filter((t) => t !== 'Write')
      : step.allowedTools;
    return {
      ...this.buildBaseOptions(step),
      sessionId: this.state.agentSessions.get(step.agent),
      allowedTools,
    };
  }
  /**
   * Build RunAgentOptions for session-resume phases (Phase 2, Phase 3).
   * Shares common fields with the original step's agent config.
   */
  private buildResumeOptions(step: WorkflowStep, sessionId: string, overrides: Pick<RunAgentOptions, 'allowedTools' | 'maxTurns'>): RunAgentOptions {
    return {
-      cwd: this.cwd,
+      ...this.buildBaseOptions(step),
      sessionId,
      agentPath: step.agentPath,
      allowedTools: overrides.allowedTools,
      maxTurns: overrides.maxTurns,
      provider: step.provider,
      model: step.model,
      permissionMode: step.permissionMode,
      onStream: this.options.onStream,
      onPermissionRequest: this.options.onPermissionRequest,
      onAskUserQuestion: this.options.onAskUserQuestion,
      bypassPermissions: this.options.bypassPermissions,
    };
  }
@ -261,114 +259,16 @@ export class WorkflowEngine extends EventEmitter {
    }
  }
-  /**
+  /** Build phase runner context for Phase 2/3 execution */
-   * Detect matched rule for a step's response.
+  private buildPhaseRunnerContext() {
-   * Evaluation order (first match wins):
+    return {
-   * 1. Aggregate conditions: all()/any() — evaluate sub-step results
+      cwd: this.cwd,
-   * 2. Tag detection from Phase 3 output
+      reportDir: this.reportDir,
-   * 3. Tag detection from Phase 1 output (fallback)
+      language: this.language,
-   * 4. ai() condition evaluation via AI judge
+      getSessionId: (agent: string) => this.state.agentSessions.get(agent),
-   * 5. All-conditions AI judge (final fallback)
+      buildResumeOptions: this.buildResumeOptions.bind(this),
-   *
+      updateAgentSession: this.updateAgentSession.bind(this),
-   * Returns undefined for steps without rules.
+    };
   * Throws if rules exist but no rule matched (Fail Fast).
   *
   * @param step - The workflow step
   * @param agentContent - Phase 1 output (main execution)
   * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection
   */
  private async detectMatchedRule(step: WorkflowStep, agentContent: string, tagContent: string): Promise<{ index: number; method: RuleMatchMethod } | undefined> {
    if (!step.rules || step.rules.length === 0) return undefined;
    // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps
    const aggIndex = this.evaluateAggregateConditions(step);
    if (aggIndex >= 0) {
      return { index: aggIndex, method: 'aggregate' };
    }
    // 2. Tag detection from Phase 3 output
    if (tagContent) {
      const ruleIndex = detectRuleIndex(tagContent, step.name);
      if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
        return { index: ruleIndex, method: 'phase3_tag' };
      }
    }
    // 3. Tag detection from Phase 1 output (fallback)
    if (agentContent) {
      const ruleIndex = detectRuleIndex(agentContent, step.name);
      if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
        return { index: ruleIndex, method: 'phase1_tag' };
      }
    }
    // 4. AI judge for ai() conditions only
    const aiRuleIndex = await this.evaluateAiConditions(step, agentContent);
    if (aiRuleIndex >= 0) {
      return { index: aiRuleIndex, method: 'ai_judge' };
    }
    // 5. AI judge for all conditions (final fallback)
    const fallbackIndex = await this.evaluateAllConditionsViaAiJudge(step, agentContent);
    if (fallbackIndex >= 0) {
      return { index: fallbackIndex, method: 'ai_judge_fallback' };
    }
    throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`);
  }
  /**
   * Evaluate aggregate conditions (all()/any()) against sub-step results.
   * Returns the 0-based rule index in the step's rules array, or -1 if no match.
   *
   * For each aggregate rule, checks the matched condition text of sub-steps:
   * - all("X"): true when ALL sub-steps have matched condition === X
   * - any("X"): true when at least ONE sub-step has matched condition === X
   *
   * Edge cases per spec:
   * - Sub-step with no matched rule: all() → false, any() → skip that sub-step
   * - No sub-steps (0 件): both → false
   * - Non-parallel step: both → false
   */
  private evaluateAggregateConditions(step: WorkflowStep): number {
    if (!step.rules || !step.parallel || step.parallel.length === 0) return -1;
    for (let i = 0; i < step.rules.length; i++) {
      const rule = step.rules[i]!;
      if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) {
        continue;
      }
      const subSteps = step.parallel;
      const targetCondition = rule.aggregateConditionText;
      if (rule.aggregateType === 'all') {
        const allMatch = subSteps.every((sub) => {
          const output = this.state.stepOutputs.get(sub.name);
          if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
          const matchedRule = sub.rules[output.matchedRuleIndex];
          return matchedRule?.condition === targetCondition;
        });
        if (allMatch) {
          log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
          return i;
        }
      } else {
        // 'any'
        const anyMatch = subSteps.some((sub) => {
          const output = this.state.stepOutputs.get(sub.name);
          if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
          const matchedRule = sub.rules[output.matchedRuleIndex];
          return matchedRule?.condition === targetCondition;
        });
        if (anyMatch) {
          log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
          return i;
        }
      }
    }
    return -1;
  }
  /** Run a normal (non-parallel) step */
@ -388,18 +288,23 @@ export class WorkflowEngine extends EventEmitter {
    let response = await runAgent(step.agent, instruction, agentOptions);
    this.updateAgentSession(step.agent, response.sessionId);
    const phaseCtx = this.buildPhaseRunnerContext();
    // Phase 2: report output (resume same session, Write only)
    if (step.report) {
-      await this.runReportPhase(step, stepIteration);
+      await runReportPhase(step, stepIteration, phaseCtx);
    }
    // Phase 3: status judgment (resume session, no tools, output status tag)
    let tagContent = '';
-    if (this.needsStatusJudgmentPhase(step)) {
+    if (needsStatusJudgmentPhase(step)) {
-      tagContent = await this.runStatusJudgmentPhase(step);
+      tagContent = await runStatusJudgmentPhase(step, phaseCtx);
    }
-    const match = await this.detectMatchedRule(step, response.content, tagContent);
+    const match = await detectMatchedRule(step, response.content, tagContent, {
      state: this.state,
      cwd: this.cwd,
    });
    if (match) {
      log.debug('Rule matched', { step: step.name, ruleIndex: match.index, method: match.method });
      response = { ...response, matchedRuleIndex: match.index, matchedRuleMethod: match.method };
@ -410,81 +315,6 @@ export class WorkflowEngine extends EventEmitter {
    return { response, instruction };
  }
  /**
   * Phase 2: Report output.
   * Resumes the agent session with Write-only tools to output reports.
   * The response is discarded — only sessionId is updated.
   */
  private async runReportPhase(step: WorkflowStep, stepIteration: number): Promise<void> {
    const sessionId = this.state.agentSessions.get(step.agent);
    if (!sessionId) {
      throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
    }
    log.debug('Running report phase', { step: step.name, sessionId });
    const reportInstruction = buildReportInstructionFromTemplate(step, {
      cwd: this.cwd,
      reportDir: this.reportDir,
      stepIteration,
      language: this.language,
    });
    const reportOptions = this.buildResumeOptions(step, sessionId, {
      allowedTools: ['Write'],
      maxTurns: 3,
    });
    const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions);
    // Update session (phase 2 may update it)
    this.updateAgentSession(step.agent, reportResponse.sessionId);
    log.debug('Report phase complete', { step: step.name, status: reportResponse.status });
  }
  /**
   * Check if a step needs Phase 3 (status judgment).
   * Returns true when at least one rule requires tag-based detection
   * (i.e., not all rules are ai() or aggregate conditions).
   */
  private needsStatusJudgmentPhase(step: WorkflowStep): boolean {
    if (!step.rules || step.rules.length === 0) return false;
    const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition);
    return !allNonTagConditions;
  }
  /**
   * Phase 3: Status judgment.
   * Resumes the agent session with no tools to ask the agent to output a status tag.
   * Returns the Phase 3 response content (containing the status tag).
   */
  private async runStatusJudgmentPhase(step: WorkflowStep): Promise<string> {
    const sessionId = this.state.agentSessions.get(step.agent);
    if (!sessionId) {
      throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
    }
    log.debug('Running status judgment phase', { step: step.name, sessionId });
    const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, {
      language: this.language,
    });
    const judgmentOptions = this.buildResumeOptions(step, sessionId, {
      allowedTools: [],
      maxTurns: 3,
    });
    const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions);
    // Update session (phase 3 may update it)
    this.updateAgentSession(step.agent, judgmentResponse.sessionId);
    log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status });
    return judgmentResponse.content;
  }
  /**
   * Run a parallel step: execute all sub-steps concurrently, then aggregate results.
   * The aggregated output becomes the parent step's response for rules evaluation.
@ -509,6 +339,9 @@ export class WorkflowEngine extends EventEmitter {
        })
      : undefined;
    const phaseCtx = this.buildPhaseRunnerContext();
    const ruleCtx = { state: this.state, cwd: this.cwd };
    // Run all sub-steps concurrently
    const subResults = await Promise.all(
      subSteps.map(async (subStep, index) => {
@ -516,28 +349,28 @@ export class WorkflowEngine extends EventEmitter {
        const subInstruction = this.buildInstruction(subStep, subIteration);
        // Phase 1: main execution (Write excluded if sub-step has report)
-        const agentOptions = this.buildAgentOptions(subStep);
+        const baseOptions = this.buildAgentOptions(subStep);
-        // Override onStream with parallel logger's prefixed handler
+        // Override onStream with parallel logger's prefixed handler (immutable)
-        if (parallelLogger) {
+        const agentOptions = parallelLogger
-          agentOptions.onStream = parallelLogger.createStreamHandler(subStep.name, index);
+          ? { ...baseOptions, onStream: parallelLogger.createStreamHandler(subStep.name, index) }
-        }
+          : baseOptions;
        const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions);
        this.updateAgentSession(subStep.agent, subResponse.sessionId);
        // Phase 2: report output for sub-step
        if (subStep.report) {
-          await this.runReportPhase(subStep, subIteration);
+          await runReportPhase(subStep, subIteration, phaseCtx);
        }
        // Phase 3: status judgment for sub-step
        let subTagContent = '';
-        if (this.needsStatusJudgmentPhase(subStep)) {
+        if (needsStatusJudgmentPhase(subStep)) {
-          subTagContent = await this.runStatusJudgmentPhase(subStep);
+          subTagContent = await runStatusJudgmentPhase(subStep, phaseCtx);
        }
-        const match = await this.detectMatchedRule(subStep, subResponse.content, subTagContent);
+        const match = await detectMatchedRule(subStep, subResponse.content, subTagContent, ruleCtx);
        const finalResponse = match
          ? { ...subResponse, matchedRuleIndex: match.index, matchedRuleMethod: match.method }
          : subResponse;
@ -572,7 +405,7 @@ export class WorkflowEngine extends EventEmitter {
      .join('\n\n');
    // Parent step uses aggregate conditions, so tagContent is empty
-    const match = await this.detectMatchedRule(step, aggregatedContent, '');
+    const match = await detectMatchedRule(step, aggregatedContent, '', ruleCtx);
    const aggregatedResponse: AgentResponse = {
      agent: step.name,
@ -587,79 +420,6 @@ export class WorkflowEngine extends EventEmitter {
    return { response: aggregatedResponse, instruction: aggregatedInstruction };
  }
  /**
   * Evaluate ai() conditions via AI judge.
   * Collects all ai() rules, calls the judge, and maps the result back to the original rule index.
   * Returns the 0-based rule index in the step's rules array, or -1 if no match.
   */
  private async evaluateAiConditions(step: WorkflowStep, agentOutput: string): Promise<number> {
    if (!step.rules) return -1;
    const aiConditions: { index: number; text: string }[] = [];
    for (let i = 0; i < step.rules.length; i++) {
      const rule = step.rules[i]!;
      if (rule.isAiCondition && rule.aiConditionText) {
        aiConditions.push({ index: i, text: rule.aiConditionText });
      }
    }
    if (aiConditions.length === 0) return -1;
    log.debug('Evaluating ai() conditions via judge', {
      step: step.name,
      conditionCount: aiConditions.length,
    });
    // Remap: judge returns 0-based index within aiConditions array
    const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text }));
    const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd: this.cwd });
    if (judgeResult >= 0 && judgeResult < aiConditions.length) {
      const matched = aiConditions[judgeResult]!;
      log.debug('AI judge matched condition', {
        step: step.name,
        judgeResult,
        originalRuleIndex: matched.index,
        condition: matched.text,
      });
      return matched.index;
    }
    log.debug('AI judge did not match any condition', { step: step.name });
    return -1;
  }
  /**
   * Final fallback: evaluate ALL rule conditions via AI judge.
   * Unlike evaluateAiConditions (which only handles ai() flagged rules),
   * this sends every rule's condition text to the judge.
   * Returns the 0-based rule index, or -1 if no match.
   */
  private async evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string): Promise<number> {
    if (!step.rules || step.rules.length === 0) return -1;
    const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition }));
    log.debug('Evaluating all conditions via AI judge (final fallback)', {
      step: step.name,
      conditionCount: conditions.length,
    });
    const judgeResult = await callAiJudge(agentOutput, conditions, { cwd: this.cwd });
    if (judgeResult >= 0 && judgeResult < conditions.length) {
      log.debug('AI judge (fallback) matched condition', {
        step: step.name,
        ruleIndex: judgeResult,
        condition: conditions[judgeResult]!.text,
      });
      return judgeResult;
    }
    log.debug('AI judge (fallback) did not match any condition', { step: step.name });
    return -1;
  }
  /**
   * Determine next step for a completed step using rules-based routing.
   */
--- a/src/workflow/instruction-builder.ts
+++ b/src/workflow/instruction-builder.ts
@ -12,6 +12,7 @@
 */
 import type { WorkflowStep, WorkflowRule, AgentResponse, Language, ReportConfig, ReportObjectConfig } from '../models/types.js';
 import { hasTagBasedRules } from './rule-utils.js';
 /**
@ -465,13 +466,10 @@ export function buildInstruction(
  // 7. Status Output Rules (for tag-based detection in Phase 1)
  // Skip if all rules are ai() or aggregate conditions (no tags needed)
-  if (step.rules && step.rules.length > 0) {
+  if (hasTagBasedRules(step)) {
-    const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition);
+    const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules!, language);
    if (!allNonTagConditions) {
      const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules, language);
    sections.push(statusRulesPrompt);
  }
  }
  return sections.join('\n\n');
 }
--- a/src/workflow/phase-runner.ts
+++ b/src/workflow/phase-runner.ts
@ -0,0 +1,111 @@
 /**
 * Phase execution logic extracted from engine.ts.
 *
 * Handles Phase 2 (report output) and Phase 3 (status judgment)
 * as session-resume operations.
 */
 import type { WorkflowStep, Language } from '../models/types.js';
 import { runAgent, type RunAgentOptions } from '../agents/runner.js';
 import {
  buildReportInstruction as buildReportInstructionFromTemplate,
  buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate,
 } from './instruction-builder.js';
 import { hasTagBasedRules } from './rule-utils.js';
 import { createLogger } from '../utils/debug.js';
 const log = createLogger('phase-runner');
 export interface PhaseRunnerContext {
  /** Working directory (agent work dir, may be a clone) */
  cwd: string;
  /** Report directory path */
  reportDir: string;
  /** Language for instructions */
  language?: Language;
  /** Get agent session ID */
  getSessionId: (agent: string) => string | undefined;
  /** Build resume options for a step */
  buildResumeOptions: (step: WorkflowStep, sessionId: string, overrides: Pick<RunAgentOptions, 'allowedTools' | 'maxTurns'>) => RunAgentOptions;
  /** Update agent session after a phase run */
  updateAgentSession: (agent: string, sessionId: string | undefined) => void;
 }
 /**
 * Check if a step needs Phase 3 (status judgment).
 * Returns true when at least one rule requires tag-based detection.
 */
 export function needsStatusJudgmentPhase(step: WorkflowStep): boolean {
  return hasTagBasedRules(step);
 }
 /**
 * Phase 2: Report output.
 * Resumes the agent session with Write-only tools to output reports.
 * The response is discarded — only sessionId is updated.
 */
 export async function runReportPhase(
  step: WorkflowStep,
  stepIteration: number,
  ctx: PhaseRunnerContext,
 ): Promise<void> {
  const sessionId = ctx.getSessionId(step.agent);
  if (!sessionId) {
    throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
  }
  log.debug('Running report phase', { step: step.name, sessionId });
  const reportInstruction = buildReportInstructionFromTemplate(step, {
    cwd: ctx.cwd,
    reportDir: ctx.reportDir,
    stepIteration,
    language: ctx.language,
  });
  const reportOptions = ctx.buildResumeOptions(step, sessionId, {
    allowedTools: ['Write'],
    maxTurns: 3,
  });
  const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions);
  // Update session (phase 2 may update it)
  ctx.updateAgentSession(step.agent, reportResponse.sessionId);
  log.debug('Report phase complete', { step: step.name, status: reportResponse.status });
 }
 /**
 * Phase 3: Status judgment.
 * Resumes the agent session with no tools to ask the agent to output a status tag.
 * Returns the Phase 3 response content (containing the status tag).
 */
 export async function runStatusJudgmentPhase(
  step: WorkflowStep,
  ctx: PhaseRunnerContext,
 ): Promise<string> {
  const sessionId = ctx.getSessionId(step.agent);
  if (!sessionId) {
    throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
  }
  log.debug('Running status judgment phase', { step: step.name, sessionId });
  const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, {
    language: ctx.language,
  });
  const judgmentOptions = ctx.buildResumeOptions(step, sessionId, {
    allowedTools: [],
    maxTurns: 3,
  });
  const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions);
  // Update session (phase 3 may update it)
  ctx.updateAgentSession(step.agent, judgmentResponse.sessionId);
  log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status });
  return judgmentResponse.content;
 }
--- a/src/workflow/rule-evaluator.ts
+++ b/src/workflow/rule-evaluator.ts
@ -0,0 +1,218 @@
 /**
 * Rule evaluation logic extracted from engine.ts.
 *
 * Evaluates workflow step rules to determine the matched rule index.
 * Supports tag-based detection, ai() conditions, aggregate conditions,
 * and AI judge fallback.
 */
 import type {
  WorkflowStep,
  WorkflowState,
  RuleMatchMethod,
 } from '../models/types.js';
 import { detectRuleIndex, callAiJudge } from '../claude/client.js';
 import { createLogger } from '../utils/debug.js';
 const log = createLogger('rule-evaluator');
 export interface RuleMatch {
  index: number;
  method: RuleMatchMethod;
 }
 export interface RuleEvaluatorContext {
  /** Workflow state (for accessing stepOutputs in aggregate evaluation) */
  state: WorkflowState;
  /** Working directory (for AI judge calls) */
  cwd: string;
 }
 /**
 * Detect matched rule for a step's response.
 * Evaluation order (first match wins):
 * 1. Aggregate conditions: all()/any() — evaluate sub-step results
 * 2. Tag detection from Phase 3 output
 * 3. Tag detection from Phase 1 output (fallback)
 * 4. ai() condition evaluation via AI judge
 * 5. All-conditions AI judge (final fallback)
 *
 * Returns undefined for steps without rules.
 * Throws if rules exist but no rule matched (Fail Fast).
 *
 * @param step - The workflow step
 * @param agentContent - Phase 1 output (main execution)
 * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection
 * @param ctx - Evaluation context (state, cwd)
 */
 export async function detectMatchedRule(
  step: WorkflowStep,
  agentContent: string,
  tagContent: string,
  ctx: RuleEvaluatorContext,
 ): Promise<RuleMatch | undefined> {
  if (!step.rules || step.rules.length === 0) return undefined;
  // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps
  const aggIndex = evaluateAggregateConditions(step, ctx.state);
  if (aggIndex >= 0) {
    return { index: aggIndex, method: 'aggregate' };
  }
  // 2. Tag detection from Phase 3 output
  if (tagContent) {
    const ruleIndex = detectRuleIndex(tagContent, step.name);
    if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
      return { index: ruleIndex, method: 'phase3_tag' };
    }
  }
  // 3. Tag detection from Phase 1 output (fallback)
  if (agentContent) {
    const ruleIndex = detectRuleIndex(agentContent, step.name);
    if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
      return { index: ruleIndex, method: 'phase1_tag' };
    }
  }
  // 4. AI judge for ai() conditions only
  const aiRuleIndex = await evaluateAiConditions(step, agentContent, ctx.cwd);
  if (aiRuleIndex >= 0) {
    return { index: aiRuleIndex, method: 'ai_judge' };
  }
  // 5. AI judge for all conditions (final fallback)
  const fallbackIndex = await evaluateAllConditionsViaAiJudge(step, agentContent, ctx.cwd);
  if (fallbackIndex >= 0) {
    return { index: fallbackIndex, method: 'ai_judge_fallback' };
  }
  throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`);
 }
 /**
 * Evaluate aggregate conditions (all()/any()) against sub-step results.
 * Returns the 0-based rule index in the step's rules array, or -1 if no match.
 *
 * For each aggregate rule, checks the matched condition text of sub-steps:
 * - all("X"): true when ALL sub-steps have matched condition === X
 * - any("X"): true when at least ONE sub-step has matched condition === X
 *
 * Edge cases per spec:
 * - Sub-step with no matched rule: all() → false, any() → skip that sub-step
 * - No sub-steps (0 件): both → false
 * - Non-parallel step: both → false
 */
 export function evaluateAggregateConditions(step: WorkflowStep, state: WorkflowState): number {
  if (!step.rules || !step.parallel || step.parallel.length === 0) return -1;
  for (let i = 0; i < step.rules.length; i++) {
    const rule = step.rules[i]!;
    if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) {
      continue;
    }
    const subSteps = step.parallel;
    const targetCondition = rule.aggregateConditionText;
    if (rule.aggregateType === 'all') {
      const allMatch = subSteps.every((sub) => {
        const output = state.stepOutputs.get(sub.name);
        if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
        const matchedRule = sub.rules[output.matchedRuleIndex];
        return matchedRule?.condition === targetCondition;
      });
      if (allMatch) {
        log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
        return i;
      }
    } else {
      // 'any'
      const anyMatch = subSteps.some((sub) => {
        const output = state.stepOutputs.get(sub.name);
        if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
        const matchedRule = sub.rules[output.matchedRuleIndex];
        return matchedRule?.condition === targetCondition;
      });
      if (anyMatch) {
        log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
        return i;
      }
    }
  }
  return -1;
 }
 /**
 * Evaluate ai() conditions via AI judge.
 * Collects all ai() rules, calls the judge, and maps the result back to the original rule index.
 * Returns the 0-based rule index in the step's rules array, or -1 if no match.
 */
 export async function evaluateAiConditions(step: WorkflowStep, agentOutput: string, cwd: string): Promise<number> {
  if (!step.rules) return -1;
  const aiConditions: { index: number; text: string }[] = [];
  for (let i = 0; i < step.rules.length; i++) {
    const rule = step.rules[i]!;
    if (rule.isAiCondition && rule.aiConditionText) {
      aiConditions.push({ index: i, text: rule.aiConditionText });
    }
  }
  if (aiConditions.length === 0) return -1;
  log.debug('Evaluating ai() conditions via judge', {
    step: step.name,
    conditionCount: aiConditions.length,
  });
  // Remap: judge returns 0-based index within aiConditions array
  const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text }));
  const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd });
  if (judgeResult >= 0 && judgeResult < aiConditions.length) {
    const matched = aiConditions[judgeResult]!;
    log.debug('AI judge matched condition', {
      step: step.name,
      judgeResult,
      originalRuleIndex: matched.index,
      condition: matched.text,
    });
    return matched.index;
  }
  log.debug('AI judge did not match any condition', { step: step.name });
  return -1;
 }
 /**
 * Final fallback: evaluate ALL rule conditions via AI judge.
 * Unlike evaluateAiConditions (which only handles ai() flagged rules),
 * this sends every rule's condition text to the judge.
 * Returns the 0-based rule index, or -1 if no match.
 */
 export async function evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string, cwd: string): Promise<number> {
  if (!step.rules || step.rules.length === 0) return -1;
  const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition }));
  log.debug('Evaluating all conditions via AI judge (final fallback)', {
    step: step.name,
    conditionCount: conditions.length,
  });
  const judgeResult = await callAiJudge(agentOutput, conditions, { cwd });
  if (judgeResult >= 0 && judgeResult < conditions.length) {
    log.debug('AI judge (fallback) matched condition', {
      step: step.name,
      ruleIndex: judgeResult,
      condition: conditions[judgeResult]!.text,
    });
    return judgeResult;
  }
  log.debug('AI judge (fallback) did not match any condition', { step: step.name });
  return -1;
 }
--- a/src/workflow/rule-utils.ts
+++ b/src/workflow/rule-utils.ts
@ -0,0 +1,18 @@
 /**
 * Shared rule utility functions used by both engine.ts and instruction-builder.ts.
 */
 import type { WorkflowStep } from '../models/types.js';
 /**
 * Check whether a step has tag-based rules (i.e., rules that require
 * [STEP:N] tag output for detection).
 *
 * Returns false when all rules are ai() or aggregate conditions,
 * meaning no tag-based status output is needed.
 */
 export function hasTagBasedRules(step: WorkflowStep): boolean {
  if (!step.rules || step.rules.length === 0) return false;
  const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition);
  return !allNonTagConditions;
 }