feat: GitHub Issue番号でタスク実行・追加を可能にし、エンジンをリファクタリング (#10)

- takt "#6" や /add-task "#6" "#7" でIssue内容をタスクとして実行可能に
- gh CLI経由でIssue取得、タイトル・本文・ラベル・コメントをタスクテキストに変換
- engine.tsからフェーズ実行(phase-runner)、ルール評価(rule-evaluator)、
  ルールユーティリティ(rule-utils)を分離
This commit is contained in:
nrslib 2026-01-30 18:49:55 +09:00
parent 006c69056a
commit 84ef8fbaf7
10 changed files with 788 additions and 304 deletions

View File

@ -0,0 +1,165 @@
/**
* Tests for github/issue module
*
* Tests parsing and formatting functions.
* checkGhCli/fetchIssue/resolveIssueTask are integration functions
* that call `gh` CLI, so they are not unit-tested here.
*/
import { describe, it, expect } from 'vitest';
import {
parseIssueNumbers,
isIssueReference,
formatIssueAsTask,
type GitHubIssue,
} from '../github/issue.js';
describe('parseIssueNumbers', () => {
it('should parse single issue reference', () => {
expect(parseIssueNumbers(['#6'])).toEqual([6]);
});
it('should parse multiple issue references', () => {
expect(parseIssueNumbers(['#6', '#7'])).toEqual([6, 7]);
});
it('should parse large issue numbers', () => {
expect(parseIssueNumbers(['#123'])).toEqual([123]);
});
it('should return empty for non-issue args', () => {
expect(parseIssueNumbers(['Fix bug'])).toEqual([]);
});
it('should return empty when mixed issue and non-issue args', () => {
expect(parseIssueNumbers(['#6', 'and', '#7'])).toEqual([]);
});
it('should return empty for empty args', () => {
expect(parseIssueNumbers([])).toEqual([]);
});
it('should not match partial issue patterns', () => {
expect(parseIssueNumbers(['#abc'])).toEqual([]);
expect(parseIssueNumbers(['#'])).toEqual([]);
expect(parseIssueNumbers(['##6'])).toEqual([]);
expect(parseIssueNumbers(['6'])).toEqual([]);
expect(parseIssueNumbers(['issue#6'])).toEqual([]);
});
it('should handle #0', () => {
expect(parseIssueNumbers(['#0'])).toEqual([0]);
});
});
describe('isIssueReference', () => {
it('should return true for #N patterns', () => {
expect(isIssueReference('#6')).toBe(true);
expect(isIssueReference('#123')).toBe(true);
});
it('should return true with whitespace trim', () => {
expect(isIssueReference(' #6 ')).toBe(true);
});
it('should return false for non-issue text', () => {
expect(isIssueReference('Fix bug')).toBe(false);
expect(isIssueReference('#abc')).toBe(false);
expect(isIssueReference('')).toBe(false);
expect(isIssueReference('#')).toBe(false);
expect(isIssueReference('6')).toBe(false);
});
it('should return false for multiple issues (single string)', () => {
expect(isIssueReference('#6 #7')).toBe(false);
});
});
describe('formatIssueAsTask', () => {
it('should format issue with all fields', () => {
const issue: GitHubIssue = {
number: 6,
title: 'Fix authentication bug',
body: 'The login flow is broken.',
labels: ['bug', 'priority:high'],
comments: [
{ author: 'user1', body: 'I can reproduce this.' },
{ author: 'user2', body: 'Fixed in PR #7.' },
],
};
const result = formatIssueAsTask(issue);
expect(result).toContain('## GitHub Issue #6: Fix authentication bug');
expect(result).toContain('The login flow is broken.');
expect(result).toContain('### Labels');
expect(result).toContain('bug, priority:high');
expect(result).toContain('### Comments');
expect(result).toContain('**user1**: I can reproduce this.');
expect(result).toContain('**user2**: Fixed in PR #7.');
});
it('should format issue with no body', () => {
const issue: GitHubIssue = {
number: 10,
title: 'Empty issue',
body: '',
labels: [],
comments: [],
};
const result = formatIssueAsTask(issue);
expect(result).toBe('## GitHub Issue #10: Empty issue');
expect(result).not.toContain('### Labels');
expect(result).not.toContain('### Comments');
});
it('should format issue with labels but no comments', () => {
const issue: GitHubIssue = {
number: 5,
title: 'Feature request',
body: 'Add dark mode.',
labels: ['enhancement'],
comments: [],
};
const result = formatIssueAsTask(issue);
expect(result).toContain('### Labels');
expect(result).toContain('enhancement');
expect(result).not.toContain('### Comments');
});
it('should format issue with comments but no labels', () => {
const issue: GitHubIssue = {
number: 3,
title: 'Discussion',
body: 'Thoughts?',
labels: [],
comments: [
{ author: 'dev', body: 'LGTM' },
],
};
const result = formatIssueAsTask(issue);
expect(result).not.toContain('### Labels');
expect(result).toContain('### Comments');
expect(result).toContain('**dev**: LGTM');
});
it('should handle multiline body', () => {
const issue: GitHubIssue = {
number: 1,
title: 'Multi-line',
body: 'Line 1\nLine 2\n\nLine 4',
labels: [],
comments: [],
};
const result = formatIssueAsTask(issue);
expect(result).toContain('Line 1\nLine 2\n\nLine 4');
});
});

View File

@ -42,6 +42,7 @@ import { autoCommitAndPush } from './task/autoCommit.js';
import { summarizeTaskName } from './task/summarize.js'; import { summarizeTaskName } from './task/summarize.js';
import { DEFAULT_WORKFLOW_NAME } from './constants.js'; import { DEFAULT_WORKFLOW_NAME } from './constants.js';
import { checkForUpdates } from './utils/updateNotifier.js'; import { checkForUpdates } from './utils/updateNotifier.js';
import { resolveIssueTask, isIssueReference } from './github/issue.js';
const require = createRequire(import.meta.url); const require = createRequire(import.meta.url);
const { version: cliVersion } = require('../package.json') as { version: string }; const { version: cliVersion } = require('../package.json') as { version: string };
@ -188,6 +189,18 @@ program
// Task execution // Task execution
if (task) { if (task) {
// Resolve #N issue references to task text
let resolvedTask: string = task;
if (isIssueReference(task) || task.trim().split(/\s+/).every((t: string) => isIssueReference(t))) {
try {
info('Fetching GitHub Issue...');
resolvedTask = resolveIssueTask(task);
} catch (e) {
error(e instanceof Error ? e.message : String(e));
process.exit(1);
}
}
// Get available workflows and prompt user to select // Get available workflows and prompt user to select
const availableWorkflows = listWorkflows(); const availableWorkflows = listWorkflows();
const currentWorkflow = getCurrentWorkflow(cwd); const currentWorkflow = getCurrentWorkflow(cwd);
@ -230,13 +243,13 @@ program
} }
// Ask whether to create a worktree // Ask whether to create a worktree
const { execCwd, isWorktree } = await confirmAndCreateWorktree(cwd, task); const { execCwd, isWorktree } = await confirmAndCreateWorktree(cwd, resolvedTask);
log.info('Starting task execution', { task, workflow: selectedWorkflow, worktree: isWorktree }); log.info('Starting task execution', { task: resolvedTask, workflow: selectedWorkflow, worktree: isWorktree });
const taskSuccess = await executeTask(task, execCwd, selectedWorkflow, cwd); const taskSuccess = await executeTask(resolvedTask, execCwd, selectedWorkflow, cwd);
if (taskSuccess && isWorktree) { if (taskSuccess && isWorktree) {
const commitResult = autoCommitAndPush(execCwd, task, cwd); const commitResult = autoCommitAndPush(execCwd, resolvedTask, cwd);
if (commitResult.success && commitResult.commitHash) { if (commitResult.success && commitResult.commitHash) {
success(`Auto-committed & pushed: ${commitResult.commitHash}`); success(`Auto-committed & pushed: ${commitResult.commitHash}`);
} else if (!commitResult.success) { } else if (!commitResult.success) {

View File

@ -12,7 +12,9 @@ import { promptInput, promptMultilineInput, confirm, selectOption } from '../pro
import { success, info } from '../utils/ui.js'; import { success, info } from '../utils/ui.js';
import { summarizeTaskName } from '../task/summarize.js'; import { summarizeTaskName } from '../task/summarize.js';
import { createLogger } from '../utils/debug.js'; import { createLogger } from '../utils/debug.js';
import { error as errorLog } from '../utils/ui.js';
import { listWorkflows } from '../config/workflowLoader.js'; import { listWorkflows } from '../config/workflowLoader.js';
import { parseIssueNumbers, resolveIssueTask } from '../github/issue.js';
import { getCurrentWorkflow } from '../config/paths.js'; import { getCurrentWorkflow } from '../config/paths.js';
import type { TaskFileData } from '../task/schema.js'; import type { TaskFileData } from '../task/schema.js';
@ -53,8 +55,19 @@ export async function addTask(cwd: string, args: string[]): Promise<void> {
let workflow: string | undefined; let workflow: string | undefined;
if (args.length > 0) { if (args.length > 0) {
// Argument mode: task content provided directly // Check if args are issue references (#N)
const issueNumbers = parseIssueNumbers(args);
if (issueNumbers.length > 0) {
try {
info('Fetching GitHub Issue...');
taskContent = resolveIssueTask(args.join(' '));
} catch (e) {
errorLog(e instanceof Error ? e.message : String(e));
return;
}
} else {
taskContent = args.join(' '); taskContent = args.join(' ');
}
} else { } else {
// Interactive mode (multiline: empty line to finish) // Interactive mode (multiline: empty line to finish)
const input = await promptMultilineInput('Task content'); const input = await promptMultilineInput('Task content');

View File

@ -14,6 +14,7 @@ export function showHelp(): void {
console.log(` console.log(`
Usage: Usage:
takt {task} Execute task with current workflow (continues session) takt {task} Execute task with current workflow (continues session)
takt "#N" Execute GitHub Issue #N as task (quote # in shell)
takt /run-tasks (/run) Run all pending tasks from .takt/tasks/ takt /run-tasks (/run) Run all pending tasks from .takt/tasks/
takt /watch Watch for tasks and auto-execute (stays resident) takt /watch Watch for tasks and auto-execute (stays resident)
takt /add-task (/add) Add a new task (interactive, YAML format) takt /add-task (/add) Add a new task (interactive, YAML format)
@ -25,6 +26,10 @@ Usage:
Examples: Examples:
takt "Fix the bug in main.ts" # Execute task (continues session) takt "Fix the bug in main.ts" # Execute task (continues session)
takt "#6" # Execute Issue #6 as task
takt "#6 #7" # Execute multiple Issues as task
takt /add-task "#6" # Create task from Issue #6
takt /add-task "#6" "#7" # Create task from multiple Issues
takt /add-task "認証機能を追加する" # Quick add task takt /add-task "認証機能を追加する" # Quick add task
takt /add-task # Interactive task creation takt /add-task # Interactive task creation
takt /clear # Clear sessions, start fresh takt /clear # Clear sessions, start fresh

183
src/github/issue.ts Normal file
View File

@ -0,0 +1,183 @@
/**
* GitHub Issue utilities
*
* Fetches issue content via `gh` CLI and formats it as task text
* for workflow execution or task creation.
*/
import { execSync } from 'node:child_process';
import { createLogger } from '../utils/debug.js';
const log = createLogger('github');
/** Regex to match `#N` patterns (issue numbers) */
const ISSUE_NUMBER_REGEX = /^#(\d+)$/;
export interface GitHubIssue {
number: number;
title: string;
body: string;
labels: string[];
comments: Array<{ author: string; body: string }>;
}
export interface GhCliStatus {
available: boolean;
error?: string;
}
/**
* Check if `gh` CLI is available and authenticated.
*/
export function checkGhCli(): GhCliStatus {
try {
execSync('gh auth status', { stdio: 'pipe' });
return { available: true };
} catch {
try {
execSync('gh --version', { stdio: 'pipe' });
return {
available: false,
error: 'gh CLI is installed but not authenticated. Run `gh auth login` first.',
};
} catch {
return {
available: false,
error: 'gh CLI is not installed. Install it from https://cli.github.com/',
};
}
}
}
/**
* Fetch issue content via `gh issue view`.
* Throws on failure (issue not found, network error, etc.).
*/
export function fetchIssue(issueNumber: number): GitHubIssue {
log.debug('Fetching issue', { issueNumber });
const raw = execSync(
`gh issue view ${issueNumber} --json number,title,body,labels,comments`,
{ encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] },
);
const data = JSON.parse(raw) as {
number: number;
title: string;
body: string;
labels: Array<{ name: string }>;
comments: Array<{ author: { login: string }; body: string }>;
};
return {
number: data.number,
title: data.title,
body: data.body ?? '',
labels: data.labels.map((l) => l.name),
comments: data.comments.map((c) => ({
author: c.author.login,
body: c.body,
})),
};
}
/**
* Format a GitHub issue into task text for workflow execution.
*
* Output format:
* ```
* ## GitHub Issue #6: Fix authentication bug
*
* {body}
*
* ### Labels
* bug, priority:high
*
* ### Comments
* **user1**: Comment body...
* ```
*/
export function formatIssueAsTask(issue: GitHubIssue): string {
const parts: string[] = [];
parts.push(`## GitHub Issue #${issue.number}: ${issue.title}`);
if (issue.body) {
parts.push('');
parts.push(issue.body);
}
if (issue.labels.length > 0) {
parts.push('');
parts.push('### Labels');
parts.push(issue.labels.join(', '));
}
if (issue.comments.length > 0) {
parts.push('');
parts.push('### Comments');
for (const comment of issue.comments) {
parts.push(`**${comment.author}**: ${comment.body}`);
}
}
return parts.join('\n');
}
/**
* Parse `#N` patterns from argument strings.
* Returns issue numbers found, or empty array if none.
*
* Each argument must be exactly `#N` (no mixed text).
* Examples:
* ['#6'] [6]
* ['#6', '#7'] [6, 7]
* ['Fix bug'] []
* ['#6', 'and', '#7'] [] (mixed, not all are issue refs)
*/
export function parseIssueNumbers(args: string[]): number[] {
if (args.length === 0) return [];
const numbers: number[] = [];
for (const arg of args) {
const match = arg.match(ISSUE_NUMBER_REGEX);
if (!match?.[1]) return []; // Not all args are issue refs
numbers.push(Number.parseInt(match[1], 10));
}
return numbers;
}
/**
* Check if a single task string is an issue reference (`#N`).
*/
export function isIssueReference(task: string): boolean {
return ISSUE_NUMBER_REGEX.test(task.trim());
}
/**
* Resolve issue references in a task string.
* If task contains `#N` patterns (space-separated), fetches issues and returns formatted text.
* Otherwise returns the task string as-is.
*
* Checks gh CLI availability before fetching.
* Throws if gh CLI is not available or issue fetch fails.
*/
export function resolveIssueTask(task: string): string {
const tokens = task.trim().split(/\s+/);
const issueNumbers = parseIssueNumbers(tokens);
if (issueNumbers.length === 0) {
return task;
}
const ghStatus = checkGhCli();
if (!ghStatus.available) {
throw new Error(ghStatus.error ?? 'gh CLI is not available');
}
log.info('Resolving issue references', { issueNumbers });
const issues = issueNumbers.map((n) => fetchIssue(n));
return issues.map(formatIssueAsTask).join('\n\n---\n\n');
}

View File

@ -10,17 +10,17 @@ import type {
WorkflowState, WorkflowState,
WorkflowStep, WorkflowStep,
AgentResponse, AgentResponse,
RuleMatchMethod,
} from '../models/types.js'; } from '../models/types.js';
import { runAgent, type RunAgentOptions } from '../agents/runner.js'; import { runAgent, type RunAgentOptions } from '../agents/runner.js';
import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js'; import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js';
import type { WorkflowEngineOptions } from './types.js'; import type { WorkflowEngineOptions } from './types.js';
import { determineNextStepByRules } from './transitions.js'; import { determineNextStepByRules } from './transitions.js';
import { detectRuleIndex, callAiJudge } from '../claude/client.js'; import { buildInstruction as buildInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js';
import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js';
import { LoopDetector } from './loop-detector.js'; import { LoopDetector } from './loop-detector.js';
import { handleBlocked } from './blocked-handler.js'; import { handleBlocked } from './blocked-handler.js';
import { ParallelLogger } from './parallel-logger.js'; import { ParallelLogger } from './parallel-logger.js';
import { detectMatchedRule } from './rule-evaluator.js';
import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from './phase-runner.js';
import { import {
createInitialState, createInitialState,
addUserInput, addUserInput,
@ -206,18 +206,11 @@ export class WorkflowEngine extends EventEmitter {
return this.runNormalStep(step); return this.runNormalStep(step);
} }
/** Build RunAgentOptions from a step's configuration */ /** Build common RunAgentOptions shared by all phases */
private buildAgentOptions(step: WorkflowStep): RunAgentOptions { private buildBaseOptions(step: WorkflowStep): RunAgentOptions {
// Phase 1: exclude Write from allowedTools when step has report config
const allowedTools = step.report
? step.allowedTools?.filter((t) => t !== 'Write')
: step.allowedTools;
return { return {
cwd: this.cwd, cwd: this.cwd,
sessionId: this.state.agentSessions.get(step.agent),
agentPath: step.agentPath, agentPath: step.agentPath,
allowedTools,
provider: step.provider, provider: step.provider,
model: step.model, model: step.model,
permissionMode: step.permissionMode, permissionMode: step.permissionMode,
@ -228,24 +221,29 @@ export class WorkflowEngine extends EventEmitter {
}; };
} }
/** Build RunAgentOptions from a step's configuration (Phase 1) */
private buildAgentOptions(step: WorkflowStep): RunAgentOptions {
// Phase 1: exclude Write from allowedTools when step has report config
const allowedTools = step.report
? step.allowedTools?.filter((t) => t !== 'Write')
: step.allowedTools;
return {
...this.buildBaseOptions(step),
sessionId: this.state.agentSessions.get(step.agent),
allowedTools,
};
}
/** /**
* Build RunAgentOptions for session-resume phases (Phase 2, Phase 3). * Build RunAgentOptions for session-resume phases (Phase 2, Phase 3).
* Shares common fields with the original step's agent config.
*/ */
private buildResumeOptions(step: WorkflowStep, sessionId: string, overrides: Pick<RunAgentOptions, 'allowedTools' | 'maxTurns'>): RunAgentOptions { private buildResumeOptions(step: WorkflowStep, sessionId: string, overrides: Pick<RunAgentOptions, 'allowedTools' | 'maxTurns'>): RunAgentOptions {
return { return {
cwd: this.cwd, ...this.buildBaseOptions(step),
sessionId, sessionId,
agentPath: step.agentPath,
allowedTools: overrides.allowedTools, allowedTools: overrides.allowedTools,
maxTurns: overrides.maxTurns, maxTurns: overrides.maxTurns,
provider: step.provider,
model: step.model,
permissionMode: step.permissionMode,
onStream: this.options.onStream,
onPermissionRequest: this.options.onPermissionRequest,
onAskUserQuestion: this.options.onAskUserQuestion,
bypassPermissions: this.options.bypassPermissions,
}; };
} }
@ -261,114 +259,16 @@ export class WorkflowEngine extends EventEmitter {
} }
} }
/** /** Build phase runner context for Phase 2/3 execution */
* Detect matched rule for a step's response. private buildPhaseRunnerContext() {
* Evaluation order (first match wins): return {
* 1. Aggregate conditions: all()/any() evaluate sub-step results cwd: this.cwd,
* 2. Tag detection from Phase 3 output reportDir: this.reportDir,
* 3. Tag detection from Phase 1 output (fallback) language: this.language,
* 4. ai() condition evaluation via AI judge getSessionId: (agent: string) => this.state.agentSessions.get(agent),
* 5. All-conditions AI judge (final fallback) buildResumeOptions: this.buildResumeOptions.bind(this),
* updateAgentSession: this.updateAgentSession.bind(this),
* Returns undefined for steps without rules. };
* Throws if rules exist but no rule matched (Fail Fast).
*
* @param step - The workflow step
* @param agentContent - Phase 1 output (main execution)
* @param tagContent - Phase 3 output (status judgment); empty string skips tag detection
*/
private async detectMatchedRule(step: WorkflowStep, agentContent: string, tagContent: string): Promise<{ index: number; method: RuleMatchMethod } | undefined> {
if (!step.rules || step.rules.length === 0) return undefined;
// 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps
const aggIndex = this.evaluateAggregateConditions(step);
if (aggIndex >= 0) {
return { index: aggIndex, method: 'aggregate' };
}
// 2. Tag detection from Phase 3 output
if (tagContent) {
const ruleIndex = detectRuleIndex(tagContent, step.name);
if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
return { index: ruleIndex, method: 'phase3_tag' };
}
}
// 3. Tag detection from Phase 1 output (fallback)
if (agentContent) {
const ruleIndex = detectRuleIndex(agentContent, step.name);
if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
return { index: ruleIndex, method: 'phase1_tag' };
}
}
// 4. AI judge for ai() conditions only
const aiRuleIndex = await this.evaluateAiConditions(step, agentContent);
if (aiRuleIndex >= 0) {
return { index: aiRuleIndex, method: 'ai_judge' };
}
// 5. AI judge for all conditions (final fallback)
const fallbackIndex = await this.evaluateAllConditionsViaAiJudge(step, agentContent);
if (fallbackIndex >= 0) {
return { index: fallbackIndex, method: 'ai_judge_fallback' };
}
throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`);
}
/**
* Evaluate aggregate conditions (all()/any()) against sub-step results.
* Returns the 0-based rule index in the step's rules array, or -1 if no match.
*
* For each aggregate rule, checks the matched condition text of sub-steps:
* - all("X"): true when ALL sub-steps have matched condition === X
* - any("X"): true when at least ONE sub-step has matched condition === X
*
* Edge cases per spec:
* - Sub-step with no matched rule: all() false, any() skip that sub-step
* - No sub-steps (0 ): both false
* - Non-parallel step: both false
*/
private evaluateAggregateConditions(step: WorkflowStep): number {
if (!step.rules || !step.parallel || step.parallel.length === 0) return -1;
for (let i = 0; i < step.rules.length; i++) {
const rule = step.rules[i]!;
if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) {
continue;
}
const subSteps = step.parallel;
const targetCondition = rule.aggregateConditionText;
if (rule.aggregateType === 'all') {
const allMatch = subSteps.every((sub) => {
const output = this.state.stepOutputs.get(sub.name);
if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
const matchedRule = sub.rules[output.matchedRuleIndex];
return matchedRule?.condition === targetCondition;
});
if (allMatch) {
log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
return i;
}
} else {
// 'any'
const anyMatch = subSteps.some((sub) => {
const output = this.state.stepOutputs.get(sub.name);
if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
const matchedRule = sub.rules[output.matchedRuleIndex];
return matchedRule?.condition === targetCondition;
});
if (anyMatch) {
log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
return i;
}
}
}
return -1;
} }
/** Run a normal (non-parallel) step */ /** Run a normal (non-parallel) step */
@ -388,18 +288,23 @@ export class WorkflowEngine extends EventEmitter {
let response = await runAgent(step.agent, instruction, agentOptions); let response = await runAgent(step.agent, instruction, agentOptions);
this.updateAgentSession(step.agent, response.sessionId); this.updateAgentSession(step.agent, response.sessionId);
const phaseCtx = this.buildPhaseRunnerContext();
// Phase 2: report output (resume same session, Write only) // Phase 2: report output (resume same session, Write only)
if (step.report) { if (step.report) {
await this.runReportPhase(step, stepIteration); await runReportPhase(step, stepIteration, phaseCtx);
} }
// Phase 3: status judgment (resume session, no tools, output status tag) // Phase 3: status judgment (resume session, no tools, output status tag)
let tagContent = ''; let tagContent = '';
if (this.needsStatusJudgmentPhase(step)) { if (needsStatusJudgmentPhase(step)) {
tagContent = await this.runStatusJudgmentPhase(step); tagContent = await runStatusJudgmentPhase(step, phaseCtx);
} }
const match = await this.detectMatchedRule(step, response.content, tagContent); const match = await detectMatchedRule(step, response.content, tagContent, {
state: this.state,
cwd: this.cwd,
});
if (match) { if (match) {
log.debug('Rule matched', { step: step.name, ruleIndex: match.index, method: match.method }); log.debug('Rule matched', { step: step.name, ruleIndex: match.index, method: match.method });
response = { ...response, matchedRuleIndex: match.index, matchedRuleMethod: match.method }; response = { ...response, matchedRuleIndex: match.index, matchedRuleMethod: match.method };
@ -410,81 +315,6 @@ export class WorkflowEngine extends EventEmitter {
return { response, instruction }; return { response, instruction };
} }
/**
* Phase 2: Report output.
* Resumes the agent session with Write-only tools to output reports.
* The response is discarded only sessionId is updated.
*/
private async runReportPhase(step: WorkflowStep, stepIteration: number): Promise<void> {
const sessionId = this.state.agentSessions.get(step.agent);
if (!sessionId) {
throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
}
log.debug('Running report phase', { step: step.name, sessionId });
const reportInstruction = buildReportInstructionFromTemplate(step, {
cwd: this.cwd,
reportDir: this.reportDir,
stepIteration,
language: this.language,
});
const reportOptions = this.buildResumeOptions(step, sessionId, {
allowedTools: ['Write'],
maxTurns: 3,
});
const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions);
// Update session (phase 2 may update it)
this.updateAgentSession(step.agent, reportResponse.sessionId);
log.debug('Report phase complete', { step: step.name, status: reportResponse.status });
}
/**
* Check if a step needs Phase 3 (status judgment).
* Returns true when at least one rule requires tag-based detection
* (i.e., not all rules are ai() or aggregate conditions).
*/
private needsStatusJudgmentPhase(step: WorkflowStep): boolean {
if (!step.rules || step.rules.length === 0) return false;
const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition);
return !allNonTagConditions;
}
/**
* Phase 3: Status judgment.
* Resumes the agent session with no tools to ask the agent to output a status tag.
* Returns the Phase 3 response content (containing the status tag).
*/
private async runStatusJudgmentPhase(step: WorkflowStep): Promise<string> {
const sessionId = this.state.agentSessions.get(step.agent);
if (!sessionId) {
throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
}
log.debug('Running status judgment phase', { step: step.name, sessionId });
const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, {
language: this.language,
});
const judgmentOptions = this.buildResumeOptions(step, sessionId, {
allowedTools: [],
maxTurns: 3,
});
const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions);
// Update session (phase 3 may update it)
this.updateAgentSession(step.agent, judgmentResponse.sessionId);
log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status });
return judgmentResponse.content;
}
/** /**
* Run a parallel step: execute all sub-steps concurrently, then aggregate results. * Run a parallel step: execute all sub-steps concurrently, then aggregate results.
* The aggregated output becomes the parent step's response for rules evaluation. * The aggregated output becomes the parent step's response for rules evaluation.
@ -509,6 +339,9 @@ export class WorkflowEngine extends EventEmitter {
}) })
: undefined; : undefined;
const phaseCtx = this.buildPhaseRunnerContext();
const ruleCtx = { state: this.state, cwd: this.cwd };
// Run all sub-steps concurrently // Run all sub-steps concurrently
const subResults = await Promise.all( const subResults = await Promise.all(
subSteps.map(async (subStep, index) => { subSteps.map(async (subStep, index) => {
@ -516,28 +349,28 @@ export class WorkflowEngine extends EventEmitter {
const subInstruction = this.buildInstruction(subStep, subIteration); const subInstruction = this.buildInstruction(subStep, subIteration);
// Phase 1: main execution (Write excluded if sub-step has report) // Phase 1: main execution (Write excluded if sub-step has report)
const agentOptions = this.buildAgentOptions(subStep); const baseOptions = this.buildAgentOptions(subStep);
// Override onStream with parallel logger's prefixed handler // Override onStream with parallel logger's prefixed handler (immutable)
if (parallelLogger) { const agentOptions = parallelLogger
agentOptions.onStream = parallelLogger.createStreamHandler(subStep.name, index); ? { ...baseOptions, onStream: parallelLogger.createStreamHandler(subStep.name, index) }
} : baseOptions;
const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions); const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions);
this.updateAgentSession(subStep.agent, subResponse.sessionId); this.updateAgentSession(subStep.agent, subResponse.sessionId);
// Phase 2: report output for sub-step // Phase 2: report output for sub-step
if (subStep.report) { if (subStep.report) {
await this.runReportPhase(subStep, subIteration); await runReportPhase(subStep, subIteration, phaseCtx);
} }
// Phase 3: status judgment for sub-step // Phase 3: status judgment for sub-step
let subTagContent = ''; let subTagContent = '';
if (this.needsStatusJudgmentPhase(subStep)) { if (needsStatusJudgmentPhase(subStep)) {
subTagContent = await this.runStatusJudgmentPhase(subStep); subTagContent = await runStatusJudgmentPhase(subStep, phaseCtx);
} }
const match = await this.detectMatchedRule(subStep, subResponse.content, subTagContent); const match = await detectMatchedRule(subStep, subResponse.content, subTagContent, ruleCtx);
const finalResponse = match const finalResponse = match
? { ...subResponse, matchedRuleIndex: match.index, matchedRuleMethod: match.method } ? { ...subResponse, matchedRuleIndex: match.index, matchedRuleMethod: match.method }
: subResponse; : subResponse;
@ -572,7 +405,7 @@ export class WorkflowEngine extends EventEmitter {
.join('\n\n'); .join('\n\n');
// Parent step uses aggregate conditions, so tagContent is empty // Parent step uses aggregate conditions, so tagContent is empty
const match = await this.detectMatchedRule(step, aggregatedContent, ''); const match = await detectMatchedRule(step, aggregatedContent, '', ruleCtx);
const aggregatedResponse: AgentResponse = { const aggregatedResponse: AgentResponse = {
agent: step.name, agent: step.name,
@ -587,79 +420,6 @@ export class WorkflowEngine extends EventEmitter {
return { response: aggregatedResponse, instruction: aggregatedInstruction }; return { response: aggregatedResponse, instruction: aggregatedInstruction };
} }
/**
* Evaluate ai() conditions via AI judge.
* Collects all ai() rules, calls the judge, and maps the result back to the original rule index.
* Returns the 0-based rule index in the step's rules array, or -1 if no match.
*/
private async evaluateAiConditions(step: WorkflowStep, agentOutput: string): Promise<number> {
if (!step.rules) return -1;
const aiConditions: { index: number; text: string }[] = [];
for (let i = 0; i < step.rules.length; i++) {
const rule = step.rules[i]!;
if (rule.isAiCondition && rule.aiConditionText) {
aiConditions.push({ index: i, text: rule.aiConditionText });
}
}
if (aiConditions.length === 0) return -1;
log.debug('Evaluating ai() conditions via judge', {
step: step.name,
conditionCount: aiConditions.length,
});
// Remap: judge returns 0-based index within aiConditions array
const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text }));
const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd: this.cwd });
if (judgeResult >= 0 && judgeResult < aiConditions.length) {
const matched = aiConditions[judgeResult]!;
log.debug('AI judge matched condition', {
step: step.name,
judgeResult,
originalRuleIndex: matched.index,
condition: matched.text,
});
return matched.index;
}
log.debug('AI judge did not match any condition', { step: step.name });
return -1;
}
/**
* Final fallback: evaluate ALL rule conditions via AI judge.
* Unlike evaluateAiConditions (which only handles ai() flagged rules),
* this sends every rule's condition text to the judge.
* Returns the 0-based rule index, or -1 if no match.
*/
private async evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string): Promise<number> {
if (!step.rules || step.rules.length === 0) return -1;
const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition }));
log.debug('Evaluating all conditions via AI judge (final fallback)', {
step: step.name,
conditionCount: conditions.length,
});
const judgeResult = await callAiJudge(agentOutput, conditions, { cwd: this.cwd });
if (judgeResult >= 0 && judgeResult < conditions.length) {
log.debug('AI judge (fallback) matched condition', {
step: step.name,
ruleIndex: judgeResult,
condition: conditions[judgeResult]!.text,
});
return judgeResult;
}
log.debug('AI judge (fallback) did not match any condition', { step: step.name });
return -1;
}
/** /**
* Determine next step for a completed step using rules-based routing. * Determine next step for a completed step using rules-based routing.
*/ */

View File

@ -12,6 +12,7 @@
*/ */
import type { WorkflowStep, WorkflowRule, AgentResponse, Language, ReportConfig, ReportObjectConfig } from '../models/types.js'; import type { WorkflowStep, WorkflowRule, AgentResponse, Language, ReportConfig, ReportObjectConfig } from '../models/types.js';
import { hasTagBasedRules } from './rule-utils.js';
/** /**
@ -465,13 +466,10 @@ export function buildInstruction(
// 7. Status Output Rules (for tag-based detection in Phase 1) // 7. Status Output Rules (for tag-based detection in Phase 1)
// Skip if all rules are ai() or aggregate conditions (no tags needed) // Skip if all rules are ai() or aggregate conditions (no tags needed)
if (step.rules && step.rules.length > 0) { if (hasTagBasedRules(step)) {
const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules!, language);
if (!allNonTagConditions) {
const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules, language);
sections.push(statusRulesPrompt); sections.push(statusRulesPrompt);
} }
}
return sections.join('\n\n'); return sections.join('\n\n');
} }

View File

@ -0,0 +1,111 @@
/**
* Phase execution logic extracted from engine.ts.
*
* Handles Phase 2 (report output) and Phase 3 (status judgment)
* as session-resume operations.
*/
import type { WorkflowStep, Language } from '../models/types.js';
import { runAgent, type RunAgentOptions } from '../agents/runner.js';
import {
buildReportInstruction as buildReportInstructionFromTemplate,
buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate,
} from './instruction-builder.js';
import { hasTagBasedRules } from './rule-utils.js';
import { createLogger } from '../utils/debug.js';
const log = createLogger('phase-runner');
export interface PhaseRunnerContext {
/** Working directory (agent work dir, may be a clone) */
cwd: string;
/** Report directory path */
reportDir: string;
/** Language for instructions */
language?: Language;
/** Get agent session ID */
getSessionId: (agent: string) => string | undefined;
/** Build resume options for a step */
buildResumeOptions: (step: WorkflowStep, sessionId: string, overrides: Pick<RunAgentOptions, 'allowedTools' | 'maxTurns'>) => RunAgentOptions;
/** Update agent session after a phase run */
updateAgentSession: (agent: string, sessionId: string | undefined) => void;
}
/**
* Check if a step needs Phase 3 (status judgment).
* Returns true when at least one rule requires tag-based detection.
*/
export function needsStatusJudgmentPhase(step: WorkflowStep): boolean {
return hasTagBasedRules(step);
}
/**
* Phase 2: Report output.
* Resumes the agent session with Write-only tools to output reports.
* The response is discarded only sessionId is updated.
*/
export async function runReportPhase(
step: WorkflowStep,
stepIteration: number,
ctx: PhaseRunnerContext,
): Promise<void> {
const sessionId = ctx.getSessionId(step.agent);
if (!sessionId) {
throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
}
log.debug('Running report phase', { step: step.name, sessionId });
const reportInstruction = buildReportInstructionFromTemplate(step, {
cwd: ctx.cwd,
reportDir: ctx.reportDir,
stepIteration,
language: ctx.language,
});
const reportOptions = ctx.buildResumeOptions(step, sessionId, {
allowedTools: ['Write'],
maxTurns: 3,
});
const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions);
// Update session (phase 2 may update it)
ctx.updateAgentSession(step.agent, reportResponse.sessionId);
log.debug('Report phase complete', { step: step.name, status: reportResponse.status });
}
/**
* Phase 3: Status judgment.
* Resumes the agent session with no tools to ask the agent to output a status tag.
* Returns the Phase 3 response content (containing the status tag).
*/
export async function runStatusJudgmentPhase(
step: WorkflowStep,
ctx: PhaseRunnerContext,
): Promise<string> {
const sessionId = ctx.getSessionId(step.agent);
if (!sessionId) {
throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`);
}
log.debug('Running status judgment phase', { step: step.name, sessionId });
const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, {
language: ctx.language,
});
const judgmentOptions = ctx.buildResumeOptions(step, sessionId, {
allowedTools: [],
maxTurns: 3,
});
const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions);
// Update session (phase 3 may update it)
ctx.updateAgentSession(step.agent, judgmentResponse.sessionId);
log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status });
return judgmentResponse.content;
}

View File

@ -0,0 +1,218 @@
/**
* Rule evaluation logic extracted from engine.ts.
*
* Evaluates workflow step rules to determine the matched rule index.
* Supports tag-based detection, ai() conditions, aggregate conditions,
* and AI judge fallback.
*/
import type {
WorkflowStep,
WorkflowState,
RuleMatchMethod,
} from '../models/types.js';
import { detectRuleIndex, callAiJudge } from '../claude/client.js';
import { createLogger } from '../utils/debug.js';
const log = createLogger('rule-evaluator');
export interface RuleMatch {
index: number;
method: RuleMatchMethod;
}
export interface RuleEvaluatorContext {
/** Workflow state (for accessing stepOutputs in aggregate evaluation) */
state: WorkflowState;
/** Working directory (for AI judge calls) */
cwd: string;
}
/**
* Detect matched rule for a step's response.
* Evaluation order (first match wins):
* 1. Aggregate conditions: all()/any() evaluate sub-step results
* 2. Tag detection from Phase 3 output
* 3. Tag detection from Phase 1 output (fallback)
* 4. ai() condition evaluation via AI judge
* 5. All-conditions AI judge (final fallback)
*
* Returns undefined for steps without rules.
* Throws if rules exist but no rule matched (Fail Fast).
*
* @param step - The workflow step
* @param agentContent - Phase 1 output (main execution)
* @param tagContent - Phase 3 output (status judgment); empty string skips tag detection
* @param ctx - Evaluation context (state, cwd)
*/
export async function detectMatchedRule(
step: WorkflowStep,
agentContent: string,
tagContent: string,
ctx: RuleEvaluatorContext,
): Promise<RuleMatch | undefined> {
if (!step.rules || step.rules.length === 0) return undefined;
// 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps
const aggIndex = evaluateAggregateConditions(step, ctx.state);
if (aggIndex >= 0) {
return { index: aggIndex, method: 'aggregate' };
}
// 2. Tag detection from Phase 3 output
if (tagContent) {
const ruleIndex = detectRuleIndex(tagContent, step.name);
if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
return { index: ruleIndex, method: 'phase3_tag' };
}
}
// 3. Tag detection from Phase 1 output (fallback)
if (agentContent) {
const ruleIndex = detectRuleIndex(agentContent, step.name);
if (ruleIndex >= 0 && ruleIndex < step.rules.length) {
return { index: ruleIndex, method: 'phase1_tag' };
}
}
// 4. AI judge for ai() conditions only
const aiRuleIndex = await evaluateAiConditions(step, agentContent, ctx.cwd);
if (aiRuleIndex >= 0) {
return { index: aiRuleIndex, method: 'ai_judge' };
}
// 5. AI judge for all conditions (final fallback)
const fallbackIndex = await evaluateAllConditionsViaAiJudge(step, agentContent, ctx.cwd);
if (fallbackIndex >= 0) {
return { index: fallbackIndex, method: 'ai_judge_fallback' };
}
throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`);
}
/**
* Evaluate aggregate conditions (all()/any()) against sub-step results.
* Returns the 0-based rule index in the step's rules array, or -1 if no match.
*
* For each aggregate rule, checks the matched condition text of sub-steps:
* - all("X"): true when ALL sub-steps have matched condition === X
* - any("X"): true when at least ONE sub-step has matched condition === X
*
* Edge cases per spec:
* - Sub-step with no matched rule: all() false, any() skip that sub-step
* - No sub-steps (0 ): both false
* - Non-parallel step: both false
*/
export function evaluateAggregateConditions(step: WorkflowStep, state: WorkflowState): number {
if (!step.rules || !step.parallel || step.parallel.length === 0) return -1;
for (let i = 0; i < step.rules.length; i++) {
const rule = step.rules[i]!;
if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) {
continue;
}
const subSteps = step.parallel;
const targetCondition = rule.aggregateConditionText;
if (rule.aggregateType === 'all') {
const allMatch = subSteps.every((sub) => {
const output = state.stepOutputs.get(sub.name);
if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
const matchedRule = sub.rules[output.matchedRuleIndex];
return matchedRule?.condition === targetCondition;
});
if (allMatch) {
log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
return i;
}
} else {
// 'any'
const anyMatch = subSteps.some((sub) => {
const output = state.stepOutputs.get(sub.name);
if (!output || output.matchedRuleIndex == null || !sub.rules) return false;
const matchedRule = sub.rules[output.matchedRuleIndex];
return matchedRule?.condition === targetCondition;
});
if (anyMatch) {
log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i });
return i;
}
}
}
return -1;
}
/**
* Evaluate ai() conditions via AI judge.
* Collects all ai() rules, calls the judge, and maps the result back to the original rule index.
* Returns the 0-based rule index in the step's rules array, or -1 if no match.
*/
export async function evaluateAiConditions(step: WorkflowStep, agentOutput: string, cwd: string): Promise<number> {
if (!step.rules) return -1;
const aiConditions: { index: number; text: string }[] = [];
for (let i = 0; i < step.rules.length; i++) {
const rule = step.rules[i]!;
if (rule.isAiCondition && rule.aiConditionText) {
aiConditions.push({ index: i, text: rule.aiConditionText });
}
}
if (aiConditions.length === 0) return -1;
log.debug('Evaluating ai() conditions via judge', {
step: step.name,
conditionCount: aiConditions.length,
});
// Remap: judge returns 0-based index within aiConditions array
const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text }));
const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd });
if (judgeResult >= 0 && judgeResult < aiConditions.length) {
const matched = aiConditions[judgeResult]!;
log.debug('AI judge matched condition', {
step: step.name,
judgeResult,
originalRuleIndex: matched.index,
condition: matched.text,
});
return matched.index;
}
log.debug('AI judge did not match any condition', { step: step.name });
return -1;
}
/**
* Final fallback: evaluate ALL rule conditions via AI judge.
* Unlike evaluateAiConditions (which only handles ai() flagged rules),
* this sends every rule's condition text to the judge.
* Returns the 0-based rule index, or -1 if no match.
*/
export async function evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string, cwd: string): Promise<number> {
if (!step.rules || step.rules.length === 0) return -1;
const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition }));
log.debug('Evaluating all conditions via AI judge (final fallback)', {
step: step.name,
conditionCount: conditions.length,
});
const judgeResult = await callAiJudge(agentOutput, conditions, { cwd });
if (judgeResult >= 0 && judgeResult < conditions.length) {
log.debug('AI judge (fallback) matched condition', {
step: step.name,
ruleIndex: judgeResult,
condition: conditions[judgeResult]!.text,
});
return judgeResult;
}
log.debug('AI judge (fallback) did not match any condition', { step: step.name });
return -1;
}

View File

@ -0,0 +1,18 @@
/**
* Shared rule utility functions used by both engine.ts and instruction-builder.ts.
*/
import type { WorkflowStep } from '../models/types.js';
/**
* Check whether a step has tag-based rules (i.e., rules that require
* [STEP:N] tag output for detection).
*
* Returns false when all rules are ai() or aggregate conditions,
* meaning no tag-based status output is needed.
*/
export function hasTagBasedRules(step: WorkflowStep): boolean {
if (!step.rules || step.rules.length === 0) return false;
const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition);
return !allNonTagConditions;
}