From 84ef8fbaf72350a4e47b04a849527452bb3de05d Mon Sep 17 00:00:00 2001 From: nrslib <38722970+nrslib@users.noreply.github.com> Date: Fri, 30 Jan 2026 18:49:55 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20GitHub=20Issue=E7=95=AA=E5=8F=B7?= =?UTF-8?q?=E3=81=A7=E3=82=BF=E3=82=B9=E3=82=AF=E5=AE=9F=E8=A1=8C=E3=83=BB?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0=E3=82=92=E5=8F=AF=E8=83=BD=E3=81=AB=E3=81=97?= =?UTF-8?q?=E3=80=81=E3=82=A8=E3=83=B3=E3=82=B8=E3=83=B3=E3=82=92=E3=83=AA?= =?UTF-8?q?=E3=83=95=E3=82=A1=E3=82=AF=E3=82=BF=E3=83=AA=E3=83=B3=E3=82=B0?= =?UTF-8?q?=20(#10)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - takt "#6" や /add-task "#6" "#7" でIssue内容をタスクとして実行可能に - gh CLI経由でIssue取得、タイトル・本文・ラベル・コメントをタスクテキストに変換 - engine.tsからフェーズ実行(phase-runner)、ルール評価(rule-evaluator)、 ルールユーティリティ(rule-utils)を分離 --- src/__tests__/github-issue.test.ts | 165 +++++++++++++ src/cli.ts | 21 +- src/commands/addTask.ts | 17 +- src/commands/help.ts | 5 + src/github/issue.ts | 183 +++++++++++++++ src/workflow/engine.ts | 344 +++++----------------------- src/workflow/instruction-builder.ts | 10 +- src/workflow/phase-runner.ts | 111 +++++++++ src/workflow/rule-evaluator.ts | 218 ++++++++++++++++++ src/workflow/rule-utils.ts | 18 ++ 10 files changed, 788 insertions(+), 304 deletions(-) create mode 100644 src/__tests__/github-issue.test.ts create mode 100644 src/github/issue.ts create mode 100644 src/workflow/phase-runner.ts create mode 100644 src/workflow/rule-evaluator.ts create mode 100644 src/workflow/rule-utils.ts diff --git a/src/__tests__/github-issue.test.ts b/src/__tests__/github-issue.test.ts new file mode 100644 index 0000000..671736d --- /dev/null +++ b/src/__tests__/github-issue.test.ts @@ -0,0 +1,165 @@ +/** + * Tests for github/issue module + * + * Tests parsing and formatting functions. + * checkGhCli/fetchIssue/resolveIssueTask are integration functions + * that call `gh` CLI, so they are not unit-tested here. + */ + +import { describe, it, expect } from 'vitest'; +import { + parseIssueNumbers, + isIssueReference, + formatIssueAsTask, + type GitHubIssue, +} from '../github/issue.js'; + +describe('parseIssueNumbers', () => { + it('should parse single issue reference', () => { + expect(parseIssueNumbers(['#6'])).toEqual([6]); + }); + + it('should parse multiple issue references', () => { + expect(parseIssueNumbers(['#6', '#7'])).toEqual([6, 7]); + }); + + it('should parse large issue numbers', () => { + expect(parseIssueNumbers(['#123'])).toEqual([123]); + }); + + it('should return empty for non-issue args', () => { + expect(parseIssueNumbers(['Fix bug'])).toEqual([]); + }); + + it('should return empty when mixed issue and non-issue args', () => { + expect(parseIssueNumbers(['#6', 'and', '#7'])).toEqual([]); + }); + + it('should return empty for empty args', () => { + expect(parseIssueNumbers([])).toEqual([]); + }); + + it('should not match partial issue patterns', () => { + expect(parseIssueNumbers(['#abc'])).toEqual([]); + expect(parseIssueNumbers(['#'])).toEqual([]); + expect(parseIssueNumbers(['##6'])).toEqual([]); + expect(parseIssueNumbers(['6'])).toEqual([]); + expect(parseIssueNumbers(['issue#6'])).toEqual([]); + }); + + it('should handle #0', () => { + expect(parseIssueNumbers(['#0'])).toEqual([0]); + }); +}); + +describe('isIssueReference', () => { + it('should return true for #N patterns', () => { + expect(isIssueReference('#6')).toBe(true); + expect(isIssueReference('#123')).toBe(true); + }); + + it('should return true with whitespace trim', () => { + expect(isIssueReference(' #6 ')).toBe(true); + }); + + it('should return false for non-issue text', () => { + expect(isIssueReference('Fix bug')).toBe(false); + expect(isIssueReference('#abc')).toBe(false); + expect(isIssueReference('')).toBe(false); + expect(isIssueReference('#')).toBe(false); + expect(isIssueReference('6')).toBe(false); + }); + + it('should return false for multiple issues (single string)', () => { + expect(isIssueReference('#6 #7')).toBe(false); + }); +}); + +describe('formatIssueAsTask', () => { + it('should format issue with all fields', () => { + const issue: GitHubIssue = { + number: 6, + title: 'Fix authentication bug', + body: 'The login flow is broken.', + labels: ['bug', 'priority:high'], + comments: [ + { author: 'user1', body: 'I can reproduce this.' }, + { author: 'user2', body: 'Fixed in PR #7.' }, + ], + }; + + const result = formatIssueAsTask(issue); + + expect(result).toContain('## GitHub Issue #6: Fix authentication bug'); + expect(result).toContain('The login flow is broken.'); + expect(result).toContain('### Labels'); + expect(result).toContain('bug, priority:high'); + expect(result).toContain('### Comments'); + expect(result).toContain('**user1**: I can reproduce this.'); + expect(result).toContain('**user2**: Fixed in PR #7.'); + }); + + it('should format issue with no body', () => { + const issue: GitHubIssue = { + number: 10, + title: 'Empty issue', + body: '', + labels: [], + comments: [], + }; + + const result = formatIssueAsTask(issue); + + expect(result).toBe('## GitHub Issue #10: Empty issue'); + expect(result).not.toContain('### Labels'); + expect(result).not.toContain('### Comments'); + }); + + it('should format issue with labels but no comments', () => { + const issue: GitHubIssue = { + number: 5, + title: 'Feature request', + body: 'Add dark mode.', + labels: ['enhancement'], + comments: [], + }; + + const result = formatIssueAsTask(issue); + + expect(result).toContain('### Labels'); + expect(result).toContain('enhancement'); + expect(result).not.toContain('### Comments'); + }); + + it('should format issue with comments but no labels', () => { + const issue: GitHubIssue = { + number: 3, + title: 'Discussion', + body: 'Thoughts?', + labels: [], + comments: [ + { author: 'dev', body: 'LGTM' }, + ], + }; + + const result = formatIssueAsTask(issue); + + expect(result).not.toContain('### Labels'); + expect(result).toContain('### Comments'); + expect(result).toContain('**dev**: LGTM'); + }); + + it('should handle multiline body', () => { + const issue: GitHubIssue = { + number: 1, + title: 'Multi-line', + body: 'Line 1\nLine 2\n\nLine 4', + labels: [], + comments: [], + }; + + const result = formatIssueAsTask(issue); + + expect(result).toContain('Line 1\nLine 2\n\nLine 4'); + }); +}); diff --git a/src/cli.ts b/src/cli.ts index be07349..a73974f 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -42,6 +42,7 @@ import { autoCommitAndPush } from './task/autoCommit.js'; import { summarizeTaskName } from './task/summarize.js'; import { DEFAULT_WORKFLOW_NAME } from './constants.js'; import { checkForUpdates } from './utils/updateNotifier.js'; +import { resolveIssueTask, isIssueReference } from './github/issue.js'; const require = createRequire(import.meta.url); const { version: cliVersion } = require('../package.json') as { version: string }; @@ -188,6 +189,18 @@ program // Task execution if (task) { + // Resolve #N issue references to task text + let resolvedTask: string = task; + if (isIssueReference(task) || task.trim().split(/\s+/).every((t: string) => isIssueReference(t))) { + try { + info('Fetching GitHub Issue...'); + resolvedTask = resolveIssueTask(task); + } catch (e) { + error(e instanceof Error ? e.message : String(e)); + process.exit(1); + } + } + // Get available workflows and prompt user to select const availableWorkflows = listWorkflows(); const currentWorkflow = getCurrentWorkflow(cwd); @@ -230,13 +243,13 @@ program } // Ask whether to create a worktree - const { execCwd, isWorktree } = await confirmAndCreateWorktree(cwd, task); + const { execCwd, isWorktree } = await confirmAndCreateWorktree(cwd, resolvedTask); - log.info('Starting task execution', { task, workflow: selectedWorkflow, worktree: isWorktree }); - const taskSuccess = await executeTask(task, execCwd, selectedWorkflow, cwd); + log.info('Starting task execution', { task: resolvedTask, workflow: selectedWorkflow, worktree: isWorktree }); + const taskSuccess = await executeTask(resolvedTask, execCwd, selectedWorkflow, cwd); if (taskSuccess && isWorktree) { - const commitResult = autoCommitAndPush(execCwd, task, cwd); + const commitResult = autoCommitAndPush(execCwd, resolvedTask, cwd); if (commitResult.success && commitResult.commitHash) { success(`Auto-committed & pushed: ${commitResult.commitHash}`); } else if (!commitResult.success) { diff --git a/src/commands/addTask.ts b/src/commands/addTask.ts index 4efc153..cfb75bd 100644 --- a/src/commands/addTask.ts +++ b/src/commands/addTask.ts @@ -12,7 +12,9 @@ import { promptInput, promptMultilineInput, confirm, selectOption } from '../pro import { success, info } from '../utils/ui.js'; import { summarizeTaskName } from '../task/summarize.js'; import { createLogger } from '../utils/debug.js'; +import { error as errorLog } from '../utils/ui.js'; import { listWorkflows } from '../config/workflowLoader.js'; +import { parseIssueNumbers, resolveIssueTask } from '../github/issue.js'; import { getCurrentWorkflow } from '../config/paths.js'; import type { TaskFileData } from '../task/schema.js'; @@ -53,8 +55,19 @@ export async function addTask(cwd: string, args: string[]): Promise { let workflow: string | undefined; if (args.length > 0) { - // Argument mode: task content provided directly - taskContent = args.join(' '); + // Check if args are issue references (#N) + const issueNumbers = parseIssueNumbers(args); + if (issueNumbers.length > 0) { + try { + info('Fetching GitHub Issue...'); + taskContent = resolveIssueTask(args.join(' ')); + } catch (e) { + errorLog(e instanceof Error ? e.message : String(e)); + return; + } + } else { + taskContent = args.join(' '); + } } else { // Interactive mode (multiline: empty line to finish) const input = await promptMultilineInput('Task content'); diff --git a/src/commands/help.ts b/src/commands/help.ts index 66defef..a1ed9a8 100644 --- a/src/commands/help.ts +++ b/src/commands/help.ts @@ -14,6 +14,7 @@ export function showHelp(): void { console.log(` Usage: takt {task} Execute task with current workflow (continues session) + takt "#N" Execute GitHub Issue #N as task (quote # in shell) takt /run-tasks (/run) Run all pending tasks from .takt/tasks/ takt /watch Watch for tasks and auto-execute (stays resident) takt /add-task (/add) Add a new task (interactive, YAML format) @@ -25,6 +26,10 @@ Usage: Examples: takt "Fix the bug in main.ts" # Execute task (continues session) + takt "#6" # Execute Issue #6 as task + takt "#6 #7" # Execute multiple Issues as task + takt /add-task "#6" # Create task from Issue #6 + takt /add-task "#6" "#7" # Create task from multiple Issues takt /add-task "認証機能を追加する" # Quick add task takt /add-task # Interactive task creation takt /clear # Clear sessions, start fresh diff --git a/src/github/issue.ts b/src/github/issue.ts new file mode 100644 index 0000000..27ba148 --- /dev/null +++ b/src/github/issue.ts @@ -0,0 +1,183 @@ +/** + * GitHub Issue utilities + * + * Fetches issue content via `gh` CLI and formats it as task text + * for workflow execution or task creation. + */ + +import { execSync } from 'node:child_process'; +import { createLogger } from '../utils/debug.js'; + +const log = createLogger('github'); + +/** Regex to match `#N` patterns (issue numbers) */ +const ISSUE_NUMBER_REGEX = /^#(\d+)$/; + +export interface GitHubIssue { + number: number; + title: string; + body: string; + labels: string[]; + comments: Array<{ author: string; body: string }>; +} + +export interface GhCliStatus { + available: boolean; + error?: string; +} + +/** + * Check if `gh` CLI is available and authenticated. + */ +export function checkGhCli(): GhCliStatus { + try { + execSync('gh auth status', { stdio: 'pipe' }); + return { available: true }; + } catch { + try { + execSync('gh --version', { stdio: 'pipe' }); + return { + available: false, + error: 'gh CLI is installed but not authenticated. Run `gh auth login` first.', + }; + } catch { + return { + available: false, + error: 'gh CLI is not installed. Install it from https://cli.github.com/', + }; + } + } +} + +/** + * Fetch issue content via `gh issue view`. + * Throws on failure (issue not found, network error, etc.). + */ +export function fetchIssue(issueNumber: number): GitHubIssue { + log.debug('Fetching issue', { issueNumber }); + + const raw = execSync( + `gh issue view ${issueNumber} --json number,title,body,labels,comments`, + { encoding: 'utf-8', stdio: ['pipe', 'pipe', 'pipe'] }, + ); + + const data = JSON.parse(raw) as { + number: number; + title: string; + body: string; + labels: Array<{ name: string }>; + comments: Array<{ author: { login: string }; body: string }>; + }; + + return { + number: data.number, + title: data.title, + body: data.body ?? '', + labels: data.labels.map((l) => l.name), + comments: data.comments.map((c) => ({ + author: c.author.login, + body: c.body, + })), + }; +} + +/** + * Format a GitHub issue into task text for workflow execution. + * + * Output format: + * ``` + * ## GitHub Issue #6: Fix authentication bug + * + * {body} + * + * ### Labels + * bug, priority:high + * + * ### Comments + * **user1**: Comment body... + * ``` + */ +export function formatIssueAsTask(issue: GitHubIssue): string { + const parts: string[] = []; + + parts.push(`## GitHub Issue #${issue.number}: ${issue.title}`); + + if (issue.body) { + parts.push(''); + parts.push(issue.body); + } + + if (issue.labels.length > 0) { + parts.push(''); + parts.push('### Labels'); + parts.push(issue.labels.join(', ')); + } + + if (issue.comments.length > 0) { + parts.push(''); + parts.push('### Comments'); + for (const comment of issue.comments) { + parts.push(`**${comment.author}**: ${comment.body}`); + } + } + + return parts.join('\n'); +} + +/** + * Parse `#N` patterns from argument strings. + * Returns issue numbers found, or empty array if none. + * + * Each argument must be exactly `#N` (no mixed text). + * Examples: + * ['#6'] → [6] + * ['#6', '#7'] → [6, 7] + * ['Fix bug'] → [] + * ['#6', 'and', '#7'] → [] (mixed, not all are issue refs) + */ +export function parseIssueNumbers(args: string[]): number[] { + if (args.length === 0) return []; + + const numbers: number[] = []; + for (const arg of args) { + const match = arg.match(ISSUE_NUMBER_REGEX); + if (!match?.[1]) return []; // Not all args are issue refs + numbers.push(Number.parseInt(match[1], 10)); + } + + return numbers; +} + +/** + * Check if a single task string is an issue reference (`#N`). + */ +export function isIssueReference(task: string): boolean { + return ISSUE_NUMBER_REGEX.test(task.trim()); +} + +/** + * Resolve issue references in a task string. + * If task contains `#N` patterns (space-separated), fetches issues and returns formatted text. + * Otherwise returns the task string as-is. + * + * Checks gh CLI availability before fetching. + * Throws if gh CLI is not available or issue fetch fails. + */ +export function resolveIssueTask(task: string): string { + const tokens = task.trim().split(/\s+/); + const issueNumbers = parseIssueNumbers(tokens); + + if (issueNumbers.length === 0) { + return task; + } + + const ghStatus = checkGhCli(); + if (!ghStatus.available) { + throw new Error(ghStatus.error ?? 'gh CLI is not available'); + } + + log.info('Resolving issue references', { issueNumbers }); + + const issues = issueNumbers.map((n) => fetchIssue(n)); + return issues.map(formatIssueAsTask).join('\n\n---\n\n'); +} diff --git a/src/workflow/engine.ts b/src/workflow/engine.ts index e06fcd4..57cbd0e 100644 --- a/src/workflow/engine.ts +++ b/src/workflow/engine.ts @@ -10,17 +10,17 @@ import type { WorkflowState, WorkflowStep, AgentResponse, - RuleMatchMethod, } from '../models/types.js'; import { runAgent, type RunAgentOptions } from '../agents/runner.js'; import { COMPLETE_STEP, ABORT_STEP, ERROR_MESSAGES } from './constants.js'; import type { WorkflowEngineOptions } from './types.js'; import { determineNextStepByRules } from './transitions.js'; -import { detectRuleIndex, callAiJudge } from '../claude/client.js'; -import { buildInstruction as buildInstructionFromTemplate, buildReportInstruction as buildReportInstructionFromTemplate, buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; +import { buildInstruction as buildInstructionFromTemplate, isReportObjectConfig } from './instruction-builder.js'; import { LoopDetector } from './loop-detector.js'; import { handleBlocked } from './blocked-handler.js'; import { ParallelLogger } from './parallel-logger.js'; +import { detectMatchedRule } from './rule-evaluator.js'; +import { needsStatusJudgmentPhase, runReportPhase, runStatusJudgmentPhase } from './phase-runner.js'; import { createInitialState, addUserInput, @@ -206,18 +206,11 @@ export class WorkflowEngine extends EventEmitter { return this.runNormalStep(step); } - /** Build RunAgentOptions from a step's configuration */ - private buildAgentOptions(step: WorkflowStep): RunAgentOptions { - // Phase 1: exclude Write from allowedTools when step has report config - const allowedTools = step.report - ? step.allowedTools?.filter((t) => t !== 'Write') - : step.allowedTools; - + /** Build common RunAgentOptions shared by all phases */ + private buildBaseOptions(step: WorkflowStep): RunAgentOptions { return { cwd: this.cwd, - sessionId: this.state.agentSessions.get(step.agent), agentPath: step.agentPath, - allowedTools, provider: step.provider, model: step.model, permissionMode: step.permissionMode, @@ -228,24 +221,29 @@ export class WorkflowEngine extends EventEmitter { }; } + /** Build RunAgentOptions from a step's configuration (Phase 1) */ + private buildAgentOptions(step: WorkflowStep): RunAgentOptions { + // Phase 1: exclude Write from allowedTools when step has report config + const allowedTools = step.report + ? step.allowedTools?.filter((t) => t !== 'Write') + : step.allowedTools; + + return { + ...this.buildBaseOptions(step), + sessionId: this.state.agentSessions.get(step.agent), + allowedTools, + }; + } + /** * Build RunAgentOptions for session-resume phases (Phase 2, Phase 3). - * Shares common fields with the original step's agent config. */ private buildResumeOptions(step: WorkflowStep, sessionId: string, overrides: Pick): RunAgentOptions { return { - cwd: this.cwd, + ...this.buildBaseOptions(step), sessionId, - agentPath: step.agentPath, allowedTools: overrides.allowedTools, maxTurns: overrides.maxTurns, - provider: step.provider, - model: step.model, - permissionMode: step.permissionMode, - onStream: this.options.onStream, - onPermissionRequest: this.options.onPermissionRequest, - onAskUserQuestion: this.options.onAskUserQuestion, - bypassPermissions: this.options.bypassPermissions, }; } @@ -261,114 +259,16 @@ export class WorkflowEngine extends EventEmitter { } } - /** - * Detect matched rule for a step's response. - * Evaluation order (first match wins): - * 1. Aggregate conditions: all()/any() — evaluate sub-step results - * 2. Tag detection from Phase 3 output - * 3. Tag detection from Phase 1 output (fallback) - * 4. ai() condition evaluation via AI judge - * 5. All-conditions AI judge (final fallback) - * - * Returns undefined for steps without rules. - * Throws if rules exist but no rule matched (Fail Fast). - * - * @param step - The workflow step - * @param agentContent - Phase 1 output (main execution) - * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection - */ - private async detectMatchedRule(step: WorkflowStep, agentContent: string, tagContent: string): Promise<{ index: number; method: RuleMatchMethod } | undefined> { - if (!step.rules || step.rules.length === 0) return undefined; - - // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps - const aggIndex = this.evaluateAggregateConditions(step); - if (aggIndex >= 0) { - return { index: aggIndex, method: 'aggregate' }; - } - - // 2. Tag detection from Phase 3 output - if (tagContent) { - const ruleIndex = detectRuleIndex(tagContent, step.name); - if (ruleIndex >= 0 && ruleIndex < step.rules.length) { - return { index: ruleIndex, method: 'phase3_tag' }; - } - } - - // 3. Tag detection from Phase 1 output (fallback) - if (agentContent) { - const ruleIndex = detectRuleIndex(agentContent, step.name); - if (ruleIndex >= 0 && ruleIndex < step.rules.length) { - return { index: ruleIndex, method: 'phase1_tag' }; - } - } - - // 4. AI judge for ai() conditions only - const aiRuleIndex = await this.evaluateAiConditions(step, agentContent); - if (aiRuleIndex >= 0) { - return { index: aiRuleIndex, method: 'ai_judge' }; - } - - // 5. AI judge for all conditions (final fallback) - const fallbackIndex = await this.evaluateAllConditionsViaAiJudge(step, agentContent); - if (fallbackIndex >= 0) { - return { index: fallbackIndex, method: 'ai_judge_fallback' }; - } - - throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`); - } - - /** - * Evaluate aggregate conditions (all()/any()) against sub-step results. - * Returns the 0-based rule index in the step's rules array, or -1 if no match. - * - * For each aggregate rule, checks the matched condition text of sub-steps: - * - all("X"): true when ALL sub-steps have matched condition === X - * - any("X"): true when at least ONE sub-step has matched condition === X - * - * Edge cases per spec: - * - Sub-step with no matched rule: all() → false, any() → skip that sub-step - * - No sub-steps (0 件): both → false - * - Non-parallel step: both → false - */ - private evaluateAggregateConditions(step: WorkflowStep): number { - if (!step.rules || !step.parallel || step.parallel.length === 0) return -1; - - for (let i = 0; i < step.rules.length; i++) { - const rule = step.rules[i]!; - if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) { - continue; - } - - const subSteps = step.parallel; - const targetCondition = rule.aggregateConditionText; - - if (rule.aggregateType === 'all') { - const allMatch = subSteps.every((sub) => { - const output = this.state.stepOutputs.get(sub.name); - if (!output || output.matchedRuleIndex == null || !sub.rules) return false; - const matchedRule = sub.rules[output.matchedRuleIndex]; - return matchedRule?.condition === targetCondition; - }); - if (allMatch) { - log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); - return i; - } - } else { - // 'any' - const anyMatch = subSteps.some((sub) => { - const output = this.state.stepOutputs.get(sub.name); - if (!output || output.matchedRuleIndex == null || !sub.rules) return false; - const matchedRule = sub.rules[output.matchedRuleIndex]; - return matchedRule?.condition === targetCondition; - }); - if (anyMatch) { - log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); - return i; - } - } - } - - return -1; + /** Build phase runner context for Phase 2/3 execution */ + private buildPhaseRunnerContext() { + return { + cwd: this.cwd, + reportDir: this.reportDir, + language: this.language, + getSessionId: (agent: string) => this.state.agentSessions.get(agent), + buildResumeOptions: this.buildResumeOptions.bind(this), + updateAgentSession: this.updateAgentSession.bind(this), + }; } /** Run a normal (non-parallel) step */ @@ -388,18 +288,23 @@ export class WorkflowEngine extends EventEmitter { let response = await runAgent(step.agent, instruction, agentOptions); this.updateAgentSession(step.agent, response.sessionId); + const phaseCtx = this.buildPhaseRunnerContext(); + // Phase 2: report output (resume same session, Write only) if (step.report) { - await this.runReportPhase(step, stepIteration); + await runReportPhase(step, stepIteration, phaseCtx); } // Phase 3: status judgment (resume session, no tools, output status tag) let tagContent = ''; - if (this.needsStatusJudgmentPhase(step)) { - tagContent = await this.runStatusJudgmentPhase(step); + if (needsStatusJudgmentPhase(step)) { + tagContent = await runStatusJudgmentPhase(step, phaseCtx); } - const match = await this.detectMatchedRule(step, response.content, tagContent); + const match = await detectMatchedRule(step, response.content, tagContent, { + state: this.state, + cwd: this.cwd, + }); if (match) { log.debug('Rule matched', { step: step.name, ruleIndex: match.index, method: match.method }); response = { ...response, matchedRuleIndex: match.index, matchedRuleMethod: match.method }; @@ -410,81 +315,6 @@ export class WorkflowEngine extends EventEmitter { return { response, instruction }; } - /** - * Phase 2: Report output. - * Resumes the agent session with Write-only tools to output reports. - * The response is discarded — only sessionId is updated. - */ - private async runReportPhase(step: WorkflowStep, stepIteration: number): Promise { - const sessionId = this.state.agentSessions.get(step.agent); - if (!sessionId) { - throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); - } - - log.debug('Running report phase', { step: step.name, sessionId }); - - const reportInstruction = buildReportInstructionFromTemplate(step, { - cwd: this.cwd, - reportDir: this.reportDir, - stepIteration, - language: this.language, - }); - - const reportOptions = this.buildResumeOptions(step, sessionId, { - allowedTools: ['Write'], - maxTurns: 3, - }); - - const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions); - - // Update session (phase 2 may update it) - this.updateAgentSession(step.agent, reportResponse.sessionId); - - log.debug('Report phase complete', { step: step.name, status: reportResponse.status }); - } - - /** - * Check if a step needs Phase 3 (status judgment). - * Returns true when at least one rule requires tag-based detection - * (i.e., not all rules are ai() or aggregate conditions). - */ - private needsStatusJudgmentPhase(step: WorkflowStep): boolean { - if (!step.rules || step.rules.length === 0) return false; - const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); - return !allNonTagConditions; - } - - /** - * Phase 3: Status judgment. - * Resumes the agent session with no tools to ask the agent to output a status tag. - * Returns the Phase 3 response content (containing the status tag). - */ - private async runStatusJudgmentPhase(step: WorkflowStep): Promise { - const sessionId = this.state.agentSessions.get(step.agent); - if (!sessionId) { - throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); - } - - log.debug('Running status judgment phase', { step: step.name, sessionId }); - - const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, { - language: this.language, - }); - - const judgmentOptions = this.buildResumeOptions(step, sessionId, { - allowedTools: [], - maxTurns: 3, - }); - - const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions); - - // Update session (phase 3 may update it) - this.updateAgentSession(step.agent, judgmentResponse.sessionId); - - log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status }); - return judgmentResponse.content; - } - /** * Run a parallel step: execute all sub-steps concurrently, then aggregate results. * The aggregated output becomes the parent step's response for rules evaluation. @@ -509,6 +339,9 @@ export class WorkflowEngine extends EventEmitter { }) : undefined; + const phaseCtx = this.buildPhaseRunnerContext(); + const ruleCtx = { state: this.state, cwd: this.cwd }; + // Run all sub-steps concurrently const subResults = await Promise.all( subSteps.map(async (subStep, index) => { @@ -516,28 +349,28 @@ export class WorkflowEngine extends EventEmitter { const subInstruction = this.buildInstruction(subStep, subIteration); // Phase 1: main execution (Write excluded if sub-step has report) - const agentOptions = this.buildAgentOptions(subStep); + const baseOptions = this.buildAgentOptions(subStep); - // Override onStream with parallel logger's prefixed handler - if (parallelLogger) { - agentOptions.onStream = parallelLogger.createStreamHandler(subStep.name, index); - } + // Override onStream with parallel logger's prefixed handler (immutable) + const agentOptions = parallelLogger + ? { ...baseOptions, onStream: parallelLogger.createStreamHandler(subStep.name, index) } + : baseOptions; const subResponse = await runAgent(subStep.agent, subInstruction, agentOptions); this.updateAgentSession(subStep.agent, subResponse.sessionId); // Phase 2: report output for sub-step if (subStep.report) { - await this.runReportPhase(subStep, subIteration); + await runReportPhase(subStep, subIteration, phaseCtx); } // Phase 3: status judgment for sub-step let subTagContent = ''; - if (this.needsStatusJudgmentPhase(subStep)) { - subTagContent = await this.runStatusJudgmentPhase(subStep); + if (needsStatusJudgmentPhase(subStep)) { + subTagContent = await runStatusJudgmentPhase(subStep, phaseCtx); } - const match = await this.detectMatchedRule(subStep, subResponse.content, subTagContent); + const match = await detectMatchedRule(subStep, subResponse.content, subTagContent, ruleCtx); const finalResponse = match ? { ...subResponse, matchedRuleIndex: match.index, matchedRuleMethod: match.method } : subResponse; @@ -572,7 +405,7 @@ export class WorkflowEngine extends EventEmitter { .join('\n\n'); // Parent step uses aggregate conditions, so tagContent is empty - const match = await this.detectMatchedRule(step, aggregatedContent, ''); + const match = await detectMatchedRule(step, aggregatedContent, '', ruleCtx); const aggregatedResponse: AgentResponse = { agent: step.name, @@ -587,79 +420,6 @@ export class WorkflowEngine extends EventEmitter { return { response: aggregatedResponse, instruction: aggregatedInstruction }; } - /** - * Evaluate ai() conditions via AI judge. - * Collects all ai() rules, calls the judge, and maps the result back to the original rule index. - * Returns the 0-based rule index in the step's rules array, or -1 if no match. - */ - private async evaluateAiConditions(step: WorkflowStep, agentOutput: string): Promise { - if (!step.rules) return -1; - - const aiConditions: { index: number; text: string }[] = []; - for (let i = 0; i < step.rules.length; i++) { - const rule = step.rules[i]!; - if (rule.isAiCondition && rule.aiConditionText) { - aiConditions.push({ index: i, text: rule.aiConditionText }); - } - } - - if (aiConditions.length === 0) return -1; - - log.debug('Evaluating ai() conditions via judge', { - step: step.name, - conditionCount: aiConditions.length, - }); - - // Remap: judge returns 0-based index within aiConditions array - const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text })); - const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd: this.cwd }); - - if (judgeResult >= 0 && judgeResult < aiConditions.length) { - const matched = aiConditions[judgeResult]!; - log.debug('AI judge matched condition', { - step: step.name, - judgeResult, - originalRuleIndex: matched.index, - condition: matched.text, - }); - return matched.index; - } - - log.debug('AI judge did not match any condition', { step: step.name }); - return -1; - } - - /** - * Final fallback: evaluate ALL rule conditions via AI judge. - * Unlike evaluateAiConditions (which only handles ai() flagged rules), - * this sends every rule's condition text to the judge. - * Returns the 0-based rule index, or -1 if no match. - */ - private async evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string): Promise { - if (!step.rules || step.rules.length === 0) return -1; - - const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition })); - - log.debug('Evaluating all conditions via AI judge (final fallback)', { - step: step.name, - conditionCount: conditions.length, - }); - - const judgeResult = await callAiJudge(agentOutput, conditions, { cwd: this.cwd }); - - if (judgeResult >= 0 && judgeResult < conditions.length) { - log.debug('AI judge (fallback) matched condition', { - step: step.name, - ruleIndex: judgeResult, - condition: conditions[judgeResult]!.text, - }); - return judgeResult; - } - - log.debug('AI judge (fallback) did not match any condition', { step: step.name }); - return -1; - } - /** * Determine next step for a completed step using rules-based routing. */ diff --git a/src/workflow/instruction-builder.ts b/src/workflow/instruction-builder.ts index acdd061..6b5f279 100644 --- a/src/workflow/instruction-builder.ts +++ b/src/workflow/instruction-builder.ts @@ -12,6 +12,7 @@ */ import type { WorkflowStep, WorkflowRule, AgentResponse, Language, ReportConfig, ReportObjectConfig } from '../models/types.js'; +import { hasTagBasedRules } from './rule-utils.js'; /** @@ -465,12 +466,9 @@ export function buildInstruction( // 7. Status Output Rules (for tag-based detection in Phase 1) // Skip if all rules are ai() or aggregate conditions (no tags needed) - if (step.rules && step.rules.length > 0) { - const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); - if (!allNonTagConditions) { - const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules, language); - sections.push(statusRulesPrompt); - } + if (hasTagBasedRules(step)) { + const statusRulesPrompt = generateStatusRulesFromRules(step.name, step.rules!, language); + sections.push(statusRulesPrompt); } return sections.join('\n\n'); diff --git a/src/workflow/phase-runner.ts b/src/workflow/phase-runner.ts new file mode 100644 index 0000000..2e5b215 --- /dev/null +++ b/src/workflow/phase-runner.ts @@ -0,0 +1,111 @@ +/** + * Phase execution logic extracted from engine.ts. + * + * Handles Phase 2 (report output) and Phase 3 (status judgment) + * as session-resume operations. + */ + +import type { WorkflowStep, Language } from '../models/types.js'; +import { runAgent, type RunAgentOptions } from '../agents/runner.js'; +import { + buildReportInstruction as buildReportInstructionFromTemplate, + buildStatusJudgmentInstruction as buildStatusJudgmentInstructionFromTemplate, +} from './instruction-builder.js'; +import { hasTagBasedRules } from './rule-utils.js'; +import { createLogger } from '../utils/debug.js'; + +const log = createLogger('phase-runner'); + +export interface PhaseRunnerContext { + /** Working directory (agent work dir, may be a clone) */ + cwd: string; + /** Report directory path */ + reportDir: string; + /** Language for instructions */ + language?: Language; + /** Get agent session ID */ + getSessionId: (agent: string) => string | undefined; + /** Build resume options for a step */ + buildResumeOptions: (step: WorkflowStep, sessionId: string, overrides: Pick) => RunAgentOptions; + /** Update agent session after a phase run */ + updateAgentSession: (agent: string, sessionId: string | undefined) => void; +} + +/** + * Check if a step needs Phase 3 (status judgment). + * Returns true when at least one rule requires tag-based detection. + */ +export function needsStatusJudgmentPhase(step: WorkflowStep): boolean { + return hasTagBasedRules(step); +} + +/** + * Phase 2: Report output. + * Resumes the agent session with Write-only tools to output reports. + * The response is discarded — only sessionId is updated. + */ +export async function runReportPhase( + step: WorkflowStep, + stepIteration: number, + ctx: PhaseRunnerContext, +): Promise { + const sessionId = ctx.getSessionId(step.agent); + if (!sessionId) { + throw new Error(`Report phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); + } + + log.debug('Running report phase', { step: step.name, sessionId }); + + const reportInstruction = buildReportInstructionFromTemplate(step, { + cwd: ctx.cwd, + reportDir: ctx.reportDir, + stepIteration, + language: ctx.language, + }); + + const reportOptions = ctx.buildResumeOptions(step, sessionId, { + allowedTools: ['Write'], + maxTurns: 3, + }); + + const reportResponse = await runAgent(step.agent, reportInstruction, reportOptions); + + // Update session (phase 2 may update it) + ctx.updateAgentSession(step.agent, reportResponse.sessionId); + + log.debug('Report phase complete', { step: step.name, status: reportResponse.status }); +} + +/** + * Phase 3: Status judgment. + * Resumes the agent session with no tools to ask the agent to output a status tag. + * Returns the Phase 3 response content (containing the status tag). + */ +export async function runStatusJudgmentPhase( + step: WorkflowStep, + ctx: PhaseRunnerContext, +): Promise { + const sessionId = ctx.getSessionId(step.agent); + if (!sessionId) { + throw new Error(`Status judgment phase requires a session to resume, but no sessionId found for agent "${step.agent}" in step "${step.name}"`); + } + + log.debug('Running status judgment phase', { step: step.name, sessionId }); + + const judgmentInstruction = buildStatusJudgmentInstructionFromTemplate(step, { + language: ctx.language, + }); + + const judgmentOptions = ctx.buildResumeOptions(step, sessionId, { + allowedTools: [], + maxTurns: 3, + }); + + const judgmentResponse = await runAgent(step.agent, judgmentInstruction, judgmentOptions); + + // Update session (phase 3 may update it) + ctx.updateAgentSession(step.agent, judgmentResponse.sessionId); + + log.debug('Status judgment phase complete', { step: step.name, status: judgmentResponse.status }); + return judgmentResponse.content; +} diff --git a/src/workflow/rule-evaluator.ts b/src/workflow/rule-evaluator.ts new file mode 100644 index 0000000..3d00ac9 --- /dev/null +++ b/src/workflow/rule-evaluator.ts @@ -0,0 +1,218 @@ +/** + * Rule evaluation logic extracted from engine.ts. + * + * Evaluates workflow step rules to determine the matched rule index. + * Supports tag-based detection, ai() conditions, aggregate conditions, + * and AI judge fallback. + */ + +import type { + WorkflowStep, + WorkflowState, + RuleMatchMethod, +} from '../models/types.js'; +import { detectRuleIndex, callAiJudge } from '../claude/client.js'; +import { createLogger } from '../utils/debug.js'; + +const log = createLogger('rule-evaluator'); + +export interface RuleMatch { + index: number; + method: RuleMatchMethod; +} + +export interface RuleEvaluatorContext { + /** Workflow state (for accessing stepOutputs in aggregate evaluation) */ + state: WorkflowState; + /** Working directory (for AI judge calls) */ + cwd: string; +} + +/** + * Detect matched rule for a step's response. + * Evaluation order (first match wins): + * 1. Aggregate conditions: all()/any() — evaluate sub-step results + * 2. Tag detection from Phase 3 output + * 3. Tag detection from Phase 1 output (fallback) + * 4. ai() condition evaluation via AI judge + * 5. All-conditions AI judge (final fallback) + * + * Returns undefined for steps without rules. + * Throws if rules exist but no rule matched (Fail Fast). + * + * @param step - The workflow step + * @param agentContent - Phase 1 output (main execution) + * @param tagContent - Phase 3 output (status judgment); empty string skips tag detection + * @param ctx - Evaluation context (state, cwd) + */ +export async function detectMatchedRule( + step: WorkflowStep, + agentContent: string, + tagContent: string, + ctx: RuleEvaluatorContext, +): Promise { + if (!step.rules || step.rules.length === 0) return undefined; + + // 1. Aggregate conditions (all/any) — only meaningful for parallel parent steps + const aggIndex = evaluateAggregateConditions(step, ctx.state); + if (aggIndex >= 0) { + return { index: aggIndex, method: 'aggregate' }; + } + + // 2. Tag detection from Phase 3 output + if (tagContent) { + const ruleIndex = detectRuleIndex(tagContent, step.name); + if (ruleIndex >= 0 && ruleIndex < step.rules.length) { + return { index: ruleIndex, method: 'phase3_tag' }; + } + } + + // 3. Tag detection from Phase 1 output (fallback) + if (agentContent) { + const ruleIndex = detectRuleIndex(agentContent, step.name); + if (ruleIndex >= 0 && ruleIndex < step.rules.length) { + return { index: ruleIndex, method: 'phase1_tag' }; + } + } + + // 4. AI judge for ai() conditions only + const aiRuleIndex = await evaluateAiConditions(step, agentContent, ctx.cwd); + if (aiRuleIndex >= 0) { + return { index: aiRuleIndex, method: 'ai_judge' }; + } + + // 5. AI judge for all conditions (final fallback) + const fallbackIndex = await evaluateAllConditionsViaAiJudge(step, agentContent, ctx.cwd); + if (fallbackIndex >= 0) { + return { index: fallbackIndex, method: 'ai_judge_fallback' }; + } + + throw new Error(`Status not found for step "${step.name}": no rule matched after all detection phases`); +} + +/** + * Evaluate aggregate conditions (all()/any()) against sub-step results. + * Returns the 0-based rule index in the step's rules array, or -1 if no match. + * + * For each aggregate rule, checks the matched condition text of sub-steps: + * - all("X"): true when ALL sub-steps have matched condition === X + * - any("X"): true when at least ONE sub-step has matched condition === X + * + * Edge cases per spec: + * - Sub-step with no matched rule: all() → false, any() → skip that sub-step + * - No sub-steps (0 件): both → false + * - Non-parallel step: both → false + */ +export function evaluateAggregateConditions(step: WorkflowStep, state: WorkflowState): number { + if (!step.rules || !step.parallel || step.parallel.length === 0) return -1; + + for (let i = 0; i < step.rules.length; i++) { + const rule = step.rules[i]!; + if (!rule.isAggregateCondition || !rule.aggregateType || !rule.aggregateConditionText) { + continue; + } + + const subSteps = step.parallel; + const targetCondition = rule.aggregateConditionText; + + if (rule.aggregateType === 'all') { + const allMatch = subSteps.every((sub) => { + const output = state.stepOutputs.get(sub.name); + if (!output || output.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[output.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + if (allMatch) { + log.debug('Aggregate all() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); + return i; + } + } else { + // 'any' + const anyMatch = subSteps.some((sub) => { + const output = state.stepOutputs.get(sub.name); + if (!output || output.matchedRuleIndex == null || !sub.rules) return false; + const matchedRule = sub.rules[output.matchedRuleIndex]; + return matchedRule?.condition === targetCondition; + }); + if (anyMatch) { + log.debug('Aggregate any() matched', { step: step.name, condition: targetCondition, ruleIndex: i }); + return i; + } + } + } + + return -1; +} + +/** + * Evaluate ai() conditions via AI judge. + * Collects all ai() rules, calls the judge, and maps the result back to the original rule index. + * Returns the 0-based rule index in the step's rules array, or -1 if no match. + */ +export async function evaluateAiConditions(step: WorkflowStep, agentOutput: string, cwd: string): Promise { + if (!step.rules) return -1; + + const aiConditions: { index: number; text: string }[] = []; + for (let i = 0; i < step.rules.length; i++) { + const rule = step.rules[i]!; + if (rule.isAiCondition && rule.aiConditionText) { + aiConditions.push({ index: i, text: rule.aiConditionText }); + } + } + + if (aiConditions.length === 0) return -1; + + log.debug('Evaluating ai() conditions via judge', { + step: step.name, + conditionCount: aiConditions.length, + }); + + // Remap: judge returns 0-based index within aiConditions array + const judgeConditions = aiConditions.map((c, i) => ({ index: i, text: c.text })); + const judgeResult = await callAiJudge(agentOutput, judgeConditions, { cwd }); + + if (judgeResult >= 0 && judgeResult < aiConditions.length) { + const matched = aiConditions[judgeResult]!; + log.debug('AI judge matched condition', { + step: step.name, + judgeResult, + originalRuleIndex: matched.index, + condition: matched.text, + }); + return matched.index; + } + + log.debug('AI judge did not match any condition', { step: step.name }); + return -1; +} + +/** + * Final fallback: evaluate ALL rule conditions via AI judge. + * Unlike evaluateAiConditions (which only handles ai() flagged rules), + * this sends every rule's condition text to the judge. + * Returns the 0-based rule index, or -1 if no match. + */ +export async function evaluateAllConditionsViaAiJudge(step: WorkflowStep, agentOutput: string, cwd: string): Promise { + if (!step.rules || step.rules.length === 0) return -1; + + const conditions = step.rules.map((rule, i) => ({ index: i, text: rule.condition })); + + log.debug('Evaluating all conditions via AI judge (final fallback)', { + step: step.name, + conditionCount: conditions.length, + }); + + const judgeResult = await callAiJudge(agentOutput, conditions, { cwd }); + + if (judgeResult >= 0 && judgeResult < conditions.length) { + log.debug('AI judge (fallback) matched condition', { + step: step.name, + ruleIndex: judgeResult, + condition: conditions[judgeResult]!.text, + }); + return judgeResult; + } + + log.debug('AI judge (fallback) did not match any condition', { step: step.name }); + return -1; +} diff --git a/src/workflow/rule-utils.ts b/src/workflow/rule-utils.ts new file mode 100644 index 0000000..0d147ba --- /dev/null +++ b/src/workflow/rule-utils.ts @@ -0,0 +1,18 @@ +/** + * Shared rule utility functions used by both engine.ts and instruction-builder.ts. + */ + +import type { WorkflowStep } from '../models/types.js'; + +/** + * Check whether a step has tag-based rules (i.e., rules that require + * [STEP:N] tag output for detection). + * + * Returns false when all rules are ai() or aggregate conditions, + * meaning no tag-based status output is needed. + */ +export function hasTagBasedRules(step: WorkflowStep): boolean { + if (!step.rules || step.rules.length === 0) return false; + const allNonTagConditions = step.rules.every((r) => r.isAiCondition || r.isAggregateCondition); + return !allNonTagConditions; +}